mrmpi-1.0~20131122/0000755000175000017500000000000012252022520013352 5ustar mathieumathieumrmpi-1.0~20131122/examples/0000755000175000017500000000000012243675121015203 5ustar mathieumathieumrmpi-1.0~20131122/examples/in.luby0000644000175000017500000000050411535457750016516 0ustar mathieumathieu# OINK script for Luby's algorithm for maximal independent set finding variable t equal time variable p equal nprocs set scratch SCRATCH #set verbosity 1 #set timer 1 rmat 16 8 0.25 0.25 0.25 0.25 0.0 12345 -o NULL mre edge_upper -i mre -o NULL mre luby_find 12345 -i mre -o tmp.mis NULL print "MIS: $t secs on $p procs" mrmpi-1.0~20131122/examples/rmat2.cpp0000644000175000017500000002656711571472414016757 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ // MapReduce random RMAT matrix generation example in C++ // Syntax: rmat N Nz a b c d frac seed {outfile} // 2^N = # of rows in RMAT matrix // Nz = non-zeroes per row // a,b,c,d = RMAT params (must sum to 1.0) // frac = RMAT randomization param (frac < 1, 0 = no randomization) // seed = RNG seed (positive int) // outfile = output RMAT matrix to this filename (optional) // // Note that this implementation (rmat2.cpp) should have less communication // than the original implementation (rmat.cpp) since it uses two MapReduce // objects to avoid repeatedly communicating edges among processors. 
#include "mpi.h" #include "math.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "mapreduce.h" #include "keyvalue.h" #include using namespace MAPREDUCE_NS; #define INT_TYPE uint64_t #define CHECK_FOR_BLOCKS(multivalue, valuebytes, nvalues, totalnvalues) \ int macro_nblocks = 1; \ totalnvalues = nvalues; \ MapReduce *macro_mr = NULL; \ if (!(multivalue)) { \ macro_mr = (MapReduce *) (valuebytes); \ totalnvalues = macro_mr->multivalue_blocks(macro_nblocks); \ } void do_nothing(int, KeyValue *, void *); void generate(int, KeyValue *, void *); void cull(char *, int, char *, int, int *, KeyValue *, void *); void output(char *, int, char *, int, int *, KeyValue *, void *); void nonzero(char *, int, char *, int, int *, KeyValue *, void *); void degree(char *, int, char *, int, int *, KeyValue *, void *); void histo(char *, int, char *, int, int *, KeyValue *, void *); int ncompare(char *, int, char *, int); void stats(uint64_t, char *, int, char *, int, KeyValue *, void *); struct RMAT { // RMAT params int nlevels; // generate 2^nlevels vertices int nnonzero; // average degree INT_TYPE order; // 2^nlevels INT_TYPE ngenerate; // 2^nlevels*average degree = # edges double a,b,c,d,fraction; // rmat parameters char *outfile; FILE *fp; }; typedef INT_TYPE VERTEX; // vertex ID typedef struct { // edge = 2 vertices VERTEX vi,vj; } EDGE; /* ---------------------------------------------------------------------- */ int main(int narg, char **args) { MPI_Init(&narg,&args); int me,nprocs; MPI_Comm_rank(MPI_COMM_WORLD,&me); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); // parse command-line args if (narg != 9 && narg != 10) { if (me == 0) printf("Syntax: rmat N Nz a b c d frac seed {outfile}\n"); MPI_Abort(MPI_COMM_WORLD,1); } RMAT rmat; rmat.nlevels = atoi(args[1]); rmat.nnonzero = atoi(args[2]); rmat.a = atof(args[3]); rmat.b = atof(args[4]); rmat.c = atof(args[5]); rmat.d = atof(args[6]); rmat.fraction = atof(args[7]); int seed = atoi(args[8]); if (narg == 10) { int n 
= strlen(args[9]) + 1; rmat.outfile = new char[n]; strcpy(rmat.outfile,args[9]); } else rmat.outfile = NULL; if (rmat.a + rmat.b + rmat.c + rmat.d != 1.0) { if (me == 0) printf("ERROR: a,b,c,d must sum to 1\n"); MPI_Abort(MPI_COMM_WORLD,1); } if (rmat.fraction >= 1.0) { if (me == 0) printf("ERROR: fraction must be < 1\n"); MPI_Abort(MPI_COMM_WORLD,1); } srand48(seed+me); rmat.order = ((INT_TYPE) 1) << rmat.nlevels; // loop until desired number of unique nonzero entries MapReduce *mrnew = new MapReduce(MPI_COMM_WORLD); mrnew->timer = 0; MapReduce *mr = NULL; MPI_Barrier(MPI_COMM_WORLD); double tstart = MPI_Wtime(); int niterate = 0; INT_TYPE ntotal = (((INT_TYPE) 1) << rmat.nlevels) * rmat.nnonzero; INT_TYPE nremain = ntotal; while (nremain) { niterate++; rmat.ngenerate = nremain/nprocs; if (me < nremain % nprocs) rmat.ngenerate++; mrnew->map(nprocs,&generate,&rmat); mrnew->aggregate(NULL); if (niterate > 1) { mr->add(mrnew); } else { mr = mrnew; mrnew = new MapReduce(MPI_COMM_WORLD); mrnew->timer = 0; } uint64_t nunique = mr->convert(); nremain = ntotal - nunique; if (me == 0) std::cout << "Iteration " << niterate << " nunique = " << nunique << " of " << ntotal << "; nremain = " << nremain << std::endl; if (nunique == ntotal) break; mr->reduce(&cull,&rmat); } delete mrnew; MPI_Barrier(MPI_COMM_WORLD); double tstop = MPI_Wtime(); // output matrix if requested if (rmat.outfile) { char fname[128]; sprintf(fname,"%s.%d",rmat.outfile,me); rmat.fp = fopen(fname,"w"); if (rmat.fp == NULL) { printf("ERROR: Could not open output file"); MPI_Abort(MPI_COMM_WORLD,1); } MapReduce *mr2 = mr->copy(); mr2->reduce(&output,&rmat); fclose(rmat.fp); delete mr2; if (me == 0) { sprintf(fname,"%s.header",rmat.outfile); rmat.fp = fopen(fname,"w"); fprintf(rmat.fp, "%%%%MatrixMarket matrix coordinate real general\n%%\n"); fprintf(rmat.fp, "%ld %ld %ld\n", rmat.order, rmat.order, ntotal); fclose(rmat.fp); } } // stats to screen // include stats on number of nonzeroes per row if (me == 0) { 
std::cout << rmat.order << " rows in matrix" << std::endl; std::cout << ntotal << " nonzeroes in matrix" << std::endl; } mr->reduce(&nonzero,NULL); mr->collate(NULL); mr->reduce(°ree,NULL); mr->collate(NULL); mr->reduce(&histo,NULL); mr->gather(1); mr->sort_keys(&ncompare); INT_TYPE total = 0; mr->map(mr,&stats,&total); if (me == 0) std::cout << rmat.order-total << " rows with 0 nonzeroes\n" << std::endl; if (me == 0) std::cout << tstop-tstart << " secs to generate matrix on " << nprocs << " procs in " << niterate << " iterations" << std::endl; // clean up delete mr; delete [] rmat.outfile; MPI_Finalize(); } /* ---------------------------------------------------------------------- We need mr MapReduce object to be in state where it has a KeyValue structure, but we don't have anything to put in it yet. This function will do the trick. ------------------------------------------------------------------------- */ void do_nothing(int itask, KeyValue *kv, void *ptr) { } /* ---------------------------------------------------------------------- generate RMAT matrix entries emit one KV per edge: key = edge, value = NULL ------------------------------------------------------------------------- */ void generate(int itask, KeyValue *kv, void *ptr) { RMAT *rmat = (RMAT *) ptr; int nlevels = rmat->nlevels; INT_TYPE order = rmat->order; INT_TYPE ngenerate = rmat->ngenerate; double a = rmat->a; double b = rmat->b; double c = rmat->c; double d = rmat->d; double fraction = rmat->fraction; INT_TYPE i,j,delta; int ilevel; double a1,b1,c1,d1,total,rn; EDGE edge; for (INT_TYPE m = 0; m < ngenerate; m++) { delta = order >> 1; a1 = a; b1 = b; c1 = c; d1 = d; i = j = 0; for (ilevel = 0; ilevel < nlevels; ilevel++) { rn = drand48(); if (rn < a1) { } else if (rn < a1+b1) { j += delta; } else if (rn < a1+b1+c1) { i += delta; } else { i += delta; j += delta; } delta /= 2; if (fraction > 0.0) { a1 += a1*fraction * (drand48() - 0.5); b1 += b1*fraction * (drand48() - 0.5); c1 += c1*fraction * 
(drand48() - 0.5); d1 += d1*fraction * (drand48() - 0.5); total = a1+b1+c1+d1; a1 /= total; b1 /= total; c1 /= total; d1 /= total; } } edge.vi = i; edge.vj = j; kv->add((char *) &edge,sizeof(EDGE),NULL,0); } } /* ---------------------------------------------------------------------- eliminate duplicate edges input: one KMV per edge, MV has multiple entries if duplicates exist output: one KV per edge: key = edge, value = NULL ------------------------------------------------------------------------- */ void cull(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { kv->add(key,keybytes,NULL,0); } /* ---------------------------------------------------------------------- write edges to a file unique to this processor ------------------------------------------------------------------------- */ void output(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { RMAT *rmat = (RMAT *) ptr; EDGE *edge = (EDGE *) key; fprintf(rmat->fp,"%ld %ld 1\n",edge->vi+1,edge->vj+1); } /* ---------------------------------------------------------------------- enumerate nonzeroes in each row input: one KMV per edge output: one KV per edge: key = row I, value = NULL ------------------------------------------------------------------------- */ void nonzero(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { EDGE *edge = (EDGE *) key; kv->add((char *) &edge->vi,sizeof(VERTEX),NULL,0); } /* ---------------------------------------------------------------------- count nonzeroes in each row input: one KMV per row, MV has entry for each nonzero output: one KV: key = # of nonzeroes, value = NULL ------------------------------------------------------------------------- */ void degree(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { uint64_t total_nvalues; CHECK_FOR_BLOCKS(multivalue, valuebytes, nvalues, 
total_nvalues); kv->add((char *) &total_nvalues,sizeof(uint64_t),NULL,0); } /* ---------------------------------------------------------------------- count rows with same # of nonzeroes input: one KMV per nonzero count, MV has entry for each row output: one KV: key = # of nonzeroes, value = # of rows ------------------------------------------------------------------------- */ void histo(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { uint64_t total_nvalues; CHECK_FOR_BLOCKS(multivalue, valuebytes, nvalues, total_nvalues); kv->add(key,keybytes,(char *) &total_nvalues,sizeof(uint64_t)); } /* ---------------------------------------------------------------------- compare two counts order values by count, largest first ------------------------------------------------------------------------- */ int ncompare(char *p1, int len1, char *p2, int len2) { uint64_t i1 = *(uint64_t *) p1; uint64_t i2 = *(uint64_t *) p2; if (i1 > i2) return -1; else if (i1 < i2) return 1; else return 0; } /* ---------------------------------------------------------------------- print # of rows with a specific # of nonzeroes ------------------------------------------------------------------------- */ void stats(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { uint64_t *total = (uint64_t *) ptr; uint64_t nnz = *(uint64_t *) key; uint64_t ncount = *(uint64_t *) value; *total += ncount; std::cout << ncount << " rows with " << nnz << " nonzeros" << std::endl; } mrmpi-1.0~20131122/examples/Makefile.mac640000644000175000017500000000032611536010650017547 0ustar mathieumathieu# MPI-based makefile using mpic++ and mpicc. 
CC = mpicc CPP = mpic++ CCFLAGS = -g -O -I../src -m64 LINK = mpic++ -m64 LINKFLAGS = -g -O -m64 USRLIB = ../src/libmrmpi_mac64.a SYSLIB = include Makefile.common mrmpi-1.0~20131122/examples/in.cc0000644000175000017500000000046211535457750016133 0ustar mathieumathieu# OINK script for connected component finding variable t equal time variable p equal nprocs set scratch SCRATCH #set verbosity 1 #set timer 1 rmat 16 2 0.25 0.25 0.25 0.25 0.0 12345 -o NULL mre edge_upper -i mre -o NULL mre cc_find 0 -i mre -o tmp.cc mrc print "CC: $t secs on $p procs" cc_stats -i mrc mrmpi-1.0~20131122/examples/rmat.py0000755000175000017500000001153111347714257016534 0ustar mathieumathieu#!/usr/local/bin/python # ---------------------------------------------------------------------- # MR-MPI = MapReduce-MPI library # http://www.cs.sandia.gov/~sjplimp/mapreduce.html # Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories # # Copyright (2009) Sandia Corporation. Under the terms of Contract # DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains # certain rights in this software. This software is distributed under # the modified Berkeley Software Distribution (BSD) License. # # See the README file in the top-level MapReduce directory. 
# ------------------------------------------------------------------------- # MapReduce random RMAT matrix generation example in C++ # Syntax: rmat.py N Nz a b c d frac seed {outfile} # 2^N = # of rows in RMAT matrix # Nz = non-zeroes per row # a,b,c,d = RMAT params (must sum to 1.0) # frac = RMAT randomization param (frac < 1, 0 = no randomization) # seed = RNG seed (positive int) # outfile = output RMAT matrix to this filename (optional) import sys, random from mrmpi import mrmpi try: import pypar except: import pypar_serial as pypar # generate RMAT matrix entries # emit one KV per edge: key = edge, value = NULL def generate(itask,mr): for m in xrange(ngenerate): delta = order / 2 a1 = a; b1 = b; c1 = c; d1 = d i = j = 0 for ilevel in xrange(nlevels): rn = random.random() if rn < a1: pass elif rn < a1+b1: j += delta elif rn < a1+b1+c1: i += delta else: i += delta j += delta delta /= 2 if fraction > 0.0: a1 += a1*fraction * (drand48() - 0.5) b1 += b1*fraction * (drand48() - 0.5) c1 += c1*fraction * (drand48() - 0.5) d1 += d1*fraction * (drand48() - 0.5) total = a1+b1+c1+d1 a1 /= total b1 /= total c1 /= total d1 /= total mr.add((i,j),None) # eliminate duplicate edges # input: one KMV per edge, MV has multiple entries if duplicates exist # output: one KV per edge: key = edge, value = NULL def cull(key,mvalue,mr): mr.add(key,None) # write edges to a file unique to this processor def output(key,mvalue,mr): print >>fp,key[0]+1,key[1]+1,1 # enumerate nonzeroes in each row # input: one KMV per edge # output: one KV per edge: key = row I, value = NULL def nonzero(key,mvalue,mr): mr.add(key[0],None) # count nonzeroes in each row # input: one KMV per row, MV has entry for each nonzero # output: one KV: key = # of nonzeroes, value = NULL def degree(key,mvalue,mr): mr.add(len(mvalue),None); # count rows with same # of nonzeroes # input: one KMV per nonzero count, MV has entry for each row # output: one KV: key = # of nonzeroes, value = # of rows def histo(key,mvalue,mr): 
mr.add(key,len(mvalue)) # compare two counts # order values by count, largest first def ncompare(one,two): if one > two: return -1; elif one < two: return 1; else: return 0; # print # of rows with a specific # of nonzeroes def stats(itask,key,value,mr): global total total += value; print "%d rows with %d nonzeroes" % (value,key) # main program nprocs = pypar.size() me = pypar.rank() if len(sys.argv) != 9 and len(sys.argv) != 10: if me == 0: print "Syntax: N Nz a b c d frac seed {outfile}" sys.exit() nlevels = int(sys.argv[1]) nnonzero = int(sys.argv[2]) a = float(sys.argv[3]) b = float(sys.argv[4]) c = float(sys.argv[5]) d = float(sys.argv[6]) fraction = float(sys.argv[7]) seed = int(sys.argv[8]) if len(sys.argv) == 10: outfile = sys.argv[9] else: outfile = None if a+b+c+d != 1.0: if me == 0: print "ERROR: a,b,c,d must sum to 1" sys.exit() if fraction >= 1.0: if me == 0: print "ERROR: fraction must be < 1" sys.exit() random.seed(seed+me) order = 1 << nlevels mr = mrmpi() mr.verbosity(2) mr.timer(1); # loop until desired number of unique nonzero entries pypar.barrier() tstart = pypar.time() niterate = 0 ntotal = (1 << nlevels) * nnonzero nremain = ntotal while nremain: niterate += 1 ngenerate = nremain/nprocs if me < nremain % nprocs: ngenerate += 1 mr.map(nprocs,generate,None,1) nunique = mr.collate() if nunique == ntotal: break mr.reduce(cull) nremain = ntotal - nunique pypar.barrier() tstop = pypar.time() # output matrix if requested if outfile: fp = open(outfile + "." 
+ str(me),"w") if not fp: print "ERROR: Could not open output file" sys.exit() mr2 = mr.copy() mr2.reduce(output) fp.close() mr2.destroy() # stats to screen # include stats on number of nonzeroes per row if me == 0: print order,"rows in matrix" print ntotal,"nonzeroes in matrix" mr.reduce(nonzero) mr.collate() mr.reduce(degree) mr.collate() mr.reduce(histo) mr.gather(1) mr.sort_keys(ncompare) total = 0 mr.map_mr(mr,stats) if me == 0: print order-total,"rows with 0 nonzeroes" if me == 0: print "%g secs to generate matrix on %d procs in %d iterations" % \ (tstop-tstart,nprocs,niterate) mr.destroy() pypar.finalize() mrmpi-1.0~20131122/examples/cwordfreq.c0000644000175000017500000001107011347544230017342 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ /* MapReduce word frequency example in C Syntax: cwordfreq file1 file2 ... 
(1) reads all files, parses into words separated by whitespace (2) counts occurrence of each word in all files (3) prints top 10 words */ #include "mpi.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "sys/stat.h" #include "cmapreduce.h" void fileread(int, void *, void *); void sum(char *, int, char *, int, int *, void *, void *); int ncompare(char *, int, char *, int); void output(uint64_t, char *, int, char *, int, void *, void *); typedef struct { int n,limit,flag; } Count; /* ---------------------------------------------------------------------- */ int main(int narg, char **args) { int me,nprocs; int nwords,nunique; double tstart,tstop; Count count; MPI_Init(&narg,&args); MPI_Comm_rank(MPI_COMM_WORLD,&me); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); if (narg <= 1) { if (me == 0) printf("Syntax: cwordfreq file1 file2 ...\n"); MPI_Abort(MPI_COMM_WORLD,1); } void *mr = MR_create(MPI_COMM_WORLD); MR_set_verbosity(mr,2); MR_set_timer(mr,1); MPI_Barrier(MPI_COMM_WORLD); tstart = MPI_Wtime(); nwords = MR_map(mr,narg-1,&fileread,&args[1]); MR_collate(mr,NULL); nunique = MR_reduce(mr,&sum,NULL); MPI_Barrier(MPI_COMM_WORLD); tstop = MPI_Wtime(); MR_sort_values(mr,&ncompare); count.n = 0; count.limit = 10; count.flag = 0; MR_map_mr(mr,mr,&output,&count); MR_gather(mr,1); MR_sort_values(mr,&ncompare); count.n = 0; count.limit = 10; count.flag = 1; MR_map_mr(mr,mr,&output,&count); MR_destroy(mr); if (me == 0) { printf("%d total words, %d unique words\n",nwords,nunique); printf("Time to wordcount %d files on %d procs = %g (secs)\n", narg-1,nprocs,tstop-tstart); } MPI_Finalize(); } /* ---------------------------------------------------------------------- read a file for each word in file, emit key = word, value = NULL ------------------------------------------------------------------------- */ void fileread(int itask, void *kv, void *ptr) { // filesize = # of bytes in file char **files = (char **) ptr; struct stat stbuf; int flag = stat(files[itask],&stbuf); 
if (flag < 0) { printf("ERROR: Could not query file size\n"); MPI_Abort(MPI_COMM_WORLD,1); } int filesize = stbuf.st_size; FILE *fp = fopen(files[itask],"r"); char text[filesize+1]; int nchar = fread(text,1,filesize,fp); text[nchar] = '\0'; fclose(fp); char *whitespace = " \t\n\f\r\0"; char *word = strtok(text,whitespace); while (word) { MR_kv_add(kv,word,strlen(word)+1,NULL,0); word = strtok(NULL,whitespace); } } /* ---------------------------------------------------------------------- count word occurrence emit key = word, value = # of multi-values ------------------------------------------------------------------------- */ void sum(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, void *kv, void *ptr) { MR_kv_add(kv,key,keybytes,(char *) &nvalues,sizeof(int)); } /* ---------------------------------------------------------------------- compare two counts order values by count, largest first ------------------------------------------------------------------------- */ int ncompare(char *p1, int len1, char *p2, int len2) { int i1 = *(int *) p1; int i2 = *(int *) p2; if (i1 > i2) return -1; else if (i1 < i2) return 1; else return 0; } /* ---------------------------------------------------------------------- process a word and its count depending on flag, emit KV or print it, up to limit ------------------------------------------------------------------------- */ void output(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, void *kv, void *ptr) { Count *count = (Count *) ptr; count->n++; if (count->n > count->limit) return; int n = *(int *) value; if (count->flag) printf("%d %s\n",n,key); else MR_kv_add(kv,key,keybytes,(char *) &n,sizeof(int)); } mrmpi-1.0~20131122/examples/wordfreq.py0000755000175000017500000000456311347544230017421 0ustar mathieumathieu#!/usr/local/bin/python # ---------------------------------------------------------------------- # MR-MPI = MapReduce-MPI library # 
http://www.cs.sandia.gov/~sjplimp/mapreduce.html # Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories # # Copyright (2009) Sandia Corporation. Under the terms of Contract # DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains # certain rights in this software. This software is distributed under # the modified Berkeley Software Distribution (BSD) License. # # See the README file in the top-level MapReduce directory. # ------------------------------------------------------------------------- # MapReduce word frequency example in Python # Syntax: wordfreq.py file1 file2 ... # (1) reads all files, parses into words separated by whitespace # (2) counts occurrence of each word in all files # (3) prints top 10 words import sys from mrmpi import mrmpi try: import pypar except: import pypar_serial as pypar # read a file # for each word in file, emit key = word, value = NULL def fileread(itask,mr): text = open(files[itask]).read() words = text.split() for word in words: mr.add(word,None) # count word occurrence # emit key = word, value = # of multi-values def sum(key,mvalue,mr): mr.add(key,len(mvalue)) # compare two counts # order values by count, largest first def ncompare(key1,key2): if key1 < key2: return 1 elif key1 > key2: return -1 else: return 0 # process a word and its count # depending on flag, emit KV or print it, up to limit def output(itask,key,value,mr): count[0] += 1 if count[0] > count[1]: return if count[2]: print value,key else: mr.add(key,value) # main program nprocs = pypar.size() me = pypar.rank() if len(sys.argv) < 2: print "Syntax: wordfreq.py file1 file2 ..." 
sys.exit() files = sys.argv[1:] mr = mrmpi() mr.verbosity(2) mr.timer(1); pypar.barrier() tstart = pypar.time() nwords = mr.map(len(files),fileread) mr.collate() nunique = mr.reduce(sum) pypar.barrier() tstop = pypar.time() mr.sort_values(ncompare) count = [0,10,0] mr.map_mr(mr,output) mr.gather(1) mr.sort_values(ncompare) count = [0,10,1] mr.map_mr(mr,output) mr.destroy() # output if me == 0: print "%d total words, %d unique words" % (nwords,nunique) print "Time to process %d files on %d procs = %g (secs)" % \ (len(files),nprocs,tstop-tstart); pypar.finalize() mrmpi-1.0~20131122/examples/in.sssp0000755000175000017500000000042712117371716016534 0ustar mathieumathieuvariable t equal time variable p equal nprocs #set scratch SCRATCH #set verbosity 1 #set timer 1 rmat 4 3 0.25 0.25 0.25 0.25 0.0 12345 -o NULL mre #edge_upper -i mre -o NULL mre mre map/mr mre add_weight sssp 10 12345 -i mre -o tmp.sssp NULL print "SSSP: $t secs on $p procs" mrmpi-1.0~20131122/examples/in.rmat0000644000175000017500000000054411535457750016512 0ustar mathieumathieu# OINK script for RMAT generation # NOTE: OINK uses 64-bit vertex IDs, unlike rmat.cpp variable t equal time variable p equal nprocs set scratch SCRATCH #set verbosity 1 #set timer 1 rmat 16 8 0.25 0.25 0.25 0.25 0.0 12345 -o tmp.rmat mre #rmat2 16 8 0.25 0.25 0.25 0.25 0.0 12345 -o tmp.rmat mre print "RMAT: $t secs on $p procs" degree_stats 1 -i mre mrmpi-1.0~20131122/examples/Makefile.serial0000644000175000017500000000034411524063411020114 0ustar mathieumathieu# Serial Makefile for MapReduce examples, g++, no MPI CC = gcc CPP = g++ CCFLAGS = -O -I../src -I../mpistubs LINK = g++ LINKFLAGS = -O USRLIB = ../src/libmrmpi_serial.a ../mpistubs/libmpi.a SYSLIB = include Makefile.common mrmpi-1.0~20131122/examples/in.tri0000644000175000017500000000043011535457750016337 0ustar mathieumathieu# OINK script for triangle finding variable t equal time variable p equal nprocs set scratch SCRATCH #set verbosity 1 #set timer 1 rmat 16 8 0.25 
0.25 0.25 0.25 0.0 12345 -o NULL mre edge_upper -i mre -o NULL mre tri_find -i mre -o tmp.tri mrt print "TRI: $t secs on $p procs" mrmpi-1.0~20131122/examples/rmat.cpp0000644000175000017500000002236612011225073016651 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ // MapReduce random RMAT matrix generation example in C++ // Syntax: rmat N Nz a b c d frac seed {outfile} // 2^N = # of rows in RMAT matrix // Nz = non-zeroes per row // a,b,c,d = RMAT params (must sum to 1.0) // frac = RMAT randomization param (frac < 1, 0 = no randomization) // seed = RNG seed (positive int) // outfile = output RMAT matrix to this filename (optional) #include "mpi.h" #include "math.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "mapreduce.h" #include "keyvalue.h" using namespace MAPREDUCE_NS; void generate(int, KeyValue *, void *); void cull(char *, int, char *, int, int *, KeyValue *, void *); void output(char *, int, char *, int, int *, KeyValue *, void *); void nonzero(char *, int, char *, int, int *, KeyValue *, void *); void degree(char *, int, char *, int, int *, KeyValue *, void *); void histo(char *, int, char *, int, int *, KeyValue *, void *); int ncompare(char *, int, char *, int); void stats(uint64_t, char *, int, char *, int, KeyValue *, void *); struct RMAT { // RMAT params int nlevels,order; int nnonzero; int ngenerate; double a,b,c,d,fraction; char *outfile; FILE *fp; 
}; // NOTE: 32-bit vertex IDs, unlike in.rmat for OINK // other formatting code below would need to be changed for 64-bit vertex IDs typedef int VERTEX; // vertex ID typedef struct { // edge = 2 vertices VERTEX vi,vj; } EDGE; /* ---------------------------------------------------------------------- */ int main(int narg, char **args) { MPI_Init(&narg,&args); int me,nprocs; MPI_Comm_rank(MPI_COMM_WORLD,&me); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); // parse command-line args if (narg != 9 && narg != 10) { if (me == 0) printf("Syntax: rmat N Nz a b c d frac seed {outfile}\n"); MPI_Abort(MPI_COMM_WORLD,1); } RMAT rmat; rmat.nlevels = atoi(args[1]); rmat.nnonzero = atoi(args[2]); rmat.a = atof(args[3]); rmat.b = atof(args[4]); rmat.c = atof(args[5]); rmat.d = atof(args[6]); rmat.fraction = atof(args[7]); int seed = atoi(args[8]); if (narg == 10) { int n = strlen(args[9]) + 1; rmat.outfile = new char[n]; strcpy(rmat.outfile,args[9]); } else rmat.outfile = NULL; if (rmat.a + rmat.b + rmat.c + rmat.d != 1.0) { if (me == 0) printf("ERROR: a,b,c,d must sum to 1\n"); MPI_Abort(MPI_COMM_WORLD,1); } if (rmat.fraction >= 1.0) { if (me == 0) printf("ERROR: fraction must be < 1\n"); MPI_Abort(MPI_COMM_WORLD,1); } srand48(seed+me); rmat.order = 1 << rmat.nlevels; MapReduce *mr = new MapReduce(MPI_COMM_WORLD); mr->verbosity = 0; mr->timer = 1; // loop until desired number of unique nonzero entries MPI_Barrier(MPI_COMM_WORLD); double tstart = MPI_Wtime(); int niterate = 0; int ntotal = (1 << rmat.nlevels) * rmat.nnonzero; int nremain = ntotal; while (nremain) { niterate++; rmat.ngenerate = nremain/nprocs; if (me < nremain % nprocs) rmat.ngenerate++; mr->map(nprocs,&generate,&rmat,1); int nunique = mr->collate(NULL); if (nunique == ntotal) break; mr->reduce(&cull,&rmat); nremain = ntotal - nunique; } MPI_Barrier(MPI_COMM_WORLD); double tstop = MPI_Wtime(); // output matrix if requested if (rmat.outfile) { char fname[128]; sprintf(fname,"%s.%d",rmat.outfile,me); rmat.fp = 
fopen(fname,"w"); if (rmat.fp == NULL) { printf("ERROR: Could not open output file"); MPI_Abort(MPI_COMM_WORLD,1); } MapReduce *mr2 = mr->copy(); mr2->reduce(&output,&rmat); fclose(rmat.fp); delete mr2; } // stats to screen // include stats on number of nonzeroes per row if (me == 0) { printf("%d rows in matrix\n",rmat.order); printf("%d nonzeroes in matrix\n",ntotal); } mr->reduce(&nonzero,NULL); mr->collate(NULL); mr->reduce(°ree,NULL); mr->collate(NULL); mr->reduce(&histo,NULL); mr->gather(1); mr->sort_keys(&ncompare); int total = 0; mr->map(mr,&stats,&total); if (me == 0) printf("%d rows with 0 nonzeroes\n",rmat.order-total); if (me == 0) printf("%g secs to generate matrix on %d procs in %d iterations\n", tstop-tstart,nprocs,niterate); // clean up delete mr; delete [] rmat.outfile; MPI_Finalize(); } /* ---------------------------------------------------------------------- generate RMAT matrix entries emit one KV per edge: key = edge, value = NULL ------------------------------------------------------------------------- */ void generate(int itask, KeyValue *kv, void *ptr) { RMAT *rmat = (RMAT *) ptr; int nlevels = rmat->nlevels; int order = rmat->order; int ngenerate = rmat->ngenerate; double a = rmat->a; double b = rmat->b; double c = rmat->c; double d = rmat->d; double fraction = rmat->fraction; int i,j,ilevel,delta; double a1,b1,c1,d1,total,rn; EDGE edge; for (int m = 0; m < ngenerate; m++) { delta = order >> 1; a1 = a; b1 = b; c1 = c; d1 = d; i = j = 0; for (ilevel = 0; ilevel < nlevels; ilevel++) { rn = drand48(); if (rn < a1) { } else if (rn < a1+b1) { j += delta; } else if (rn < a1+b1+c1) { i += delta; } else { i += delta; j += delta; } delta /= 2; if (fraction > 0.0) { a1 += a1*fraction * (drand48() - 0.5); b1 += b1*fraction * (drand48() - 0.5); c1 += c1*fraction * (drand48() - 0.5); d1 += d1*fraction * (drand48() - 0.5); total = a1+b1+c1+d1; a1 /= total; b1 /= total; c1 /= total; d1 /= total; } } edge.vi = i; edge.vj = j; kv->add((char *) 
&edge,sizeof(EDGE),NULL,0); } } /* ---------------------------------------------------------------------- eliminate duplicate edges input: one KMV per edge, MV has multiple entries if duplicates exist output: one KV per edge: key = edge, value = NULL ------------------------------------------------------------------------- */ void cull(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { kv->add(key,keybytes,NULL,0); } /* ---------------------------------------------------------------------- write edges to a file unique to this processor ------------------------------------------------------------------------- */ void output(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { RMAT *rmat = (RMAT *) ptr; EDGE *edge = (EDGE *) key; fprintf(rmat->fp,"%d %d 1\n",edge->vi+1,edge->vj+1); } /* ---------------------------------------------------------------------- enumerate nonzeroes in each row input: one KMV per edge output: one KV per edge: key = row I, value = NULL ------------------------------------------------------------------------- */ void nonzero(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { EDGE *edge = (EDGE *) key; kv->add((char *) &edge->vi,sizeof(VERTEX),NULL,0); } /* ---------------------------------------------------------------------- count nonzeroes in each row input: one KMV per row, MV has entry for each nonzero output: one KV: key = # of nonzeroes, value = NULL ------------------------------------------------------------------------- */ void degree(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { kv->add((char *) &nvalues,sizeof(int),NULL,0); } /* ---------------------------------------------------------------------- count rows with same # of nonzeroes input: one KMV per nonzero count, MV has entry for each row output: one KV: key = # of nonzeroes, value 
= # of rows ------------------------------------------------------------------------- */ void histo(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { kv->add(key,keybytes,(char *) &nvalues,sizeof(int)); } /* ---------------------------------------------------------------------- compare two counts order values by count, largest first ------------------------------------------------------------------------- */ int ncompare(char *p1, int len1, char *p2, int len2) { int i1 = *(int *) p1; int i2 = *(int *) p2; if (i1 > i2) return -1; else if (i1 < i2) return 1; else return 0; } /* ---------------------------------------------------------------------- print # of rows with a specific # of nonzeroes ------------------------------------------------------------------------- */ void stats(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { int *total = (int *) ptr; int nnz = *(int *) key; int ncount = *(int *) value; *total += ncount; printf("%d rows with %d nonzeroes\n",ncount,nnz); } mrmpi-1.0~20131122/examples/in.wordfreq0000644000175000017500000000041511535457750017375 0ustar mathieumathieu# OINK script for word frequency counting # run as: oink_machine -var files *.cpp < in.wordfreq variable t equal time variable p equal nprocs set scratch SCRATCH #set verbosity 1 #set timer 1 wordfreq 10 -i v_files -o NULL NULL print "WordFreq: $t secs on $p procs" mrmpi-1.0~20131122/examples/Makefile.common0000644000175000017500000000102611535460735020137 0ustar mathieumathieu# Targets all: wordfreq cwordfreq rmat crmat wordfreq: wordfreq.o $(USRLIB) $(LINK) $(LINKFLAGS) wordfreq.o $(USRLIB) $(SYSLIB) -o wordfreq cwordfreq: cwordfreq.o $(USRLIB) $(LINK) $(LINKFLAGS) cwordfreq.o $(USRLIB) $(SYSLIB) -o cwordfreq rmat: rmat.o $(USRLIB) $(LINK) $(LINKFLAGS) rmat.o $(USRLIB) $(SYSLIB) -o rmat crmat: crmat.o $(USRLIB) $(LINK) $(LINKFLAGS) crmat.o $(USRLIB) $(SYSLIB) -o crmat clean: rm *.o wordfreq 
# dummy Pypar routines
# can use if running in serial, and Pypar isn't installed
# Pypar (http://datamining.anu.edu.au/~ole/pypar) is a Python wrapper on MPI

import time as clock

# time.clock() was removed in Python 3.8; use perf_counter() when it exists
# and fall back to clock() only on interpreters that predate perf_counter()
_timer = getattr(clock, "perf_counter", None) or clock.clock

def finalize():
  # nothing to shut down in serial
  pass

def size():
  # a serial run always has exactly one process
  return 1

def rank():
  # the single serial process is rank 0
  return 0

def barrier():
  # nothing to synchronize with in serial
  pass

def time():
  # timer reading in seconds as a float; only differences are meaningful
  return _timer()
------------------------------------------------------------------------- */ /* MapReduce random RMAT matrix generation example in C++ Syntax: rmat N Nz a b c d frac seed {outfile} 2^N = # of rows in RMAT matrix Nz = non-zeroes per row a,b,c,d = RMAT params (must sum to 1.0) frac = RMAT randomization param (frac < 1, 0 = no randomization) seed = RNG seed (positive int) outfile = output RMAT matrix to this filename (optional) */ #include "mpi.h" #include "math.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "cmapreduce.h" void generate(int, void *, void *); void cull(char *, int, char *, int, int *, void *, void *); void output(char *, int, char *, int, int *, void *, void *); void nonzero(char *, int, char *, int, int *, void *, void *); void degree(char *, int, char *, int, int *, void *, void *); void histo(char *, int, char *, int, int *, void *, void *); int ncompare(char *, int, char *, int); void stats(uint64_t, char *, int, char *, int, void *, void *); typedef struct { // RMAT params int nlevels,order; int nnonzero; int ngenerate; double a,b,c,d,fraction; char *outfile; FILE *fp; } RMAT; typedef int VERTEX; // vertex ID typedef struct { // edge = 2 vertices VERTEX vi,vj; } EDGE; /* ---------------------------------------------------------------------- */ int main(int narg, char **args) { int me,nprocs; MPI_Init(&narg,&args); MPI_Comm_rank(MPI_COMM_WORLD,&me); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); // parse command-line args if (narg != 9 && narg != 10) { if (me == 0) printf("Syntax: rmat N Nz a b c d frac seed {outfile}\n"); MPI_Abort(MPI_COMM_WORLD,1); } RMAT rmat; rmat.nlevels = atoi(args[1]); rmat.nnonzero = atoi(args[2]); rmat.a = atof(args[3]); rmat.b = atof(args[4]); rmat.c = atof(args[5]); rmat.d = atof(args[6]); rmat.fraction = atof(args[7]); int seed = atoi(args[8]); if (narg == 10) { int n = strlen(args[9]) + 1; rmat.outfile = (char *) malloc(n*sizeof(char)); strcpy(rmat.outfile,args[9]); } else rmat.outfile = NULL; if 
(rmat.a + rmat.b + rmat.c + rmat.d != 1.0) { if (me == 0) printf("ERROR: a,b,c,d must sum to 1\n"); MPI_Abort(MPI_COMM_WORLD,1); } if (rmat.fraction >= 1.0) { if (me == 0) printf("ERROR: fraction must be < 1\n"); MPI_Abort(MPI_COMM_WORLD,1); } srand48(seed+me); rmat.order = 1 << rmat.nlevels; void *mr = MR_create(MPI_COMM_WORLD); MR_set_verbosity(mr,2); MR_set_timer(mr,1); // loop until desired number of unique nonzero entries MPI_Barrier(MPI_COMM_WORLD); double tstart = MPI_Wtime(); int niterate = 0; int ntotal = (1 << rmat.nlevels) * rmat.nnonzero; int nremain = ntotal; while (nremain) { niterate++; rmat.ngenerate = nremain/nprocs; if (me < nremain % nprocs) rmat.ngenerate++; MR_map_add(mr,nprocs,&generate,&rmat,1); int nunique = MR_collate(mr,NULL); if (nunique == ntotal) break; MR_reduce(mr,&cull,&rmat); nremain = ntotal - nunique; } MPI_Barrier(MPI_COMM_WORLD); double tstop = MPI_Wtime(); // output matrix if requested if (rmat.outfile) { char fname[128]; sprintf(fname,"%s.%d",rmat.outfile,me); rmat.fp = fopen(fname,"w"); if (rmat.fp == NULL) { printf("ERROR: Could not open output file\n"); MPI_Abort(MPI_COMM_WORLD,1); } void *mr2 = MR_copy(mr); MR_reduce(mr2,&output,&rmat); fclose(rmat.fp); MR_destroy(mr2); } // stats to screen // include stats on number of nonzeroes per row if (me == 0) { printf("%d rows in matrix\n",rmat.order); printf("%d nonzeroes in matrix\n",ntotal); } MR_reduce(mr,&nonzero,NULL); MR_collate(mr,NULL); MR_reduce(mr,°ree,NULL); MR_collate(mr,NULL); MR_reduce(mr,&histo,NULL); MR_gather(mr,1); MR_sort_keys(mr,&ncompare); int total = 0; MR_map_mr(mr,mr,&stats,&total); if (me == 0) printf("%d rows with 0 nonzeroes\n",rmat.order-total); if (me == 0) printf("%g secs to generate matrix on %d procs in %d iterations\n", tstop-tstart,nprocs,niterate); // clean up MR_destroy(mr); free(rmat.outfile); MPI_Finalize(); } /* ---------------------------------------------------------------------- generate RMAT matrix entries emit one KV per edge: key = 
edge, value = NULL ------------------------------------------------------------------------- */ void generate(int itask, void *kv, void *ptr) { RMAT *rmat = (RMAT *) ptr; int nlevels = rmat->nlevels; int order = rmat->order; int ngenerate = rmat->ngenerate; double a = rmat->a; double b = rmat->b; double c = rmat->c; double d = rmat->d; double fraction = rmat->fraction; int i,j,ilevel,delta,m; double a1,b1,c1,d1,total,rn; EDGE edge; for (m = 0; m < ngenerate; m++) { delta = order >> 1; a1 = a; b1 = b; c1 = c; d1 = d; i = j = 0; for (ilevel = 0; ilevel < nlevels; ilevel++) { rn = drand48(); if (rn < a1) { } else if (rn < a1+b1) { j += delta; } else if (rn < a1+b1+c1) { i += delta; } else { i += delta; j += delta; } delta /= 2; if (fraction > 0.0) { a1 += a1*fraction * (drand48() - 0.5); b1 += b1*fraction * (drand48() - 0.5); c1 += c1*fraction * (drand48() - 0.5); d1 += d1*fraction * (drand48() - 0.5); total = a1+b1+c1+d1; a1 /= total; b1 /= total; c1 /= total; d1 /= total; } } edge.vi = i; edge.vj = j; MR_kv_add(kv,(char *) &edge,sizeof(EDGE),NULL,0); } } /* ---------------------------------------------------------------------- eliminate duplicate edges input: one KMV per edge, MV has multiple entries if duplicates exist output: one KV per edge: key = edge, value = NULL ------------------------------------------------------------------------- */ void cull(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, void *kv, void *ptr) { MR_kv_add(kv,key,keybytes,NULL,0); } /* ---------------------------------------------------------------------- write edges to a file unique to this processor ------------------------------------------------------------------------- */ void output(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, void *kv, void *ptr) { RMAT *rmat = (RMAT *) ptr; EDGE *edge = (EDGE *) key; fprintf(rmat->fp,"%d %d 1\n",edge->vi+1,edge->vj+1); } /* ---------------------------------------------------------------------- 
enumerate nonzeroes in each row input: one KMV per edge output: one KV per edge: key = row I, value = NULL ------------------------------------------------------------------------- */ void nonzero(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, void *kv, void *ptr) { EDGE *edge = (EDGE *) key; MR_kv_add(kv,(char *) &edge->vi,sizeof(VERTEX),NULL,0); } /* ---------------------------------------------------------------------- count nonzeroes in each row input: one KMV per row, MV has entry for each nonzero output: one KV: key = # of nonzeroes, value = NULL ------------------------------------------------------------------------- */ void degree(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, void *kv, void *ptr) { MR_kv_add(kv,(char *) &nvalues,sizeof(int),NULL,0); } /* ---------------------------------------------------------------------- count rows with same # of nonzeroes input: one KMV per nonzero count, MV has entry for each row output: one KV: key = # of nonzeroes, value = # of rows ------------------------------------------------------------------------- */ void histo(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, void *kv, void *ptr) { MR_kv_add(kv,key,keybytes,(char *) &nvalues,sizeof(int)); } /* ---------------------------------------------------------------------- compare two counts order values by count, largest first ------------------------------------------------------------------------- */ int ncompare(char *p1, int len1, char *p2, int len2) { int i1 = *(int *) p1; int i2 = *(int *) p2; if (i1 > i2) return -1; else if (i1 < i2) return 1; else return 0; } /* ---------------------------------------------------------------------- print # of rows with a specific # of nonzeroes ------------------------------------------------------------------------- */ void stats(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, void *kv, void *ptr) { int *total = (int *) ptr; 
int nnz = *(int *) key; int ncount = *(int *) value; *total += ncount; printf("%d rows with %d nonzeroes\n",ncount,nnz); } mrmpi-1.0~20131122/examples/wordfreq.cpp0000644000175000017500000001126411515657163017553 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ // MapReduce word frequency example in C++ // Syntax: wordfreq file1 dir1 file2 dir2 ... // (1) read all files and files in dirs // (2) parse into words separated by whitespace // (3) count occurrence of each word in all files // (4) print top 10 words #include "mpi.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "sys/stat.h" #include "mapreduce.h" #include "keyvalue.h" using namespace MAPREDUCE_NS; void fileread(int, char *, KeyValue *, void *); void sum(char *, int, char *, int, int *, KeyValue *, void *); int ncompare(char *, int, char *, int); void output(uint64_t, char *, int, char *, int, KeyValue *, void *); struct Count { int n,limit,flag; }; /* ---------------------------------------------------------------------- */ int main(int narg, char **args) { MPI_Init(&narg,&args); int me,nprocs; MPI_Comm_rank(MPI_COMM_WORLD,&me); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); if (narg <= 1) { if (me == 0) printf("Syntax: wordfreq file1 file2 ...\n"); MPI_Abort(MPI_COMM_WORLD,1); } MapReduce *mr = new MapReduce(MPI_COMM_WORLD); mr->verbosity = 2; mr->timer = 1; //mr->memsize = 1; //mr->outofcore = 1; 
MPI_Barrier(MPI_COMM_WORLD); double tstart = MPI_Wtime(); int nwords = mr->map(narg-1,&args[1],0,1,0,fileread,NULL); int nfiles = mr->mapfilecount; mr->collate(NULL); int nunique = mr->reduce(sum,NULL); MPI_Barrier(MPI_COMM_WORLD); double tstop = MPI_Wtime(); mr->sort_values(&ncompare); Count count; count.n = 0; count.limit = 10; count.flag = 0; mr->map(mr,output,&count); mr->gather(1); mr->sort_values(ncompare); count.n = 0; count.limit = 10; count.flag = 1; mr->map(mr,output,&count); delete mr; if (me == 0) { printf("%d total words, %d unique words\n",nwords,nunique); printf("Time to process %d files on %d procs = %g (secs)\n", nfiles,nprocs,tstop-tstart); } MPI_Finalize(); } /* ---------------------------------------------------------------------- read a file for each word in file, emit key = word, value = NULL ------------------------------------------------------------------------- */ void fileread(int itask, char *fname, KeyValue *kv, void *ptr) { // filesize = # of bytes in file struct stat stbuf; int flag = stat(fname,&stbuf); if (flag < 0) { printf("ERROR: Could not query file size\n"); MPI_Abort(MPI_COMM_WORLD,1); } int filesize = stbuf.st_size; FILE *fp = fopen(fname,"r"); char *text = new char[filesize+1]; int nchar = fread(text,1,filesize,fp); text[nchar] = '\0'; fclose(fp); char *whitespace = " \t\n\f\r\0"; char *word = strtok(text,whitespace); while (word) { kv->add(word,strlen(word)+1,NULL,0); word = strtok(NULL,whitespace); } delete [] text; } /* ---------------------------------------------------------------------- count word occurrence emit key = word, value = # of multi-values ------------------------------------------------------------------------- */ void sum(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { kv->add(key,keybytes,(char *) &nvalues,sizeof(int)); } /* ---------------------------------------------------------------------- compare two counts order values by count, largest first 
------------------------------------------------------------------------- */ int ncompare(char *p1, int len1, char *p2, int len2) { int i1 = *(int *) p1; int i2 = *(int *) p2; if (i1 > i2) return -1; else if (i1 < i2) return 1; else return 0; } /* ---------------------------------------------------------------------- process a word and its count depending on flag, emit KV or print it, up to limit ------------------------------------------------------------------------- */ void output(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { Count *count = (Count *) ptr; count->n++; if (count->n > count->limit) return; int n = *(int *) value; if (count->flag) printf("%d %s\n",n,key); else kv->add(key,keybytes,(char *) &n,sizeof(int)); } mrmpi-1.0~20131122/oink/0000755000175000017500000000000012243675122014326 5ustar mathieumathieumrmpi-1.0~20131122/oink/main.cpp0000644000175000017500000000145611524066004015756 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */ #include "mpi.h" #include "oink.h" #include "input.h" using namespace OINK_NS; /* ---------------------------------------------------------------------- main program to drive OINK ------------------------------------------------------------------------- */ int main(int argc, char **argv) { MPI_Init(&argc,&argv); OINK *oink = new OINK(argc,argv,MPI_COMM_WORLD); oink->input->file(); delete oink; MPI_Finalize(); } mrmpi-1.0~20131122/oink/oink.h0000644000175000017500000000243011524066004015430 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #ifndef OINK_OINK_H #define OINK_OINK_H #include "mpi.h" #include "stdio.h" namespace OINK_NS { class OINK { public: // ptrs to fundamental OINK classes class Memory *memory; // memory allocation functions class Error *error; // error handling class Universe *universe; // universe of processors class Input *input; // input script processing // ptrs to top-level OINK-specific classes class Object *obj; // MapReduce objects class MRMPI *mrmpi; // wrapper on MR-MPI library methods MPI_Comm world; // MPI communicator FILE *infile; // infile FILE *screen; // screen output FILE *logfile; // logfile OINK(int, char **, MPI_Comm); ~OINK(); void create(); void init(); void destroy(); }; } #endif mrmpi-1.0~20131122/oink/sssp.h0000644000175000017500000000555111536440001015463 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the 
README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #ifdef COMMAND_CLASS CommandStyle(sssp,SSSP) #else #ifndef OINK_SSSP_H #define OINK_SSSP_H #include "command.h" #include "keyvalue.h" #include "float.h" #include "typedefs.h" #include using MAPREDUCE_NS::KeyValue; namespace OINK_NS { typedef void REDUCE1_FN(char*, int, char*, int, int*, KeyValue*, void*); typedef void REDUCE2_FN(char*, int, char*, int, void*); typedef void MAP1_FN(int, KeyValue*, void*); typedef void MAP2_FN(uint64_t, char*, int, char*, int, KeyValue*, void*); ////////////////////////////////////////////////////////////////////////// class EDGEVALUE{ // Edge with destination vertex (8-bytes) and edge weight // Given a key that is a VERTEX Vi, EDGEVALUE contains Vj and the weight // of edge Vi->Vj. public: VERTEX v; WEIGHT wt; friend bool operator!=(const EDGEVALUE& lhs, const EDGEVALUE& rhs) { if ((lhs.wt != rhs.wt) || (lhs.v != rhs.v)) return true; return false; }; }; class DISTANCE { // Class used to pass distance information through the MapReduce system. public: DISTANCE(){ memset(&(e.v), 0, sizeof(VERTEX)); e.wt = FLT_MAX; current = true; }; ~DISTANCE(){}; EDGEVALUE e; // Edge describing the distance of a vtx from S; // e.v is predecessor vtx; e.wt is distance from S through e.v. bool current; // Flag indicating that this distance is the current state // for the vtx (the currently accepted best distance). // Needed so we can know when to stop (when no vtx distances // change in an iteration). friend bool operator!=(const DISTANCE& lhs, const DISTANCE& rhs) { if (lhs.e != rhs.e) return true; return false; }; }; ////////////////////////////////////////////////////////////////////////// class SSSP : public Command { public: SSSP(class OINK *); void run(); void params(int, char **); int me; int np; private: int ncnt; // Number of SSSP computations to do. 
int seed; // Random seed initialization static uint64_t NVtxLabeled; // Number of local vertices labeled so far. std::vector sourcelist; // ncnt sources with outdegree > zero. bool get_next_source(VERTEX *, int); static MAP1_FN add_source; static MAP2_FN reorganize_edges; static MAP2_FN move_to_new_mr; static MAP2_FN initialize_vertex_distances; static REDUCE1_FN get_good_sources; static REDUCE1_FN pick_shortest_distances; static REDUCE1_FN update_adjacent_distances; static REDUCE2_FN print; }; } #endif #endif mrmpi-1.0~20131122/oink/neigh_tri.cpp0000644000175000017500000000766411536442217017020 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #include "typedefs.h" #include "string.h" #include "stdlib.h" #include "neigh_tri.h" #include "object.h" #include "error.h" #include "blockmacros.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; typedef struct { VERTEX vi,vj,vk; } TRI; #define MAXLINE 1024*1024 /* ---------------------------------------------------------------------- */ NeighTri::NeighTri(OINK *oink) : Command(oink) { ninputs = 2; noutputs = 1; } /* ---------------------------------------------------------------------- */ void NeighTri::run() { int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); // MRn = Vi : Vj Vk ... 
// MRt = Vi Vj Vk : NULL char line[MAXLINE]; MapReduce *mrn = obj->input(1,nread,line); MapReduce *mrt = obj->input(2,tread,NULL); MapReduce *mrnplus = obj->copy_mr(mrn); mrnplus->map(mrt,map1,NULL,1); mrnplus->collate(NULL); mrnplus->scan(print,this); obj->output(1,mrnplus); delete [] dirname; obj->cleanup(); } /* ---------------------------------------------------------------------- */ void NeighTri::params(int narg, char **arg) { if (narg != 1) error->all("Illegal neigh_tri command"); int n = strlen(arg[0]) + 1; dirname = new char[n]; strcpy(dirname,arg[0]); } /* ---------------------------------------------------------------------- */ void NeighTri::nread(int itask, char *file, KeyValue *kv, void *ptr) { VERTEX vi,vj; char *line = (char *) ptr; char *pvj; FILE *fp = fopen(file,"r"); while (fgets(line,MAXLINE,fp)) { vi = atoll(strtok(line," \t\n")); while (pvj = strtok(NULL," \t\n")) { vj = atoll(pvj); kv->add((char *) &vi,sizeof(VERTEX),(char *) &vj,sizeof(VERTEX)); } } fclose(fp); } /* ---------------------------------------------------------------------- */ void NeighTri::tread(int itask, char *file, KeyValue *kv, void *ptr) { char line[MAXLINE]; TRI tri; FILE *fp = fopen(file,"r"); while (fgets(line,MAXLINE,fp)) { sscanf(line,"%lu %lu %lu",&tri.vi,&tri.vj,&tri.vk); kv->add((char *) &tri,sizeof(TRI),NULL,0); } fclose(fp); } /* ---------------------------------------------------------------------- */ void NeighTri::print(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, void *ptr) { VERTEX vi,vj,vk; vi = *(VERTEX *) key; NeighTri *nt = (NeighTri *) ptr; char fname[128]; sprintf(fname,"%s/%lu",nt->dirname,vi); FILE *fp = fopen(fname,"w"); if (fp == NULL) { Error *error = nt->error; error->one("Could not open file in print"); } int offset = 0; for (int i = 0; i < nvalues; i++) { if (valuebytes[i] == sizeof(VERTEX)) { vj = *(VERTEX *) &multivalue[offset]; fprintf(fp,"%lu %lu\n",vi,vj); } else { vj = *(VERTEX *) &multivalue[offset]; vk = 
*(VERTEX *) &multivalue[offset+sizeof(VERTEX)]; fprintf(fp,"%lu %lu\n",vj,vk); } offset += valuebytes[i]; } fclose(fp); } /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ void NeighTri::map1(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { EDGE edge; TRI *tri = (TRI *) key; edge.vi = tri->vj; edge.vj = tri->vk; kv->add((char *) &tri->vi,sizeof(VERTEX),(char *) &edge,sizeof(EDGE)); edge.vi = tri->vi; edge.vj = tri->vk; kv->add((char *) &tri->vj,sizeof(VERTEX),(char *) &edge,sizeof(EDGE)); edge.vi = tri->vi; edge.vj = tri->vj; kv->add((char *) &tri->vk,sizeof(VERTEX),(char *) &edge,sizeof(EDGE)); } mrmpi-1.0~20131122/oink/memory.cpp0000644000175000017500000003014211524066004016334 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */ #include "mpi.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "memory.h" #include "error.h" using namespace OINK_NS; /* ---------------------------------------------------------------------- */ Memory::Memory(OINK *oink) : Pointers(oink) {} /* ---------------------------------------------------------------------- safe malloc ------------------------------------------------------------------------- */ void *Memory::smalloc(int n, const char *name) { if (n == 0) return NULL; void *ptr = malloc(n); if (ptr == NULL) { char str[128]; sprintf(str,"Failed to allocate %d bytes for array %s",n,name); error->one(str); } return ptr; } /* ---------------------------------------------------------------------- safe free ------------------------------------------------------------------------- */ void Memory::sfree(void *ptr) { if (ptr == NULL) return; free(ptr); } /* ---------------------------------------------------------------------- safe realloc ------------------------------------------------------------------------- */ void *Memory::srealloc(void *ptr, int n, const char *name) { if (n == 0) { sfree(ptr); return NULL; } ptr = realloc(ptr,n); if (ptr == NULL) { char str[128]; sprintf(str,"Failed to reallocate %d bytes for array %s",n,name); error->one(str); } return ptr; } /* ---------------------------------------------------------------------- create a 1d double array with index from nlo to nhi inclusive ------------------------------------------------------------------------- */ double *Memory::create_1d_double_array(int nlo, int nhi, const char *name) { int n = nhi - nlo + 1; double *array = (double *) smalloc(n*sizeof(double),name); return array-nlo; } /* ---------------------------------------------------------------------- free a 1d double array with index offset ------------------------------------------------------------------------- */ void 
Memory::destroy_1d_double_array(double *array, int offset) { if (array == NULL) return; sfree(array + offset); } /* ---------------------------------------------------------------------- create a 2d double array ------------------------------------------------------------------------- */ double **Memory::create_2d_double_array(int n1, int n2, const char *name) { double *data = (double *) smalloc(n1*n2*sizeof(double),name); double **array = (double **) smalloc(n1*sizeof(double *),name); int n = 0; for (int i = 0; i < n1; i++) { array[i] = &data[n]; n += n2; } return array; } /* ---------------------------------------------------------------------- free a 2d double array ------------------------------------------------------------------------- */ void Memory::destroy_2d_double_array(double **array) { if (array == NULL) return; sfree(array[0]); sfree(array); } /* ---------------------------------------------------------------------- grow or shrink 1st dim of a 2d double array last dim must stay the same if either dim is 0, return NULL ------------------------------------------------------------------------- */ double **Memory::grow_2d_double_array(double **array, int n1, int n2, const char *name) { if (array == NULL) return create_2d_double_array(n1,n2,name); double *data = (double *) srealloc(array[0],n1*n2*sizeof(double),name); array = (double **) srealloc(array,n1*sizeof(double *),name); int n = 0; for (int i = 0; i < n1; i++) { array[i] = &data[n]; n += n2; } return array; } /* ---------------------------------------------------------------------- create a 2d int array if either dim is 0, return NULL ------------------------------------------------------------------------- */ int **Memory::create_2d_int_array(int n1, int n2, const char *name) { if (n1 == 0 || n2 == 0) return NULL; int *data = (int *) smalloc(n1*n2*sizeof(int),name); int **array = (int **) smalloc(n1*sizeof(int *),name); int n = 0; for (int i = 0; i < n1; i++) { array[i] = &data[n]; n += n2; } 
return array; } /* ---------------------------------------------------------------------- free a 2d int array ------------------------------------------------------------------------- */ void Memory::destroy_2d_int_array(int **array) { if (array == NULL) return; sfree(array[0]); sfree(array); } /* ---------------------------------------------------------------------- grow or shrink 1st dim of a 2d int array last dim must stay the same if either dim is 0, return NULL ------------------------------------------------------------------------- */ int **Memory::grow_2d_int_array(int **array, int n1, int n2, const char *name) { if (n1 == 0 || n2 == 0) { destroy_2d_int_array(array); return NULL; } if (array == NULL) return create_2d_int_array(n1,n2,name); int *data = (int *) srealloc(array[0],n1*n2*sizeof(int),name); array = (int **) srealloc(array,n1*sizeof(int *),name); int n = 0; for (int i = 0; i < n1; i++) { array[i] = &data[n]; n += n2; } return array; } /* ---------------------------------------------------------------------- create a 2d double array with 2nd index from n2lo to n2hi inclusive ------------------------------------------------------------------------- */ double **Memory::create_2d_double_array(int n1, int n2lo, int n2hi, const char *name) { int n2 = n2hi - n2lo + 1; double **array = create_2d_double_array(n1,n2,name); for (int i = 0; i < n1; i++) array[i] -= n2lo; return array; } /* ---------------------------------------------------------------------- free a 2d double array with 2nd index offset ------------------------------------------------------------------------- */ void Memory::destroy_2d_double_array(double **array, int offset) { if (array == NULL) return; sfree(&array[0][offset]); sfree(array); } /* ---------------------------------------------------------------------- create a 3d double array ------------------------------------------------------------------------- */ double ***Memory::create_3d_double_array(int n1, int n2, int n3, const char 
*name) { int i,j; double *data = (double *) smalloc(n1*n2*n3*sizeof(double),name); double **plane = (double **) smalloc(n1*n2*sizeof(double *),name); double ***array = (double ***) smalloc(n1*sizeof(double **),name); int n = 0; for (i = 0; i < n1; i++) { array[i] = &plane[i*n2]; for (j = 0; j < n2; j++) { plane[i*n2+j] = &data[n]; n += n3; } } return array; } /* ---------------------------------------------------------------------- free a 3d double array ------------------------------------------------------------------------- */ void Memory::destroy_3d_double_array(double ***array) { if (array == NULL) return; sfree(array[0][0]); sfree(array[0]); sfree(array); } /* ---------------------------------------------------------------------- grow or shrink 1st dim of a 3d double array last 2 dims must stay the same if any dim is 0, return NULL ------------------------------------------------------------------------- */ double ***Memory::grow_3d_double_array(double ***array, int n1, int n2, int n3, const char *name) { int i,j; if (n1 == 0 || n2 == 0 || n3 == 0) { destroy_3d_double_array(array); return NULL; } if (array == NULL) return create_3d_double_array(n1,n2,n3,name); double *data = (double *) srealloc(array[0][0],n1*n2*n3*sizeof(double),name); double **plane = (double **) srealloc(array[0],n1*n2*sizeof(double *),name); array = (double ***) srealloc(array,n1*sizeof(double **),name); int n = 0; for (i = 0; i < n1; i++) { array[i] = &plane[i*n2]; for (j = 0; j < n2; j++) { plane[i*n2+j] = &data[n]; n += n3; } } return array; } /* ---------------------------------------------------------------------- create a 3d double array with 1st index from n1lo to n1hi inclusive ------------------------------------------------------------------------- */ double ***Memory::create_3d_double_array(int n1lo, int n1hi, int n2, int n3, const char *name) { int n1 = n1hi - n1lo + 1; double ***array = create_3d_double_array(n1,n2,n3,name); return array-n1lo; } /* 
---------------------------------------------------------------------- free a 3d double array with 1st index offset ------------------------------------------------------------------------- */ void Memory::destroy_3d_double_array(double ***array, int offset) { if (array) destroy_3d_double_array(array + offset); } /* ---------------------------------------------------------------------- create a 3d double array with 1st index from n1lo to n1hi inclusive, 2nd index from n2lo to n2hi inclusive, 3rd index from n3lo to n3hi inclusive ------------------------------------------------------------------------- */ double ***Memory::create_3d_double_array(int n1lo, int n1hi, int n2lo, int n2hi, int n3lo, int n3hi, const char *name) { int n1 = n1hi - n1lo + 1; int n2 = n2hi - n2lo + 1; int n3 = n3hi - n3lo + 1; double ***array = create_3d_double_array(n1,n2,n3,name); for (int i = 0; i < n1*n2; i++) array[0][i] -= n3lo; for (int i = 0; i < n1; i++) array[i] -= n2lo; return array-n1lo; } /* ---------------------------------------------------------------------- free a 3d double array with all 3 indices offset ------------------------------------------------------------------------- */ void Memory::destroy_3d_double_array(double ***array, int n1_offset, int n2_offset, int n3_offset) { if (array == NULL) return; sfree(&array[n1_offset][n2_offset][n3_offset]); sfree(&array[n1_offset][n2_offset]); sfree(array + n1_offset); } /* ---------------------------------------------------------------------- create a 3d int array ------------------------------------------------------------------------- */ int ***Memory::create_3d_int_array(int n1, int n2, int n3, const char *name) { int i,j; int *data = (int *) smalloc(n1*n2*n3*sizeof(int),name); int **plane = (int **) smalloc(n1*n2*sizeof(int *),name); int ***array = (int ***) smalloc(n1*sizeof(int **),name); int n = 0; for (i = 0; i < n1; i++) { array[i] = &plane[i*n2]; for (j = 0; j < n2; j++) { plane[i*n2+j] = &data[n]; n += n3; } } return 
array; } /* ---------------------------------------------------------------------- free a 3d int array ------------------------------------------------------------------------- */ void Memory::destroy_3d_int_array(int ***array) { if (array == NULL) return; sfree(array[0][0]); sfree(array[0]); sfree(array); } /* ---------------------------------------------------------------------- create a 4d double array ------------------------------------------------------------------------- */ double ****Memory::create_4d_double_array(int n1, int n2, int n3, int n4, const char *name) { int i,j,k; double *data = (double *) smalloc(n1*n2*n3*n4*sizeof(double),name); double **cube = (double **) smalloc(n1*n2*n3*sizeof(double *),name); double ***plane = (double ***) smalloc(n1*n2*sizeof(double **),name); double ****array = (double ****) smalloc(n1*sizeof(double ***),name); int n = 0; for (i = 0; i < n1; i++) { array[i] = &plane[i*n2]; for (j = 0; j < n2; j++) { plane[i*n2+j] = &cube[i*n2*n3+j*n3]; for (k = 0; k < n3; k++) { cube[i*n2*n3+j*n3+k] = &data[n]; n += n4; } } } return array; } /* ---------------------------------------------------------------------- free a 4d double array ------------------------------------------------------------------------- */ void Memory::destroy_4d_double_array(double ****array) { if (array == NULL) return; sfree(array[0][0][0]); sfree(array[0][0]); sfree(array[0]); sfree(array); } mrmpi-1.0~20131122/oink/degree_weight.h0000644000175000017500000000155011535764350017307 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */ #ifdef COMMAND_CLASS CommandStyle(degree_weight,DegreeWeight) #else #ifndef OINK_DEGREE_WEIGHT_H #define OINK_DEGREE_WEIGHT_H #include "command.h" namespace OINK_NS { class DegreeWeight : public Command { public: DegreeWeight(class OINK *); void run(); void params(int, char **); private: int duplicate; static void print(char *, int, char *, int, void *); static void inverse_degree(char *, int, char *, int, int *, KeyValue *, void *); }; } #endif #endif mrmpi-1.0~20131122/oink/scan_print_string_int.cpp0000644000175000017500000000073411536275042021437 0ustar mathieumathieu#include "stdio.h" /* ---------------------------------------------------------------------- print_string_int print out key as string and value as int, to a file input: key = string, value = int ------------------------------------------------------------------------- */ void print_string_int(char *key, int keybytes, char *value, int valuebytes, void *ptr) { FILE *fp = (FILE *) ptr; int count = *(int *) value; fprintf(fp,"%s %d\n",key,count); } mrmpi-1.0~20131122/oink/rmat2.cpp0000644000175000017500000000472411536440627016072 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */ #include "typedefs.h" #include "mpi.h" #include "stdio.h" #include "string.h" #include "stdlib.h" #include "rmat2.h" #include "object.h" #include "style_map.h" #include "style_reduce.h" #include "style_scan.h" #include "error.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- */ RMAT2::RMAT2(OINK *oink) : Command(oink) { ninputs = 0; noutputs = 1; } /* ---------------------------------------------------------------------- */ void RMAT2::run() { int me,nprocs; MPI_Comm_rank(MPI_COMM_WORLD,&me); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); // mr = matrix edges MapReduce *mr = obj->create_mr(); MapReduce *mrnew = obj->create_mr(); // loop until desired number of unique nonzero entries int niterate = 0; uint64_t ntotal = rmat.order * rmat.nnonzero; uint64_t nremain = ntotal; while (nremain) { niterate++; rmat.ngenerate = nremain/nprocs; if (me < nremain % nprocs) rmat.ngenerate++; mrnew->map(nprocs,rmat_generate,&rmat); mrnew->aggregate(NULL); mr->add(mrnew); uint64_t nunique = mr->convert(); mr->reduce(cull,&rmat); nremain = ntotal - nunique; } obj->output(1,mr,print_edge,NULL); char msg[128]; sprintf(msg,"RMAT2: %lu rows, %lu non-zeroes, %d iterations", rmat.order,ntotal,niterate); if (me == 0) error->message(msg); obj->cleanup(); } /* ---------------------------------------------------------------------- */ void RMAT2::params(int narg, char **arg) { if (narg != 8) error->all("Illegal rmat command"); rmat.nlevels = atoi(arg[0]); rmat.nnonzero = atoi(arg[1]); rmat.a = atof(arg[2]); rmat.b = atof(arg[3]); rmat.c = atof(arg[4]); rmat.d = atof(arg[5]); rmat.fraction = atof(arg[6]); int seed = atoi(arg[7]); if (rmat.a + rmat.b + rmat.c + rmat.d != 1.0) error->all("RMAT a,b,c,d must sum to 1"); if (rmat.fraction >= 1.0) error->all("RMAT fraction must be < 1"); int me; 
MPI_Comm_rank(MPI_COMM_WORLD,&me); srand48(seed+me); rmat.order = 1 << rmat.nlevels; } mrmpi-1.0~20131122/oink/universe.cpp0000644000175000017500000000525411524066004016672 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #include "mpi.h" #include "stdlib.h" #include "string.h" #include "stdio.h" #include "universe.h" #include "version.h" #include "memory.h" using namespace OINK_NS; /* ---------------------------------------------------------------------- create & initialize the universe of processors in communicator ------------------------------------------------------------------------- */ Universe::Universe(OINK *oink, MPI_Comm communicator) : Pointers(oink) { version = (char *) OINK_VERSION; uworld = communicator; MPI_Comm_rank(uworld,&me); MPI_Comm_size(uworld,&nprocs); uscreen = stdout; ulogfile = NULL; existflag = 0; nworlds = 0; procs_per_world = NULL; root_proc = NULL; } /* ---------------------------------------------------------------------- */ Universe::~Universe() { memory->sfree(procs_per_world); memory->sfree(root_proc); } /* ---------------------------------------------------------------------- add 1 or more worlds to universe str == NULL -> add 1 world with all procs in universe str = NxM -> add N worlds, each with M procs str = P -> add 1 world with P procs ------------------------------------------------------------------------- */ void Universe::add_world(char *str) { int n,nper; char *ptr; if (str == NULL) { n = 1; nper = nprocs; } else if ((ptr = strchr(str,'x')) != NULL) { *ptr = '\0'; n = atoi(str); nper = atoi(ptr+1); } else { n = 1; nper = atoi(str); } procs_per_world = (int *) 
memory->srealloc(procs_per_world,(nworlds+n)*sizeof(int), "universe:procs_per_world"); root_proc = (int *) memory->srealloc(root_proc,(nworlds+n)*sizeof(int), "universe:root_proc"); for (int i = 0; i < n; i++) { procs_per_world[nworlds] = nper; if (nworlds == 0) root_proc[nworlds] = 0; else root_proc[nworlds] = root_proc[nworlds-1] + procs_per_world[nworlds-1]; if (me >= root_proc[nworlds]) iworld = nworlds; nworlds++; } } /* ---------------------------------------------------------------------- check if total procs in all worlds = procs in universe ------------------------------------------------------------------------- */ int Universe::consistent() { int n = 0; for (int i = 0; i < nworlds; i++) n += procs_per_world[i]; if (n == nprocs) return 1; else return 0; } mrmpi-1.0~20131122/oink/neighbor.cpp0000644000175000017500000000567311536502301016632 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */ #include "typedefs.h" #include "string.h" #include "stdlib.h" #include "neighbor.h" #include "object.h" #include "style_map.h" #include "error.h" #include "blockmacros.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- */ Neighbor::Neighbor(OINK *oink) : Command(oink) { ninputs = 1; noutputs = 1; } /* ---------------------------------------------------------------------- */ void Neighbor::run() { int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); // MRe = Eij : NULL MapReduce *mre = obj->input(1,read_edge,NULL); MapReduce *mrn = obj->create_mr(); mrn->map(mre,map1,NULL); mrn->collate(NULL); mrn->reduce(reduce1,NULL); obj->output(1,mrn,print,NULL); obj->cleanup(); } /* ---------------------------------------------------------------------- */ void Neighbor::params(int narg, char **arg) { if (narg != 0) error->all("Illegal sgi_prune command"); } /* ---------------------------------------------------------------------- */ void Neighbor::print(char *key, int keybytes, char *value, int valuebytes, void *ptr) { VERTEX vi,vj; FILE *fp = (FILE *) ptr; vi = *(VERTEX *) key; fprintf(fp,"%lu",vi); int n = valuebytes/sizeof(VERTEX); int offset = 0; for (int i = 0; i < n; i++) { vj = *(VERTEX *) &value[offset]; fprintf(fp," %lu",vj); offset += sizeof(VERTEX); } fprintf(fp,"\n"); } /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ void Neighbor::map1(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { EDGE *edge = (EDGE *) key; kv->add((char *) &edge->vi,sizeof(VERTEX),(char *) &edge->vj,sizeof(VERTEX)); kv->add((char *) &edge->vj,sizeof(VERTEX),(char *) &edge->vi,sizeof(VERTEX)); } /* 
---------------------------------------------------------------------- */ void Neighbor::reduce1(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { uint64_t nvalues_total; CHECK_FOR_BLOCKS(multivalue,valuebytes,nvalues,nvalues_total) int n = nvalues_total; VERTEX *neighlist = new VERTEX[n]; BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) int offset = 0; for (int i = 0; i < nvalues; i++) { neighlist[i] = *(VERTEX *) &multivalue[offset]; offset += valuebytes[i]; } END_BLOCK_LOOP kv->add(key,keybytes,(char *) neighlist,n*sizeof(VERTEX)); delete [] neighlist; } mrmpi-1.0~20131122/oink/error.h0000644000175000017500000000135211524066004015623 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #ifndef OINK_ERROR_H #define OINK_ERROR_H #include "pointers.h" namespace OINK_NS { class Error : protected Pointers { public: Error(class OINK *); void universe_all(const char *); void universe_one(const char *); void all(const char *); void one(const char *); void warning(const char *, int = 1); void message(char *, int = 1); }; } #endif mrmpi-1.0~20131122/oink/rmat.h0000644000175000017500000000130211535732267015445 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */

#ifdef COMMAND_CLASS

CommandStyle(rmat,RMAT)

#else

#ifndef OINK_RMAT_H
#define OINK_RMAT_H

#include "command.h"
#include "map_rmat_generate.h"

namespace OINK_NS {

// OINK command "rmat" (name registered via CommandStyle above)
// rmat holds the parameters of the command, of type RMAT_struct

class RMAT : public Command {
 public:
  RMAT(class OINK *);
  void run();
  void params(int, char **);

 private:
  RMAT_struct rmat;
};

}

#endif
#endif
mrmpi-1.0~20131122/oink/pointers.h0000644000175000017500000000261311524066004016336 0ustar mathieumathieu/* ----------------------------------------------------------------------
   OINK - scripting wrapper on MapReduce-MPI library
   http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level MR-MPI directory.
------------------------------------------------------------------------- */

// Pointers class contains ptrs to master copy of
//   fundamental OINK class ptrs stored in the OINK object
//   NOTE(review): the original comment named mrmpi.h as the place they are
//   stored; this header includes oink.h and binds to members of OINK,
//   so oink.h is presumably what was meant - confirm
// every class that inherits from Pointers can access those ptrs
// these variables are auto-initialized by Pointers class constructor
// *& variables are references to the pointers held by the OINK object
// & enables them to be accessed directly in any class, e.g. error->all()

#ifndef OINK_POINTERS_H
#define OINK_POINTERS_H

#include "mpi.h"
#include "oink.h"

namespace OINK_NS {

class Pointers {
 public:
  // bind every reference member to the matching member of *ptr
  Pointers(OINK *ptr) : 
    oink(ptr),
    memory(ptr->memory),
    error(ptr->error),
    universe(ptr->universe),
    input(ptr->input),
    obj(ptr->obj),
    mrmpi(ptr->mrmpi),
    world(ptr->world),
    infile(ptr->infile),
    screen(ptr->screen),
    logfile(ptr->logfile) {}
  virtual ~Pointers() {}

 protected:
  OINK *oink;
  Memory *&memory;
  Error *&error;
  Universe *&universe;
  Input *&input;
  Object *&obj;
  MRMPI *&mrmpi;

  MPI_Comm &world;
  FILE *&infile;
  FILE *&screen;
  FILE *&logfile;
};

}

#endif
mrmpi-1.0~20131122/oink/version.h0000644000175000017500000000004312243675113016161 0ustar mathieumathieu#define OINK_VERSION "22 Nov 2013"
mrmpi-1.0~20131122/oink/degree.cpp0000644000175000017500000000371411536440627016276 0ustar mathieumathieu/* ----------------------------------------------------------------------
   OINK - scripting wrapper on MapReduce-MPI library
   http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level MR-MPI directory.
------------------------------------------------------------------------- */ #include "typedefs.h" #include "mpi.h" #include "string.h" #include "stdlib.h" #include "degree.h" #include "object.h" #include "style_map.h" #include "style_reduce.h" #include "error.h" #include "blockmacros.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- */ Degree::Degree(OINK *oink) : Command(oink) { ninputs = 1; noutputs = 1; } /* ---------------------------------------------------------------------- */ void Degree::run() { int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); // MRe = Eij : NULL MapReduce *mre = obj->input(1,read_edge,NULL); MapReduce *mrv = obj->create_mr(); uint64_t nedge = mre->kv_stats(0); if (duplicate == 1) mrv->map(mre,edge_to_vertex,NULL); else mrv->map(mre,edge_to_vertices,NULL); mrv->collate(NULL); uint64_t nvert = mrv->reduce(count,NULL); obj->output(1,mrv,print,NULL); char msg[128]; sprintf(msg,"Degree: %lu vertices, %lu edges",nvert,nedge); if (me == 0) error->message(msg); obj->cleanup(); } /* ---------------------------------------------------------------------- */ void Degree::params(int narg, char **arg) { if (narg != 1) error->all("Illegal degree command"); duplicate = atoi(arg[0]); } /* ---------------------------------------------------------------------- */ void Degree::print(char *key, int keybytes, char *value, int valuebytes, void *ptr) { FILE *fp = (FILE *) ptr; VERTEX vi = *(VERTEX *) key; int degree = *(int *) value; fprintf(fp,"%lu %d\n",vi,degree); } mrmpi-1.0~20131122/oink/MAKE/0000755000175000017500000000000012243675122015043 5ustar mathieumathieumrmpi-1.0~20131122/oink/MAKE/Makefile.serial0000755000175000017500000000163011524065425017764 0ustar mathieumathieu# serial = g++, no MPI SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/linker settings # specify flags and 
libraries needed for your compiler and MPI installation CC = g++ CCFLAGS = -g -O -I../../src -I../../mpistubs DEPFLAGS = -M LINK = g++ LINKFLAGS = -g -O -L../../src LIB = -lmrmpi_serial ../../mpistubs/libmpi.a ARCHIVE = ar ARFLAGS = -rc SIZE = size # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Link target $(EXE): $(OBJ) $(LINK) $(LINKFLAGS) $(OBJ) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library target lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) mrmpi-1.0~20131122/oink/MAKE/Makefile.mac_serial0000755000175000017500000000167711524065425020617 0ustar mathieumathieu# mac_serial = Apple PowerBook G4 laptop, c++, no MPI SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/linker settings # specify flags and libraries needed for your compiler and MPI installation CC = c++ CCFLAGS = -O -m64 -I../../src -I../../mpistubs DEPFLAGS = -M LINK = c++ LINKFLAGS = -O -m64 -L../../src LIB = -lmrmpi_mac_serial ../../mpistubs/libmpi.a ARCHIVE = ar ARFLAGS = -rc SIZE = size # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Link target $(EXE): $(OBJ) $(LINK) $(LINKFLAGS) $(OBJ) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library target lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) mrmpi-1.0~20131122/oink/MAKE/Makefile.mpicc0000755000175000017500000000162011524065425017577 0ustar mathieumathieu# mpicc = any machine with MPI compiler wrappers, mpic++ SHELL = /bin/sh # 
--------------------------------------------------------------------- # compiler/linker settings # specify flags and libraries needed for your compiler and MPI installation CC = mpic++ CCFLAGS = -O -I../../src DEPFLAGS = -M LINK = mpic++ LINKFLAGS = -O -L../../src LIB = -lmrmpi_mpicc ARCHIVE = ar ARFLAGS = -rc SIZE = size # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Link target $(EXE): $(OBJ) $(LINK) $(LINKFLAGS) $(OBJ) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library target lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) mrmpi-1.0~20131122/oink/MAKE/Makefile.linux0000755000175000017500000000165311536440627017655 0ustar mathieumathieu# linux = g++, MPICH in /usr/bin/local SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/linker settings # specify flags and libraries needed for your compiler and MPI installation CC = g++4 CCFLAGS = -g -O -I../../src -DMPICH_IGNORE_CXX_SEEK DEPFLAGS = -M LINK = g++4 LINKFLAGS = -g -O -L../../src LIB = -lmrmpi_linux -lmpich -lpthread ARCHIVE = ar ARFLAGS = -rc SIZE = size # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Link target $(EXE): $(OBJ) $(LINK) $(LINKFLAGS) $(OBJ) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library target lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) mrmpi-1.0~20131122/oink/MAKE/Makefile.nebula0000755000175000017500000000166611524065425017764 0ustar mathieumathieu# nebula = nebula cluster, mpiCC, OpenMPI SHELL = /bin/sh 
# --------------------------------------------------------------------- # compiler/linker settings # specify flags and libraries needed for your compiler and MPI installation CC = /opt/openmpi-gnu-1.3.2/bin/mpiCC CCFLAGS = -O -I../../src DEPFLAGS = -M LINK = /opt/openmpi-gnu-1.3.2/bin/mpiCC LINKFLAGS = -O -L../../src LIB = -lmrmpi_nebula ARCHIVE = ar ARFLAGS = -rc SIZE = size # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Link target $(EXE): $(OBJ) $(LINK) $(LINKFLAGS) $(OBJ) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library target lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) mrmpi-1.0~20131122/oink/MAKE/Makefile.mac0000755000175000017500000000161011524065425017243 0ustar mathieumathieu# mac = Apple PowerBook G4 laptop, c++, native MPI SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/linker settings # specify flags and libraries needed for your compiler and MPI installation CC = c++ CCFLAGS = -O -I../../src DEPFLAGS = -M LINK = c++ LINKFLAGS = -O -L../../src LIB = -lmrmpi_mac -lmpi ARCHIVE = ar ARFLAGS = -rc SIZE = size # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Link target $(EXE): $(OBJ) $(LINK) $(LINKFLAGS) $(OBJ) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library target lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) mrmpi-1.0~20131122/oink/neigh_tri.h0000644000175000017500000000174611535732267016466 0ustar mathieumathieu/* 
---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #ifdef COMMAND_CLASS CommandStyle(neigh_tri,NeighTri) #else #ifndef OINK_NEIGH_TRI_H #define OINK_NEIGH_TRI_H #include "command.h" #include "keyvalue.h" using MAPREDUCE_NS::KeyValue; namespace OINK_NS { class NeighTri : public Command { public: NeighTri(class OINK *); void run(); void params(int, char **); private: char *dirname; static void nread(int, char *, KeyValue *, void *); static void tread(int, char *, KeyValue *, void *); static void print(char *, int, char *, int, int *, void *); static void map1(uint64_t, char *, int, char *, int, KeyValue *, void *); }; } #endif #endif mrmpi-1.0~20131122/oink/reduce_count.cpp0000644000175000017500000000130111535452656017514 0ustar mathieumathieu#include "blockmacros.h" #include "mapreduce.h" #include "keyvalue.h" using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- count count number of values associated with key input: KMV with key and one or more values output: key = unchanged, value = count ------------------------------------------------------------------------- */ void count(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { uint64_t nvalues_total; CHECK_FOR_BLOCKS(multivalue,valuebytes,nvalues,nvalues_total) int count = static_cast (nvalues_total); kv->add(key,keybytes,(char *) &count,sizeof(int)); } mrmpi-1.0~20131122/oink/oink.cpp0000644000175000017500000002060611524066004015770 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library 
http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #include "mpi.h" #include "string.h" #include "oink.h" #include "input.h" #include "universe.h" #include "object.h" #include "mrmpi.h" #include "memory.h" #include "error.h" using namespace OINK_NS; /* ---------------------------------------------------------------------- start up OINK allocate fundamental classes (memory, error, universe, input) parse input switches initialize communicators, screen & logfile output input is allocated at end after MPI info is setup ------------------------------------------------------------------------- */ OINK::OINK(int narg, char **arg, MPI_Comm communicator) { memory = new Memory(this); error = new Error(this); universe = new Universe(this,communicator); screen = NULL; logfile = NULL; // parse input switches int inflag = 0; int screenflag = 0; int logflag = 0; int iarg = 1; while (iarg < narg) { if (strcmp(arg[iarg],"-partition") == 0 || strcmp(arg[iarg],"-p") == 0) { universe->existflag = 1; if (iarg+2 > narg) error->universe_all("Invalid command-line argument"); iarg++; while (iarg < narg && arg[iarg][0] != '-') { universe->add_world(arg[iarg]); iarg++; } } else if (strcmp(arg[iarg],"-in") == 0 || strcmp(arg[iarg],"-i") == 0) { if (iarg+2 > narg) error->universe_all("Invalid command-line argument"); inflag = iarg + 1; iarg += 2; } else if (strcmp(arg[iarg],"-screen") == 0 || strcmp(arg[iarg],"-s") == 0) { if (iarg+2 > narg) error->universe_all("Invalid command-line argument"); screenflag = iarg + 1; iarg += 2; } else if (strcmp(arg[iarg],"-log") == 0 || strcmp(arg[iarg],"-l") == 0) { if (iarg+2 > narg) error->universe_all("Invalid command-line argument"); logflag = iarg + 1; iarg += 2; } else if (strcmp(arg[iarg],"-var") == 0 || strcmp(arg[iarg],"-v") == 0) { if (iarg+3 > narg) 
error->universe_all("Invalid command-line argument"); iarg += 2; while (iarg < narg && arg[iarg][0] != '-') iarg++; } else if (strcmp(arg[iarg],"-echo") == 0 || strcmp(arg[iarg],"-e") == 0) { if (iarg+2 > narg) error->universe_all("Invalid command-line argument"); iarg += 2; } else error->universe_all("Invalid command-line argument"); } // if no partition command-line switch, universe is one world w/ all procs if (universe->existflag == 0) universe->add_world(NULL); // sum of procs in all worlds must equal total # of procs if (!universe->consistent()) error->universe_all("Processor partitions are inconsistent"); // universe cannot use stdin for input file if (universe->existflag && inflag == 0) error->universe_all("Must use -in switch with multiple partitions"); // set universe screen and logfile if (universe->me == 0) { if (screenflag == 0) universe->uscreen = stdout; else if (strcmp(arg[screenflag],"none") == 0) universe->uscreen = NULL; else { universe->uscreen = fopen(arg[screenflag],"w"); if (universe->uscreen == NULL) error->universe_one("Cannot open universe screen file"); } if (logflag == 0) { universe->ulogfile = fopen("log.oink","w"); if (universe->ulogfile == NULL) error->universe_one("Cannot open log.oink"); } else if (strcmp(arg[logflag],"none") == 0) universe->ulogfile = NULL; else { universe->ulogfile = fopen(arg[logflag],"w"); if (universe->ulogfile == NULL) error->universe_one("Cannot open universe log file"); } } if (universe->me > 0) { if (screenflag == 0) universe->uscreen = stdout; else universe->uscreen = NULL; universe->ulogfile = NULL; } // universe does not exist on its own, only a single world // inherit settings from universe // set world screen, logfile, communicator, infile // open input script if from file if (universe->existflag == 0) { screen = universe->uscreen; logfile = universe->ulogfile; world = universe->uworld; infile = NULL; if (universe->me == 0) { if (inflag == 0) infile = stdin; else infile = fopen(arg[inflag],"r"); if 
(infile == NULL) { char str[128]; sprintf(str,"Cannot open input script %s",arg[inflag]); error->one(str); } } if (universe->me == 0) { if (screen) fprintf(screen,"OINK (%s)\n",universe->version); if (logfile) fprintf(logfile,"OINK (%s)\n",universe->version); } // universe is one or more worlds // split into separate communicators // set world screen, logfile, communicator, infile // open input script } else { int me; MPI_Comm_split(universe->uworld,universe->iworld,0,&world); MPI_Comm_rank(world,&me); if (me == 0) { if (screenflag == 0) { char str[32]; sprintf(str,"screen.%d",universe->iworld); screen = fopen(str,"w"); if (screen == NULL) error->one("Cannot open screen file"); } else if (strcmp(arg[screenflag],"none") == 0) screen = NULL; else { char str[128]; sprintf(str,"%s.%d",arg[screenflag],universe->iworld); screen = fopen(str,"w"); if (screen == NULL) error->one("Cannot open screen file"); } } else screen = NULL; if (me == 0) { if (logflag == 0) { char str[32]; sprintf(str,"log.lammps.%d",universe->iworld); logfile = fopen(str,"w"); if (logfile == NULL) error->one("Cannot open logfile"); } else if (strcmp(arg[logflag],"none") == 0) logfile = NULL; else { char str[128]; sprintf(str,"%s.%d",arg[logflag],universe->iworld); logfile = fopen(str,"w"); if (logfile == NULL) error->one("Cannot open logfile"); } } else logfile = NULL; if (me == 0) { infile = fopen(arg[inflag],"r"); if (infile == NULL) { char str[128]; sprintf(str,"Cannot open input script %s",arg[inflag]); error->one(str); } } else infile = NULL; // screen and logfile messages for universe and world if (universe->me == 0) { if (universe->uscreen) { fprintf(universe->uscreen,"OINK (%s)\n",universe->version); fprintf(universe->uscreen,"Running on %d partitions of processors\n", universe->nworlds); } if (universe->ulogfile) { fprintf(universe->ulogfile,"OINK (%s)\n",universe->version); fprintf(universe->ulogfile,"Running on %d partitions of processors\n", universe->nworlds); } } if (me == 0) { if 
(screen) { fprintf(screen,"OINK (%s)\n",universe->version); fprintf(screen,"Processor partition = %d\n",universe->iworld); } if (logfile) { fprintf(logfile,"OINK (%s)\n",universe->version); fprintf(logfile,"Processor partition = %d\n",universe->iworld); } } } // allocate input class now that MPI is fully setup input = new Input(this,narg,arg); // allocate top-level classes create(); } /* ---------------------------------------------------------------------- shutdown OINK delete top-level classes close screen and log files in world and universe output files were already closed in destroy() delete fundamental classes ------------------------------------------------------------------------- */ OINK::~OINK() { destroy(); if (universe->nworlds == 1) { if (logfile) fclose(logfile); } else { if (screen && screen != stdout) fclose(screen); if (logfile) fclose(logfile); if (universe->ulogfile) fclose(universe->ulogfile); } if (world != universe->uworld) MPI_Comm_free(&world); delete input; delete universe; delete error; delete memory; } /* ---------------------------------------------------------------------- allocate single instance of top-level classes fundamental classes are allocated in constructor ------------------------------------------------------------------------- */ void OINK::create() { obj = new Object(this); mrmpi = new MRMPI(this); } /* ---------------------------------------------------------------------- delete single instance of top-level classes fundamental classes are deleted in destructor ------------------------------------------------------------------------- */ void OINK::destroy() { delete obj; delete mrmpi; } mrmpi-1.0~20131122/oink/random_mars.cpp0000644000175000017500000000470611540754014017340 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov 
See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ // Marsaglia random number generator #include "math.h" #include "random_mars.h" #include "error.h" using namespace OINK_NS; /* ---------------------------------------------------------------------- */ RanMars::RanMars(OINK *oink, int seed) : Pointers(oink) { int ij,kl,i,j,k,l,ii,jj,m; double s,t; if (seed <= 0 || seed > 900000000) error->all("Invalid seed for Marsaglia random # generator"); save = 0; u = new double[97+1]; ij = (seed-1)/30082; kl = (seed-1) - 30082*ij; i = (ij/177) % 177 + 2; j = ij %177 + 2; k = (kl/169) % 178 + 1; l = kl % 169; for (ii = 1; ii <= 97; ii++) { s = 0.0; t = 0.5; for (jj = 1; jj <= 24; jj++) { m = ((i*j) % 179)*k % 179; i = j; j = k; k = m; l = (53*l+1) % 169; if ((l*m) % 64 >= 32) s = s + t; t = 0.5*t; } u[ii] = s; } c = 362436.0 / 16777216.0; cd = 7654321.0 / 16777216.0; cm = 16777213.0 / 16777216.0; i97 = 97; j97 = 33; uniform(); } /* ---------------------------------------------------------------------- */ RanMars::~RanMars() { delete [] u; } /* ---------------------------------------------------------------------- uniform RN ------------------------------------------------------------------------- */ double RanMars::uniform() { double uni = u[i97] - u[j97]; if (uni < 0.0) uni += 1.0; u[i97] = uni; i97--; if (i97 == 0) i97 = 97; j97--; if (j97 == 0) j97 = 97; c -= cd; if (c < 0.0) c += cm; uni -= c; if (uni < 0.0) uni += 1.0; return uni; } /* ---------------------------------------------------------------------- gaussian RN ------------------------------------------------------------------------- */ double RanMars::gaussian() { double first,v1,v2,rsq,fac; if (!save) { int again = 1; while (again) { v1 = 2.0*uniform()-1.0; v2 = 2.0*uniform()-1.0; rsq = v1*v1 + v2*v2; if (rsq < 1.0 && rsq != 0.0) again = 0; } fac = sqrt(-2.0*log(rsq)/rsq); second = v1*fac; first = v2*fac; save = 1; } else { first = 
second; save = 0; } return first; } mrmpi-1.0~20131122/oink/neighbor.h0000644000175000017500000000165211535732267016307 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #ifdef COMMAND_CLASS CommandStyle(neighbor,Neighbor) #else #ifndef OINK_NEIGHBOR_H #define OINK_NEIGHBOR_H #include "command.h" #include "keyvalue.h" using MAPREDUCE_NS::KeyValue; namespace OINK_NS { class Neighbor : public Command { public: Neighbor(class OINK *); void run(); void params(int, char **); private: static void print(char *, int, char *, int, void *); static void map1(uint64_t, char *, int, char *, int, KeyValue *, void *); static void reduce1(char *, int, char *, int, int *, KeyValue *, void *); }; } #endif #endif mrmpi-1.0~20131122/oink/map_add_label.cpp0000644000175000017500000000105311536442217017556 0ustar mathieumathieu#include "keyvalue.h" using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- add_label add a default integer label to each key, key could be vertex or edge input: key = anything, value = NULL output: key = unchanged, value = 1 ------------------------------------------------------------------------- */ void add_label(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { int one = 1; kv->add(key,keybytes,(char *) &one,sizeof(int)); } mrmpi-1.0~20131122/oink/reduce_cull.cpp0000644000175000017500000000124511535452517017326 0ustar mathieumathieu#include "mapreduce.h" #include "keyvalue.h" using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- cull eliminate duplicate values input: KMV with key and 
one or more values (assumed to be duplicates) output: key = unchanged, value = first value ------------------------------------------------------------------------- */ void cull(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { if (!multivalue) { MapReduce *mr = (MapReduce *) valuebytes; mr->multivalue_block(0,&multivalue,&valuebytes); } kv->add(key,keybytes,multivalue,valuebytes[0]); } mrmpi-1.0~20131122/oink/error.cpp0000644000175000017500000000703511524066004016162 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #include "mpi.h" #include "stdlib.h" #include "error.h" #include "universe.h" using namespace OINK_NS; /* ---------------------------------------------------------------------- */ Error::Error(OINK *oink) : Pointers(oink) {} /* ---------------------------------------------------------------------- called by all procs in universe close all output, screen, and log files in world and universe ------------------------------------------------------------------------- */ void Error::universe_all(const char *str) { MPI_Barrier(universe->uworld); if (universe->me == 0) { if (universe->uscreen) fprintf(universe->uscreen,"ERROR: %s\n",str); if (universe->ulogfile) fprintf(universe->ulogfile,"ERROR: %s\n",str); } if (universe->nworlds > 1) { if (screen && screen != stdout) fclose(screen); if (logfile) fclose(logfile); } if (universe->ulogfile) fclose(universe->ulogfile); MPI_Finalize(); exit(1); } /* ---------------------------------------------------------------------- called by one proc in universe ------------------------------------------------------------------------- */ void 
Error::universe_one(const char *str) { if (universe->uscreen) fprintf(universe->uscreen,"ERROR on proc %d: %s\n",universe->me,str); MPI_Abort(universe->uworld,1); } /* ---------------------------------------------------------------------- called by all procs in one world close screen and log files in world ------------------------------------------------------------------------- */ void Error::all(const char *str) { MPI_Barrier(world); int me; MPI_Comm_rank(world,&me); if (me == 0) { if (screen) fprintf(screen,"ERROR: %s\n",str); if (logfile) fprintf(logfile,"ERROR: %s\n",str); } if (screen && screen != stdout) fclose(screen); if (logfile) fclose(logfile); MPI_Finalize(); exit(1); } /* ---------------------------------------------------------------------- called by one proc in world write to world screen only if non-NULL on this proc always write to universe screen ------------------------------------------------------------------------- */ void Error::one(const char *str) { int me; MPI_Comm_rank(world,&me); if (screen) fprintf(screen,"ERROR on proc %d: %s\n",me,str); if (universe->nworlds > 1) fprintf(universe->uscreen,"ERROR on proc %d: %s\n",universe->me,str); MPI_Abort(world,1); } /* ---------------------------------------------------------------------- called by one proc in world write message to screen and logfile (if logflag is set) ------------------------------------------------------------------------- */ void Error::warning(const char *str, int logflag) { if (screen) fprintf(screen,"WARNING: %s\n",str); if (logflag && logfile) fprintf(logfile,"WARNING: %s\n",str); } /* ---------------------------------------------------------------------- called by one proc in world, typically proc 0 write message to screen and logfile (if logflag is set) ------------------------------------------------------------------------- */ void Error::message(char *str, int logflag) { if (screen) fprintf(screen,"%s\n",str); if (logflag && logfile) fprintf(logfile,"%s\n",str); } 
mrmpi-1.0~20131122/oink/cc_stats.h0000644000175000017500000000217411535732267016315 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #ifdef COMMAND_CLASS CommandStyle(cc_stats,CCStats) #else #ifndef OINK_CC_STATS_H #define OINK_CC_STATS_H #include "command.h" #include "keyvalue.h" using MAPREDUCE_NS::KeyValue; namespace OINK_NS { class CCStats : public Command { public: CCStats(class OINK *); void run(); void params(int, char **); private: static void read(int, char *, KeyValue *kv, void *); static void print(char *, int, char *, int, void *); static void map_invert(uint64_t, char *, int, char *, int, KeyValue *, void *); static void reduce_sum(char *, int, char *, int, int *, KeyValue *, void *); static int compare_uint64(char *, int, char *, int); static void map_print(uint64_t, char *, int, char *, int, KeyValue *, void *); }; } #endif #endif mrmpi-1.0~20131122/oink/scan_print_vertex.cpp0000644000175000017500000000070211536442217020567 0ustar mathieumathieu#include "typedefs.h" #include "stdio.h" /* ---------------------------------------------------------------------- print_vertex print out an vertex to a file input: key = Vi, value = NULL ------------------------------------------------------------------------- */ void print_vertex(char *key, int keybytes, char *value, int valuebytes, void *ptr) { FILE *fp = (FILE *) ptr; VERTEX v = *(VERTEX *) key; fprintf(fp,"%lu\n",v); } mrmpi-1.0~20131122/oink/vertex_extract.h0000644000175000017500000000136111536442217017550 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library 
// Object keeps the list of wrapped MapReduce objects (temporary or named)
// together with the input/output descriptors and global settings that
// commands use when they request inputs and emit outputs

class Object : protected Pointers {
 public:
  Object(class OINK *);
  ~Object();

  // invoked by run() method in Command class
  // create_mr/copy_mr add a new MapReduce object to the wrapped list
  // permanent() returns the mode of the wrapper holding a MapReduce object

  MapReduce *create_mr();
  MapReduce *create_mr(int, int, int, int);
  MapReduce *copy_mr(MapReduce *);
  int permanent(MapReduce *);

  // input(): return the MapReduce object for the Nth command input
  // function-pointer args are the map callbacks used when input is file(s)
  // final void * is passed through to the callback

  MapReduce *input(int);
  MapReduce *input(int, 
		   void (*)(int, char *, KeyValue *, void *), void *);
  MapReduce *input(int, 
		   void (*)(int, char *, int, KeyValue *, void *), void *);
  MapReduce *input(int, 
		   void (*)(int, char *, KeyValue *, void *),
		   void (*)(int, char *, int, KeyValue *, void *), void *);

  // output(): process the Nth command output for a MapReduce object
  // function-pointer args are, in order of the full variant:
  //   scan of KV pairs, scan of KMV pairs, map, reduce
  // disallow != 0 makes output as a named MR object an error

  void output(int, MapReduce *);
  void output(int, MapReduce *, 
	      void (*)(char *, int, char *, int, void *), 
	      void *, int disallow = 0);
  void output(int, MapReduce *, 
	      void (*)(char *, int, char *, int, int *, void *), 
	      void *, int disallow = 0);
  void output(int, MapReduce *, 
	      void (*)(uint64_t, char *, int, char *, int, KeyValue *, void *),
	      void *, int disallow = 0);
  void output(int, MapReduce *, 
	      void (*)(char *, int, char *, int, int *, KeyValue *, void *),
	      void *, int disallow = 0);
  void output(int, MapReduce *, 
	      void (*)(char *, int, char *, int, void *),
	      void (*)(char *, int, char *, int, int *, void *),
	      void (*)(uint64_t, char *, int, char *, int, KeyValue *, void *),
	      void (*)(char *, int, char *, int, int *, KeyValue *, void *),
	      void *, int disallow = 0);

  // delete non-permanent MR objects and all input/output descriptors

  void cleanup();

  // invoked internally by Command class

  void setup_inputs(int);
  void setup_outputs(int);
  void add_input(int, char *);
  void add_output(int, char *, char *);

  // invoked by copy command in run() method in MRMPI class

  void add_mr(char *, MapReduce *);

  // invoked directly by input script

  void add_mr(int, char **);
  void user_input(int, char **);
  void user_output(int, char **);
  void set(int, char **);

  // invoked by various methods

  int find_mr(char *);
  MapReduce *extract_mr(int);
  void delete_mr(int);

 private:
  int me,nprocs;             // MPI rank and size, set in constructor

  struct MRwrap {            // wrapper on MapReduce object
    int mode;                // 0 = unnamed temporary MR, 1 = named permanent MR
    char *name;              // name given MR object by user, NULL for temporary
    MapReduce *mr;           // ptr to actual MapReduce library object
  };

  int nmr,maxmr;             // number of wrapped MR objects, allocated slots
  MRwrap **mrwrap;           // wrapped MR objects

  struct Global {            // global settings for all commands
    int verbosity;           // default setting for each MapReduce object created
    int timer;               // default setting for each MapReduce object created
    int memsize;             // default setting for each MapReduce object created
    int outofcore;           // default setting for each MapReduce object created
    int minpage;             // default setting for each MapReduce object created
    int maxpage;             // default setting for each MapReduce object created
    int freepage;            // default setting for each MapReduce object created
    int zeropage;            // default setting for each MapReduce object created
    char *scratch;           // default setting for each MapReduce object created
    char *prepend;           // str to prepend to dir/file paths for scratch/in/out
    int substitute;          // substitution rule on % for scratch/in/out paths
                             // 0 = proc ID, N = ID%N + 1
  };
  Global global;

  struct Input {             // single command Input descriptor
    int index;               // which input
    int mode;                // 0 = path, 1 = named MR object
    int mrindex;             // index of an existing MR object
    char *prepend;           // override global/local prepend just for this input
    int substitute;          // override global/local subst just for this input
    int pflag,suflag;        // 1 if input-specific value has been set, 0 if not
    int multi;               // N to generate N paths for each input, 0 if not
    int nstr;                // # of strings resulting from paths
    char **strings;          // final strings resulting from paths
    int mmode;               // map method: 0 = entire files, 1 = sepchar, 2 = sepstr
    int recurse;             // arg to map() method
    int self;                // arg to map() method
    int readfile;            // arg to map() method
    int nmap;                // arg to map() method
    char sepchar;            // arg to map() method
    char *sepstr;            // arg to map() method
    int delta;               // arg to map() method
  };

  int ninput;                // # of inputs for next command
  Input **inputs;            // input descriptors
  int nuserinput;            // # of user inputs for next command
  Input **userinputs;        // user input descriptors

  struct Output {            // single command Output descriptor
    int index;               // which output
    int mode;                // 0 = file, 1 = named MR object, 2 = both
    char *name;              // name of MR object
    char *prepend;           // override global/local prepend just for this output
    int substitute;          // override global/local subst just for this output
    int pflag,suflag;        // 1 if output-specific value has been set, 0 if not
    char *procfile;          // file this proc will open/write
  };

  int noutput;               // # of outputs for next command
  Output **outputs;          // output descriptors
  int nuseroutput;           // # of user outputs for next command
  Output **useroutputs;      // user outputs descriptors

  // internal helpers: MapReduce allocation, descriptor management,
  // and path expansion / directory creation for input and output files

  MapReduce *allocate_mr();
  MapReduce *allocate_mr(int, int, int, int);
  void clear_input(int &, Input **&);
  void clear_output(int &, Output **&);
  Input *default_input();
  Output *default_output();
  void expandpath(char *, char *, char *, int, int, int);
  void createdir(int, char *);
};
http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #include "typedefs.h" #include "mpi.h" #include "stdio.h" #include "stdlib.h" #include "histo.h" #include "object.h" #include "style_map.h" #include "style_reduce.h" #include "style_scan.h" #include "error.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- */ Histo::Histo(OINK *oink) : Command(oink) { ninputs = 1; noutputs = 1; } /* ---------------------------------------------------------------------- */ void Histo::run() { int me,nprocs; MPI_Comm_rank(MPI_COMM_WORLD,&me); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); // MR = key : NULL MapReduce *mr = obj->input(1); uint64_t ntotal = mr->kv_stats(0); // unique keys and their count // before processing it, make a copy of input MR if it is permanent if (obj->permanent(mr)) mr = obj->copy_mr(mr); mr->collate(NULL); uint64_t nunique = mr->reduce(count,NULL); obj->output(1,mr); // histogram stats // before processing it, make a copy of output MR if it is permanent if (obj->permanent(mr)) mr = obj->copy_mr(mr); mr->map(mr,invert,NULL); mr->collate(NULL); mr->reduce(count,NULL); mr->gather(1); mr->sort_keys(-1); char msg[128]; sprintf(msg,"Histo: %lu total keys, %lu unique",ntotal,nunique); if (me == 0) error->message(msg); mr->scan(print,NULL); obj->cleanup(); } /* ---------------------------------------------------------------------- */ void Histo::params(int narg, char **arg) { if (narg != 0) error->all("Illegal histo command"); } /* ---------------------------------------------------------------------- */ void Histo::print(char *key, int keybytes, char *value, int valuebytes, void *ptr) { int ncount = *(int *) key; int nkey = *(int *) value; printf(" %d keys 
#define DELTA 4
#define MAXLINE 1024
#define MBYTES 64

// the whole conditional is wrapped in parentheses so the macros can be
// used inside larger expressions: without the outer pair, MIN(a,b)+1
// expands to (a < b) ? a : (b+1) and 2*MAX(a,b) to (2*(a > b)) ? a : b
// arguments are still evaluated more than once, so avoid side effects

#define MIN(A,B) (((A) < (B)) ? (A) : (B))
#define MAX(A,B) (((A) > (B)) ? (A) : (B))
mrwrap[i]; } memory->sfree(mrwrap); } /* ---------------------------------------------------------------------- create a MapReduce object with global settings add it to mrwrap as temporary MR object called by a command to create unnamed MR object ------------------------------------------------------------------------- */ MapReduce *Object::create_mr() { MapReduce *mr = allocate_mr(); add_mr(NULL,mr); return mr; } /* ---------------------------------------------------------------------- create a MapReduce object with specified settings add it to mrwrap as temporary MR object called by a command to create unnamed MR object ------------------------------------------------------------------------- */ MapReduce *Object::create_mr(int verbosity, int timer, int memsize, int outofcore) { MapReduce *mr = allocate_mr(verbosity,timer,memsize,outofcore); add_mr(NULL,mr); return mr; } /* ---------------------------------------------------------------------- copy a MapReduce object add new MapReduce object the copy creates to mrwrap as temporary MR object called by a command to create unnamed MR object that is copy of another ------------------------------------------------------------------------- */ MapReduce *Object::copy_mr(MapReduce *mr) { MapReduce *mr2 = mr->copy(); obj->add_mr(NULL,mr2); return mr2; } /* ---------------------------------------------------------------------- return 0/1 if MapReduce object is in a temporary/permanent MR object errror if not in a MR object ------------------------------------------------------------------------- */ int Object::permanent(MapReduce *mr) { for (int index = 0; index < nmr; index++) if (mrwrap[index]->mr == mr) return mrwrap[index]->mode; error->all("Object permanent() called for unknown MR object"); return 0; } /* ---------------------------------------------------------------------- variant of input(map1,map2) with neither map1 or map2 arg ------------------------------------------------------------------------- */ MapReduce 
/* ----------------------------------------------------------------------
   process a user input from 1 to Ninput
   if input is a MR object, just return the associated MapReduce object
   if input is a file(s), create a MapReduce object, add it as a
     temporary MR object, populate it via the map() variant selected by
     the input's map mode, and return it
   map1 = callback for mode 0, map2 = callback for modes 1 and 2
   called at start of command's run() method on each -i arg
------------------------------------------------------------------------- */

MapReduce *Object::input(int index,
			 void (*map1)(int, char *, KeyValue *, void *),
			 void (*map2)(int, char *, int, KeyValue *, void *),
			 void *ptr)
{
  if (index < 1 || index > ninput)
    error->all("Command input invoked with invalid index");

  Input *src = inputs[index-1];

  // user input is name of permanent MR object

  if (src->mode == MR) return mrwrap[src->mrindex]->mr;

  // user input is one or more file names
  // create a temporary MapReduce object and store it in MR list

  MapReduce *fresh = allocate_mr();
  add_mr(NULL,fresh);

  // no callback at all: reading from file is not supported by the command

  if (!map1 && !map2) {
    error->all("Command input not allowed from file");
    return fresh;
  }

  // only map1 given: input must be in mode 0

  if (!map2) {
    if (src->mmode != 0)
      error->all("Command input map function does not match input mode");
    fresh->map(src->nstr,src->strings,src->self,src->recurse,src->readfile,
	       map1,ptr);
    return fresh;
  }

  // map2 given, with or without map1
  // without map1, mode 0 cannot be served

  if (!map1 && src->mmode == 0)
    error->all("Comand input map function does not match input mode");

  switch (src->mmode) {
  case 0:
    if (map1)
      fresh->map(src->nstr,src->strings,src->self,src->recurse,src->readfile,
		 map1,ptr);
    break;
  case 1:
    fresh->map(src->nmap,src->nstr,src->strings,
	       src->recurse,src->readfile,src->sepchar,src->delta,map2,ptr);
    break;
  case 2:
    fresh->map(src->nmap,src->nstr,src->strings,
	       src->recurse,src->readfile,src->sepstr,src->delta,map2,ptr);
    break;
  default:
    break;
  }

  return fresh;
}
/* ----------------------------------------------------------------------
   variant of output(scankv,scankmv,map,reduce) with reduce arg
   forwards to the full variant with the scankv, scankmv and map
     callbacks set to NULL
   index, mr, ptr and disallow are passed through unchanged
------------------------------------------------------------------------- */

void Object::output(int index, MapReduce *mr,
		    void (*reduce)(char *, int, char *, int, 
				   int *, KeyValue *, void *),
		    void *ptr, int disallow)
{
  // only the reduce slot (4th callback) of the full variant is filled
  output(index,mr,NULL,NULL,NULL,reduce,ptr,disallow);
}
0; i < nmr; i++) if (i != index && mrwrap[i]->mode == PERMANENT && strcmp(mrwrap[i]->name,out->name) == 0) { delete [] mrwrap[i]->name; mrwrap[i]->name = NULL; mrwrap[i]->mode = TEMPORARY; } } // user output was file name // invoke the one non-NULL of 4 methods on MapReduce object // for map() method, need addflag=1 to prevent KV pairs being deleted if (out->mode == PATH || out->mode == BOTH) { FILE *fp = fopen(out->procfile,"w"); if (fp == NULL) { char str[256]; sprintf(str,"Command output could not open output file %s", out->procfile); error->one(str); } if (ptr) { struct { FILE *fp; void *ptr; } two; two.fp = fp; two.ptr = ptr; if (scankv) mr->scan(scankv,&two); else if (scankmv) mr->scan(scankmv,&two); else if (map) mr->map(mr,map,&two,1); else if (reduce) mr->reduce(reduce,&two); else error->all("Command input not allowed to file"); } else { if (scankv) mr->scan(scankv,fp); else if (scankmv) mr->scan(scankmv,fp); else if (map) mr->map(mr,map,fp,1); else if (reduce) mr->reduce(reduce,fp); else error->all("Command input not allowed to file"); } fclose(fp); } } /* ---------------------------------------------------------------------- called at end of command's run() method called when an MR is explicitly deleted by delete command in input script ------------------------------------------------------------------------- */ void Object::cleanup() { // delete temporary MRs and named MRs flagged with -1 for (int i = 0; i < nmr; i++) if (mrwrap[i]->mode != PERMANENT) { delete mrwrap[i]->mr; delete [] mrwrap[i]->name; delete mrwrap[i]; for (int j = i+1; j < nmr; j++) mrwrap[j-1] = mrwrap[j]; nmr--; } // remove all input/output descriptors clear_input(ninput,inputs); clear_input(nuserinput,userinputs); clear_output(noutput,outputs); clear_output(nuseroutput,useroutputs); } /* ---------------------------------------------------------------------- called when input script command is processed ------------------------------------------------------------------------- */ void 
Object::setup_inputs(int n) { clear_input(ninput,inputs); inputs = (Input **) memory->smalloc(n*sizeof(Input *),"object:inputs"); ninput = n; } /* ---------------------------------------------------------------------- called when input script command is processed ------------------------------------------------------------------------- */ void Object::setup_outputs(int n) { clear_output(noutput,outputs); outputs = (Output **) memory->smalloc(n*sizeof(Output *),"object:outputs"); noutput = n; } /* ---------------------------------------------------------------------- called by each -i arg in in input script command ------------------------------------------------------------------------- */ void Object::add_input(int index, char *str) { // check if any user settings exist for this input // if so, use that input descriptor, else create a default one int iwhich; for (iwhich = 0; iwhich < nuserinput; iwhich++) if (userinputs[iwhich]->index == index) break; Input *in; if (iwhich == nuserinput) in = default_input(); else { in = userinputs[iwhich]; userinputs[iwhich] = NULL; } in->index = index; inputs[index] = in; // input is a named MR object int imr = find_mr(str); if (imr >= 0) { in->mode = MR; in->mrindex = imr; return; } // input is one or more files // convert str to nstr,strings // if str is v_name, then request one string at a time from variable // expandpath() is function of prepend,substitute,multi settings // have to use oink->input intead of input b/c Object class re-defines input in->mode = PATH; int ivar = -1; int n = 1; if (strstr(str,"v_") == str) { ivar = oink->input->variable->find(&str[2]); if (ivar < 0) error->all("Command input variable is unknown"); if (oink->input->variable->equalstyle(ivar)) error->all("Command input is equal-style variable"); n = oink->input->variable->retrieve_count(ivar); } int nstr = 0; char **strings = new char*[n*in->multi]; char *one = str; for (int i = 0; i < n; i++) { if (ivar >= 0) one = 
oink->input->variable->retrieve_single(ivar,i); for (int j = 0; j < in->multi; j++) { strings[nstr] = new char[MAXLINE]; if (in->pflag && in->suflag) expandpath(one,strings[nstr],in->prepend,0,in->substitute,j+1); else if (in->pflag) expandpath(one,strings[nstr],in->prepend,0,global.substitute,j+1); else if (in->suflag) expandpath(one,strings[nstr],global.prepend,0,in->substitute,j+1); else expandpath(one,strings[nstr],global.prepend,0,global.substitute,j+1); nstr++; } } in->nstr = nstr; in->strings = strings; } /* ---------------------------------------------------------------------- called by each -o arg in input script command ------------------------------------------------------------------------- */ void Object::add_output(int index, char *file, char *name) { // check if any user settings exist for this output // if so, use that output descriptor, else create a default one int iwhich; for (iwhich = 0; iwhich < nuseroutput; iwhich++) if (useroutputs[iwhich]->index == index) break; Output *out; if (iwhich == nuseroutput) out = default_output(); else { out = useroutputs[iwhich]; useroutputs[iwhich] = NULL; } out->index = index; outputs[index] = out; // output is a named MR object if (strcmp(name,"NULL") != 0) { int n = strlen(name) + 1; for (int i = 0; i < n-1; i++) if (!isalnum(name[i]) && name[i] != '_') error->all("Ouptut MR ID must be alphanumeric or " "underscore characters"); out->name = new char[n]; strcpy(out->name,name); } // output is a file // convert str to procfile // expandpath() is function of prepend,substitute settings // create dirs in path as needed via createdir() if (strcmp(file,"NULL") != 0) { out->procfile = new char[MAXLINE]; if (out->pflag && out->suflag) expandpath(file,out->procfile,out->prepend,1,out->substitute,0); else if (out->pflag) expandpath(file,out->procfile,out->prepend,1,global.substitute,0); else if (out->suflag) expandpath(file,out->procfile,global.prepend,1,out->substitute,0); else 
expandpath(file,out->procfile,global.prepend,1,global.substitute,0); createdir(1,out->procfile); } // set output mode if (out->procfile && out->name) out->mode = BOTH; else if (out->procfile) out->mode = PATH; else if (out->name) out->mode = MR; else out->mode = NEITHER; } /* ---------------------------------------------------------------------- add a MR object to MRwrap if name = NULL, add as temporary MR object else add as permanent MR object name assumed to not already exist (caller must check this) called with NULL by various other Object methods called with name by copy command in MRMPI::run() and via input script "mr" command ------------------------------------------------------------------------- */ void Object::add_mr(char *name, MapReduce *mr) { if (nmr == maxmr) { maxmr += DELTA; mrwrap = (MRwrap **) memory->srealloc(mrwrap,maxmr*sizeof(MRwrap *), "object:mrwrap"); } mrwrap[nmr] = new MRwrap(); if (name == NULL) { mrwrap[nmr]->mode = TEMPORARY; mrwrap[nmr]->name = NULL; } else { mrwrap[nmr]->mode = PERMANENT; int n = strlen(name) + 1; mrwrap[nmr]->name = new char[n]; strcpy(mrwrap[nmr]->name,name); } mrwrap[nmr]->mr = mr; nmr++; } /* ---------------------------------------------------------------------- allocate and add a named MR object to MRwrap additional args set MapReduce object settings called by "mr" command in input script ------------------------------------------------------------------------- */ void Object::add_mr(int narg, char **arg) { if (narg < 1 || narg > 5) error->all("Illegal mr command"); int n = strlen(arg[0]); for (int i = 0; i < n; i++) if (!isalnum(arg[0][i]) && arg[0][i] != '_') error->all("MR ID must be alphanumeric or underscore characters"); if (find_mr(arg[0]) >= 0) error->all("ID in mr command is already in use"); MapReduce *mr = allocate_mr(); add_mr(arg[0],mr); if (narg >= 2) mr->verbosity = atoi(arg[1]); if (narg >= 3) mr->timer = atoi(arg[2]); if (narg >= 4) mr->memsize = atoi(arg[3]); if (narg == 5) mr->outofcore = 
atoi(arg[4]); } /* ---------------------------------------------------------------------- called via "input" command in input script ------------------------------------------------------------------------- */ void Object::user_input(int narg, char **arg) { if (narg < 3) error->all("Illegal input command"); int index = atoi(arg[0]) - 1; int iwhich; for (iwhich = 0; iwhich < nuserinput; iwhich++) if (userinputs[iwhich]->index == index) break; if (iwhich == nuserinput) { nuserinput++; userinputs = (Input **) memory->srealloc(userinputs, nuserinput*sizeof(Input *), "object:userinputs"); userinputs[iwhich] = default_input(); } Input *in = userinputs[iwhich]; in->index = index; int iarg = 1; while (iarg < narg) { if (iarg+2 > narg) error->all("Illegal input command"); if (strcmp(arg[iarg],"prepend") == 0) { in->pflag = 1; delete [] in->prepend; int n = strlen(arg[iarg+1]) + 1; in->prepend = new char[n]; strcpy(in->prepend,arg[iarg+1]); } else if (strcmp(arg[iarg],"substitute") == 0) { in->suflag = 1; in->substitute = atoi(arg[iarg+1]); } else if (strcmp(arg[iarg],"multi") == 0) { in->multi = atoi(arg[iarg+1]); } else if (strcmp(arg[iarg],"mmode") == 0) { in->mmode = atoi(arg[iarg+1]); } else if (strcmp(arg[iarg],"recurse") == 0) { in->recurse = atoi(arg[iarg+1]); } else if (strcmp(arg[iarg],"self") == 0) { in->self = atoi(arg[iarg+1]); } else if (strcmp(arg[iarg],"readfile") == 0) { in->readfile = atoi(arg[iarg+1]); } else if (strcmp(arg[iarg],"nmap") == 0) { in->nmap = atoi(arg[iarg+1]); } else if (strcmp(arg[iarg],"sepchar") == 0) { in->sepchar = arg[iarg+1][0]; } else if (strcmp(arg[iarg],"sepstr") == 0) { int n = strlen(arg[iarg+1]) + 1; in->sepstr = new char[n]; strcpy(in->sepstr,arg[iarg+1]); } else if (strcmp(arg[iarg],"delta") == 0) { in->delta = atoi(arg[iarg+1]); } else error->all("Illegal input command"); iarg += 2; } } /* ---------------------------------------------------------------------- called via "output" command in input script 
------------------------------------------------------------------------- */ void Object::user_output(int narg, char **arg) { if (narg < 3) error->all("Illegal input command"); int index = atoi(arg[0]) - 1; int iwhich; for (iwhich = 0; iwhich < nuseroutput; iwhich++) if (useroutputs[iwhich]->index == index) break; if (iwhich == nuseroutput) { nuseroutput++; useroutputs = (Output **) memory->srealloc(useroutputs, nuseroutput*sizeof(Output *), "object:useroutputs"); useroutputs[iwhich] = default_output(); } Output *out = useroutputs[iwhich]; out->index = index; int iarg = 1; while (iarg < narg) { if (iarg+2 > narg) error->all("Illegal output command"); if (strcmp(arg[iarg],"prepend") == 0) { out->pflag = 1; delete [] out->prepend; int n = strlen(arg[iarg+1]) + 1; out->prepend = new char[n]; strcpy(out->prepend,arg[iarg+1]); } else if (strcmp(arg[iarg],"substitute") == 0) { out->suflag = 1; out->substitute = atoi(arg[iarg+1]); } else error->all("Illegal output command"); iarg += 2; } } /* ---------------------------------------------------------------------- called via "set" command in input script ------------------------------------------------------------------------- */ void Object::set(int narg, char **arg) { int iarg = 0; while (iarg < narg) { if (iarg+2 > narg) error->all("Illegal set command"); if (strcmp(arg[iarg],"verbosity") == 0) { global.verbosity = atoi(arg[iarg+1]); } else if (strcmp(arg[iarg],"timer") == 0) { global.timer = atoi(arg[iarg+1]); } else if (strcmp(arg[iarg],"memsize") == 0) { global.memsize = atoi(arg[iarg+1]); } else if (strcmp(arg[iarg],"outofcore") == 0) { global.outofcore = atoi(arg[iarg+1]); } else if (strcmp(arg[iarg],"minpage") == 0) { global.minpage = atoi(arg[iarg+1]); } else if (strcmp(arg[iarg],"maxpage") == 0) { global.maxpage = atoi(arg[iarg+1]); } else if (strcmp(arg[iarg],"freepage") == 0) { global.freepage = atoi(arg[iarg+1]); } else if (strcmp(arg[iarg],"zeropage") == 0) { global.zeropage = atoi(arg[iarg+1]); } else if 
(strcmp(arg[iarg],"scratch") == 0) { delete [] global.scratch; int n = strlen(arg[iarg+1]) + 1; global.scratch = new char[n]; strcpy(global.scratch,arg[iarg+1]); } else if (strcmp(arg[iarg],"prepend") == 0) { delete [] global.prepend; int n = strlen(arg[iarg+1]) + 1; global.prepend = new char[n]; strcpy(global.prepend,arg[iarg+1]); } else if (strcmp(arg[iarg],"substitute") == 0) { global.substitute = atoi(arg[iarg+1]); } else error->all("Illegal set command"); iarg += 2; } } /* ---------------------------------------------------------------------- return index of a named MR object return -1 if not found ------------------------------------------------------------------------- */ int Object::find_mr(char *name) { int i; for (i = 0; i < nmr; i++) if (mrwrap[i]->mode == PERMANENT && strcmp(mrwrap[i]->name,name) == 0) return i; return -1; } /* ---------------------------------------------------------------------- return MapReduce object pointer wrapped by MR object with index ------------------------------------------------------------------------- */ MapReduce *Object::extract_mr(int index) { return mrwrap[index]->mr; } /* ---------------------------------------------------------------------- mark a MR object for deletion ------------------------------------------------------------------------- */ void Object::delete_mr(int index) { mrwrap[index]->mode = DELETE; } /* ---------------------------------------------------------------------- allocate a MapReduce object with global settings ------------------------------------------------------------------------- */ MapReduce *Object::allocate_mr() { return allocate_mr(global.verbosity,global.timer, global.memsize,global.outofcore); } /* ---------------------------------------------------------------------- allocate a MapReduce object with specified settings ------------------------------------------------------------------------- */ MapReduce *Object::allocate_mr(int verbosity, int timer, int memsize, int outofcore) { 
MapReduce *mr = new MapReduce(world); mr->verbosity = verbosity; mr->timer = timer; mr->memsize = memsize; mr->outofcore = outofcore; mr->minpage = global.minpage; mr->maxpage = global.maxpage; mr->freepage = global.freepage; mr->zeropage = global.zeropage; if (global.scratch) { char sdir[MAXLINE]; expandpath(global.scratch,sdir,global.prepend,0,global.substitute,0); createdir(0,sdir); mr->set_fpath(sdir); } return mr; } /* ---------------------------------------------------------------------- entry in array could be NULL ------------------------------------------------------------------------- */ void Object::clear_input(int &n, Input **&in) { for (int i = 0; i < n; i++) if (in[i]) { delete [] in[i]->prepend; for (int j = 0; j < in[i]->nstr; j++) delete [] in[i]->strings[j]; delete [] in[i]->strings; delete [] in[i]->sepstr; delete in[i]; } memory->sfree(in); n = 0; in = NULL; } /* ---------------------------------------------------------------------- entry in array could be NULL ------------------------------------------------------------------------- */ void Object::clear_output(int &n, Output **&out) { for (int i = 0; i < n; i++) if (out[i]) { delete [] out[i]->name; delete [] out[i]->prepend; delete [] out[i]->procfile; delete out[i]; } memory->sfree(out); n = 0; out = NULL; } /* ---------------------------------------------------------------------- allocated an Input descriptor and set all its defaults and strings ------------------------------------------------------------------------- */ Object::Input *Object::default_input() { Input *in = new Input(); in->index = -1; in->prepend = NULL; in->pflag = 0; in->suflag = 0; in->multi = 1; in->nstr = 0; in->strings = NULL; in->mmode = 0; in->recurse = 0; in->self = 0; in->readfile = 0; in->nmap = 0; in->sepchar = '\n'; in->sepstr = new char[2]; strcpy(in->sepstr,"\n"); in->delta = 80; return in; } /* ---------------------------------------------------------------------- allocated an Output descriptor and set all 
its defaults and strings ------------------------------------------------------------------------- */ Object::Output *Object::default_output() { Output *out = new Output(); out->index = -1; out->name = NULL; out->prepend = NULL; out->pflag = 0; out->suflag = 0; out->procfile = NULL; return out; } /* ---------------------------------------------------------------------- expand inpath to outpath if prepend is non-NULL, add prepend as preceding path if postpend is 1, add proc ID as trailing suffix if % appears in path, replace first instance with proc ID for substitute = 0, use proc ID directly for substitute = N, replace if with ID % N + 1 can be useful for multi-disk systems on multicore nodes if * appears in path, replace first instance with multi can be useful for multi-disk systems on multicore nodes ------------------------------------------------------------------------- */ void Object::expandpath(char *inpath, char *outpath, char *prepend, int postpend, int substitute, int multi) { char tmppath[MAXLINE]; if (prepend && postpend) sprintf(tmppath,"%s/%s.%d",prepend,inpath,me); else if (prepend) sprintf(tmppath,"%s/%s",prepend,inpath); else if (postpend) sprintf(tmppath,"%s.%d",inpath,me); else strcpy(tmppath,inpath); char *ptr = strchr(tmppath,'%'); if (!ptr) strcpy(outpath,tmppath); else if (substitute == 0) { *ptr = '\0'; sprintf(outpath,"%s%d%s",tmppath,me,ptr+1); } else { *ptr = '\0'; sprintf(outpath,"%s%d%s",tmppath,(me%substitute)+1,ptr+1); } ptr = strchr(outpath,'*'); if (!ptr) return; *ptr = '\0'; sprintf(tmppath,"%s%d%s",outpath,multi,ptr+1); strcpy(outpath,tmppath); } /* ---------------------------------------------------------------------- create dir portion(s) of path, as needed flag = 0, path = dir only flag = 1, path = dir/filename ------------------------------------------------------------------------- */ void Object::createdir(int flag, char *path) { struct stat buf; char dironly[MAXLINE],copy[MAXLINE]; // trim file from path if necessary // if 
no dir in path, just return strcpy(dironly,path); if (flag) { char *ptr = strrchr(dironly,'/'); if (ptr) *ptr = '\0'; else return; } // dironly is only dirs // create entire tmppath, one dir at a time, using copy // mkdir() just returns if dir already exists strcpy(copy,dironly); char *ptr = strtok(copy,"/"); while (ptr) { if (strlen(ptr)) { int offset = ptr+strlen(ptr)-copy; char savechar = dironly[offset]; dironly[offset] = '\0'; mkdir(dironly,0777); dironly[offset] = savechar; } ptr = strtok(NULL,"/"); } mkdir(dironly,0777); // test if successful if (stat(dironly,&buf) || !S_ISDIR(buf.st_mode)) { char str[256]; sprintf(str,"Could not create dir for file %s\n",path); error->one(str); } } mrmpi-1.0~20131122/oink/command.cpp0000644000175000017500000000235311524066004016445 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */ #include "command.h" #include "object.h" #include "error.h" using namespace OINK_NS; /* ---------------------------------------------------------------------- */ Command::Command(OINK *oink) : Pointers(oink) {} /* ---------------------------------------------------------------------- */ void Command::inputs(int narg, char **arg) { if (narg != ninputs) error->all("Mismatch in command inputs"); obj->setup_inputs(ninputs); for (int i = 0; i < narg; i++) obj->add_input(i,arg[i]); } /* ---------------------------------------------------------------------- */ void Command::outputs(int narg, char **arg) { if (narg/2 != noutputs) error->all("Mismatch in command outputs"); if (narg % 2) error->all("Command outputs must be specified in pairs"); obj->setup_outputs(noutputs); for (int i = 0; i < narg; i += 2) obj->add_output(i/2,arg[i],arg[i+1]); } mrmpi-1.0~20131122/oink/scan_print_edge.cpp0000644000175000017500000000072011536275042020156 0ustar mathieumathieu#include "typedefs.h" #include "stdio.h" /* ---------------------------------------------------------------------- print_edge print out an edge to a file input: key = Vi Vj, value = NULL ------------------------------------------------------------------------- */ void print_edge(char *key, int keybytes, char *value, int valuebytes, void *ptr) { FILE *fp = (FILE *) ptr; EDGE *edge = (EDGE *) key; fprintf(fp,"%lu %lu\n",edge->vi,edge->vj); } mrmpi-1.0~20131122/oink/luby_find.cpp0000644000175000017500000001752011536442217017013 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */ #include "typedefs.h" #include "string.h" #include "stdlib.h" #include "luby_find.h" #include "object.h" #include "style_map.h" #include "error.h" #include "blockmacros.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; typedef struct { uint64_t vi,vj; } VPAIR; typedef struct { uint64_t v; double r; } VRAND; typedef struct { uint64_t v; double r; int flag; } VFLAG; typedef struct { uint64_t vi; double ri; uint64_t vj; double rj; } ERAND; /* ---------------------------------------------------------------------- */ LubyFind::LubyFind(OINK *oink) : Command(oink) { ninputs = 1; noutputs = 1; } /* ---------------------------------------------------------------------- */ void LubyFind::run() { int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); // MRe = Eij : NULL MapReduce *mre = obj->input(1,read_edge,NULL); MapReduce *mrv = obj->create_mr(); MapReduce *mrw = obj->create_mr(); // assign a consistent RN to each vertex in each edge // convert to KMV mrw->map(mre,map_vert_random,&seed); mrw->clone(); // loop until all edges deleted int niterate = 0; mrv->open(); while (1) { uint64_t n = mrw->reduce(reduce_edge_winner,NULL); if (n == 0) break; mrw->collate(NULL); mrw->reduce(reduce_vert_winner,NULL); mrw->collate(NULL); mrw->reduce(reduce_vert_loser,NULL); mrw->collate(NULL); mrw->reduce(reduce_vert_emit,mrv); mrw->collate(NULL); niterate++; } uint64_t nset = mrv->close(); obj->output(1,mrv,print,NULL); char msg[128]; sprintf(msg,"Luby_find: %lu MIS vertices in %d iterations",nset,niterate); if (me == 0) error->message(msg); obj->cleanup(); } /* ---------------------------------------------------------------------- */ void LubyFind::params(int narg, char **arg) { if (narg != 1) error->all("Illegal luby_find command"); seed = atoi(arg[0]); } /* ---------------------------------------------------------------------- */ void LubyFind::print(char *key, int 
keybytes, char *value, int valuebytes, void *ptr) { FILE *fp = (FILE *) ptr; VERTEX v = *(VERTEX *) key; fprintf(fp,"%lu\n",v); } /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ void LubyFind::map_vert_random(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { VPAIR *vpair = (VPAIR *) key; ERAND edge; int *seed = (int *) ptr; edge.vi = vpair->vi; srand48(edge.vi + *seed); edge.ri = drand48(); edge.vj = vpair->vj; srand48(edge.vj + *seed); edge.rj = drand48(); kv->add((char *) &edge,sizeof(ERAND),NULL,0); } /* ---------------------------------------------------------------------- */ void LubyFind::reduce_edge_winner(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { if (nvalues == 2 && (valuebytes[0] || valuebytes[1])) return; ERAND *edge = (ERAND *) key; int winner; if (edge->ri < edge->rj) winner = 0; else if (edge->rj < edge->ri) winner = 1; else if (edge->vi < edge->vj) winner = 0; else winner = 1; VRAND v; VFLAG vf; if (winner == 0) { v.v = edge->vi; v.r = edge->ri; vf.v = edge->vj; vf.r = edge->rj; vf.flag = 1; kv->add((char *) &v,sizeof(VRAND),(char *)&vf,sizeof(VFLAG)); v.v = edge->vj; v.r = edge->rj; vf.v = edge->vi; vf.r = edge->ri; vf.flag = 0; kv->add((char *) &v,sizeof(VRAND),(char *) &vf,sizeof(VFLAG)); } else { v.v = edge->vj; v.r = edge->rj; vf.v = edge->vi; vf.r = edge->ri; vf.flag = 1; kv->add((char *) &v,sizeof(VRAND),(char *) &vf,sizeof(VFLAG)); v.v = edge->vi; v.r = edge->ri; vf.v = edge->vj; vf.r = edge->rj; vf.flag = 0; kv->add((char *) &v,sizeof(VRAND),(char *) &vf,sizeof(VFLAG)); } } /* ---------------------------------------------------------------------- */ void LubyFind::reduce_vert_winner(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { int i; VFLAG *vf; int winflag = 1; uint64_t 
nvalues_total; CHECK_FOR_BLOCKS(multivalue,valuebytes,nvalues,nvalues_total) BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) vf = (VFLAG *) multivalue; for (i = 0; i < nvalues; i++) { if (vf->flag == 0) { winflag = 0; break; } vf++;; } if (i < nvalues) break; END_BLOCK_LOOP VRAND *v = (VRAND *) key; VRAND v1out,v2out; VFLAG vfout; BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) vf = (VFLAG *) multivalue; for (i = 0; i < nvalues; i++) { v1out.v = vf->v; v1out.r = vf->r; if (winflag) { vfout.v = v->v; vfout.r = v->r; vfout.flag = 0; kv->add((char *) &v1out,sizeof(VRAND),(char *) &vfout,sizeof(VFLAG)); } else { v2out.v = v->v; v2out.r = v->r; kv->add((char *) &v1out,sizeof(VRAND),(char *) &v2out,sizeof(VRAND)); } vf++;; } END_BLOCK_LOOP } /* ---------------------------------------------------------------------- */ void LubyFind::reduce_vert_loser(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { int i; int size = 2*sizeof(uint64_t); int loseflag = 0; uint64_t nvalues_total; CHECK_FOR_BLOCKS(multivalue,valuebytes,nvalues,nvalues_total) BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) for (i = 0; i < nvalues; i++) { if (valuebytes[i] > size) { loseflag = 1; break; } } if (i < nvalues) break; END_BLOCK_LOOP VRAND *v = (VRAND *) key; VFLAG *vf; VRAND v1out,v2out; VFLAG vfout; BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) for (i = 0; i < nvalues; i++) { vf = (VFLAG *) multivalue; v1out.v = vf->v; v1out.r = vf->r; if (loseflag) { vfout.v = v->v; vfout.r = v->r; vfout.flag = 0; kv->add((char *) &v1out,sizeof(VRAND),(char *) &vfout,sizeof(VFLAG)); } else { v2out.v = v->v; v2out.r = v->r; kv->add((char *) &v1out,sizeof(VRAND),(char *) &v2out,sizeof(VRAND)); } multivalue += valuebytes[i]; } END_BLOCK_LOOP } /* ---------------------------------------------------------------------- */ void LubyFind::reduce_vert_emit(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { int i; int size 
= 2*sizeof(uint64_t); int winflag = 1; uint64_t nvalues_total; CHECK_FOR_BLOCKS(multivalue,valuebytes,nvalues,nvalues_total) BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) for (i = 0; i < nvalues; i++) { if (valuebytes[i] == size) { winflag = 0; break; } } if (i < nvalues) break; END_BLOCK_LOOP VRAND *v = (VRAND *) key; if (winflag) { MapReduce *mrv = (MapReduce *) ptr; mrv->kv->add((char *) &v->v,sizeof(uint64_t),NULL,0); } VFLAG *vf; ERAND edge; int flag = 0; BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) for (i = 0; i < nvalues; i++) { vf = (VFLAG *) multivalue; if (v->v < vf->v) { edge.vi = v->v; edge.ri = v->r; edge.vj = vf->v; edge.rj = vf->r; } else { edge.vi = vf->v; edge.ri = vf->r; edge.vj = v->v; edge.rj = v->r; } if (valuebytes[i] == size) kv->add((char *) &edge,sizeof(ERAND),NULL,0); else kv->add((char *) &edge,sizeof(ERAND),(char *) &flag,sizeof(int)); multivalue += valuebytes[i]; } END_BLOCK_LOOP } mrmpi-1.0~20131122/oink/map_edge_to_vertices.cpp0000644000175000017500000000120711535452656021211 0ustar mathieumathieu#include "typedefs.h" #include "keyvalue.h" using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- edge_to_vertices emit 2 vertices for each edge input: key = Vi Vj, value = NULL output: key = Vi, value = NULL key = Vj, value = NULL ------------------------------------------------------------------------- */ void edge_to_vertices(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { EDGE *edge = (EDGE *) key; kv->add((char *) &edge->vi,sizeof(VERTEX),NULL,0); kv->add((char *) &edge->vj,sizeof(VERTEX),NULL,0); } mrmpi-1.0~20131122/oink/cc_find.h0000644000175000017500000000323011535732700016061 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See 
the README file in the top-level MR-MPI directory.
------------------------------------------------------------------------- */

// when COMMAND_CLASS is defined, this header expands to only the
//   CommandStyle() line; otherwise it declares the class

#ifdef COMMAND_CLASS

CommandStyle(cc_find,CCFind)

#else

#ifndef OINK_CC_FIND_H
#define OINK_CC_FIND_H

#include "command.h"
#include "keyvalue.h"
using MAPREDUCE_NS::KeyValue;

namespace OINK_NS {

class CCFind : public Command {
 public:
  CCFind(class OINK *);
  void run();
  void params(int, char **);

 private:
  int me,nprocs;
  int seed,nthresh;
  uint64_t nvert;
  int flag,pshift;
  uint64_t lmask;

  typedef struct {
    uint64_t zone,empty;
  } PAD;
  PAD pad;

  // static callbacks, implemented outside this header
  // map_*() take a leading uint64_t arg
  // reduce_*() take an int* arg after the 2nd char*,int pair

  static void print(char *, int, char *, int, void *);
  static void reduce_self_zone(char *, int, char *, int, int *,
                               KeyValue *, void *);
  static void map_edge_vert(uint64_t, char *, int, char *, int,
                            KeyValue *, void *);
  static void reduce_edge_zone(char *, int, char *, int, int *,
                               KeyValue *, void *);
  static void reduce_zone_winner(char *, int, char *, int, int *,
                                 KeyValue *, void *);
  static void map_invert_multi(uint64_t, char *, int, char *, int,
                               KeyValue *, void *);
  static void map_zone_multi(uint64_t, char *, int, char *, int,
                             KeyValue *, void *);
  static void reduce_zone_reassign(char *, int, char *, int, int *,
                                   KeyValue *, void *);
  static void map_strip(uint64_t, char *, int, char *, int,
                        KeyValue *, void *);
};

}

#endif
#endif
mrmpi-1.0~20131122/oink/typedefs.h0000644000175000017500000000263011536502301016313 0ustar mathieumathieu
/* ----------------------------------------------------------------------
   OINK - scripting wrapper on MapReduce-MPI library
   http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level MR-MPI directory.
------------------------------------------------------------------------- */

// typedefs used by various classes

#ifndef OINK_TYPEDEFS_H
#define OINK_TYPEDEFS_H

// must be defined before stdint.h is included

#define __STDC_LIMIT_MACROS
#include "stdint.h"
#include "keyvalue.h"
using MAPREDUCE_NS::KeyValue;

// if change these defs, check that heterogeneous structs
// containing these datums are zeroed via memset() if used as keys

typedef uint64_t VERTEX;
typedef struct {
  VERTEX vi,vj;
} EDGE;
typedef int LABEL;
typedef double WEIGHT;
typedef uint64_t ULONG;

// function pointer types for user-supplied callbacks
// one typedef per distinct callback signature

typedef int (*HashFnPtr)(char *, int);
typedef int (*CompareFnPtr)(char *, int, char *, int);
typedef void (*MapTaskFnPtr)(int, KeyValue *, void *);
typedef void (*MapFileFnPtr)(int, char *, KeyValue *, void *);
typedef void (*MapStringFnPtr)(int, char *, int, KeyValue *, void *);
typedef void (*MapMRFnPtr)(uint64_t, char *, int, char *, int,
                           KeyValue *, void *);
typedef void (*ReduceFnPtr)(char *, int, char *, int, int *,
                            KeyValue *, void *);
typedef void (*ScanKVFnPtr)(char *, int, char *, int, void *);
typedef void (*ScanKMVFnPtr)(char *, int, char *, int, int *, void *);

#endif
mrmpi-1.0~20131122/oink/luby_find.h0000644000175000017500000000237011535732267016463 0ustar mathieumathieu
/* ----------------------------------------------------------------------
   OINK - scripting wrapper on MapReduce-MPI library
   http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level MR-MPI directory.
------------------------------------------------------------------------- */

// when COMMAND_CLASS is defined, this header expands to only the
//   CommandStyle() line; otherwise it declares the class

#ifdef COMMAND_CLASS

CommandStyle(luby_find,LubyFind)

#else

#ifndef OINK_LUBY_FIND_H
#define OINK_LUBY_FIND_H

#include "command.h"
#include "keyvalue.h"
using MAPREDUCE_NS::KeyValue;

namespace OINK_NS {

class LubyFind : public Command {
 public:
  LubyFind(class OINK *);
  void run();
  void params(int, char **);

 private:
  int seed;

  // static callbacks, implemented outside this header

  static void print(char *, int, char *, int, void *);
  static void map_vert_random(uint64_t, char *, int, char *, int,
                              KeyValue *, void *);
  static void reduce_edge_winner(char *, int, char *, int, int *,
                                 KeyValue *, void *);
  static void reduce_vert_winner(char *, int, char *, int, int *,
                                 KeyValue *, void *);
  static void reduce_vert_loser(char *, int, char *, int, int *,
                                KeyValue *, void *);
  static void reduce_vert_emit(char *, int, char *, int, int *,
                               KeyValue *, void *);
};

}

#endif
#endif
mrmpi-1.0~20131122/oink/universe.h0000644000175000017500000000230411524066004016330 0ustar mathieumathieu
/* ----------------------------------------------------------------------
   OINK - scripting wrapper on MapReduce-MPI library
   http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level MR-MPI directory.
------------------------------------------------------------------------- */

#ifndef OINK_UNIVERSE_H
#define OINK_UNIVERSE_H

#include "mpi.h"
#include "stdio.h"
#include "pointers.h"

namespace OINK_NS {

class Universe : protected Pointers {
 public:
  char *version;          // OINK version string = date

  MPI_Comm uworld;        // communicator for entire universe
  int me,nprocs;          // my place in universe

  FILE *uscreen;          // universe screen output
  FILE *ulogfile;         // universe logfile

  int existflag;          // 1 if universe exists due to -partition flag
  int nworlds;            // # of worlds in universe
  int iworld;             // which world I am in
  int *procs_per_world;   // # of procs in each world
  int *root_proc;         // root proc in each world

  Universe(class OINK *, MPI_Comm);
  ~Universe();
  void add_world(char *);
  int consistent();
};

}

#endif
mrmpi-1.0~20131122/oink/map_edge_to_vertex_pair.cpp0000644000175000017500000000113411535764350021711 0ustar mathieumathieu
#include "typedefs.h"
#include "keyvalue.h"
using namespace MAPREDUCE_NS;

/* ----------------------------------------------------------------------
   edge_to_vertex_pair
   emit 1 key/value pair for each edge, split into its 2 vertices
   input: key = Vi Vj, value = NULL
   output: key = Vi, value = Vj
------------------------------------------------------------------------- */

void edge_to_vertex_pair(uint64_t itask, char *key, int keybytes,
                         char *value, int valuebytes, KeyValue *kv, void *ptr)
{
  EDGE *edge = (EDGE *) key;
  kv->add((char *) &edge->vi,sizeof(VERTEX),(char *) &edge->vj,sizeof(VERTEX));
}
mrmpi-1.0~20131122/oink/Makefile.lib0000644000175000017500000000246411524065425016541 0ustar mathieumathieu
# MRMPI library multiple-machine Makefile

SHELL = /bin/sh

# Definitions

ROOT = mrmpi
EXE = lib$(ROOT)_$@.a

# SRC and INC lists are rewritten in place by Make.sh

SRC = command.cpp compare_rmat.cpp data2graph.cpp data2rare.cpp degree.cpp error.cpp input.cpp library.cpp map_rmat.cpp memory.cpp mrmpi.cpp neigh_tri.cpp neighbor.cpp object.cpp oink.cpp random_mars.cpp reduce_rmat.cpp rmat.cpp sgi_enumerate.cpp sgi_prune.cpp sgi_sample.cpp tri_find.cpp universe.cpp variable.cpp wordfreq.cpp

INC = blockmacros.h command.h data2graph.h data2rare.h degree.h error.h input.h library.h map_rmat.h memory.h mrmpi.h neigh_tri.h neighbor.h object.h oink.h pointers.h random_mars.h rmat.h sgi_enumerate.h sgi_prune.h sgi_sample.h style_command.h style_compare.h style_hash.h style_map.h style_reduce.h style_scan.h tri_find.h typedefs.h universe.h variable.h version.h wordfreq.h

OBJ = $(SRC:.cpp=.o)

# Targets

# list the first line of every machine Makefile in MAKE/

help:
	@echo 'Type "make target" where target is one of:'
	@echo ''
	@files="`ls MAKE/Makefile.*`"; \
	for file in $$files; do head -1 $$file; done

clean:
	rm -rf Obj_*

# any other target name is a machine name
# copy sources + MAKE/Makefile.<name> into Obj_<name>, build "lib" there,
#   then remove the copied files again

.DEFAULT:
	@test -f MAKE/Makefile.$@
	@if [ ! -d Obj_$@ ]; then mkdir Obj_$@; fi
	@cp -p $(SRC) $(INC) Obj_$@
	@cp MAKE/Makefile.$@ Obj_$@/Makefile
	@cd Obj_$@; \
	$(MAKE) $(MFLAGS) "OBJ = $(OBJ)" "INC = $(INC)" "EXE = ../$(EXE)" lib
	@if [ -d Obj_$@ ]; then cd Obj_$@; rm -f $(SRC) $(INC) Makefile*; fi
mrmpi-1.0~20131122/oink/edge_upper.h0000644000175000017500000000124511535732267016627 0ustar mathieumathieu
/* ----------------------------------------------------------------------
   OINK - scripting wrapper on MapReduce-MPI library
   http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level MR-MPI directory.
------------------------------------------------------------------------- */ #ifdef COMMAND_CLASS CommandStyle(edge_upper,EdgeUpper) #else #ifndef OINK_EDGE_UPPER_H #define OINK_EDGE_UPPER_H #include "command.h" namespace OINK_NS { class EdgeUpper : public Command { public: EdgeUpper(class OINK *); void run(); void params(int, char **); }; } #endif #endif mrmpi-1.0~20131122/oink/Make.sh0000644000175000017500000000145111524065425015540 0ustar mathieumathieu# Make.sh = update Makefile.lib or Makefile.list # Syntax: sh Make.sh Makefile.lib # sh Make.sh Makefile.list # edit Makefile.lib # called by "make makelib" # use current list of *.cpp and *.h files in src dir w/out main.cpp if (test $1 = "Makefile.lib") then list=`ls -1 *.cpp | sed s/^main\.cpp// | tr "[:cntrl:]" " "` sed -i -e "s/SRC = .*/SRC = $list/" Makefile.lib list=`ls -1 *.h | tr "[:cntrl:]" " "` sed -i -e "s/INC = .*/INC = $list/" Makefile.lib # edit Makefile.list # called by "make makelist" # use current list of *.cpp and *.h files in src dir elif (test $1 = "Makefile.list") then list=`ls -1 *.cpp | tr "[:cntrl:]" " "` sed -i -e "s/SRC = .*/SRC = $list/" Makefile.list list=`ls -1 *.h | tr "[:cntrl:]" " "` sed -i -e "s/INC = .*/INC = $list/" Makefile.list fi mrmpi-1.0~20131122/oink/Make.py0000644000175000017500000001455411535732267015575 0ustar mathieumathieu#!/usr/local/bin/python # Make.py to create style_*.h files by parsing other files # Syntax: Make.py import sys,os,glob,commands,re # style_command.h files = glob.glob("*.h") files.sort() fp = open("style_command.tmp","w") for file in files: txt = open(file,"r").read() if "COMMAND_CLASS" in txt: print >>fp,'#include "%s"' % file fp.close() if os.path.exists("style_command.h"): diff = commands.getoutput("diff style_command.h style_command.tmp") else: diff = 1 if diff: os.rename("style_command.tmp","style_command.h") else: os.remove("style_command.tmp") # style_compare.h files = glob.glob("compare_*.cpp") files.sort() hitlist = [] fp = 
open("style_compare.tmp","w") print >>fp,"#ifdef COMPARE_STYLE\n" pattern = re.compile("int \S+?\s*?\([^,\)]+?,[^,\)]+?," + "[^,\)]+?,[^,\)]+?\)",re.DOTALL) for file in files: txt = open(file,"r").read() hits = re.findall(pattern,txt) hitlist += hits for hit in hits: patternword = "int (\S+?)\s*?\(" funcname = re.findall(patternword,hit) print >>fp,"CompareStyle(%s)" % funcname[0] print >>fp,"\n#else\n" for hit in hitlist: print >>fp,"%s;" % hit print >>fp,"\n#endif" fp.close() if os.path.exists("style_compare.h"): diff = commands.getoutput("diff style_compare.h style_compare.tmp") else: diff = 1 if diff: os.rename("style_compare.tmp","style_compare.h") else: os.remove("style_compare.tmp") # style_hash.h files = glob.glob("hash_*.cpp") files.sort() hitlist = [] fp = open("style_hash.tmp","w") print >>fp,"#ifdef HASH_STYLE\n" pattern = re.compile("int \S+?\s*?\([^,\)]+?,[^,\)]+?\)",re.DOTALL) for file in files: txt = open(file,"r").read() hits = re.findall(pattern,txt) hitlist += hits for hit in hits: patternword = "int (\S+?)\s*?\(" funcname = re.findall(patternword,hit) print >>fp,"HashStyle(%s)" % funcname[0] print >>fp,"\n#else\n" for hit in hitlist: print >>fp,"%s;" % hit print >>fp,"\n#endif" fp.close() if os.path.exists("style_hash.h"): diff = commands.getoutput("diff style_hash.h style_hash.tmp") else: diff = 1 if diff: os.rename("style_hash.tmp","style_hash.h") else: os.remove("style_hash.tmp") # style_map.h files = glob.glob("map_*.cpp") files.sort() hitlist = [] fp = open("style_map.tmp","w") print >>fp,"#if defined MAP_TASK_STYLE\n" pattern = re.compile("void \S+?\s*?\([^,\)]+?,[^,\)]+?,[^,\)]+?\)",re.DOTALL) for file in files: txt = open(file,"r").read() hits = re.findall(pattern,txt) hitlist += hits for hit in hits: patternword = "void (\S+?)\s*?\(" funcname = re.findall(patternword,hit) print >>fp,"MapStyle(%s)" % funcname[0] print >>fp,"\n#elif defined MAP_FILE_STYLE\n" pattern = re.compile("void \S+?\s*?\([^,\)]+?,[^,\)]+?,[^,\)]+?," + 
"[^,\)]+?\)",re.DOTALL) for file in files: txt = open(file,"r").read() hits = re.findall(pattern,txt) hitlist += hits for hit in hits: patternword = "void (\S+?)\s*?\(" funcname = re.findall(patternword,hit) print >>fp,"MapStyle(%s)" % funcname[0] print >>fp,"\n#elif defined MAP_STRING_STYLE\n" pattern = re.compile("void \S+?\s*?\([^,\)]+?,[^,\)]+?,[^,\)]+?," + "[^,\)]+?,[^,\)]+?\)",re.DOTALL) for file in files: txt = open(file,"r").read() hits = re.findall(pattern,txt) hitlist += hits for hit in hits: patternword = "void (\S+?)\s*?\(" funcname = re.findall(patternword,hit) print >>fp,"MapStyle(%s)" % funcname[0] print >>fp,"\n#elif defined MAP_MR_STYLE\n" pattern = re.compile("void \S+?\s*?\([^,\)]+?,[^,\)]+?,[^,\)]+?," + "[^,\)]+?,[^,\)]+?,[^,\)]+?,[^,\)]+?\)",re.DOTALL) for file in files: txt = open(file,"r").read() hits = re.findall(pattern,txt) hitlist += hits for hit in hits: patternword = "void (\S+?)\s*?\(" funcname = re.findall(patternword,hit) print >>fp,"MapStyle(%s)" % funcname[0] print >>fp,"\n#else\n" print >>fp,'#include "mapreduce.h"' print >>fp,"using MAPREDUCE_NS::MapReduce;" print >>fp,"using MAPREDUCE_NS::KeyValue;\n" for hit in hitlist: print >>fp,"%s;" % hit print >>fp,"\n#endif" fp.close() if os.path.exists("style_map.h"): diff = commands.getoutput("diff style_map.h style_map.tmp") else: diff = 1 if diff: os.rename("style_map.tmp","style_map.h") else: os.remove("style_map.tmp") # style_reduce.h files = glob.glob("reduce_*.cpp") files.sort() hitlist = [] fp = open("style_reduce.tmp","w") print >>fp,"#ifdef REDUCE_STYLE\n" pattern = re.compile("void \S+?\s*?\([^,\)]+?,[^,\)]+?,[^,\)]+?," "[^,\)]+?,[^,\)]+?,[^,\)]+?,[^,\)]+?\)",re.DOTALL) for file in files: txt = open(file,"r").read() hits = re.findall(pattern,txt) hitlist += hits for hit in hits: patternword = "void (\S+?)\s*?\(" funcname = re.findall(patternword,hit) print >>fp,"ReduceStyle(%s)" % funcname[0] print >>fp,"\n#else\n" print >>fp,'#include "keyvalue.h"' print >>fp,"using 
MAPREDUCE_NS::KeyValue;\n" for hit in hitlist: print >>fp,"%s;" % hit print >>fp,"\n#endif" fp.close() if os.path.exists("style_reduce.h"): diff = commands.getoutput("diff style_reduce.h style_reduce.tmp") else: diff = 1 if diff: os.rename("style_reduce.tmp","style_reduce.h") else: os.remove("style_reduce.tmp") # style_scan.h files = glob.glob("scan_*.cpp") files.sort() hitlist = [] fp = open("style_scan.tmp","w") print >>fp,"#if defined SCAN_KV_STYLE\n" pattern = re.compile("void \S+?\s*?\([^,\)]+?,[^,\)]+?,[^,\)]+?," + "[^,\)]+?,[^,\)]+?\)",re.DOTALL) for file in files: txt = open(file,"r").read() hits = re.findall(pattern,txt) hitlist += hits for hit in hits: patternword = "void (\S+?)\s*?\(" funcname = re.findall(patternword,hit) print >>fp,"ScanStyle(%s)" % funcname[0] print >>fp,"\n#elif defined SCAN_KMV_STYLE\n" pattern = re.compile("void \S+?\s*?\([^,\)]+?,[^,\)]+?,[^,\)]+?,[^,\)]+?" ",[^,\)]+?,[^,\)]+?\)",re.DOTALL) for file in files: txt = open(file,"r").read() hits = re.findall(pattern,txt) hitlist += hits for hit in hits: patternword = "void (\S+?)\s*?\(" funcname = re.findall(patternword,hit) print >>fp,"ScanStyle(%s)" % funcname[0] print >>fp,"\n#else\n" for hit in hitlist: print >>fp,"%s;" % hit print >>fp,"\n#endif" fp.close() if os.path.exists("style_scan.h"): diff = commands.getoutput("diff style_scan.h style_scan.tmp") else: diff = 1 if diff: os.rename("style_scan.tmp","style_scan.h") else: os.remove("style_scan.tmp") mrmpi-1.0~20131122/oink/pagerank.cpp0000644000175000017500000000477511536442217016640 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */

#include "typedefs.h"
#include "string.h"
#include "stdlib.h"
#include "pagerank.h"
#include "object.h"
#include "style_map.h"
#include "style_reduce.h"
#include "error.h"

#include "blockmacros.h"
#include "mapreduce.h"
#include "keyvalue.h"

using namespace OINK_NS;
using namespace MAPREDUCE_NS;

/* ---------------------------------------------------------------------- */

// the command takes one input MR object and produces one output
PageRank::PageRank(OINK *oink) : Command(oink)
{
  ninputs = 1;
  noutputs = 1;
}

/* ---------------------------------------------------------------------- */

// Skeleton of the PageRank driver: builds the vertex list from the edge
// list, then runs an (currently empty) iteration loop and writes output.
void PageRank::run()
{
  int me;
  MPI_Comm_rank(MPI_COMM_WORLD,&me);

  // MRe = Eij : weight

  MapReduce *mre = obj->input(1,read_edge_weight,NULL);
  MapReduce *mrv = obj->create_mr();
  MapReduce *mrr = obj->create_mr();

  // MRv = list of vertices in the directed graph

  mrv->map(mre,edge_to_vertices,NULL);
  mrv->collate(NULL);
  mrv->reduce(cull,NULL);

  // iterate over PageRank calculations
  // NOTE(review): the loop body is empty, so no ranks are computed yet;
  // tolerance and alpha are parsed in params() but not used here

  for (int iterate = 0; iterate < maxiter; iterate++) {

  }

  // MRr = Vi : rank
  // NOTE(review): output currently passes mre (edge keys) to print(),
  // which reads the key as a single VERTEX and the value as a double --
  // confirm this placeholder is replaced by the mrr output once ranks exist

  //obj->output(1,mrr,print,NULL);
  obj->output(1,mre,print,NULL);

  obj->cleanup();
}

/* ---------------------------------------------------------------------- */

// parse "pagerank tolerance maxiter alpha"; any other arg count is an error
void PageRank::params(int narg, char **arg)
{
  if (narg != 3) error->all("Illegal pagerank command");

  tolerance = atof(arg[0]);
  maxiter = atoi(arg[1]);
  alpha = atof(arg[2]);
}

/* ---------------------------------------------------------------------- */

// write one "vertex rank" line to the FILE passed in ptr;
// key is read as a VERTEX, value as a double
void PageRank::print(char *key, int keybytes, char *value, int valuebytes, void *ptr)
{
  FILE *fp = (FILE *) ptr;
  VERTEX v = *(VERTEX *) key;
  double rank = *(double *) value;
  fprintf(fp,"%lu %g\n",v,rank);
}

/* ---------------------------------------------------------------------- */

/* ---------------------------------------------------------------------- */

// here are sample map/reduce funcs

/*

void PageRank::map_edge_vert(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr)
{
}

void PageRank::reduce_second_degree(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr)
{
}

*/
mrmpi-1.0~20131122/oink/map_edge_upper.cpp0000644000175000017500000000136211535452656020020 0ustar mathieumathieu
#include "typedefs.h"
#include "keyvalue.h"
using namespace MAPREDUCE_NS;

/* ----------------------------------------------------------------------
   edge_upper
   emit each edge with Vi < Vj, drop self-edges with Vi = Vj
   input: key = Vi Vj, value = NULL
   output: key = Vi Vj, value = NULL, with Vi < Vj
------------------------------------------------------------------------- */

void edge_upper(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr)
{
  EDGE *edge = (EDGE *) key;
  // already ordered: pass the key through unchanged
  if (edge->vi < edge->vj) kv->add(key,keybytes,NULL,0);
  // reversed: emit a swapped copy; the Vi == Vj case falls through and
  // emits nothing
  else if (edge->vi > edge->vj) {
    EDGE newedge;
    newedge.vi = edge->vj;
    newedge.vj = edge->vi;
    kv->add((char *) &newedge,sizeof(EDGE),NULL,0);
  }
}
mrmpi-1.0~20131122/oink/pagerank.h0000644000175000017500000000257711535746147016313 0ustar mathieumathieu
/* ----------------------------------------------------------------------
   OINK - scripting wrapper on MapReduce-MPI library
   http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level MR-MPI directory.
------------------------------------------------------------------------- */ #ifdef COMMAND_CLASS CommandStyle(pagerank,PageRank) #else #ifndef OINK_PAGERANK_H #define OINK_PAGERANK_H #include "command.h" #include "keyvalue.h" using MAPREDUCE_NS::KeyValue; namespace OINK_NS { class PageRank : public Command { public: PageRank(class OINK *); void run(); void params(int, char **); private: double tolerance,alpha; int maxiter; static void print(char *, int, char *, int, void *); /* static void map_edge_vert(uint64_t, char *, int, char *, int, KeyValue *, void *); static void reduce_first_degree(char *, int, char *, int, int *, KeyValue *, void *); static void reduce_second_degree(char *, int, char *, int, int *, KeyValue *, void *); static void map_low_degree(uint64_t, char *, int, char *, int, KeyValue *, void *); static void reduce_nsq_angles(char *, int, char *, int, int *, KeyValue *, void *); static void reduce_emit_triangles(char *, int, char *, int, int *, KeyValue *, void *); */ }; } #endif #endif mrmpi-1.0~20131122/oink/blockmacros.h0000644000175000017500000000201711524065425016776 0ustar mathieumathieu// macros to simplify use of multi-page KMVs // Karen Devine, March 2010 #ifndef _BLOCKMACROS_HPP #define _BLOCKMACROS_HPP // macros to loop over blocks when reduce multivalues may span more than 1 block // use CHECK_FOR_BLOCKS initially to get # of blocks in the multivalue // enclose code for each block between BEGIN_BLOCK_LOOP and END_BLOCK_LOOP // NOTE: DO NOT put a semicolon afer these macros #define CHECK_FOR_BLOCKS(multivalue, valuebytes, nvalues, totalnvalues) \ int macro_nblocks = 1; \ totalnvalues = nvalues; \ MapReduce *macro_mr = NULL; \ if (!(multivalue)) { \ macro_mr = (MapReduce *) (valuebytes); \ totalnvalues = macro_mr->multivalue_blocks(macro_nblocks); \ } #define BEGIN_BLOCK_LOOP(multivalue, valuebytes, nvalues) \ for (int macro_iblock = 0; macro_iblock < macro_nblocks; macro_iblock++) { \ if (macro_mr) \ (nvalues) = 
macro_mr->multivalue_block(macro_iblock, \ &(multivalue),&(valuebytes)); #define END_BLOCK_LOOP } #endif mrmpi-1.0~20131122/oink/map_read_words.cpp0000644000175000017500000000135511536442410020020 0ustar mathieumathieu#include "string.h" #include "keyvalue.h" using namespace MAPREDUCE_NS; #define MAXLINE 1024 /* ---------------------------------------------------------------------- read_words read words from file, separated by whitespace output: key = word, value = NULL ------------------------------------------------------------------------- */ void read_words(int itask, char *file, KeyValue *kv, void *ptr) { char line[MAXLINE]; char *whitespace = " \t\n\f\r\0"; int *nfiles = (int *) ptr; (*nfiles)++; FILE *fp = fopen(file,"r"); while (fgets(line,MAXLINE,fp)) { char *word = strtok(line,whitespace); while (word) { kv->add(word,strlen(word)+1,NULL,0); word = strtok(NULL,whitespace); } } fclose(fp); } mrmpi-1.0~20131122/oink/sssp.cpp0000644000175000017500000003236011536442757016040 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */ #include "typedefs.h" #include "string.h" #include "stdlib.h" #include "sssp.h" #include "object.h" #include "style_map.h" #include "style_reduce.h" #include "error.h" #include #include "blockmacros.h" #include "mapreduce.h" #include "keyvalue.h" #include "keymultivalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; using namespace std; #define MAX_NUM_EXPERIMENTS 50 #ifdef NOISY #define HELLO {std::cout << "KDD " << __func__ << std::endl;} #else #define HELLO #endif uint64_t SSSP::NVtxLabeled = 0; ///////////////////////////////////////////////////////////////////////////// SSSP::SSSP(OINK *oink) : Command(oink) { HELLO MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Comm_size(MPI_COMM_WORLD, &np); ninputs = 1; noutputs = 1; } ///////////////////////////////////////////////////////////////////////////// void SSSP::run() { HELLO int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); srand48(seed); // MRe = Eij : weight MapReduce *mredge = obj->input(1,read_edge_weight,NULL); // Create list of vertices from the edges. // The resulting graph has no singleton vertices. // The reduce operation pre-aggregates the vertices to processors as needed. MapReduce *mrvert = obj->create_mr(); mrvert->map(mredge,edge_to_vertices,NULL); mrvert->collate(NULL); mrvert->reduce(cull,NULL); // Generate a list of valid sources. Assume no singletons in mrvert. MapReduce *mrlist = mrvert->copy(); // vertices are already aggregated. mrlist->compress(get_good_sources, this); delete mrlist; // Aggregate mredge by source-vertex of each edge. No need to convert yet. // mredge and mrvert, then, are essentially local mapreduce objects. mredge->map(mredge, reorganize_edges, NULL); mredge->aggregate(NULL); // Select a source vertex. // Processor 0 selects random number S in range [1:N] for N vertices. 
// Processor 0 emits into Paths key-value pair [S, {-1, 0}], // signifying that vertex S has distance zero from itself, with no // predecessor. double tcompute = 0; MPI_Barrier(MPI_COMM_WORLD); double tstart = MPI_Wtime(); for (int cnt = 0; cnt < ncnt; cnt++) { VERTEX source; if (!get_next_source(&source, cnt)) break; // Initialize vertex distances. mrvert->map(mrvert, initialize_vertex_distances, (void *) NULL, 0); MapReduce *mrpath = obj->create_mr(); if (me == 0) std::cout << cnt << ": BEGINNING SOURCE " << source << std::endl; mrpath->map(1, add_source, &source); // Perform a BFS from S, editing distances as visit vertices. int done = 0; int iter = 0; while (!done) { done = 1; // First, determine which, if any, vertex distances have changed. // Add updated distances existing distances. mrpath->aggregate(NULL); mrvert->kv->append(); mrpath->map(mrpath, move_to_new_mr, mrvert); mrvert->kv->complete(); // Pick best distances. For vertices with changed distances, // emit new distances into mrpath. uint64_t tmp_nv = 0, tmp_ne = 0; NVtxLabeled = 0; mrpath->kv->append(); tmp_nv = mrvert->compress(pick_shortest_distances, mrpath); mrpath->kv->complete(); uint64_t nchanged; MPI_Allreduce(&(mrpath->kv->nkv), &nchanged, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD); if (nchanged) { // Some vtxs' distances changed; // need to emit new distances for adjacent vtxs. 
done = 0; mredge->kv->append(); mrpath->map(mrpath, move_to_new_mr, mredge); mredge->kv->complete(); mrpath->kv->append(); tmp_ne = mredge->compress(update_adjacent_distances, mrpath); mrpath->kv->complete(); } else done = 1; int alldone; MPI_Allreduce(&done, &alldone, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); done = alldone; if (me == 0) std::cout << " Iteration " << iter << " MRPath size " << mrpath->kv->nkv << " MRVert size " << mrvert->kv->nkv << " MREdge size " << mredge->kv->nkv << std::endl; iter++; } MPI_Barrier(MPI_COMM_WORLD); double tstop = MPI_Wtime(); tcompute += (tstop - tstart); uint64_t GNVtxLabeled; MPI_Allreduce(&NVtxLabeled, &GNVtxLabeled, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD); if (me == 0) { std::cout << cnt << ": Source = " << source << "; Iterations = " << iter << "; Num Vtx Labeled = " << GNVtxLabeled << std::endl; std::cout << cnt << ": Source = " << source << "; Iterations = " << iter << "; Compute Time = " << (tstop-tstart) << std::endl; } // Now mrpath contains one key-value per vertex Vi: // Key = Vi, Value = DISTANCE = {predecessor, weight} obj->output(1,mrpath,print,NULL); } if (me == 0) std::cout << "Total time in SSSP: " << tcompute << std::endl; obj->cleanup(); } ///////////////////////////////////////////////////////////////////////////// // reorganize_edges: Just reorganize the way edge data is presented. // Map: Input: edge data with key=(Vi,Vj), value=wt // Output: edge data with key=Vi, edge=(Vj,wt) void SSSP::reorganize_edges(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { HELLO EDGE *e = (EDGE *) key; VERTEX v = e->vi; EDGEVALUE ev; ev.v = e->vj; ev.wt = *(WEIGHT *)value; kv->add((char *) &v, sizeof(VERTEX), (char *) &ev, sizeof(EDGEVALUE)); } ///////////////////////////////////////////////////////////////////////////// // add_source: Add the source vertex to the MapReduce object as initial vtx. // Map: Input: randomly selected vertex in [1:N] for source. 
// Output: One key-value pair for the source. void SSSP::add_source(int nmap, KeyValue *kv, void *ptr) { HELLO VERTEX *v = (VERTEX *) ptr; DISTANCE d; d.e.wt = 0; // Distance from source to itself is zero. d.current = false; kv->add((char *) v, sizeof(VERTEX), (char *) &d, sizeof(DISTANCE)); } ///////////////////////////////////////////////////////////////////////////// // move_to_new_mr: Move KV from existing MR to new MR provided in ptr. // Map: Input: KVs in exisitng MR object, new MR object in ptr. // Output: No KVs in existing MR object; they are all added to new MR. void SSSP::move_to_new_mr(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { HELLO MapReduce *mr = (MapReduce *) ptr; mr->kv->add(key, keybytes, value, valuebytes); } ///////////////////////////////////////////////////////////////////////////// // initialize_vertex_distances: Add initial distance to all vertices. // Map: Input: KV key = Vtx ID; value = NULL // Output: KV key = Vtx ID; value = initial distance void SSSP::initialize_vertex_distances(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { HELLO DISTANCE d; kv->add(key, keybytes, (char *) &d, sizeof(DISTANCE)); } ///////////////////////////////////////////////////////////////////////////// // pick_shortest_distances: For each vertex, pick the best distance. // Emit the winning distance. Also, emit any changed vertex distances // to mrpath (in the ptr argument). void SSSP::pick_shortest_distances(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { HELLO MapReduce *mrpath = (MapReduce *) ptr; uint64_t total_nvalues; CHECK_FOR_BLOCKS(multivalue, valuebytes, nvalues, total_nvalues) DISTANCE shortest; // Shortest path so far to Vi. DISTANCE previous; // Previous best answer. bool modified = false; if (total_nvalues > 1) { // Need to find the shortest distance to Vi. 
BEGIN_BLOCK_LOOP(multivalue, valuebytes, nvalues) uint64_t offset = 0; for (int j = 0; j < nvalues; j++) { DISTANCE *d = (DISTANCE*) (multivalue+offset); if (d->e.wt < shortest.e.wt) { shortest = *d; // shortest path so far. } if (d->current) previous = *d; offset += valuebytes[j]; } END_BLOCK_LOOP } else { DISTANCE *d = (DISTANCE*) multivalue; shortest = *d; previous = *d; } // Did we change the vertex's distance? if (previous != shortest) modified = true; // Emit vertex with updated distance back into mrvert. shortest.current = true; kv->add(key, keybytes, (char *) &shortest, sizeof(DISTANCE)); if (shortest.e.wt < FLT_MAX) NVtxLabeled++; // If changes were made, emit the new distance into mrpath. if (modified) { mrpath->kv->add(key, keybytes, (char *) &shortest, sizeof(DISTANCE)); } } ///////////////////////////////////////////////////////////////////////////// // update_adjacent_distances: For each vertex whose distance has changed, // emit into mrpath a possible updated distance to each of its adjacencies. // Also emit the adjacency list back into mredge. void SSSP::update_adjacent_distances(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { HELLO MapReduce *mrpath = (MapReduce *) ptr; VERTEX *vi = (VERTEX *) key; bool found = false; DISTANCE shortest; uint64_t total_nvalues; CHECK_FOR_BLOCKS(multivalue, valuebytes, nvalues, total_nvalues) // Find the updated distance, if any. // Also, re-emit the edges into mredge. BEGIN_BLOCK_LOOP(multivalue, valuebytes, nvalues) uint64_t offset = 0; for (int j = 0; j < nvalues; j++) { // Multivalues are either edges or distances. Distances use more bytes. if (valuebytes[j] == sizeof(DISTANCE)) { // This is a distance value. DISTANCE *d = (DISTANCE*) (multivalue+offset); found = true; if (d->e.wt < shortest.e.wt) shortest = *d; // shortest path so far. } else { // This is an edge value. Re-emit it into mredge. 
kv->add(key, keybytes, multivalue+offset, valuebytes[j]); } offset += valuebytes[j]; } END_BLOCK_LOOP // If an updated distance was found, need to update distances for // outward adjacencies. Add these updates to mrpath. if (found) { BEGIN_BLOCK_LOOP(multivalue, valuebytes, nvalues) uint64_t offset = 0; for (int j = 0; j < nvalues; j++) { // Multivalues are either edges or distances. Distances use more bytes. if (valuebytes[j] == sizeof(EDGEVALUE)) { // This is an edge value. Emit the updated distance. EDGEVALUE *e = (EDGEVALUE *) (multivalue+offset); // with all wt > 0, don't follow (1) loops back to predecessor or // (2) self-loops. if ((shortest.e.v != e->v) && (e->v != *vi)) { DISTANCE dist; dist.e.v = *vi; // Predecessor of Vj along the path. dist.e.wt = shortest.e.wt + e->wt; dist.current = false; mrpath->kv->add((char *) &(e->v), sizeof(VERTEX), (char *) &dist, sizeof(DISTANCE)); } } offset += valuebytes[j]; } END_BLOCK_LOOP } } ///////////////////////////////////////////////////////////////////////////// void SSSP::get_good_sources(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { HELLO class SSSP *ths = (class SSSP *) ptr; // Check whether already have enough sources. if (ths->sourcelist.size() >= ths->ncnt) return; // If vertex made it to this routine, it has non-zero outdegree. Use it // as a source. 
ths->sourcelist.push_back(*((VERTEX *) key)); } ///////////////////////////////////////////////////////////////////////////// bool SSSP::get_next_source( VERTEX *source, int cnt ) { HELLO *source = 0; if (me == 0) { if (cnt < sourcelist.size()) *source = sourcelist[cnt]; } MPI_Bcast(source, sizeof(VERTEX), MPI_BYTE, 0, MPI_COMM_WORLD); return(*source != 0); } /* ---------------------------------------------------------------------- */ void SSSP::params(int narg, char **arg) { HELLO if (narg != 2) error->all("Illegal sssp command"); ncnt = atoi(arg[0]); std::cout << "PARAM ncnt=" << ncnt << std::endl; seed = atoi(arg[1]); std::cout << "PARAM seed=" << seed << std::endl; } /* ---------------------------------------------------------------------- */ void SSSP::print(char *key, int keybytes, char *value, int valuebytes, void *ptr) { HELLO FILE *fp = (FILE *) ptr; VERTEX *v = (VERTEX *) key; DISTANCE *d = (DISTANCE *) value; fprintf(fp,"%lu %g %lu\n", v, d->e.wt, d->e.v); } /* ---------------------------------------------------------------------- */ mrmpi-1.0~20131122/oink/edge_upper.cpp0000644000175000017500000000320011536502301017134 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */ #include "typedefs.h" #include "mpi.h" #include "string.h" #include "stdlib.h" #include "edge_upper.h" #include "object.h" #include "style_map.h" #include "style_reduce.h" #include "style_scan.h" #include "error.h" #include "blockmacros.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- */ EdgeUpper::EdgeUpper(OINK *oink) : Command(oink) { ninputs = 1; noutputs = 1; } /* ---------------------------------------------------------------------- */ void EdgeUpper::run() { int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); // MRe = Eij : NULL MapReduce *mre = obj->input(1,read_edge,NULL); MapReduce *mr = obj->create_mr(); uint64_t nedge = mre->kv_stats(0); mr->map(mre,edge_upper,NULL); mr->collate(NULL); uint64_t unique = mr->reduce(cull,NULL); obj->output(1,mr,print_edge,NULL); char msg[128]; sprintf(msg,"EdgeUpper: %lu original edges, %lu final edges", nedge,unique); if (me == 0) error->message(msg); obj->cleanup(); } /* ---------------------------------------------------------------------- */ void EdgeUpper::params(int narg, char **arg) { if (narg != 0) error->all("Illegal edge_upper command"); } mrmpi-1.0~20131122/oink/map_edge_to_vertex.cpp0000644000175000017500000000107111536001247020664 0ustar mathieumathieu#include "typedefs.h" #include "keyvalue.h" using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- edge_to_vertex emit 1 vertex for each edge, just first one input: key = Vi Vj, value = NULL output: key = Vi, value = NULL ------------------------------------------------------------------------- */ void edge_to_vertex(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { EDGE *edge = (EDGE *) key; kv->add((char *) &edge->vi,sizeof(VERTEX),NULL,0); } 
mrmpi-1.0~20131122/oink/degree_weight.cpp0000644000175000017500000000625011536440627017643 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #include "typedefs.h" #include "mpi.h" #include "string.h" #include "stdlib.h" #include "degree_weight.h" #include "object.h" #include "style_map.h" #include "error.h" #include "blockmacros.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- */ DegreeWeight::DegreeWeight(OINK *oink) : Command(oink) { ninputs = 2; noutputs = 1; } /* ---------------------------------------------------------------------- */ void DegreeWeight::run() { int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); // MRe = Eij : NULL // MRd = Vi : degree MapReduce *mre = obj->input(1,read_edge,NULL); MapReduce *mrd = obj->input(2,read_vertex_weight,NULL); MapReduce *mrewt = obj->create_mr(); uint64_t nvert = mrd->kv_stats(0); mrewt->map(mre,edge_to_vertex_pair,NULL); mrewt->add(mrd); mrewt->collate(NULL); uint64_t nedge = mrewt->reduce(inverse_degree,NULL); obj->output(1,mrewt,print,NULL); char msg[128]; sprintf(msg,"DegreeWeight: %lu vertices, %lu edges",nvert,nedge); if (me == 0) error->message(msg); obj->cleanup(); } /* ---------------------------------------------------------------------- */ void DegreeWeight::params(int narg, char **arg) { if (narg != 0) error->all("Illegal degree_weight command"); } /* ---------------------------------------------------------------------- */ void DegreeWeight::print(char *key, int keybytes, char *value, int valuebytes, void *ptr) { FILE *fp = (FILE *) ptr; EDGE *e = (EDGE *) 
key; WEIGHT weight = *(WEIGHT *) value; fprintf(fp,"%lu %lu %g\n",e->vi,e->vj,weight); } /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ void DegreeWeight::inverse_degree(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { int i; char *value; EDGE edge; uint64_t nvalues_total; CHECK_FOR_BLOCKS(multivalue,valuebytes,nvalues,nvalues_total) BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) // loop over values to find degree count value = multivalue; for (i = 0; i < nvalues; i++) { if (valuebytes[i] == sizeof(int)) break; value += valuebytes[i]; } if (i < nvalues) break; END_BLOCK_LOOP int degree = *(int *) value; WEIGHT weight = 1.0/degree; edge.vi = *(VERTEX *) key; // emit one KV per edge with weight = one/degree as value BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) value = multivalue; for (int i = 0; i < nvalues; i++) { if (valuebytes[i] != sizeof(int)) { edge.vj = *(VERTEX *) value; kv->add((char *) &edge,sizeof(EDGE),(char *) &weight,sizeof(WEIGHT)); } value += valuebytes[i]; } END_BLOCK_LOOP } mrmpi-1.0~20131122/oink/degree_stats.h0000644000175000017500000000140511535732267017157 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */

#ifdef COMMAND_CLASS
CommandStyle(degree_stats,DegreeStats)

#else

#ifndef OINK_DEGREE_STATS_H
#define OINK_DEGREE_STATS_H

#include "command.h"

namespace OINK_NS {

// "degree_stats" command
class DegreeStats : public Command {
 public:
  DegreeStats(class OINK *);
  void run();
  void params(int, char **);

 private:
  int duplicate;
  static void print(char *, int, char *, int, void *);
};

}

#endif
#endif
mrmpi-1.0~20131122/oink/mrmpi.h0000644000175000017500000000170411535732267015634 0ustar mathieumathieu
/* ----------------------------------------------------------------------
   OINK - scripting wrapper on MapReduce-MPI library
   http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level MR-MPI directory.
------------------------------------------------------------------------- */

#ifndef OINK_MRMPI_H
#define OINK_MRMPI_H

#include "typedefs.h"
#include "pointers.h"

namespace OINK_NS {

// MRMPI: run() plus one name-based lookup per callback kind
// (hash, compare, the four map styles, reduce, and the two scan styles),
// each returning the matching function-pointer type from typedefs.h
class MRMPI : protected Pointers {
 public:
  MRMPI(class OINK *);
  ~MRMPI() {}
  void run(int, int, char **);

  HashFnPtr hash_lookup(char *);
  CompareFnPtr compare_lookup(char *);
  MapTaskFnPtr map_task_lookup(char *);
  MapFileFnPtr map_file_lookup(char *);
  MapStringFnPtr map_string_lookup(char *);
  MapMRFnPtr map_mr_lookup(char *);
  ReduceFnPtr reduce_lookup(char *);
  ScanKVFnPtr scan_kv_lookup(char *);
  ScanKMVFnPtr scan_kmv_lookup(char *);
};

}

#endif
mrmpi-1.0~20131122/oink/cc_stats.cpp0000644000175000017500000000446111536440627016646 0ustar mathieumathieu
/* ----------------------------------------------------------------------
   OINK - scripting wrapper on MapReduce-MPI library
   http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level MR-MPI directory.
------------------------------------------------------------------------- */ #include "typedefs.h" #include "string.h" #include "stdlib.h" #include "cc_stats.h" #include "object.h" #include "style_map.h" #include "style_reduce.h" #include "error.h" #include "blockmacros.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; #define MAXLINE 1024 /* ---------------------------------------------------------------------- */ CCStats::CCStats(OINK *oink) : Command(oink) { ninputs = 1; noutputs = 1; } /* ---------------------------------------------------------------------- */ void CCStats::run() { int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); // MRv = Vi : Zi MapReduce *mrv = obj->input(1,read,NULL); MapReduce *mr = obj->create_mr(); uint64_t nvert = mr->map(mrv,invert,NULL); uint64_t ncc = mr->collate(NULL); mr->reduce(count,NULL); mr->map(mr,invert,NULL); mr->collate(NULL); mr->reduce(count,NULL); mr->gather(1); mr->sort_keys(-1); char msg[128]; sprintf(msg,"CCStats: %lu components, %lu vertices",ncc,nvert); if (me == 0) error->message(msg); mr->scan(print,NULL); obj->cleanup(); } /* ---------------------------------------------------------------------- */ void CCStats::params(int narg, char **arg) { if (narg != 0) error->all("Illegal cc_stats command"); } /* ---------------------------------------------------------------------- */ void CCStats::read(int itask, char *file, KeyValue *kv, void *ptr) { char line[MAXLINE]; VERTEX v,z; FILE *fp = fopen(file,"r"); while (fgets(line,MAXLINE,fp)) { sscanf(line,"%lu %lu",&v,&z); kv->add((char *) &v,sizeof(VERTEX),(char *) &z,sizeof(VERTEX)); } fclose(fp); } /* ---------------------------------------------------------------------- */ void CCStats::print(char *key, int keybytes, char *value, int valuebytes, void *ptr) { uint64_t nsize = *(uint64_t *) key; uint64_t ncc = *(uint64_t *) value; printf(" %lu CCs with %lu vertices\n",ncc,nsize); } 
mrmpi-1.0~20131122/oink/variable.h0000644000175000017500000000376311524066004016267 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #ifndef OINK_VARIABLE_H #define OINK_VARIABLE_H #include "pointers.h" namespace OINK_NS { class Variable : protected Pointers { public: Variable(class OINK *); ~Variable(); void set(int, char **); void set(char *, int, char **); int next(int, char **); int find(char *); int equalstyle(int); char *retrieve(char *); int retrieve_count(int); char *retrieve_single(int, int); double compute_equal(int); double evaluate_boolean(char *); private: int me; int nvar; // # of defined variables int maxvar; // max # of variables arrays can hold char **names; // name of each variable int *style; // style of each variable int *num; // # of values for each variable int *which; // next available value for each variable int *offset; // offset for loop variables specified as N1 to N2 int *pad; // 1 = pad loop/uloop variables with 0s, 0 = no pad char ***data; // str value of each variable's values double PI; class RanMars *randomequal; // random number generator for equal-style vars int precedence[16]; // precedence level of math operators // set length to include OR in enum void remove(int); void extend(); void copy(int, char **, char **); double evaluate(char *); int find_matching_paren(char *, int, char *&); int math_function(char *, char *, double *, int &); int is_constant(char *); double constant(char *); int is_keyword(char *); double keyword(char *); double numeric(char *); int inumeric(char *); }; } #endif mrmpi-1.0~20131122/oink/input.h0000644000175000017500000000423611524066004015635 0ustar mathieumathieu /* 
---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #ifndef OINK_INPUT_H #define OINK_INPUT_H #include "stdio.h" #include "pointers.h" namespace OINK_NS { class Input : protected Pointers { public: int narg; // # of command args char **arg; // parsed args for command class Variable *variable; // defined variables double deltatime; Input(class OINK *, int, char **); ~Input(); void file(); // process all input void file(const char *); // process an input script char *one(const char *); // process a single command void substitute(char *, int); // substitute for variables in a string private: int me,nprocs; // proc ID and number char *command; // ptr to current command int maxarg; // max # of args in arg char *line,*copy,*work; // input line & copy of it int echo_screen; // 0 = no, 1 = yes int echo_log; // 0 = no, 1 = yes int nfile,maxfile; // current # and max # of open input files int label_active; // 0 = no label, 1 = looking for label char *labelstr; // label string being looked for int jump_skip; // 1 if skipping next jump, 0 otherwise int statflag; // 1 if time each command FILE **infiles; // list of open input files void parse(); // parse an input text line int execute_command(); // execute a single command void clear(); // input script commands void echo(); void ifthenelse(); void include(); void jump(); void label(); void log(); void next_command(); void print(); void shell(); void variable_command(); void input_command(); // OINK commands void mr(); void output(); void set(); }; } #endif mrmpi-1.0~20131122/oink/memory.h0000644000175000017500000000350711524066004016006 0ustar mathieumathieu/* 
---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #ifndef OINK_MEMORY_H #define OINK_MEMORY_H #include "pointers.h" namespace OINK_NS { class Memory : protected Pointers { public: Memory(class OINK *); void *smalloc(int n, const char *); void sfree(void *); void *srealloc(void *, int n, const char *); double *create_1d_double_array(int, int, const char *); void destroy_1d_double_array(double *, int); double **create_2d_double_array(int, int, const char *); void destroy_2d_double_array(double **); double **grow_2d_double_array(double **, int, int, const char *); int **create_2d_int_array(int, int, const char *); void destroy_2d_int_array(int **); int **grow_2d_int_array(int **, int, int, const char *); double **create_2d_double_array(int, int, int, const char *); void destroy_2d_double_array(double **, int); double ***create_3d_double_array(int, int, int, const char *); void destroy_3d_double_array(double ***); double ***grow_3d_double_array(double ***, int, int, int, const char *); double ***create_3d_double_array(int, int, int, int, const char *); void destroy_3d_double_array(double ***, int); double ***create_3d_double_array(int, int, int, int, int, int, const char *); void destroy_3d_double_array(double ***, int, int, int); int ***create_3d_int_array(int, int, int, const char *); void destroy_3d_int_array(int ***); double ****create_4d_double_array(int, int, int, int, const char *); void destroy_4d_double_array(double ****); }; } #endif mrmpi-1.0~20131122/oink/command.h0000644000175000017500000000135111524066004016107 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI 
library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #ifndef OINK_COMMAND_H #define OINK_COMMAND_H #include "pointers.h" namespace OINK_NS { class Command : protected Pointers { public: Command(class OINK *); virtual ~Command() {} virtual void run() = 0; virtual void params(int, char **) = 0; void inputs(int, char **); void outputs(int, char **); protected: int ninputs,noutputs; }; } #endif mrmpi-1.0~20131122/oink/vertex_extract.cpp0000644000175000017500000000300011536442217020073 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #include "typedefs.h" #include "string.h" #include "stdlib.h" #include "vertex_extract.h" #include "object.h" #include "style_map.h" #include "style_reduce.h" #include "style_scan.h" #include "error.h" #include "blockmacros.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- */ VertexExtract::VertexExtract(OINK *oink) : Command(oink) { ninputs = 1; noutputs = 1; } /* ---------------------------------------------------------------------- */ void VertexExtract::run() { int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); // MRe = Eij : NULL MapReduce *mre = obj->input(1,read_edge_weight,NULL); MapReduce *mrv = obj->create_mr(); // MRv = all vertices in the graph mrv->map(mre,edge_to_vertices,NULL); mrv->collate(NULL); mrv->reduce(cull,NULL); obj->output(1,mrv,print_vertex,NULL); obj->cleanup(); } /* 
---------------------------------------------------------------------- */ void VertexExtract::params(int narg, char **arg) { if (narg != 0) error->all("Illegal VertexExtract command"); } mrmpi-1.0~20131122/oink/library.h0000644000175000017500000000152011524066004016133 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ /* C or Fortran style library interface to MRMPI new MRMPI-specific functions can be added */ #include "mpi.h" /* ifdefs allow this file to be included in a C program */ #ifdef __cplusplus extern "C" { #endif void mrmpi_open(int, char **, MPI_Comm, void **); void mrmpi_open_no_mpi(int, char **, void **); void mrmpi_close(void *); void mrmpi_file(void *, char *); char *mrmpi_command(void *, char *); void mrmpi_free(void *); #ifdef __cplusplus } #endif mrmpi-1.0~20131122/oink/rmat.cpp0000644000175000017500000000457211535732267016014 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */ #include "typedefs.h" #include "mpi.h" #include "stdio.h" #include "string.h" #include "stdlib.h" #include "rmat.h" #include "object.h" #include "style_map.h" #include "style_reduce.h" #include "style_scan.h" #include "error.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- */ RMAT::RMAT(OINK *oink) : Command(oink) { ninputs = 0; noutputs = 1; } /* ---------------------------------------------------------------------- */ void RMAT::run() { int me,nprocs; MPI_Comm_rank(MPI_COMM_WORLD,&me); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); // mr = matrix edges MapReduce *mr = obj->create_mr(); // loop until desired number of unique nonzero entries int niterate = 0; uint64_t ntotal = rmat.order * rmat.nnonzero; uint64_t nremain = ntotal; while (nremain) { niterate++; rmat.ngenerate = nremain/nprocs; if (me < nremain % nprocs) rmat.ngenerate++; mr->map(nprocs,rmat_generate,&rmat,1); uint64_t nunique = mr->collate(NULL); mr->reduce(cull,&rmat); nremain = ntotal - nunique; } obj->output(1,mr,print_edge,NULL); char msg[128]; sprintf(msg,"RMAT: %lu rows, %lu non-zeroes, %d iterations", rmat.order,ntotal,niterate); if (me == 0) error->message(msg); obj->cleanup(); } /* ---------------------------------------------------------------------- */ void RMAT::params(int narg, char **arg) { if (narg != 8) error->all("Illegal rmat command"); rmat.nlevels = atoi(arg[0]); rmat.nnonzero = atoi(arg[1]); rmat.a = atof(arg[2]); rmat.b = atof(arg[3]); rmat.c = atof(arg[4]); rmat.d = atof(arg[5]); rmat.fraction = atof(arg[6]); int seed = atoi(arg[7]); if (rmat.a + rmat.b + rmat.c + rmat.d != 1.0) error->all("RMAT a,b,c,d must sum to 1"); if (rmat.fraction >= 1.0) error->all("RMAT fraction must be < 1"); int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); srand48(seed+me); rmat.order = 1 << rmat.nlevels; } 
mrmpi-1.0~20131122/oink/random_mars.h0000644000175000017500000000130411524066004016771 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #ifndef OINK_RANMARS_H #define OINK_RANMARS_H #include "pointers.h" namespace OINK_NS { class RanMars : protected Pointers { public: RanMars(class OINK *, int); ~RanMars(); double uniform(); double gaussian(); private: int seed,save; double second; double *u; int i97,j97; double c,cd,cm; }; } #endif mrmpi-1.0~20131122/oink/map_add_weight.cpp0000644000175000017500000000111711536442217017767 0ustar mathieumathieu#include "keyvalue.h" #include "typedefs.h" using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- add_weight add a default floating point weight to each key, key could be vertex or edge input: key = anything, value = NULL output: key = unchanged, value = 1.0 ------------------------------------------------------------------------- */ void add_weight(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { WEIGHT one = 1.0; kv->add(key,keybytes,(char *) &one,sizeof(WEIGHT)); } mrmpi-1.0~20131122/oink/tri_find.cpp0000644000175000017500000002026711536442217016640 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */ #include "typedefs.h" #include "string.h" #include "stdlib.h" #include "tri_find.h" #include "object.h" #include "style_map.h" #include "error.h" #include "blockmacros.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; typedef struct { int di,dj; } DEGREE; typedef struct { VERTEX vi,vj,vk; } TRI; #define INTMAX 0x7FFFFFFF /* ---------------------------------------------------------------------- */ TriFind::TriFind(OINK *oink) : Command(oink) { ninputs = 1; noutputs = 1; } /* ---------------------------------------------------------------------- */ void TriFind::run() { int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); // MRe = Eij : NULL MapReduce *mre = obj->input(1,read_edge,NULL); MapReduce *mrt = obj->create_mr(); // augment edges with degree of each vertex // mrt = (Eij,(Di,Dj)) mrt->map(mre,map_edge_vert,NULL); mrt->collate(NULL); mrt->reduce(reduce_first_degree,this); mrt->collate(NULL); mrt->reduce(reduce_second_degree,NULL); // find triangles in degree-augmented graph // nsq_angles creates angles = triangles missing an edge // add in edges from original graph // emit_triangles finds completed triangles // mrt = ((Vi,Vj,Vk),NULL) mrt->map(mrt,map_low_degree,NULL); mrt->collate(NULL); mrt->reduce(reduce_nsq_angles,NULL); mrt->add(mre); mrt->collate(NULL); uint64_t ntri = mrt->reduce(reduce_emit_triangles,NULL); obj->output(1,mrt,print,NULL); char msg[128]; sprintf(msg,"Tri_find: %lu triangles",ntri); if (me == 0) error->message(msg); obj->cleanup(); } /* ---------------------------------------------------------------------- */ void TriFind::params(int narg, char **arg) { if (narg != 0) error->all("Illegal tri_find command"); } /* ---------------------------------------------------------------------- */ void TriFind::print(char *key, int keybytes, char *value, int valuebytes, void *ptr) { FILE *fp = (FILE *) ptr; TRI *tri = (TRI *) key; 
fprintf(fp,"%lu %lu %lu\n",tri->vi,tri->vj,tri->vk); } /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ void TriFind::map_edge_vert(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { EDGE *edge = (EDGE *) key; kv->add((char *) &edge->vi,sizeof(VERTEX),(char *) &edge->vj,sizeof(VERTEX)); kv->add((char *) &edge->vj,sizeof(VERTEX),(char *) &edge->vi,sizeof(VERTEX)); } /* ---------------------------------------------------------------------- */ void TriFind::reduce_first_degree(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { int i; char *value; VERTEX vi,vj; EDGE edge; DEGREE degree; uint64_t nvalues_total; CHECK_FOR_BLOCKS(multivalue,valuebytes,nvalues,nvalues_total) if (nvalues_total > INTMAX) { TriFind *tf = (TriFind *) ptr; Error *error = tf->error; error->one("Too many edges for one vertex in reduce first_degree"); } int ndegree = nvalues_total; vi = *(VERTEX *) key; BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) value = multivalue; for (i = 0; i < nvalues; i++) { vj = *(VERTEX *) value; if (vi < vj) { edge.vi = vi; edge.vj = vj; degree.di = ndegree; degree.dj = 0; kv->add((char *) &edge,sizeof(EDGE),(char *) °ree,sizeof(DEGREE)); } else { edge.vi = vj; edge.vj = vi; degree.di = 0; degree.dj = ndegree; kv->add((char *) &edge,sizeof(EDGE),(char *) °ree,sizeof(DEGREE)); } value += valuebytes[i]; } END_BLOCK_LOOP } /* ---------------------------------------------------------------------- */ void TriFind::reduce_second_degree(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { DEGREE *one = (DEGREE *) multivalue; DEGREE *two = (DEGREE *) &multivalue[valuebytes[0]]; DEGREE degree; if (one->di) { degree.di = one->di; degree.dj = two->dj; kv->add(key,keybytes,(char *) °ree,sizeof(DEGREE)); } else { degree.di = two->di; 
degree.dj = one->dj; kv->add(key,keybytes,(char *) °ree,sizeof(DEGREE)); } } /* ---------------------------------------------------------------------- */ void TriFind::map_low_degree(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { EDGE *edge = (EDGE *) key; DEGREE *degree = (DEGREE *) value; if (degree->di < degree->dj) kv->add((char *) &edge->vi,sizeof(VERTEX), (char *) &edge->vj,sizeof(VERTEX)); else if (degree->dj < degree->di) kv->add((char *) &edge->vj,sizeof(VERTEX), (char *) &edge->vi,sizeof(VERTEX)); else if (edge->vi < edge->vj) kv->add((char *) &edge->vi,sizeof(VERTEX), (char *) &edge->vj,sizeof(VERTEX)); else kv->add((char *) &edge->vj,sizeof(VERTEX), (char *) &edge->vi,sizeof(VERTEX)); } /* ---------------------------------------------------------------------- */ void TriFind::reduce_nsq_angles(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { int j,k,nv,nv2,iblock,jblock; VERTEX vj,vk; EDGE edge; if (nvalues) { for (j = 0; j < nvalues-1; j++) { vj = *(VERTEX *) &multivalue[j*sizeof(VERTEX)]; for (k = j+1; k < nvalues; k++) { vk = *(VERTEX *) &multivalue[k*sizeof(VERTEX)]; if (vj < vk) { edge.vi = vj; edge.vj = vk; kv->add((char *) &edge,sizeof(EDGE),key,sizeof(VERTEX)); } else { edge.vi = vk; edge.vj = vj; kv->add((char *) &edge,sizeof(EDGE),key,sizeof(VERTEX)); } } } } else { MapReduce *mr = (MapReduce *) valuebytes; int nblocks; mr->multivalue_blocks(nblocks); for (iblock = 0; iblock < nblocks; iblock++) { nv = mr->multivalue_block(iblock,&multivalue,&valuebytes); for (j = 0; j < nv-1; j++) { vj = *(VERTEX *) &multivalue[j*sizeof(VERTEX)]; for (k = j+1; k < nv; k++) { vk = *(VERTEX *) &multivalue[k*sizeof(VERTEX)]; if (vj < vk) { edge.vi = vj; edge.vj = vk; kv->add((char *) &edge,sizeof(EDGE),key,sizeof(VERTEX)); } else { edge.vi = vk; edge.vj = vj; kv->add((char *) &edge,sizeof(EDGE),key,sizeof(VERTEX)); } } for (jblock = iblock+1; jblock < nblocks; 
jblock++) { nv2 = mr->multivalue_block(jblock,&multivalue,&valuebytes); for (k = 0; k < nv2; k++) { vk = *(VERTEX *) &multivalue[k*sizeof(VERTEX)]; if (vj < vk) { edge.vi = vj; edge.vj = vk; kv->add((char *) &edge,sizeof(EDGE),key,sizeof(VERTEX)); } else { edge.vi = vk; edge.vj = vj; kv->add((char *) &edge,sizeof(EDGE),key,sizeof(VERTEX)); } } } if (iblock < nblocks) mr->multivalue_block(iblock,&multivalue,&valuebytes); } } } } /* ---------------------------------------------------------------------- */ void TriFind::reduce_emit_triangles(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { int i; char *value; // loop over values to find a NULL int flag = 0; uint64_t nvalues_total; CHECK_FOR_BLOCKS(multivalue,valuebytes,nvalues,nvalues_total) BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) for (i = 0; i < nvalues; i++) if (valuebytes[i] == 0) { flag = 1; break; } if (i < nvalues) break; END_BLOCK_LOOP if (!flag) return; // emit triangle for each vertex TRI tri; EDGE *edge = (EDGE *) key; tri.vj = edge->vi; tri.vk = edge->vj; BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) value = multivalue; for (i = 0; i < nvalues; i++) { if (valuebytes[i]) { tri.vi = *(VERTEX *) value; kv->add((char *) &tri,sizeof(TRI),NULL,0); } value += valuebytes[i]; } END_BLOCK_LOOP } mrmpi-1.0~20131122/oink/library.cpp0000644000175000017500000000577711524066004016510 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */ // C or Fortran style library interface to OINK // customize by adding new OINK-specific functions #include "mpi.h" #include "string.h" #include "stdlib.h" #include "library.h" #include "oink.h" #include "input.h" using namespace OINK_NS; /* ---------------------------------------------------------------------- create an instance of OINK and return pointer to it pass in command-line args and MPI communicator to run on ------------------------------------------------------------------------- */ void oink_open(int argc, char **argv, MPI_Comm communicator, void **ptr) { OINK *oink = new OINK(argc,argv,communicator); *ptr = (void *) oink; } /* ---------------------------------------------------------------------- create an instance of OINK and return pointer to it caller doesn't know MPI communicator, so use MPI_COMM_WORLD intialize MPI if needed ------------------------------------------------------------------------- */ void oink_open_no_mpi(int argc, char **argv, void **ptr) { int flag; MPI_Initialized(&flag); if (!flag) { int argc = 0; char **argv = NULL; MPI_Init(&argc,&argv); } MPI_Comm communicator = MPI_COMM_WORLD; OINK *oink = new OINK(argc,argv,communicator); *ptr = (void *) oink; } /* ---------------------------------------------------------------------- destruct an instance of OINK ------------------------------------------------------------------------- */ void oink_close(void *ptr) { OINK *oink = (OINK *) ptr; delete oink; } /* ---------------------------------------------------------------------- process an input script in filename str ------------------------------------------------------------------------- */ void oink_file(void *ptr, char *str) { OINK *oink = (OINK *) ptr; oink->input->file(str); } /* ---------------------------------------------------------------------- process a single input command in str 
------------------------------------------------------------------------- */

char *oink_command(void *ptr, char *str)
{
  OINK *instance = (OINK *) ptr;
  char *result = instance->input->one(str);
  return result;
}

/* ----------------------------------------------------------------------
   clean-up function to free memory allocated by lib and returned to caller
------------------------------------------------------------------------- */

void oink_free(void *ptr)
{
  // memory is released with free()
  free(ptr);
}

/* ----------------------------------------------------------------------
   add OINK-specific library functions
   all must receive OINK pointer as argument
   customize by adding a function here and in library.h header file
------------------------------------------------------------------------- */
mrmpi-1.0~20131122/oink/cc_find.cpp0000644000175000017500000002144011536440627016424 0ustar mathieumathieu/* ----------------------------------------------------------------------
   OINK - scripting wrapper on MapReduce-MPI library
   http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level MR-MPI directory.
------------------------------------------------------------------------- */ #include "typedefs.h" #include "string.h" #include "stdlib.h" #include "cc_find.h" #include "object.h" #include "style_map.h" #include "error.h" #include "blockmacros.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; #define ALLBITS UINT64_MAX #define INT64MAX INT64_MAX #define HIBIT UINT64_MAX-INT64_MAX /* ---------------------------------------------------------------------- */ CCFind::CCFind(OINK *oink) : Command(oink) { ninputs = 1; noutputs = 1; } /* ---------------------------------------------------------------------- */ void CCFind::run() { MPI_Comm_rank(MPI_COMM_WORLD,&me); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); // hardwire a seed for splitting big zones int seed = 123456789; srand48(seed+me); // pshift = amount to left shift a proc ID, to put it 1 bit away from top // lmask will mask off hi-bit and proc ID setting in hi-bits of zone int pbits = 0; while ((1 << pbits) < nprocs) pbits++; pshift = 63 - pbits; int hbits = pbits + 1; lmask = ALLBITS >> hbits; // MRe = Eij : NULL MapReduce *mre = obj->input(1,read_edge,NULL); MapReduce *mrv = obj->create_mr(); MapReduce *mrz = obj->create_mr(); // assign each vertex initially to its own zone mrv->map(mre,edge_to_vertices,NULL); mrv->collate(NULL); mrv->reduce(reduce_self_zone,NULL); // loop until zones do not change int niterate = 0; while (1) { niterate++; mrz->map(mre,map_edge_vert,NULL); mrz->add(mrv); mrz->collate(NULL); mrz->reduce(reduce_edge_zone,NULL); mrz->collate(NULL); flag = 0; mrz->reduce(reduce_zone_winner,this); int flagall; MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD); if (!flagall) break; mrv->map(mrv,map_invert_multi,this); mrv->map(mrz,map_zone_multi,this,1); mrv->collate(NULL); mrv->reduce(reduce_zone_reassign,this); } // strip any hi-bits from final (Vi,Zi) key/values before output mrv->map(mrv,map_strip,NULL); obj->output(1,mrv,print,NULL); // 
count # of unique CCs mrz->map(mrv,invert,NULL); uint64_t ncc = mrz->collate(NULL); char msg[128]; sprintf(msg,"CC_find: %lu components in %d iterations",ncc,niterate); if (me == 0) error->message(msg); obj->cleanup(); } /* ---------------------------------------------------------------------- */ void CCFind::params(int narg, char **arg) { if (narg != 1) error->all("Illegal cc_find command"); nthresh = atoi(arg[0]); } /* ---------------------------------------------------------------------- */ void CCFind::print(char *key, int keybytes, char *value, int valuebytes, void *ptr) { FILE *fp = (FILE *) ptr; VERTEX v = *(VERTEX *) key; VERTEX z = *(VERTEX *) &key[sizeof(VERTEX)]; fprintf(fp,"%lu %lu\n",v,z); } /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ void CCFind::reduce_self_zone(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { kv->add(key,keybytes,key,keybytes); } /* ---------------------------------------------------------------------- */ void CCFind::map_edge_vert(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { EDGE *edge = (EDGE *) key; kv->add((char *) &edge->vi,sizeof(VERTEX),key,sizeof(EDGE)); kv->add((char *) &edge->vj,sizeof(VERTEX),key,sizeof(EDGE)); } /* ---------------------------------------------------------------------- */ void CCFind::reduce_edge_zone(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { int i; char *value; // loop over values to find zone ID uint64_t nvalues_total; CHECK_FOR_BLOCKS(multivalue,valuebytes,nvalues,nvalues_total) BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) value = multivalue; for (i = 0; i < nvalues; i++) { if (valuebytes[i] == sizeof(uint64_t)) break; value += valuebytes[i]; } if (i < nvalues) break; END_BLOCK_LOOP uint64_t zone = *(uint64_t *) value; // emit one KV 
per edge with zone ID as value BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) value = multivalue; for (int i = 0; i < nvalues; i++) { if (valuebytes[i] != sizeof(uint64_t)) kv->add(value,valuebytes[i],(char *) &zone,sizeof(uint64_t)); value += valuebytes[i]; } END_BLOCK_LOOP } /* ---------------------------------------------------------------------- */ void CCFind::reduce_zone_winner(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { // z0,z1 have hi-bit stripped off uint64_t *z = (uint64_t *) multivalue; uint64_t z0 = z[0] & INT64MAX; uint64_t z1 = z[1] & INT64MAX; if (z0 == z1) return; // emit zone pair with hi-bits // append extra word to value, // so zone can be distinguished from vertex values in next stage of CC CCFind *data = (CCFind *) ptr; data->flag = 1; PAD *pad = &(data->pad); if (z0 > z1) { pad->zone = z[1]; kv->add((char *) &z[0],sizeof(uint64_t),(char *) pad,sizeof(PAD)); } else { pad->zone = z[0]; kv->add((char *) &z[1],sizeof(uint64_t),(char *) pad,sizeof(PAD)); } } /* ---------------------------------------------------------------------- */ void CCFind::map_invert_multi(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { uint64_t z = *(uint64_t *) value; // if z has hibit set, add random iproc in hibits, retain hibit setting if (z >> 63) { CCFind *data = (CCFind *) ptr; uint64_t iproc = static_cast (data->nprocs * drand48()); uint64_t znew = z | (iproc << data->pshift); kv->add((char *) &znew,sizeof(uint64_t),key,keybytes); } else kv->add(value,valuebytes,key,keybytes); } /* ---------------------------------------------------------------------- */ void CCFind::map_zone_multi(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { uint64_t z = *(uint64_t *) key; // if z has hibit set: // remove hibit, add random iproc in hibits, reset hibit if (z >> 63) { CCFind *data = (CCFind *) ptr; uint64_t zstrip = z & 
INT64MAX; kv->add((char *) &zstrip,sizeof(uint64_t),value,valuebytes); int nprocs = data->nprocs; int pshift = data->pshift; uint64_t znew; for (uint64_t iproc = 0; iproc < nprocs; iproc++) { znew = zstrip | (iproc << pshift); znew |= HIBIT; kv->add((char *) &znew,sizeof(uint64_t),value,valuebytes); } } else kv->add(key,keybytes,value,valuebytes); } /* ---------------------------------------------------------------------- */ void CCFind::reduce_zone_reassign(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) { CCFind *data = (CCFind *) ptr; int nthresh = data->nthresh; uint64_t lmask = data->lmask; int i,hnew; char *value; uint64_t znew; // loop over values, compute winning zone ID // hibit is set if winning Z has its hibit set uint64_t zcount = 0; uint64_t zone = *(uint64_t *) key; int hkey = zone >> 63; zone &= lmask; int hwinner = 0; uint64_t nvalues_total; CHECK_FOR_BLOCKS(multivalue,valuebytes,nvalues,nvalues_total) BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) value = multivalue; for (i = 0; i < nvalues; i++) { if (valuebytes[i] != sizeof(uint64_t)) { znew = *(uint64_t *) value; hnew = znew >> 63; znew &= INT64MAX; if (znew < zone) { zone = znew; hwinner = hnew; } zcount++; } value += valuebytes[i]; } END_BLOCK_LOOP // emit one KV per vertex with zone ID as value // add hi-bit to zone if necessary if (hkey || hwinner) zone |= HIBIT; else if (nvalues_total-zcount > nthresh) zone |= HIBIT; BEGIN_BLOCK_LOOP(multivalue,valuebytes,nvalues) value = multivalue; for (i = 0; i < nvalues; i++) { if (valuebytes[i] == sizeof(uint64_t)) kv->add(value,valuebytes[i],(char *) &zone,sizeof(uint64_t)); value += valuebytes[i]; } END_BLOCK_LOOP } /* ---------------------------------------------------------------------- */ void CCFind::map_strip(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { uint64_t zone = *(uint64_t *) value; zone &= INT64MAX; kv->add(key,keybytes,(char *) 
&zone,sizeof(uint64_t)); } mrmpi-1.0~20131122/oink/variable.cpp0000644000175000017500000011511311524066004016613 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #include "math.h" #include "stdlib.h" #include "string.h" #include "ctype.h" #include "unistd.h" #include "variable.h" #include "universe.h" #include "input.h" #include "random_mars.h" #include "memory.h" #include "error.h" using namespace OINK_NS; #define VARDELTA 4 #define MAXLEVEL 4 #define MIN(A,B) ((A) < (B)) ? (A) : (B) #define MAX(A,B) ((A) > (B)) ? (A) : (B) #define MYROUND(a) (( a-floor(a) ) >= .5) ? ceil(a) : floor(a) enum{INDEX,LOOP,WORLD,UNIVERSE,ULOOP,STRING,EQUAL}; enum{ARG,OP}; // customize by adding a function enum{DONE,ADD,SUBTRACT,MULTIPLY,DIVIDE,CARAT,UNARY, NOT,EQ,NE,LT,LE,GT,GE,AND,OR, SQRT,EXP,LN,LOG,SIN,COS,TAN,ASIN,ACOS,ATAN,ATAN2, RANDOM,NORMAL,CEIL,FLOOR,ROUND}; #define BIG 1.0e20 /* ---------------------------------------------------------------------- */ Variable::Variable(OINK *oink) : Pointers(oink) { MPI_Comm_rank(world,&me); nvar = maxvar = 0; names = NULL; style = NULL; num = NULL; which = NULL; offset = NULL; pad = NULL; data = NULL; randomequal = NULL; precedence[DONE] = 0; precedence[OR] = 1; precedence[AND] = 2; precedence[EQ] = precedence[NE] = 3; precedence[LT] = precedence[LE] = precedence[GT] = precedence[GE] = 4; precedence[ADD] = precedence[SUBTRACT] = 5; precedence[MULTIPLY] = precedence[DIVIDE] = 6; precedence[CARAT] = 7; precedence[UNARY] = precedence[NOT] = 8; PI = 4.0*atan(1.0); } /* ---------------------------------------------------------------------- */ Variable::~Variable() { for (int i = 0; i < nvar; i++) { delete [] 
names[i]; if (style[i] == LOOP || style[i] == ULOOP) delete [] data[i][0]; else for (int j = 0; j < num[i]; j++) delete [] data[i][j]; delete [] data[i]; } memory->sfree(names); memory->sfree(style); memory->sfree(num); memory->sfree(which); memory->sfree(offset); memory->sfree(pad); memory->sfree(data); delete randomequal; } /* ---------------------------------------------------------------------- called by variable command in input script ------------------------------------------------------------------------- */ void Variable::set(int narg, char **arg) { if (narg < 2) error->all("Illegal variable command"); // DELETE // doesn't matter if variable no longer exists if (strcmp(arg[1],"delete") == 0) { if (narg != 2) error->all("Illegal variable command"); if (find(arg[0]) >= 0) remove(find(arg[0])); return; // INDEX // num = listed args, which = 1st value, data = copied args } else if (strcmp(arg[1],"index") == 0) { if (narg < 3) error->all("Illegal variable command"); if (find(arg[0]) >= 0) return; if (nvar == maxvar) extend(); style[nvar] = INDEX; num[nvar] = narg - 2; which[nvar] = 0; offset[nvar] = 0; pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(num[nvar],&arg[2],data[nvar]); // LOOP // 1 arg + pad: num = N, which = offset = 1st value, data = single string // 2 args + pad: same but num = N2-N1+1, offset = N1 } else if (strcmp(arg[1],"loop") == 0) { if (find(arg[0]) >= 0) return; if (nvar == maxvar) extend(); style[nvar] = LOOP; int nfirst,nlast; if (narg == 3 || (narg == 4 && strcmp(arg[3],"pad") == 0)) { nfirst = 1; nlast = atoi(arg[2]); if (nlast <= 0) error->all("Illegal variable command"); if (narg == 4 && strcmp(arg[3],"pad") == 0) { char digits[12]; sprintf(digits,"%d",nlast); pad[nvar] = strlen(digits); } else pad[nvar] = 0; } else if (narg == 4 || (narg == 5 && strcmp(arg[4],"pad") == 0)) { nfirst = atoi(arg[2]); nlast = atoi(arg[3]); if (nfirst > nlast || nlast <= 0) error->all("Illegal variable command"); if (narg == 5 && 
strcmp(arg[4],"pad") == 0) { char digits[12]; sprintf(digits,"%d",nlast); pad[nvar] = strlen(digits); } else pad[nvar] = 0; } else error->all("Illegal variable command"); num[nvar] = nlast-nfirst+1; which[nvar] = 0; offset[nvar] = nfirst; data[nvar] = new char*[1]; data[nvar][0] = NULL; // WORLD // num = listed args, which = partition this proc is in, data = copied args // error check that num = # of worlds in universe } else if (strcmp(arg[1],"world") == 0) { if (narg < 3) error->all("Illegal variable command"); if (find(arg[0]) >= 0) return; if (nvar == maxvar) extend(); style[nvar] = WORLD; num[nvar] = narg - 2; if (num[nvar] != universe->nworlds) error->all("World variable count doesn't match # of partitions"); which[nvar] = universe->iworld; offset[nvar] = 0; pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(num[nvar],&arg[2],data[nvar]); // UNIVERSE and ULOOP // for UNIVERSE: num = listed args, data = copied args // for ULOOP: num = N, data = single string // which = partition this proc is in // universe proc 0 creates lock file // error check that all other universe/uloop variables are same length } else if (strcmp(arg[1],"universe") == 0 || strcmp(arg[1],"uloop") == 0) { if (strcmp(arg[1],"universe") == 0) { if (narg < 3) error->all("Illegal variable command"); if (find(arg[0]) >= 0) return; if (nvar == maxvar) extend(); style[nvar] = UNIVERSE; num[nvar] = narg - 2; data[nvar] = new char*[num[nvar]]; copy(num[nvar],&arg[2],data[nvar]); } else if (strcmp(arg[1],"uloop") == 0) { if (narg < 3 || narg > 4 || (narg == 4 && strcmp(arg[3],"pad") != 0)) error->all("Illegal variable command"); if (find(arg[0]) >= 0) return; if (nvar == maxvar) extend(); style[nvar] = ULOOP; num[nvar] = atoi(arg[2]); offset[nvar] = 0; data[nvar] = new char*[1]; data[nvar][0] = NULL; if (narg == 4) { char digits[12]; sprintf(digits,"%d",num[nvar]); pad[nvar] = strlen(digits); } else pad[nvar] = 0; } if (num[nvar] < universe->nworlds) error->all("Universe/uloop variable count < # 
of partitions"); which[nvar] = universe->iworld; if (universe->me == 0) { FILE *fp = fopen("tmp.oink.variable","w"); fprintf(fp,"%d\n",universe->nworlds); fclose(fp); } for (int jvar = 0; jvar < nvar; jvar++) if (num[jvar] && (style[jvar] == UNIVERSE || style[jvar] == ULOOP) && num[nvar] != num[jvar]) error->all("All universe/uloop variables must have same # of values"); // STRING // remove pre-existing var if also style STRING (allows it to be reset) // num = 1, which = 1st value // data = 1 value, string to eval } else if (strcmp(arg[1],"string") == 0) { if (narg != 3) error->all("Illegal variable command"); if (find(arg[0]) >= 0) { if (style[find(arg[0])] != STRING) error->all("Cannot redefine variable as a different style"); remove(find(arg[0])); } if (nvar == maxvar) extend(); style[nvar] = STRING; num[nvar] = 1; which[nvar] = 0; offset[nvar] = 0; pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(1,&arg[2],data[nvar]); // EQUAL // remove pre-existing var if also style EQUAL (allows it to be reset) // num = 2, which = 1st value // data = 2 values, 1st is string to eval, 2nd is filled on retrieval } else if (strcmp(arg[1],"equal") == 0) { if (narg != 3) error->all("Illegal variable command"); if (find(arg[0]) >= 0) { if (style[find(arg[0])] != EQUAL) error->all("Cannot redefine variable as a different style"); remove(find(arg[0])); } if (nvar == maxvar) extend(); style[nvar] = EQUAL; num[nvar] = 2; which[nvar] = 0; offset[nvar] = 0; pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(1,&arg[2],data[nvar]); data[nvar][1] = NULL; } else error->all("Illegal variable command"); // set name of variable // must come at end, since STRING/EQUAL/ATOM reset may have removed name // name must be all alphanumeric chars or underscores int n = strlen(arg[0]) + 1; names[nvar] = new char[n]; strcpy(names[nvar],arg[0]); for (int i = 0; i < n-1; i++) if (!isalnum(names[nvar][i]) && names[nvar][i] != '_') error->all("Variable name must be alphanumeric or " "underscore 
characters"); nvar++; } /* ---------------------------------------------------------------------- INDEX variable created by command-line argument make it INDEX rather than STRING so cannot be re-defined in input script ------------------------------------------------------------------------- */ void Variable::set(char *name, int narg, char **arg) { char **newarg = new char*[2+narg]; newarg[0] = name; newarg[1] = (char *) "index"; for (int i = 0; i < narg; i++) newarg[2+i] = arg[i]; set(2+narg,newarg); delete [] newarg; } /* ---------------------------------------------------------------------- increment variable(s) return 0 if OK if successfully incremented return 1 if any variable is exhausted, free the variable to allow re-use ------------------------------------------------------------------------- */ int Variable::next(int narg, char **arg) { int ivar; if (narg == 0) error->all("Illegal next command"); // check that variables exist and are all the same style // exception: UNIVERSE and ULOOP variables can be mixed in same next command for (int iarg = 0; iarg < narg; iarg++) { ivar = find(arg[iarg]); if (ivar == -1) error->all("Invalid variable in next command"); if (style[ivar] == ULOOP && style[find(arg[0])] == UNIVERSE) continue; else if (style[ivar] == UNIVERSE && style[find(arg[0])] == ULOOP) continue; else if (style[ivar] != style[find(arg[0])]) error->all("All variables in next command must be same style"); } // invalid styles STRING or EQUAL or WORLD int istyle = style[find(arg[0])]; if (istyle == STRING || istyle == EQUAL || istyle == WORLD) error->all("Invalid variable style with next command"); // increment all variables in list // if any variable is exhausted, set flag = 1 and remove var to allow re-use int flag = 0; if (istyle == INDEX || istyle == LOOP) { for (int iarg = 0; iarg < narg; iarg++) { ivar = find(arg[iarg]); which[ivar]++; if (which[ivar] >= num[ivar]) { flag = 1; remove(ivar); } } } else if (istyle == UNIVERSE || istyle == ULOOP) { // 
wait until lock file can be created and owned by proc 0 of this world // read next available index and Bcast it within my world // set all variables in list to nextindex int nextindex; if (me == 0) { while (1) { if (!rename("tmp.oink.variable","tmp.oink.variable.lock")) break; usleep(100000); } FILE *fp = fopen("tmp.oink.variable.lock","r"); fscanf(fp,"%d",&nextindex); fclose(fp); fp = fopen("tmp.oink.variable.lock","w"); fprintf(fp,"%d\n",nextindex+1); fclose(fp); rename("tmp.oink.variable.lock","tmp.oink.variable"); if (universe->uscreen) fprintf(universe->uscreen, "Increment via next: value %d on partition %d\n", nextindex+1,universe->iworld); if (universe->ulogfile) fprintf(universe->ulogfile, "Increment via next: value %d on partition %d\n", nextindex+1,universe->iworld); } MPI_Bcast(&nextindex,1,MPI_INT,0,world); for (int iarg = 0; iarg < narg; iarg++) { ivar = find(arg[iarg]); which[ivar] = nextindex; if (which[ivar] >= num[ivar]) { flag = 1; remove(ivar); } } } return flag; } /* ---------------------------------------------------------------------- return ptr to the data text associated with a variable if INDEX or WORLD or UNIVERSE or STRING var, return ptr to stored string if LOOP or ULOOP var, write int to data[0] and return ptr to string if EQUAL var, evaluate variable and put result in str return NULL if no variable or which is bad, caller must respond ------------------------------------------------------------------------- */ char *Variable::retrieve(char *name) { int ivar = find(name); if (ivar == -1) return NULL; if (which[ivar] >= num[ivar]) return NULL; if (style[ivar] == INDEX || style[ivar] == WORLD || style[ivar] == UNIVERSE || style[ivar] == STRING) return data[ivar][which[ivar]]; if (style[ivar] == LOOP || style[ivar] == ULOOP) { char result[16]; if (pad[ivar] == 0) sprintf(result,"%d",which[ivar]+offset[ivar]); else { char padstr[16]; sprintf(padstr,"%%0%dd",pad[ivar]); sprintf(result,padstr,which[ivar]+offset[ivar]); } int n = 
strlen(result) + 1; delete [] data[ivar][0]; data[ivar][0] = new char[n]; strcpy(data[ivar][0],result); return data[ivar][0]; } if (style[ivar] == EQUAL) { char result[32]; double answer = evaluate(data[ivar][0]); sprintf(result,"%.10g",answer); int n = strlen(result) + 1; if (data[ivar][1]) delete [] data[ivar][1]; data[ivar][1] = new char[n]; strcpy(data[ivar][1],result); return data[ivar][1]; } return NULL; } /* ---------------------------------------------------------------------- return count of data strings associated with variable ivar should not be called for EQUAL var ------------------------------------------------------------------------- */ int Variable::retrieve_count(int ivar) { return num[ivar]; } /* ---------------------------------------------------------------------- return Nth data string associated with variable ivar incrementing is NOT performed caller insures N is within proper range 0 to num-1 should not be called for EQUAL var ------------------------------------------------------------------------- */ char *Variable::retrieve_single(int ivar, int nth) { if (style[ivar] == INDEX || style[ivar] == WORLD || style[ivar] == UNIVERSE || style[ivar] == STRING) return data[ivar][nth]; // style LOOP or ULOOP char result[16]; if (pad[ivar] == 0) sprintf(result,"%d",nth+offset[ivar]); else { char padstr[16]; sprintf(padstr,"%%0%dd",pad[ivar]); sprintf(result,padstr,nth+offset[ivar]); } int n = strlen(result) + 1; delete [] data[ivar][0]; data[ivar][0] = new char[n]; strcpy(data[ivar][0],result); return data[ivar][0]; } /* ---------------------------------------------------------------------- return result of equal-style variable evaluation ------------------------------------------------------------------------- */ double Variable::compute_equal(int ivar) { return evaluate(data[ivar][0]); } /* ---------------------------------------------------------------------- search for name in list of variables names return index or -1 if not found 
------------------------------------------------------------------------- */ int Variable::find(char *name) { for (int i = 0; i < nvar; i++) if (strcmp(name,names[i]) == 0) return i; return -1; } /* ---------------------------------------------------------------------- return 1 if variable is EQUAL style, 0 if not ------------------------------------------------------------------------- */ int Variable::equalstyle(int ivar) { if (style[ivar] == EQUAL) return 1; return 0; } /* ---------------------------------------------------------------------- remove Nth variable from list and compact list ------------------------------------------------------------------------- */ void Variable::remove(int n) { delete [] names[n]; if (style[n] == LOOP || style[n] == ULOOP) delete [] data[n][0]; else for (int i = 0; i < num[n]; i++) delete [] data[n][i]; delete [] data[n]; for (int i = n+1; i < nvar; i++) { names[i-1] = names[i]; style[i-1] = style[i]; num[i-1] = num[i]; which[i-1] = which[i]; offset[i-1] = offset[i]; pad[i-1] = pad[i]; data[i-1] = data[i]; } nvar--; } /* ---------------------------------------------------------------------- make space in arrays for new variable ------------------------------------------------------------------------- */ void Variable::extend() { maxvar += VARDELTA; names = (char **) memory->srealloc(names,maxvar*sizeof(char *),"var:names"); style = (int *) memory->srealloc(style,maxvar*sizeof(int),"var:style"); num = (int *) memory->srealloc(num,maxvar*sizeof(int),"var:num"); which = (int *) memory->srealloc(which,maxvar*sizeof(int),"var:which"); offset = (int *) memory->srealloc(offset,maxvar*sizeof(int),"var:offset"); pad = (int *) memory->srealloc(pad,maxvar*sizeof(int),"var:pad"); data = (char ***) memory->srealloc(data,maxvar*sizeof(char **),"var:data"); } /* ---------------------------------------------------------------------- copy narg strings from **from to **to, and allocate space for them 
------------------------------------------------------------------------- */ void Variable::copy(int narg, char **from, char **to) { int n; for (int i = 0; i < narg; i++) { n = strlen(from[i]) + 1; to[i] = new char[n]; strcpy(to[i],from[i]); } } /* ---------------------------------------------------------------------- recursive evaluation of a string str str is an equal-style or atom-style formula containing one or more items: number = 0.0, -5.45, 2.8e-4, ... constant = PI keyword = nprocs, time math operation = (),-x,x+y,x-y,x*y,x/y,x^y, x==y,x!=y,xy,x>=y,x&&y,x||y, sqrt(x),exp(x),ln(x),log(x), sin(x),cos(x),tan(x),asin(x),atan2(y,x),... variable = v_name, v_name[i] evaluate the formula, return result as a double ------------------------------------------------------------------------- */ double Variable::evaluate(char *str) { int op,opprevious; double value1,value2; char onechar; char *ptr; double argstack[MAXLEVEL]; int opstack[MAXLEVEL]; int nargstack = 0; int nopstack = 0; int i = 0; int expect = ARG; while (1) { onechar = str[i]; // whitespace: just skip if (isspace(onechar)) i++; // ---------------- // parentheses: recursively evaluate contents of parens // ---------------- else if (onechar == '(') { if (expect == OP) error->all("Invalid syntax in variable formula"); expect = OP; char *contents; i = find_matching_paren(str,i,contents); i++; // evaluate contents and push on stack argstack[nargstack++] = evaluate(contents); delete [] contents; // ---------------- // number: push value onto stack // ---------------- } else if (isdigit(onechar) || onechar == '.') { if (expect == OP) error->all("Invalid syntax in variable formula"); expect = OP; // istop = end of number, including scientific notation int istart = i; while (isdigit(str[i]) || str[i] == '.') i++; if (str[i] == 'e' || str[i] == 'E') { i++; if (str[i] == '+' || str[i] == '-') i++; while (isdigit(str[i])) i++; } int istop = i - 1; int n = istop - istart + 1; char *number = new char[n+1]; 
strncpy(number,&str[istart],n); number[n] = '\0'; argstack[nargstack++] = atof(number); delete [] number; // ---------------- // letter: v_name, exp(), PI, keyword // ---------------- } else if (isalpha(onechar)) { if (expect == OP) error->all("Invalid syntax in variable formula"); expect = OP; // istop = end of word // word = all alphanumeric or underscore int istart = i; while (isalnum(str[i]) || str[i] == '_') i++; int istop = i-1; int n = istop - istart + 1; char *word = new char[n+1]; strncpy(word,&str[istart],n); word[n] = '\0'; // ---------------- // variable // ---------------- if (strncmp(word,"v_",2) == 0) { n = strlen(word) - 2 + 1; char *id = new char[n]; strcpy(id,&word[2]); int ivar = find(id); if (ivar < 0) error->all("Invalid variable name in variable formula"); // v_name = scalar from non atom-style global scalar char *var = retrieve(id); if (var == NULL) error->all("Invalid variable evaluation in variable formula"); argstack[nargstack++] = atof(var); delete [] id; // ---------------- // math function or constant or keyword // ---------------- } else { // ---------------- // math function // ---------------- if (str[i] == '(') { char *contents; i = find_matching_paren(str,i,contents); i++; if (math_function(word,contents,argstack,nargstack)); else error->all("Invalid math function in variable formula"); delete [] contents; // ---------------- // constant // ---------------- } else if (is_constant(word)) { value1 = constant(word); argstack[nargstack++] = value1; // ---------------- // keyword // ---------------- } else if (is_keyword(word)) { value1 = keyword(word); argstack[nargstack++] = value1; } else error->all("Invalid keyword in variable formula"); } delete [] word; // ---------------- // math operator, including end-of-string // ---------------- } else if (strchr("+-*/^<>=!&|\0",onechar)) { if (onechar == '+') op = ADD; else if (onechar == '-') op = SUBTRACT; else if (onechar == '*') op = MULTIPLY; else if (onechar == '/') op = DIVIDE; else 
if (onechar == '^') op = CARAT; else if (onechar == '=') { if (str[i+1] != '=') error->all("Invalid syntax in variable formula"); op = EQ; i++; } else if (onechar == '!') { if (str[i+1] == '=') { op = NE; i++; } else op = NOT; } else if (onechar == '<') { if (str[i+1] != '=') op = LT; else { op = LE; i++; } } else if (onechar == '>') { if (str[i+1] != '=') op = GT; else { op = GE; i++; } } else if (onechar == '&') { if (str[i+1] != '&') error->all("Invalid syntax in variable formula"); op = AND; i++; } else if (onechar == '|') { if (str[i+1] != '|') error->all("Invalid syntax in variable formula"); op = OR; i++; } else op = DONE; i++; if (op == SUBTRACT && expect == ARG) { opstack[nopstack++] = UNARY; continue; } if (op == NOT && expect == ARG) { opstack[nopstack++] = op; continue; } if (expect == ARG) error->all("Invalid syntax in variable formula"); expect = ARG; // evaluate stack as deep as possible while respecting precedence // before pushing current op onto stack while (nopstack && precedence[opstack[nopstack-1]] >= precedence[op]) { opprevious = opstack[--nopstack]; value2 = argstack[--nargstack]; if (opprevious != UNARY && opprevious != NOT) value1 = argstack[--nargstack]; if (opprevious == ADD) argstack[nargstack++] = value1 + value2; else if (opprevious == SUBTRACT) argstack[nargstack++] = value1 - value2; else if (opprevious == MULTIPLY) argstack[nargstack++] = value1 * value2; else if (opprevious == DIVIDE) { if (value2 == 0.0) error->all("Divide by 0 in variable formula"); argstack[nargstack++] = value1 / value2; } else if (opprevious == CARAT) { if (value2 == 0.0) error->all("Power by 0 in variable formula"); argstack[nargstack++] = pow(value1,value2); } else if (opprevious == UNARY) { argstack[nargstack++] = -value2; } else if (opprevious == NOT) { if (value2 == 0.0) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == EQ) { if (value1 == value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else 
if (opprevious == NE) { if (value1 != value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == LT) { if (value1 < value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == LE) { if (value1 <= value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == GT) { if (value1 > value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == GE) { if (value1 >= value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == AND) { if (value1 != 0.0 && value2 != 0.0) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == OR) { if (value1 != 0.0 || value2 != 0.0) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } } // if end-of-string, break out of entire formula evaluation loop if (op == DONE) break; // push current operation onto stack opstack[nopstack++] = op; } else error->all("Invalid syntax in variable formula"); } if (nopstack) error->all("Invalid syntax in variable formula"); // return remaining arg if (nargstack != 1) error->all("Invalid syntax in variable formula"); return argstack[0]; } /* ---------------------------------------------------------------------- find matching parenthesis in str, allocate contents = str between parens i = left paren return loc or right paren ------------------------------------------------------------------------- */ int Variable::find_matching_paren(char *str, int i,char *&contents) { // istop = matching ')' at same level, allowing for nested parens int istart = i; int ilevel = 0; while (1) { i++; if (!str[i]) break; if (str[i] == '(') ilevel++; else if (str[i] == ')' && ilevel) ilevel--; else if (str[i] == ')') break; } if (!str[i]) error->all("Invalid syntax in variable formula"); int istop = i; int n = istop - istart - 1; contents = new char[n+1]; strncpy(contents,&str[istart+1],n); contents[n] = '\0'; return 
istop; } /* ---------------------------------------------------------------------- process a math function in formula push result onto tree or arg stack word = math function contents = str between parentheses with one,two,three args return 0 if not a match, 1 if successfully processed customize by adding a math function: sqrt(),exp(),ln(),log(),sin(),cos(),tan(),asin(),acos(),atan(), atan2(y,x),random(x,y,z),normal(x,y,z),ceil(),floor(),round() ------------------------------------------------------------------------- */ int Variable::math_function(char *word, char *contents, double *argstack, int &nargstack) { // word not a match to any math function if (strcmp(word,"sqrt") && strcmp(word,"exp") && strcmp(word,"ln") && strcmp(word,"log") && strcmp(word,"sin") && strcmp(word,"cos") && strcmp(word,"tan") && strcmp(word,"asin") && strcmp(word,"acos") && strcmp(word,"atan") && strcmp(word,"atan2") && strcmp(word,"random") && strcmp(word,"normal") && strcmp(word,"ceil") && strcmp(word,"floor") && strcmp(word,"round")) return 0; // parse contents for arg1,arg2,arg3 separated by commas // ptr1,ptr2 = location of 1st and 2nd comma, NULL if none char *arg1,*arg2,*arg3; char *ptr1,*ptr2; ptr1 = strchr(contents,','); if (ptr1) { *ptr1 = '\0'; ptr2 = strchr(ptr1+1,','); if (ptr2) *ptr2 = '\0'; } else ptr2 = NULL; int n = strlen(contents) + 1; arg1 = new char[n]; strcpy(arg1,contents); int narg = 1; if (ptr1) { n = strlen(ptr1+1) + 1; arg2 = new char[n]; strcpy(arg2,ptr1+1); narg = 2; } else arg2 = NULL; if (ptr2) { n = strlen(ptr2+1) + 1; arg3 = new char[n]; strcpy(arg3,ptr2+1); narg = 3; } else arg3 = NULL; // evaluate args double tmp,value1,value2,value3; if (narg == 1) { value1 = evaluate(arg1); } else if (narg == 2) { value1 = evaluate(arg1); value2 = evaluate(arg2); } else if (narg == 3) { value1 = evaluate(arg1); value2 = evaluate(arg2); value3 = evaluate(arg3); } if (strcmp(word,"sqrt") == 0) { if (narg != 1) error->all("Invalid math function in variable formula"); if 
(value1 < 0.0) error->all("Sqrt of negative value in variable formula"); argstack[nargstack++] = sqrt(value1); } else if (strcmp(word,"exp") == 0) { if (narg != 1) error->all("Invalid math function in variable formula"); argstack[nargstack++] = exp(value1); } else if (strcmp(word,"ln") == 0) { if (narg != 1) error->all("Invalid math function in variable formula"); if (value1 <= 0.0) error->all("Log of zero/negative value in variable formula"); argstack[nargstack++] = log(value1); } else if (strcmp(word,"log") == 0) { if (narg != 1) error->all("Invalid math function in variable formula"); if (value1 <= 0.0) error->all("Log of zero/negative value in variable formula"); argstack[nargstack++] = log10(value1); } else if (strcmp(word,"sin") == 0) { if (narg != 1) error->all("Invalid math function in variable formula"); argstack[nargstack++] = sin(value1); } else if (strcmp(word,"cos") == 0) { if (narg != 1) error->all("Invalid math function in variable formula"); argstack[nargstack++] = cos(value1); } else if (strcmp(word,"tan") == 0) { if (narg != 1) error->all("Invalid math function in variable formula"); argstack[nargstack++] = tan(value1); } else if (strcmp(word,"asin") == 0) { if (narg != 1) error->all("Invalid math function in variable formula"); if (value1 < -1.0 || value1 > 1.0) error->all("Arcsin of invalid value in variable formula"); argstack[nargstack++] = asin(value1); } else if (strcmp(word,"acos") == 0) { if (narg != 1) error->all("Invalid math function in variable formula"); if (value1 < -1.0 || value1 > 1.0) error->all("Arccos of invalid value in variable formula"); argstack[nargstack++] = acos(value1); } else if (strcmp(word,"atan") == 0) { if (narg != 1) error->all("Invalid math function in variable formula"); argstack[nargstack++] = atan(value1); } else if (strcmp(word,"atan2") == 0) { if (narg != 2) error->all("Invalid math function in variable formula"); argstack[nargstack++] = atan2(value1,value2); } else if (strcmp(word,"random") == 0) { if (narg 
!= 3) error->all("Invalid math function in variable formula"); if (randomequal == NULL) { int seed = static_cast (value3); if (seed <= 0) error->all("Invalid math function in variable formula"); randomequal = new RanMars(oink,seed); } argstack[nargstack++] = randomequal->uniform()*(value2-value1) + value1; } else if (strcmp(word,"normal") == 0) { if (narg != 3) error->all("Invalid math function in variable formula"); if (value2 < 0.0) error->all("Invalid math function in variable formula"); if (randomequal == NULL) { int seed = static_cast (value3); if (seed <= 0) error->all("Invalid math function in variable formula"); randomequal = new RanMars(oink,seed); } argstack[nargstack++] = value1 + value2*randomequal->gaussian(); } else if (strcmp(word,"ceil") == 0) { if (narg != 1) error->all("Invalid math function in variable formula"); argstack[nargstack++] = ceil(value1); } else if (strcmp(word,"floor") == 0) { if (narg != 1) error->all("Invalid math function in variable formula"); argstack[nargstack++] = floor(value1); } else if (strcmp(word,"round") == 0) { if (narg != 1) error->all("Invalid math function in variable formula"); argstack[nargstack++] = MYROUND(value1); } delete [] arg1; delete [] arg2; delete [] arg3; return 1; } /* ---------------------------------------------------------------------- check if word matches a constant return 1 if yes, else 0 customize by adding a constant: PI ------------------------------------------------------------------------- */ int Variable::is_constant(char *word) { if (strcmp(word,"PI") == 0) return 1; return 0; } /* ---------------------------------------------------------------------- process a constant in formula customize by adding a constant: PI ------------------------------------------------------------------------- */ double Variable::constant(char *word) { if (strcmp(word,"PI") == 0) return PI; return 0.0; } /* ---------------------------------------------------------------------- check if word matches a keyword 
return 1 if yes, else 0 customize by adding a keyword: nprocs, time ------------------------------------------------------------------------- */ int Variable::is_keyword(char *word) { if (strcmp(word,"nprocs") == 0) return 1; else if (strcmp(word,"time") == 0) return 1; return 0; } /* ---------------------------------------------------------------------- process a keyword in formula customize by adding a keyword: nprocs, time ------------------------------------------------------------------------- */ double Variable::keyword(char *word) { if (strcmp(word,"nprocs") == 0) { int nprocs; MPI_Comm_size(world,&nprocs); return 1.0*nprocs; } else if (strcmp(word,"time") == 0) { return input->deltatime; } return 0.0; } /* ---------------------------------------------------------------------- read a floating point value from a string generate an error if not a legitimate floating point value ------------------------------------------------------------------------- */ double Variable::numeric(char *str) { int n = strlen(str); for (int i = 0; i < n; i++) { if (isdigit(str[i])) continue; if (str[i] == '-' || str[i] == '+' || str[i] == '.') continue; if (str[i] == 'e' || str[i] == 'E') continue; error->all("Expected floating point parameter in variable definition"); } return atof(str); } /* ---------------------------------------------------------------------- read an integer value from a string generate an error if not a legitimate integer value ------------------------------------------------------------------------- */ int Variable::inumeric(char *str) { int n = strlen(str); for (int i = 0; i < n; i++) { if (isdigit(str[i]) || str[i] == '-' || str[i] == '+') continue; error->all("Expected integer parameter in variable definition"); } return atoi(str); } /* ---------------------------------------------------------------------- recursive evaluation of string str called from "if" command in input script str is a boolean expression containing one or more items: number = 0.0, 
-5.45, 2.8e-4, ... math operation = (),x==y,x!=y,xy,x>=y,x&&y,x||y ------------------------------------------------------------------------- */ double Variable::evaluate_boolean(char *str) { int op,opprevious; double value1,value2; char onechar; char *ptr; double argstack[MAXLEVEL]; int opstack[MAXLEVEL]; int nargstack = 0; int nopstack = 0; int i = 0; int expect = ARG; while (1) { onechar = str[i]; // whitespace: just skip if (isspace(onechar)) i++; // ---------------- // parentheses: recursively evaluate contents of parens // ---------------- else if (onechar == '(') { if (expect == OP) error->all("Invalid Boolean syntax in if command"); expect = OP; char *contents; i = find_matching_paren(str,i,contents); i++; // evaluate contents and push on stack argstack[nargstack++] = evaluate_boolean(contents); delete [] contents; // ---------------- // number: push value onto stack // ---------------- } else if (isdigit(onechar) || onechar == '.' || onechar == '-') { if (expect == OP) error->all("Invalid Boolean syntax in if command"); expect = OP; // istop = end of number, including scientific notation int istart = i++; while (isdigit(str[i]) || str[i] == '.') i++; if (str[i] == 'e' || str[i] == 'E') { i++; if (str[i] == '+' || str[i] == '-') i++; while (isdigit(str[i])) i++; } int istop = i - 1; int n = istop - istart + 1; char *number = new char[n+1]; strncpy(number,&str[istart],n); number[n] = '\0'; argstack[nargstack++] = atof(number); delete [] number; // ---------------- // Boolean operator, including end-of-string // ---------------- } else if (strchr("<>=!&|\0",onechar)) { if (onechar == '=') { if (str[i+1] != '=') error->all("Invalid Boolean syntax in if command"); op = EQ; i++; } else if (onechar == '!') { if (str[i+1] == '=') { op = NE; i++; } else op = NOT; } else if (onechar == '<') { if (str[i+1] != '=') op = LT; else { op = LE; i++; } } else if (onechar == '>') { if (str[i+1] != '=') op = GT; else { op = GE; i++; } } else if (onechar == '&') { if (str[i+1] 
!= '&') error->all("Invalid Boolean syntax in if command"); op = AND; i++; } else if (onechar == '|') { if (str[i+1] != '|') error->all("Invalid Boolean syntax in if command"); op = OR; i++; } else op = DONE; i++; if (op == NOT && expect == ARG) { opstack[nopstack++] = op; continue; } if (expect == ARG) error->all("Invalid Boolean syntax in if command"); expect = ARG; // evaluate stack as deep as possible while respecting precedence // before pushing current op onto stack while (nopstack && precedence[opstack[nopstack-1]] >= precedence[op]) { opprevious = opstack[--nopstack]; value2 = argstack[--nargstack]; if (opprevious != NOT) value1 = argstack[--nargstack]; if (opprevious == NOT) { if (value2 == 0.0) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == EQ) { if (value1 == value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == NE) { if (value1 != value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == LT) { if (value1 < value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == LE) { if (value1 <= value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == GT) { if (value1 > value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == GE) { if (value1 >= value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == AND) { if (value1 != 0.0 && value2 != 0.0) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == OR) { if (value1 != 0.0 || value2 != 0.0) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } } // if end-of-string, break out of entire formula evaluation loop if (op == DONE) break; // push current operation onto stack opstack[nopstack++] = op; } else error->all("Invalid Boolean syntax in if command"); } if (nopstack) error->all("Invalid Boolean syntax in 
if command"); if (nargstack != 1) error->all("Invalid Boolean syntax in if command"); return argstack[0]; } mrmpi-1.0~20131122/oink/Makefile.list0000644000175000017500000000251511524065425016743 0ustar mathieumathieu# MRMPI multiple-machine Makefile with explicit file list SHELL = /bin/sh # Definitions ROOT = mrmpi EXE = $(ROOT)_$@ SRC = command.cpp compare_rmat.cpp data2graph.cpp data2rare.cpp degree.cpp error.cpp input.cpp library.cpp main.cpp map_rmat.cpp memory.cpp mrmpi.cpp neigh_tri.cpp neighbor.cpp object.cpp oink.cpp random_mars.cpp reduce_rmat.cpp rmat.cpp sgi_enumerate.cpp sgi_prune.cpp sgi_sample.cpp tri_find.cpp universe.cpp variable.cpp wordfreq.cpp INC = blockmacros.h command.h data2graph.h data2rare.h degree.h error.h input.h library.h map_rmat.h memory.h mrmpi.h neigh_tri.h neighbor.h object.h oink.h pointers.h random_mars.h rmat.h sgi_enumerate.h sgi_prune.h sgi_sample.h style_command.h style_compare.h style_hash.h style_map.h style_reduce.h style_scan.h tri_find.h typedefs.h universe.h variable.h version.h wordfreq.h OBJ = $(SRC:.cpp=.o) # Targets help: @echo 'Type "make target" where target is one of:' @echo '' @files="`ls MAKE/Makefile.*`"; \ for file in $$files; do head -1 $$file; done clean: rm -rf Obj_* .DEFAULT: @test -f MAKE/Makefile.$@ @if [ ! -d Obj_$@ ]; then mkdir Obj_$@; fi @cp -p $(SRC) $(INC) Obj_$@ @cp MAKE/Makefile.$@ Obj_$@/Makefile @cd Obj_$@; \ $(MAKE) $(MFLAGS) "OBJ = $(OBJ)" "INC = $(INC)" "EXE = ../$(EXE)" ../$(EXE) @if [ -d Obj_$@ ]; then cd Obj_$@; rm -f $(SRC) $(INC) Makefile*; fi mrmpi-1.0~20131122/oink/rmat2.h0000644000175000017500000000131011535732267015526 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */

#ifdef COMMAND_CLASS

CommandStyle(rmat2,RMAT2)

#else

#ifndef OINK_RMAT2_H
#define OINK_RMAT2_H

#include "command.h"
#include "map_rmat_generate.h"

namespace OINK_NS {

// OINK command class listed above under the script keyword "rmat2"

class RMAT2 : public Command {
 public:
  RMAT2(class OINK *);
  void run();
  void params(int, char **);

 private:
  RMAT_struct rmat;     // NOTE(review): presumably declared in map_rmat_generate.h - confirm
};

}

#endif
#endif
mrmpi-1.0~20131122/oink/histo.h0000644000175000017500000000131411540754014015621 0ustar mathieumathieu
/* ----------------------------------------------------------------------
   OINK - scripting wrapper on MapReduce-MPI library
   http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level MR-MPI directory.
------------------------------------------------------------------------- */

#ifdef COMMAND_CLASS

CommandStyle(histo,Histo)

#else

#ifndef OINK_HISTO_H
#define OINK_HISTO_H

#include "command.h"

namespace OINK_NS {

// OINK command class listed above under the script keyword "histo"

class Histo : public Command {
 public:
  Histo(class OINK *);
  void run();
  void params(int, char **);

 private:
  // static so it can be passed as a plain function pointer
  // NOTE(review): looks like a key/value scan callback - confirm in histo.cpp
  static void print(char *, int, char *, int, void *);
};

}

#endif
#endif
mrmpi-1.0~20131122/oink/map_invert.cpp0000644000175000017500000000070611535452656017211 0ustar mathieumathieu
#include "keyvalue.h"

using namespace MAPREDUCE_NS;

/* ----------------------------------------------------------------------
   invert
   invert key and value
   input: key, value
   output: key = value, value = key
   itask and ptr are not used
------------------------------------------------------------------------- */

void invert(uint64_t itask, char *key, int keybytes,
            char *value, int valuebytes, KeyValue *kv, void *ptr)
{
  // emit the pair with the roles of key and value swapped
  kv->add(value,valuebytes,key,keybytes);
}
mrmpi-1.0~20131122/oink/mrmpi.cpp0000644000175000017500000003704511524066004016161 0ustar mathieumathieu
/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia
National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #include "string.h" #include "stdlib.h" #include "ctype.h" #include "mrmpi.h" #include "object.h" #include "input.h" #include "variable.h" #include "style_compare.h" #include "style_hash.h" #include "style_map.h" #include "style_reduce.h" #include "style_scan.h" #include "error.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- */ MRMPI::MRMPI(OINK *oink) : Pointers(oink) {} /* ---------------------------------------------------------------------- invoke a MR-MPI library method on a MR object stored in Object class ------------------------------------------------------------------------- */ void MRMPI::run(int index, int narg, char **arg) { if (narg < 1) error->all("Illegal MapReduce object command"); MapReduce *mr = obj->extract_mr(index); // execute one of suite of MR-MPI library methods char *command = arg[0]; arg++; narg--; if (strcmp(command,"delete") == 0) { if (narg > 0) error->all("Illegal MR object delete command"); obj->delete_mr(index); obj->cleanup(); } else if (strcmp(command,"copy") == 0) { if (narg != 1) error->all("Illegal MR object copy command"); int index2 = obj->find_mr(arg[0]); if (index2 > 0) error->all("MR object created by copy already exists"); MapReduce *mr2 = mr->copy(); obj->add_mr(arg[0],mr2); } else if (strcmp(command,"add") == 0) { if (narg != 1) error->all("Illegal MR object add command"); int index2 = obj->find_mr(arg[0]); if (index2 < 0) error->all("MR object add comand MR object does not exist"); MapReduce *mr2 = obj->extract_mr(index2); mr->add(mr2); } else if (strcmp(command,"aggregate") == 0) { if (narg != 1) error->all("Illegal MR object aggregate command"); if (strcmp(arg[0],"NULL") == 0) mr->aggregate(NULL); else { 
HashFnPtr hash = hash_lookup(arg[0]); mr->aggregate(hash); } } else if (strcmp(command,"broadcast") == 0) { if (narg != 1) error->all("Illegal MR object broadcast command"); int root = atoi(arg[0]); mr->broadcast(root); } else if (strcmp(command,"clone") == 0) { if (narg != 0) error->all("Illegal MR object clone command"); mr->clone(); } else if (strcmp(command,"close") == 0) { if (narg != 0) error->all("Illegal MR object close command"); mr->close(); } else if (strcmp(command,"collapse") == 0) { if (narg != 2) error->all("Illegal MR object collapse command"); if (strcmp(arg[0],"int") == 0) { int ikey = atoi(arg[1]); mr->collapse((char *) &ikey,sizeof(int)); } else if (strcmp(arg[0],"uint64") == 0) { uint64_t ikey = atoll(arg[1]); mr->collapse((char *) &ikey,sizeof(uint64_t)); } else if (strcmp(arg[0],"double") == 0) { double dkey = atof(arg[1]); mr->collapse((char *) &dkey,sizeof(double)); } else if (strcmp(arg[0],"str") == 0) { char *str = arg[1]; mr->collapse(str,strlen(str)+1); } } else if (strcmp(command,"collate") == 0) { if (narg != 1) error->all("Illegal MR object collate command"); if (strcmp(arg[0],"NULL") == 0) mr->collate(NULL); else { HashFnPtr hash = hash_lookup(arg[0]); mr->collate(hash); } } else if (strcmp(command,"compress") == 0) { if (narg != 1) error->all("Illegal MR object compress command"); ReduceFnPtr reduce = reduce_lookup(arg[0]); mr->compress(reduce,NULL); } else if (strcmp(command,"convert") == 0) { if (narg != 0) error->all("Illegal MR object convert command"); mr->convert(); } else if (strcmp(command,"gather") == 0) { if (narg != 1) error->all("Illegal MR object gather command"); int nprocs = atoi(arg[0]); mr->gather(nprocs); } else if (strcmp(command,"map/task") == 0) { if (narg < 2 || narg > 3) error->all("Illegal MR object map/task command"); int nmap = atoi(arg[0]); MapTaskFnPtr map = map_task_lookup(arg[1]); int addflag = 0; if (narg == 3) addflag = 1; mr->map(nmap,map,NULL,addflag); } else if (strcmp(command,"map/file") == 0) { 
if (narg < 5 || narg > 6) error->all("Illegal MR object map/file command"); int nstr = 1; char **strings = &arg[0]; if (strstr(arg[0],"v_") == arg[0]) { int index = input->variable->find(&arg[0][2]); if (index < 0) error->all("MR object map command variable is unknown"); if (input->variable->equalstyle(index)) error->all("MR object command input is equal-style variable"); nstr = input->variable->retrieve_count(index); strings = new char*[nstr]; for (int i = 0; i < nstr; i++) { char *one = input->variable->retrieve_single(index,i); int n = strlen(one) + 1; strings[i] = new char[n]; strcpy(strings[i],one); } } int self = atoi(arg[1]); int recurse = atoi(arg[2]); int readfile = atoi(arg[3]); MapFileFnPtr map = map_file_lookup(arg[4]); int addflag = 0; if (narg == 6) addflag = 1; mr->map(nstr,strings,self,recurse,readfile,map,NULL,addflag); if (strings != &arg[0]) { for (int i = 0; i < nstr; i++) delete [] strings[i]; delete [] strings; } } else if (strcmp(command,"map/char") == 0) { if (narg < 7 || narg > 8) error->all("Illegal MR object map/char command"); int nmap = atoi(arg[0]); int nstr = 1; char **strings = &arg[1]; if (strstr(arg[1],"v_") == arg[1]) { int index = input->variable->find(&arg[1][2]); if (index < 0) error->all("MR object map command variable is unknown"); if (input->variable->equalstyle(index)) error->all("MR object command input is equal-style variable"); nstr = input->variable->retrieve_count(index); strings = new char*[nstr]; for (int i = 0; i < nstr; i++) { char *one = input->variable->retrieve_single(index,i); int n = strlen(one) + 1; strings[i] = new char[n]; strcpy(strings[i],one); } } int recurse = atoi(arg[2]); int readfile = atoi(arg[3]); char sepchar = arg[4][0]; int delta = atoi(arg[5]); MapStringFnPtr map = map_string_lookup(arg[6]); int addflag = 0; if (narg == 8) addflag = 1; mr->map(nmap,nstr,strings,recurse,readfile,sepchar,delta,map,NULL,addflag); if (strings != &arg[0]) { for (int i = 0; i < nstr; i++) delete [] strings[i]; delete 
[] strings; } } else if (strcmp(command,"map/string") == 0) { if (narg < 7 || narg > 8) error->all("Illegal MR object map/string command"); int nmap = atoi(arg[0]); int nstr = 1; char **strings = &arg[1]; if (strstr(arg[1],"v_") == arg[1]) { int index = input->variable->find(&arg[1][2]); if (index < 0) error->all("MR object map command variable is unknown"); if (input->variable->equalstyle(index)) error->all("MR object command input is equal-style variable"); nstr = input->variable->retrieve_count(index); strings = new char*[nstr]; for (int i = 0; i < nstr; i++) { char *one = input->variable->retrieve_single(index,i); int n = strlen(one) + 1; strings[i] = new char[n]; strcpy(strings[i],one); } } int recurse = atoi(arg[2]); int readfile = atoi(arg[3]); char *sepstr = arg[4]; int delta = atoi(arg[5]); MapStringFnPtr map = map_string_lookup(arg[6]); int addflag = 0; if (narg == 8) addflag = 1; mr->map(nmap,nstr,strings,recurse,readfile,sepstr,delta,map,NULL,addflag); if (strings != &arg[0]) { for (int i = 0; i < nstr; i++) delete [] strings[i]; delete [] strings; } } else if (strcmp(command,"map/mr") == 0) { if (narg < 2 || narg > 3) error->all("Illegal MR object map/mr command"); int index2 = obj->find_mr(arg[0]); if (index2 < 0) error->all("MR object map command MR object does not exist"); MapReduce *mr2 = obj->extract_mr(index2); MapMRFnPtr map = map_mr_lookup(arg[1]); int addflag = 0; if (narg == 3) addflag = 1; mr->map(mr2,map,NULL,addflag); } else if (strcmp(command,"open") == 0) { if (narg != 0) error->all("Illegal MR object open command"); mr->open(); } else if (strcmp(command,"print") == 0) { if (narg == 4) { int proc = atoi(arg[0]); int nstride = atoi(arg[1]); int kflag = atoi(arg[2]); int vflag = atoi(arg[3]); mr->print(proc,nstride,kflag,vflag); } else if (narg == 6) { char *file = arg[0]; int fflag = atoi(arg[1]); int proc = atoi(arg[2]); int nstride = atoi(arg[3]); int kflag = atoi(arg[4]); int vflag = atoi(arg[5]); 
mr->print(file,fflag,proc,nstride,kflag,vflag); } else error->all("Illegal MR object print command"); } else if (strcmp(command,"reduce") == 0) { if (narg != 1) error->all("Illegal MR object reduce command"); ReduceFnPtr reduce = reduce_lookup(arg[0]); mr->reduce(reduce,NULL); } else if (strcmp(command,"scan/kv") == 0) { if (narg != 1) error->all("Illegal MR object scan/kv command"); ScanKVFnPtr scan = scan_kv_lookup(arg[0]); mr->scan(scan,NULL); } else if (strcmp(command,"scan/kmv") == 0) { if (narg != 1) error->all("Illegal MR object scan/kmv command"); ScanKMVFnPtr scan = scan_kmv_lookup(arg[0]); mr->scan(scan,NULL); } else if (strcmp(command,"scrunch") == 0) { if (narg != 3) error->all("Illegal MR object scrunch command"); int nprocs = atoi(arg[0]); if (strcmp(arg[1],"int") == 0) { int ikey = atoi(arg[2]); mr->scrunch(nprocs,(char *) &ikey,sizeof(int)); } else if (strcmp(arg[1],"uint64") == 0) { uint64_t ikey = atoll(arg[2]); mr->scrunch(nprocs,(char *) &ikey,sizeof(uint64_t)); } else if (strcmp(arg[1],"double") == 0) { double dkey = atoi(arg[2]); mr->scrunch(nprocs,(char *) &dkey,sizeof(double)); } else if (strcmp(arg[1],"str") == 0) { char *str = arg[2]; mr->scrunch(nprocs,str,strlen(str)+1); } } else if (strcmp(command,"sort_keys") == 0) { if (narg != 1) error->all("Illegal MR object sort_keys command"); if (isdigit(arg[0][0])) { int flag = atoi(arg[0]); mr->sort_keys(flag); } else { CompareFnPtr compare = compare_lookup(arg[0]); mr->sort_keys(compare); } } else if (strcmp(command,"sort_values") == 0) { if (narg != 1) error->all("Illegal MR object sort_values command"); if (isdigit(arg[0][0])) { int flag = atoi(arg[0]); mr->sort_values(flag); } else { CompareFnPtr compare = compare_lookup(arg[0]); mr->sort_values(compare); } } else if (strcmp(command,"sort_multivalues") == 0) { if (narg != 1) error->all("Illegal MR object sotr_multivalues command"); if (isdigit(arg[0][0])) { int flag = atoi(arg[0]); mr->sort_multivalues(flag); } else { CompareFnPtr compare = 
compare_lookup(arg[0]); mr->sort_multivalues(compare); } } else if (strcmp(command,"kv_stats") == 0) { if (narg != 1) error->all("Illegal MR object kv_stats command"); int level = atoi(arg[0]); mr->kv_stats(level); } else if (strcmp(command,"kmv_stats") == 0) { if (narg != 1) error->all("Illegal MR object kmv_stats command"); int level = atoi(arg[0]); mr->kmv_stats(level); } else if (strcmp(command,"cummulative_stats") == 0) { if (narg != 2) error->all("Illegal MR object cummulative stats command"); int level = atoi(arg[0]); int reset = atoi(arg[1]); mr->cummulative_stats(level,reset); } else if (strcmp(command,"set") == 0) { if (narg != 2) error->all("Illegal MR object set command"); if (strcmp(arg[1],"mapstyle") == 0) mr->mapstyle = atoi(arg[2]); else if (strcmp(arg[1],"all2all") == 0) mr->all2all = atoi(arg[2]); else if (strcmp(arg[1],"verbosity") == 0) mr->verbosity = atoi(arg[2]); else if (strcmp(arg[1],"timer") == 0) mr->timer = atoi(arg[2]); else if (strcmp(arg[1],"memsize") == 0) mr->memsize = atoi(arg[2]); else if (strcmp(arg[1],"minpage") == 0) mr->minpage = atoi(arg[2]); else if (strcmp(arg[1],"maxpage") == 0) mr->maxpage = atoi(arg[2]); else if (strcmp(arg[1],"freepage") == 0) mr->freepage = atoi(arg[2]); else if (strcmp(arg[1],"outofcore") == 0) mr->outofcore = atoi(arg[2]); else if (strcmp(arg[1],"zeropage") == 0) mr->zeropage = atoi(arg[2]); else if (strcmp(arg[1],"keyalign") == 0) mr->keyalign = atoi(arg[2]); else if (strcmp(arg[1],"valuealign") == 0) mr->valuealign = atoi(arg[2]); else if (strcmp(arg[1],"fpath") == 0) mr->set_fpath(arg[2]); else error->all("Illegal MR object set command"); } else error->all("Illegal MR object command"); } /* ---------------------------------------------------------------------- lookup methods to match function names with function pointers ------------------------------------------------------------------------- */ HashFnPtr MRMPI::hash_lookup(char *str) { if (0) return NULL; #define HASH_STYLE #define 
HashStyle(name) else if (strcmp(str,#name) == 0) return name; #include "style_hash.h" #undef HASH_STYLE return NULL; } /* ---------------------------------------------------------------------- */ CompareFnPtr MRMPI::compare_lookup(char *str) { if (0) return NULL; #define COMPARE_STYLE #define CompareStyle(name) else if (strcmp(str,#name) == 0) return name; #include "style_compare.h" #undef COMPARE_STYLE return NULL; } /* ---------------------------------------------------------------------- */ MapTaskFnPtr MRMPI::map_task_lookup(char *str) { if (0) return NULL; #define MAP_TASK_STYLE #define MapStyle(name) else if (strcmp(str,#name) == 0) return name; #include "style_map.h" #undef MAP_TASK_STYLE return NULL; } /* ---------------------------------------------------------------------- */ MapFileFnPtr MRMPI::map_file_lookup(char *str) { if (0) return NULL; #define MAP_FILE_STYLE #define MapStyle(name) else if (strcmp(str,#name) == 0) return name; #include "style_map.h" #undef MAP_FILE_STYLE return NULL; } /* ---------------------------------------------------------------------- */ MapStringFnPtr MRMPI::map_string_lookup(char *str) { if (0) return NULL; #define MAP_STRING_STYLE #define MapStyle(name) else if (strcmp(str,#name) == 0) return name; #include "style_map.h" #undef MAP_STRING_STYLE return NULL; } /* ---------------------------------------------------------------------- */ MapMRFnPtr MRMPI::map_mr_lookup(char *str) { if (0) return NULL; #define MAP_MR_STYLE #define MapStyle(name) else if (strcmp(str,#name) == 0) return name; #include "style_map.h" #undef MAP_MR_STYLE return NULL; } /* ---------------------------------------------------------------------- */ ReduceFnPtr MRMPI::reduce_lookup(char *str) { if (0) return NULL; #define REDUCE_STYLE #define ReduceStyle(name) else if (strcmp(str,#name) == 0) return name; #include "style_reduce.h" #undef REDUCE_STYLE return NULL; } /* ---------------------------------------------------------------------- */ ScanKVFnPtr 
MRMPI::scan_kv_lookup(char *str) { if (0) return NULL; #define SCAN_KV_STYLE #define ScanStyle(name) else if (strcmp(str,#name) == 0) return name; #include "style_scan.h" #undef SCAN_KV_STYLE return NULL; } /* ---------------------------------------------------------------------- */ ScanKMVFnPtr MRMPI::scan_kmv_lookup(char *str) { if (0) return NULL; #define SCAN_KMV_STYLE #define ScanStyle(name) else if (strcmp(str,#name) == 0) return name; #include "style_scan.h" #undef SCAN_KMV_STYLE return NULL; } mrmpi-1.0~20131122/oink/input.cpp0000644000175000017500000005275311536502301016175 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #include "mpi.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "ctype.h" #include "unistd.h" #include "sys/stat.h" #include "input.h" #include "command.h" #include "style_command.h" #include "object.h" #include "mrmpi.h" #include "universe.h" #include "variable.h" #include "memory.h" #include "error.h" using namespace OINK_NS; #define MAXLINE 2048 #define DELTA 4 /* ---------------------------------------------------------------------- */ Input::Input(OINK *oink, int argc, char **argv) : Pointers(oink) { MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); deltatime = 0.0; line = new char[MAXLINE]; copy = new char[MAXLINE]; work = new char[MAXLINE]; narg = maxarg = 0; arg = NULL; echo_screen = 0; echo_log = 1; label_active = 0; labelstr = NULL; jump_skip = 0; statflag = 0; if (me == 0) { nfile = maxfile = 1; infiles = (FILE **) memory->smalloc(sizeof(FILE *),"input:infiles"); infiles[0] = infile; } else infiles = NULL; variable = new Variable(oink); // process command-line args // 
check for args "-var" and "-echo" // caller has already checked that sufficient arguments exist int iarg = 0; while (iarg < argc) { if (strcmp(argv[iarg],"-var") == 0 || strcmp(argv[iarg],"-v") == 0) { int jarg = iarg+2; while (jarg < argc && argv[jarg][0] != '-') jarg++; variable->set(argv[iarg+1],jarg-iarg-2,&argv[iarg+2]); iarg = jarg; } else if (strcmp(argv[iarg],"-echo") == 0 || strcmp(argv[iarg],"-e") == 0) { narg = 1; char **tmp = arg; // trick echo() into using argv instead of arg arg = &argv[iarg+1]; echo(); arg = tmp; iarg += 2; } else iarg++; } } /* ---------------------------------------------------------------------- */ Input::~Input() { // don't free command and arg strings // they just point to other allocated memory delete variable; delete [] line; delete [] copy; delete [] work; if (labelstr) delete [] labelstr; if (arg) memory->sfree(arg); if (infiles) memory->sfree(infiles); } /* ---------------------------------------------------------------------- process all input from infile infile = stdin or file if command-line arg "-in" was used ------------------------------------------------------------------------- */ void Input::file() { int m,n; while (1) { // read a line from input script // if line ends in continuation char '&', concatenate next line(s) // n = length of line including str terminator, 0 if end of file // m = position of last printable char in line or -1 if blank line if (me == 0) { m = 0; while (1) { if (fgets(&line[m],MAXLINE-m,infile) == NULL) n = 0; else n = strlen(line) + 1; if (n == 0) break; m = n-2; while (m >= 0 && isspace(line[m])) m--; if (m < 0 || line[m] != '&') break; } } // bcast the line // if n = 0, end-of-file // error if label_active is set, since label wasn't encountered // if original input file, code is done // else go back to previous input file MPI_Bcast(&n,1,MPI_INT,0,world); if (n == 0) { if (label_active) error->all("Label wasn't found in input script"); if (me == 0) { if (infile != stdin) fclose(infile); 
nfile--; } MPI_Bcast(&nfile,1,MPI_INT,0,world); if (nfile == 0) break; if (me == 0) infile = infiles[nfile-1]; continue; } MPI_Bcast(line,n,MPI_CHAR,0,world); // if n = MAXLINE, line is too long if (n == MAXLINE) { char str[MAXLINE+32]; sprintf(str,"Input line too long: %s",line); error->all(str); } // echo the command unless scanning for label if (me == 0 && label_active == 0) { if (echo_screen && screen) fprintf(screen,"%s",line); if (echo_log && logfile) fprintf(logfile,"%s",line); } // parse the line // if no command, skip to next line in input script parse(); if (command == NULL) continue; // if scanning for label, skip command unless it's a label command if (label_active && strcmp(command,"label") != 0) continue; // execute the command if (execute_command()) { char str[MAXLINE]; sprintf(str,"Unknown command: %s",line); error->all(str); } } } /* ---------------------------------------------------------------------- process all input from filename ------------------------------------------------------------------------- */ void Input::file(const char *filename) { // error if another nested file still open // if single open file is not stdin, close it // open new filename and set infile, infiles[0] if (me == 0) { if (nfile > 1) error->one("Another input script is already being processed"); if (infile != stdin) fclose(infile); infile = fopen(filename,"r"); if (infile == NULL) { char str[128]; sprintf(str,"Cannot open input script %s",filename); error->one(str); } infiles[0] = infile; } else infile = NULL; file(); } /* ---------------------------------------------------------------------- parse the command in single and execute it return command name to caller ------------------------------------------------------------------------- */ char *Input::one(const char *single) { strcpy(line,single); // echo the command unless scanning for label if (me == 0 && label_active == 0) { if (echo_screen && screen) fprintf(screen,"%s\n",line); if (echo_log && logfile) 
fprintf(logfile,"%s\n",line); } // parse the line // if no command, just return NULL parse(); if (command == NULL) return NULL; // if scanning for label, skip command unless it's a label command if (label_active && strcmp(command,"label") != 0) return NULL; // execute the command and return its name if (execute_command()) { char str[MAXLINE]; sprintf(str,"Unknown command: %s",line); error->all(str); } return command; } /* ---------------------------------------------------------------------- parse copy of command line strip comment = all chars from # on replace all $ via variable substitution command = first word narg = # of args arg[] = individual args treat text between single/double quotes as one arg ------------------------------------------------------------------------- */ void Input::parse() { // make a copy to work on strcpy(copy,line); // strip any # comment by resetting string terminator // do not strip # inside single/double quotes char quote = '\0'; char *ptr = copy; while (*ptr) { if (*ptr == '#' && !quote) { *ptr = '\0'; break; } if (*ptr == quote) quote = '\0'; else if (*ptr == '"' || *ptr == '\'') quote = *ptr; ptr++; } // perform $ variable substitution (print changes) // except if searching for a label since earlier variable may not be defined if (!label_active) substitute(copy,1); // command = 1st arg command = strtok(copy," \t\n\r\f"); if (command == NULL) return; // point arg[] at each subsequent arg // treat text between single/double quotes as one arg // insert string terminators in copy to delimit args quote = '\0'; int iarg,argstart; narg = 0; while (1) { if (narg == maxarg) { maxarg += DELTA; arg = (char **) memory->srealloc(arg,maxarg*sizeof(char *),"input:arg"); } arg[narg] = strtok(NULL," \t\n\r\f"); if (!arg[narg]) break; if (!quote && (arg[narg][0] == '"' || arg[narg][0] == '\'')) { quote = arg[narg][0]; argstart = narg; arg[narg] = &arg[narg][1]; } if (quote && arg[narg][strlen(arg[narg])-1] == quote) { for (iarg = argstart; iarg < 
narg; iarg++) arg[iarg][strlen(arg[iarg])] = ' '; arg[narg][strlen(arg[narg])-1] = '\0'; narg = argstart; quote = '\0'; } narg++; } if (quote) error->all("Unbalanced quotes in input line"); } /* ---------------------------------------------------------------------- substitute for $ variables in str and return it str assumed to be long enough to hold expanded version print updated string if flag is set and not searching for label ------------------------------------------------------------------------- */ void Input::substitute(char *str, int flag) { // use work[] as scratch space to expand str, then copy back to str // do not replace $ inside single/double quotes // var = pts at variable name, ended by NULL // if $ is followed by '{', trailing '}' becomes NULL // else $x becomes x followed by NULL // beyond = pts at text following variable char *var,*value,*beyond; char quote = '\0'; char *ptr = str; while (*ptr) { if (*ptr == '$' && !quote) { if (*(ptr+1) == '{') { var = ptr+2; int i = 0; while (var[i] != '\0' && var[i] != '}') i++; if (var[i] == '\0') error->one("Invalid variable name"); var[i] = '\0'; beyond = ptr + strlen(var) + 3; } else { var = ptr; var[0] = var[1]; var[1] = '\0'; beyond = ptr + strlen(var) + 1; } value = variable->retrieve(var); if (value == NULL) error->one("Substitution for illegal variable"); *ptr = '\0'; strcpy(work,str); if (strlen(work)+strlen(value) >= MAXLINE) error->one("Input line too long after variable substitution"); strcat(work,value); if (strlen(work)+strlen(beyond) >= MAXLINE) error->one("Input line too long after variable substitution"); strcat(work,beyond); strcpy(str,work); ptr += strlen(value); if (flag && me == 0 && label_active == 0) { if (echo_screen && screen) fprintf(screen,"%s",str); if (echo_log && logfile) fprintf(logfile,"%s",str); } continue; } if (*ptr == quote) quote = '\0'; else if (*ptr == '"' || *ptr == '\'') quote = *ptr; ptr++; } } /* ---------------------------------------------------------------------- 
process a single parsed command return 0 if successful, -1 if did not recognize command ------------------------------------------------------------------------- */ int Input::execute_command() { double tstart,tstop; int flag = 1; if (!strcmp(command,"clear")) clear(); else if (!strcmp(command,"echo")) echo(); else if (!strcmp(command,"if")) ifthenelse(); else if (!strcmp(command,"include")) include(); else if (!strcmp(command,"jump")) jump(); else if (!strcmp(command,"label")) label(); else if (!strcmp(command,"log")) log(); else if (!strcmp(command,"next")) next_command(); else if (!strcmp(command,"print")) print(); else if (!strcmp(command,"shell")) shell(); else if (!strcmp(command,"variable")) variable_command(); else if (!strcmp(command,"input")) input_command(); else if (!strcmp(command,"mr")) mr(); else if (!strcmp(command,"output")) output(); else if (!strcmp(command,"set")) set(); else flag = 0; // return if command was listed above if (flag) return 0; // check if command is a Command added via style_command.h Command *cmd = NULL; if (0) return 0; // dummy line to enable else-if macro expansion #define COMMAND_CLASS #define CommandStyle(key,Class) \ else if (strcmp(command,#key) == 0) cmd = new Class(oink); #include "style_command.h" #undef COMMAND_CLASS // process command arguments and invoke the command // look for -i and -o switches and their corresponding args // call cmd->inputs and cmd->outputs even if no -i or -o for error checking if (cmd) { int iarg = 0; while (iarg < narg && strcmp(arg[iarg],"-i") != 0 && strcmp(arg[iarg],"-o") != 0) iarg++; cmd->params(iarg,arg); int iswitch = 0; int oswitch = 0; while (iarg < narg) { if (strcmp(arg[iarg],"-i") == 0) { int jarg = iarg+1; while (jarg < narg && strcmp(arg[jarg],"-o") != 0) jarg++; cmd->inputs(jarg-iarg-1,&arg[iarg+1]); iswitch = 1; iarg = jarg; } else if (strcmp(arg[iarg],"-o") == 0) { int jarg = iarg+1; while (jarg < narg && strcmp(arg[jarg],"-i") != 0) jarg++; 
cmd->outputs(jarg-iarg-1,&arg[iarg+1]); oswitch = 1; iarg = jarg; } else error->all("Invalid command switch"); } if (!iswitch) cmd->inputs(0,NULL); if (!oswitch) cmd->outputs(0,NULL); MPI_Barrier(MPI_COMM_WORLD); double tstart = MPI_Wtime(); cmd->run(); MPI_Barrier(MPI_COMM_WORLD); deltatime = MPI_Wtime() - tstart; delete cmd; return 0; } // check if command is a previously named MR object // if so, invoke the MR method directly via mrmpi int index = obj->find_mr(command); if (index >= 0) { MPI_Barrier(MPI_COMM_WORLD); double tstart = MPI_Wtime(); mrmpi->run(index,narg,arg); MPI_Barrier(MPI_COMM_WORLD); deltatime = MPI_Wtime() - tstart; return 0; } // unrecognized command return -1; } /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ void Input::clear() { if (narg > 0) error->all("Illegal clear command"); oink->destroy(); oink->create(); } /* ---------------------------------------------------------------------- */ void Input::echo() { if (narg != 1) error->all("Illegal echo command"); if (strcmp(arg[0],"none") == 0) { echo_screen = 0; echo_log = 0; } else if (strcmp(arg[0],"screen") == 0) { echo_screen = 1; echo_log = 0; } else if (strcmp(arg[0],"log") == 0) { echo_screen = 0; echo_log = 1; } else if (strcmp(arg[0],"both") == 0) { echo_screen = 1; echo_log = 1; } else error->all("Illegal echo command"); } /* ---------------------------------------------------------------------- */ void Input::ifthenelse() { if (narg < 3) error->all("Illegal if command"); // substitute for variables in Boolean expression for "if" // in case expression was enclosed in quotes // must substitute on copy of arg else will step on subsequent args char *scopy = new char[MAXLINE]; strcpy(scopy,arg[0]); substitute(scopy,0); // evaluate 
Boolean expression for "if" double btest = variable->evaluate_boolean(scopy); // bound "then" commands if (strcmp(arg[1],"then") != 0) error->all("Illegal if command"); int first = 2; int iarg = first; while (iarg < narg && (strcmp(arg[iarg],"elif") != 0 && strcmp(arg[iarg],"else") != 0)) iarg++; int last = iarg-1; // execute "then" commands // make copies of all arg string commands // required because re-parsing a command via one() will wipe out args if (btest != 0.0) { int ncommands = last-first + 1; if (ncommands <= 0) error->all("Illegal if command"); char **commands = new char*[ncommands]; ncommands = 0; for (int i = first; i <= last; i++) { int n = strlen(arg[i]) + 1; if (n == 1) error->all("Illegal if command"); commands[ncommands] = new char[n]; strcpy(commands[ncommands],arg[i]); ncommands++; } for (int i = 0; i < ncommands; i++) char *command = input->one(commands[i]); for (int i = 0; i < ncommands; i++) delete [] commands[i]; delete [] commands; delete [] scopy; return; } // done if no "elif" or "else" if (iarg == narg) { delete [] scopy; return; } // check "elif" or "else" until find commands to execute // substitute for variables and evaluate Boolean expression for "elif" // must substitute on copy of arg else will step on subsequent args // bound and execute "elif" or "else" commands while (1) { if (iarg+2 > narg) error->all("Illegal if command"); if (strcmp(arg[iarg],"elif") == 0) { strcpy(scopy,arg[iarg+1]); substitute(scopy,0); btest = variable->evaluate_boolean(scopy); first = iarg+2; } else { btest = 1.0; first = iarg+1; } iarg = first; while (iarg < narg && (strcmp(arg[iarg],"elif") != 0 && strcmp(arg[iarg],"else") != 0)) iarg++; last = iarg-1; if (btest == 0.0) continue; int ncommands = last-first + 1; if (ncommands <= 0) error->all("Illegal if command"); char **commands = new char*[ncommands]; ncommands = 0; for (int i = first; i <= last; i++) { int n = strlen(arg[i]) + 1; if (n == 1) error->all("Illegal if command"); commands[ncommands] = new 
char[n]; strcpy(commands[ncommands],arg[i]); ncommands++; } // execute the list of commands for (int i = 0; i < ncommands; i++) char *command = input->one(commands[i]); // clean up for (int i = 0; i < ncommands; i++) delete [] commands[i]; delete [] commands; delete [] scopy; return; } } /* ---------------------------------------------------------------------- */ void Input::include() { if (narg != 1) error->all("Illegal include command"); if (me == 0) { if (nfile == maxfile) { maxfile++; infiles = (FILE **) memory->srealloc(infiles,maxfile*sizeof(FILE *),"input:infiles"); } infile = fopen(arg[0],"r"); if (infile == NULL) { char str[128]; sprintf(str,"Cannot open input script %s",arg[0]); error->one(str); } infiles[nfile++] = infile; } } /* ---------------------------------------------------------------------- */ void Input::jump() { if (narg < 1 || narg > 2) error->all("Illegal jump command"); if (jump_skip) { jump_skip = 0; return; } if (me == 0) { if (strcmp(arg[0],"SELF") == 0) rewind(infile); else { if (infile != stdin) fclose(infile); infile = fopen(arg[0],"r"); if (infile == NULL) { char str[128]; sprintf(str,"Cannot open input script %s",arg[0]); error->one(str); } infiles[nfile-1] = infile; } } if (narg == 2) { label_active = 1; if (labelstr) delete [] labelstr; int n = strlen(arg[1]) + 1; labelstr = new char[n]; strcpy(labelstr,arg[1]); } } /* ---------------------------------------------------------------------- */ void Input::label() { if (narg != 1) error->all("Illegal label command"); if (label_active && strcmp(labelstr,arg[0]) == 0) label_active = 0; } /* ---------------------------------------------------------------------- */ void Input::log() { if (narg != 1) error->all("Illegal log command"); if (me == 0) { if (logfile) fclose(logfile); if (strcmp(arg[0],"none") == 0) logfile = NULL; else { logfile = fopen(arg[0],"w"); if (logfile == NULL) { char str[128]; sprintf(str,"Cannot open logfile %s",arg[0]); error->one(str); } } if (universe->nworlds == 
1) universe->ulogfile = logfile; } } /* ---------------------------------------------------------------------- */ void Input::next_command() { if (variable->next(narg,arg)) jump_skip = 1; } /* ---------------------------------------------------------------------- */ void Input::print() { if (narg != 1) error->all("Illegal print command"); // substitute for $ variables (no printing) and print arg substitute(arg[0],0); if (me == 0) { if (screen) fprintf(screen,"%s ",arg[0]); if (logfile) fprintf(logfile,"%s ",arg[0]); } if (me == 0) { if (screen) fprintf(screen,"\n"); if (logfile) fprintf(logfile,"\n"); } } /* ---------------------------------------------------------------------- */ void Input::shell() { if (narg < 1) error->all("Illegal shell command"); if (strcmp(arg[0],"cd") == 0) { if (narg != 2) error->all("Illegal shell command"); chdir(arg[1]); } else if (strcmp(arg[0],"mkdir") == 0) { if (narg < 2) error->all("Illegal shell command"); #if !defined(WINDOWS) && !defined(__MINGW32_VERSION) if (me == 0) for (int i = 1; i < narg; i++) mkdir(arg[i], S_IRWXU | S_IRGRP | S_IXGRP); #endif } else if (strcmp(arg[0],"mv") == 0) { if (narg != 3) error->all("Illegal shell command"); if (me == 0) rename(arg[1],arg[2]); } else if (strcmp(arg[0],"rm") == 0) { if (narg < 2) error->all("Illegal shell command"); if (me == 0) for (int i = 1; i < narg; i++) unlink(arg[i]); } else if (strcmp(arg[0],"rmdir") == 0) { if (narg < 2) error->all("Illegal shell command"); if (me == 0) for (int i = 1; i < narg; i++) rmdir(arg[i]); } else error->all("Illegal shell command"); } /* ---------------------------------------------------------------------- */ void Input::variable_command() { variable->set(narg,arg); } /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ /* 
----------------------------------------------------------------------
   one function for each OINK-specific input script command
   each one passes the parsed args (narg,arg) unchanged to a method of obj
------------------------------------------------------------------------- */

/* ---------------------------------------------------------------------- */

// input command: forwarded to obj->user_input()

void Input::input_command()
{
  obj->user_input(narg,arg);
}

/* ---------------------------------------------------------------------- */

// mr command: forwarded to obj->add_mr()

void Input::mr()
{
  obj->add_mr(narg,arg);
}

/* ---------------------------------------------------------------------- */

// output command: forwarded to obj->user_output()

void Input::output()
{
  obj->user_output(narg,arg);
}

/* ---------------------------------------------------------------------- */

// set command: forwarded to obj->set()

void Input::set()
{
  obj->set(narg,arg);
}
mrmpi-1.0~20131122/oink/degree_stats.cpp0000644000175000017500000000413311536502301017474 0ustar mathieumathieu/* ----------------------------------------------------------------------
   OINK - scripting wrapper on MapReduce-MPI library
   http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level MR-MPI directory.
------------------------------------------------------------------------- */ #include "typedefs.h" #include "mpi.h" #include "stdio.h" #include "stdlib.h" #include "degree_stats.h" #include "object.h" #include "style_map.h" #include "style_reduce.h" #include "error.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- */ DegreeStats::DegreeStats(OINK *oink) : Command(oink) { ninputs = 1; noutputs = 0; } /* ---------------------------------------------------------------------- */ void DegreeStats::run() { int me,nprocs; MPI_Comm_rank(MPI_COMM_WORLD,&me); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); // MRe = Eij : NULL MapReduce *mre = obj->input(1,read_edge,NULL); MapReduce *mr = obj->create_mr(); uint64_t nedge = mre->kv_stats(0); if (duplicate == 1) mr->map(mre,edge_to_vertex,NULL); else mr->map(mre,edge_to_vertices,NULL); mr->collate(NULL); uint64_t nvert = mr->reduce(count,NULL); mr->map(mr,invert,NULL); mr->collate(NULL); mr->reduce(count,NULL); mr->gather(1); mr->sort_keys(-1); char msg[128]; sprintf(msg,"DegreeStats: %lu vertices, %lu edges",nvert,nedge); if (me == 0) error->message(msg); mr->scan(print,NULL); obj->cleanup(); } /* ---------------------------------------------------------------------- */ void DegreeStats::params(int narg, char **arg) { if (narg != 1) error->all("Illegal degree_stats command"); duplicate = atoi(arg[0]); } /* ---------------------------------------------------------------------- */ void DegreeStats::print(char *key, int keybytes, char *value, int valuebytes, void *ptr) { int nedge = *(int *) key; int nvert = *(int *) value; printf(" %d vertices with %d edges\n",nvert,nedge); } mrmpi-1.0~20131122/oink/map_rmat_generate.cpp0000644000175000017500000000312411535764350020511 0ustar mathieumathieu#include "typedefs.h" #include "stdlib.h" #include "map_rmat_generate.h" #include "keyvalue.h" using namespace MAPREDUCE_NS; 
/* ---------------------------------------------------------------------- rmat_generate generate graph edges via recursive R-MAT algorithm input: # to generated & R-MAT params extracted from RMAT_struct in ptr output: key = Vi Vj, value = NULL ------------------------------------------------------------------------- */ void rmat_generate(int itask, KeyValue *kv, void *ptr) { RMAT_struct *rmat = (RMAT_struct *) ptr; uint64_t order = rmat->order; uint64_t ngenerate = rmat->ngenerate; int nlevels = rmat->nlevels; double a = rmat->a; double b = rmat->b; double c = rmat->c; double d = rmat->d; double fraction = rmat->fraction; uint64_t i,j,delta; int ilevel; double a1,b1,c1,d1,total,rn; EDGE edge; for (uint64_t m = 0; m < ngenerate; m++) { delta = order >> 1; a1 = a; b1 = b; c1 = c; d1 = d; i = j = 0; for (ilevel = 0; ilevel < nlevels; ilevel++) { rn = drand48(); if (rn < a1) { } else if (rn < a1+b1) { j += delta; } else if (rn < a1+b1+c1) { i += delta; } else { i += delta; j += delta; } delta /= 2; if (fraction > 0.0) { a1 += a1*fraction * (drand48() - 0.5); b1 += b1*fraction * (drand48() - 0.5); c1 += c1*fraction * (drand48() - 0.5); d1 += d1*fraction * (drand48() - 0.5); total = a1+b1+c1+d1; a1 /= total; b1 /= total; c1 /= total; d1 /= total; } } edge.vi = i; edge.vj = j; kv->add((char *) &edge,sizeof(EDGE),NULL,0); } } mrmpi-1.0~20131122/oink/map_read_edge_label.cpp0000644000175000017500000000135311536442217020730 0ustar mathieumathieu#include "typedefs.h" #include "string.h" #include "keyvalue.h" using namespace MAPREDUCE_NS; #define MAXLINE 1024 /* ---------------------------------------------------------------------- read_edge_label read edges and labels from file file format = 2 vertices and integer label per line output: key = Vi Vj, value = label ------------------------------------------------------------------------- */ void read_edge_label(int itask, char *file, KeyValue *kv, void *ptr) { char line[MAXLINE]; EDGE edge; int label; FILE *fp = fopen(file,"r"); 
while (fgets(line,MAXLINE,fp)) { sscanf(line,"%lu %lu %d",&edge.vi,&edge.vj,&label); kv->add((char *) &edge,sizeof(EDGE),(char *) &label,sizeof(int)); } fclose(fp); } mrmpi-1.0~20131122/oink/map_rmat_generate.h0000644000175000017500000000117011535732267020157 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #ifndef OINK_RMAT_GENERATE_H #define OINK_RMAT_GENERATE_H #include "typedefs.h" // data structure for RMAT parameters struct RMAT_struct { uint64_t order; uint64_t ngenerate; int nlevels; int nnonzero; double a,b,c,d,fraction; }; #endif mrmpi-1.0~20131122/oink/map_read_vertex_label.cpp0000644000175000017500000000132611536442217021341 0ustar mathieumathieu#include "typedefs.h" #include "string.h" #include "keyvalue.h" using namespace MAPREDUCE_NS; #define MAXLINE 1024 /* ---------------------------------------------------------------------- read_vertex_label read vertices and labels from file file format = vertex and integer label per line output: key = Vi, value = label ------------------------------------------------------------------------- */ void read_vertex_label(int itask, char *file, KeyValue *kv, void *ptr) { char line[MAXLINE]; VERTEX v; int label; FILE *fp = fopen(file,"r"); while (fgets(line,MAXLINE,fp)) { sscanf(line,"%lu %d",&v,&label); kv->add((char *) &v,sizeof(VERTEX),(char *) &label,sizeof(int)); } fclose(fp); } mrmpi-1.0~20131122/oink/degree.h0000644000175000017500000000134411535754363015744 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories 
Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #ifdef COMMAND_CLASS CommandStyle(degree,Degree) #else #ifndef OINK_DEGREE_H #define OINK_DEGREE_H #include "command.h" namespace OINK_NS { class Degree : public Command { public: Degree(class OINK *); void run(); void params(int, char **); private: int duplicate; static void print(char *, int, char *, int, void *); }; } #endif #endif mrmpi-1.0~20131122/oink/map_read_edge.cpp0000644000175000017500000000122411535732267017574 0ustar mathieumathieu#include "typedefs.h" #include "string.h" #include "keyvalue.h" using namespace MAPREDUCE_NS; #define MAXLINE 1024 /* ---------------------------------------------------------------------- read_edge read edges from file, formatted with 2 vertices per line output: key = Vi Vj, value = NULL ------------------------------------------------------------------------- */ void read_edge(int itask, char *file, KeyValue *kv, void *ptr) { char line[MAXLINE]; EDGE edge; FILE *fp = fopen(file,"r"); while (fgets(line,MAXLINE,fp)) { sscanf(line,"%lu %lu",&edge.vi,&edge.vj); kv->add((char *) &edge,sizeof(EDGE),NULL,0); } fclose(fp); } mrmpi-1.0~20131122/oink/tri_find.h0000644000175000017500000000251011535732267016302 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */ #ifdef COMMAND_CLASS CommandStyle(tri_find,TriFind) #else #ifndef OINK_TRI_FIND_H #define OINK_TRI_FIND_H #include "command.h" #include "keyvalue.h" using MAPREDUCE_NS::KeyValue; namespace OINK_NS { class TriFind : public Command { public: TriFind(class OINK *); void run(); void params(int, char **); private: static void print(char *, int, char *, int, void *); static void map_edge_vert(uint64_t, char *, int, char *, int, KeyValue *, void *); static void reduce_first_degree(char *, int, char *, int, int *, KeyValue *, void *); static void reduce_second_degree(char *, int, char *, int, int *, KeyValue *, void *); static void map_low_degree(uint64_t, char *, int, char *, int, KeyValue *, void *); static void reduce_nsq_angles(char *, int, char *, int, int *, KeyValue *, void *); static void reduce_emit_triangles(char *, int, char *, int, int *, KeyValue *, void *); }; } #endif #endif mrmpi-1.0~20131122/oink/map_read_vertex_weight.cpp0000644000175000017500000000135411536440627021555 0ustar mathieumathieu#include "typedefs.h" #include "string.h" #include "keyvalue.h" using namespace MAPREDUCE_NS; #define MAXLINE 1024 /* ---------------------------------------------------------------------- read_vertex_weight read vertices and weights from file file format = vertex and floating point weight per line output: key = Vi, value = weight ------------------------------------------------------------------------- */ void read_vertex_weight(int itask, char *file, KeyValue *kv, void *ptr) { char line[MAXLINE]; VERTEX v; WEIGHT weight; FILE *fp = fopen(file,"r"); while (fgets(line,MAXLINE,fp)) { sscanf(line,"%lu %lg",&v,&weight); kv->add((char *) &v,sizeof(VERTEX),(char *) &weight,sizeof(WEIGHT)); } fclose(fp); } mrmpi-1.0~20131122/oink/map_read_edge_weight.cpp0000644000175000017500000000140111536440627021135 0ustar mathieumathieu#include "typedefs.h" #include "string.h" #include "keyvalue.h" using 
namespace MAPREDUCE_NS; #define MAXLINE 1024 /* ---------------------------------------------------------------------- read_edge_weight read edges and weights from file file format = 2 vertices and floating point weight per line output: key = Vi Vj, value = weight ------------------------------------------------------------------------- */ void read_edge_weight(int itask, char *file, KeyValue *kv, void *ptr) { char line[MAXLINE]; EDGE edge; WEIGHT weight; FILE *fp = fopen(file,"r"); while (fgets(line,MAXLINE,fp)) { sscanf(line,"%lu %lu %lg",&edge.vi,&edge.vj,&weight); kv->add((char *) &edge,sizeof(EDGE),(char *) &weight,sizeof(WEIGHT)); } fclose(fp); } mrmpi-1.0~20131122/oink/wordfreq.cpp0000644000175000017500000000531111540760561016664 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. 
------------------------------------------------------------------------- */ #include "mpi.h" #include "string.h" #include "stdlib.h" #include "wordfreq.h" #include "object.h" #include "style_map.h" #include "style_reduce.h" #include "style_scan.h" #include "error.h" #include "blockmacros.h" #include "mapreduce.h" #include "keyvalue.h" using namespace OINK_NS; using namespace MAPREDUCE_NS; struct Count { int n,limit,flag; }; /* ---------------------------------------------------------------------- */ WordFreq::WordFreq(OINK *oink) : Command(oink) { ninputs = 1; noutputs = 1; } /* ---------------------------------------------------------------------- */ void WordFreq::run() { int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); // MR = word : NULL int nfiles = 0; MapReduce *mr = obj->input(1,read_words,&nfiles); uint64_t nwords = mr->kv_stats(0); int nfiles_all; MPI_Allreduce(&nfiles,&nfiles_all,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD); // unique words and their count // before processing it, make a copy of input MR if it is permanent if (obj->permanent(mr)) mr = obj->copy_mr(mr); mr->collate(NULL); uint64_t nunique = mr->reduce(count,NULL); obj->output(1,mr,print_string_int,NULL); // frequency stats // before processing it, make a copy of output MR if it is permanent if (ntop) { if (obj->permanent(mr)) mr = obj->copy_mr(mr); mr->sort_values(-1); Count count; count.n = 0; count.limit = 10; count.flag = 0; mr->map(mr,output,&count); mr->gather(1); mr->sort_values(-1); count.n = 0; count.limit = ntop; count.flag = 1; mr->map(mr,output,&count); } char msg[128]; sprintf(msg,"WordFreq: %d files, %lu words, %lu unique", nfiles_all,nwords,nunique); if (me == 0) error->message(msg); obj->cleanup(); } /* ---------------------------------------------------------------------- */ void WordFreq::params(int narg, char **arg) { if (narg != 1) error->all("Illegal wordfreq command"); ntop = atoi(arg[0]); } /* ---------------------------------------------------------------------- */ void 
WordFreq::output(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) { Count *count = (Count *) ptr; if (count->n >= count->limit) return; count->n++; int n = *(int *) value; if (count->flag) printf("%d %s\n",n,key); else kv->add(key,keybytes,(char *) &n,sizeof(int)); } mrmpi-1.0~20131122/oink/wordfreq.h0000644000175000017500000000147011535732267016341 0ustar mathieumathieu/* ---------------------------------------------------------------------- OINK - scripting wrapper on MapReduce-MPI library http://www.sandia.gov/~sjplimp/mapreduce.html, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level MR-MPI directory. ------------------------------------------------------------------------- */ #ifdef COMMAND_CLASS CommandStyle(wordfreq,WordFreq) #else #ifndef OINK_WORD_FREQ_H #define OINK_WORD_FREQ_H #include "command.h" #include "keyvalue.h" using MAPREDUCE_NS::KeyValue; namespace OINK_NS { class WordFreq : public Command { public: WordFreq(class OINK *); void run(); void params(int, char **); private: int ntop; static void output(uint64_t, char *, int, char *, int, KeyValue *, void *); }; } #endif #endif mrmpi-1.0~20131122/oink/Makefile0000644000175000017500000000317511524065425015774 0ustar mathieumathieu# OINK multiple-machine Makefile SHELL = /bin/sh #.IGNORE: # Definitions ROOT = oink EXE = $(ROOT)_$@ SRC = $(wildcard *.cpp) INC = $(wildcard *.h) OBJ = $(SRC:.cpp=.o) # List of all targets help: @echo '' @echo 'make clean-all delete all object files' @echo 'make clean-machine delete object files for one machine' @echo 'make tar oink_src.tar.gz of src dir' @echo 'make makelib update Makefile.lib for library build' @echo 'make makelist update Makefile.list used by old makes' @echo '' @echo 'make machine build OINK where machine is one of:' @echo '' @files="`ls MAKE/Makefile.*`"; \ for file in $$files; do head -1 $$file; done @echo '' # Build the code .DEFAULT: @test -f 
MAKE/Makefile.$@ @if [ ! -d Obj_$@ ]; then mkdir Obj_$@; fi @python Make.py @cp -p *.cpp *.h Obj_$@ @cp MAKE/Makefile.$@ Obj_$@/Makefile @cd Obj_$@; \ $(MAKE) $(MFLAGS) "OBJ = $(OBJ)" "INC = $(INC)" "EXE = ../$(EXE)" ../$(EXE) @if [ -d Obj_$@ ]; then cd Obj_$@; rm -f $(SRC) $(INC) Makefile*; fi # Remove machine-specific object files clean: @echo 'make clean-all delete all object files' @echo 'make clean-machine delete object files for one machine' clean-all: rm -rf Obj_* clean-%: rm -rf Obj_$(@:clean-%=%) # Create a tarball of this dir tar: @cd ..; tar cvzf src/$(ROOT)_src.tar.gz \ src/Make* src/MAKE src/*.cpp src/*.h --exclude=*/.svn @echo "Created $(ROOT)_src.tar.gz" # Update Makefile.lib and Makefile.list makelib: @$(SHELL) Make.sh style @$(SHELL) Make.sh Makefile.lib makelist: @$(SHELL) Make.sh style @$(SHELL) Make.sh Makefile.list mrmpi-1.0~20131122/doc/0000755000175000017500000000000012252022520014117 5ustar mathieumathieumrmpi-1.0~20131122/doc/kv_add.txt0000644000175000017500000000300411734437536016132 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line KeyValue add() method :h3 void KeyValue::add(char *key, int keybytes, char *value, int valuebytes) void KeyValue::add(int n, char *keys, int keybytes, char *values, int valuebytes) void KeyValue::add(int n, char *keys, int *keybytes, char *values, int *valuebytes) :pre The methods are called by the mymap(), mycompress(), and myreduce() functions in your program to register key/value pairs with the KeyValue object stored by the MapReduce object whose map(), compress(), or reduce() method was invoked. The first version registers a single key/value pair. The second version registers N key/value pairs, where the keys are all the same length and the values are all the same length. The third version registers a set of N key/value pairs where the length of each key and of each value is specified. 
As explained "here"_Program.html, from the perspective of the MR-MPI library, keys and values are variable-length byte strings. To register such strings, you must specify their length in bytes. This is done via the keybytes and valuebytes arguments, either as a single length or as a vector of lengths. Note that if your key or value is a text string, it should typically include a trailing "0" to terminate the string. See the "Settings"_settings.html and "Technical Details"_Technical.html sections for details on the byte-alignment of keys and values you register with these add methods. mrmpi-1.0~20131122/doc/scrunch.txt0000644000175000017500000000264711734437536016363 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce scrunch() method :h3 uint64_t MapReduce::scrunch(int nprocs, char *key, int keybytes) :pre This calls the scrunch() method of a MapReduce object, which gathers a KeyValue object onto nprocs and collapses it into a KeyMultiValue object. This method is exactly the same as performing a "gather()"_gather.html followed by a "collapse()"_collapse.html. The method returns the total number of key/value pairs in the KeyMultiValue object which should be one for each of the nprocs. The nprocs argument is used by the "gather()"_gather.html portion of the operation. See the "gather()"_gather.html doc page for details. The key and keybytes arguments are used by the "collapse()"_collapse.html portion of the operation. See the "collapse()"_collapse.html doc page for details. Note that if nprocs > 1, then the same key will be assigned to the collapsed key/multi-value pairs on each processor. This method can be used to collect a set of key/value pairs to use in a "reduce()"_reduce.html method so that they can all be passed to a single invocation of your myreduce() function for output.
This method is a parallel operation ("gather()"_gather.html), followed by an on-processor operation ("collapse()"_collapse.html). :line [Related methods]: "collapse()"_collapse.html, "gather()"_gather.html mrmpi-1.0~20131122/doc/settings.txt0000644000175000017500000003423211734437536016551 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line Settings and defaults :h3 These are internal library variables that can be set by your program: mapstyle = 0 (chunk) or 1 (stride) or 2 (master/slave) all2all = 0 (irregular communication) or 1 (use MPI_Alltoallv) verbosity = 0 (none) or 1 (summary) or 2 (histogrammed) timer = 0 (none) or 1 (summary) or 2 (histogrammed) memsize = N = number of Mbytes per page of memory minpage = N = # of pages to pre-allocate per processor maxpage = N = max # of pages allocatable per processor freepage = 1 if memory pages are freed in between operations, 0 if held outofcore = 1 if even 1-page data sets are forced to disk, 0 if not, -1 if cannot write to disk zeropage = 1 if zero out every allocated page, 0 if not keyalign = N = byte-alignment of keys valuealign = N = byte-alignment of values fpath = string :ul All the settings except {fpath} are set in the following manner from C++: MapReduce *mr = new MapReduce(MPI_COMM_WORLD); mr->verbosity = 1; :pre Because {fpath} takes a string argument, it is set with the following function: mr->set_fpath(char *string); :pre See the "C interface"_Interface_c.html and "Python interface"_Interface_python.html doc pages for how to set the various settings from C and Python. As documented below, some of these settings can be changed at any time. Others only have effect if they are changed before the MapReduce object begins to operate on KeyValue and KeyMultiValue objects. :line The {mapstyle} setting determines how the N map tasks are assigned to the P processors by the "map()"_map.html method. 
A value of 0 means split the tasks into "chunks" so that processor 0 is given tasks from 0 to N/P, proc 1 is given tasks from N/P to 2N/P, etc. Proc P-1 is given tasks from N - N/P to N. A value of 1 means "strided" assignment, so proc 0 is given tasks 0,P,2P,etc and proc 1 is given tasks 1,P+1,2P+1,etc and so forth. A value of 2 uses a "master/slave" paradigm for assigning tasks. Proc 0 becomes the "master"; the remaining processors are "slaves". Each is given an initial task by the master and reports back when it is finished. It is then assigned the next available task which continues until all tasks are completed. This is a good choice if the CPU time required by various mapping tasks varies greatly, since it will tend to load-balance the work across processors. Note however that proc 0 performs no mapping tasks. This setting can be changed at any time. The default value for {mapstyle} is 0. :line The {all2all} setting determines how point-to-point communication is done when the "aggregate()"_aggregate.html method is invoked, either by itself or as part of a "collate()"_collate.html. A value of 0 means custom routines for irregular communication are used. A value of 1 means the MPI_Alltoallv() function from the MPI library is used. The results should be identical. Which is faster depends on the MPI library implementation of the MPI standard on a particular machine. This setting can be changed at any time. The default value for {all2all} is 1. :line The {verbosity} setting determines how much diagnostic output each library call prints to the screen. A value of 0 means "none". A value of 1 means a "summary" of the results across all processors is printed, typically a count of total key/value pairs and the memory required to store them. A value of 2 prints the summary results and also a "histogram" of these quantities by processor, so that you can detect memory usage imbalance. This setting can be changed at any time. The default value for {verbosity} is 0. 
:line The {timer} setting prints out timing information for each call to the library. A value of 0 means "none". A value of 1 invokes an MPI_Barrier() at the beginning and end of the operation and prints the elapsed time, which will be the same on all processors. A value of 2 invokes no MPI_Barrier() calls and prints a one-line summary of timing results across all processors and also a "histogram" of the time on each processor, so that you can detect computational imbalance. This setting can be changed at any time. The default value for {timer} is 0. :line The {memsize} setting determines the page size (in Mbytes) of each page of memory allocated by the MapReduce object to perform its operations. The number of pages required by different methods varies; 1 to 7 is typical. The {freepage} setting (see below) determines whether pages are freed or not between operations, once allocated. See "this section"_Interface_c++.html for a summary of memory page requirements. The minimum allowed value for the {memsize} setting is 1, meaning 1 Mbyte pages. IMPORTANT NOTE: The maximum value is unlimited, but you should ensure the total memory consumed by all pages allocated by all the MapReduce objects you create does not exceed the physical memory available (which may be shared by several processors if running on a multi-core node). If you exceed it, then many systems will allocate virtual memory, which will typically cause MR-MPI library operations to run very slowly and thrash the disk. If the data owned by a processor in its collection of KeyValue or KeyMultiValue pairs fits within one page, then no disk I/O is performed; the MR-MPI library runs in-core. If data exceeds the page size, then it is written to temporary disk files and read back in for subsequent operations; the MR-MPI library runs out-of-core. See "this section"_Technical.html#ooc for more discussion of out-of-core operations. These files are created on a per-processor basis and are deleted when no longer needed.
Thus if you delete all MapReduce objects that you have instantiated, no such files should exist at the end of the user program. If you should need to clean them up yourself (e.g. if your program crashes), see the discussion of the {fpath} setting which describes how they are named and where they reside. If you set {memsize} small, then processing a large data set will induce many reads and writes to disk. If you make it large, then the reads and writes will happen in large chunks, which generally yields better I/O performance. However, past a few MBytes in size, there may be little gain in I/O performance. This setting can only be changed before the first KeyValue or KeyMultiValue object is created by the MapReduce object. If changed after that, it will have no effect. The default value for {memsize} is 64, meaning 64 Mbyte pages. The default value can be changed by a compiler setting when the MR-MPI library is built. Using this flag for the compilation of the src/mapreduce.cpp file: -DMRMPI_MEMSIZE=n :pre where n = 16, for example, will build the library with the default set to 16 Mbyte pages, instead of 64. :line The {minpage} setting determines how many memory pages each processor pre-allocates as a block of contiguous memory when the MapReduce object performs its first operation. {Minpage} can be set to a number >= 0. Note that if the {freepage} setting is 1 then memory pages will be freed after each MapReduce operation. This will include the initial {minpage} block of pages if none of them are in use. This setting can only be changed before the first KeyValue or KeyMultiValue object is created by the MapReduce object. If changed after that, it will have no effect. The default value for {minpage} is 0. :line The {maxpage} setting determines the maximum number of pages a processor can ever allocate when performing MapReduce operations. Normally this will be no more than 7; see the discussion in "this section"_Technical.html#ooc for more details.
{Maxpage} can be set to a number >= 0. A value of 0 means there is no limit; new pages are allocated whenever they are needed. This setting can be changed at any time, though previously-allocated pages are not deleted if {maxpage} is set to a smaller number. The default value for {maxpage} is 0. :line The {freepage} setting determines whether or not the MapReduce object frees unused memory pages after each operation is completed. If {freepage} is set to 0, then once allocated, pages are never deallocated until the MapReduce object itself is deleted. In this case pages are reused by successive operations performed by the library. If {freepage} is set to 1, then after each operation, pages used by the operation are freed, and then reallocated (as needed) by the next operation. The default {freepage} setting of 1 is useful to limit memory use, particularly if your code uses several MapReduce objects or you are running in parallel on a multi-core node where all the cores share the same physical memory. If memory is not an issue, setting {freepage} to 0 may be somewhat faster, since memory pages will not be repeatedly allocated and freed. See the {zeropage} setting for an additional source of overhead when pages are repeatedly freed and allocated. If the {outofcore} setting is 1, then setting {freepage} to 1 means that all memory pages will be released after each MapReduce operation. If {outofcore} is set to 0, and data fits in a single page, then the MapReduce object will always hold onto a single page of memory for that data even if {freepage} is set to 1. This setting can be changed at any time. The default value for {freepage} is 1. :line The {outofcore} setting determines whether data that could fit in a single page of memory, within a KeyValue or KeyMultiValue object, will still be written to disk. If the data does not fit in a single page, it is always written to disk. If {outofcore} is 1, then disk files will be written.
If {outofcore} is 0, then disk files are not written if not needed. If {outofcore} is -1, then disk files cannot be created and an error will result if they are needed. The latter setting is a way to ensure that your data set fits in memory. Note that if the {freepage} setting and the {outofcore} setting are both 1, then all memory pages will be released after each MapReduce operation. This can be useful if your application uses many MapReduce objects and wants to limit its memory use. This setting can be changed at any time. The default value for {outofcore} is 0. :line The {zeropage} setting determines whether newly allocated pages are filled with 0 bytes when allocated by the MapReduce object. Note that this does not apply to reused pages that were not freed. A setting of 1 means zero each page. A setting of 0 leaves them uninitialized. Normally it should not be necessary to zero out allocated memory, and it only consumes time, especially if large pages are being used and are freed and allocated often (e.g. with {freepage} set to 1). But it can be useful when debugging with memory checkers, which may flag certain bytes within pages as uninitialized, even when this doesn't matter. This is because the byte-alignment rules for keys and values (discussed below) can skip over bytes in the page when data is written to the page. This setting can be changed at any time. The default value for {zeropage} is 0. :line The {keyalign} and {valuealign} settings determine the byte alignment of keys and values generated by the user program when they are stored inside the library and passed back to the user program. A setting of N means N-byte alignment. N must always be a power of two. As explained in "this section"_Program.html, keys and values are variable-length strings of bytes. The MR-MPI library knows nothing of their contents and simply treats them as contiguous chunks of bytes.
"This section"_Technical.html#align explains why it may be important to insure proper alignment of numeric data such as integers and floating point values. Because keys are stored following integer lengths, keys are always at least 4-byte aligned. A larger alignment value can be specified if desired. Because they follow keys, which may be of arbitrary length (e.g. a string), values can be 1-byte aligned. Note that if all keys are integers, then values will also be 4-byte aligned. A larger alignment value can be specified if desired. When a multi-value is returned to the user program, e.g. by the callback of a "reduce()"_reduce.html method, only the first value in the multi-value is aligned to the {valuealign} setting. Subsequent values are packed one after the other. If all values are the same data-type, e.g. integers, then they will all have the same alignment. However, if the values are mixed data types (e.g. strings and integers), then you may need to insure each value is aligned properly before using it in your myreduce() function. See the "Technical Details"_Technical.html#align for more discussion of data alignment. These settings can only be changed before the first KeyValue or KeyMultiValue object is created by the MapReduce object. If changed after that, they will have no effect. The default value for {keyalign} and {valuealign} is 4, meaning 4-byte alignment of keys and values. :line The {fpath} setting determines the pathname for all disk files created by the MR-MPI library when it runs in "out-of-core mode"_Technical.html#ooc. Note that it is not a pathname for user data files read by the "map()"_map.html method. Those should be specified directly as part of the filename. Out-of-core disk files are created with names like "fpath/mrmpi.kv,N,M,P" where "kv" is an file-type string ("kv", or "kmv" or "sort" or "part" or "set"), N is a number unique to each MapReduce object, M is a file counter, and P is the processor ID. fpath/mrmpi.kmv.N.P. 
Sort files are created by the sorting methods. Part and set files are created by "collate()"_collate.html or "convert()"_convert.html methods. Setting {fpath} may be useful for specifying a disk local to each processor, or for a parallel file system that each processor can access. This setting can only be changed before the first KeyValue or KeyMultiValue object is created by the MapReduce object. If changed after that, it will have no effect. The default value for {fpath} is ".", which means the current working directory. The default value can be changed by a compiler setting when the MR-MPI library is built. Using this flag for the compilation of the src/mapreduce.cpp file: -DMRMPI_FPATH=foo :pre where foo is the desired pathname, will build the library with the default fpath set to foo, instead of the current working directory. mrmpi-1.0~20131122/doc/clone.txt0000644000175000017500000000315111734437536016005 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce clone() method :h3 uint64_t MapReduce::clone() :pre This calls the clone() method of a MapReduce object, which converts a KeyValue object directly into a KeyMultiValue object. It simply turns each key in KeyValue object into a key in the new KeyMultiValue object, with the same value. The method returns the total number of key/value pairs in the KeyMultiValue object, which will be the same as the number in the KeyValue object. This method essentially enables a KeyValue object to be passed directly to a reduce operation, which requires a KeyMultiValue object as input. Typically you would only do this if the keys in the KeyValue object are already unique, to avoid the extra overhead of an "aggregate()"_aggregate.html or "convert()"_convert.html or "collate()"_collate.html, but this is not required. If they are not, then there will also be duplicate keys in the KeyMultiValue object. 
Note that one of the "map()"_map.html methods allows an existing KeyValue object to be passed as input to a user mymap() function, generating a new KeyValue object in the process. Thus there is typically no need to invoke clone() followed by "reduce()"_reduce.html. This method is an on-processor operation, requiring no communication. When run in parallel, the key/value pairs of the new KeyMultiValue object are stored on the same processor which owns the corresponding KeyValue pairs. :line [Related methods]: "collapse()"_collapse.html, "collate()"_collate.html, "convert()"_convert.html mrmpi-1.0~20131122/doc/reduce.txt0000644000175000017500000001404711734437536016162 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce reduce() method :h3 MapReduce multivalue_blocks() method :h3 MapReduce multivalue_block() method :h3 MapReduce multivalue_block_select() method :h3 uint64_t MapReduce::reduce(void (*myreduce)(char *, int, char *, int, int *, KeyValue *, void *), void *ptr) :pre uint64_t MapReduce::multivalue_blocks(int &nblocks) :pre int MapReduce::multivalue_block(int iblock, char **ptr_multivalue, int **ptr_valuesizes) :pre void MapReduce::multivalue_block_select(int which) :pre This calls the reduce() method of a MapReduce object, passing it a function pointer to a myreduce function you write. It operates on a KeyMultiValue object, calling your myreduce function once for each unique key/multi-value (KMV) pair owned by that processor. A new KeyValue object is created which stores all the key/value pairs generated by your myreduce() function. The method returns the total number of new key/value pairs stored by all processors. You can give this method a pointer (void *ptr) which will be returned to your myreduce() function. See the "Technical Details"_Technical.html section for why this can be useful. Just specify a NULL if you don't need this.
In this example the user function is called myreduce() and it must have the following interface, which is the same as that used by the "compress()"_compress.html method: void myreduce(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) :pre A single KMV pair is passed to your function from the KeyMultiValue object stored by the MapReduce object. The key is typically unique to this reduce task and the multi-value is a list of the nvalues associated with that key in the KeyMultiValue object. There are two possibilities for a KMV pair returned to your function. The first is that it fits in one page of memory allocated by the MapReduce object, which is the usual case. See the {memsize} "setting"_settings.html for details on memory allocation. In this case, the char *multivalue argument is a pointer to the beginning of the multi-value which contains all nvalues, packed one after the other. The int *valuebytes argument is an array which stores the length of each value in bytes. If needed, it can be used by your function to compute an offset into char *multivalue for where each individual value begins. Your function is also passed a kv pointer to a new KeyValue object created and stored internally by the MapReduce object. If the KMV pair does not fit in one page of memory, then the meaning of the arguments passed to your function is changed. Your function must call two additional library functions in order to retrieve a block of values that does fit in memory, and process them one block at a time. In this case, the char *multivalue argument will be NULL and the nvalues argument will be 0. Either of these can be tested for within your function. If you know that no KMV pair will overflow one page of memory, then the test is not needed. The meaning of the kv and ptr arguments is the same as discussed above. However, the int *valuebytes argument is changed to be a pointer to the MapReduce object.
This is to allow you to make the following two kinds of calls back to the library: MapReduce *mr = (MapReduce *) valuebytes; int nblocks; uint64_t nvalues_total = mr->multivalue_blocks(nblocks); for (int iblock = 0; iblock < nblocks; iblock++) \{ int nv = mr->multivalue_block(iblock,&multivalue,&valuebytes); for (int i = 0; i < nv; i++) \{ process each value within the block of values \} \} :pre The call to multivalue_blocks() returns both the total number of values (as an unsigned 64-bit integer), and the number of blocks of values in the multi-value. Each call to multivalue_block() retrieves one block of values. The number of values in the block is returned, as nv in this case. The multivalue and valuebytes arguments are pointers to a char * and int * (i.e. a char ** and int **), which will be set to point to the block of values and their lengths respectively, so they can then be used just as the multivalue and valuebytes arguments in the myreduce() callback itself (when the values do not exceed available memory). Note that in this example we are re-using (and thus overwriting) the original multivalue and valuebytes arguments as local variables. Also note that your myreduce() function can call multivalue_block() as many times as it wishes and process the blocks of values multiple times or in any order, though looping through blocks in ascending order will typically give the best disk I/O performance. If you need to load and process two blocks of values simultaneously (e.g. in a double loop), then the multivalue_block_select() function can be called with which = 1 or 2 to specify a page of memory to read a block of values into. This should be set just before the call to multivalue_block(), to insure one block of values is not overwritten by reading a second block. Your myreduce() function can produce key/value pairs (though this is not required) which it registers with the MapReduce object by calling the "add()"_kv_add.html method of the KeyValue object. 
The syntax for registration is described on the doc page of the KeyValue "add()"_kv_add.html method. Alternatively, your myreduce() function can write information to an output file. See the "Settings"_settings.html and "Technical Details"_Technical.html sections for details on the byte-alignment of keys and values that are passed to your myreduce() function and on those you register with the KeyValue "add()"_kv_add.html methods. Note that only the first value of a multi-value (or of each block of values) passed to your myreduce() function will be aligned to the {valuealign} "setting"_settings.html. This method is an on-processor operation, requiring no communication. When run in parallel, each processor performs a myreduce() on each of the key/multi-value pairs it owns and stores any new key/value pairs it generates. :line [Related methods]: "Keyvalue add()"_kv_add.html, "map()"_map.html mrmpi-1.0~20131122/doc/scan.txt0000644000175000017500000000764511734437536015645 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce scan() method :h3 uint64_t MapReduce::scan(void (*myscan)(char *, int, char *, int, void *), void *ptr) uint64_t MapReduce::scan(void (*myscan)(char *, int, char *, int, int *, void *), void *ptr) :pre This calls the scan() method of a MapReduce object, passing it a function pointer to a myscan function you write. Depending on whether you pass it a function for processing key/value (KV) or key/multi-value (KMV) pairs, it will call your myscan function once for each KV or KMV pair owned by that processor. The KV or KMV pairs stored by the MapReduce object are not altered by this operation, nor are you allowed to emit any new KV pairs. Thus your myscan function is not passed a KV pointer. This is a useful way to simply scan over the existing KV or KMV pairs and process them in some way, e.g. for debugging or statistics generation or output.
Contrast this method with the "map()"_map.html method variant that takes a MapReduce object as input and returns KV pairs to your mymap() function. If that MapReduce object is the same as the caller and if the addflag parameter is set to 0, your existing KV pairs are deleted by this action. If the addflag parameter is set to 1, and you emit no new KV pairs, then your existing KV pairs are unchanged. However a copy of all your KV pairs is first performed to insure this outcome. The scan() method avoids this copy. Also contrast this method with the "reduce()"_reduce.html method which returns KMV pairs to your myreduce() function. Your existing KMV pairs are deleted by this action, and replaced with new KV pairs which you generate. You can give this method a pointer (void *ptr) which will be returned to your myscan() function. See the "Technical Details"_Technical.html section for why this can be useful. Just specify a NULL if you don't need this. In this example the user function is called myscan() and it must have one of the two following interfaces, depending on whether the MapReduce object currently contains KV or KMV pairs: void myscan(char *key, int keybytes, char *value, int valuebytes, void *ptr) void myscan(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, void *ptr) :pre Either a single KV or KMV pair is passed to your function from the KeyValue or KeyMultiValue object stored by the MapReduce object. In the case of KMV pairs, the key is typically unique to this scan task and the multi-value is a list of the nvalues associated with that key in the KeyMultiValue object. There are two possibilities for a KMV pair returned to your function. The first is that it fits in one page of memory allocated by the MapReduce object, which is the usual case. Or it does not, in which case the meaning of the arguments passed to your function is changed. 
This behavior is identical to that of the "reduce()"_reduce.html method, including the meaning of the arguments returned to your myscan() function, and the 3 additional library functions you can call to retrieve additional values in the KMV pair, namely: uint64_t MapReduce::multivalue_blocks(int &nblocks) int MapReduce::multivalue_block(int iblock, char **ptr_multivalue, int **ptr_valuesizes) void MapReduce::multivalue_block_select(int which) :pre See the "reduce()"_reduce.html method doc page for details. See the "Settings"_settings.html and "Technical Details"_Technical.html sections for details on the byte-alignment of keys and values that are passed to your myscan() function. Note that only the first value of a multi-value (or of each block of values) passed to your myscan() function will be aligned to the {valuealign} "setting"_settings.html. This method is an on-processor operation, requiring no communication. When run in parallel, each processor performs a myscan() on each of the KV or KMV pairs it owns.
:line [Related methods]: "map()"_map.html, "reduce()"_reduce.html mrmpi-1.0~20131122/doc/map.txt0000644000175000017500000003024412216067724015457 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce map() method :h3 Variant 1: uint64_t MapReduce::map(int nmap, void (*mymap)(int, KeyValue *, void *), void *ptr) uint64_t MapReduce::map(int nmap, void (*mymap)(int, KeyValue *, void *), void *ptr, int addflag) :pre Variant 2: uint64_t MapReduce::map(int nstr, char **strings, int self, int recurse, int readfile, void (*mymap)(int, char *, KeyValue *, void *), void *ptr) uint64_t MapReduce::map(int nstr, char **strings, int self, int recurse, int readfile, void (*mymap)(int, char *, KeyValue *, void *), void *ptr, int addflag) :pre Variant 3: uint64_t MapReduce::map(int nmap, int nstr, char **strings, int recurse, int readfile, char sepchar, int delta, void (*mymap)(int, char *, int, KeyValue *, void *), void *ptr) uint64_t MapReduce::map(int nmap, int nstr, char **strings, int recurse, int readfile, char sepchar, int delta, void (*mymap)(int, char *, int, KeyValue *, void *), void *ptr, int addflag) :pre Variant 4: uint64_t MapReduce::map(int nmap, int nstr, char **strings, int recurse, int readfile, char *sepstr, int delta, void (*mymap)(int, char *, int, KeyValue *, void *), void *ptr) uint64_t MapReduce::map(int nmap, int nstr, char **strings, int recurse, int readfile, char *sepstr, int delta, void (*mymap)(int, char *, int, KeyValue *, void *), void *ptr, int addflag) :pre Variant 5: uint64_t MapReduce::map(MapReduce *mr2, void (*mymap)(uint64_t, char *, int, char *, int, KeyValue *, void *), void *ptr) uint64_t MapReduce::map(MapReduce *mr2, void (*mymap)(uint64_t, char *, int, char *, int, KeyValue *, void *), void *ptr, int addflag) :pre This calls the map() method of a MapReduce object. 
A function pointer to a mapping function you write is specified as an argument. This method either creates a new KeyValue object to store all the key/value pairs generated by your mymap function, or adds them to an existing KeyValue object. The method returns the total number of key/value pairs in the KeyValue object. There are several variants of the map() methods to allow for different ways to process input data. This also induces variants of the callback mymap() function. For the first set of variants (with or without addflag) you simply specify a total number of map tasks {nmap} to perform across all processors. The index of a map task is passed back to your mymap() function. The MapReduce library assigns map tasks to processors; see more details below. For the second set of variants, you specify {nstr} and {strings} which are file and/or directory names. Using these strings, a list of filenames is generated. Each filename in the list is passed back to your mymap() function which can open the file and process it. If {self} is 0, then only processor 0 generates the list of filenames, and the MapReduce library assigns files to processors; see more details below. If {self} is 1, then each processor generates its own list of filenames and those files are assigned to that processor. Note that in the {self} = 0 case, it is assumed that every processor can read any file that is assigned to it. Also note that with {self} = 1 you can assign files to a processor that reside on a disk local to a processor, or with a parallel disk system you can pass different strings to different processors so that each processor reads from a different set of files/directories. The list of filenames is generated in the following manner. Each of the {strings} is checked for whether it is a file or directory. If it is a file, it is added to the list of files. If it is a directory, the directory is opened and all the files in it are added to the list of files.
If the {recurse} flag is set to 1, then if sub-directories are found in the directory, they are opened and the files in them are also added to the list of files (and so forth, recursively). The {readfile} setting adds one additional wrinkle. If {readfile} is 1, then instead of adding each filename to the list, each file is opened, and filenames are read from that file and added to the list. In this mode, each file should contain one filename per line. Blank lines are not allowed. Leading and trailing whitespace around each filename is OK. The number of files that are generated and processed can be accessed after the map() method is invoked, via the variable mapfilecount, e.g. MapReduce *mr = new MapReduce(); mr->map(nstr,strings,1,0,1,mymap,NULL); int ntotalfiles = mr->mapfilecount; :pre The third and fourth set of variants allow large file(s) to be broken into chunks and one or more sections to be passed back to your mymap() function as a string so it can process it. {Nmap} is the number of chunks to generate from all the files in aggregate (not {nmap} chunks per file). As with the previous variant, you also specify {nstr}, {strings}, {recurse}, and {readfile}. This generates a list of filenames, the same as in the previous variant. The only difference is that no {self} setting is allowed, because only processor 0 does this. The specified {nmap} should be >= the number of files in the generated list; it is reset to the number of files if that is not the case. For the third set of variants you specify a separation character {sepchar}. For the fourth set of variants, you specify a separation string {sepstr}. The files in the generated list of files are split into {nmap} chunks with roughly equal numbers of bytes in each chunk. Think of all the files concatenated together and then split into {nmap} chunks.
For each call to your mymap() function, a chunk is read from a particular file, and passed to your function as a string, so your code does not read the file. See details below about the splitting methodology and the delta input parameter. For the fifth set of variants, you specify an existing MapReduce object mr2 with key/value pairs, which can either be this MapReduce object or another one. The key/value pairs from mr2 are passed back to your mymap() function, one key/value at a time, allowing you to generate new key/value pairs from an existing set. :line You can give any of the map() methods a pointer (void *ptr) which will be returned to your mymap() function. See the "Technical Details"_Technical.html section for why this can be useful. Just specify a NULL if you don't need this. The meaning of the final {addflag} argument is as follows. For all but the last variant, if {addflag} is omitted or is specified as 0, then map() will create a new KeyValue object, deleting any existing KeyValue object. If addflag is non-zero, then KV pairs generated by your mymap() function are added to an existing KeyValue object, which is created if needed. For the last variant, if the source of KeyValue pairs (mr2) is different than the MapReduce object mr, then the KV pairs in mr2 are not altered or deleted, regardless of the addflag setting. If addflag is 0, then the KeyValue object in mr is deleted, and newly generated KV pairs are added to a new KeyValue object. If addflag is 1, then newly generated KV pairs are added to the existing KeyValue object in mr. For the last variant, if the source of KeyValue pairs (mr2) is the same as MapReduce object mr, there are two possibilities. If addflag is 1, then newly generated KV pairs are added to the existing KeyValue object. If addflag is 0, then the existing KeyValue object is effectively replaced by the newly generated KV pairs. Note that the addflag=1 option requires the KeyValue object to first be copied. 
If your mymap() function will not generate any new KV pairs, then it is more efficient to use the "scan()"_scan.html method, which simply allows you to iterate over the existing KV pairs. :line In these examples the user function is called mymap() and it has one of four interfaces depending on which variant of the map() method is invoked: void mymap(int itask, KeyValue *kv, void *ptr) void mymap(int itask, char *file, KeyValue *kv, void *ptr) void mymap(int itask, char *str, int size, KeyValue *kv, void *ptr) void mymap(uint64_t itask, char *key, int keybytes, char *value, int valuebytes, KeyValue *kv, void *ptr) :pre In all cases, the final 2 arguments passed to your function are a pointer to a KeyValue object (kv) stored internally by the MapReduce object, and the original pointer you specified as an argument to the map() method, as void *ptr. In the first mymap() variant, itask is passed to your function with a value 0 <= itask < {nmap}, where {nmap} was specified in the map() call. For example, you could use itask to select a file from a list stored by your application. Your mymap() function could open and read the file or perform some other operation. In the second mymap() variant, itask will have a value 0 <= itask < nfiles, where nfiles is the number of filenames in the list of files that was generated. Your function is also passed a single filename, which it will presumably open and read. In the third mymap() variant, itask will have a value from 0 <= itask < {nmap}, where {nmap} was specified in the map() call and is the number of file segments generated. It is also passed a string of bytes (str) of length size read from one of the files. Size includes a trailing '\0' that is appended to the string. For map() methods that take files and a separation criterion as arguments, you must specify {nmap} >= nfiles, so that there is one or more map tasks per file.
For files that are split into multiple chunks, the split is done at occurrences of the separation character or string. You specify a delta of how many extra bytes to read with each chunk that will guarantee the splitting character or string is found within that many bytes. For example if the files are lines of text, you could choose a newline character '\n' as the sepchar, and a delta of 80 (if the longest line in your files is 80 characters). If the files are snapshots of simulation data where each snapshot is 1000 lines (no more than 80 characters per line), you could choose the first line of each snapshot (e.g. "Snapshot") as the sepstr, and a delta of 80000. Note that if the separation character or string is not found within delta bytes, an error will be generated. Also note that there is no harm in choosing a large delta so long as it is not larger than the chunk size for a particular file. If the separation criterion is a character (sepchar), the chunk of bytes passed to your mymap() function will start with the character after a sepchar, and will end with a sepchar (followed by a '\0'). If the separation criterion is a string (sepstr), the chunk of bytes passed to your mymap() function will start with sepstr, and will end with the character immediately preceding a sepstr (followed by a '\0'). Note that this means your mymap() function will be passed different byte strings if you specify sepchar = 'A' vs sepstr = "A". In the fourth mymap() variant, itask will have a value from 0 <= itask < nkey, where nkey is an unsigned 64-bit int and is the number of key/value pairs in the specified MapReduce object. Key and value are the byte strings for a single key/value pair and are of length keybytes and valuebytes respectively. :line The MapReduce library assigns map tasks to processors. Options for how it does this can be controlled by "MapReduce settings"_settings.html.
Basically, {nmap}/P tasks are assigned to each processor, where P is the number of processors in the MPI communicator you instantiated the MapReduce object with. Typically, your mymap() function will produce key/value pairs which it registers with the MapReduce object by calling the "add()"_kv_add.html method of the KeyValue object. The syntax for registration is described on the doc page of the KeyValue "add()"_kv_add.html method. See the "Settings"_settings.html and "Technical Details"_Technical.html sections for details on the byte-alignment of keys and values you register with the KeyValue "add()"_kv_add.html methods or that are passed to your mymap() function. Aside from the assignment of tasks to processors, this method is really an on-processor operation, requiring no communication. When run in parallel, each processor generates key/value pairs and stores them, independently of other processors. :line [Related methods]: "Keyvalue add()"_kv_add.html, "reduce()"_reduce.html mrmpi-1.0~20131122/doc/convert.txt0000644000175000017500000000241311734437536016365 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce convert() method :h3 uint64_t MapReduce::convert() :pre This calls the convert() method of a MapReduce object, which converts a KeyValue object into a KeyMultiValue object. It does this by finding duplicate keys (stored only by this processor) and concatenating their values into a list of values which it associates with the key in the KeyMultiValue object. The method returns the total number of key/value pairs in the KeyMultiValue object, which will be the number of unique keys in the KeyValue object. This operation creates a hash table to find duplicate keys efficiently. More details are given in the "Technical Details"_Technical.html section. This method is an on-processor operation, requiring no communication. 
When run in parallel, each processor converts only the key/value pairs it owns into key/multi-value pairs. Thus, this operation is typically performed only after the "aggregate()"_aggregate.html method has collected all duplicate keys to the same processor. The "collate()"_collate.html method performs an "aggregate()"_aggregate.html followed by a convert(). :line [Related methods]: "collate()"_collate.html mrmpi-1.0~20131122/doc/Manual.txt0000644000175000017500000001433111734440021016103 0ustar mathieumathieu MapReduce-MPI Library Users Manual "MapReduce-MPI WWW Site"_mws :c :link(mws,http://mapreduce.sandia.gov) :line

MapReduce-MPI (MR-MPI) Library Documentation :h2,c Version info: :h4 The MR-MPI "version" is the date when it was released, such as 1 May 2010. MR-MPI is updated continuously. Whenever we fix a bug or add a feature, we release it immediately, and post a notice on "this page of the WWW site"_bug. Each dated copy of MR-MPI contains all the features and bug-fixes up to and including that version date. The version date is printed to the screen every time you run a program that uses MR-MPI. It is also in the file src/version.h and in the MR-MPI directory name created when you unpack a tarball. If you browse the HTML or PDF doc pages on the MR-MPI WWW site, they always describe the most current version of MR-MPI. :ulb,l If you browse the HTML or PDF doc pages included in your tarball, they describe the version you have. :ule,l The MapReduce-MPI (MR-MPI) library is open-source software that implements the "MapReduce operation"_wiki popularized by Google on top of standard MPI message passing. The library is designed for parallel execution on distributed-memory platforms, but will also operate on a single processor. It requires no additional software to build and run, except linking with an MPI library if you wish to perform MapReduces in parallel. Similar to the original Google design, a user performs a MapReduce by writing a small program that invokes the library. The user typically provides two application-specific functions, a "map()" and a "reduce()", that are called back from the library when a MapReduce operation is executed. "Map()" and "reduce()" are serial functions, meaning they are invoked independently on individual processors on portions of your data when performing a MapReduce operation in parallel. The MR-MPI library is written in C++ and is callable from hi-level languages such as C++, C, Fortran. A Python wrapper is also included, so MapReduce programs can be written in Python, including map() and reduce() user callback methods. 
A hi-level scripting interface to the MR-MPI library, called OINK, is also included which can be used to develop and chain MapReduce algorithms together in scripts with commands that simplify data management tasks. OINK has its own "manual and doc pages"_../oinkdoc/Manual.html. The goal of the MR-MPI library is to provide a simple and portable interface for users to create their own MapReduce programs, which can then be run on any desktop or large parallel machine using MPI. See the Background section for features and limitations of this implementation. The distribution includes a few examples of simple programs that illustrate the use of MR-MPI. Source code for the library and OINK is freely available for download from the "MR-MPI web site"_mrmpi and is licensed under the modified "Berkeley Software Distribution (BSD) License"_bsd. This basically means they can be used by anyone for any purpose. See the LICENSE file provided with the distribution for more details. The authors of the MR-MPI library are "Steve Plimpton"_sjp and "Karen Devine"_kdd who can be contacted via email: sjplimp,kddevin at sandia.gov. :link(wiki,http://en.wikipedia.org/wiki/Mapreduce) :link(bsd,http://en.wikipedia.org/wiki/BSD_license) :link(kdd,http://www.cs.sandia.gov/~kddevin) :link(sjp,http://www.sandia.gov/~sjplimp) :link(mrmpi,http://mapreduce.sandia.gov) :link(bug,http://mapreduce.sandia.gov/bug.html) :line The MR-MPI documentation is organized into the following sections. If you find errors or omissions in this manual or have suggestions for useful information to add, please send an email to the developers so we can improve the MR-MPI documentation. Once you are familiar with MR-MPI, you may want to bookmark "this page"_Interface_c++.html at interface_c++.html, since it gives quick access to documentation for all the MR-MPI library methods. 
"PDF file"_Manual.pdf of the entire manual, generated by "htmldoc"_http://www.easysw.com/htmldoc "Background"_Background.html :ulb,l "What is a MapReduce?"_Whatis.html :l "Getting Started"_Start.html :l "Writing a MapReduce program"_Program.html :l "C++ Interface to the MapReduce-MPI Library"_Interface_c++.html :l "Create a MapReduce object"_create.html :ulb,l "Copy a MapReduce object"_copy.html :l "Destroy a MapReduce object"_destroy.html :l "MapReduce::add()"_add.html :l "MapReduce::aggregate()"_aggregate.html :l "MapReduce::broadcast()"_broadcast.html :l "MapReduce::clone()"_clone.html :l "MapReduce::close()"_open.html :l "MapReduce::collapse()"_collapse.html :l "MapReduce::collate()"_collate.html :l "MapReduce::compress()"_compress.html :l "MapReduce::multivalue_blocks()"_compress.html :ulb,l "MapReduce::multivalue_block()"_compress.html :l,ule "MapReduce::convert()"_convert.html :l "MapReduce::gather()"_gather.html :l "MapReduce::map()"_map.html :l "MapReduce::open()"_open.html :l "MapReduce::print()"_print.html :l "MapReduce::reduce()"_reduce.html :l "MapReduce::multivalue_blocks()"_reduce.html :ulb,l "MapReduce::multivalue_block()"_reduce.html :l,ule "MapReduce::scan()"_scan.html :l "MapReduce::scrunch()"_scrunch.html :l "MapReduce::sort_keys()"_sort_keys.html :l "MapReduce::sort_values()"_sort_values.html :l "MapReduce::sort_multivalues()"_sort_multivalues.html :l "MapReduce::kv_stats()"_stats.html :l "MapReduce::kmv_stats()"_stats.html :l "MapReduce::cummulative_stats()"_stats.html :l "KeyValue::add()"_kv_add.html :l "Settings and defaults"_settings.html :l,ule "C interface to the MapReduce-MPI Library"_Interface_c.html :l "Python interface to the MapReduce-MPI Library"_Interface_python.html :l "OINK interface to the MapReduce-MPI Library"_Interface_oink.html :l "Technical Details"_Technical.html :l "Examples"_Examples.html :l "Word frequency"_Examples.html#word :ulb,l "R-MAT matrices"_Examples.html#rmat :l,ule :ule 
mrmpi-1.0~20131122/doc/sort_values.txt0000644000175000017500000000533211734437536017256 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce sort_values() method :h3 uint64_t MapReduce::sort_values(int (*mycompare)(char *, int, char *, int)) uint64_t MapReduce::sort_values(int flag) :pre This calls the sort_values() method of a MapReduce object, which sorts a KeyValue object by its values to produce a new KeyValue object. For the first variant, you provide a mycompare() function which compares pairs of values for the sort, since the MapReduce object does not know how to interpret the content of your values. The method returns the total number of key/value pairs in the new KeyValue object which will be the same as in the original. For the second variant, you can select one of several pre-defined compare functions, so you do not have to write the compare function yourself: flag = 1 : compare 2 integers flag = 2 : compare 2 64-bit unsigned integers flag = 3 : compare 2 floats flag = 4 : compare 2 doubles flag = 5 : compare 2 NULL-terminated strings via strcmp() flag = 6 : compare 2 arbitrary strings via strncmp() :tb(s=:,ea=c) For the flag = 6 case, the 2 strings do not have to be NULL-terminated since only the first N characters are compared, where N is the shorter of the 2 string lengths. This method is used to sort key/value pairs by value before a KeyValue object is transformed into a KeyMultiValue object, e.g. via the "clone()"_clone.html, "collapse()"_collapse.html, or "convert()"_convert.html methods. Note that these operations preserve the order of pairs in the KeyValue object when creating a KeyMultiValue object, which can then be passed to your application for output, e.g. via the "reduce()"_reduce.html method. 
Note however, that sort_values() does NOT sort values across all processors but only sorts the values on each processor within the KeyValue object. Thus if you "gather()"_gather.html or "aggregate()"_aggregate.html after performing a sort_values(), the sorted order will be lost, since those methods move key/value pairs to new processors. In this example for the first variant, the user function is called mycompare() and it must have the following interface int mycompare(char *value1, int len1, char *value2, int len2) :pre Value1 and value2 are pointers to the byte strings for 2 values, each of length len1 and len2. Your function should compare them and return a -1, 0, or 1 if value1 is less than, equal to, or greater than value2, respectively. This method is an on-processor operation, requiring no communication. When run in parallel, each processor operates only on the key/value pairs it stores. :line [Related methods]: "sort_keys()"_sort_keys.html, "sort_multivalues()"_sort_multivalues.html mrmpi-1.0~20131122/doc/gather.txt0000644000175000017500000000263211734437536016162 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce gather() method :h3 uint64_t MapReduce::gather(int nprocs) :pre This calls the gather() method of a MapReduce object, which collects the key/value pairs of a KeyValue object spread across all processors to form a new KeyValue object on a subset (nprocs) of processors. Nprocs can be 1 or any number smaller than P, the total number of processors. The gathering is done to the lowest ID processors, from 0 to nprocs-1. Processors with ID >= nprocs end up with an empty KeyValue object containing no key/value pairs. The method returns the total number of key/value pairs in the new KeyValue object, which will be the same as in the original KeyValue object. 
This method can be used to collect the results of a "reduce()"_reduce.html to a single processor for output. See the "collapse()"_collapse.html and "scrunch()"_scrunch.html methods for related ways to collect key/value pairs for output. A gather() may also be useful before a "reduce()"_reduce.html if the number of unique key/value pairs is small enough that you wish to perform the reduce tasks on fewer processors. This method requires parallel point-to-point communication as processors send their key/value pairs to other processors. :line [Related methods]: "scrunch()"_scrunch.html, "broadcast()"_broadcast.html mrmpi-1.0~20131122/doc/sort_keys.txt0000644000175000017500000000553111734437536016733 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce sort_keys() method :h3 uint64_t MapReduce::sort_keys(int (*mycompare)(char *, int, char *, int)) uint64_t MapReduce::sort_keys(int flag) :pre This calls the sort_keys() method of a MapReduce object, which sorts a KeyValue object by its keys to produce a new KeyValue object. For the first variant, you provide a mycompare() function which compares pairs of keys for the sort, since the MapReduce object does not know how to interpret the content of your keys. The method returns the total number of key/value pairs in the new KeyValue object which will be the same as in the original. 
For the second variant, you can select one of several pre-defined compare functions, so you do not have to write the compare function yourself: flag = +/- 1 : compare 2 integers flag = +/- 2 : compare 2 64-bit unsigned integers flag = +/- 3 : compare 2 floats flag = +/- 4 : compare 2 doubles flag = +/- 5 : compare 2 NULL-terminated strings via strcmp() flag = +/- 6 : compare 2 arbitrary strings via strncmp() :tb(s=:,ea=c) If the flag is positive, the sorting is done in ascending order; if the flag is negative, the sorting is done in descending order. For the flag = +/- 6 case, the 2 strings do not have to be NULL-terminated since only the first N characters are compared, where N is the shorter of the 2 string lengths. This method is used to sort key/value pairs by key before a KeyValue object is transformed into a KeyMultiValue object, e.g. via the "clone()"_clone.html, "collapse()"_collapse.html, or "convert()"_convert.html methods. Note that these operations preserve the order of pairs in the KeyValue object when creating a KeyMultiValue object, which can then be passed to your application for output, e.g. via the "reduce()"_reduce.html method. Note however, that sort_keys() does NOT sort keys across all processors but only sorts the keys on each processor within the KeyValue object. Thus if you "gather()"_gather.html or "aggregate()"_aggregate.html after performing a sort_keys(), the sorted order will be lost, since those methods move key/value pairs to new processors. In this example for the first variant, the user function is called mycompare() and it must have the following interface int mycompare(char *key1, int len1, char *key2, int len2) :pre Key1 and key2 are pointers to the byte strings for 2 keys, each of length len1 and len2. Your function should compare them and return a -1, 0, or 1 if key1 is less than, equal to, or greater than key2, respectively. This method is an on-processor operation, requiring no communication. 
When run in parallel, each processor operates only on the key/value pairs it stores. :line [Related methods]: "sort_values()"_sort_values.html, "sort_multivalues()"_sort_multivalues.html mrmpi-1.0~20131122/doc/add.txt0000644000175000017500000000260711734437536015442 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce add() method :h3 uint64_t MapReduce::add(MapReduce *mr2) :pre This calls the add() method of a MapReduce object, to add the KeyValue pairs contained in a second MapReduce object mr2, to the KeyValue object of the first MapReduce object, which is created if one does not exist. This is useful if multiple MapReduce objects have been created and populated with key/value pairs and you wish to combine them before performing further operations, such as a "collate()"_collate.html and "reduce()"_reduce.html. For example, this sequence of calls: MapReduce *mr1 = new MapReduce(MPI_COMM_WORLD); mr1->map(ntasks,&mymap,NULL); MapReduce *mr2 = mr1->copy(); mr2->collate(NULL); mr2->reduce(&myreduce2,NULL); mr1->add(mr2); delete mr2; mr1->collate(NULL); mr1->reduce(&myreduce1,NULL); :pre would generate one set of key/value pairs from the initial "map()"_map.html operation, then make a "copy"_copy.html of them, which are then "collated"_collate.html and "reduced"_reduce.html to a new set of key/value pairs. The new set of key/value pairs are "added"_add.html to the original set produced by the "map()"_map.html operation to form an augmented set of key/value pairs, which could be further processed. 
:line [Related methods]: "copy"_copy.html, "map()"_map.html mrmpi-1.0~20131122/doc/Program.txt0000644000175000017500000000436011734437536016317 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line Writing a MapReduce program :h3 The usual way to use the MR-MPI library is to write a small main program that calls the library. In C++, your program includes two library header files and uses the MapReduce namespace: #include "mapreduce.h" #include "keyvalue.h" using namespace MAPREDUCE_NS :pre Follow these links for info on using the library from a "C program"_Interface_c.html or from a "Python program"_Interface_python.html. Arguments to the library's "map()"_map.html and "reduce()"_reduce.html methods include function pointers to serial "mymap" and "myreduce" functions in your code (named anything you wish), which will be "called back to" from the library as it performs the parallel map and reduce operations. A typical simple MapReduce program involves these steps: MapReduce *mr = new MapReduce(MPI_COMM_WORLD); // instantiate an MR object mr->map(nfiles,&mymap); // parallel map mr->collate() // collate keys mr->reduce(&myreduce); // parallel reduce delete mr; // delete the MR object :pre The main program you write may be no more complicated than this. The API for the MR-MPI library is a handful of methods which are components of a MapReduce operation. They can be combined in more complex sequences of calls than listed above. For example, one "map()"_map.html may be followed by several "reduce()"_reduce.html operations to massage your data in a desired way. Output of final results is typically performed as part of a myreduce() function you write which executes on one or more processors and writes to a file(s) or the screen. The MR-MPI library operates on "keys" and "values" which are generated and manipulated by your mymap() and myreduce() functions. 
A key and a value are simply byte strings of arbitrary length which are logically associated with each other, and can thus represent anything you wish. For example, a key can be a text string or a particle or grid cell ID. A value can be one or more numeric values or a text string or a composite data structure that you create. mrmpi-1.0~20131122/doc/Whatis.txt0000644000175000017500000001035011734437536016143 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line What is a MapReduce? :h3 The canonical example of a MapReduce operation, described in both the "Dean and Sanjay"_#Dean and "Tu, et al"_#Tu papers, is counting the frequency of words in a collection of text files. Imagine a large corpus of text comprising Gbytes or Tbytes of data. To count how often each word appears, the following algorithm would work, written in Python: dict = \{\} for file in sys.argv\[1:\]: text = open(file,'r').read() words = text.split() for word in words: if word not in dict: dict\[word\] = 1 else: dict\[word\] += 1 unique = dict.keys() for word in unique: print dict\[word\],word :pre Dict is a "dictionary" or associative array which is a collection of key/value pairs where the keys are unique. In this case, the key is a word and its value is the number of times it appears in any text file. The program loops over files, and splits the contents into words (separated by whitespace). For each word, it either adds it to the dictionary or increments its associated value. Finally, the resulting dictionary of unique words and their counts is printed. The drawback of this implementation is that it is inherently serial. The files are read one by one. More importantly the dictionary data structure is updated one word at a time. 
A MapReduce formulation of the same task is as follows: array = \[\] for file in sys.argv\[1:\]: array += map(file) newarray = collate(array) unique = \[\] for entry in newarray: unique += reduce(entry) for entry in unique: print entry\[1\],entry\[0\] :pre Array is now a linear list of key/value pairs where a key may appear many times (not a dictionary). The map() function reads a file, splits it into words, and generates a key/value pair for each word in the file. The key is the word itself and the value is the integer 1. The collate() function reorganizes the (potentially very large) list of key/value pairs into a new array of key/value pairs where each unique key appears exactly once and the associated value is a concatenated list of all the values associated with the same key in the original array. Thus, a key/value pair in the new array would be ("dog",\[1,1,1,1,1\]) if the word "dog" appeared 5 times in the text corpus. The reduce() function takes a single key/value entry from the new array and returns a key/value pair that has the word as its key and the count as its value, ("dog",5) in this case. Finally, the elements of the unique array are printed. As written, the MapReduce algorithm could be executed on a single processor. However, there is now evident parallelism. The map() function calls are independent of each other and can be executed on different processors simultaneously. Ditto for the reduce() function calls. In this scenario, each processor would accumulate its own local "array" and "unique" lists of key/value pairs. Also note that if the map and reduce functions are viewed as black boxes that produce a list of key/value pairs (in the case of map) or convert a single key/value pair into a new key/value pair (in the case of reduce), then they are the only part of the above algorithm that is application-specific. 
The remaining portions (the collate function, assignment of map or reduce tasks to processors, combining of the map/reduce output across processors) can be handled behind the scenes in an application-independent fashion. That is the portion of the code that is handled by the MR-MPI (or other) MapReduce library. The user only needs to provide a small driving program to call the library and serial functions for performing the desired map() and reduce() operations. :line :link(Dean) [(Dean)] J. Dean and S. Ghemawat, "MapReduce: Simplified Data Processing on Large Clusters", OSDI'04 conference (2004); J. Dean and S. Ghemawat, "MapReduce: Simplified Data Processing on Large Clusters", Communications of the ACM, 51, p 107-113 (2008). :link(Tu) [(Tu)] T. Tu, C. A. Rendleman, D. W. Borhani, R. O. Dror, J. Gullingsrud, M. O. Jensen, J. L. Kelpeis, P. Maragakis, P. Miller, K. A. Stafford, D. E. Shaw, "A Scalable Parallel Framework for Analyzing Terascale Molecular Dynamics Trajectories", SC08 proceedings (2008). mrmpi-1.0~20131122/doc/destroy.txt0000644000175000017500000000127411734437536016402 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line Destroy a MapReduce object :h3 MapReduce::~MapReduce() :pre This destroys a previously created MapReduce object, freeing all the memory it allocated internally to store keys and values. If you created the MapReduce object in this manner: MapReduce *mr = new MapReduce(MPI_COMM_WORLD); :pre then you should destroy it with delete mr :pre If you created the MapReduce object in this manner: MapReduce mr(MPI_COMM_WORLD); :pre then it will be destroyed automatically when the "mr" variable goes out of scope. 
:line [Related methods]: "create"_create.html mrmpi-1.0~20131122/doc/Interface_oink.txt0000644000175000017500000000171711734437536017633 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line OINK interface to the MapReduce-MPI Library :h3 OINK is a C++ application that provides a hi-level scripting interface to the MR-MPI library which it uses internally. These are three goals of OINK: (1) To allow MapReduce algorithms which call the MR-MPI library to be written with a minimum of extraneous code, to work with input/output in various forms, and to be chained together and driven via a simple, yet versatile scripting language. :ulb,l (2) To create an archive of map() and reduce() functions for re-use by different algorithms. :l (3) To provide a scripted interface to the lo-level MR-MPI library calls that can speed development/debugging of new algorithms before coding them up in C++ or another language. :l,ule OINK has its own "manual and doc pages"_../oinkdoc/Manual.html, so further details are not given here. mrmpi-1.0~20131122/doc/Start.txt0000644000175000017500000001237212013024745015770 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line Getting Started :h3 Once you have "downloaded"_http://www.sandia.gov/~sjplimp/download.html the MapReduce MPI (MR-MPI) library, you should have the tarball mapreduce.tar.gz on your machine. Unpack it with the following commands: gunzip mapreduce.tar.gz tar xvf mapreduce.tar :pre which should create a mapreduce directory containing the following: README LICENSE doc examples mpistubs oink oinkdoc python src user :ul The doc directory contains this documentation. The oink and oinkdoc directories contain the "OINK scripting interface"_../oinkdoc/Manual.html to the MR-MPI library and its separate documentation. 
The examples directory contains a few simple MapReduce programs which call the MR-MPI library. These are documented by a README file in that directory and are discussed below. The mpistubs directory contains a dummy MPI library which can be used to build a MapReduce program on a serial machine. The python directory contains the Python wrapper files needed to call the MR-MPI library from Python. The src directory contains the files that comprise the MR-MPI library. The user directory contains user-contributed MapReduce programs. See the README in that directory for further details. [Static library:] :h5 To build a static library for use by a C++ or C program (*.a file on Linux), go to the src directory and type make :pre You will see a list of machine names, each of which has their own Makefile.machine file in the src/MAKE directory. You can choose one of these and attempt to build the MR-MPI library by typing make machine :pre If you are successful, this will produce the file "libmrmpi_machine.a" which can be linked by other programs. If not, you will need to create a src/MAKE/Makefile.machine file compatible with your platform, using one of the existing files as a template. The only settings in a Makefile.machine file that need to be specified are those for the compiler and the MPI library on your machine. If MPI is not already installed, you can install one of several free versions that work on essentially all platforms. MPICH and OpenMPI are the most common. Within Makefile.machine you can either specify via -I and -L switches where the MPI include and library files are found, or you can use a compiler wrapper provided with MPI, like mpiCC or mpic++, which will know where those files are. You can also build the MR-MPI library without MPI, using the dummy MPI library provided in the mpistubs directory. In this case you can only run the library on a single processor. 
To do this, first build the dummy MPI library, by typing "make" from within the mpistubs directory. Again, you may need to edit mpistubs/Makefile for your machine. Then from the src directory, type "make serial" which uses the src/MAKE/Makefile.serial file. Both a C++ and "C interface"_Interface_c.html are part of the MR-MPI library, so it should be usable from any hi-level language. [Shared library:] :h5 You can also build the MR-MPI library as a dynamic shared library (*.so file instead of *.a on Linux). This is required if you want to use the library from Python. To do this, type make -f Makefile.shlib machine :pre This will create the file libmrmpi_machine.so, as well as a soft link libmrmpi.so, which is what the Python wrapper will load by default. Note that if you are building multiple machine versions of the shared library, the soft link is always set to the most recently built version. [Additional requirement for using a shared library:] :h5 The operating system finds shared libraries to load at run-time using the environment variable LD_LIBRARY_PATH. So you may wish to copy the file src/libmrmpi.so or src/libmrmpi_g++.so (for example) to a place the system can find it by default, such as /usr/local/lib, or you may wish to add the MR-MPI src directory to LD_LIBRARY_PATH, so that the current version of the shared library is always available to programs that use it. For the csh or tcsh shells, you would add something like this to your ~/.cshrc file: setenv LD_LIBRARY_PATH ${LD_LIBRARY_PATH}:/home/sjplimp/mrmpi/src :pre :line The MapReduce programs in the examples directory can be built by typing make -f Makefile.machine :pre from within the examples directory, where Makefile.machine is one of the Makefiles in the examples directory. Again, you may need to modify one of the existing ones to create a new one for your machine. Some of the example programs are provided as a C++ program, a C program, as a Python script, or as an OINK input script. 
Once you have built OINK, the latter can be run as, for example, oink_linux < in.rmat :pre When you run one of the example MapReduce programs or your own, if you get an immediate error about the MRMPI_BIGINT data type, you will need to edit the file src/mrtype.h and re-compile the library. Mrtype.h and the error check ensure that your MPI will perform operations on 8-byte unsigned integers as required by the MR-MPI library. For the MPI on most machines, this is satisfied by the MPI data type MPI_UNSIGNED_LONG_LONG. But some machines do not support the "long long" data type, and you may need a different setting for your machine and installed MPI, such as MPI_UNSIGNED_LONG. mrmpi-1.0~20131122/doc/collapse.txt0000644000175000017500000000342311734437536016511 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce collapse() method :h3 uint64_t MapReduce::collapse(char *key, int keybytes) :pre This calls the collapse() method of a MapReduce object, which collapses a KeyValue object into a KeyMultiValue object with a single new key, given as an argument with its length in bytes. The single new value in the KeyMultiValue object is a concatenated list of all the keys and values in the KeyValue object. The method returns the total number of key/value pairs in the KeyMultiValue object, which will be 1 for each processor owning pairs. For example, if the KeyValue object contains these key/value pairs: ("dog",3), ("me",45), ("parallel",1) :pre then the new KeyMultiValue object will contain a single key/value pair: (key,\["dog",3,"me",45,"parallel",1\]) :pre This method can be used to collect a set of key/value pairs to use in a "reduce()"_reduce.html method so that it can all be passed to a single invocation of your myreduce() function for output. 
See the "Technical Details"_Technical.html section for details on how the collapse() method affects the alignment of keys and values that may eventually be passed to your myreduce() function via the "reduce()"_reduce.html method. This method is an on-processor operation, requiring no communication. When run in parallel, each processor collapses the key/value pairs it owns into a single key/value pair. Thus each processor will assign the same key to its new pair. See the "gather()"_gather.html and "scrunch()"_scrunch.html methods for ways to collect all key/value pairs on to one or a few processors. :line [Related methods]: "clone()"_clone.html, "collate"_collate.html, "convert()"_convert.html mrmpi-1.0~20131122/doc/create.txt0000644000175000017500000000453011734437536016152 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line Create a MapReduce object :h3 MapReduce::MapReduce(MPI_Comm comm) MapReduce::MapReduce() MapReduce::MapReduce(double dummy) :pre You can create a MapReduce object in any of the three ways shown, as well as via the "copy()"_copy.html method. The three creation methods differ slightly in how MPI is initialized and finalized. In the first case, you pass an MPI communicator to the constructor. This means your program should initialize (and finalize) MPI, which creates the MPI_COMM_WORLD communicator (all the processors you are running on). Normally this is what you pass to the MapReduce constructor, but you can pass a communicator for a subset of your processors if desired. You can also instantiate multiple MapReduce objects, giving them each a communicator for all the processors or communicators for a subset of processors. The second case can be used if your program does not use MPI at all. The library will initialize MPI if it has not already been initialized. It will not finalize MPI, but this should be fine. 
Worst case, your program may complain when it exits if MPI has not been finalized. The third case is the same as the second except that the library will finalize MPI when the last instance of a MapReduce object is destructed. Note that this means your program cannot delete all its MapReduce objects in an early phase of the program and then instantiate more MapReduce objects later. This limitation is why the second case is provided. The third case is invoked by passing a double to the constructor. If this is done for any instantiated MapReduce object, then the library will finalize MPI. The value of the double doesn't matter as it isn't used. The use of a double is simply to make it different than the first case, since MPI_Comm is often implemented by MPI libraries as a type cast to an integer. As examples, any of these lines of code will create a MapReduce object, where "mr" is either a pointer to the object or the object itself: MapReduce *mr = new MapReduce(MPI_COMM_WORLD); MapReduce *mr = new MapReduce(); MapReduce *mr = new MapReduce(0.0); MapReduce mr(MPI_COMM_WORLD); MapReduce mr(); MapReduce mr; MapReduce mr(0.0); :pre :line [Related methods]: "destroy"_destroy.html, "copy()"_copy.html mrmpi-1.0~20131122/doc/sort_multivalues.txt0000644000175000017500000000470411734437536020333 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce sort_multivalues() method :h3 uint64_t MapReduce::sort_multivalues(int (*mycompare)(char *, int, char *, int)) uint64_t MapReduce::sort_multivalues(int) :pre This calls the sort_multivalues() method of a MapReduce object, which sorts the values for each key within a KeyMultiValue object to produce a new KeyMultiValue object. For the first variant, you provide a mycompare() function which compares pairs of values for the sort, since the MapReduce object does not know how to interpret the content of your values.
The method returns the total number of key/multi-value pairs in the new KeyMultiValue object which will be the same as in the original. For the second variant, you can select one of several pre-defined compare functions, so you do not have to write the compare function yourself: flag = 1 : compare 2 integers flag = 2 : compare 2 64-bit unsigned integers flag = 3 : compare 2 floats flag = 4 : compare 2 doubles flag = 5 : compare 2 NULL-terminated strings via strcmp() flag = 6 : compare 2 arbitrary strings via strncmp() :tb(s=:,ea=c) For the flag = 6 case, the 2 strings do not have to be NULL-terminated since only the first N characters are compared, where N is the shorter of the 2 string lengths. This method can be used to sort a set of multi-values within a key before they are passed to your application, e.g. via the "reduce()"_reduce.html method. Note that it typically only makes sense to use sort_multivalues() for a KeyMultiValue object created by the "convert()"_convert.html or "collate()"_collate.html methods, not KeyMultiValue objects created by the "clone()"_clone.html or "collapse()"_collapse.html or "scrunch()"_scrunch.html methods. In this example for the first variant, the user function is called mycompare() and it must have the following interface int mycompare(char *value1, int len1, char *value2, int len2) :pre Value1 and value2 are pointers to the byte strings for 2 values, each of length len1 and len2. Your function should compare them and return a -1, 0, or 1 if value1 is less than, equal to, or greater than value2, respectively. This method is an on-processor operation, requiring no communication. When run in parallel, each processor operates only on the key/multi-value pairs it stores. 
:line [Related methods]: "sort_keys()"_sort_keys.html, "sort_values()"_sort_values.html mrmpi-1.0~20131122/doc/collate.txt0000644000175000017500000000313711734437536016334 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce collate() method :h3 uint64_t MapReduce::collate(int (*myhash)(char *, int)) :pre This calls the collate() method of a MapReduce object, which aggregates a KeyValue object across processors and converts it into a KeyMultiValue object. This method is exactly the same as performing an "aggregate()"_aggregate.html followed by a "convert()"_convert.html. The method returns the total number of unique key/value pairs in the KeyMultiValue object. The hash argument is used by the "aggregate()"_aggregate.html portion of the operation and can be specified as NULL. See the "aggregate()"_aggregate.html doc page for details. Note that if your map operation does not produce duplicate keys, you do not typically need to perform a collate(). Instead you can convert a KeyValue object into a KeyMultiValue object directly via the "clone()"_clone.html method, which requires no communication. Or you can pass it directly to another "map()"_map.html operation. One exception would be if your map operation produces a KeyValue object which is highly imbalanced across processors. The "aggregate()"_aggregate.html portion of the operation should redistribute the key/value pairs more evenly. This method is a parallel operation ("aggregate()"_aggregate.html), followed by an on-processor operation ("convert()"_convert.html). 
:line [Related methods]: "aggregate()"_aggregate.html, "clone"_clone.html, "collapse()"_collapse.html, "compress()"_compress.html, "convert()"_convert.html mrmpi-1.0~20131122/doc/Interface_c.txt0000644000175000017500000001577612243674677017134 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line C interface to the MapReduce-MPI Library :h3 The MR-MPI library can be called from a C program, using the interface defined in src/cmapreduce.h. This is a C file which should be included in your C program to define the API to the library: #include "cmapreduce.h" :pre Note that the C interface should also be usable to call the MapReduce MPI library from Fortran or other hi-level languages, including scripting languages. See information below on how to do this from "Python"_Interface_python.html. The C interface consists of the following functions. Their functionality and arguments are described in the "C++ interface section"_Interface_c++.html. 
void *MR_create(MPI_Comm comm); void *MR_create_mpi(); void *MR_create_mpi_finalize(); void *MR_copy(void *MRptr); void MR_destroy(void *MRptr); :pre uint64_t MR_add(void *MRptr); uint64_t MR_aggregate(void *MRptr, int (*myhash)(char *, int)); uint64_t MR_broadcast(void *MRptr, int root); uint64_t MR_clone(void *MRptr); uint64_t MR_close(void *MRptr); uint64_t MR_collapse(void *MRptr, char *key, int keybytes); uint64_t MR_collate(void *MRptr, int (*myhash)(char *, int)); uint64_t MR_compress(void *MRptr, void (*mycompress)(char *, int, char *, int, int *, void *KVptr, void *APPptr), void *APPptr); uint64_t MR_convert(void *MRptr); uint64_t MR_gather(void *MRptr, int numprocs); :pre uint64_t MR_map(void *MRptr, int nmap, void (*mymap)(int, void *KVptr, void *APPptr), void *APPptr); uint64_t MR_map_add(void *MRptr, int nmap, void (*mymap)(int, void *KVptr, void *APPptr), void *APPptr, int addflag); uint64_t MR_map_file(void *MRptr, int nstr, char **strings, int self, int recurse, int readfile, void (*mymap)(int, char *, void *KVptr, void *APPptr), void *APPptr); uint64_t MR_map_file_add(void *MRptr, int nstr, char **strings, int self, int recurse, int readfile, void (*mymap)(int, char *, void *KVptr, void *APPptr), void *APPptr, int addflag); uint64_t MR_map_file_char(void *MRptr, int nmap, int nstr, char **strings, int recurse, int readfile, char sepchar, int delta, void (*mymap)(int, char *, int, void *KVptr, void *APPptr), void *APPptr); uint64_t MR_map_file_char_add(void *MRptr, int nmap, int nstr, char **strings, int recurse, int readfile, char sepchar, int delta, void (*mymap)(int, char *, int, void *KVptr, void *APPptr), void *APPptr, int addflag); uint64_t MR_map_file_str(void *MRptr, int nmap, int nfiles, char **files, char *sepstr, int delta, void (*mymap)(int, char *, int, void *KVptr, void *APPptr), void *APPptr); uint64_t MR_map_file_str_add(void *MRptr, int nmap, int nfiles, char **files, char *sepstr, int delta, void (*mymap)(int, char *, int, void
*KVptr, void *APPptr), void *APPptr, int addflag); uint64_t MR_map_mr(void *MRptr, void *MRptr2, void (*mymap)(uint64_t, char *, int, char *, int *, void *KVptr, void *APPptr), void *APPptr); uint64_t MR_map_mr_add(void *MRptr, void *MRptr2, void (*mymap)(uint64_t, char *, int, char *, int *, void *KVptr, void *APPptr), void *APPptr, int addflag); :pre void MR_open(void *MRptr, int addflag); void MR_open_add(void *MRptr); void MR_print(void *MRptr, int, int, int, int); void MR_print_file(void *MRptr, char *, int, int, int, int, int); :pre uint64_t MR_reduce(void *MRptr, void (*myreduce)(char *, int, char *, int, int *, void *KVptr, void *APPptr), void *APPptr); uint64_t MR_multivalue_blocks(void *MRptr); void MR_multivalue_block_select(void *MRptr, int which); int MR_multivalue_block(void *MRptr, int iblock, char **ptr_multivalue, int **ptr_valuesizes); :pre uint64_t MR_scan_kv(void *MRptr, void (*myscan)(uint64_t, char *, int, char *, int, void *), void *APPptr); uint64_t MR_scan_kmv(void *MRptr, void (*myscan)(char *, int, char *, int, int *, void *), void *APPptr); :pre uint64_t MR_scrunch(void *MRptr, int numprocs, char *key, int keybytes); :pre uint64_t MR_sort_keys(void *MRptr, int (*mycompare)(char *, int, char *, int)); uint64_t MR_sort_keys_flag(void *MRptr, int); uint64_t MR_sort_values(void *MRptr, int (*mycompare)(char *, int, char *, int)); uint64_t MR_sort_values_flag(void *MRptr, int); uint64_t MR_sort_multivalues(void *MRptr, int (*mycompare)(char *, int, char *, int)); :pre uint64_t MR_sort_multivalues_flag(void *MRptr, int); void MR_kv_stats(void *MRptr, int level); void MR_kmv_stats(void *MRptr, int level); :pre void MR_set_mapstyle(void *MRptr, int value); void MR_set_verbosity(void *MRptr, int value); void MR_set_timer(void *MRptr, int value); void MR_set_memsize(void *MRptr, int value); void MR_set_keyalign(void *MRptr, int value); void MR_set_valuealign(void *MRptr, int value); :pre void MR_kv_add(void *KVptr, char *key, int keybytes, char 
*value, int valuebytes); void MR_kv_add_multi_static(void *KVptr, int n, char *key, int keybytes, char *value, int valuebytes); void MR_kv_add_multi_dynamic(void *KVptr, int n, char *key, int *keybytes, char *value, int *valuebytes); :pre void *MR_get_kv(void *MRptr); void *MR_get_kmv(void *MRptr); :pre These functions correspond one-to-one with the C++ methods described "here"_Interface_c++.html, except that for C++ methods with multiple interfaces (e.g. "map()"_map.html), there are multiple C functions, with slightly different names. The MR_set() functions are added to the C interface to enable the corresponding library variables to be set. The final MR_get_kv() and MR_get_kmv() functions have no C++ counterpart; they were added to allow extraction of the internal KV and KMV pointers from a C-style calling program, which can be done directly in C++ since the pointers are public members. Note that when you call MR_create() or MR_copy(), they return a "void *MRptr" which is a pointer to the MapReduce object created by the library. This pointer is used as the first argument of all the other MR calls. This means a C program can effectively instantiate multiple MapReduce objects by simply keeping track of the pointers returned to it. The remaining arguments of each function call are the same as those used with the C++ methods. The only exceptions are several of the MR_kv_add() functions which take a KVptr as their first argument. This is a pointer to a KeyValue object. These calls are made from your program's mymap(), myreduce(), and mycompress() functions to register key/value pairs with the MR-MPI library. The KVptr is passed as an argument to your functions when they are called back from the MR-MPI library. See the C programs in the examples directory for "examples"_Examples.html of how these calls are made from a C program. They are conceptually identical to the C++ programs in the same directory.
mrmpi-1.0~20131122/doc/Background.txt0000644000175000017500000001271511734437536016772 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line Background :h3 MapReduce is the programming paradigm popularized by Google researchers "Dean and Ghemawat"_#Dean. Their motivation was to enable analysis programs to be rapidly developed and deployed within Google to operate on the massive data sets residing on their large distributed clusters. Their paper introduced a novel way of thinking about certain kinds of large-scale computations as "map" operations followed by "reduces". The power of the paradigm is that when cast in this way, a traditionally serial algorithm now becomes two highly parallel application-specific operations (requiring no communication) sandwiched around an intermediate operation that requires parallel communication, but which can be encapsulated in a library since the operation is independent of the application. The Google implementation of MapReduce was a C++ library with communication between networked machines via remote procedure calls. They allow for fault tolerance when large numbers of machines are used, and can use disks as out-of-core memory to process huge data sets. Thousands of MapReduce programs have since been written by Google researchers and are part of the daily compute tasks run by the company. While I had heard about MapReduce, I didn't appreciate its power for scientific computing on a monolithic distributed-memory parallel machine, until reading a SC08 paper by "Tu, et al"_#Tu of the D.E. Shaw company. They showed how to think about tasks such as the post-processing of simulation output as MapReduce operations. In this context it can be useful for computations that would normally be thought of as serial, such as reading in a large data set and scanning it for events of a desired kind. 
As before, the computation can be formulated as a highly parallel "map" followed by a "reduce". The encapsulated parallel operation in the middle requires all-to-all communication to reorganize the data, a familiar MPI operation. Tu's implementation of MapReduce was in parallel Python with communication between processors via MPI, again allowing disks to be used for out-of-core operations. This MapReduce-MPI (MR-MPI) library is a very simple and lightweight implementation of the basic MapReduce functionality, borrowing ideas from both the "Dean and Ghemawat"_#Dean and "Tu, et al"_#Tu papers. It has the following features: C++ library using MPI for inter-processor communication. This allows precise control over the memory allocated during a large-scale MapReduce. :ulb,l C++ and C and Python interfaces provided. A C++ interface means that one or more MapReduce objects can be instantiated and invoked by the user's program. A C interface means that the library can also be called from C or other hi-level languages such as Fortran. A Python interface means the library can be called from a Python script, allowing you to write serial map() and reduce() functions in Python. If your machine can run Python in parallel, you can also run a parallel MapReduce in that manner. :l Small, portable. The entire library is a few thousand lines of C++ code in a handful of C++ files which can be built on any machine with a C++ compiler. For parallel operation, you link with MPI, a standard message passing library available on all distributed memory machines. For serial operation, a dummy MPI library can be substituted, which is provided. The Python wrapper can be installed on any machine with a version of Python that includes the ctypes module, typically Python 2.5 or later. :l In-core or Out-of-core operation. Each MapReduce object created allocates per-processor "pages" of memory, where the page size is determined by the user.
Typical MapReduce operations can be performed using just a few such pages. If your data set (key/value pairs) fits in a single page, then the library performs its operations in-core. If your data set exceeds the page size, then processors write to temporary disk files as needed and subsequently read from them. This allows processing of data sets that are larger than will fit in the aggregate memory of all the processors. :l,ule This library also has the following limitation: No fault tolerance. Current MPI implementations do not enable easy detection of a dead processor. So like most MPI programs, a MapReduce operation will hang or crash if a processor goes away. :ulb,l,ule Finally, I call attention to "recent work"_#Gray by Alexander Gray and colleagues at Georgia Tech. They show that various kinds of scientific computations such as N-body forces via multipole expansions, k-means clustering, and machine learning algorithms, can be formulated as MapReduce operations. Thus there is an expanding set of data-intense or compute-intense problems that may be amenable to solution using a MapReduce library such as this. :line :link(Dean) [(Dean)] J. Dean and S. Ghemawat, "MapReduce: Simplified Data Processing on Large Clusters", OSDI'04 conference (2004); J. Dean and S. Ghemawat, "MapReduce: Simplified Data Processing on Large Clusters", Communications of the ACM, 51, p 107-113 (2008). :link(Tu) [(Tu)] T. Tu, C. A. Rendleman, D. W. Borhani, R. O. Dror, J. Gullingsrud, M. O. Jensen, J. L. Klepeis, P. Maragakis, P. Miller, K. A. Stafford, D. E. Shaw, "A Scalable Parallel Framework for Analyzing Terascale Molecular Dynamics Trajectories", SC08 proceedings (2008). :link(Gray) [(Gray)] A.
Gray, Georgia Tech, http://www.cc.gatech.edu/~agray mrmpi-1.0~20131122/doc/Interface_c++.txt0000644000175000017500000001134411734437536017240 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line C++ Interface to the MapReduce-MPI Library :h3 This multiple-page section discusses how to call the MR-MPI library from a C++ program and gives a description of all its methods and variable settings. Use of the library from a "C program"_Interface_c.html (or other hi-level language) or from "Python"_Interface_python.html is discussed in other sections of the manual. All the library methods operate on two basic data structures stored within the MapReduce object, a KeyValue object (KV) and a KeyMultiValue object (KMV). When running in parallel, these objects are stored in a distributed fashion across multiple processors. A KV is a collection of key/value pairs. The same key may appear many times in the collection, associated with values which may or may not be the same. A KMV is also a collection of key/value pairs. But each key in the KMV is unique, meaning it appears exactly once (see the clone() method for a possible exception). The value associated with a KMV key is a concatenated list (a multi-value) of all the values associated with the same key in the original KV. More details about how KV and KMV objects are stored are given in the "Technical Details"_Technical.html section. Here is an overview of how the various library methods operate on KV and KMV objects. This is useful to understand, since this determines how the various operations can be chained together in your program.
"add()"_add.html, KV -> KV, add pairs from one KV to another, serial, 2 pages "aggregate()"_aggregate.html, KV -> KV, pairs are aggregated onto procs, parallel, 7 pages "broadcast()"_broadcast.html, KV -> KV, send pairs from one proc to all procs, parallel, 2 pages "clone()"_clone.html, KV -> KMV, each KV pair becomes a KMV pair, serial, 2 pages "close()"_close.html, KV, allows one MapReduce object to add KV pairs to another, serial, 0 pages "collapse()"_collapse.html, KV -> KMV, all KV pairs become one KMV pair, serial, 2 pages "collate()"_collate.html, KV -> KMV, aggregate + convert, parallel, 4+ pages "compress()"_compress.html, KV -> KV, calls back to user program to compress duplicate keys, serial, 4+ pages "convert()"_convert.html, KV -> KMV, duplicate KV keys become one KMV key, serial, 4+ pages "gather()"_gather.html, KV -> KV, collect pairs on many procs to few procs, parallel, 2 pages "map()"_map.html, create or add to a KV, calls back to user program to generate pairs, serial, 1 page "reduce()"_reduce.html, KMV -> KV, calls back to user program to process KMV pairs, serial, 3 pages "open()"_open.html, create or add to a KV, allows one MapReduce object to add KV pairs to another, serial, 0 pages "print()"_print.html, KV or KMV, print KV or KMV pairs to screen or file(s), serial, 1 page "scan()"_scan.html, KV or KMV, calls back to user program to process KV or KMV pairs, serial, 1 page "scrunch()"_scrunch.html, KV -> KMV, gather + collapse, parallel, 3 pages "sort_keys()"_sort_keys.html, KV -> KV, calls back to user program to sort pairs by key, serial, 5 pages "sort_values()"_sort_values.html, KV -> KV, calls back to user program to sort pairs by value, serial, 5 pages "sort_multivalues()"_sort_multivalues.html, KMV -> KMV, calls back to user program to sort multi-values within each pair, serial, 4 pages "kv_stats()"_stats.html, KV, print stats about a KV, serial, 0 pages "kmv_stats()"_stats.html, KMV, print stats about a KMV, serial, 0 pages :tb() Note 
that each MapReduce object contains a single KV or KMV object (or neither) when its method is called. (Some methods operate on 2 or more MapReduce objects.) When the method completes, the MapReduce object also contains a single KV or KMV object. Thus if a method creates a new KV or KMV object, the old one is deleted, if it existed. The KV object is also deleted if a KMV object is produced, and vice versa. The methods flagged as "serial" perform their operation on the portion of a KV or KMV owned by an individual processor. They involve only local computation (performed simultaneously on all processors) and no parallel communication. The methods flagged as "parallel" involve communication between processors. The listed page counts are the number of memory pages that method requires. See the {memsize} "setting"_settings.html for a discussion of what memory pages are and how their size is set. The methods whose page count is listed as 4+ all perform a "convert()"_convert.html operation internally. The minimum number of pages this requires is 4. Depending on the page size and the characteristics of the KV pairs being converted to KMV pairs, more pages can be required. See the out-of-core discussion in "this section"_Technical.html#ooc for more details. mrmpi-1.0~20131122/doc/Interface_python.txt0000644000175000017500000004653712013454362020210 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line Python interface to the MapReduce-MPI Library :h3 A Python wrapper for the MR-MPI library is included in the distribution. The advantage of using Python is how concise the language is, enabling rapid development and debugging of MapReduce programs. The disadvantage is speed, since Python is slower than a compiled language. Using the MR-MPI library from Python incurs two additional overheads, discussed in the "Technical Details"_Technical.html section.
Before using MR-MPI from a Python script, you need to do two things. You need to build MR-MPI as a dynamic shared library, so it can be loaded by Python. And you need to tell Python how to find the library and the Python wrapper file python/mrmpi.py. Both these steps are discussed below. If you wish to run MR-MPI in parallel from Python, you also need to extend your Python with MPI. This is also discussed below. The Python wrapper for MR-MPI uses the amazing and magical (to me) "ctypes" package in Python, which auto-generates the interface code needed between Python and a set of C interface routines for a library. Ctypes is part of standard Python for versions 2.5 and later. You can check which version of Python you have installed, by simply typing "python" at a shell prompt. The following sub-sections cover the rest of the Python discussion: "Building MR-MPI as a shared library"_#py_1 "Installing the Python wrapper into Python"_#py_2 "Extending Python with MPI to run in parallel"_#py_3 "Testing the Python/MR-MPI interface"_#py_4 "Using the MR-MPI library from Python"_#py_5 :ul :line :line [Building MR-MPI as a shared library] :link(py_1) Instructions on how to build MR-MPI as a shared library are given in the "Start section"_Start.html. A shared library is one that is dynamically loadable, which is what Python requires. On Linux this is a library file that ends in ".so", not ".a". From the src directory, type make -f Makefile.shlib foo :pre where foo is the machine target name, such as linux or g++ or serial. This should create the file libmrmpi_foo.so in the src directory, as well as a soft link libmrmpi.so, which is what the Python wrapper will load by default. Note that if you are building multiple machine versions of the shared library, the soft link is always set to the most recently built version. If this fails, see the "Start section"_Start.html for more details.
:line [Installing the Python wrapper into Python] :link(py_2) For Python to invoke MR-MPI, there are 2 files it needs to know about: python/mrmpi.py src/libmrmpi.so :ul Mrmpi.py is the Python wrapper on the MR-MPI library interface. Libmrmpi.so is the shared MR-MPI library that Python loads, as described above. You can ensure Python can find these files in one of two ways: set two environment variables run the python/install.py script :ul If you set the paths to these files as environment variables, you only have to do it once. For the csh or tcsh shells, add something like this to your ~/.cshrc file, one line for each of the two files: setenv PYTHONPATH ${PYTHONPATH}:/home/sjplimp/mrmpi/python setenv LD_LIBRARY_PATH ${LD_LIBRARY_PATH}:/home/sjplimp/mrmpi/src :pre If you use the python/install.py script, you need to invoke it every time you rebuild MR-MPI (as a shared library) or make changes to the python/mrmpi.py file. You can invoke install.py from the python directory as % python install.py \[libdir\] \[pydir\] :pre The optional libdir is where to copy the MR-MPI shared library to; the default is /usr/local/lib. The optional pydir is where to copy the mrmpi.py file to; the default is the site-packages directory of the version of Python that is running the install script. Note that libdir must be a location that is in your default LD_LIBRARY_PATH, like /usr/local/lib or /usr/lib. And pydir must be a location that Python looks in by default for imported modules, like its site-packages dir. If you want to copy these files to non-standard locations, such as within your own user space, you will need to set your PYTHONPATH and LD_LIBRARY_PATH environment variables accordingly, as above. If the install.py script does not allow you to copy files into system directories, prefix the python command with "sudo". If you do this, make sure that the Python that root runs is the same as the Python you run. E.g.
you may need to do something like % sudo /usr/local/bin/python install.py \[libdir\] \[pydir\] :pre You can also invoke install.py from the make command in the src directory as % make install-python :pre In this mode you cannot append optional arguments. Again, you may need to prefix this with "sudo". In this mode you cannot control which Python is invoked by root. Note that if you want Python to be able to load different versions of the MR-MPI shared library (see "this section"_#py_5 below), you will need to manually copy files like libmrmpi_g++.so into the appropriate system directory. This is not needed if you set the LD_LIBRARY_PATH environment variable as described above. :line 11.3 Extending Python with MPI to run in parallel :link(py_3),h4 If you wish to run MR-MPI in parallel from Python, you need to extend your Python with an interface to MPI. This also allows you to make MPI calls directly from Python in your script, if you desire. There are several Python packages available that purport to wrap MPI as a library and allow MPI functions to be called from Python. These include "pyMPI"_http://pympi.sourceforge.net/ "maroonmpi"_http://code.google.com/p/maroonmpi/ "mpi4py"_http://code.google.com/p/mpi4py/ "myMPI"_http://nbcr.sdsc.edu/forum/viewtopic.php?t=89&sid=c997fefc3933bd66204875b436940f16 "Pypar"_http://code.google.com/p/pypar :ul All of these except pyMPI work by wrapping the MPI library and exposing (some portion of) its interface to your Python script. This means Python cannot be used interactively in parallel, since they do not address the issue of interactive input to multiple instances of Python running on different processors. The one exception is pyMPI, which alters the Python interpreter to address this issue, and (I believe) creates a new alternate executable (in place of "python" itself) as a result.
In principle any of these Python/MPI packages should work to invoke MR-MPI in parallel and MPI calls themselves from a Python script which is itself running in parallel. However, when I downloaded and looked at a few of them, their documentation was incomplete and I had trouble with their installation. It's not clear if some of the packages are still being actively developed and supported. The one I recommend, since I have successfully used it with MR-MPI, is Pypar. Pypar requires the ubiquitous "Numpy package"_http://numpy.scipy.org be installed in your Python. After launching python, type import numpy :pre to see if it is installed. If not, here is how to install it (version 1.3.0b1 as of April 2009). Unpack the numpy tarball and from its top-level directory, type python setup.py build sudo python setup.py install :pre The "sudo" is only needed if required to copy Numpy files into your Python distribution's site-packages directory. To install Pypar (version pypar-2.1.4_94 as of Aug 2012), unpack it and from its "source" directory, type python setup.py build sudo python setup.py install :pre Again, the "sudo" is only needed if required to copy Pypar files into your Python distribution's site-packages directory. If you have successfully installed Pypar, you should be able to run Python and type import pypar :pre without error. You should also be able to run python in parallel on a simple test script % mpirun -np 4 python test.py :pre where test.py contains the lines import pypar print "Proc %d out of %d procs" % (pypar.rank(),pypar.size()) :pre and see one line of output for each processor you run on. IMPORTANT NOTE: To use Pypar and MR-MPI in parallel from Python, you must ensure both are using the same version of MPI. If you only have one MPI installed on your system, this is not an issue, but it can be if you have multiple MPIs. Your MR-MPI build is explicit about which MPI it is using, since you specify the details in your lo-level src/MAKE/Makefile.foo file.
Pypar uses the "mpicc" command to find information about the MPI it uses to build against. And it tries to load "libmpi.so" from the LD_LIBRARY_PATH. This may or may not find the MPI library that MR-MPI is using. If you have problems running both Pypar and MR-MPI together, this is an issue you may need to address, e.g. by moving other MPI installations so that Pypar finds the right one. :line 11.4 Testing the Python-MR-MPI interface :link(py_4),h4 To test if MR-MPI is callable from Python in serial, launch Python interactively and type: >>> from mrmpi import mrmpi >>> mr = mrmpi() :pre If you get no errors, you're ready to use MR-MPI from Python. If the 2nd command fails, the most common error to see is OSError: Could not load MR-MPI dynamic library :pre which means Python was unable to load the MR-MPI shared library. This typically occurs if the system can't find the MR-MPI shared library, or if something about the library is incompatible with your Python. The error message should give you an indication of what went wrong. You can also test the load directly in Python as follows, without first importing from the mrmpi.py file: >>> from ctypes import CDLL >>> CDLL("libmrmpi.so") :pre If an error occurs, carefully go thru the steps in "Start"_Start.html and above about building a shared library and about insuring Python can find the necessary two files it needs. 
[Test MR-MPI and Python in parallel:] :h5 To run MR-MPI in parallel, assuming you have installed the "Pypar"_http://datamining.anu.edu.au/~ole/pypar package as discussed above, create a test.py file containing these lines: import pypar from mrmpi import mrmpi mr = mrmpi() print "Proc %d out of %d procs has" % (pypar.rank(),pypar.size()),mr pypar.finalize() :pre You can then run it in parallel as: % mpirun -np 4 python test.py :pre Note that if you leave out the 3 lines from test.py that specify Pypar commands you will instantiate and run MR-MPI independently on each of the P processors specified in the mpirun command. In this case you should get 4 sets of output, each showing that a MR-MPI was initialized on a single processor, instead of one set of output showing MR-MPI was initialized on 4 processors. If the 1-processor outputs occur, it means that Pypar is not working correctly. Also note that once you import the PyPar module, Pypar initializes MPI for you, and you can use MPI calls directly in your Python script, as described in the Pypar documentation. The last line of your Python script should be pypar.finalize(), to insure MPI is shut down correctly. [Running Python scripts:] :h5 Note that any Python script (not just for MR-MPI) can be invoked in one of several ways: % python foo.script % python -i foo.script % foo.script :pre The last command requires that the first line of the script be something like this: #!/usr/local/bin/python #!/usr/local/bin/python -i :pre where the path points to where you have Python installed, and that you have made the script file executable: % chmod +x foo.script :pre Without the "-i" flag, Python will exit when the script finishes. With the "-i" flag, you will be left in the Python interpreter when the script finishes, so you can type subsequent commands. As mentioned above, you can only run Python interactively when running Python on a single processor, not in parallel. 
:line :line [Using the MR-MPI library from Python] :link(py_5) The Python interface to MR-MPI consists of a Python "mrmpi" module, the source code for which is in python/mrmpi.py, which creates a "mrmpi" object, with a set of methods that can be invoked on that object. The sample Python code below assumes you have first imported the "mrmpi" module in your Python script, as follows: from mrmpi import mrmpi :pre These are the methods defined by the mrmpi module. Some of them take callback functions as arguments, e.g. "map()"_map.html and "reduce()"_reduce.html. These are Python functions you define elsewhere in your script. When you register "keys" and "values" with the library, they can be simple quantities like strings or ints or floats. Or they can be Python data structures like lists or tuples. These are the class methods defined by the mrmpi module. Their functionality and arguments are described in the "C++ interface section"_Interface_c++.html. mr = mrmpi() # create a MR-MPI object using the default libmrmpi.so library mr = mrmpi(mpi_comm) # ditto, but with a specified MPI communicator mr = mrmpi(0.0) # ditto, and the library will finalize MPI mr = mrmpi(None,"g++") # create a MR-MPI object using the libmrmpi_g++.so library mr = mrmpi(mpi_comm,"g++") # ditto, but with a specified MPI communicator mr = mrmpi(0.0,"g++") # ditto, and the library will finalize MPI :pre mr2 = mr.copy() # copy mr to create mr2 :pre mr.destroy() # destroy an mrmpi object, freeing its memory # this will also occur if Python garbage collects :pre mr.add(mr2) mr.aggregate() mr.aggregate(myhash) # if specified, myhash is a hash function # called back from the library as myhash(key) # myhash() should return an integer (a proc ID) mr.broadcast(root) mr.clone() mr.close() mr.collapse(key) mr.collate() mr.collate(myhash) # if specified, myhash is the same function # as for aggregate() :pre mr.compress(mycompress) # mycompress is a function called back from the # library as 
mycompress(key,mvalue,mr,ptr) # where mvalue is a list of values associated # with the key, mr is the MapReduce object, # and you (optionally) provide ptr (see below) # your mycompress function should typically # make calls like mr->add(key,value) mr.compress(mycompress,ptr) # if specified, ptr is any Python datum # and is passed back to your mycompress() # if not specified, ptr = None :pre mr.convert() mr.gather(nprocs) :pre mr.map(nmap,mymap) # mymap is a function called back from the # library as mymap(itask,mr,ptr) # where mr is the MapReduce object, # and you (optionally) provide ptr (see below) # your mymap function should typically # make calls like mr->add(key,value) mr.map(nmap,mymap,ptr) # if specified, ptr is any Python datum # and is passed back to your mymap() # if not specified, ptr = None mr.map(nmap,mymap,ptr,addflag) # if addflag is specified as a non-zero int, # new key/value pairs will be added to the # existing key/value pairs :pre mr.map_file(files,self,recurse,readfile,mymap) # files is a list of filenames and dirnames # mymap is a function called back from the # library as mymap(itask,filename,mr,ptr) # as above, ptr and addflag are optional args mr.map_file_char(nmap,files,recurse,readfile,sepchar,delta,mymap) # files is a list of filenames and dirnames # mymap is a function called back from the # library as mymap(itask,str,mr,ptr) # as above, ptr and addflag are optional args mr.map_file_str(nmap,files,recurse,readfile,sepstr,delta,mymap) # files is a list of filenames and dirnames # mymap is a function called back from the # library as mymap(itask,str,mr,ptr) # as above, ptr and addflag are optional args mr.map_mr(mr2,mymap) # pass key/values in mr2 to mymap # mymap is a function called back from the # library as mymap(itask,key,value,mr,ptr) # as above, ptr and addflag are optional args :pre mr.open() mr.open(addflag) mr.print_screen(proc,nstride,kflag,vflag) mr.print_file(file,fflag,proc,nstride,kflag,vflag) :pre mr.reduce(myreduce) #
myreduce is a function called back from the # library as myreduce(key,mvalue,mr,ptr) # where mvalue is a list of values associated # with the key, mr is the MapReduce object, # and you (optionally) provide ptr (see below) # your myreduce function should typically # make calls like mr->add(key,value) mr.reduce(myreduce,ptr) # if specified, ptr is any Python datum # and is passed back to your myreduce() # if not specified, ptr = None :pre mr.scan_kv(myscan) # myscan is a function called back from the # library as myscan(key,value,ptr) # for each key/value pair # and you (optionally) provide ptr (see below) mr.scan_kv(myscan,ptr) # if specified, ptr is any Python datum # and is passed back to your myscan() # if not specified, ptr = None :pre mr.scan_kmv(myscan) # myscan is a function called back from the # library as myscan(key,mvalue,ptr) # where mvalue is a list of values associated # with the key, # and you (optionally) provide ptr (see below) mr.scan_kmv(myscan,ptr) # if specified, ptr is any Python datum # and is passed back to your myscan() # if not specified, ptr = None :pre mr.scrunch(nprocs,key) mr.sort_keys(mycompare) mr.sort_values(mycompare) mr.sort_multivalues(mycompare) # mycompare is a function called back from the # library as mycompare(a,b) where # a and b are two keys or two values # your mycompare() should compare them # and return a -1, 0, or 1 # if a < b, or a == b, or a > b mr.sort_keys_flag(flag) mr.sort_values_flag(flag) mr.sort_multivalues_flag(flag) :pre mr.kv_stats(level) mr.kmv_stats(level) :pre mr.mapstyle(value) # set mapstyle to value mr.all2all(value) # set all2all to value mr.verbosity(value) # set verbosity to value mr.timer(value) # set timer to value mr.memsize(value) # set memsize to value mr.minpage(value) # set minpage to value mr.maxpage(value) # set maxpage to value :pre mr.add(key,value) # add single key and value mr.add_multi_static(keys,values) # add list of keys and values # all keys are assumed to be same length # all
values are assumed to be same length mr.add_multi_dynamic(keys,values) # add list of keys and values # each key may be different length # each value may be different length :pre :line Note that you can create multiple MR-MPI objects in your Python script, and coordinate the data stored in each and moved between them, just as can from a C or C++ program. The class methods above correspond one-to-one with the C++ methods described "here"_Interface_c++.html, except that for C++ methods with multiple interfaces (e.g. "map()"_map.html), there are multiple Python methods with slightly different names, similar to the "C interface"_Interface_c.html. There is no set function the the {keyalign} and {valuealign} "settings"_settings.html. These are hard-wired to 1 for the Python interface, since no other values make sense, due to the pickling/unpickling that is performed in key and value data. See the Python scripts in the examples directory for "examples"_Examples.html of how these calls are made from a Python program. They are conceptually identical to the C++ and C programs in the same directory. mrmpi-1.0~20131122/doc/aggregate.txt0000644000175000017500000000442211734437536016635 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce aggregate() method :h3 uint64_t MapReduce::aggregate(int (*myhash)(char *, int)) :pre This calls the aggregate() method of a MapReduce object, which reorganizes a KeyValue object across processors into a new KeyValue object. In the original object, duplicates of the same key may be stored on many processors. In the new object, all duplicates of a key are stored by the same processor. The method returns the total number of key/value pairs in the new KeyValue object, which will be the same as the number in the original object. A hashing function is used to assign keys to processors. 
Typically you will not care how this is done, in which case you can specify a NULL, i.e. mr->aggregate(NULL), and the MR-MPI library will use its own internal hash function, which will distribute them randomly and hopefully evenly across processors. On the other hand, if you know the best way to do this for your data, then you should provide the hashing function. For example, if your keys are integer IDs for particles or grid cells, you might want to use the ID (modulo the processor count) to choose the processor it is assigned to. Ideally, you want a hash function that will distribute keys to processors in a load-balanced fashion. In this example the user function is called myhash() and it must have the following interface: int iproc = myhash(char *key, int keybytes) :pre Your function will be passed a key (byte string) and its length in bytes. Typically you want to return an integer such that 0 <= iproc < P, where P is the number of processors. But you can return any integer, since the MR-MPI library uses the result in this manner to assign the key to a processor: int iproc = myhash(key,keybytes) % P; :pre Because the aggregate() method will, in general, reassign all key/value pairs to new processors, it incurs a large volume of all-to-all communication. However, this is performed concurrently, taking advantage of the large bisection bandwidth most large parallel machines provide. The aggregate() method should load-balance key/value pairs across processors if they are initially imbalanced.
:line [Related methods]: "collate()"_collate.html mrmpi-1.0~20131122/doc/compress.txt0000644000175000017500000001523311734437536016544 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce compress() method :h3 MapReduce multivalue_blocks() method :h3 MapReduce multivalue_block() method :h3 uint64_t MapReduce::compress(void (*mycompress)(char *, int, char *, int, int *, KeyValue *, void *), void *ptr) :pre uint64_t MapReduce::multivalue_blocks() :pre int MapReduce::multivalue_block(int iblock, char **ptr_multivalue, int **ptr_valuesizes) :pre This calls the compress() method of a MapReduce object, passing it a function pointer to a mycompress function you write. This method compresses a KeyValue object with duplicate keys into a new KeyValue object, where each key appears once (on that processor) and has a single new value. The new value is a combination of the values associated with that key in the original KeyValue object. The mycompress() function you provide generates the new value, once for each unique key (on that processor). The method returns the total number of key/value pairs in the new KeyValue object. This method is used to compress a large set of key/value pairs produced by the "map()"_map.html method into a smaller set before proceeding with the rest of a MapReduce operation, e.g. with a "collate()"_collate.html and "reduce()"_reduce.html. You can give this method a pointer (void *ptr) which will be returned to your mycompress() function. See the "Technical Details"_Technical.html section for why this can be useful. Just specify a NULL if you don't need this. 
In this example the user function is called mycompress() and it must have the following interface, which is the same as that used by the "reduce()"_reduce.html method: void mycompress(char *key, int keybytes, char *multivalue, int nvalues, int *valuebytes, KeyValue *kv, void *ptr) :pre A single key/multi-value (KMV) pair is passed to your function from a temporary KeyMultiValue object created by the library. That object creates a multi-value for each unique key in the KeyValue object which contains a list of the nvalues associated with that key. Note that this is only the values on this processor, not across all processors. There are two possibilities for a KMV pair returned to your function. The first is that it fits in one page of memory allocated by the MapReduce object, which is the usual case. See the {memsize} "setting"_settings.html for details on memory allocation. In this case, the char *multivalue argument is a pointer to the beginning of the multi-value which contains all nvalues, packed one after the other. The int *valuebytes argument is an array which stores the length of each value in bytes. If needed, it can be used by your function to compute an offset into char *values for where each individual value begins. Your function is also passed a kv pointer to a new KeyValue object created and stored internally by the MapReduce object. If the KMV pair does not fit in one page of memory, then the meaning of the arguments passed to your function is changed. Your function must call two additional library functions in order to retrieve a block of values that does fit in memory, and process them one block at a time. In this case, the char *multivalue argument will be NULL and the nvalues argument will be 0. Either of these can be tested for within your function. If you know huge multi-values will not occur, then the test is not needed. The meaning of the kv and ptr arguments is the same as discussed above. 
However, the int *valuebytes argument is changed to be a pointer to the MapReduce object. This is to allow you to make the following two kinds of calls back to the library: MapReduce *mr = (MapReduce *) valuebytes; int nblocks; uint64_t nvalues_total = mr->multivalue_blocks(nblocks); for (int iblock = 0; iblock < nblocks; iblock++) { int nv = mr->multivalue_block(iblock,&multivalue,&valuebytes); for (int i = 0; i < nv; i++) { process each value within the block of values } } :pre The call to multivalue_blocks() returns both the total number of values (as an unsigned 64-bit integer), and the number of blocks of values in the multi-value. Each call to multivalue_block() retrieves one block of values. The number of values in the block is returned, as nv in this case. The multivalue and valuebytes arguments are pointers to a char * and int * (i.e. a char ** and int **), which will be set to point to the block of values and their lengths respectively, so they can then be used just as the multivalue and valuebytes arguments in the myreduce() callback itself (when the values do not exceed available memory). The call to multivalue_blocks() returns the number of blocks of values in the multi-value. Each call to multivalue_block() retrieves one block of values. The number of values in the block (nv in this case) is returned. The multivalue and valuebytes arguments are pointers to a char * and int * (i.e. a char ** and int **), which will be set to point to the block of values and their lengths respectively, so they can then be used just as the multivalue and valuebytes arguments in the mycompress() callback itself (when the values do not exceed available memory). Note that in this example we are re-using (and thus overwriting) the original multivalue and valuebytes arguments as local variables. 
Also note that your mycompress() function can call multivalue_block() as many times as it wishes and process the blocks of values multiple times or in any order, though looping through blocks in ascending order will typically give the best disk I/O performance. Your mycompress() function should typicaly produce a single key/value pair which it registers with the MapReduce object by calling the "add()"_kv_add.html method of the KeyValue object. The syntax for this call is described on the doc page for the KeyValue "add()"_kv_add.html methd. For example, if the set of nvalues were integers, the compressed value might be the sum of those integers. See the "Settings"_settings.html and "Technical Details"_Technical.html sections for details on the byte-alignment of keys and values that are passed to your mycompress() function and on those you register with the KeyValue "add()"_kv_add.html methods. Note that only the first value of a multi-value (or of each block of values) passed to your mycompress() function will be aligned to the {valuealign} "setting"_settings.html. This method is an on-processor operation, requiring no communication. When run in parallel, each processor operates only on the key/value pairs it stores. Thus you are NOT compressing all values associated with a particular key across all processors, but only those currently owned by one processor. :line [Related methods]: "collate()"_collate.html mrmpi-1.0~20131122/doc/copy.txt0000644000175000017500000000342411734437536015662 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line Copy a MapReduce object :h3 MapReduce *MapReduce::copy() :pre This calls the copy() method of a MapReduce object, which creates a second MapReduce object which is an exact copy of the first, including all "settings"_settings.html, and returns a pointer to the new copy. 
If the original MapReduce object contained a KeyValue or KeyMultiValue object, as discussed "here"_Interface_c++.html, then the new MapReduce object will contain a copy of it. This means that all the key/value and/or key/multivalue pairs contained in the first MapReduce object are copied into the new MapReduce object. Thus the first MapReduce object could be subsequently deleted without affecting the new MapReduce object. This is useful if you wish to retain a copy of a set of key/value pairs before processing it further. See the "add()"_add.html method for how to merge the key/value pairs from two MapReduce objects into one. For example, this sequence of calls: MapReduce *mr1 = new MapReduce(MPI_COMM_WORLD); mr1->map(ntasks,&mymap,NULL); MapReduce *mr2 = mr1->copy(); mr2->collate(NULL); mr2->reduce(&myreduce2,NULL); mr1->add(mr2); delete mr2; mr1->collate(NULL); mr1->reduce(&myreduce1,NULL); :pre would generate one set of key/value pairs from the initial "map()"_map.html operation, then make a "copy"_copy.html of them, which are then "collated"_collate.html and "reduced"_reduce.html to a new set of key/value pairs. The new set of key/value pairs are "added"_add.html to the original set produced by the "map()"_map.html operation to form an augmented set of key/value pairs, which could be further processed. :line [Related methods]: "create"_create.html, "add()"_add.html mrmpi-1.0~20131122/doc/Technical.txt0000644000175000017500000004764311734437536016615 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line Technical Details :h3 This section provides additional details about using the MapReduce library and how it is implemented. 
These topics are covered: "Length and byte-alignment of keys and values"_#align "Memory requirements for KeyValue and KeyMultiValue objects"_#memory "Out-of-core operation"_#ooc "Fundamental library limits"_#limits "Hash functions"_#hash "Callback functions"_#callback "Python overhead"_#python "Error messages"_#error :ul :line :line Length and byte-alignment of keys and values :link(align),h4 As explained in "this section"_Program.html, keys and values are variable-length strings of bytes. The MR-MPI library knows nothing of their contents and simply treats them as contiguous chunks of bytes. When you register a key and value in your "mymap()"_map.html or "mycompress()"_compress.html or "myreduce()"_reduce.html function via the KeyValue "add()"_kv_add.html method, you specify their lengths in bytes. Keys and values are typically returned to your program for further processing or output, e.g. as arguments passed to your myreduce() function by the "reduce()"_reduce.html operation, as are their lengths. Keys and values are passed as character pointers to your functions where you may need to convert the pointer to an appropriate data type and then correctly interpret the byte string. For example, either of these lines could be used: int *iptr = (int *) key; int myvalue = *(int *) key; :pre If the key or value is a variable-length text string, you may want to terminate it with a "0", and include the trailing "0" in the byte count, so that C-library-style string functions can later be invoked on it. If a key or value is a complex data structure, your function must be able to decode it. IMPORTANT NOTE: An easy way to encapsulate several datums as a key (or value) is to create a C struct that includes each of them. Then the sizeof() function gives the byte count of the struct and the compiler takes care of data alignment issues, as described below.
If you do this for creating a key, then be aware that your individual datums may not use up all the bytes returned by the sizeof() function. Again this is due to alignment constraints imposed by the compiler. Normally this isn't something your code would worry about since you only access the datums, but if the struct is used as a key, and some bytes in the key are never initialized (by you filling in the datums), then when that key is hashed by the MR-MPI library, e.g. to perform a "collate()"_collate.html operation, those uninitialized bytes will also be hashed. Since the uninitialized bytes may contain random garbage, this means 2 keys with identical datums might not hash identically, and thus their values would not be combined as you expect into a single KeyMultiValue. The only solution for this is for you to initialize the struct before setting its datums, e.g. typedef struct { double x; int i; } Tuple; Tuple tuple; memset(&tuple,0,sizeof(Tuple)); tuple.x = 1.0; tuple.i = 1; :pre The memset() function initializes the entire tuple to 0. Note that in this case sizeof(Tuple) is likely 16 bytes, but the x and i datums will only set 12 of the 16 bytes, leaving the last 4 uninitialized. Also note that this whole discussion is irrelevant if the struct is used only as a value, since only keys are hashed. A related issue with keys and values is the byte-alignment of integer or floating point values they include. For example, it is usually a bad idea to store an 8-byte double such that it is mis-aligned with respect to an 8-byte boundary in memory. The reason is that using a mis-aligned double in a computation may be slow. If your keys or values are homogeneous (e.g. all integers), you can use the {keyalign} and {valuealign} settings, discussed "here"_settings.html, to insure alignment of keys and values to desired byte boundaries. Since this may incur extra memory costs, you should not typically make these settings larger than needed.
Special care may need to be taken if your values are heterogeneous, e.g. a mixture of strings and integers. This is because the MR-MPI library packs values one after the other into one long byte string when it is returned to your program as a multi-value, e.g. as an argument to the callback of a "reduce()"_reduce.html method. Only the first value in the multi-value is aligned to the {valuealign} "setting"_settings.html. Similarly, the "collapse()"_collapse.html method creates a multi-value that is a sequence of key,value,key,value,etc from a KV. If the keys are variable-length text strings and the values are integers, then the values will not be aligned on 4-byte boundaries. Here are two ideas that can be used to insure alignment of heterogeneous data: (a) Say your "value" is a 4-byte integer followed by an 8-byte double. You might think it can be stored and registered as 12 contiguous bytes. However, this would likely mean the double is mis-aligned. One solution is to convert the integer to a double before storing both quantities in a 16-byte value string. Another solution is to create a struct to store the integer and double and use the sizeof() function to determine the length of the struct and use that as the length of your "value". The compiler should then guarantee proper alignment of each structure member. If you use such a struct as a key, be aware of the "IMPORTANT NOTE" explained above. (b) Your callback function can always copy the bytes of a key or value into a local data structure with the proper alignment, e.g. using the C memcpy() function. E.g. in the collapse example above, these lines of code: int myvalue; memcpy(&myvalue,&multivalue\[offset\],sizeof(int)); :pre would load the 4 bytes of a particular value (at location offset) in the multi-value into the local integer "myvalue", where it can then be used for computation.
:line Memory requirements for KeyValue and KeyMultiValue objects :link(memory),h4 KeyValue and KeyMultiValue objects are described in "this section"_Interface_c++.html. A MapReduce object contains either a single KeyValue object (KV) or a single KeyMultiValue object (KMV), depending on which methods you have invoked. The memory cost for storing key/value pairs in a KV is as follows. The key and value each have a byte length. Two integers are also stored for the key and value length. There may also be additional bytes added to align the key and value on byte boundaries in memory; see the {keyalign} and {valuealign} settings, discussed in "this section"_settings.html. Thus the total size of a KV is the memory for the key/value datums plus 2 integers per pair plus any extra alignment bytes. A KMV contains key/multi-value pairs where the number of pairs is typically the number of unique keys in the original KV. The memory cost for storing key/multi-value pairs in a KMV is as follows. The key and multi-value each have a byte length. For the multi-value, this is the sum of individual value lengths. Again, there may also be additional bytes added to align the key and multi-value on byte boundaries in memory; see the {keyalign} and {valuealign} settings, discussed in "this section"_settings.html. Three integers are also stored: the key and multi-value length, and the number of values N in the multi-value. An N-length array of integers is also stored for the length of each value in the multi-value. Thus the total size of a KMV is the memory for the key/multi-value datums plus 3 integers per pair plus 1 integer per value in the original KV plus any extra alignment bytes. Note that memory for key data in a KMV is typically less than in the original KV, since the KMV only stores unique keys. The memory for multi-value data is the same as the value data in the original KV, since all the original KV values are contained in the multi-values. 
Note that in parallel, for a KV or KMV, each processor stores the above data for only a fraction of key/value pairs it generated during a "map()"_map.html operation or acquired during other operations, like a "collate()"_collate.html. If this is imbalanced, one processor may own and process datums more than other processors. If KV or KMV data on a processor exceeds the page size determined by the {memsize} setting, discussed "here"_settings.html, then data is written to temporary disk files, on a per-processor basis. :line Out-of-core operation :link(ooc),h4 If the KV or KMV pairs of a data set owned by a processor fit within a single page of memory, whose size is determined by the {memsize} "setting"_settings.html, then the MR-MPI library operates on the data in-core; no disk files are written or read. When the data on any single processor exceeds the page size, that processor will write data, one page at a time, to one or more temporary disk files, and later read it back in as needed, again one page at a time. Thus all the MR-MPI methods can be invoked on data sets larger than fit in the aggregate memory of the processors being used. The only real limitation in this case is available disk space. All of the MR-MPI methods, except one, perform their operations within a fixed number of memory pages. This includes memory needed for message passing calls to the MPI library, e.g. buffers used to send and receive data. Any large data exchanges are performed with pre-posted receives (MPI_Irecv) into user-space memory, which do not require additional internal MPI library memory. The number of required pages ranges from 1 to 7, and is listed on "this page"_Interface_c++.html for each MR-MPI library method.
This means, for example, that even if the page size is 1 Mb (smallest allowed value), and the data set size is 10 Gb per processor, and the "sort_keys()"_sort_keys.html method is invoked, which requires 5 pages per processor, that the operation will successfully complete, using only 5 Mb per processor. Of course, there may be considerable disk I/O performed along the way. The one exception is the "convert()"_convert.html method, also called by the "collate()"_collate.html and "compress()"_compress.html methods, which performs an on-processor reorganization of the data in a KV to produce a KMV. For large data sets this requires breaking up the large KV data file into smaller files, each of which holds data that will contribute to one page of the eventual KMV file. Each smaller file requires an in-memory buffer to store data that is written to the file. The number of these smaller files, and hence the number of buffers, is hard to predict in advance or even bound. It depends on the page size and the characteristics of the KV pairs, e.g. how many unique keys there are. The number of extra allocated pages needed to store these buffers depends on the number of small files and the minimum buffer size, which is currently set at 16K bytes for reasonable disk I/O performance. If a very large number of small files are needed to partition the KV data and the page size is small, then several extra memory pages may need to be allocated. This is not normally the case, but the number of small files and number of allocated pages can be monitored if the {verbosity} "setting"_settings.html is non-zero. Note that a larger page size will reduce the number of extra pages the "convert()"_convert.html method needs to allocate.
IMPORTANT NOTE: You should choose a {memsize} "setting"_settings.html that ensures the total memory consumed by all pages allocated by all the MapReduce objects you create does not exceed the physical memory available (which may be shared by several processors if running on a multi-core node). If you do not do this, then many systems will allocate virtual memory, which will typically cause MR-MPI library operations to run very slowly and thrash the disk. Also note that in addition to "pages", there are numerous additional small allocations of memory made by the MR-MPI library. Here are two examples. The "aggregate()"_aggregate.html method allocates vectors of length P = the number of processors. Out-of-core disk files are stored as "pages" of data. Each page requires some in-memory bookkeeping so it can be written and read. Thus if a file grows to 1000s of pages, the corresponding in-memory bookkeeping structure will also become larger. For normal page sizes as determined by the {memsize} "setting"_settings.html, e.g. the 64 Mbyte default, these additional in-memory allocations should be small compared to the size of a single page. :line Fundamental library limits :link(limits),h4 Even in out-of-core mode, the MR-MPI library has limitations on the data set sizes it can process. In practice, these are hopefully not restrictive limits.
Define: INTMAX = 2^31 - 1 = largest 32-bit signed int UINT64MAX = 2^64 - 1 = largest 64-bit unsigned int pagesize = size (in bytes) of 1 page of memory :ul Internal storage limits within library: KV = KeyValue, KMV = KeyMultiValue UINT64MAX = max byte count of KV or KMV data across all procs UINT64MAX = max # of KV or KMV pairs across all procs UINT64MAX = max # of values in a single KMV pair UINT64MAX = max pagesize min(pagesize,INTMAX) = max size of 1 KV pair INTMAX = max number of KV or KMV pairs in one page (on a processor) INTMAX = max # of values in single KMV pair, before split across pages INTMAX = max summed value size in single KMV pair, before split across pages :ul Additional notes: The user sets the "pagesize" via the {memsize} setting, in Mbytes. The pagesize can exceed INTMAX, though it should not exceed the physical memory available. See the "discussion above"_#ooc for more details. Since the data set size is written to disk, when the library operates in out-of-core mode, the data size cannot exceed available disk space, either on a per-processor basis (if each processor is writing to its own local disk), or in aggregate (e.g. for a parallel file system). Some MR-MPI operations convert data from one form to another (e.g. KV to KMV) or make intermediate copies of data (e.g. for sorting). At a minimum this typically requires 2x the disk space of the data set itself. As discussed "here"_#align, a KeyValue pair requires 2 integers plus the key and value, plus alignment space. For a 1-byte key and a 0-byte value, this is a minimum of 12 bytes. By storing no more than INTMAX KeyValue pairs on a page, this still allows for pagesizes of nearly 24 Gb, more if KeyValue pair sizes are larger. The various INTMAX limits mean that user calls to the library, and library callbacks to user functions can use int parameters rather than uint64 parameters. It also reduces storage requirements for individual KeyValue and KeyMultiValue pairs. 
One exception is that all the library methods return a uint64 for the final number of KeyValue or KeyMultiValue pairs stored by the library. Another exception is the uint64 "itask" variable passed back to one flavor of the user mymap() function via the "map()"_map.html method. The INTMAX limits on the number of KeyMultiValue values stored in one page, mean that individual KeyMultiValue pairs that exceed this will be split across multiple pages. The user callback functions access these pages via the multivalue_blocks() and multivalue_block() methods, described with the "reduce()"_reduce.html method. :line Hash functions :link(hash),h4 The "convert()"_convert.html and "collate()"_collate.html methods use a hash function to organize keys and find duplicates. The MR-MPI library uses the hashlittle() function from lookup3.c, written by Bob Jenkins and available freely on the WWW. It operates on arbitrary-length byte strings (a key) and produces a 32-bit integer hash value, a portion of which is used as a bucket index into a hash table. :line Callback functions :link(callback),h4 Several of the library methods take a callback function as an argument, meaning that function is called back from the library when the method is invoked. These functions are part of your MapReduce program and can perform any operation you wish on your data (or on no data), so long as they produce the appropriate information. E.g. they generate key/value pairs in the case of "map()"_map.html or "compress()"_compress.html or "reduce()"_reduce.html, or they hash a key to a processor in the case of "aggregate()"_aggregate.html or "collate()"_collate.html, or they compare two keys or values in the case of "sort_keys()"_sort_keys.html or "sort_values()"_sort_values.html. The mymap() and myreduce() functions can perform simple operations or very complex, compute-intensive operations.
For example, if your parallel machine supports it, they could invoke another program or script to read/parse an input file or calculate some result. Note that in your program, a callback function CANNOT be a class method unless it is declared to be "static". It can also be a non-class method, i.e. just a stand-alone function. In either case, such a function cannot access class data. One way to get around this restriction is to define global variables that allow your function to access information it needs. Another way around this restriction is to use the feature provided by several of the library methods with callback function arguments which allow you to pass in a pointer to whatever data you wish. This pointer is returned as an argument when the callback is made. This pointer should be cast to (void *) when passed in, and your callback function can later cast it back to the appropriate data type. For example, a class could set the pointer to an array or an internal data structure or the class itself as "(void *) this". Specify a NULL if your function doesn't need the pointer. :line Python overhead :link(python),h4 Using the MR-MPI library from Python incurs two not-so-obvious overheads beyond the usual slowdown due to using an interpreted language. First, Python objects used as keys and values are "pickled" and "unpickled" using the cPickle Python library when passed into and out of the C++ library. This is because the library stores them as byte strings. The pickling process serializes a Python object (e.g. an integer, a string, a tuple, or a list) into a byte stream in a way that it can be unpickled into the same Python object. The second overhead is due to the complexity of making a double callbacks between the library and your Python script. I.e. the library calls back once to the user program which then calls back into the library. Consider what happens during a map() operation when the library is called from a C++ program. 
the program calls the library map() method the library map() calls back to the user map() callback function the user map() calls the library add() method to register a key/value pair :ul When doing this from Python there are 3 additional layers between the Python program and the library, the Python mrmpi class, an invisible C layer (created by ctypes), and the C interface on the C++ library itself. Thus the callback operation proceeds as follows: the program calls the mrmpi class map() method the mrmpi class map() calls the invisible C map() function the invisible map() calls the C interface map() function the C interface map() calls the library map() method the library map() calls back to the invisible C callback function the invisible callback calls the mrmpi class callback method the mrmpi callback calls the user map() callback function the user map() calls the mrmpi class add() method to register a key/value pair the mrmpi class add() calls the invisible C add() function the invisible add() calls the C interface add() function the C interface add() calls the library add() method :ul Thus 3 calls have become 11 due to the 3 additional layers data must pass through. Some of these pass throughs are very simple, but others require massaging and copying of data, like the pickling/unpickling described above, which occurs in the mrmpi class methods. I was somewhat surprised this double-callback sequence works as well and as transparently as it does - Python ctypes is amazing! :line Error messages :link(error),h4 The error messages printed out by the MR-MPI library are hopefully self-explanatory. At some point they will be listed in these doc pages. 
mrmpi-1.0~20131122/doc/Examples.txt0000644000175000017500000001217711734437536016473 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line Examples :h3 This section describes the MapReduce programs provided in the examples directory of the distribution: "wordfreq"_#word "rmat"_#rmat :ul Each are provided in 3 formats: as a C++ program, C program, and Python script. Note that the Python scripts use the PyPar package which provides a Python/MPI interface, as discussed above in the "Python Interface"_Interface_python.html section, so you must have "PyPar"_pypar installed in your Python to run them. :link(pypar,http://datamining.anu.edu.au/~ole/pypar) The C++ and C programs can be built (assuming you have already built the MR-MPI library) by typing make -f Makefile.foo :pre from within the examples directory, using one of the provided Makefiles. As with the library itself, you may need to edit one of the Makefiles to create a new version appropriate to your machine. The examples directory also includes input scripts for the scripting interface to MR-MPI called OINK. There are scripts for word frequency (in.wordfreq), R-MAT generation (in.rmat) and various graph algorithms (in.cc, in.tri, in.luby, in.sssp, in.pagerank), described in the paper by "Plimpton and Devine"_#Plimpton. OINK has its own "manual and doc pages"_../oinkdoc/Manual.html. To run these scripts you will need to build OINK, and then run one of the scripts as follows: oink_machine < in.rmat :pre :line Word frequency example :link(word),h4 The wordfreq programs implement the word frequency counting algorithm described above in "this section"_Whatis.html. The wordfreq programs are run by giving a list of text files as arguments, e.g. 
wordfreq ~/mydir/*.cpp mpirun -np 8 wordfreq ~/mydir/*.cpp cwordfreq ~/mydir/*.cpp mpirun -np 8 cwordfreq ~/mydir/*.cpp python wordfreq.py ~/mydir/*.cpp mpirun -np 8 python wordfreq.py ~/mydir/*.cpp :pre Total word counts and a list of the top 10 words should be printed to the screen, along with the time to perform the operation. The 3 different versions of the wordfreq program should give the same answers, although if non-text files are used, the parsing of the contents into words can be done differently by the C library strtok() function and the Python string "split" method. :line R-MAT matrices example :link(rmat),h4 The rmat programs generate a particular form of randomized sparse matrix known as an "R-MAT matrix"_#RMAT. Depending on the parameters chosen, the sparsity pattern in the resulting matrix can be highly non-uniform, and a good model for irregular graphs, such as ones representing a network of computers or WWW page links. The rmat programs are run by specifying a few parameters, e.g. rmat N Nz a b c d frac seed outfile mpirun -np 8 rmat N Nz a b c d frac seed outfile crmat N Nz a b c d frac seed outfile mpirun -np 8 crmat N Nz a b c d frac seed outfile python rmat.py N Nz a b c d frac seed outfile mpirun -np 8 python rmat.py N Nz a b c d frac seed outfile :pre The meaning of the parameters is as follows. Note that only matrices with a power-of-2 number of rows can be generated, so specifying N=20 creates a matrix with over a million rows. 2^N = # of rows in matrix Nz = average # of non-zeroes per row a,b,c,d = generation params for matrix entries, must sum to 1 frac = randomization parameter between 0 and 1 seed = random # seed, positive integer outfile = optional output file :ul A full description of the R-MAT generation algorithm is beyond the scope of this doc page, but here's the brief version. The {a,b,c,d} parameters are effectively weights on the 4 quadrants of the matrix.
To generate a single new matrix element, one quadrant is chosen, with a probability proportional to its weight. This operation is repeated recursively within the chosen quadrant, applying the {frac} parameter to randomize the weights a bit. After N iterations, a single I,J matrix location has been identified and its value is set (to 1 in this case). The total number of matrix entries generated is Nz * 2^N. This procedure can generate duplicates, so those are removed, and new elements generated until the desired number is reached. When completed, the matrix statistics are printed to the screen, along with the time to generate the matrix. If the optional {outfile} parameter is specified, then the matrix entries are written to files (one per processor). Each line of any file has the form I J value :pre where I,J are the matrix row,column and value is the matrix entry (all are 1 in this case). If the files are concatenated together, the full set of matrix entries should result. The 3 different versions of the rmat programs should give the same answers in a statistical sense. The answers will not be identical because the same random number generation scheme is not used in all 3 programs. :line :link(RMAT) [(RMAT)] D. Chakrabarti, Y. Zhan, C. Faloutsos, "R-MAT: A Recursive Model for Graph Mining", in Proceedings of the SIAM Conference on Data Mining (2004), available at http://www.cs.cmu.edu/~deepay/mywww/papers/siam04.pdf. :link(Plimpton) [(Plimpton)] Plimpton and Devine, "MapReduce in MPI for Large-Scale Graph Algorithms", to appear in Parallel Computing (2011).
mrmpi-1.0~20131122/doc/broadcast.txt0000644000175000017500000000214611734437536016652 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce broadcast() method :h3 uint64_t MapReduce::broadcast(int root) :pre This calls the broadcast() method of a MapReduce object, which deletes the key/value pairs of a KeyValue object on all processors except root, and then broadcasts the key/value pairs owned by the root processor to all the other processors. The end result is that all processors have a copy of the key/value pairs initially owned by the root processor. The resulting set of distributed key/value pairs will have P copies of each entry, where P = the # of processors. This will in general not be useful for further MapReduce operations, but it can be useful after a "gather()"_gather.html before doing a final "reduce()"_reduce.html where you want to give each processor access to the entire gathered result and let it make a local copy of the datums. This method requires parallel communication as processors send their key/value pairs to other processors. :line [Related methods]: "gather()"_gather.html mrmpi-1.0~20131122/doc/print.txt0000644000175000017500000000407611734437536016050 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce print() method :h3 void MapReduce::print(int proc, int nstride, int kflag, int vflag) void MapReduce::print(char *file, int fflag, int proc, int nstride, int kflag, int vflag) :pre This calls the print() method of a MapReduce object. The first variant prints out the KeyValue or KeyMultiValue pairs to the screen. The second variant prints to one or more files. This can be useful for debugging purposes. If {proc} < 0, then all processors print their information, one processor at a time.
If {proc} >= 0, then only the specified {proc} prints its information. For printing to files, if {fflag} = 0, then all processors print in succession to the named file. If {fflag} = 1, then each processor writes to file.P, where P = 0 to Nprocs-1. Each processor prints every Nth of its pairs, where N = {nstride}. Thus if {nstride} = 1, all pairs are printed. The {kflag} and {vflag} settings control the format of the printed output. Only a limited set of choices is available. If these choices do not match the format of your keys and values, you will need to pass your data to a map() or reduce() function you write yourself to print them. These can be invoked by the "map()"_map.html or "reduce()"_reduce.html methods. These are the recognized {kflag} and {vflag} settings: flag = 0 for NULL flag = 1 for 32-bit positive integer (int) flag = 2 for 64-bit unsigned integer (uint64_t) flag = 3 for 32-bit floating point value (float) flag = 4 for 64-bit floating point value (double) flag = 5 for a NULL-terminated string flag = 6 for a pair of 32-bit positive integers (int int) flag = 7 for a pair of 64-bit unsigned integers (uint64_t uint64_t) :ul For example, using {kflag} = 1 and {vflag} = 7, would be appropriate for keys that are 32-bit integers, and values that are a pair of 64-bit integers. For KeyMultiValue pairs, the {vflag} setting is used to format each output value in the multi-value.
:line [Related methods]: "collate()"_collate.html mrmpi-1.0~20131122/doc/stats.txt0000644000175000017500000000374011734437536016047 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce kv_stats() method :h3 MapReduce kmv_stats() method :h3 MapReduce cummulative_stats() method :h3 uint64_t MapReduce::kv_stats(int level) uint64_t MapReduce::kmv_stats(int level) void MapReduce::cummulative_stats(int level, int reset) :pre Calling the kv_stats() method prints statistics about the KeyValue object stored within the MapReduce object. The total number of key/value pairs is returned. If level = 0 is specified, nothing else is done. If level = 1 is specified, a one-line summary is printed for all the key/value pairs across all processors. If a level = 2 is specified, per-processor information is also printed in a one-line histogram format. Calling the kmv_stats() method prints statistics about the KeyMultiValue object stored within the MapReduce object. The total number of key/multi-value pairs is returned. If level = 0 is specified, nothing else is done. If level = 1 is specified, a one-line summary is printed for all the key/multi-value pairs across all processors. If a level = 2 is specified, per-processor information is also printed in a one-line histogram format. Calling the cummulative_stats() method prints statistics about the cummulative memory allocation, inter-processor communication volume, and file I/O volume that has been performed by all MapReduce operations up to this point, by all MapReduce objects your program has instantiated. If level = 1 is specified, a brief summary is printed. If level = 2 is specified, per-processor information is also printed in a one-line histogram format. If the {reset} flag is set to 1, then the counters for these quantities are reset to 0. 
This cummulative_stats() method is called internally when your program destructs the last MapReduce object, using the "verbosity"_settings.html setting for the level argument. If verbosity is set to 0, then the method is not called. mrmpi-1.0~20131122/doc/open.txt0000644000175000017500000000500211734437536015643 0ustar mathieumathieu"MapReduce-MPI WWW Site"_mws - "MapReduce-MPI Documentation"_md :c :link(mws,http://mapreduce.sandia.gov) :link(md,Manual.html) :line MapReduce open() method :h3 MapReduce close() method :h3 void MapReduce::open() void MapReduce::open(int addflag) uint64_t MapReduce::close() :pre These call the open() and close() methods of a MapReduce object. This is only necessary when you will be performing a map() or reduce() that generates key/value pairs, and you wish to add pairs not only to the MapReduce object which is invoking the map() and reduce(), but also to one or more other MapReduce objects. In order to do this, you need to invoke the open() and close() methods on the other MapReduce object(s), so that they can accumulate new key/value pairs properly. The close() method returns the total number of key/value pairs in the KeyValue object. Here is an example of how this is done: MapReduce *mr = new MapReduce() MapReduce *mr2 = new MapReduce() mr2->open() mr->map(1000,mymap,mr2->kv); mr2->close() :pre void mymap(int itask, KeyValue *kv, void *ptr) \{ ... kv->add(key1,key1bytes,value1,value1bytes); KeyValue *kv2 = (KeyValue *) ptr; kv2->add(key2,key2bytes,value2,value2bytes); \} :pre The mymap() function is being called from the "mr" MapReduce object, and can add key/value pairs to "mr" in the usual way, via the kv->add() function call. But it can also add key/value pairs to the "mr2" MapReduce object via the kv2->add() function call. 
To do this, 3 things were necessary: call the open() method of mr2 before the map() was invoked pass a pointer to the map() which allows mymap() to retrieve the pointer to mr2's internal KeyValue object call the close() method of mr2 after the map() was invoked :ul The second bullet point was accomplishsed by passing mr2->kv directly to the map() method, but other variations are possible. For example, a pointer to a data structure could be passed, which contains pointers to several other MapReduce objects. In this case, the open() and close() methods for each of the other MapReduce objects would need to be called appropriately before and after the map() method, assuming they would each have key/value pairs added to them by the mymap() function. You can call open() and close() as many times as needed, but note calls to open() and close() should always come in pairs. You should not call close() when an open() has not been invoked. And you should not open() a second time without calling close() first. :line [Related methods]: "map()"_map.html, "reduce"_reduce.html mrmpi-1.0~20131122/src/0000755000175000017500000000000012243675123014156 5ustar mathieumathieumrmpi-1.0~20131122/src/cmapreduce.h0000644000175000017500000001334612243674342016450 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. 
------------------------------------------------------------------------- */ /* C or Fortran style interface to MapReduce library */ /* ifdefs allow this file to be included in a C program */ #include "mpi.h" #include "stdint.h" #ifdef __cplusplus extern "C" { #endif void *MR_create(MPI_Comm comm); void *MR_create_mpi(); void *MR_create_mpi_finalize(); void MR_destroy(void *MRptr); void *MR_get_kv(void *MRptr); void *MR_get_kmv(void *MRptr); void *MR_copy(void *MRptr); uint64_t MR_add(void *MRptr, void *MRptr2); uint64_t MR_aggregate(void *MRptr, int (*myhash)(char *, int)); uint64_t MR_broadcast(void *MRptr, int); uint64_t MR_clone(void *MRptr); uint64_t MR_close(void *MRptr); uint64_t MR_collapse(void *MRptr, char *key, int keybytes); uint64_t MR_collate(void *MRptr, int (*myhash)(char *, int)); uint64_t MR_compress(void *MRptr, void (*mycompress)(char *, int, char *, int, int *, void *KVptr, void *APPptr), void *APPptr); uint64_t MR_convert(void *MRptr); uint64_t MR_gather(void *MRptr, int numprocs); uint64_t MR_map(void *MRptr, int nmap, void (*mymap)(int, void *KVptr, void *APPptr), void *APPptr); uint64_t MR_map_add(void *MRptr, int nmap, void (*mymap)(int, void *KVptr, void *APPptr), void *APPptr, int addflag); uint64_t MR_map_file(void *MRptr, int nstr, char **strings, int self, int recurse, int readfile, void (*mymap)(int, char *, void *KVptr, void *APPptr), void *APPptr); uint64_t MR_map_file_add(void *MRptr, int nstr, char **strings, int self, int recurse, int readfile, void (*mymap)(int, char *, void *KVptr, void *APPptr), void *APPptr, int addflag); uint64_t MR_map_file_char(void *MRptr, int nmap, int nstr, char **strings, int recurse, int readflag, char sepchar, int delta, void (*mymap)(int, char *, int, void *KVptr, void *APPptr), void *APPptr); uint64_t MR_map_file_char_add(void *MRptr, int nmap, int nstr, char **strings, int recurse, int readflag, char sepchar, int delta, void (*mymap)(int, char *, int, void *KVptr, void *APPptr), void *APPptr, int 
addflag); uint64_t MR_map_file_str(void *MRptr, int nmap, int nstr, char **strings, int recurse, int readflag, char *sepstr, int delta, void (*mymap)(int, char *, int, void *KVptr, void *APPptr), void *APPptr); uint64_t MR_map_file_str_add(void *MRptr, int nmap, int nstr, char **strings, int recurse, int readflag, char *sepstr, int delta, void (*mymap)(int, char *, int, void *KVptr, void *APPptr), void *APPptr, int addflag); uint64_t MR_map_mr(void *MRptr, void *MRptr2, void (*mymap)(uint64_t, char *, int, char *, int, void *KVptr, void *APPptr), void *APPptr); uint64_t MR_map_mr_add(void *MRptr, void *MRptr2, void (*mymap)(uint64_t, char *, int, char *, int, void *KVptr, void *APPptr), void *APPptr, int addflag); void MR_open(void *MRptr); void MR_open_add(void *MRptr, int addflag); void MR_print(void *MRptr, int proc, int nstride, int kflag, int vflag); void MR_print_file(void *MRptr, char *file, int fflag, int proc, int nstride, int kflag, int vflag); uint64_t MR_reduce(void *MRptr, void (*myreduce)(char *, int, char *, int, int *, void *KVptr, void *APPptr), void *APPptr); uint64_t MR_multivalue_blocks(void *MRptr); void MR_multivalue_block_select(void *MRptr, int which); int MR_multivalue_block(void *MRptr, int iblock, char **ptr_multivalue, int **ptr_valuesizes); uint64_t MR_scan_kv(void *MRptr, void (*myscan)(char *, int, char *, int, void *), void *APPptr); uint64_t MR_scan_kmv(void *MRptr, void (*myscan)(char *, int, char *, int, int *, void *), void *APPptr); uint64_t MR_scrunch(void *MRptr, int numprocs, char *key, int keybytes); uint64_t MR_sort_keys(void *MRptr, int (*mycompare)(char *, int, char *, int)); uint64_t MR_sort_keys_flag(void *MRptr, int); uint64_t MR_sort_values(void *MRptr, int (*mycompare)(char *, int, char *, int)); uint64_t MR_sort_values_flag(void *MRptr, int); uint64_t MR_sort_multivalues(void *MRptr, int (*mycompare)(char *, int, char *, int)); uint64_t MR_sort_multivalues_flag(void *MRptr, int); uint64_t MR_kv_stats(void *MRptr, 
int level); uint64_t MR_kmv_stats(void *MRptr, int level); void MR_cummulative_stats(void *MRptr, int level, int reset); void MR_set_mapstyle(void *MRptr, int value); void MR_set_all2all(void *MRptr, int value); void MR_set_verbosity(void *MRptr, int value); void MR_set_timer(void *MRptr, int value); void MR_set_memsize(void *MRptr, int value); void MR_set_minpage(void *MRptr, int value); void MR_set_maxpage(void *MRptr, int value); void MR_set_keyalign(void *MRptr, int value); void MR_set_valuealign(void *MRptr, int value); void MR_set_fpath(void *MRptr, char *str); void MR_kv_add(void *KVptr, char *key, int keybytes, char *value, int valuebytes); void MR_kv_add_multi_static(void *KVptr, int n, char *key, int keybytes, char *value, int valuebytes); void MR_kv_add_multi_dynamic(void *KVptr, int n, char *key, int *keybytes, char *value, int *valuebytes); #ifdef __cplusplus } #endif mrmpi-1.0~20131122/src/keyvalue.h0000644000175000017500000000746311515373323016164 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. 
------------------------------------------------------------------------- */

#ifndef KEY_VALUE_H
#define KEY_VALUE_H

#include "mpi.h"
#include "stdio.h"
#include "stdint.h"

namespace MAPREDUCE_NS {

// KeyValue = paged collection of key/value (KV) pairs owned by one processor
// pairs are accumulated in a single in-memory page (page); per-page sizes
//   and counts are tracked in the pages list, and filename/fp/fileflag
//   describe a file that stores the KV if needed
// the public counters (nkv, ksize, vsize, esize, fsize, msize) summarize
//   the entire KV and are read directly by callers

class KeyValue {
  friend class MapReduce;

 public:
  uint64_t nkv;                   // # of KV pairs in entire KV on this proc
  uint64_t ksize;                 // exact size of all key data
  uint64_t vsize;                 // exact size of all value data
  uint64_t esize;                 // exact size of all data in KV
  uint64_t fsize;                 // size of KV file
  int msize;                      // size of largest KV pair across all procs

  char *page;                     // in-memory page
  int memtag;                     // memory page ID
  int npage;                      // # of pages in entire KV

  KeyValue(class MapReduce *, int, int,
	   class Memory *, class Error *, MPI_Comm);
  ~KeyValue();

  // in-memory page management

  void allocate();
  void set_page(uint64_t, char *, int);
  void deallocate(int);
  void truncate(int, int, uint64_t);
  void copy(KeyValue *);
  void append();
  void complete();
  void complete_dummy();

  // page access for callers that loop over the KV
  // request_info() hands back the page ptr, request_page() the per-page
  //   sizes via its three reference args

  int request_info(char **);
  int request_page(int, uint64_t &, uint64_t &, uint64_t &);
  void overwrite_page(int);
  void close_file();

  // public add() overloads: one pair, or several pairs at once
  // NOTE(review): the leading int of the multi-pair forms is presumably
  //   the pair count - confirm against keyvalue.cpp

  void add(char *, int, char *, int);
  void add(int, char *, int, char *, int);
  void add(int, char *, int *, char *, int *);

  void print(FILE *, int, int, int);

 private:
  MapReduce *mr;
  MPI_Comm comm;
  class Memory *memory;
  class Error *error;
  int me;

  uint64_t pagesize;                // size of page
  int kalign,valign;                // alignment for keys & values
  int talign;                       // alignment of entire KV pair
  int kalignm1,valignm1,talignm1;   // alignments-1 for masking
  int twolenbytes;                  // size of single key,value lengths

  // in-memory page

  int nkey;                         // # of KV pairs in page
  uint64_t keysize;                 // exact size of key data in page
  uint64_t valuesize;               // exact size of value data in page
  uint64_t alignsize;               // current size of page with alignment

  // virtual pages

  struct Page {
    uint64_t keysize;               // exact size of keys
    uint64_t valuesize;             // exact size of values
    uint64_t exactsize;             // exact size of all data in page
    uint64_t alignsize;             // aligned size of all data in page
    uint64_t filesize;              // rounded-up alignsize for file I/O
    uint64_t fileoffset;            // summed filesize of all previous pages
    int nkey;                       // # of KV pairs
  };

  Page *pages;                      // list of pages
  int maxpage;                      // max # of pages currently allocated

  // file info

  char *filename;                   // filename to store KV if needed
  FILE *fp;                         // file ptr
  int fileflag;                     // 1 if file exists, 0 if not

  // private methods

  void add(KeyValue *);
  void add(int, char *);
  void add(char *);
  void add(int, char *, uint64_t, uint64_t, uint64_t);
  void add(int, char *, int, int);

  void init_page();
  void create_page();
  void write_page();
  void read_page(int, int);
  uint64_t roundup(uint64_t,int);
};

}

#endif
mrmpi-1.0~20131122/src/memory.cpp0000644000175000017500000000532311347542056016177 0ustar mathieumathieu/* ----------------------------------------------------------------------
   MR-MPI = MapReduce-MPI library
   http://www.cs.sandia.gov/~sjplimp/mapreduce.html
   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories

   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the modified Berkeley Software Distribution (BSD) License.

   See the README file in the top-level MapReduce directory.
------------------------------------------------------------------------- */ #define _XOPEN_SOURCE 600 // needed for posix_memalign() in stdlib.h #include "mpi.h" #include "stdlib.h" #include "stdio.h" #include "memory.h" #include "error.h" using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- */ Memory::Memory(MPI_Comm comm) { error = new Error(comm); } /* ---------------------------------------------------------------------- */ Memory::~Memory() { delete error; } /* ---------------------------------------------------------------------- safe malloc ------------------------------------------------------------------------- */ void *Memory::smalloc(size_t n, const char *name) { if (n == 0) return NULL; void *ptr = malloc(n); if (ptr == NULL) { char str[128]; sprintf(str,"Failed to allocate %lu bytes for array %s",n,name); error->one(str); } return ptr; } /* ---------------------------------------------------------------------- safe malloc with alignment ------------------------------------------------------------------------- */ void *Memory::smalloc_align(size_t n, int nalign, const char *name) { if (n == 0) return NULL; //void *ptr; //int ierror = posix_memalign(&ptr,nalign,n); //if (ierror) { void *ptr = malloc(n); if (ptr == NULL) { char str[128]; sprintf(str,"Failed to allocate %lu bytes for array %s",n,name); error->one(str); } return ptr; } /* ---------------------------------------------------------------------- safe free ------------------------------------------------------------------------- */ void Memory::sfree(void *ptr) { if (ptr == NULL) return; free(ptr); } /* ---------------------------------------------------------------------- safe realloc ------------------------------------------------------------------------- */ void *Memory::srealloc(void *ptr, size_t n, const char *name) { if (n == 0) { sfree(ptr); return NULL; } ptr = realloc(ptr,n); if (ptr == NULL) { char str[128]; sprintf(str,"Failed to 
reallocate %lu bytes for array %s",n,name); error->one(str); } return ptr; } mrmpi-1.0~20131122/src/spool.h0000644000175000017500000000435211347727352015475 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ #ifndef SPOOL_H #define SPOOL_H #include "stdio.h" namespace MAPREDUCE_NS { class Spool { public: uint64_t nkv; // # of KV entries in entire spool file uint64_t esize; // size of all entries (with alignment) uint64_t fsize; // size of spool file char *page; // in-memory page int npage; // # of pages in Spool Spool(int, class MapReduce *, class Memory *, class Error *); ~Spool(); void set_page(uint64_t, char *); void complete(); void truncate(int, int, uint64_t); int request_info(char **); int request_page(int); void add(int, char *); void add(int, uint64_t, char *); private: class MapReduce *mr; class Memory *memory; class Error *error; uint64_t pagesize; // size of page // in-memory page int nkey; // # of entries uint64_t size; // current size of entries // virtual pages struct Page { uint64_t size; // size of entries uint64_t filesize; // rounded-up size for file I/O int nkey; // # of entries }; Page *pages; // list of pages in Spool int maxpage; // max # of pages currently allocated // file info char *filename; // filename to store Spool if needed int fileflag; // 1 if file exists, 0 if not FILE *fp; // file ptr // private methods void create_page(); void write_page(); void 
read_page(int); uint64_t roundup(uint64_t,int); }; } #endif mrmpi-1.0~20131122/src/keymultivalue.cpp0000644000175000017500000014501511515375770017576 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ #include "mpi.h" #include "stdlib.h" #include "stdio.h" #include "string.h" #include "stdint.h" #include "keymultivalue.h" #include "mapreduce.h" #include "keyvalue.h" #include "spool.h" #include "hash.h" #include "memory.h" #include "error.h" using namespace MAPREDUCE_NS; #define MIN(A,B) ((A) < (B)) ? (A) : (B) #define MAX(A,B) ((A) > (B)) ? 
(A) : (B) #define ROUNDUP(A,B) (char *) (((uint64_t) A + B) & ~B); #define ALIGNFILE 512 // same as in mapreduce.cpp #define PARTITIONCHUNK 16 #define SETCHUNK 16 #define PAGECHUNK 16 #define MINSPOOLBYTES 16384 #define INTMAX 0x7FFFFFFF enum{KVFILE,KMVFILE,SORTFILE,PARTFILE,SETFILE}; // same as in mapreduce.cpp #define ONEMAX INTMAX // set small if wish to trigger multi-block KMV pairs // do not set smaller than MINSPOOLBYTES // do not set smaller than ALIGNFILE //#define KMV_DEBUG 1 // set if want debug output from convert() /* ---------------------------------------------------------------------- */ KeyMultiValue::KeyMultiValue(MapReduce *mr_caller, int memkalign, int memvalign, Memory *memory_caller, Error *error_caller, MPI_Comm comm_caller) { mr = mr_caller; memory = memory_caller; error = error_caller; comm = comm_caller; MPI_Comm_rank(comm,&me); filename = mr->file_create(KMVFILE); fileflag = 0; fp = NULL; pages = NULL; npage = maxpage = 0; // talign = max of (kalign,valign,int) kalign = memkalign; valign = memvalign; talign = MAX(kalign,valign); talign = MAX(talign,sizeof(int)); ualign = sizeof(uint64_t); kalignm1 = kalign-1; valignm1 = valign-1; talignm1 = talign-1; ualignm1 = ualign-1; twolenbytes = 2*sizeof(int); threelenbytes = 3*sizeof(int); if (ONEMAX < MINSPOOLBYTES || ONEMAX < ALIGNFILE) error->all("KeyMultiValue settings are inconsistent"); nkmv = ksize = vsize = esize = fsize = 0; init_page(); page = NULL; memtag = -1; allocate(); } /* ---------------------------------------------------------------------- */ KeyMultiValue::~KeyMultiValue() { // file may be open, if KMV was being read by MR::compress or MR::reduce // users may use request_page() via multivalue_block() multiple times, // so cannot close file on last page in request_page() if (fp) { fclose(fp); fp = NULL; } deallocate(1); memory->sfree(pages); if (fileflag) { remove(filename); mr->hiwater(1,fsize); } delete [] filename; } /* 
---------------------------------------------------------------------- if need one, request an in-memory page ------------------------------------------------------------------------- */ void KeyMultiValue::allocate() { if (page == NULL) page = mr->mem_request(1,pagesize,memtag); } /* ---------------------------------------------------------------------- if allocated, mark in-memory page as unused if forceflag == 1, always do this else: only do this if MR outofcore flag is set or npage > 1 (since values currently in page are now useless) ------------------------------------------------------------------------- */ void KeyMultiValue::deallocate(int forceflag) { if (forceflag || mr->outofcore > 0 || npage > 1) { if (page) { mr->mem_unmark(memtag); page = NULL; memtag = -1; } } } /* ---------------------------------------------------------------------- copy contents of another KMV into me input KMV should never be self input KMV will have same pagesize and alignment as me called by MR::copy() ------------------------------------------------------------------------- */ void KeyMultiValue::copy(KeyMultiValue *kmv) { if (kmv == this) error->all("Cannot perform KeyMultiValue copy on self"); // pages will be loaded into memory assigned to other KMV // temporarily set my in-memory page to that of other KMV // write_page() will then write from that page to my file char *page_hold = page; int npage_other = kmv->request_info(&page); for (int ipage = 0; ipage < npage_other-1; ipage++) { nkey = kmv->request_page(ipage,0,keysize,valuesize,alignsize); create_page(); write_page(); npage++; } // copy last page to my memory, then reset my page to my memory nkey = kmv->request_page(npage_other-1,0,keysize,valuesize,alignsize); memcpy(page_hold,page,alignsize); page = page_hold; } /* ---------------------------------------------------------------------- complete the KMV after data has been added to it called by MR methods after creating & populating a KMV 
------------------------------------------------------------------------- */ void KeyMultiValue::complete() { create_page(); // if disk file exists or MR outofcore flag set: // write current in-memory page to disk, close file if (fileflag || mr->outofcore > 0) { write_page(); fclose(fp); fp = NULL; } npage++; init_page(); // give up in-memory page if possible deallocate(0); // set sizes for entire KMV nkmv = ksize = vsize = esize = 0; for (int ipage = 0; ipage < npage; ipage++) { nkmv += pages[ipage].nkey; ksize += pages[ipage].keysize; vsize += pages[ipage].valuesize; esize += pages[ipage].exactsize; } if (fileflag) { fsize = pages[npage-1].fileoffset + pages[npage-1].filesize; mr->hiwater(0,fsize); } } /* ---------------------------------------------------------------------- return # of pages and ptr to in-memory page ------------------------------------------------------------------------- */ int KeyMultiValue::request_info(char **ptr) { *ptr = page; return npage; } /* ---------------------------------------------------------------------- ready a page of KMV data caller is looping over data in KMV writeflag = 0 when called by MR::compress() or MR::reduce() or copy() writeflag = 1 when called by MR::sort_multivalues() ------------------------------------------------------------------------- */ int KeyMultiValue::request_page(int ipage, int writeflag, uint64_t &keysize_page, uint64_t &valuesize_page, uint64_t &alignsize_page) { // load page from file if necessary if (fileflag) read_page(ipage,writeflag); keysize_page = pages[ipage].keysize; valuesize_page = pages[ipage].valuesize; alignsize_page = pages[ipage].alignsize; return pages[ipage].nkey; } /* ---------------------------------------------------------------------- return # of values in a multi-page KMV also return # of blocks of values on subsequent pages ------------------------------------------------------------------------- */ uint64_t KeyMultiValue::multivalue_blocks(int ipage, int &nblock) { nblock = 
pages[ipage].nblock; return pages[ipage].nvalue_total; } /* ---------------------------------------------------------------------- overwrite a reorganized page of KMV data onto disk reset npage to ipage so write_page() will work page properties stay the same so no call to create_page() called by MR::sort_multivalues() ------------------------------------------------------------------------- */ void KeyMultiValue::overwrite_page(int ipage) { int npage_save = npage; npage = ipage; if (fileflag || mr->outofcore > 0) write_page(); npage = npage_save; } /* ---------------------------------------------------------------------- close disk file if open called by MR::sort_multivalues() ------------------------------------------------------------------------- */ void KeyMultiValue::close_file() { if (fp) { fclose(fp); fp = NULL; } } /* ---------------------------------------------------------------------- add a key/value pair as a one-value KMV called by clone() ------------------------------------------------------------------------- */ void KeyMultiValue::add(char *key, int keybytes, char *value, int valuebytes) { char *iptr = &page[alignsize]; char *kptr = iptr + threelenbytes + sizeof(int); kptr = ROUNDUP(kptr,kalignm1); char *vptr = kptr + keybytes; vptr = ROUNDUP(vptr,valignm1); char *nptr = vptr + valuebytes; nptr = ROUNDUP(nptr,talignm1); int kmvbytes = nptr - iptr; // page is full, write to disk if (alignsize + kmvbytes > pagesize || nkey == INTMAX) { if (alignsize == 0) { printf("KeyMultiValue pair size/limit: %d %u\n",kmvbytes,pagesize); error->one("Single key/multivalue pair exceeds page size"); } create_page(); write_page(); npage++; init_page(); add(key,keybytes,value,valuebytes); return; } int *intptr = (int *) iptr; *(intptr++) = 1; *(intptr++) = keybytes; *(intptr++) = valuebytes; *(intptr++) = valuebytes; memcpy(kptr,key,keybytes); memcpy(vptr,value,valuebytes); nkey++; nvalue++; keysize += keybytes; valuesize += valuebytes; alignsize += kmvbytes; } /* 
---------------------------------------------------------------------- clone a KV directly into a KMV, one KV pair -> one KMV pair each processor works on only its data called by MR::clone() ------------------------------------------------------------------------- */ void KeyMultiValue::clone(KeyValue *kv) { // loop over KV, turning each KV pair into a KMV pair int nkey_kv,keybytes,valuebytes; uint64_t kdummy,vdummy,adummy; char *ptr,*key,*value; char *page_kv; int npage_kv = kv->request_info(&page_kv); for (int ipage = 0; ipage < npage_kv; ipage++) { nkey_kv = kv->request_page(ipage,kdummy,vdummy,adummy); ptr = page_kv; for (int i = 0; i < nkey_kv; i++) { keybytes = *((int *) ptr); valuebytes = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); key = ptr; ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); value = ptr; ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1); add(key,keybytes,value,valuebytes); } } } /* ---------------------------------------------------------------------- collapse a KV into a single KMV pair new KMV key = key, new KMV multivalue = key,value,key,value,etc from KV each processor works on only its data called by MR::collapse() ------------------------------------------------------------------------- */ void KeyMultiValue::collapse(char *key, int keybytes, KeyValue *kv) { // check if new KMV fits in one page uint64_t nkey_kv = kv->nkv; uint64_t ksize_kv = kv->ksize; uint64_t vsize_kv = kv->vsize; uint64_t totalsize = threelenbytes + 2*nkey_kv*sizeof(int); totalsize = roundup(totalsize,kalign); totalsize += keybytes; totalsize = roundup(totalsize,valign); totalsize += ksize_kv + vsize_kv; totalsize = roundup(totalsize,talign); if (2*nkey_kv <= ONEMAX && ksize_kv+vsize_kv <= ONEMAX && totalsize <= pagesize) collapse_one(key,keybytes,kv,totalsize); else collapse_many(key,keybytes,kv); } /* ---------------------------------------------------------------------- create a single KMV page from a collapsed KV called by 
KMV::collapse() ------------------------------------------------------------------------- */ void KeyMultiValue::collapse_one(char *key, int keybytes, KeyValue *kv, uint64_t totalsize) { // create memory layout for one large KMV int *iptr = (int *) page; *(iptr++) = 2*kv->nkv; *(iptr++) = keybytes; *(iptr++) = kv->ksize + kv->vsize; int *valuesizes = iptr; iptr += 2*kv->nkv; char *cptr = ROUNDUP((char *) iptr,kalignm1); memcpy(cptr,key,keybytes); cptr += keybytes; char *multivalue = ROUNDUP(cptr,valignm1); // loop over KV, copying its keys/values into KMV valuesizes/multivalue char *page_kv; int npage_kv = kv->request_info(&page_kv); int nkey_kv,keybytes_kv,valuebytes_kv; uint64_t kdummy,vdummy,adummy; char *ptr,*key_kv,*value_kv; int ivalue = 0; for (int ipage = 0; ipage < npage_kv; ipage++) { nkey_kv = kv->request_page(ipage,kdummy,vdummy,adummy); ptr = page_kv; for (int i = 0; i < nkey_kv; i++) { keybytes_kv = *((int *) ptr); valuebytes_kv = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); key_kv = ptr; ptr += keybytes_kv; ptr = ROUNDUP(ptr,valignm1); value_kv = ptr; ptr += valuebytes_kv; ptr = ROUNDUP(ptr,talignm1); valuesizes[ivalue++] = keybytes_kv; memcpy(multivalue,key_kv,keybytes_kv); multivalue += keybytes_kv; valuesizes[ivalue++] = valuebytes_kv; memcpy(multivalue,value_kv,valuebytes_kv); multivalue += valuebytes_kv; } } nkey = 1; nvalue = 2*kv->nkv; keysize = keybytes; valuesize = kv->ksize + kv->vsize; alignsize = totalsize; } /* ---------------------------------------------------------------------- create multiple KMV pages from a collapsed KV called by KMV::collapse() ------------------------------------------------------------------------- */ void KeyMultiValue::collapse_many(char *key, int keybytes, KeyValue *kv) { error->all("Collapse of multi-block KMV not yet supported"); } /* ---------------------------------------------------------------------- convert a KV with non-unique keys into a KMV with unique keys each 
processor works on only its data called by MR::convert() ------------------------------------------------------------------------- */ void KeyMultiValue::convert(KeyValue *kv) { // setup partition and set data structs maxpartition = PARTITIONCHUNK; partitions = (Partition *) memory->smalloc(maxpartition*sizeof(Partition),"KMV:partitions"); maxset = SETCHUNK; sets = (Set *) memory->smalloc(maxset*sizeof(Set),"KMV:sets"); // estimate = # of unique keys that can be stored in 2 pages of memunique // each unique key requires roughly: // 1 Unique, 1 hash bucket, keyave bytes for the key itself // set nbuckets to power of 2 just smaller than estimate // also limit nbuckets to INTMAX+1 = 2^31 // since are using 32-bit hash and want hashmask & ibucket to be ints // set aside first portion of memunique for nbuckets // remainder for Unique data structs + keys uint64_t uniquesize; int uniquetag; char *memunique = mr->mem_request(2,uniquesize,uniquetag); uint64_t n = MAX(kv->nkv,1); double keyave = 1.0*kv->ksize/n; double oneave = keyave + sizeof(Unique) + sizeof(Unique *); uint64_t estimate = static_cast (uniquesize/oneave); if (estimate == 0) error->one("Cannot hold any unique keys in memory"); uint64_t nbuckets = 1; while (nbuckets <= estimate) nbuckets *= 2; nbuckets /= 2; nbuckets = MIN(nbuckets,INTMAX); if (nbuckets == INTMAX) nbuckets++; hashmask = nbuckets-1; buckets = (Unique **) memunique; bucketbytes = nbuckets*sizeof(Unique *); ustart = memunique + bucketbytes; ustop = memunique + uniquesize; ukeyoffset = sizeof(Unique); if (ustop-ustart < ukeyoffset) error->one("Cannot hold any unique keys in memory"); // use KV's memory page for all file reading, release it at end // spool_memory() requests MR pages, sets up memory allocs for Spool pages readpage = kv->page; int kv_memtag = kv->memtag; spool_memory(kv); // loop over partitions of KV pairs // partition = portion of KV pairs whose unique keys fit in memory // first partition is entire KV which may be split into more 
partitions npartition = 1; partitions[0].kv = kv; partitions[0].sp = NULL; partitions[0].sp2 = NULL; partitions[0].sortbit = 0; int ipartition = 0; while (ipartition < npartition) { #ifdef KMV_DEBUG printf("PARTITION %d of %d on proc %d\n",ipartition,npartition,me); #endif // scan KVs for unique keys, split partition if too big kv2unique(ipartition); // loop over sets of unique keys // set = portion of uniques which map to single KMV page // first set is entire partition which may be split into more sets nset = 1; sets[0].kv = partitions[ipartition].kv; sets[0].sp = partitions[ipartition].sp; sets[0].sp2 = partitions[ipartition].sp2; for (int iset = 0; iset < nset; iset++) { #ifdef KMV_DEBUG printf("SET %d of %d on proc %d\n",iset,nset,me); #endif // scan unique keys to structure KMV pages // if iset = 0: // loop over all unique keys and create nsets // flag each set as extended if it stores a multi-page KMV // structure KMV page for iset = 0 // if iset > 0: // loop over its unique keys to structure one KMV page // different operation for single-page vs multi-page KMV pairs int split = 0; if (iset == 0) split = unique2kmv_all(); if (sets[iset].extended) unique2kmv_extended(iset); else if (iset > 0) unique2kmv_set(iset); // if multiple KMV pages were induced by unique2kmv_all() // then split partition into one Spool per set if (split) partition2sets(ipartition); // scan KV pairs in set to populate a single KMV page with values // if set is extended, populate multiple pages with values if (sets[iset].extended) kv2kmv_extended(iset); else kv2kmv(iset); // write KMV page to disk unless very last one // complete() will write last page, regular or extended if (iset < nset-1 || ipartition < npartition-1) { create_page(); write_page(); npage++; init_page(); } } // free Spools for all sets and one partition // if nset = 1, then set 0 has Spools from ipartition, so don't re-delete // partition may have original KV, delete it to recover disk/memory space for (int iset = 0; 
iset < nset; iset++) { if (sets[iset].sp) delete sets[iset].sp; if (sets[iset].sp2) delete sets[iset].sp2; } if (nset > 1 && partitions[ipartition].sp) delete partitions[ipartition].sp; if (nset > 1 && partitions[ipartition].sp2) delete partitions[ipartition].sp2; if (partitions[ipartition].kv) delete kv; ipartition++; } // clean up local memory // release memory pages used by convert // release original KV's memory page memory->sfree(partitions); memory->sfree(sets); spool_free(); mr->mem_unmark(uniquetag); mr->mem_unmark(kv_memtag); } /* ---------------------------------------------------------------------- scan KV pairs in a partition to populate list of unique keys create new partitions if KV pairs overflow Unique list ------------------------------------------------------------------------- */ void KeyMultiValue::kv2unique(int ipartition) { int i,ispool,nkey_kv,keybytes,valuebytes,ibucket,pagecut,ncut; int nnew,nbits,mask,shift; uint64_t kdummy,vdummy,adummy,sizecut; uint32_t ubucket; char *ptr,*ptr_start,*key,*keyunique,*unext; Unique *uptr,*uprev; Spool *spextra; Spool **spools; int full = 0; uint64_t count = 0; nunique = 0; unext = ustart; memset(buckets,0,bucketbytes); // loop over KV pairs in this partition // source of KV pairs is either full KV or a Spool, not both // hash key for each KV pair, find it in unique list // either add to unique list or increment cummulative multivalue size KeyValue *kv = partitions[ipartition].kv; Spool *sp = partitions[ipartition].sp; if (sp) sp->set_page(pagesize,readpage); int npage_kv; char *page_kv; if (kv) npage_kv = kv->request_info(&page_kv); else npage_kv = sp->request_info(&page_kv); for (int ipage = 0; ipage < npage_kv; ipage++) { if (kv) nkey_kv = kv->request_page(ipage,kdummy,vdummy,adummy); else nkey_kv = sp->request_page(ipage); ptr = page_kv; for (i = 0; i < nkey_kv; i++) { keybytes = *((int *) ptr); valuebytes = *((int *) (ptr+sizeof(int)));; ptr_start = ptr; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); 
key = ptr; ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1); ibucket = hash(key,keybytes); uptr = find(ibucket,key,keybytes,uprev); count++; // if key is already in unique list, increment counters // if unique list is full, write KV to extra Spool if (uptr) { uptr->nvalue++; uptr->mvbytes += valuebytes; if (full) spextra->add(ptr-ptr_start,ptr_start); continue; } // if space available, add key to unique list uptr = (Unique *) unext; unext += ukeyoffset + keybytes; unext = ROUNDUP(unext,ualignm1); if (unext <= ustop) { if (uprev) uprev->next = uptr; else buckets[ibucket] = uptr; uptr->nvalue = 1; uptr->mvbytes = valuebytes; uptr->next = NULL; uptr->keybytes = keybytes; keyunique = ((char *) uptr) + ukeyoffset; memcpy(keyunique,key,keybytes); nunique++; continue; } // space is not available, so overflow into new Spool files // if this is first overflow KV pair, create partitions // pagecut,ncut,sizecut = info on last KV pair before cut // nnew = estimate of # of new parts based on KV fraction seen so far // nnew = next larger power-of-2, so can use hash bits to split // mask = bitmask on key hash to split into equal subsets // use nbits beyond sortbit of current partition if (full == 0) { full = 1; pagecut = ipage; ncut = i; sizecut = ptr_start - page_kv; count--; if (kv) nnew = (kv->nkv-count)/count + 1; else nnew = (sp->nkv-count)/count + 1; nbits = 0; while ((1 << nbits) < nnew) nbits++; nnew = 1 << nbits; #ifdef KMV_DEBUG if (kv) printf("PARTITION split: %u count, %u total, " "%d nbits, %d nnew\n", count,kv->nkv,nbits,nnew); else printf("PARTITION split: %u count, %u total, " "%d nbits, %d nnew\n", count,sp->nkv,nbits,nnew); #endif mask = nnew-1; shift = 32 - partitions[ipartition].sortbit - nbits; spool_request(nnew,1); spextra = augment_partition(ipartition); spools = new Spool*[nnew]; for (ispool = 0; ispool < nnew; ispool++) { spools[ispool] = create_partition(ispool); partitions[npartition-1].sortbit = 
partitions[ipartition].sortbit + nbits; } } // add KV pair to appropriate partition ubucket = hashlittle(key,keybytes,0); ispool = (ubucket >> shift) & mask; spools[ispool]->add(ptr-ptr_start,ptr_start); } } // truncate KV or Spool source file of KV pairs using cut info // force all new Spool files to close and write to disk if (full) { if (kv) kv->truncate(pagecut,ncut,sizecut); else sp->truncate(pagecut,ncut,sizecut); spextra->complete(); for (ispool = 0; ispool < nnew; ispool++) spools[ispool]->complete(); delete [] spools; } } /* ---------------------------------------------------------------------- scan all unique keys to identify subsets to spool determine nset = # of subsets, nset = 1 initially each set stores first Unique and number of uniques in set if one KMV pair exceeds single page, flag its set as extended also structure the first KMV page for set 0 unless it is extended return 1 if multiple sets induced, else 0 ------------------------------------------------------------------------- */ int KeyMultiValue::unique2kmv_all() { int multiflag,newflag; uint64_t onesize,setsize; char *ptr,*iptr,*nvptr,*kptr,*vptr,*keyunique,*unext; int *intptr; Unique *uptr; // loop over all unique keys // create new set when page size exceeded by single- or multi-page KMV pairs // uptr->set = which set the key is assigned to uptr = (Unique *) ustart; ptr = page; newflag = 1; if (nunique == 0) { sets[nset-1].nunique = 0; sets[nset-1].extended = 0; } for (int i = 0; i < nunique; i++) { if (newflag) { newflag = 0; sets[nset-1].first = uptr; sets[nset-1].nunique = i; sets[nset-1].extended = 0; setsize = 0; } iptr = ptr; nvptr = iptr + threelenbytes; kptr = nvptr + uptr->nvalue*sizeof(int); kptr = ROUNDUP(kptr,kalignm1); vptr = kptr + uptr->keybytes; vptr = ROUNDUP(vptr,valignm1); ptr = vptr + uptr->mvbytes; ptr = ROUNDUP(ptr,talignm1); onesize = ptr - iptr; // test if KMV pair is part of a single-page or multi-page KMV // is multi-page if: // onesize exceeds page size, nvalues 
or mvbytes exceed ONEMAX multiflag = 0; if (onesize > pagesize || uptr->nvalue > ONEMAX || uptr->mvbytes > ONEMAX) multiflag = 1; // single-page KMV pair // if space remains in page and nkey < INTMAX, add it to this set // else close set and add it as first KMV to new set // unless first KMV in Unique, then write current page and leave in set 0 // if added to set 0: // induce structure on KMV page and modify unique info accordingly if (multiflag == 0) { if (setsize + onesize > pagesize || nkey == INTMAX) { sets[nset-1].nunique = i - sets[nset-1].nunique; if (nset == maxset) { maxset += SETCHUNK; sets = (Set *) memory->srealloc(sets,maxset*sizeof(Set),"KMV:sets"); } nset++; sets[nset-1].first = uptr; sets[nset-1].nunique = i; sets[nset-1].extended = 0; setsize = 0; } uptr->set = nset-1; setsize += onesize; if (nset == 1) { intptr = (int *) iptr; *(intptr++) = uptr->nvalue; *(intptr++) = uptr->keybytes; *(intptr++) = uptr->mvbytes; keyunique = ((char *) uptr) + ukeyoffset; memcpy(kptr,keyunique,uptr->keybytes); nkey++; nvalue += uptr->nvalue; keysize += uptr->keybytes; valuesize += uptr->mvbytes; alignsize += onesize; uptr->soffset = (int *) nvptr; uptr->voffset = vptr; uptr->nvalue = 0; uptr->mvbytes = 0; } // multi-page KMV pair // if current set is just this KMV, close it as extended set // else close it as regular set and add new set as extended set // set newflag if more uniques exist, so new set will be initialized } else { if (uptr == sets[nset-1].first) { sets[nset-1].nunique = 1; sets[nset-1].extended = 1; } else { sets[nset-1].nunique = i - sets[nset-1].nunique; if (nset == maxset) { maxset += SETCHUNK; sets = (Set *) memory->srealloc(sets,maxset*sizeof(Set),"KMV:sets"); } nset++; sets[nset-1].first = uptr; sets[nset-1].nunique = 1; sets[nset-1].extended = 1; } uptr->set = nset-1; if (i < nunique-1) { if (nset == maxset) { maxset += SETCHUNK; sets = (Set *) memory->srealloc(sets,maxset*sizeof(Set),"KMV:sets"); } nset++; newflag = 1; } } // set uptr to next 
Unique unext = (char *) uptr; unext += ukeyoffset + uptr->keybytes; unext = ROUNDUP(unext,ualignm1); uptr = (Unique *) unext; } if (sets[nset-1].extended == 0) sets[nset-1].nunique = nunique - sets[nset-1].nunique; if (nset == 1) return 0; // setup a new Spool for each set // also resets set 0 ptrs to just new Spool spool_request(nset,0); Spool *sp; uint64_t size; char *pagespool; for (int ispool = 0; ispool < nset; ispool++) { sp = new Spool(SETFILE,mr,memory,error); pagespool = spool_malloc(ispool,size); sp->set_page(size,pagespool); sets[ispool].kv = NULL; sets[ispool].sp = sp; sets[ispool].sp2 = NULL; } #ifdef KMV_DEBUG printf("SET split: %d nunique, %d nsets\n",nunique,nset); #endif return 1; } /* ---------------------------------------------------------------------- structure a KMV page from a set with a single extended KMV pair only header page is created here value pages will be created in kv2kmv_extended() ------------------------------------------------------------------------- */ void KeyMultiValue::unique2kmv_extended(int iset) { char *ptr,*iptr,*kptr,*keyunique; int *intptr; Unique *uptr; ptr = page; uptr = sets[iset].first; iptr = ptr; kptr = iptr + twolenbytes; kptr = ROUNDUP(kptr,kalignm1); ptr = kptr + uptr->keybytes; intptr = (int *) iptr; *(intptr++) = 0; *(intptr++) = uptr->keybytes; keyunique = ((char *) uptr) + ukeyoffset; memcpy(kptr,keyunique,uptr->keybytes); nkey = 1; nvalue = uptr->nvalue; keysize = uptr->keybytes; valuesize = uptr->mvbytes; alignsize = ptr - iptr; } /* ---------------------------------------------------------------------- scan one subset of unique keys to structure a single KMV page ------------------------------------------------------------------------- */ void KeyMultiValue::unique2kmv_set(int iset) { char *ptr,*iptr,*nvptr,*kptr,*vptr,*keyunique,*unext; int *intptr; Unique *uptr; // loop over subset of unique keys ptr = page; uptr = sets[iset].first; int n = sets[iset].nunique; for (int i = 0; i < n; i++) { iptr = 
ptr; nvptr = iptr + threelenbytes; kptr = nvptr + uptr->nvalue*sizeof(int); kptr = ROUNDUP(kptr,kalignm1); vptr = kptr + uptr->keybytes; vptr = ROUNDUP(vptr,valignm1); ptr = vptr + uptr->mvbytes; ptr = ROUNDUP(ptr,talignm1); alignsize += ptr - iptr; intptr = (int *) iptr; *(intptr++) = uptr->nvalue; *(intptr++) = uptr->keybytes; *(intptr++) = uptr->mvbytes; keyunique = ((char *) uptr) + ukeyoffset; memcpy(kptr,keyunique,uptr->keybytes); nkey++; nvalue += uptr->nvalue; keysize += uptr->keybytes; valuesize += uptr->mvbytes; uptr->soffset = (int *) nvptr; uptr->voffset = vptr; uptr->nvalue = 0; uptr->mvbytes = 0; unext = (char *) uptr; unext += ukeyoffset + uptr->keybytes; unext = ROUNDUP(unext,ualignm1); uptr = (Unique *) unext; } } /* ---------------------------------------------------------------------- split partition of KVs into one Spool file per set (KMV page) ------------------------------------------------------------------------- */ void KeyMultiValue::partition2sets(int ipartition) { int i,nkey_kv,keybytes,valuebytes,ibucket,ispool; uint64_t kdummy,vdummy,adummy; char *ptr,*ptr_start,*key; Unique *uptr,*udummy; // destination Spools for all KV pairs in partition Spool **spools = new Spool*[nset]; for (i = 0; i < nset; i++) spools[i] = sets[i].sp; // loop over KV pairs in this partition // source of KV pairs can be a KV, KV + Spool, Spool, or Spool + Spool2 // uptr->set stores set index for each unique key KeyValue *kv = partitions[ipartition].kv; Spool *sp = partitions[ipartition].sp; Spool *sp2 = partitions[ipartition].sp2; if (sp) sp->set_page(pagesize,readpage); if (sp2) sp2->set_page(pagesize,readpage); int twosource = 0; if (kv && sp) twosource = 1; if (sp && sp2) twosource = 1; int npage_kv; char *page_kv; if (kv) npage_kv = kv->request_info(&page_kv); else npage_kv = sp->request_info(&page_kv); for (int ipage = 0; ipage < npage_kv; ipage++) { if (kv) nkey_kv = kv->request_page(ipage,kdummy,vdummy,adummy); else nkey_kv = sp->request_page(ipage); ptr = 
page_kv; for (i = 0; i < nkey_kv; i++) { ptr_start = ptr; keybytes = *((int *) ptr); valuebytes = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); key = ptr; ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1); ibucket = hash(key,keybytes); uptr = find(ibucket,key,keybytes,udummy); if (!uptr) error->one("Internal find error in partition2sets"); ispool = uptr->set; if (ispool < 0 || ispool >= nset) error->one("Internal spool error in partition2sets"); spools[ispool]->add(ptr-ptr_start,ptr_start); } // switch KV source from KV to Spool or Spool to Spool2 if (ipage == npage_kv-1 && twosource) { twosource = 0; ipage = -1; if (kv) { npage_kv = sp->request_info(&page_kv); kv = NULL; } else { npage_kv = sp2->request_info(&page_kv); sp = sp2; } } } for (i = 0; i < nset; i++) spools[i]->complete(); delete [] spools; } /* ---------------------------------------------------------------------- scan KV pairs in a set to populate single KMV page with values iset and KMV page contain many KMV pairs ------------------------------------------------------------------------- */ void KeyMultiValue::kv2kmv(int iset) { int i,nkey_kv,keybytes,valuebytes,ibucket; uint64_t kdummy,vdummy,adummy; char *ptr,*key,*value,*multivalue; int *valuesizes; Unique *uptr,*udummy; // loop over KV pairs in this set // source of KV pairs can be KV, KV + Spool, Spool, or Spool + Spool2 KeyValue *kv = sets[iset].kv; Spool *sp = sets[iset].sp; Spool *sp2 = sets[iset].sp2; if (sp) sp->set_page(pagesize,readpage); if (sp2) sp2->set_page(pagesize,readpage); int twosource = 0; if (kv && sp) twosource = 1; if (sp && sp2) twosource = 1; int npage_kv; char *page_kv; if (kv) npage_kv = kv->request_info(&page_kv); else npage_kv = sp->request_info(&page_kv); for (int ipage = 0; ipage < npage_kv; ipage++) { if (kv) nkey_kv = kv->request_page(ipage,kdummy,vdummy,adummy); else nkey_kv = sp->request_page(ipage); ptr = page_kv; for (i = 0; i < nkey_kv; 
i++) { keybytes = *((int *) ptr); valuebytes = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); key = ptr; ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); value = ptr; ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1); ibucket = hash(key,keybytes); uptr = find(ibucket,key,keybytes,udummy); if (!uptr) error->one("Internal find error in kv2kmv"); if (uptr->set != iset) error->one("Internal set error in kv2kmv"); valuesizes = uptr->soffset; valuesizes[uptr->nvalue++] = valuebytes; multivalue = uptr->voffset; memcpy(&multivalue[uptr->mvbytes],value,valuebytes); uptr->mvbytes += valuebytes; } // switch KV source from KV to Spool or Spool to Spool2 if (ipage == npage_kv-1 && twosource) { twosource = 0; ipage = -1; if (kv) { npage_kv = sp->request_info(&page_kv); kv = NULL; } else { npage_kv = sp2->request_info(&page_kv); sp = sp2; } } } } /* ---------------------------------------------------------------------- scan KV pairs to populate multiple KMV pages with values iset and KMV pages contain one multi-page KMV pair first write out header page created by unique2kmv_extended() write out all value pages except last one which caller will write when done, set nblock count in header page ------------------------------------------------------------------------- */ void KeyMultiValue::kv2kmv_extended(int iset) { int i,nkey_kv,keybytes,valuebytes; uint64_t kdummy,vdummy,adummy; char *ptr,*key,*value,*vptr; // write out header page create_page(); write_page(); Unique *uptr = sets[iset].first; pages[npage].nvalue_total = uptr->nvalue; int header_page = npage; npage++; init_page(); // split KMV page into two half-pages for valuesizes and values // maxvalue = max # of values the first half can hold // leave leading int in first half for nvalue count uint64_t halfsize = pagesize/2; int maxvalue = MIN(ONEMAX,halfsize/sizeof(int)-1); int *valuesizes = (int *) &page[sizeof(int)]; char *multivalue = &page[halfsize]; char *valuestart = multivalue; char 
*valuestop = page + pagesize; // loop over KV pairs in this set, all with same key // source of KV pairs can be a KV, KV + Spool, Spool, or Spool + Spool2 // add value info to two half-pages // write out page when when either half-page is full int nblock = 0; int ncount = 0; KeyValue *kv = sets[iset].kv; Spool *sp = sets[iset].sp; Spool *sp2 = sets[iset].sp2; if (sp) sp->set_page(pagesize,readpage); if (sp2) sp2->set_page(pagesize,readpage); int twosource = 0; if (kv && sp) twosource = 1; if (sp && sp2) twosource = 1; int npage_kv; char *page_kv; if (kv) npage_kv = kv->request_info(&page_kv); else npage_kv = sp->request_info(&page_kv); for (int ipage = 0; ipage < npage_kv; ipage++) { if (kv) nkey_kv = kv->request_page(ipage,kdummy,vdummy,adummy); else nkey_kv = sp->request_page(ipage); ptr = page_kv; for (i = 0; i < nkey_kv; i++) { keybytes = *((int *) ptr); valuebytes = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); key = ptr; ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); value = ptr; ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1); // if either half-page exceeded, pack two halves together, write page // use memmove() since target may overlap source if (ncount == maxvalue || multivalue+valuebytes > valuestop) { if (ncount == 0) { printf("Value size/limit: %d %u\n",valuebytes,halfsize); error->one("Single value exceeds KeyMultiValue page size"); } *((int *) page) = ncount; vptr = page + ((uint64_t) ncount)*sizeof(int) + sizeof(int); vptr = ROUNDUP(vptr,valignm1); memmove(vptr,valuestart,multivalue-valuestart); vptr += multivalue - valuestart; alignsize = vptr - page; create_page(); write_page(); npage++; init_page(); nblock++; ncount = 0; multivalue = valuestart; } memcpy(multivalue,value,valuebytes); multivalue += valuebytes; valuesizes[ncount++] = valuebytes; // switch KV source from KV to Spool or Spool to Spool2 if (ipage == npage_kv-1 && twosource) { twosource = 0; ipage = -1; if (kv) { npage_kv = sp->request_info(&page_kv); kv 
= NULL; } else { npage_kv = sp2->request_info(&page_kv); sp = sp2; } } } } // setup last partially filled page // will be written by caller *((int *) page) = ncount; vptr = page + ((uint64_t) ncount)*sizeof(int) + sizeof(int); vptr = ROUNDUP(vptr,valignm1); memmove(vptr,valuestart,multivalue-valuestart); vptr += multivalue - valuestart; alignsize = vptr - page; nblock++; // set nblock count in header page pages[header_page].nblock = nblock; } /* ---------------------------------------------------------------------- augment a partition with a Spool to store overflow KV pairs if partition has a KV, sp stores new Spool if partition has a Spool, sp2 stores new Spool ------------------------------------------------------------------------- */ Spool *KeyMultiValue::augment_partition(int ipartition) { uint64_t size; Spool *sp = new Spool(PARTFILE,mr,memory,error); char *pagespool = spool_malloc(-1,size); sp->set_page(size,pagespool); if (partitions[ipartition].kv) { partitions[ipartition].sp = sp; partitions[ipartition].sp2 = NULL; } else partitions[ipartition].sp2 = sp; return sp; } /* ---------------------------------------------------------------------- create a new partition with a Spool to store overflow KV pairs ------------------------------------------------------------------------- */ Spool *KeyMultiValue::create_partition(int ispool) { if (npartition == maxpartition) { maxpartition += PARTITIONCHUNK; partitions = (Partition *) memory->srealloc(partitions,maxpartition*sizeof(Partition), "KMV:partitions"); } uint64_t size; Spool *sp = new Spool(PARTFILE,mr,memory,error); char *pagespool = spool_malloc(ispool,size); sp->set_page(size,pagespool); partitions[npartition].kv = NULL; partitions[npartition].sp = sp; partitions[npartition].sp2 = NULL; npartition++; return sp; } /* ---------------------------------------------------------------------- find a Unique in ibucket that matches key return index of Unique if cannot find key, return NULL if bucket was empty, set 
prev = NULL else set prev = ptr to last Unique in the bucket ------------------------------------------------------------------------- */ KeyMultiValue::Unique *KeyMultiValue::find(int ibucket, char *key, int keybytes, Unique *&uprev) { Unique *uptr = buckets[ibucket]; if (!uptr) { uprev = NULL; return NULL; } char *keyunique; while (uptr) { keyunique = ((char *) uptr) + ukeyoffset; if (keybytes == uptr->keybytes && memcmp(key,keyunique,keybytes) == 0) return uptr; uprev = uptr; uptr = uptr->next; } return NULL; } /* ---------------------------------------------------------------------- hash a key to a bucket ------------------------------------------------------------------------- */ int KeyMultiValue::hash(char *key, int keybytes) { uint32_t ubucket = hashlittle(key,keybytes,0); int ibucket = ubucket & hashmask; return ibucket; } /* ---------------------------------------------------------------------- create virtual page entry for in-memory page ------------------------------------------------------------------------- */ void KeyMultiValue::init_page() { nkey = nvalue = 0; keysize = valuesize = 0; alignsize = 0; } /* ---------------------------------------------------------------------- create virtual page entry for in-memory page ------------------------------------------------------------------------- */ void KeyMultiValue::create_page() { if (npage == maxpage) { maxpage += PAGECHUNK; pages = (Page *) memory->srealloc(pages,maxpage*sizeof(Page),"KV:pages"); } pages[npage].nkey = nkey; pages[npage].keysize = keysize; pages[npage].valuesize = valuesize; pages[npage].exactsize = ((uint64_t) nkey)*threelenbytes + nvalue*sizeof(int) + keysize + valuesize; pages[npage].alignsize = alignsize; pages[npage].filesize = roundup(alignsize,ALIGNFILE); pages[npage].nvalue_total = 0; pages[npage].nblock = 0; if (npage) pages[npage].fileoffset = pages[npage-1].fileoffset + pages[npage-1].filesize; else pages[npage].fileoffset = 0; } /* 
---------------------------------------------------------------------- write in-memory page to disk do a seek since may be overwriting an arbitrary page due to sort ------------------------------------------------------------------------- */ void KeyMultiValue::write_page() { if (mr->outofcore < 0) error->one("Cannot create KeyMultiValue file due to outofcore setting"); if (fp == NULL) { fp = fopen(filename,"wb"); if (fp == NULL) { char msg[1023]; sprintf(msg,"Cannot open KeyMultiValue file %s for writing",filename); error->one(msg); } fileflag = 1; } uint64_t fileoffset = pages[npage].fileoffset; int seekflag = fseek(fp,fileoffset,SEEK_SET); int nwrite = fwrite(page,pages[npage].filesize,1,fp); mr->wsize += pages[npage].filesize; if (seekflag) { char str[128]; sprintf(str,"Bad KMV fwrite/fseek on proc %d: %u",me,fileoffset); error->warning(str); } if (nwrite != 1 && pages[npage].filesize) { char str[128]; sprintf(str,"Bad KMV fwrite on proc %d: %d %u", me,nwrite,pages[npage].filesize); error->warning(str); } } /* ---------------------------------------------------------------------- read ipage from disk do a seek since may be reading arbitrary page for extended KMV ------------------------------------------------------------------------- */ void KeyMultiValue::read_page(int ipage, int writeflag) { if (fp == NULL) { if (writeflag) fp = fopen(filename,"r+b"); else fp = fopen(filename,"rb"); if (fp == NULL) error->one("Could not open KeyMultiValue file for reading"); } uint64_t fileoffset = pages[ipage].fileoffset; int seekflag = fseek(fp,fileoffset,SEEK_SET); int nread = fread(page,pages[ipage].filesize,1,fp); mr->rsize += pages[ipage].filesize; if (seekflag) { char str[128]; sprintf(str,"Bad KMV fread/fseek on proc %d: %u",me,fileoffset); error->warning(str); } if ((nread != 1 || ferror(fp)) && pages[ipage].filesize) { char str[128]; sprintf(str,"Bad KMV fread on proc %d: %d %u", me,nread,pages[ipage].filesize); error->warning(str); clearerr(fp); } } /* 
---------------------------------------------------------------------- round N up to multiple of nalign and return it ------------------------------------------------------------------------- */ uint64_t KeyMultiValue::roundup(uint64_t n, int nalign) { if (n % nalign == 0) return n; n = (n/nalign + 1) * nalign; return n; } /* ---------------------------------------------------------------------- setup Spool page memory allocation ------------------------------------------------------------------------- */ void KeyMultiValue::spool_memory(KeyValue *kv) { uint64_t dummy; int dummy1,dummy2; // minspool = min size for Spool page = MAX(max KV size,MINSPOOLBYTES) minspool = MAX(kv->msize,MINSPOOLBYTES); if (minspool > pagesize) error->all("Spool size exceeds page size"); // query how many MR pages are available and request all of them npages_mr = mr->mem_query(dummy1,dummy2); tag_mr = (int *) memory->smalloc(npages_mr*sizeof(int),"KMV:tag_mr"); page_mr = (char **) memory->smalloc(npages_mr*sizeof(char *),"KMV:page_mr"); for (int i = 0; i < npages_mr; i++) page_mr[i] = mr->mem_request(1,dummy,tag_mr[i]); } /* ---------------------------------------------------------------------- alert spool_malloc() that Spool pages are about to be malloced N = # of pages that will be malloced extra = 1 if an extra page is needed for self partition ------------------------------------------------------------------------- */ void KeyMultiValue::spool_request(int n, int extra) { uint64_t dummy; // if all current MR pages are partitioned into N+extra small pages // then minsize = min size of any Spool page, // if minsize < minspool, allocate more page(s) from MR nquery = n + extra; if (npages_mr) { spoolperpage = nquery / npages_mr; if (nquery % npages_mr) spoolperpage++; sizespool = (pagesize/spoolperpage)/ALIGNFILE * ALIGNFILE; } else sizespool = 0; while (sizespool < minspool) { tag_mr = (int *) memory->srealloc(tag_mr,(npages_mr+1)*sizeof(int), "KMV:tag_mr"); page_mr = (char **) 
memory->srealloc(page_mr,(npages_mr+1)*sizeof(char *), "KMV:page_mr"); page_mr[npages_mr] = mr->mem_request(1,dummy,tag_mr[npages_mr]); npages_mr++; spoolperpage = nquery / npages_mr; if (nquery % npages_mr) spoolperpage++; sizespool = (pagesize/spoolperpage)/ALIGNFILE * ALIGNFILE; } } /* ---------------------------------------------------------------------- return ptr to Ith Spool page and size of page I = 0 to N-1, where N = nquery set by spool_request(N) I = -1, where spool_request() was notified with extra = 1 ------------------------------------------------------------------------- */ char *KeyMultiValue::spool_malloc(int i, uint64_t &size) { if (i < 0) i = nquery-1; size = sizespool; int ipage = i/spoolperpage; int ispool = i % spoolperpage; return page_mr[ipage] + ispool*sizespool; } /* ---------------------------------------------------------------------- free all memory pages for Spools requested from MR ------------------------------------------------------------------------- */ void KeyMultiValue::spool_free() { for (int i = 0; i < npages_mr; i++) mr->mem_unmark(tag_mr[i]); memory->sfree(tag_mr); memory->sfree(page_mr); } /* ---------------------------------------------------------------------- debug print of each KMV pair with nstride ------------------------------------------------------------------------- */ void KeyMultiValue::print(FILE *fp, int nstride, int kflag, int vflag) { int nvalues,keybytes,mvaluebytes; uint64_t dummy1,dummy2,dummy3; int *valuesizes; char *ptr,*key,*multivalue; int istride = 0; for (int ipage = 0; ipage < npage; ipage++) { nkey = request_page(ipage,0,dummy1,dummy2,dummy3); ptr = page; for (int i = 0; i < nkey; i++) { nvalues = *((int *) ptr); ptr += sizeof(int); if (nvalues > 0) { keybytes = *((int *) ptr); ptr += sizeof(int); mvaluebytes = *((int *) ptr); ptr += sizeof(int); valuesizes = (int *) ptr; ptr += ((uint64_t) nvalues) * sizeof(int); ptr = ROUNDUP(ptr,kalignm1); key = ptr; ptr += keybytes; ptr = 
ROUNDUP(ptr,valignm1); multivalue = ptr; ptr += mvaluebytes; ptr = ROUNDUP(ptr,talignm1); istride++; if (istride != nstride) continue; istride = 0; fprintf(fp,"KMV pair: proc %d, nvalues %d, sizes %d %d", me,nvalues,keybytes,mvaluebytes); fprintf(fp,", key "); if (kflag == 0) fprintf(fp,"NULL"); else if (kflag == 1) fprintf(fp,"%d",*(int *) key); else if (kflag == 2) fprintf(fp,"%lu",*(uint64_t *) key); else if (kflag == 3) fprintf(fp,"%g",*(float *) key); else if (kflag == 4) fprintf(fp,"%g",*(double *) key); else if (kflag == 5) fprintf(fp,"%s",key); else if (kflag == 6) fprintf(fp,"%d %d", *(int *) key, *(int *) (key+sizeof(int))); else if (kflag == 7) fprintf(fp,"%lu %lu", *(uint64_t *) key, *(uint64_t *) (key+sizeof(uint64_t))); fprintf(fp,", values "); if (vflag == 0) { for (int j = 0; j < nvalues; j++) fprintf(fp,"NULL "); } else if (vflag == 1) { for (int j = 0; j < nvalues; j++) { fprintf(fp,"%d ",*(int *) multivalue); multivalue += valuesizes[j]; } } else if (vflag == 2) { for (int j = 0; j < nvalues; j++) { fprintf(fp,"%lu ",*(uint64_t *) multivalue); multivalue += valuesizes[j]; } } else if (vflag == 3) { for (int j = 0; j < nvalues; j++) { fprintf(fp,"%g ",*(float *) multivalue); multivalue += valuesizes[j]; } } else if (vflag == 4) { for (int j = 0; j < nvalues; j++) { fprintf(fp,"%g ",*(double *) multivalue); multivalue += valuesizes[j]; } } else if (vflag == 5) { char *value = multivalue; for (int j = 0; j < nvalues; j++) { fprintf(fp,"%s ",multivalue); multivalue += valuesizes[j]; } } else if (vflag == 6) { for (int j = 0; j < nvalues; j++) { fprintf(fp,"%d %d ",*(int *) multivalue, *(int *) (multivalue+sizeof(int))); multivalue += valuesizes[j]; } } else if (vflag == 7) { for (int j = 0; j < nvalues; j++) { fprintf(fp,"%lu %lu ",*(uint64_t *) multivalue, *(uint64_t *) (multivalue+sizeof(uint64_t))); multivalue += valuesizes[j]; } } fprintf(fp,"\n"); } else { keybytes = *((int *) ptr); ptr += sizeof(int); ptr = ROUNDUP(ptr,kalignm1); key = ptr; 
fprintf(fp,"KMV pair: proc %d, nvalues %lu, size %d", me,pages[ipage].nvalue_total,keybytes); fprintf(fp,", key "); if (kflag == 0) fprintf(fp,"NULL"); else if (kflag == 1) fprintf(fp,"%d",*(int *) key); else if (kflag == 2) fprintf(fp,"%u",*(uint64_t *) key); else if (kflag == 3) fprintf(fp,"%g",*(float *) key); else if (kflag == 4) fprintf(fp,"%g",*(double *) key); else if (kflag == 5) fprintf(fp,"%s",key); else if (kflag == 6) fprintf(fp,"%d %d", *(int *) key, *(int *) (key+sizeof(int))); else if (kflag == 7) fprintf(fp,"%lu %lu", *(uint64_t *) key, *(uint64_t *) (key+sizeof(uint64_t))); fprintf(fp,", too many values to print"); fprintf(fp,"\n"); } } } } mrmpi-1.0~20131122/src/mrtype.h0000644000175000017500000000162111511417512015640 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. 
------------------------------------------------------------------------- */ #ifndef MRTYPE_H #define MRTYPE_H #include "mpi.h" #include "stdint.h" namespace MAPREDUCE_NS { // MRMPI_BIGINT must be set to what MPI treats as 8-byte unsigned int #define MRMPI_BIGINT MPI_UNSIGNED_LONG_LONG //#define MRMPI_BIGINT MPI_UNSIGNED_LONG } #endif mrmpi-1.0~20131122/src/error.h0000644000175000017500000000160511170765436015467 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ #ifndef ERROR_H #define ERROR_H #include "mpi.h" namespace MAPREDUCE_NS { class Error { public: Error(MPI_Comm); void all(const char *); void one(const char *); void warning(const char *); private: MPI_Comm comm; int me; }; } #endif mrmpi-1.0~20131122/src/Makefile.shlib0000644000175000017500000000143312013170713016705 0ustar mathieumathieu# MR-MPI shared library multiple-machine Makefile SHELL = /bin/sh # Definitions ROOT = mrmpi EXE = lib$(ROOT)_$@.so SRC = $(wildcard *.cpp) INC = $(wildcard *.h) OBJ = $(SRC:.cpp=.o) # Targets help: @echo 'Type "make target" where target is one of:' @echo '' @files="`ls MAKE/Makefile.*`"; \ for file in $$files; do head -1 $$file; done clean: rm -rf Obj_shlib_* .DEFAULT: @test -f MAKE/Makefile.$@ @if [ ! 
-d Obj_shlib_$@ ]; then mkdir Obj_shlib_$@; fi @cp -p $(SRC) $(INC) Obj_shlib_$@ @cp MAKE/Makefile.$@ Obj_shlib_$@/Makefile @cd Obj_shlib_$@; \ $(MAKE) $(MFLAGS) "OBJ = $(OBJ)" \ "INC = $(INC)" "EXE = ../$(EXE)" shlib @rm -f libmrmpi.so @ln -s $(EXE) libmrmpi.so @if [ -d Obj_shlib_$@ ]; then cd Obj_shlib_$@; \ rm -f $(SRC) $(INC) Makefile*; fi mrmpi-1.0~20131122/src/mapreduce.h0000644000175000017500000002213211577670272016305 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ #ifndef MAP_REDUCE_H #define MAP_REDUCE_H #include "mpi.h" #include "stdint.h" namespace MAPREDUCE_NS { class MapReduce { friend class KeyValue; friend class KeyMultiValue; friend class Spool; public: int mapstyle; // 0 = chunks, 1 = strided, 2 = master/slave int all2all; // 0 = irregular comm, 1 = use MPI_Alltoallv() int verbosity; // 0 = none, 1 = totals, 2 = proc histograms int timer; // 0 = none, 1 = summary, 2 = proc histograms int memsize; // # of Mbytes per page int minpage; // # of pages that will be pre-allocated per proc >= 0 int maxpage; // max # of pages that can be allocated per proc, 0 = inf int freepage; // 1 to free unused pages after every operation, 0 if keep int outofcore; // 1 to force data out-of-core, 0 = only if exceeds 1 pg int zeropage; // 1 to init allocated pages to 0, 0 if don't bother int keyalign; // align keys to this byte count int valuealign; // align values to this byte count char 
*fpath; // prefix path added to intermediate out-of-core files int mapfilecount; // number of files processed by map file variants class KeyValue *kv; // single KV stored by MR class KeyMultiValue *kmv; // single KMV stored by MR // static variables across all MR objects static MapReduce *mrptr; // holds a ptr to MR currently being used static int instances_now; // total # of MRs currently instantiated // grows as created, shrinks as destroyed static int instances_ever; // total # of MRs ever instantiated // grows as created, never shrinks static int mpi_finalize_flag; // 1 if MR library should finalize MPI static uint64_t msize,msizemax; // current and hi-water memory allocation static uint64_t rsize,wsize; // total read/write bytes for all I/O static uint64_t cssize,crsize; // total send/recv bytes for all comm static double commtime; // total time for all comm // library API MapReduce(MPI_Comm); MapReduce(); MapReduce(double); ~MapReduce(); MapReduce *copy(); uint64_t add(MapReduce *); uint64_t aggregate(int (*)(char *, int)); uint64_t broadcast(int); uint64_t clone(); uint64_t close(); uint64_t collapse(char *, int); uint64_t collate(int (*)(char *, int)); uint64_t compress(void (*)(char *, int, char *, int, int *, class KeyValue *, void *), void *); uint64_t convert(); uint64_t gather(int); uint64_t map(int, void (*)(int, class KeyValue *, void *), void *, int addflag = 0); uint64_t map(int, char **, int, int, int, void (*)(int, char *, class KeyValue *, void *), void *, int addflag = 0); uint64_t map(int, int, char **, int, int, char, int, void (*)(int, char *, int, class KeyValue *, void *), void *, int addflag = 0); uint64_t map(int, int, char **, int, int, char *, int, void (*)(int, char *, int, class KeyValue *, void *), void *, int addflag = 0); uint64_t map(MapReduce *, void (*)(uint64_t, char *, int, char *, int, class KeyValue *, void *), void *, int addflag = 0); void open(int addflag = 0); void print(int, int, int, int); void print(char *, int, int, 
int, int, int); uint64_t reduce(void (*)(char *, int, char *, int, int *, class KeyValue *, void *), void *); uint64_t scan(void (*)(char *, int, char *, int, void *), void *); uint64_t scan(void (*)(char *, int, char *, int, int *, void *), void *); uint64_t scrunch(int, char *, int); uint64_t multivalue_blocks(int &); void multivalue_block_select(int); int multivalue_block(int, char **, int **); uint64_t sort_keys(int); uint64_t sort_keys(int (*)(char *, int, char *, int)); uint64_t sort_values(int); uint64_t sort_values(int (*)(char *, int, char *, int)); uint64_t sort_multivalues(int); uint64_t sort_multivalues(int (*)(char *, int, char *, int)); uint64_t kv_stats(int); uint64_t kmv_stats(int); void cummulative_stats(int, int); void set_fpath(const char *); // query functions MPI_Comm communicator() {return comm;}; int num_procs() {return nprocs;}; int my_proc() {return me;}; // functions accessed thru non-class wrapper functions void map_file_wrapper(int, class KeyValue *); int compare_wrapper(int, int); private: MPI_Comm comm; int me,nprocs; int instance_me; // which instances_ever I am int allocated; double time_start,time_stop; class Memory *memory; class Error *error; uint64_t rsize_one,wsize_one; // file read/write bytes for one operation uint64_t crsize_one,cssize_one; // send/recv comm bytes for one operation int collateflag; // flag for when convert() is called from collate() // memory pages and bookkeeping uint64_t pagesize; // pagesize for KVs and KMVs char **memptr; // ptrs to each page of memory int *memusage; // 0 if unused, else tag returned to requestor // multiple pages marked same if requested together // request may be smaller than entire alloc int *memcount; // # of pages alloced starting with this page // 0 if in middle of a contiguous alloc int npage; // total # of pages currently allocated int npagemax; // hi-water mark for # of pages allocated int tagmax; // highest tag used thus far // alignment info int twolenbytes; // byte length of 
two ints int kalign,valign; // finalized alignments for keys/values int talign; // alignment of entire KV or KMV pair int kalignm1,valignm1; // alignments-1 for masking int talignm1; // file info uint64_t fsize; // current aggregate size of disk files uint64_t fsizemax; // hi-water mark for fsize int fcounter_kv; // file counters for various intermediate files int fcounter_kmv; int fcounter_sort; int fcounter_part; int fcounter_set; // sorting typedef int (CompareFunc)(char *, int, char *, int); CompareFunc *compare; char **dptr; // ptrs to datums being sorted int *slength; // length of each datum being sorted // multi-block KMV info int kmv_block_valid; // 1 if user is processing a multi-block KMV pair int kmv_key_page; // which page the key info is on int kmv_nblock; // # of value pages in multi-block KMV uint64_t kmv_nvalue_total; // total # of values in multi-block KMV char *kmv_mvpage1; // page ptrs for 2 multi-block blocks of values char *kmv_mvpage2; // file map() typedef void (MapFileFunc)(int, char *, int, class KeyValue *, void *); struct FileMap { int sepwhich; char sepchar; char *sepstr; int delta; char **filename; // names of files to read uint64_t *filesize; // size in bytes of each file int *tasksperfile; // # of map tasks for each file int *whichfile; // which file each map task reads int *whichtask; // which sub-task in file each map task is MapFileFunc *appmapfile; // user map function void *appptr; // user data ptr }; FileMap filemap; // private functions void defaults(); void copy_kv(KeyValue *); void copy_kmv(KeyMultiValue *); uint64_t map_tasks(int, char **, void (*)(int, KeyValue *, void *), void (*)(int, char *, KeyValue *, void *), void *, int, int); uint64_t map_chunks(int, int, char **, void (*)(int, char *, int, class KeyValue *, void *), void *, int); void findfiles(char *, int, int, int &, int &, char **&); void addfiles(char *, int, int &, int &, char **&); void bcastfiles(int &, char **&); void sort_kv(int); void sort_onepage(int, 
int, char *, char *, char *); void merge(int, int, void *, int, void *, int, void *); int extract(int, char *, char *&, int &); void stats(const char *, int); char *file_create(int); void file_stats(int); uint64_t roundup(uint64_t, int); void start_timer(); void write_histo(double, const char *); void histogram(int, double *, double &, double &, double &, int, int *, int *); void mr_stats(int); void allocate(); void allocate_page(int); char *mem_request(int, uint64_t &, int &); void mem_unmark(int); void mem_cleanup(); int mem_query(int &, int &); void memory_debug(int); void hiwater(int, uint64_t); }; } #endif mrmpi-1.0~20131122/src/version.h0000644000175000017500000000004412243675113016011 0ustar mathieumathieu#define MRMPI_VERSION "22 Nov 2013" mrmpi-1.0~20131122/src/MAKE/0000755000175000017500000000000012243675123014673 5ustar mathieumathieumrmpi-1.0~20131122/src/MAKE/Makefile.pure0000755000175000017500000000177312013000137017276 0ustar mathieumathieu# pure = Purify version, mpic++ SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/archive settings # specify flags and libraries needed for your compiler and MPI installation PURE_CC = purify -best-effort -follow-child-processes=yes \ -cache-dir=/tmp/purify -chain-length=20 CC = g++34 -Wall -m64 -DPURIFY_HATES_HASHLITTLE CCFLAGS = -g $(shell mpic++ --showme:compile) SHFLAGS = -fPIC DEPFLAGS = -M ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared SHLIB = # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) -o $(EXE) $(OBJ) $(SHLIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) 
mrmpi-1.0~20131122/src/MAKE/Makefile.debug0000755000175000017500000000160412013000137017402 0ustar mathieumathieu# debug = Serial debug version with -g compilation, g++, no MPI SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/archive settings # specify flags and libraries needed for your compiler and MPI installation CC = g++ CCFLAGS = -g -O -I../../mpistubs SHFLAGS = -fPIC DEPFLAGS = -M ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared -L../../mpistubs SHLIB = -lmpi_stubs # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) -o $(EXE) $(OBJ) $(SHLIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) mrmpi-1.0~20131122/src/MAKE/Makefile.serial0000755000175000017500000000157712013000137017604 0ustar mathieumathieu# serial = stand-alone code for serial execution, g++, no MPI SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/archive settings # specify flags and libraries needed for your compiler and MPI installation CC = g++ CCFLAGS = -O -I../../mpistubs SHFLAGS = -fPIC DEPFLAGS = -M ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared -L../../mpistubs SHLIB = -lmpi_stubs # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) -o $(EXE) $(OBJ) $(SHLIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) 
mrmpi-1.0~20131122/src/MAKE/Makefile.tbird0000755000175000017500000000152012013000137017415 0ustar mathieumathieu# tbird = Tbird machine, mpic++ SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/archive settings # specify flags and libraries needed for your compiler and MPI installation CC = mpic++ CCFLAGS = -O2 -DMPICH_IGNORE_CXX_SEEK SHFLAGS = -fPIC DEPFLAGS = -M ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared SHLIB = # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) -o $(EXE) $(OBJ) $(SHLIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) mrmpi-1.0~20131122/src/MAKE/Makefile.mac_serial0000755000175000017500000000157412013000137020421 0ustar mathieumathieu# mac_serial = Apple PowerBook G4 laptop, c++, no MPI SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/archive settings # specify flags and libraries needed for your compiler and MPI installation CC = c++ CCFLAGS = -O -m64 -I../../mpistubs SHFLAGS = -fPIC DEPFLAGS = -M ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared -L../../mpistubs SHLIB = -lmpi_stubs # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) -o $(EXE) $(OBJ) $(SHLIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) 
mrmpi-1.0~20131122/src/MAKE/Makefile.mpicc0000755000175000017500000000152112013000137017405 0ustar mathieumathieu# mpicc = any machine with MPI compiler wrappers, mpic++ SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/archive settings # specify flags and libraries needed for your compiler and MPI installation CC = mpic++ CCFLAGS = -O2 SHFLAGS = -fPIC DEPFLAGS = -M ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared SHLIB = # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) -o $(EXE) $(OBJ) $(SHLIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) mrmpi-1.0~20131122/src/MAKE/Makefile.linux0000755000175000017500000000154112012777070017473 0ustar mathieumathieu# linux = Linux desktop machine, g++, MPICH SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/linker settings # specify flags and libraries needed for your compiler and MPI CC = g++ CCFLAGS = -O -DMPICH_IGNORE_CXX_SEEK SHFLAGS = -fPIC DEPFLAGS = -M ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared SHLIB = -lmpich -lmpl -lpthread # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) -o $(EXE) $(OBJ) $(SHLIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) mrmpi-1.0~20131122/src/MAKE/Makefile.nebula0000755000175000017500000000154712013000137017570 0ustar 
mathieumathieu# nebula = nebula cluster, mpiCC, OpenMPI SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/archive settings # specify flags and libraries needed for your compiler and MPI installation CC = /opt/openmpi-gnu-1.3.2/bin/mpiCC CCFLAGS = -O -I../../src SHFLAGS = -fPIC DEPFLAGS = -M ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared SHLIB = # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) -o $(EXE) $(OBJ) $(SHLIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) mrmpi-1.0~20131122/src/MAKE/Makefile.odin0000755000175000017500000000155212013000137017247 0ustar mathieumathieu# odin = Odin cluster in CSRI, g++, MPICH SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/archive settings # specify flags and libraries needed for your compiler and MPI installation CC = mpic++ -m64 CCFLAGS = -O2 -DMRMPI_FPATH=/localdisk1/scratch SHFLAGS = -fPIC DEPFLAGS = -M ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared SHLIB = # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) -o $(EXE) $(OBJ) $(SHLIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) mrmpi-1.0~20131122/src/MAKE/Makefile.mac0000755000175000017500000000150712013000137017056 0ustar mathieumathieu# mac = Apple PowerBook G4 laptop, c++, native MPI SHELL = /bin/sh # 
--------------------------------------------------------------------- # compiler/archive settings # specify flags and libraries needed for your compiler and MPI installation CC = c++ CCFLAGS = -O SHFLAGS = -fPIC DEPFLAGS = -M ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared SHLIB = # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) -o $(EXE) $(OBJ) $(SHLIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) include $(DEPENDS) mrmpi-1.0~20131122/src/cmapreduce.cpp0000644000175000017500000002773312243674342017010 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. 
------------------------------------------------------------------------- */ // C interface to MapReduce library // ditto for Fortran, scripting language, or other hi-level languages #include "cmapreduce.h" #include "mapreduce.h" #include "keyvalue.h" using namespace MAPREDUCE_NS; void *MR_create(MPI_Comm comm) { MapReduce *mr = new MapReduce(comm); return (void *) mr; } void *MR_create_mpi() { MapReduce *mr = new MapReduce(); return (void *) mr; } void *MR_create_mpi_finalize() { MapReduce *mr = new MapReduce(0.0); return (void *) mr; } void MR_destroy(void *MRptr) { MapReduce *mr = (MapReduce *) MRptr; delete mr; } void *MR_get_kv(void *MRptr) { MapReduce *mr = (MapReduce *) MRptr; return (void *) mr->kv; } void *MR_get_kmv(void *MRptr) { MapReduce *mr = (MapReduce *) MRptr; return (void *) mr->kmv; } void *MR_copy(void *MRptr) { MapReduce *mr = (MapReduce *) MRptr; MapReduce *mr2 = mr->copy(); return (void *) mr2; } uint64_t MR_add(void *MRptr, void *MRptr2) { MapReduce *mr = (MapReduce *) MRptr; MapReduce *mr2 = (MapReduce *) MRptr2; return mr->add(mr2); } uint64_t MR_aggregate(void *MRptr, int (*myhash)(char *, int)) { MapReduce *mr = (MapReduce *) MRptr; return mr->aggregate(myhash); } uint64_t MR_broadcast(void *MRptr, int root) { MapReduce *mr = (MapReduce *) MRptr; return mr->broadcast(root); } uint64_t MR_clone(void *MRptr) { MapReduce *mr = (MapReduce *) MRptr; return mr->clone(); } uint64_t MR_close(void *MRptr) { MapReduce *mr = (MapReduce *) MRptr; return mr->close(); } uint64_t MR_collapse(void *MRptr, char *key, int keybytes) { MapReduce *mr = (MapReduce *) MRptr; return mr->collapse(key,keybytes); } uint64_t MR_collate(void *MRptr, int (*myhash)(char *, int)) { MapReduce *mr = (MapReduce *) MRptr; return mr->collate(myhash); } uint64_t MR_compress(void *MRptr, void (*mycompress)(char *, int, char *, int, int *, void *, void *), void *APPptr) { typedef void (CompressFunc)(char *, int, char *, int, int *, KeyValue *, void *); MapReduce *mr = 
(MapReduce *) MRptr; CompressFunc *appcompress = (CompressFunc *) mycompress; return mr->compress(appcompress,APPptr); } uint64_t MR_convert(void *MRptr) { MapReduce *mr = (MapReduce *) MRptr; return mr->convert(); } uint64_t MR_gather(void *MRptr, int numprocs) { MapReduce *mr = (MapReduce *) MRptr; return mr->gather(numprocs); } uint64_t MR_map(void *MRptr, int nmap, void (*mymap)(int, void *, void *), void *APPptr) { typedef void (MapFunc)(int, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; MapFunc *appmap = (MapFunc *) mymap; return mr->map(nmap,appmap,APPptr); } uint64_t MR_map_add(void *MRptr, int nmap, void (*mymap)(int, void *, void *), void *APPptr, int addflag) { typedef void (MapFunc)(int, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; MapFunc *appmap = (MapFunc *) mymap; return mr->map(nmap,appmap,APPptr,addflag); } uint64_t MR_map_file(void *MRptr, int nstr, char **strings, int self, int recurse, int readfile, void (*mymap)(int, char *, void *, void *), void *APPptr) { typedef void (MapFunc)(int, char *, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; MapFunc *appmap = (MapFunc *) mymap; return mr->map(nstr,strings,self,recurse,readfile,appmap,APPptr); } uint64_t MR_map_file_add(void *MRptr, int nstr, char **strings, int self, int recurse, int readfile, void (*mymap)(int, char *, void *, void *), void *APPptr, int addflag) { typedef void (MapFunc)(int, char *, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; MapFunc *appmap = (MapFunc *) mymap; return mr->map(nstr,strings,self,recurse,readfile,appmap,APPptr,addflag); } uint64_t MR_map_file_char(void *MRptr, int nmap, int nstr, char **strings, int recurse, int readflag, char sepchar, int delta, void (*mymap)(int, char *, int, void *, void *), void *APPptr) { typedef void (MapFunc)(int, char *, int, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; MapFunc *appmap = (MapFunc *) mymap; return mr->map(nmap,nstr,strings,recurse,readflag, 
sepchar,delta,appmap,APPptr); } uint64_t MR_map_file_char_add(void *MRptr, int nmap, int nstr, char **strings, int recurse, int readflag, char sepchar, int delta, void (*mymap)(int, char *, int, void *, void *), void *APPptr, int addflag) { typedef void (MapFunc)(int, char *, int, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; MapFunc *appmap = (MapFunc *) mymap; return mr->map(nmap,nstr,strings,recurse,readflag, sepchar,delta,appmap,APPptr,addflag); } uint64_t MR_map_file_str(void *MRptr, int nmap, int nstr, char **strings, int recurse, int readflag, char *sepstr, int delta, void (*mymap)(int, char *, int, void *, void *), void *APPptr) { typedef void (MapFunc)(int, char *, int, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; MapFunc *appmap = (MapFunc *) mymap; return mr->map(nmap,nstr,strings,recurse,readflag, sepstr,delta,appmap,APPptr); } uint64_t MR_map_file_str_add(void *MRptr, int nmap, int nstr, char **strings, int recurse, int readflag, char *sepstr, int delta, void (*mymap)(int, char *, int, void *, void *), void *APPptr, int addflag) { typedef void (MapFunc)(int, char *, int, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; MapFunc *appmap = (MapFunc *) mymap; return mr->map(nmap,nstr,strings,recurse,readflag, sepstr,delta,appmap,APPptr,addflag); } uint64_t MR_map_mr(void *MRptr, void *MRptr2, void (*mymap)(uint64_t, char *, int, char *, int, void *, void *), void *APPptr) { typedef void (MapFunc)(uint64_t, char *, int, char *, int, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; MapReduce *mr2 = (MapReduce *) MRptr2; MapFunc *appmap = (MapFunc *) mymap; return mr->map(mr2,appmap,APPptr); } uint64_t MR_map_mr_add(void *MRptr, void *MRptr2, void (*mymap)(uint64_t, char *, int, char *, int, void *, void *), void *APPptr, int addflag) { typedef void (MapFunc)(uint64_t, char *, int, char *, int, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; MapReduce *mr2 = (MapReduce *) MRptr2; MapFunc *appmap = (MapFunc 
*) mymap; return mr->map(mr2,appmap,APPptr,addflag); } void MR_open(void *MRptr) { MapReduce *mr = (MapReduce *) MRptr; return mr->open(); } void MR_open_add(void *MRptr, int addflag) { MapReduce *mr = (MapReduce *) MRptr; return mr->open(addflag); } void MR_print(void *MRptr, int proc, int nstride, int kflag, int vflag) { MapReduce *mr = (MapReduce *) MRptr; mr->print(proc,nstride,kflag,vflag); } void MR_print_file(void *MRptr, char *file, int fflag, int proc, int nstride, int kflag, int vflag) { MapReduce *mr = (MapReduce *) MRptr; mr->print(file,fflag,proc,nstride,kflag,vflag); } uint64_t MR_reduce(void *MRptr, void (*myreduce)(char *, int, char *, int, int *, void *, void *), void *APPptr) { typedef void (ReduceFunc)(char *, int, char *, int, int *, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; ReduceFunc *appreduce = (ReduceFunc *) myreduce; return mr->reduce(appreduce,APPptr); } uint64_t MR_multivalue_blocks(void *MRptr, int *pnblock) { MapReduce *mr = (MapReduce *) MRptr; int nblock; uint64_t nvalue_total = mr->multivalue_blocks(nblock); *pnblock = nblock; return nvalue_total; } void MR_multivalue_block_select(void *MRptr, int which) { MapReduce *mr = (MapReduce *) MRptr; return mr->multivalue_block_select(which); } int MR_multivalue_block(void *MRptr, int iblock, char **ptr_multivalue, int **ptr_valuesizes) { MapReduce *mr = (MapReduce *) MRptr; return mr->multivalue_block(iblock,ptr_multivalue,ptr_valuesizes); } uint64_t MR_scan_kv(void *MRptr, void (*myscan)(char *, int, char *, int, void *), void *APPptr) { typedef void (ScanFunc)(char *, int, char *, int, void *); MapReduce *mr = (MapReduce *) MRptr; ScanFunc *appscan = (ScanFunc *) myscan; return mr->scan(appscan,APPptr); } uint64_t MR_scan_kmv(void *MRptr, void (*myscan)(char *, int, char *, int, int *, void *), void *APPptr) { typedef void (ScanFunc)(char *, int, char *, int, int *, void *); MapReduce *mr = (MapReduce *) MRptr; ScanFunc *appscan = (ScanFunc *) myscan; return 
mr->scan(appscan,APPptr); } uint64_t MR_scrunch(void *MRptr, int numprocs, char *key, int keybytes) { MapReduce *mr = (MapReduce *) MRptr; return mr->scrunch(numprocs,key,keybytes); } uint64_t MR_sort_keys(void *MRptr, int (*mycompare)(char *, int, char *, int)) { MapReduce *mr = (MapReduce *) MRptr; return mr->sort_keys(mycompare); } uint64_t MR_sort_keys_flag(void *MRptr, int flag) { MapReduce *mr = (MapReduce *) MRptr; return mr->sort_keys(flag); } uint64_t MR_sort_values(void *MRptr, int (*mycompare)(char *, int, char *, int)) { MapReduce *mr = (MapReduce *) MRptr; return mr->sort_values(mycompare); } uint64_t MR_sort_values_flag(void *MRptr, int flag) { MapReduce *mr = (MapReduce *) MRptr; return mr->sort_values(flag); } uint64_t MR_sort_multivalues(void *MRptr, int (*mycompare)(char *, int, char *, int)) { MapReduce *mr = (MapReduce *) MRptr; return mr->sort_multivalues(mycompare); } uint64_t MR_sort_multivalues_flag(void *MRptr, int flag) { MapReduce *mr = (MapReduce *) MRptr; return mr->sort_multivalues(flag); } uint64_t MR_kv_stats(void *MRptr, int level) { MapReduce *mr = (MapReduce *) MRptr; return mr->kv_stats(level); } uint64_t MR_kmv_stats(void *MRptr, int level) { MapReduce *mr = (MapReduce *) MRptr; return mr->kmv_stats(level); } void MR_cummulative_stats(void *MRptr, int level, int reset) { MapReduce *mr = (MapReduce *) MRptr; mr->cummulative_stats(level,reset); } void MR_set_mapstyle(void *MRptr, int value) { MapReduce *mr = (MapReduce *) MRptr; mr->mapstyle = value; } void MR_set_all2all(void *MRptr, int value) { MapReduce *mr = (MapReduce *) MRptr; mr->all2all = value; } void MR_set_verbosity(void *MRptr, int value) { MapReduce *mr = (MapReduce *) MRptr; mr->verbosity = value; } void MR_set_timer(void *MRptr, int value) { MapReduce *mr = (MapReduce *) MRptr; mr->timer = value; } void MR_set_memsize(void *MRptr, int value) { MapReduce *mr = (MapReduce *) MRptr; mr->memsize = value; } void MR_set_minpage(void *MRptr, int value) { MapReduce *mr = 
(MapReduce *) MRptr; mr->minpage = value; } void MR_set_maxpage(void *MRptr, int value) { MapReduce *mr = (MapReduce *) MRptr; mr->maxpage = value; } void MR_set_keyalign(void *MRptr, int value) { MapReduce *mr = (MapReduce *) MRptr; mr->keyalign = value; } void MR_set_valuealign(void *MRptr, int value) { MapReduce *mr = (MapReduce *) MRptr; mr->valuealign = value; } void MR_set_fpath(void *MRptr, char *str) { MapReduce *mr = (MapReduce *) MRptr; mr->set_fpath(str); } void MR_kv_add(void *KVptr, char *key, int keybytes, char *value, int valuebytes) { KeyValue *kv = (KeyValue *) KVptr; kv->add(key,keybytes,value,valuebytes); } void MR_kv_add_multi_static(void *KVptr, int n, char *key, int keybytes, char *value, int valuebytes) { KeyValue *kv = (KeyValue *) KVptr; kv->add(n,key,keybytes,value,valuebytes); } void MR_kv_add_multi_dynamic(void *KVptr, int n, char *key, int *keybytes, char *value, int *valuebytes) { KeyValue *kv = (KeyValue *) KVptr; kv->add(n,key,keybytes,value,valuebytes); } mrmpi-1.0~20131122/src/mapreduce.cpp0000644000175000017500000030116511577670272016646 0ustar mathieumathieu /* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. 
------------------------------------------------------------------------- */ #include "mpi.h" #include "ctype.h" #include "string.h" #include "stdio.h" #include "stdlib.h" #include "stdint.h" #include "sys/types.h" #include "sys/stat.h" #include "dirent.h" #include "mapreduce.h" #include "version.h" #include "mrtype.h" #include "keyvalue.h" #include "keymultivalue.h" #include "spool.h" #include "irregular.h" #include "hash.h" #include "memory.h" #include "error.h" using namespace MAPREDUCE_NS; // allocate space for static class variables and initialize them MapReduce *MapReduce::mrptr; int MapReduce::instances_now = 0; int MapReduce::instances_ever = 0; int MapReduce::mpi_finalize_flag = 0; uint64_t MapReduce::msize = 0; uint64_t MapReduce::msizemax = 0; uint64_t MapReduce::rsize = 0; uint64_t MapReduce::wsize = 0; uint64_t MapReduce::cssize = 0; uint64_t MapReduce::crsize = 0; double MapReduce::commtime = 0.0; // prototypes for non-class functions void map_file_standalone(int, KeyValue *, void *); int compare_standalone(const void *, const void *); int compare_int(char *, int, char *, int); int compare_uint64(char *, int, char *, int); int compare_float(char *, int, char *, int); int compare_double(char *, int, char *, int); int compare_str(char *, int, char *, int); int compare_strn(char *, int, char *, int); int compare_int_reverse(char *, int, char *, int); int compare_uint64_reverse(char *, int, char *, int); int compare_float_reverse(char *, int, char *, int); int compare_double_reverse(char *, int, char *, int); int compare_str_reverse(char *, int, char *, int); int compare_strn_reverse(char *, int, char *, int); #define MIN(A,B) ((A) < (B)) ? (A) : (B) #define MAX(A,B) ((A) > (B)) ? 
(A) : (B) #define ROUNDUP(A,B) (char *) (((uint64_t) A + B) & ~B); #define MAXLINE 1024 #define ALIGNFILE 512 // same as in other classes #define FILECHUNK 128 #define VALUECHUNK 128 #define MBYTES 64 #define ALIGNKV 4 #define INTMAX 0x7FFFFFFF enum{KVFILE,KMVFILE,SORTFILE,PARTFILE,SETFILE}; //#define MEMORY_DEBUG 1 // set if want debug output from memory requests /* ---------------------------------------------------------------------- construct using caller's MPI communicator perform no MPI_init() and no MPI_Finalize() ------------------------------------------------------------------------- */ MapReduce::MapReduce(MPI_Comm caller) { instances_now++; instances_ever++; instance_me = instances_ever; comm = caller; MPI_Comm_rank(comm,&me); MPI_Comm_size(comm,&nprocs); defaults(); } /* ---------------------------------------------------------------------- construct without MPI communicator, use MPI_COMM_WORLD perform MPI_Init() if not already initialized perform no MPI_Finalize() ------------------------------------------------------------------------- */ MapReduce::MapReduce() { instances_now++; instances_ever++; instance_me = instances_ever; int flag; MPI_Initialized(&flag); if (!flag) { int argc = 0; char **argv = NULL; MPI_Init(&argc,&argv); } comm = MPI_COMM_WORLD; MPI_Comm_rank(comm,&me); MPI_Comm_size(comm,&nprocs); defaults(); } /* ---------------------------------------------------------------------- construct without MPI communicator, use MPI_COMM_WORLD perform MPI_Init() if not already initialized perform MPI_Finalize() if final instance is destructed ------------------------------------------------------------------------- */ MapReduce::MapReduce(double dummy) { instances_now++; instances_ever++; instance_me = instances_ever; mpi_finalize_flag = 1; int flag; MPI_Initialized(&flag); if (!flag) { int argc = 0; char **argv = NULL; MPI_Init(&argc,&argv); } comm = MPI_COMM_WORLD; MPI_Comm_rank(comm,&me); MPI_Comm_size(comm,&nprocs); defaults(); } /* 
---------------------------------------------------------------------- free all memory if finalize_flag is set and this is last instance, then finalize MPI ------------------------------------------------------------------------- */ MapReduce::~MapReduce() { delete [] fpath; delete kv; delete kmv; delete memory; delete error; for (int i = 0; i < npage; i++) if (memcount[i]) { memory->sfree(memptr[i]); msize -= memcount[i]*pagesize; } memory->sfree(memptr); memory->sfree(memusage); memory->sfree(memcount); instances_now--; if (verbosity) mr_stats(verbosity); if (instances_now == 0 && verbosity) cummulative_stats(verbosity,1); if (mpi_finalize_flag && instances_now == 0) MPI_Finalize(); } /* ---------------------------------------------------------------------- default settings ------------------------------------------------------------------------- */ void MapReduce::defaults() { memory = new Memory(comm); error = new Error(comm); mapstyle = 0; all2all = 1; verbosity = 0; timer = 0; #ifdef MRMPI_MEMSIZE memsize = MRMPI_MEMSIZE; #else memsize = MBYTES; #endif minpage = 0; maxpage = 0; freepage = 1; outofcore = 0; zeropage = 0; keyalign = valuealign = ALIGNKV; #ifdef MRMPI_FPATH #define _QUOTEME(x) #x #define QUOTEME(x) _QUOTEME(x) #define QMRMPI_FPATH QUOTEME(MRMPI_FPATH) int n = strlen(QMRMPI_FPATH) + 1; fpath = new char[n]; strcpy(fpath,QMRMPI_FPATH); #else fpath = new char[2]; strcpy(fpath,"."); #endif collateflag = 0; fcounter_kv = fcounter_kmv = fcounter_sort = fcounter_part = fcounter_set = 0; twolenbytes = 2*sizeof(int); kmv_block_valid = 0; allocated = 0; memptr = NULL; memusage = NULL; memcount = NULL; npage = 0; npagemax = 0; tagmax = 0; fsize = 0; fsizemax = 0; kv = NULL; kmv = NULL; if (sizeof(uint64_t) != 8) error->all("Not compiled for 8-byte integers"); if (sizeof(char *) != 8 && me == 0) error->warning("Not compiled for 8-byte pointers, " "be sure to limit your memory usage to less than 4 Gb"); int mpisize; MPI_Type_size(MRMPI_BIGINT,&mpisize); if 
(mpisize != 8) error->all("MRMPI_BIGINT is not 8-byte data type: edit mrtype.h"); } /* ---------------------------------------------------------------------- make a copy of myself and return it new MR object duplicates my settings and KV/KMV ------------------------------------------------------------------------- */ MapReduce *MapReduce::copy() { if (timer) start_timer(); if (verbosity) file_stats(0); MapReduce *mrnew = new MapReduce(comm); mrnew->mapstyle = mapstyle; mrnew->all2all = all2all; mrnew->verbosity = verbosity; mrnew->timer = timer; mrnew->memsize = memsize; mrnew->minpage = minpage; mrnew->maxpage = maxpage; mrnew->freepage = freepage; mrnew->outofcore = outofcore; mrnew->zeropage = zeropage; if (allocated) { mrnew->keyalign = kalign; mrnew->valuealign = valign; } else { mrnew->keyalign = keyalign; mrnew->valuealign = valuealign; } delete [] mrnew->fpath; int n = strlen(fpath) + 1; mrnew->fpath = new char[n]; strcpy(mrnew->fpath,fpath); if (kv) { kv->allocate(); mrnew->copy_kv(kv); kv->deallocate(0); } if (kmv) { kmv->allocate(); mrnew->copy_kmv(kmv); kmv->deallocate(0); } if (freepage) mem_cleanup(); if (kv) stats("Copy",0); if (kmv) stats("Copy",1); return mrnew; } /* ---------------------------------------------------------------------- create my KV as copy of kv_src called by other MR's copy(), so my KV will not yet exist ------------------------------------------------------------------------- */ void MapReduce::copy_kv(KeyValue *kv_src) { if (!allocated) allocate(); kv = new KeyValue(this,kalign,valign,memory,error,comm); kv->copy(kv_src); kv->complete(); } /* ---------------------------------------------------------------------- create my KMV as copy of kmvsrc called by other MR's copy() ------------------------------------------------------------------------- */ void MapReduce::copy_kmv(KeyMultiValue *kmv_src) { if (!allocated) allocate(); kmv = new KeyMultiValue(this,kalign,valign,memory,error,comm); kmv->copy(kmv_src); kmv->complete(); } /* 
---------------------------------------------------------------------- add KV pairs from another MR to my KV ------------------------------------------------------------------------- */ uint64_t MapReduce::add(MapReduce *mr) { if (mr->kv == NULL) error->all("MapReduce passed to add() does not have KeyValue pairs"); if (mr == this) error->all("Cannot add to self"); if (timer) start_timer(); if (verbosity) file_stats(0); if (!allocated) allocate(); delete kmv; kmv = NULL; if (kv == NULL) kv = new KeyValue(this,kalign,valign,memory,error,comm); else kv->append(); mr->kv->allocate(); kv->add(mr->kv); mr->kv->deallocate(0); kv->complete(); if (freepage) mem_cleanup(); stats("Add",0); uint64_t nkeyall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- aggregate a KV across procs to create a new KV initially, key copies can exist on many procs after aggregation, all copies of key are on same proc performed via parallel distributed hashing hash = user hash function (NULL if not provided) requires irregular all2all communication ------------------------------------------------------------------------- */ uint64_t MapReduce::aggregate(int (*hash)(char *, int)) { int i,nkey_send,keybytes,valuebytes,nkey_recv; int start,stop,done,mydone; int memtag_cdpage,memtag_epage,memtag_fpage,memtag_gpage; uint64_t dummy,dummy1,dummy2,dummy3; double timestart,fraction,minfrac; char *ptr,*key; int *proclist,*kvsizes,*reorder; char **kvptrs; if (kv == NULL) error->all("Cannot aggregate without KeyValue"); if (timer) start_timer(); if (verbosity) file_stats(0); if (nprocs == 1) { stats("Aggregate",0); return kv->nkv; } kv->allocate(); // new KV that will be created KeyValue *kvnew = new KeyValue(this,kalign,valign,memory,error,comm); // irregular communicator Irregular *irregular = new Irregular(all2all,memory,error,comm); // pages of workspace memory, including extra allocated pages 
uint64_t twopage; char *cdpage = mem_request(2,twopage,memtag_cdpage); char *epage = mem_request(1,dummy,memtag_epage); char *fpage = mem_request(1,dummy,memtag_fpage); char *gpage = mem_request(1,dummy,memtag_gpage); // maxpage = max # of pages in any proc's KV char *page_send; int npage_send = kv->request_info(&page_send); int maxpage; MPI_Allreduce(&npage_send,&maxpage,1,MPI_INT,MPI_MAX,comm); // loop over pages, perform irregular comm on each for (int ipage = 0; ipage < maxpage; ipage++) { // load page of KV pairs if (ipage < npage_send) nkey_send = kv->request_page(ipage,dummy1,dummy2,dummy3); else nkey_send = 0; // set ptrs to workspace memory proclist = (int *) epage; kvsizes = &proclist[nkey_send]; reorder = &proclist[2 * ((uint64_t) nkey_send)]; kvptrs = (char **) fpage; // hash each key to a proc ID // via user-provided hash function or hashlittle() ptr = page_send; for (i = 0; i < nkey_send; i++) { kvptrs[i] = ptr; keybytes = *((int *) ptr); valuebytes = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); key = ptr; ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1); kvsizes[i] = ptr - kvptrs[i]; if (hash) proclist[i] = hash(key,keybytes) % nprocs; else proclist[i] = hashlittle(key,keybytes,nprocs) % nprocs; } // perform irregular comm of each proc's page of KV pairs // add received KV pairs to kvnew // no proc can receive more than 2 pages at once, else scale back // iterate until entire page is communicated by every proc start = 0; stop = nkey_send; done = 0; while (!done) { // attempt to communicate all KVs from start to stop // if overflows any proc, then scale back stop until succeed // if setup returns any fraction < 1.0, reset stop and try again // 0.9 is a conservative round-down factor // NOTE: is scale back guaranteed to eventually be successful? // is this loop guaranteed to make progress (comm something)? 
// what if all procs want to send 1 big datum to proc 0 but // call cannot do it together? // then all might round down to 0 ?? timestart = MPI_Wtime(); while (1) { nkey_recv = irregular->setup(stop-start,&proclist[start], &kvsizes[start],&reorder[start], twopage,fraction); MPI_Allreduce(&fraction,&minfrac,1,MPI_DOUBLE,MPI_MIN,comm); if (minfrac < 1.0) stop = static_cast (start + 0.9*minfrac*(stop-start)); else break; } irregular->exchange(stop-start,&proclist[start],&kvptrs[start], &kvsizes[start],&reorder[start],gpage,cdpage); cssize += irregular->cssize; crsize += irregular->crsize; commtime += MPI_Wtime() - timestart; kvnew->add(nkey_recv,cdpage); // set start/stop to remainder of page and iterate // if all procs are at end of page, then done start = stop; stop = nkey_send; if (start == stop) mydone = 1; else mydone = 0; MPI_Allreduce(&mydone,&done,1,MPI_INT,MPI_MIN,comm); } } delete irregular; mem_unmark(memtag_cdpage); mem_unmark(memtag_epage); mem_unmark(memtag_fpage); mem_unmark(memtag_gpage); delete kv; kv = kvnew; kv->complete(); if (freepage) mem_cleanup(); stats("Aggregate",0); uint64_t nkeyall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- broadcast the KV on proc root to all other procs ------------------------------------------------------------------------- */ uint64_t MapReduce::broadcast(int root) { int npage_kv; char *buf; uint64_t dummy,sizes[4]; if (kv == NULL) error->all("Cannot broadcast without KeyValue"); if (root < 0 || root >= nprocs) error->all("Invalid root for broadcast"); if (timer) start_timer(); if (verbosity) file_stats(0); if (nprocs == 1) { stats("Broadcast",0); uint64_t nkeyall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } // on non-root procs, delete existing KV and create empty KV double timestart = MPI_Wtime(); if (me != root) { delete kv; kv = new 
KeyValue(this,kalign,valign,memory,error,comm); } else kv->allocate(); npage_kv = kv->request_info(&buf); MPI_Bcast(&npage_kv,1,MPI_INT,root,comm); // broadcast KV data, one page at a time, non-root procs add to their KV for (int ipage = 0; ipage < npage_kv; ipage++) { if (me == root) sizes[0] = kv->request_page(ipage,sizes[1],sizes[2],sizes[3]); MPI_Bcast(sizes,4,MRMPI_BIGINT,root,comm); MPI_Bcast(buf,sizes[3],MPI_BYTE,root,comm); if (me == root) cssize += sizes[3]; else { crsize += sizes[3]; kv->add(sizes[0],buf,sizes[1],sizes[2],sizes[3]); } } commtime += MPI_Wtime() - timestart; if (me != root) kv->complete(); else kv->complete_dummy(); if (freepage) mem_cleanup(); stats("Broadcast",0); uint64_t nkeyall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- clone KV to KMV so that KMV pairs are one-to-one copies of KV pairs each proc clones only its data assume each KV key is unique, but is not required ------------------------------------------------------------------------- */ uint64_t MapReduce::clone() { if (kv == NULL) error->all("Cannot clone without KeyValue"); if (timer) start_timer(); if (verbosity) file_stats(0); kmv = new KeyMultiValue(this,kalign,valign,memory,error,comm); kv->allocate(); kmv->clone(kv); kmv->complete(); delete kv; kv = NULL; if (freepage) mem_cleanup(); stats("Clone",1); uint64_t nkeyall; MPI_Allreduce(&kmv->nkmv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- close a KV that KV pairs were added to by another MR's map() ------------------------------------------------------------------------- */ uint64_t MapReduce::close() { if (kv == NULL) error->all("Cannot close without KeyValue"); if (timer) start_timer(); if (verbosity) file_stats(0); kv->complete(); if (freepage) mem_cleanup(); stats("Close",0); uint64_t nkeyall; 
MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- collapse KV into a KMV with a single key/value each proc collapses only its data new key = provided key name (same on every proc) new value = list of old key,value,key,value,etc ------------------------------------------------------------------------- */ uint64_t MapReduce::collapse(char *key, int keybytes) { if (kv == NULL) error->all("Cannot collapse without KeyValue"); if (timer) start_timer(); if (verbosity) file_stats(0); kmv = new KeyMultiValue(this,kalign,valign,memory,error,comm); kv->allocate(); kmv->collapse(key,keybytes,kv); kmv->complete(); delete kv; kv = NULL; if (freepage) mem_cleanup(); stats("Collapse",1); uint64_t nkeyall; MPI_Allreduce(&kmv->nkmv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- collate KV to create a KMV aggregate followed by a convert hash = user hash function (NULL if not provided) ------------------------------------------------------------------------- */ uint64_t MapReduce::collate(int (*hash)(char *, int)) { if (kv == NULL) error->all("Cannot collate without KeyValue"); if (timer) start_timer(); if (verbosity) file_stats(0); collateflag = 1; int verbosity_hold = verbosity; int timer_hold = timer; verbosity = timer = 0; aggregate(hash); convert(); verbosity = verbosity_hold; timer = timer_hold; stats("Collate",1); collateflag = 0; fcounter_part = fcounter_set = 0; uint64_t nkeyall; MPI_Allreduce(&kmv->nkmv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- compress KV to create a smaller KV duplicate keys are replaced with a single key/value each proc compresses only its data create a KMV temporarily call appcompress() with each key/multivalue in KMV appcompress() returns single key/value to new KV 
------------------------------------------------------------------------- */ uint64_t MapReduce::compress(void (*appcompress)(char *, int, char *, int, int *, KeyValue *, void *), void *appptr) { if (kv == NULL) error->all("Cannot compress without KeyValue"); if (timer) start_timer(); if (verbosity) file_stats(0); kmv = new KeyMultiValue(this,kalign,valign,memory,error,comm); kv->allocate(); // convert KV into KMV // KMV convert will delete kv kmv->convert(kv); kmv->complete(); if (freepage) mem_cleanup(); // create new KV kv = new KeyValue(this,kalign,valign,memory,error,comm); kmv->allocate(); uint64_t dummy; int memtag1,memtag2; char *mvpage1 = mem_request(1,dummy,memtag1); char *mvpage2 = mem_request(1,dummy,memtag2); int nkey_kmv,nvalues,keybytes,mvaluebytes; uint64_t dummy1,dummy2,dummy3; int *valuesizes; char *ptr,*key,*multivalue; char *page_kmv; int npage_kmv = kmv->request_info(&page_kmv); char *page_hold = page_kmv; for (int ipage = 0; ipage < npage_kmv; ipage++) { nkey_kmv = kmv->request_page(ipage,0,dummy1,dummy2,dummy3); ptr = page_kmv; for (int i = 0; i < nkey_kmv; i++) { nvalues = *((int *) ptr); ptr += sizeof(int); if (nvalues > 0) { keybytes = *((int *) ptr); ptr += sizeof(int); mvaluebytes = *((int *) ptr); ptr += sizeof(int); valuesizes = (int *) ptr; ptr += ((uint64_t) nvalues) * sizeof(int); ptr = ROUNDUP(ptr,kalignm1); key = ptr; ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); multivalue = ptr; ptr += mvaluebytes; ptr = ROUNDUP(ptr,talignm1); appcompress(key,keybytes,multivalue,nvalues,valuesizes,kv,appptr); } else { keybytes = *((int *) ptr); ptr += sizeof(int); ptr = ROUNDUP(ptr,kalignm1); key = ptr; // set KMV page to mvpage1 so key will not be overwritten // when multivalue_block() loads new pages of values kmv_block_valid = 1; kmv_key_page = ipage; kmv_mvpage1 = mvpage1; kmv_mvpage2 = mvpage2; kmv_nvalue_total = kmv->multivalue_blocks(ipage,kmv_nblock); kmv->page = mvpage1; appcompress(key,keybytes,NULL,0,(int *) this,kv,appptr); 
kmv_block_valid = 0; ipage += kmv_nblock; kmv->page = page_hold; } } } kv->complete(); mem_unmark(memtag1); mem_unmark(memtag2); // delete KMV delete kmv; kmv = NULL; if (freepage) mem_cleanup(); stats("Compress",0); fcounter_part = fcounter_set = 0; uint64_t nkeyall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- convert KV to KMV duplicate keys are replaced with a single key/multivalue each proc converts only its data new key = old unique key new multivalue = concatenated list of all values for that key in KV ------------------------------------------------------------------------- */ uint64_t MapReduce::convert() { if (kv == NULL) error->all("Cannot convert without KeyValue"); if (timer) start_timer(); if (verbosity) file_stats(0); kmv = new KeyMultiValue(this,kalign,valign,memory,error,comm); kv->allocate(); // KMV will delete kv and free its memory kmv->convert(kv); kmv->complete(); kv = NULL; if (freepage) mem_cleanup(); stats("Convert",1); if (!collateflag) fcounter_part = fcounter_set = 0; uint64_t nkeyall; MPI_Allreduce(&kmv->nkmv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- gather a distributed KV to a new KV on fewer procs numprocs = # of procs new KV resides on (0 to numprocs-1) ------------------------------------------------------------------------- */ uint64_t MapReduce::gather(int numprocs) { int flag,npage_kv,memtag; int nkey,rkey,skey,keybytes,valuebytes; int sizes[4]; uint64_t dummy; uint64_t keysize,valuesize,alignsize; uint64_t rkeysize,rvaluesize,ralignsize; uint64_t skeysize,svaluesize,salignsize; char *buf,*rbuf,*sbuf,*ptr,*ptrprev; MPI_Status status; MPI_Request request; if (kv == NULL) error->all("Cannot gather without KeyValue"); if (numprocs < 1 || numprocs > nprocs) error->all("Invalid processor count for gather"); if (timer) start_timer(); if 
(verbosity) file_stats(0); if (nprocs == 1 || numprocs == nprocs) { stats("Gather",0); uint64_t nkeyall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } // lo procs collect key/value pairs from hi procs // lo procs are those with ID < numprocs // lo procs recv from set of hi procs with same (ID % numprocs) double timestart = MPI_Wtime(); if (me < numprocs) { kv->append(); buf = mem_request(1,dummy,memtag); for (int iproc = me+numprocs; iproc < nprocs; iproc += numprocs) { MPI_Send(&flag,0,MPI_INT,iproc,0,comm); MPI_Recv(&npage_kv,1,MPI_INT,iproc,0,comm,&status); for (int ipage = 0; ipage < npage_kv; ipage++) { MPI_Send(&flag,0,MPI_INT,iproc,0,comm); MPI_Recv(&nkey,1,MPI_INT,iproc,0,comm,&status); rkey = 0; rkeysize = svaluesize = salignsize = 0; rbuf = buf; // recv sections of pages that fit in INTMAX as required by MPI_Irecv while (rkey < nkey) { MPI_Irecv(rbuf,INTMAX,MPI_BYTE,iproc,1,comm,&request); MPI_Send(&flag,0,MPI_INT,iproc,0,comm); MPI_Recv(sizes,4,MPI_INT,iproc,0,comm,&status); MPI_Wait(&request,&status); crsize += sizes[3]; rkey += sizes[0]; rkeysize += sizes[1]; rvaluesize += sizes[2]; ralignsize += sizes[3]; kv->add(sizes[0],rbuf, (uint64_t) sizes[1],(uint64_t) sizes[2],(uint64_t) sizes[3]); rbuf += sizes[3]; } } } mem_unmark(memtag); } else { kv->allocate(); npage_kv = kv->request_info(&buf); int iproc = me % numprocs; MPI_Recv(&flag,0,MPI_INT,iproc,0,comm,&status); MPI_Send(&npage_kv,1,MPI_INT,iproc,0,comm); for (int ipage = 0; ipage < npage_kv; ipage++) { nkey = kv->request_page(ipage,keysize,valuesize,alignsize); MPI_Recv(&flag,0,MPI_INT,iproc,0,comm,&status); MPI_Send(&nkey,1,MPI_INT,iproc,0,comm); skey = 0; skeysize = svaluesize = salignsize = 0; sbuf = buf; // send sections of pages that fit in INTMAX as required by MPI_Send while (skey < nkey) { if (alignsize-salignsize <= INTMAX) { sizes[0] = nkey - skey; sizes[1] = keysize - skeysize; sizes[2] = valuesize - svaluesize; sizes[3] = alignsize - salignsize; } else 
{ sizes[0] = sizes[1] = sizes[2] = 0; ptr = sbuf; while (ptr-sbuf <= INTMAX) { ptrprev = ptr; keybytes = *((int *) ptr); valuebytes = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1); sizes[0]++; sizes[1] += keybytes; sizes[2] += valuebytes; } sizes[0]--; sizes[1] -= keybytes; sizes[2] -= valuebytes; sizes[3] = ptrprev - sbuf;; } MPI_Recv(&flag,0,MPI_INT,iproc,0,comm,&status); MPI_Send(sizes,4,MPI_INT,iproc,0,comm); MPI_Send(sbuf,sizes[3],MPI_BYTE,iproc,1,comm); cssize += sizes[3]; skey += sizes[0]; skeysize += sizes[1]; svaluesize += sizes[2]; salignsize += sizes[3]; sbuf += sizes[3]; } } // leave empty KV on vacated procs delete kv; kv = new KeyValue(this,kalign,valign,memory,error,comm); } commtime += MPI_Wtime() - timestart; kv->complete(); if (freepage) mem_cleanup(); stats("Gather",0); uint64_t nkeyall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- user call: create a KV via a parallel map operation for nmap tasks make one call to appmap() for each task mapstyle determines how tasks are partitioned to processors ------------------------------------------------------------------------- */ uint64_t MapReduce::map(int nmap, void (*appmap)(int, KeyValue *, void *), void *appptr, int addflag) { return map_tasks(nmap,NULL,appmap,NULL,appptr,addflag,0); } /* ---------------------------------------------------------------------- user call: create a KV via a parallel map operation on nstr file/dir names input nstr/strings are used to generate list of filenames selfflag = 0, just proc 0 generates file list and bcasts it selfflag = 1, each proc generates its own file list ------------------------------------------------------------------------- */ uint64_t MapReduce::map(int nstr, char **strings, int selfflag, int recurse, int readflag, void 
(*appmap)(int, char *, KeyValue *, void *), void *appptr, int addflag) { int nfile = 0; int maxfile = 0; char **files = NULL; if (selfflag || me == 0) for (int i = 0; i < nstr; i++) findfiles(strings[i],recurse,readflag,nfile,maxfile,files); if (selfflag == 0) bcastfiles(nfile,files); if (selfflag) MPI_Allreduce(&nfile,&mapfilecount,1,MPI_INT,MPI_SUM,comm); else mapfilecount = nfile; uint64_t nkeyall = map_tasks(nfile,files,NULL,appmap,appptr,addflag,selfflag); for (int i = 0; i < nfile; i++) delete [] files[i]; memory->sfree(files); return nkeyall; } /* ---------------------------------------------------------------------- called by 2 user map methods above (task count, list of files) assign out and process the tasks ntasks can be generic (files = NULL), or number of files make one call to two different flavors of appmap() for each task mapstyle and selfflag determine how tasks are partitioned to processors ------------------------------------------------------------------------- */ uint64_t MapReduce::map_tasks(int ntask, char **files, void (*appmaptask)(int, KeyValue *, void *), void (*appmapfile)(int, char *, KeyValue *, void *), void *appptr, int addflag, int selfflag) { MPI_Status status; if (timer) start_timer(); if (verbosity) file_stats(0); if (!allocated) allocate(); delete kmv; kmv = NULL; if (addflag == 0) { delete kv; kv = new KeyValue(this,kalign,valign,memory,error,comm); } else if (kv == NULL) { kv = new KeyValue(this,kalign,valign,memory,error,comm); } else { kv->append(); } // selfflag = 1 = each processor performs own tasks // nprocs = 1 = all tasks to single processor // mapstyle 0 = chunk of tasks to each proc // mapstyle 1 = strided tasks to each proc // mapstyle 2 = master/slave assignment of tasks if (selfflag == 1 || nprocs == 1) { for (int itask = 0; itask < ntask; itask++) if (files) appmapfile(itask,files[itask],kv,appptr); else appmaptask(itask,kv,appptr); } else if (mapstyle == 0) { uint64_t nmap64 = ntask; int lo = me * nmap64 / 
nprocs; int hi = (me+1) * nmap64 / nprocs; for (int itask = lo; itask < hi; itask++) if (files) appmapfile(itask,files[itask],kv,appptr); else appmaptask(itask,kv,appptr); } else if (mapstyle == 1) { for (int itask = me; itask < ntask; itask += nprocs) if (files) appmapfile(itask,files[itask],kv,appptr); else appmaptask(itask,kv,appptr); } else if (mapstyle == 2) { if (me == 0) { int doneflag = -1; int ndone = 0; int itask = 0; for (int iproc = 1; iproc < nprocs; iproc++) { if (itask < ntask) { MPI_Send(&itask,1,MPI_INT,iproc,0,comm); itask++; } else { MPI_Send(&doneflag,1,MPI_INT,iproc,0,comm); ndone++; } } while (ndone < nprocs-1) { int iproc,tmp; MPI_Recv(&tmp,1,MPI_INT,MPI_ANY_SOURCE,0,comm,&status); iproc = status.MPI_SOURCE; if (itask < ntask) { MPI_Send(&itask,1,MPI_INT,iproc,0,comm); itask++; } else { MPI_Send(&doneflag,1,MPI_INT,iproc,0,comm); ndone++; } } } else { while (1) { int itask; MPI_Recv(&itask,1,MPI_INT,0,0,comm,&status); if (itask < 0) break; if (files) appmapfile(itask,files[itask],kv,appptr); else appmaptask(itask,kv,appptr); MPI_Send(&itask,1,MPI_INT,0,0,comm); } } } else error->all("Invalid mapstyle setting"); kv->complete(); if (freepage) mem_cleanup(); stats("Map",0); uint64_t nkeyall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- user call: create a KV via a parallel map operation on file splitting nfile filenames are split into nmap pieces based on separator character ------------------------------------------------------------------------- */ uint64_t MapReduce::map(int nmap, int nstr, char **strings, int recurse, int readflag, char sepchar, int delta, void (*appmap)(int, char *, int, KeyValue *, void *), void *appptr, int addflag) { int nfile = 0; int maxfile = 0; char **files = NULL; if (me == 0) for (int i = 0; i < nstr; i++) findfiles(strings[i],recurse,readflag,nfile,maxfile,files); bcastfiles(nfile,files); filemap.sepwhich = 
1; filemap.sepchar = sepchar; filemap.delta = delta; uint64_t nkeyall = map_chunks(nmap,nfile,files,appmap,appptr,addflag); for (int i = 0; i < nfile; i++) delete [] files[i]; memory->sfree(files); return nkeyall; } /* ---------------------------------------------------------------------- user call: create a KV via a parallel map operation on file splitting nfile filenames are split into nmap pieces based on separator string ------------------------------------------------------------------------- */ uint64_t MapReduce::map(int nmap, int nstr, char **strings, int recurse, int readflag, char *sepstr, int delta, void (*appmap)(int, char *, int, KeyValue *, void *), void *appptr, int addflag) { int nfile = 0; int maxfile = 0; char **files = NULL; if (me == 0) for (int i = 0; i < nstr; i++) findfiles(strings[i],recurse,readflag,nfile,maxfile,files); bcastfiles(nfile,files); filemap.sepwhich = 0; int n = strlen(sepstr) + 1; filemap.sepstr = new char[n]; strcpy(filemap.sepstr,sepstr); filemap.delta = delta; uint64_t nkeyall = map_chunks(nmap,nfile,files,appmap,appptr,addflag); for (int i = 0; i < nfile; i++) delete [] files[i]; memory->sfree(files); return nkeyall; } /* ---------------------------------------------------------------------- called by 2 user map methods above (char/str separator for splitting files) nfile filenames are split into nmap pieces based on separator FileMap struct stores info on how to split files calls non-file map() to partition tasks to processors with callback to non-class map_file_standalone() map_file_standalone() reads chunk of file and passes it to user appmap() ------------------------------------------------------------------------- */ uint64_t MapReduce::map_chunks(int nmap, int nfile, char **files, void (*appmap)(int, char *, int, KeyValue *, void *), void *appptr, int addflag) { if (timer) start_timer(); if (verbosity) file_stats(0); if (!allocated) allocate(); delete kmv; kmv = NULL; // must have at least as many chunks as files if 
(nfile > nmap) nmap = nfile; // copy filenames into FileMap filemap.filename = new char*[nfile]; for (int i = 0; i < nfile; i++) { int n = strlen(files[i]) + 1; filemap.filename[i] = new char[n]; strcpy(filemap.filename[i],files[i]); } // get filesize of each file via stat() // proc 0 queries files, bcasts results to all procs filemap.filesize = new uint64_t[nfile]; struct stat stbuf; if (me == 0) { for (int i = 0; i < nfile; i++) { int flag = stat(files[i],&stbuf); if (flag < 0) error->one("Could not query file size"); filemap.filesize[i] = stbuf.st_size; } } MPI_Bcast(filemap.filesize,nfile*sizeof(uint64_t),MPI_BYTE,0,comm); // ntotal = total size of all files // nideal = ideal # of bytes per task uint64_t ntotal = 0; for (int i = 0; i < nfile; i++) ntotal += filemap.filesize[i]; uint64_t nideal = MAX(1,ntotal/nmap); // tasksperfile[i] = # of tasks for Ith file // initial assignment based on ideal chunk size // increment/decrement tasksperfile until reach target # of tasks // even small files must have 1 task filemap.tasksperfile = new int[nfile]; int ntasks = 0; for (int i = 0; i < nfile; i++) { filemap.tasksperfile[i] = MAX(1,filemap.filesize[i]/nideal); ntasks += filemap.tasksperfile[i]; } while (ntasks < nmap) for (int i = 0; i < nfile; i++) if (filemap.filesize[i] > nideal) { filemap.tasksperfile[i]++; ntasks++; if (ntasks == nmap) break; } while (ntasks > nmap) for (int i = 0; i < nfile; i++) if (filemap.tasksperfile[i] > 1) { filemap.tasksperfile[i]--; ntasks--; if (ntasks == nmap) break; } // check if any tasks are so small they will cause overlapping reads w/ delta // if so, reduce number of tasks for that file and issue warning int flag = 0; for (int i = 0; i < nfile; i++) { if (filemap.filesize[i] / filemap.tasksperfile[i] > filemap.delta) continue; flag = 1; while (filemap.tasksperfile[i] > 1) { filemap.tasksperfile[i]--; nmap--; if (filemap.filesize[i] / filemap.tasksperfile[i] > filemap.delta) break; } } if (flag & me == 0) { char str[128]; 
sprintf(str,"File(s) too small for file delta - decreased map tasks to %d", nmap); error->warning(str); } // whichfile[i] = which file is associated with the Ith task // whichtask[i] = which task in that file the Ith task is filemap.whichfile = new int[nmap]; filemap.whichtask = new int[nmap]; int itask = 0; for (int i = 0; i < nfile; i++) for (int j = 0; j < filemap.tasksperfile[i]; j++) { filemap.whichfile[itask] = i; filemap.whichtask[itask++] = j; } // use non-file map() to partition tasks to procs // it calls map_file_standalone once for each task int verbosity_hold = verbosity; int timer_hold = timer; verbosity = timer = 0; filemap.appmapfile = appmap; filemap.appptr = appptr; map(nmap,&map_file_standalone,this,addflag); verbosity = verbosity_hold; timer = timer_hold; stats("Map",0); // destroy FileMap if (filemap.sepwhich == 0) delete [] filemap.sepstr; for (int i = 0; i < nfile; i++) delete [] filemap.filename[i]; delete [] filemap.filename; delete [] filemap.filesize; delete [] filemap.tasksperfile; delete [] filemap.whichfile; delete [] filemap.whichtask; uint64_t nkeyall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- wrappers on user-provided appmapfile function 2-level wrapper needed b/c file map() calls non-file map() and cannot pass it a class method unless it were static, but then it couldn't access MR class data so non-file map() is passed standalone non-class method standalone calls back into class wrapper which calls user appmapfile() ------------------------------------------------------------------------- */ void map_file_standalone(int imap, KeyValue *kv, void *ptr) { MapReduce *mr = (MapReduce *) ptr; mr->map_file_wrapper(imap,kv); } void MapReduce::map_file_wrapper(int imap, KeyValue *kv) { // readstart = position in file to start reading for this task // readsize = # of bytes to read including delta uint64_t filesize = 
filemap.filesize[filemap.whichfile[imap]]; int itask = filemap.whichtask[imap]; int ntask = filemap.tasksperfile[filemap.whichfile[imap]]; uint64_t readstart = itask*filesize/ntask; uint64_t readnext = (itask+1)*filesize/ntask; if (readnext - readstart + filemap.delta + 1 > INTMAX) error->one("Single file read exceeds int size"); int readsize = readnext - readstart + filemap.delta; readsize = MIN(readsize,filesize-readstart); // read from appropriate file // terminate string with NULL char *str = (char *) memory->smalloc(readsize+1,"MR:fileread"); FILE *fp = fopen(filemap.filename[filemap.whichfile[imap]],"rb"); fseek(fp,readstart,SEEK_SET); fread(str,1,readsize,fp); str[readsize] = '\0'; fclose(fp); // if not first task in file, trim start of string // separator can be single char or a string // str[strstart] = 1st char in string // if separator = char, strstart is char after separator // if separator = string, strstart is 1st char of separator int strstart = 0; if (itask > 0) { char *ptr; if (filemap.sepwhich) ptr = strchr(str,filemap.sepchar); else ptr = strstr(str,filemap.sepstr); if (ptr == NULL || ptr-str > filemap.delta) error->one("Could not find file separator within delta"); strstart = ptr-str + filemap.sepwhich; } // if not last task in file, trim end of string // separator can be single char or a string // str[strstop] = last char in string = inserted NULL // if separator = char, NULL is char after separator // if separator = string, NULL is 1st char of separator int strstop = readsize; if (itask < ntask-1) { char *ptr; if (filemap.sepwhich) ptr = strchr(&str[readnext-readstart],filemap.sepchar); else ptr = strstr(&str[readnext-readstart],filemap.sepstr); if (ptr == NULL) error->one("Could not find file separator within delta"); if (filemap.sepwhich) ptr++; *ptr = '\0'; strstop = ptr-str; } // call user appmapfile() function with user data ptr int strsize = strstop - strstart + 1; filemap.appmapfile(imap,&str[strstart],strsize,kv,filemap.appptr); 
memory->sfree(str); } /* ---------------------------------------------------------------------- user call: create a KV via a parallel map operation on an existing MR's KV make one call to appmap() for each key/value pair in the input MR's KV each proc operates on key/value pairs it owns ------------------------------------------------------------------------- */ uint64_t MapReduce::map(MapReduce *mr, void (*appmap)(uint64_t, char *, int, char *, int, KeyValue *, void *), void *appptr, int addflag) { if (mr->kv == NULL) error->all("MapReduce passed to map() does not have KeyValue"); if (timer) start_timer(); if (verbosity) file_stats(0); if (!allocated) allocate(); delete kmv; kmv = NULL; // kv_src = KeyValue object which sends KV pairs to appmap() // kv_dest = KeyValue object which stores new KV pairs // if mr = this, then 2 KVs are the same: // if addflag, make copy so can add to it, delete kv_src at end // if not addflag, add to new KV, delete kv_src at end KeyValue *kv_src = mr->kv; kv_src->allocate(); KeyValue *kv_dest; if (mr == this) { if (addflag) { kv_dest = new KeyValue(this,kalign,valign,memory,error,comm); kv_dest->copy(kv_src); kv_dest->append(); } else { kv_dest = new KeyValue(this,kalign,valign,memory,error,comm); } } else { if (addflag == 0) { delete kv; kv_dest = new KeyValue(this,kalign,valign,memory,error,comm); } else if (kv == NULL) { kv_dest = new KeyValue(this,kalign,valign,memory,error,comm); } else { kv->append(); kv_dest = kv; } } int nkey_kv,keybytes,valuebytes; uint64_t dummy1,dummy2,dummy3; char *page_kv,*ptr,*key,*value; int npage_kv = kv_src->request_info(&page_kv); uint64_t n = 0; for (int ipage = 0; ipage < npage_kv; ipage++) { nkey_kv = kv_src->request_page(ipage,dummy1,dummy2,dummy3); ptr = page_kv; for (int i = 0; i < nkey_kv; i++) { keybytes = *((int *) ptr); valuebytes = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); key = ptr; ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); value = ptr; ptr += 
valuebytes; ptr = ROUNDUP(ptr,talignm1); appmap(n++,key,keybytes,value,valuebytes,kv_dest,appptr); } } if (mr == this) delete kv_src; else kv_src->deallocate(0); kv = kv_dest; kv->complete(); if (freepage) mem_cleanup(); stats("Map",0); uint64_t nkeyall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- open a KV so KV pairs can be added to it by another MR's map ------------------------------------------------------------------------- */ void MapReduce::open(int addflag) { if (!allocated) allocate(); delete kmv; kmv = NULL; if (addflag == 0) { delete kv; kv = new KeyValue(this,kalign,valign,memory,error,comm); } else if (kv == NULL) { kv = new KeyValue(this,kalign,valign,memory,error,comm); } else { kv->append(); } if (freepage) mem_cleanup(); } /* ---------------------------------------------------------------------- print of KV or KMV pairs to screen if all procs are printing, pass print token from proc to proc ------------------------------------------------------------------------- */ void MapReduce::print(int proc, int nstride, int kflag, int vflag) { MPI_Status status; if (kv == NULL && kmv == NULL) error->all("Cannot print without KeyValue or KeyMultiValue"); if (kflag < 0 || vflag < 0) error->all("Invalid print args"); if (kflag > 7 || vflag > 7) error->all("Invalid print args"); if (proc == me) { if (kv) { kv->allocate(); kv->print(stdout,nstride,kflag,vflag); kv->deallocate(0); } if (kmv) { kmv->allocate(); kmv->print(stdout,nstride,kflag,vflag); kmv->deallocate(0); } } if (proc >= 0) return; int token; MPI_Barrier(comm); if (me > 0) MPI_Recv(&token,0,MPI_INT,me-1,0,comm,&status); if (kv) { kv->allocate(); kv->print(stdout,nstride,kflag,vflag); kv->deallocate(0); } if (kmv) { kmv->allocate(); kmv->print(stdout,nstride,kflag,vflag); kmv->deallocate(0); } if (me < nprocs-1) MPI_Send(&token,0,MPI_INT,me+1,0,comm); MPI_Barrier(comm); } /* 
---------------------------------------------------------------------- print of KV or KMV pairs to file(s) if one proc is printing, write to filename if all procs are printing and fflag = 0, all write to one file if all procs are printing and fflag = 1, each proc writes to own file ------------------------------------------------------------------------- */ void MapReduce::print(char *file, int fflag, int proc, int nstride, int kflag, int vflag) { MPI_Status status; if (kv == NULL && kmv == NULL) error->all("Cannot print without KeyValue or KeyMultiValue"); if (kflag < 0 || vflag < 0) error->all("Invalid print args"); if (kflag > 7 || vflag > 7) error->all("Invalid print args"); if (proc == me) { FILE *fp = fopen(file,"w"); if (fp == NULL) error->one("Could not open print file"); if (kv) kv->print(fp,nstride,kflag,vflag); if (kmv) kmv->print(fp,nstride,kflag,vflag); fclose(fp); } if (proc >= 0) return; if (fflag == 1) { int n = strlen(file) + 8; char *procfile = new char[n]; sprintf(procfile,"%s.%d",file,me); FILE *fp = fopen(procfile,"w"); if (fp == NULL) error->one("Could not open print file"); if (kv) kv->print(fp,nstride,kflag,vflag); if (kmv) kmv->print(fp,nstride,kflag,vflag); fclose(fp); } else { int token; MPI_Barrier(comm); if (me > 0) MPI_Recv(&token,0,MPI_INT,me-1,0,comm,&status); FILE *fp = fopen(file,"a"); if (fp == NULL) error->one("Could not open print file"); if (kv) kv->print(fp,nstride,kflag,vflag); if (kmv) kmv->print(fp,nstride,kflag,vflag); fclose(fp); if (me < nprocs-1) MPI_Send(&token,0,MPI_INT,me+1,0,comm); MPI_Barrier(comm); } } /* ---------------------------------------------------------------------- create a KV from a KMV via a parallel reduce operation for nmap tasks make one call to appreduce() for each KMV pair each proc processes its owned KMV pairs ------------------------------------------------------------------------- */ uint64_t MapReduce::reduce(void (*appreduce)(char *, int, char *, int, int *, KeyValue *, void *), void 
*appptr) { if (kmv == NULL) error->all("Cannot reduce without KeyMultiValue"); if (timer) start_timer(); if (verbosity) file_stats(0); kv = new KeyValue(this,kalign,valign,memory,error,comm); kmv->allocate(); uint64_t dummy; int memtag1,memtag2; char *mvpage1 = mem_request(1,dummy,memtag1); char *mvpage2 = mem_request(1,dummy,memtag2); int nkey_kmv,nvalues,keybytes,mvaluebytes; uint64_t dummy1,dummy2,dummy3; int *valuesizes; char *ptr,*key,*multivalue; char *page_kmv; int npage_kmv = kmv->request_info(&page_kmv); char *page_hold = page_kmv; for (int ipage = 0; ipage < npage_kmv; ipage++) { nkey_kmv = kmv->request_page(ipage,0,dummy1,dummy2,dummy3); ptr = page_kmv; for (int i = 0; i < nkey_kmv; i++) { nvalues = *((int *) ptr); ptr += sizeof(int); if (nvalues > 0) { keybytes = *((int *) ptr); ptr += sizeof(int); mvaluebytes = *((int *) ptr); ptr += sizeof(int); valuesizes = (int *) ptr; ptr += ((uint64_t) nvalues) * sizeof(int); ptr = ROUNDUP(ptr,kalignm1); key = ptr; ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); multivalue = ptr; ptr += mvaluebytes; ptr = ROUNDUP(ptr,talignm1); appreduce(key,keybytes,multivalue,nvalues,valuesizes,kv,appptr); } else { keybytes = *((int *) ptr); ptr += sizeof(int); ptr = ROUNDUP(ptr,kalignm1); key = ptr; // set KMV page to mvpage1 so key will not be overwritten // when multivalue_block() loads new pages of values kmv_block_valid = 1; kmv_key_page = ipage; kmv_mvpage1 = mvpage1; kmv_mvpage2 = mvpage2; kmv_nvalue_total = kmv->multivalue_blocks(ipage,kmv_nblock); kmv->page = mvpage1; appreduce(key,keybytes,NULL,0,(int *) this,kv,appptr); kmv_block_valid = 0; ipage += kmv_nblock; kmv->page = page_hold; } } } kv->complete(); mem_unmark(memtag1); mem_unmark(memtag2); // delete KMV delete kmv; kmv = NULL; if (freepage) mem_cleanup(); stats("Reduce",0); uint64_t nkeyall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- query total # of 
values and # of value blocks in a single multi-page KMV called from user myreduce() or mycompress() function ------------------------------------------------------------------------- */ uint64_t MapReduce::multivalue_blocks(int &nblock) { if (!kmv_block_valid) error->one("Invalid call to multivalue_blocks()"); nblock = kmv_nblock; return kmv_nvalue_total; } /* ---------------------------------------------------------------------- query total # of values and # of value blocks in a single multi-page KMV called from user myreduce() or mycompress() function ------------------------------------------------------------------------- */ void MapReduce::multivalue_block_select(int which) { if (!kmv_block_valid) error->one("Invalid call to multivalue_block_select()"); if (which == 1) kmv->page = kmv_mvpage1; else if (which == 2) kmv->page = kmv_mvpage2; else error->one("Invalid arg to multivalue_block_select()"); } /* ---------------------------------------------------------------------- query info for 1 block of a single KMV that spans multiple pages called from user myreduce() or mycompress() function iblock = 0 to nblock_kmv-1 ------------------------------------------------------------------------- */ int MapReduce::multivalue_block(int iblock, char **pmultivalue, int **pvaluesizes) { if (!kmv_block_valid) error->one("Invalid call to multivalue_block()"); if (iblock < 0 || iblock >= kmv_nblock) error->one("Invalid page request to multivalue_block()"); uint64_t dummy1,dummy2,dummy3; char *page_kmv; kmv->request_info(&page_kmv); kmv->request_page(kmv_key_page+iblock+1,0,dummy1,dummy2,dummy3); char *ptr = page_kmv; int nvalues = *((int *) ptr); ptr += sizeof(int); *pvaluesizes = (int *) ptr; ptr += nvalues*sizeof(int); *pmultivalue = ROUNDUP(ptr,valignm1); return nvalues; } /* ---------------------------------------------------------------------- scan KV pairs without altering them make one call to appscan() for each KV pair each proc processes its owned KV pairs 
------------------------------------------------------------------------- */ uint64_t MapReduce::scan(void (*appscan)(char *, int, char *, int, void *), void *appptr) { if (kv == NULL) error->all("Cannot scan without KeyValue"); if (timer) start_timer(); if (verbosity) file_stats(0); kv->allocate(); int nkey_kv,keybytes,valuebytes; uint64_t dummy1,dummy2,dummy3; char *page_kv,*ptr,*key,*value; int npage_kv = kv->request_info(&page_kv); for (int ipage = 0; ipage < npage_kv; ipage++) { nkey_kv = kv->request_page(ipage,dummy1,dummy2,dummy3); ptr = page_kv; for (int i = 0; i < nkey_kv; i++) { keybytes = *((int *) ptr); valuebytes = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); key = ptr; ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); value = ptr; ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1); appscan(key,keybytes,value,valuebytes,appptr); } } kv->deallocate(0); if (freepage) mem_cleanup(); stats("Scan",0); uint64_t nkeyall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- scan KMV pairs without altering them make one call to appscan() for each KMV pair each proc processes its owned KMV pairs ------------------------------------------------------------------------- */ uint64_t MapReduce::scan(void (*appscan)(char *, int, char *, int, int *, void *), void *appptr) { if (kmv == NULL) error->all("Cannot scan without KeyMultiValue"); if (timer) start_timer(); if (verbosity) file_stats(0); kmv->allocate(); uint64_t dummy; int memtag1,memtag2; char *mvpage1 = mem_request(1,dummy,memtag1); char *mvpage2 = mem_request(1,dummy,memtag2); int nkey_kmv,nvalues,keybytes,mvaluebytes; uint64_t dummy1,dummy2,dummy3; int *valuesizes; char *ptr,*key,*multivalue; char *page_kmv; int npage_kmv = kmv->request_info(&page_kmv); char *page_hold = page_kmv; for (int ipage = 0; ipage < npage_kmv; ipage++) { nkey_kmv = 
kmv->request_page(ipage,0,dummy1,dummy2,dummy3); ptr = page_kmv; for (int i = 0; i < nkey_kmv; i++) { nvalues = *((int *) ptr); ptr += sizeof(int); if (nvalues > 0) { keybytes = *((int *) ptr); ptr += sizeof(int); mvaluebytes = *((int *) ptr); ptr += sizeof(int); valuesizes = (int *) ptr; ptr += ((uint64_t) nvalues) * sizeof(int); ptr = ROUNDUP(ptr,kalignm1); key = ptr; ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); multivalue = ptr; ptr += mvaluebytes; ptr = ROUNDUP(ptr,talignm1); appscan(key,keybytes,multivalue,nvalues,valuesizes,appptr); } else { keybytes = *((int *) ptr); ptr += sizeof(int); ptr = ROUNDUP(ptr,kalignm1); key = ptr; // set KMV page to mvpage1 so key will not be overwritten // when multivalue_block() loads new pages of values kmv_block_valid = 1; kmv_key_page = ipage; kmv_mvpage1 = mvpage1; kmv_mvpage2 = mvpage2; kmv_nvalue_total = kmv->multivalue_blocks(ipage,kmv_nblock); kmv->page = mvpage1; appscan(key,keybytes,NULL,0,(int *) this,appptr); kmv_block_valid = 0; ipage += kmv_nblock; kmv->page = page_hold; } } } kmv->deallocate(0); if (freepage) mem_cleanup(); stats("Scan",0); uint64_t nkeyall; MPI_Allreduce(&kmv->nkmv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- scrunch KV to create a KMV on fewer processors, each with a single pair gather followed by a collapse numprocs = # of procs new KMV resides on (0 to numprocs-1) new key = provided key name (same on every proc) new value = list of old key,value,key,value,etc ------------------------------------------------------------------------- */ uint64_t MapReduce::scrunch(int numprocs, char *key, int keybytes) { if (kv == NULL) error->all("Cannot scrunch without KeyValue"); if (timer) start_timer(); if (verbosity) file_stats(0); int verbosity_hold = verbosity; int timer_hold = timer; verbosity = timer = 0; gather(numprocs); collapse(key,keybytes); verbosity = verbosity_hold; timer = timer_hold; stats("Scrunch",1); 
uint64_t nkeyall; MPI_Allreduce(&kmv->nkmv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- sort keys in a KV to create a new KV call sort_keys(appcompare) with pre-defined compare method ------------------------------------------------------------------------- */ uint64_t MapReduce::sort_keys(int flag) { int absflag = flag; if (flag < 0) absflag = -flag; if (absflag == 1) { if (flag > 0) return sort_keys(compare_int); else return sort_keys(compare_int_reverse); } else if (absflag == 2) { if (flag > 0) return sort_keys(compare_uint64); else return sort_keys(compare_uint64_reverse); } else if (absflag == 3) { if (flag > 0) return sort_keys(compare_float); else return sort_keys(compare_float_reverse); } else if (absflag == 4) { if (flag > 0) return sort_keys(compare_double); else return sort_keys(compare_double_reverse); } else if (absflag == 5) { if (flag > 0) return sort_keys(compare_str); else return sort_keys(compare_str_reverse); } else if (absflag == 6) { if (flag > 0) return sort_keys(compare_strn); else return sort_keys(compare_strn_reverse); } error->all("Invalid compare method for sort keys"); } /* ---------------------------------------------------------------------- sort keys in a KV to create a new KV use appcompare() to compare 2 keys each proc sorts only its data ------------------------------------------------------------------------- */ uint64_t MapReduce::sort_keys(int (*appcompare)(char *, int, char *, int)) { if (kv == NULL) error->all("Cannot sort_keys without KeyValue"); if (timer) start_timer(); if (verbosity) file_stats(0); compare = appcompare; sort_kv(0); stats("Sort_keys",0); fcounter_sort = 0; uint64_t nkeyall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- sort values in a KV to create a new KV call sort_values(appcompare) with pre-defined compare 
method ------------------------------------------------------------------------- */ uint64_t MapReduce::sort_values(int flag) { int absflag = flag; if (flag < 0) absflag = -flag; if (absflag == 1) { if (flag > 0) return sort_values(compare_int); else return sort_values(compare_int_reverse); } else if (absflag == 2) { if (flag > 0) return sort_values(compare_uint64); else return sort_values(compare_uint64_reverse); } else if (absflag == 3) { if (flag > 0) return sort_values(compare_float); else return sort_values(compare_float_reverse); } else if (absflag == 4) { if (flag > 0) return sort_values(compare_double); else return sort_values(compare_double_reverse); } else if (absflag == 5) { if (flag > 0) return sort_values(compare_str); else return sort_values(compare_str_reverse); } else if (absflag == 6) { if (flag > 0) return sort_values(compare_strn); else return sort_values(compare_strn_reverse); } error->all("Invalid compare method for sort values"); } /* ---------------------------------------------------------------------- sort values in a KV to create a new KV use appcompare() to compare 2 values each proc sorts only its data ------------------------------------------------------------------------- */ uint64_t MapReduce::sort_values(int (*appcompare)(char *, int, char *, int)) { if (kv == NULL) error->all("Cannot sort_values without KeyValue"); if (timer) start_timer(); if (verbosity) file_stats(0); compare = appcompare; sort_kv(1); stats("Sort_values",0); fcounter_sort = 0; uint64_t nkeyall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- sort values within each multivalue in a KMV call sort_multivalues(appcompare) with pre-defined compare method ------------------------------------------------------------------------- */ uint64_t MapReduce::sort_multivalues(int flag) { int absflag = flag; if (flag < 0) absflag = -flag; if (absflag == 1) { if (flag > 0) 
return sort_multivalues(compare_int); else return sort_multivalues(compare_int_reverse); } else if (absflag == 2) { if (flag > 0) return sort_multivalues(compare_uint64); else return sort_multivalues(compare_uint64_reverse); } else if (absflag == 3) { if (flag > 0) return sort_multivalues(compare_float); else return sort_multivalues(compare_float_reverse); } else if (absflag == 4) { if (flag > 0) return sort_multivalues(compare_double); else return sort_multivalues(compare_double_reverse); } else if (absflag == 5) { if (flag > 0) return sort_multivalues(compare_str); else return sort_multivalues(compare_str_reverse); } else if (absflag == 6) { if (flag > 0) return sort_multivalues(compare_strn); else return sort_multivalues(compare_strn_reverse); } error->all("Invalid compare method for sort multivalues"); } /* ---------------------------------------------------------------------- sort values within each multivalue in a KMV sorts in place, does not create a new KMV use appcompare() to compare 2 values within a multivalue each proc sorts only its data ------------------------------------------------------------------------- */ uint64_t MapReduce::sort_multivalues(int (*appcompare)(char *, int, char *, int)) { int i,j,k; int *order; uint64_t offset; char *page_kmv; if (kmv == NULL) error->all("Cannot sort_multivalues without KeyMultiValue"); if (timer) start_timer(); if (verbosity) file_stats(0); kmv->allocate(); int npage_kmv = kmv->request_info(&page_kmv); compare = appcompare; mrptr = this; uint64_t dummy; int memtag1,memtag2; char *twopage = mem_request(2,dummy,memtag2); char *scratch = mem_request(1,dummy,memtag1); int nkey_kmv,nvalues,keybytes,mvaluebytes; uint64_t dummy1,dummy2,dummy3; int *valuesizes; char *ptr,*multivalue,*ptr2; for (int ipage = 0; ipage < npage_kmv; ipage++) { nkey_kmv = kmv->request_page(ipage,1,dummy1,dummy2,dummy3); ptr = page_kmv; for (i = 0; i < nkey_kmv; i++) { nvalues = *((int *) ptr); ptr += sizeof(int); if (nvalues == 0) 
error->one("Sort_multivalue of multi-page KeyMultiValue " "not yet supported"); keybytes = *((int *) ptr); ptr += sizeof(int); mvaluebytes = *((int *) ptr); ptr += sizeof(int); valuesizes = (int *) ptr; ptr += ((uint64_t) nvalues) * sizeof(int); ptr = ROUNDUP(ptr,kalignm1); ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); multivalue = ptr; ptr += mvaluebytes; ptr = ROUNDUP(ptr,talignm1); // setup 2 arrays from 2 pages of memory // order = ordering of values in multivalue, initially 0 to N-1 // dptr = ptr to each value // slength = length of each value = valuesizes offset = ((uint64_t) nvalues) * sizeof(int); order = (int *) twopage; dptr = (char **) &twopage[offset]; slength = valuesizes; ptr2 = multivalue; for (j = 0; j < nvalues; j++) { order[j] = j; dptr[j] = ptr2; ptr2 += valuesizes[j]; } // sort values within multivalue via qsort() // simply creates new order array qsort(order,nvalues,sizeof(int),compare_standalone); // reorder the multivalue, using scratch space // copy back into original page ptr2 = scratch; for (j = 0; j < nvalues; j++) { k = order[j]; memcpy(ptr2,&dptr[k],slength[k]); ptr2 += slength[k]; } memcpy(multivalue,scratch,mvaluebytes); } // overwrite the changed KMV page to disk kmv->overwrite_page(ipage); } // close KMV file if necessary kmv->close_file(); // free memory pages kmv->deallocate(0); mem_unmark(memtag1); mem_unmark(memtag2); if (freepage) mem_cleanup(); stats("Sort_multivalues",1); uint64_t nkeyall; MPI_Allreduce(&kmv->nkmv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); return nkeyall; } /* ---------------------------------------------------------------------- sort keys or values in a KV to create a new KV flag = 0 = sort keys, flag = 1 = sort values ------------------------------------------------------------------------- */ void MapReduce::sort_kv(int flag) { int nkey_kv,memtag_kv,memtag1,memtag2,memtag_twopage,src1,src2; uint64_t dummy,dummy1,dummy2,dummy3; char *page_kv; void *src1ptr,*src2ptr,*destptr; kv->allocate(); mrptr = this; int 
npage_kv = kv->request_info(&page_kv); memtag_kv = kv->memtag; // KV has single page // sort into newpage, assign newpage to KV, and return if (npage_kv == 1) { char *twopage = mem_request(2,dummy,memtag_twopage); char *newpage = mem_request(1,dummy,memtag1); nkey_kv = kv->request_page(0,dummy1,dummy2,dummy3); sort_onepage(flag,nkey_kv,page_kv,newpage,twopage); mem_unmark(memtag_twopage); mem_unmark(memtag_kv); kv->set_page(pagesize,newpage,memtag1); kv->overwrite_page(0); kv->close_file(); kv->deallocate(0); if (freepage) mem_cleanup(); return; } // KV has multiple pages // perform merge sort, two sources at a time into destination // each pass thru while loop increments I by 2 and N by 1 // sources can be sorted page or Spool file // destination can be Spool file or final sorted KV char *twopage = mem_request(2,dummy,memtag_twopage); char *page1 = mem_request(1,dummy,memtag1); char *page2 = mem_request(1,dummy,memtag2); Spool **spools = new Spool*[2*npage_kv]; int n = npage_kv; int i = 0; while (i < n) { if (i < npage_kv) { kv->set_page(pagesize,page_kv,memtag_kv); nkey_kv = kv->request_page(i++,dummy1,dummy2,dummy3); sort_onepage(flag,nkey_kv,page_kv,page1,twopage); src1ptr = (void *) page1; src1 = nkey_kv; } else { spools[i]->set_page(pagesize,page1); src1ptr = (void *) spools[i++]; src1 = 0; } if (i < npage_kv) { kv->set_page(pagesize,page_kv,memtag_kv); nkey_kv = kv->request_page(i++,dummy1,dummy2,dummy3); sort_onepage(flag,nkey_kv,page_kv,page2,twopage); src2ptr = (void *) page2; src2 = nkey_kv; } else { spools[i]->set_page(pagesize,page2); src2ptr = (void *) spools[i++]; src2 = 0; } if (i < n) { spools[n] = new Spool(SORTFILE,this,memory,error); spools[n]->set_page(pagesize,page_kv); destptr = (void *) spools[n]; merge(flag,src1,src1ptr,src2,src2ptr,0,destptr); if (!src1) delete spools[i-2]; if (!src2) delete spools[i-1]; spools[n++]->complete(); } else { delete kv; kv = new KeyValue(this,kalign,valign,memory,error,comm); 
kv->set_page(pagesize,page_kv,memtag_kv); destptr = (void *) kv; merge(flag,src1,src1ptr,src2,src2ptr,1,destptr); if (!src1) delete spools[i-2]; if (!src2) delete spools[i-1]; kv->complete(); } } delete [] spools; mem_unmark(memtag_twopage); mem_unmark(memtag1); mem_unmark(memtag2); if (freepage) mem_cleanup(); } /* ---------------------------------------------------------------------- sort keys or values in one page of a KV to create a new KV flag = 0 for sort keys, flag = 1 for sort values unsorted KVs are in pagesrc, final sorted KVs are put in pagedest twopage is used for qsort() data structs ------------------------------------------------------------------------- */ void MapReduce::sort_onepage(int flag, int nkey_kv, char *pagesrc, char *pagedest, char *twopage) { int i,j; int keybytes,valuebytes; char *ptr,*key,*value; // setup 3 arrays from twopage of memory // order = ordering of keys or values in KV, initially 0 to N-1 // slength = length of each key or value // dptr = datum ptr = ptr to each key or value uint64_t offset = ((uint64_t) nkey_kv) * sizeof(int); int *order = (int *) twopage; slength = (int *) &twopage[offset]; dptr = (char **) &twopage[2*offset]; ptr = pagesrc; for (i = 0; i < nkey_kv; i++) { order[i] = i; keybytes = *((int *) ptr); valuebytes = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); key = ptr; ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); value = ptr; ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1); if (flag == 0) { slength[i] = keybytes; dptr[i] = key; } else { slength[i] = valuebytes; dptr[i] = value; } } // sort keys or values via qsort() // simply creates new order array qsort(order,nkey_kv,sizeof(int),compare_standalone); // dptr = start of each KV pair // slength = length of entire KV pair ptr = pagesrc; for (i = 0; i < nkey_kv; i++) { dptr[i] = ptr; keybytes = *((int *) ptr); valuebytes = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); ptr += keybytes; ptr = 
ROUNDUP(ptr,valignm1); ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1); slength[i] = ptr - dptr[i]; } // reorder KV pairs into dest page ptr = pagedest; for (i = 0; i < nkey_kv; i++) { j = order[i]; memcpy(ptr,dptr[j],slength[j]); ptr += slength[j]; } } /* ---------------------------------------------------------------------- merge of 2 sources into a destination flag = 0 for key sort, flag = 1 for value sort src1,src2 can each be Spool file (src = 0) or KV page (src = nkey_kv) dest can be Spool file (dest = 0) or final KV (dest = 1) ------------------------------------------------------------------------- */ void MapReduce::merge(int flag, int src1, void *src1ptr, int src2, void *src2ptr, int dest, void *destptr) { int result,ientry1,ientry2,nbytes1,nbytes2,ipage1,ipage2; int npage1,npage2,nentry1,nentry2; char *str1,*str2,*page1,*page2; Spool *sp1,*sp2,*spdest; KeyValue *kvdest; if (src1) { npage1 = 1; page1 = (char *) src1ptr; nentry1 = src1; } else { sp1 = (Spool *) src1ptr; npage1 = sp1->request_info(&page1); nentry1 = sp1->request_page(0); } if (src2) { npage2 = 1; page2 = (char *) src2ptr; nentry2 = src2; } else { sp2 = (Spool *) src2ptr; npage2 = sp2->request_info(&page2); nentry2 = sp2->request_page(0); } if (dest) kvdest = (KeyValue *) destptr; else spdest = (Spool *) destptr; ipage1 = ipage2 = 0; ientry1 = ientry2 = 0; char *ptr1 = page1; char *ptr2 = page2; int len1 = extract(flag,ptr1,str1,nbytes1); int len2 = extract(flag,ptr2,str2,nbytes2); int done = 0; while (1) { if (done == 0) result = compare(str1,nbytes1,str2,nbytes2); if (result <= 0) { if (dest) kvdest->add(ptr1); else spdest->add(len1,ptr1); ptr1 += len1; ientry1++; if (ientry1 == nentry1) { ipage1++; if (ipage1 < npage1) { nentry1 = sp1->request_page(ipage1); ientry1 = 0; ptr1 = page1; len1 = extract(flag,ptr1,str1,nbytes1); } else { done++; if (done == 2) break; result = 1; } } else len1 = extract(flag,ptr1,str1,nbytes1); } if (result >= 0) { if (dest) kvdest->add(ptr2); else 
spdest->add(len2,ptr2); ptr2 += len2; ientry2++; if (ientry2 == nentry2) { ipage2++; if (ipage2 < npage2) { nentry2 = sp2->request_page(ipage2); ientry2 = 0; ptr2 = page2; len2 = extract(flag,ptr2,str2,nbytes2); } else { done++; if (done == 2) break; result = -1; } } else len2 = extract(flag,ptr2,str2,nbytes2); } } } /* ---------------------------------------------------------------------- extract datum from a KV pair beginning at ptr_start flag = 0, return key and keybytes as str and nbytes flag = 1, return value and valuebytes as str and nbytes also return byte increment to next entry ------------------------------------------------------------------------- */ int MapReduce::extract(int flag, char *ptr_start, char *&str, int &nbytes) { char *ptr = ptr_start; int keybytes = *((int *) ptr); int valuebytes = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); char *key = ptr; ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); char *value = ptr; ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1); if (flag == 0) { str = key; nbytes = keybytes; } else { str = value; nbytes = valuebytes; } return ptr - ptr_start; } /* ---------------------------------------------------------------------- wrappers on user-provided key or value comparison functions necessary so can extract 2 keys or values to pass back to application 2-level wrapper needed b/c qsort() cannot be passed a class method unless it were static, but then it couldn't access MR class data so qsort() is passed standalone non-class method it accesses static class member mrptr, set before call to qsort() standalone calls back into class wrapper which calls user compare() ------------------------------------------------------------------------- */ int compare_standalone(const void *iptr, const void *jptr) { return MapReduce::mrptr->compare_wrapper(*((int *) iptr),*((int *) jptr)); } int MapReduce::compare_wrapper(int i, int j) { return compare(dptr[i],slength[i],dptr[j],slength[j]); } /* 
---------------------------------------------------------------------- compare 2 integers ------------------------------------------------------------------------- */ int compare_int(char *str1, int len1, char *str2, int len2) { int *i1 = (int *) str1; int *i2 = (int *) str2; if (*i1 < *i2) return -1; if (*i1 > *i2) return 1; return 0; } int compare_int_reverse(char *str1, int len1, char *str2, int len2) { int *i1 = (int *) str1; int *i2 = (int *) str2; if (*i1 < *i2) return 1; if (*i1 > *i2) return -1; return 0; } /* ---------------------------------------------------------------------- compare 2 64-bit unsigned integers ------------------------------------------------------------------------- */ int compare_uint64(char *str1, int len1, char *str2, int len2) { uint64_t *i1 = (uint64_t *) str1; uint64_t *i2 = (uint64_t *) str2; if (*i1 < *i2) return -1; if (*i1 > *i2) return 1; return 0; } int compare_uint64_reverse(char *str1, int len1, char *str2, int len2) { uint64_t *i1 = (uint64_t *) str1; uint64_t *i2 = (uint64_t *) str2; if (*i1 < *i2) return 1; if (*i1 > *i2) return -1; return 0; } /* ---------------------------------------------------------------------- compare 2 floats ------------------------------------------------------------------------- */ int compare_float(char *str1, int len1, char *str2, int len2) { float *i1 = (float *) str1; float *i2 = (float *) str2; if (*i1 < *i2) return -1; if (*i1 > *i2) return 1; return 0; } int compare_float_reverse(char *str1, int len1, char *str2, int len2) { float *i1 = (float *) str1; float *i2 = (float *) str2; if (*i1 < *i2) return 1; if (*i1 > *i2) return -1; return 0; } /* ---------------------------------------------------------------------- compare 2 doubles ------------------------------------------------------------------------- */ int compare_double(char *str1, int len1, char *str2, int len2) { double *i1 = (double *) str1; double *i2 = (double *) str2; if (*i1 < *i2) return -1; if (*i1 > *i2) return 1; 
return 0; } int compare_double_reverse(char *str1, int len1, char *str2, int len2) { double *i1 = (double *) str1; double *i2 = (double *) str2; if (*i1 < *i2) return 1; if (*i1 > *i2) return -1; return 0; } /* ---------------------------------------------------------------------- compare 2 NULL-terminated strings ------------------------------------------------------------------------- */ int compare_str(char *str1, int len1, char *str2, int len2) { return strcmp(str1,str2); } int compare_str_reverse(char *str1, int len1, char *str2, int len2) { return -strcmp(str1,str2); } /* ---------------------------------------------------------------------- compare 2 non-NULL terminated strings via strncmp on shorter length ------------------------------------------------------------------------- */ int compare_strn(char *str1, int len1, char *str2, int len2) { return strncmp(str1,str2,MIN(len1,len2)); } int compare_strn_reverse(char *str1, int len1, char *str2, int len2) { return -strncmp(str1,str2,MIN(len1,len2)); } /* ---------------------------------------------------------------------- use str to find files to add to list of filenames if str is a file, add it to list if str is a directory, add all files in directory to list if recurse = 1, call findfiles() on any directory found within directory return updated list of files ------------------------------------------------------------------------- */ void MapReduce::findfiles(char *str, int recurse, int readflag, int &nfile, int &maxfile, char **&files) { int err,n; struct stat buf; char newstr[MAXLINE]; err = stat(str,&buf); if (err) { char msg[256]; sprintf(msg,"Could not query status of file %s in map",str); error->one(msg); } else if (S_ISREG(buf.st_mode)) addfiles(str,readflag,nfile,maxfile,files); else if (S_ISDIR(buf.st_mode)) { struct dirent *ep; DIR *dp = opendir(str); if (dp == NULL) { char msg[256]; sprintf(msg,"Cannot open directory %s to search for files in map",str); error->one(msg); } while (ep = 
readdir(dp)) { if (ep->d_name[0] == '.') continue; sprintf(newstr,"%s/%s",str,ep->d_name); err = stat(newstr,&buf); if (S_ISREG(buf.st_mode)) addfiles(newstr,readflag,nfile,maxfile,files); else if (S_ISDIR(buf.st_mode) && recurse) findfiles(newstr,recurse,readflag,nfile,maxfile,files); } closedir(dp); } else { char msg[256]; sprintf(msg,"Invalid filename %s in map",str); error->one(msg); } } /* ---------------------------------------------------------------------- add a str to list of filenames if readflag = 0, just add str as filename if readflag = 1, open the file, read filenames out of it and add each to list return updated list of files ------------------------------------------------------------------------- */ void MapReduce::addfiles(char *str, int readflag, int &nfile, int &maxfile, char **&files) { if (!readflag) { if (nfile == maxfile) { maxfile += FILECHUNK; files = (char **) realloc(files,maxfile*sizeof(char *)); } int n = strlen(str) + 1; files[nfile] = new char[n]; strcpy(files[nfile],str); nfile++; return; } FILE *fp = fopen(str,"r"); if (fp == NULL) { char msg[256]; sprintf(msg,"Could not open file %s of filenames in map",str); error->one(msg); } char line[MAXLINE]; while (fgets(line,MAXLINE,fp)) { char *ptr = line; while (isspace(*ptr)) ptr++; if (strlen(ptr) == 0) { char msg[256]; sprintf(msg,"Blank line in file %s of filenames in map",str); error->one(msg); } char *ptr2 = ptr + strlen(ptr) - 1; while (isspace(*ptr2)) ptr2--; ptr2++; *ptr2 = '\0'; if (nfile == maxfile) { maxfile += FILECHUNK; files = (char **) realloc(files,maxfile*sizeof(char *)); } int n = strlen(ptr) + 1; files[nfile] = new char[n]; strcpy(files[nfile],ptr); nfile++; } fclose(fp); } /* ---------------------------------------------------------------------- bcast list of files from proc 0 ------------------------------------------------------------------------- */ void MapReduce::bcastfiles(int &nfile, char **&files) { MPI_Bcast(&nfile,1,MPI_INT,0,comm); if (me > 0) files = (char 
**) memory->srealloc(files,nfile*sizeof(char *),"MR:files"); int n; for (int i = 0; i < nfile; i++) { if (me == 0) n = strlen(files[i]) + 1; MPI_Bcast(&n,1,MPI_INT,0,comm); if (me > 0) files[i] = new char[n]; MPI_Bcast(files[i],n,MPI_CHAR,0,comm); } } /* ---------------------------------------------------------------------- print stats for KV ------------------------------------------------------------------------- */ uint64_t MapReduce::kv_stats(int level) { if (kv == NULL) error->all("Cannot print stats without KeyValue"); double mbyte = 1024.0*1024.0; int npages; uint64_t nkeyall,ksizeall,vsizeall,esizeall; MPI_Allreduce(&kv->nkv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); MPI_Allreduce(&kv->ksize,&ksizeall,1,MRMPI_BIGINT,MPI_SUM,comm); MPI_Allreduce(&kv->vsize,&vsizeall,1,MRMPI_BIGINT,MPI_SUM,comm); MPI_Allreduce(&kv->esize,&esizeall,1,MRMPI_BIGINT,MPI_SUM,comm); MPI_Allreduce(&kv->npage,&npages,1,MPI_INT,MPI_SUM,comm); if (level == 0) return nkeyall; if (level == 1) if (me == 0) printf("%lu pairs, %.3g Mb keys, %.3g Mb values, %.3g Mb, " "%d pages\n", nkeyall,ksizeall/mbyte,vsizeall/mbyte,esizeall/mbyte,npages); if (level == 2) { write_histo((double) kv->nkv," KV pairs:"); write_histo(kv->ksize/mbyte," Kdata (Mb):"); write_histo(kv->vsize/mbyte," Vdata (Mb):"); } return nkeyall; } /* ---------------------------------------------------------------------- print stats for KMV ------------------------------------------------------------------------- */ uint64_t MapReduce::kmv_stats(int level) { if (kmv == NULL) error->all("Cannot print stats without KeyMultiValue"); double mbyte = 1024.0*1024.0; int npages; uint64_t nkeyall,ksizeall,vsizeall,esizeall; MPI_Allreduce(&kmv->nkmv,&nkeyall,1,MRMPI_BIGINT,MPI_SUM,comm); MPI_Allreduce(&kmv->ksize,&ksizeall,1,MRMPI_BIGINT,MPI_SUM,comm); MPI_Allreduce(&kmv->vsize,&vsizeall,1,MRMPI_BIGINT,MPI_SUM,comm); MPI_Allreduce(&kmv->esize,&esizeall,1,MRMPI_BIGINT,MPI_SUM,comm); MPI_Allreduce(&kmv->npage,&npages,1,MPI_INT,MPI_SUM,comm); if 
(level == 0) return nkeyall; if (level == 1) if (me == 0) printf("%lu pairs, %.3g Mb keys, %.3g Mb values, %.3g Mb, " "%d pages\n", nkeyall,ksizeall/mbyte,vsizeall/mbyte,esizeall/mbyte,npages); if (level == 2) { write_histo((double) kmv->nkmv," KMV pairs:"); write_histo(kmv->ksize/mbyte," Kdata (Mb):"); write_histo(kmv->vsize/mbyte," Vdata (Mb):"); } return nkeyall; } /* ---------------------------------------------------------------------- print cummulative memory, comm, and file read/write stats for all MR objects ------------------------------------------------------------------------- */ void MapReduce::cummulative_stats(int level, int reset) { double mbyte = 1024.0*1024.0; double gbyte = 1024.0*1024.0*1024.0; // version info if (me == 0) printf("MapReduce-MPI (%s)\n",MRMPI_VERSION); // memory uint64_t allmsizemax,allmsize; MPI_Allreduce(&msizemax,&allmsizemax,1,MRMPI_BIGINT,MPI_MAX,comm); MPI_Allreduce(&msizemax,&allmsize,1,MRMPI_BIGINT,MPI_SUM,comm); if (me == 0) printf("Cummulative hi-water mem = " "%.3g Mb any proc, %.3g Gb all procs\n", allmsizemax/mbyte,allmsize/gbyte); // communication uint64_t csize[2] = {cssize,crsize}; uint64_t allcsize[2]; MPI_Allreduce(csize,allcsize,2,MRMPI_BIGINT,MPI_SUM,comm); double allctime; MPI_Allreduce(&commtime,&allctime,1,MPI_DOUBLE,MPI_SUM,comm); if (allcsize[0] || allcsize[1]) { if (me == 0) printf("Cummulative comm = " "%.3g Mb send, %.3g Mb recv, %.3g secs\n", allcsize[0]/mbyte,allcsize[1]/mbyte, allctime/nprocs); if (level == 2) { write_histo(csize[0]/mbyte," Send (Mb):"); write_histo(csize[1]/mbyte," Recv (Mb):"); } } // file I/O uint64_t size[2] = {rsize,wsize}; uint64_t allsize[2]; MPI_Allreduce(size,allsize,2,MRMPI_BIGINT,MPI_SUM,comm); if (allsize[0] || allsize[1]) { if (me == 0) printf("Cummulative I/O = %.3g Mb read, %.3g Mb write\n", allsize[0]/mbyte,allsize[1]/mbyte); if (level == 2) { write_histo(size[0]/mbyte," Read (Mb):"); write_histo(size[1]/mbyte," Write (Mb):"); } } if (reset) { rsize = wsize = 0; 
cssize = crsize = 0; } } /* ---------------------------------------------------------------------- change fpath, but only if allocation has not occurred ------------------------------------------------------------------------- */ void MapReduce::set_fpath(const char *str) { if (allocated) return; delete [] fpath; int n = strlen(str) + 1; fpath = new char[n]; strcpy(fpath,str); } /* ---------------------------------------------------------------------- print hi-water memory page and disk file stats for MR ------------------------------------------------------------------------- */ void MapReduce::mr_stats(int level) { double mbyte = 1024.0*1024.0; int npagemaxall; MPI_Allreduce(&npagemax,&npagemaxall,1,MPI_INT,MPI_MAX,comm); uint64_t fsizemaxall; MPI_Allreduce(&fsizemax,&fsizemaxall,1,MRMPI_BIGINT,MPI_SUM,comm); if (me == 0) printf("MR stats = %d max pages any proc, %.3g Mb, " "%.3g Mb max file size all procs\n", npagemaxall,npagemaxall*pagesize/mbyte,fsizemaxall/mbyte); if (level == 2) { if (npagemaxall) write_histo((double) npagemax," Pages:"); if (fsizemaxall) write_histo(fsizemax/mbyte," Files:"); } } /* ---------------------------------------------------------------------- stats for one operation and its resulting KV or KMV which = 0 for KV, which = 1 for KMV output timer, KV/KMV, comm, I/O, or nothing depending on settings ------------------------------------------------------------------------- */ void MapReduce::stats(const char *heading, int which) { if (timer) { if (timer == 1) { MPI_Barrier(comm); if (me == 0) printf("%s time (secs) = %g\n", heading,MPI_Wtime()-time_start); } else if (timer == 2) { char str[64]; sprintf(str,"%s time (secs) =",heading); write_histo(MPI_Wtime()-time_start,str); } } if (verbosity == 0) return; if (which == 0) { if (me == 0) printf("%s KV = ",heading); kv_stats(verbosity); } else { if (me == 0) printf("%s KMV = ",heading); kmv_stats(verbosity); } file_stats(1); uint64_t rall,sall,wall; double mbyte = 1024.0*1024.0; 
MPI_Allreduce(&cssize_one,&sall,1,MRMPI_BIGINT,MPI_SUM,comm); MPI_Allreduce(&crsize_one,&rall,1,MRMPI_BIGINT,MPI_SUM,comm); if (sall || rall) { if (me == 0) printf("%s Comm = %.3g Mb send, %.3g Mb recv\n",heading, sall/mbyte,rall/mbyte); if (verbosity == 2) { write_histo(cssize_one/mbyte," Send (Mb):"); write_histo(crsize_one/mbyte," Recv (Mb):"); } } MPI_Allreduce(&rsize_one,&rall,1,MRMPI_BIGINT,MPI_SUM,comm); MPI_Allreduce(&wsize_one,&wall,1,MRMPI_BIGINT,MPI_SUM,comm); if (rall || wall) { if (me == 0) printf("%s I/O = %.3g Mb read, %.3g Mb write\n",heading, rall/mbyte,wall/mbyte); if (verbosity == 2) { write_histo(rsize_one/mbyte," Read (Mb):"); write_histo(wsize_one/mbyte," Write (Mb):"); } } int partall,setall,sortall; MPI_Allreduce(&fcounter_part,&partall,1,MPI_INT,MPI_SUM,comm); MPI_Allreduce(&fcounter_set,&setall,1,MPI_INT,MPI_SUM,comm); MPI_Allreduce(&fcounter_sort,&sortall,1,MPI_INT,MPI_SUM,comm); if (partall || setall || sortall) { if (me == 0) printf("%s Files = %d partition, %d set, %d sort\n", heading,partall,setall,sortall); if (verbosity == 2) { if (partall) write_histo((double) fcounter_part," Partfiles:"); if (setall) write_histo((double) fcounter_set," Setfiles:"); if (sortall) write_histo((double) fcounter_sort," Sortfiles:"); } } } /* ---------------------------------------------------------------------- create a filename with increasing counter for KV, KMV, Sort, Partition, Spool return filename, caller will delete it ------------------------------------------------------------------------- */ char *MapReduce::file_create(int style) { int n = strlen(fpath) + 32; char *fname = new char[n]; if (style == KVFILE) sprintf(fname,"%s/mrmpi.kv.%d.%d.%d",fpath,instance_me,fcounter_kv++,me); else if (style == KMVFILE) sprintf(fname,"%s/mrmpi.kmv.%d.%d.%d",fpath,instance_me,fcounter_kmv++,me); else if (style == SORTFILE) sprintf(fname,"%s/mrmpi.sort.%d.%d.%d",fpath,instance_me, fcounter_sort++,me); else if (style == PARTFILE) 
sprintf(fname,"%s/mrmpi.part.%d.%d.%d",fpath,instance_me, fcounter_part++,me); else if (style == SETFILE) sprintf(fname,"%s/mrmpi.set.%d.%d.%d",fpath,instance_me, fcounter_set++,me); return fname; } /* ---------------------------------------------------------------------- size of file read/writes from KV, KMV, and Spool files flag = 0 -> rsize/wsize = current size flag = 1 -> rsize/wsize = current size - previous size ------------------------------------------------------------------------- */ void MapReduce::file_stats(int flag) { if (flag == 0) { rsize_one = rsize; wsize_one = wsize; cssize_one = cssize; crsize_one = crsize; } else { rsize_one = rsize - rsize_one; wsize_one = wsize - wsize_one; cssize_one = cssize - cssize_one; crsize_one = crsize - crsize_one; } } /* ---------------------------------------------------------------------- */ void MapReduce::start_timer() { if (timer == 1) MPI_Barrier(comm); time_start = MPI_Wtime(); } /* ---------------------------------------------------------------------- round N up to multiple of nalign and return it ------------------------------------------------------------------------- */ uint64_t MapReduce::roundup(uint64_t n, int nalign) { if (n % nalign == 0) return n; n = (n/nalign + 1) * nalign; return n; } /* ---------------------------------------------------------------------- write a histogram of value to screen with title ------------------------------------------------------------------------- */ void MapReduce::write_histo(double value, const char *title) { int histo[10],histotmp[10]; double ave,max,min; histogram(1,&value,ave,max,min,10,histo,histotmp); if (me == 0) { printf("%-13s %g ave %g max %g min\n",title,ave,max,min); printf("%-13s"," Histogram:"); for (int i = 0; i < 10; i++) printf(" %d",histo[i]); printf("\n"); } } /* ---------------------------------------------------------------------- */ void MapReduce::histogram(int n, double *data, double &ave, double &max, double &min, int nhisto, int *histo, 
int *histotmp) { min = 1.0e20; max = -1.0e20; ave = 0.0; for (int i = 0; i < n; i++) { ave += data[i]; if (data[i] < min) min = data[i]; if (data[i] > max) max = data[i]; } int ntotal; MPI_Allreduce(&n,&ntotal,1,MPI_INT,MPI_SUM,comm); double tmp; MPI_Allreduce(&ave,&tmp,1,MPI_DOUBLE,MPI_SUM,comm); ave = tmp/ntotal; MPI_Allreduce(&min,&tmp,1,MPI_DOUBLE,MPI_MIN,comm); min = tmp; MPI_Allreduce(&max,&tmp,1,MPI_DOUBLE,MPI_MAX,comm); max = tmp; for (int i = 0; i < nhisto; i++) histo[i] = 0; int m; double del = max - min; for (int i = 0; i < n; i++) { if (del == 0.0) m = 0; else m = static_cast ((data[i]-min)/del * nhisto); if (m > nhisto-1) m = nhisto-1; histo[m]++; } MPI_Allreduce(histo,histotmp,nhisto,MPI_INT,MPI_SUM,comm); for (int i = 0; i < nhisto; i++) histo[i] = histotmp[i]; } /* ---------------------------------------------------------------------- setup memory alignment params and pagesize perform initial allocation if minpage > 0 ------------------------------------------------------------------------- */ void MapReduce::allocate() { allocated = 1; // check key,value alignment factors kalign = keyalign; valign = valuealign; int tmp = 1; while (tmp < kalign) tmp *= 2; if (tmp != kalign) error->all("Invalid alignment setting"); tmp = 1; while (tmp < valign) tmp *= 2; if (tmp != valign) error->all("Invalid alignment setting"); // talign = max of (kalign,valign,int) talign = MAX(kalign,valign); talign = MAX(talign,sizeof(int)); kalignm1 = kalign - 1; valignm1 = valign - 1; talignm1 = talign - 1; // error checks if (memsize == 0) error->all("Invalid memsize setting"); if (minpage < 0) error->all("Invalid minpage setting"); if (maxpage && maxpage < minpage) error->all("Invalid maxpage setting"); // memory initialization if (memsize > 0) pagesize = ((uint64_t) memsize) * 1024*1024; else if (memsize < 0) pagesize = (uint64_t) (-memsize); if (pagesize < ALIGNFILE) error->all("Page size smaller than ALIGNFILE"); if (minpage) allocate_page(minpage); } /* 
---------------------------------------------------------------------- allocate a new set of N contiguous pages ------------------------------------------------------------------------- */ void MapReduce::allocate_page(int n) { int nnew = npage + n; memptr = (char **) memory->srealloc(memptr,nnew*sizeof(char *),"MR:memptr"); memusage = (int *) memory->srealloc(memusage,nnew*sizeof(int),"MR:memusage"); memcount = (int *) memory->srealloc(memcount,nnew*sizeof(int),"MR:memcount"); char *ptr = (char *) memory->smalloc_align(n*pagesize,ALIGNFILE,"MR:page"); if (zeropage) memset(ptr,0,n*pagesize); // set memcount = N for 1st allocated page, 0 for others // set memusage = 0 for all allocated pages (caller will reset) for (int i = 0; i < n; i++) { memptr[npage+i] = ptr + i*pagesize; memusage[npage+i] = 0; memcount[npage+i] = 0; } memcount[npage] = n; npage = nnew; npagemax = MAX(npagemax,npage); msize += n*pagesize; msizemax = MAX(msizemax,msize); } /* ---------------------------------------------------------------------- request for N pages of contiguous memory return tag for caller for use when releasing page(s) via mem_unmark() return size of memory and ptr to memory ------------------------------------------------------------------------- */ char *MapReduce::mem_request(int n, uint64_t &size, int &tag) { int i,j,ok; #ifdef MEMORY_DEBUG printf("MEMORY REQUEST for %d pages on proc %d\n",n,me); memory_debug(me); #endif // satisfy request out of first unused chunk of exactly size N for (i = 0; i < npage; i++) { if (memusage[i]) continue; if (memcount[i] == 0) continue; if (memcount[i] == n) { ok = 1; for (j = i+1; j < i+n; j++) if (memusage[j]) ok = 0; if (ok) break; } } // else first unused chunk of size N, even if within larger chunk if (i == npage) { for (i = 0; i < npage; i++) { if (memusage[i]) continue; ok = 1; for (j = i+1; j < i+n; j++) if (j >= npage || memusage[j] || memcount[j]) ok = 0; if (ok) break; } } // else allocate N new pages if maxpage allows // else 
throw error if (i == npage) { if (maxpage && npage+n > maxpage) error->one("Cannot allocate requested memory page(s)"); allocate_page(n); } // mark all N pages with same memusage flag = returned tag tagmax++; for (j = 0; j < n; j++) memusage[i+j] = tagmax; #ifdef MEMORY_DEBUG printf("MEMORY RETURN with %d pages on proc %d\n",n,me); memory_debug(me); #endif tag = tagmax; size = n*pagesize; return memptr[i]; } /* ---------------------------------------------------------------------- mark pages with tag as unused, could be one or more contiguous pages ------------------------------------------------------------------------- */ void MapReduce::mem_unmark(int tag) { for (int i = 0; i < npage; i++) if (memusage[i] == tag) memusage[i] = 0; } /* ---------------------------------------------------------------------- free unused pages and compact page lists cannot free/compact if unused pages are subset of contiguous allocation ------------------------------------------------------------------------- */ void MapReduce::mem_cleanup() { int i,j,n,ok; for (i = 0; i < npage; i++) { // do not free if in use if (memusage[i]) continue; // check if unused page is part of larger contiguous allocation chunk // ok = 1 if ok to free // if it is not 1st page, previous pages are in use, so cannot free it // if it is 1st page of chunk, and no other pages are used, can free chunk // if free, then compact remaining pages ok = 1; n = memcount[i]; if (n == 0) ok = 0; for (j = i+1; j < i+n; j++) if (memusage[j]) ok = 0; if (ok) { memory->sfree(memptr[i]); msize -= n*pagesize; for (j = i+n; j < npage; j++) { memptr[j-n] = memptr[j]; memusage[j-n] = memusage[j]; memcount[j-n] = memcount[j]; } npage -= n; i--; } } } /* ---------------------------------------------------------------------- query status of currently allocated memory pages return # of unused pages return maxcontig = largest # of contiguous unused pages available return max = # of pages that can still be allocated, -1 if infinite 
------------------------------------------------------------------------- */ int MapReduce::mem_query(int &maxcontig, int &max) { int i,j; int n = 0; for (i = 0; i < npage; i++) if (memusage[i] == 0) n++; maxcontig = 0; for (i = 0; i < npage; i++) { if (memusage[i]) continue; for (j = i+1; j < npage; j++) if (memusage[j] || memcount[j]) break; maxcontig = MAX(maxcontig,j-i); } if (maxpage == 0) max = -1; else max = maxpage-npage; return n; } /* ---------------------------------------------------------------------- debug print-out of memory page data structures iproc = -1, print for all procs else only iproc prints ------------------------------------------------------------------------- */ void MapReduce::memory_debug(int iproc) { if (iproc >= 0 && iproc != me) return; printf("MEMORY PAGES: %d on proc %d\n",npage,me); for (int i = 0; i < npage; i++) printf(" %d usage, %d count, %p ptr\n",memusage[i],memcount[i],memptr[i]); } /* ---------------------------------------------------------------------- set hi-water mark for file sizes on disk flag = 0, file of fsize was written to disk flag = 1, file of fsize was deleted from disk ------------------------------------------------------------------------- */ void MapReduce::hiwater(int flag, uint64_t size) { if (flag == 0) fsize += size; if (flag == 1) fsize -= size; fsizemax = MAX(fsizemax,fsize); } mrmpi-1.0~20131122/src/error.cpp0000644000175000017500000000354411347542056016023 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. 
See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ #include "mpi.h" #include "stdlib.h" #include "stdio.h" #include "error.h" using namespace MAPREDUCE_NS; /* ---------------------------------------------------------------------- */ Error::Error(MPI_Comm caller) { comm = caller; MPI_Comm_rank(comm,&me); } /* ---------------------------------------------------------------------- called by all procs ------------------------------------------------------------------------- */ void Error::all(const char *str) { if (me == 0) { printf("ERROR: %s\n",str); fflush(stdout); } MPI_Finalize(); exit(1); } /* ---------------------------------------------------------------------- called by one proc ------------------------------------------------------------------------- */ void Error::one(const char *str) { char name[255]; int namelen; MPI_Get_processor_name(name,&namelen); name[namelen] = '\0'; printf("ERROR on proc %d (%s): %s\n",me,name,str); fflush(stdout); MPI_Abort(comm,1); } /* ---------------------------------------------------------------------- called by one proc ------------------------------------------------------------------------- */ void Error::warning(const char *str) { printf("WARNING: %s\n",str); fflush(stdout); } mrmpi-1.0~20131122/src/keyvalue.cpp0000644000175000017500000005605311515375770016526 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. 
------------------------------------------------------------------------- */ #include "mpi.h" #include "stdlib.h" #include "stdio.h" #include "string.h" #include "stdint.h" #include "keyvalue.h" #include "mapreduce.h" #include "memory.h" #include "error.h" using namespace MAPREDUCE_NS; #define MIN(A,B) ((A) < (B)) ? (A) : (B) #define MAX(A,B) ((A) > (B)) ? (A) : (B) #define ROUNDUP(A,B) (char *) (((uint64_t) A + B) & ~B); #define ALIGNFILE 512 // same as in mapreduce.cpp #define PAGECHUNK 16 #define INTMAX 0x7FFFFFFF enum{KVFILE,KMVFILE,SORTFILE,PARTFILE,SETFILE}; // same as in mapreduce.cpp /* ---------------------------------------------------------------------- */ KeyValue::KeyValue(MapReduce *mr_caller, int memkalign, int memvalign, Memory *memory_caller, Error *error_caller, MPI_Comm comm_caller) { mr = mr_caller; memory = memory_caller; error = error_caller; comm = comm_caller; MPI_Comm_rank(comm,&me); filename = mr->file_create(KVFILE); fileflag = 0; fp = NULL; pages = NULL; npage = maxpage = 0; // talign = max of (kalign,valign,int) kalign = memkalign; valign = memvalign; talign = MAX(kalign,valign); talign = MAX(talign,sizeof(int)); kalignm1 = kalign-1; valignm1 = valign-1; talignm1 = talign-1; twolenbytes = 2*sizeof(int); nkv = ksize = vsize = esize = fsize = 0; init_page(); page = NULL; memtag = -1; allocate(); } /* ---------------------------------------------------------------------- */ KeyValue::~KeyValue() { deallocate(1); memory->sfree(pages); if (fileflag) { remove(filename); mr->hiwater(1,fsize); } delete [] filename; } /* ---------------------------------------------------------------------- if need one, request an in-memory page ------------------------------------------------------------------------- */ void KeyValue::allocate() { if (page == NULL) page = mr->mem_request(1,pagesize,memtag); } /* ---------------------------------------------------------------------- directly assign page of memory to be the in-memory page caller assumes 
responsibility for any previously allocated page ------------------------------------------------------------------------- */ void KeyValue::set_page(uint64_t memsize, char *memblock, int tag) { pagesize = memsize; page = memblock; memtag = tag; } /* ---------------------------------------------------------------------- if allocated, mark in-memory page as unused if forceflag == 1, always do this else: only do this if MR outofcore flag is set or npage > 1 (since values currently in page are now useless) ------------------------------------------------------------------------- */ void KeyValue::deallocate(int forceflag) { if (forceflag || mr->outofcore > 0 || npage > 1) { if (page) { mr->mem_unmark(memtag); page = NULL; memtag = -1; } } } /* ---------------------------------------------------------------------- truncate KeyValue at ncut,pagecut entry called by KMV::convert() ------------------------------------------------------------------------- */ void KeyValue::truncate(int pagecut, int ncut, uint64_t sizecut) { if (ncut == 0) npage = pagecut; else { npage = pagecut+1; pages[pagecut].alignsize = sizecut; pages[pagecut].filesize = roundup(sizecut,ALIGNFILE); pages[pagecut].nkey = ncut; } } /* ---------------------------------------------------------------------- copy contents of another KV into me, one page at a time input KV should never be self input KV will have same pagesize and alignment as me called by MR::copy() and MR::map(mr) ------------------------------------------------------------------------- */ void KeyValue::copy(KeyValue *kv) { if (kv == this) error->all("Cannot perform KeyValue copy on self"); // pages will be loaded into memory assigned to other KV // temporarily set my in-memory page to that of other KV // write_page() will then write from that page to my file char *page_hold = page; int npage_other = kv->request_info(&page); for (int ipage = 0; ipage < npage_other-1; ipage++) { nkey = kv->request_page(ipage,keysize,valuesize,alignsize); 
create_page(); write_page(); npage++; } // copy last page to my memory, then reset my page to my memory nkey = kv->request_page(npage_other-1,keysize,valuesize,alignsize); memcpy(page_hold,page,alignsize); msize = kv->msize; page = page_hold; } /* ---------------------------------------------------------------------- prepare the KV for appending of new KV pairs called by MR::add(), MR::gather(), MR::map(addflag=1) ------------------------------------------------------------------------- */ void KeyValue::append() { if (npage == 0) return; allocate(); int ipage = npage-1; // read last page from file if necessary // decrement MR filesize as if file were being deleted if (fileflag) { read_page(ipage,1); mr->hiwater(1,fsize); } // set in-memory settings from virtual page settings nkey = pages[ipage].nkey; keysize = pages[ipage].keysize; valuesize = pages[ipage].valuesize; alignsize = pages[ipage].alignsize; // delete last page from pages data structures since will append to it npage--; } /* ---------------------------------------------------------------------- complete the KV after data has been added to it called by MR methods after creating & populating a KV ------------------------------------------------------------------------- */ void KeyValue::complete() { create_page(); // if disk file exists or MR outofcore flag set: // write current in-memory page to disk, close file if (fileflag || mr->outofcore > 0) { write_page(); fclose(fp); fp = NULL; } npage++; init_page(); // give up in-memory page if possible deallocate(0); // set sizes for entire KV nkv = ksize = vsize = esize = fsize = 0; for (int ipage = 0; ipage < npage; ipage++) { nkv += pages[ipage].nkey; ksize += pages[ipage].keysize; vsize += pages[ipage].valuesize; esize += pages[ipage].exactsize; } if (fileflag) { fsize = pages[npage-1].fileoffset + pages[npage-1].filesize; mr->hiwater(0,fsize); } // msize is max across all procs, for entire KV int tmp = msize; 
MPI_Allreduce(&tmp,&msize,1,MPI_INT,MPI_MAX,comm); } /* ---------------------------------------------------------------------- dummy complete for a KV that is already complete called by a proc while other procs call complete() invoke an Allreduce() to match Allreudce() in complete() ------------------------------------------------------------------------- */ void KeyValue::complete_dummy() { // give up in-memory page if possible deallocate(0); int tmp = msize; MPI_Allreduce(&tmp,&msize,1,MPI_INT,MPI_MAX,comm); } /* ---------------------------------------------------------------------- return # of pages and ptr to in-memory page ------------------------------------------------------------------------- */ int KeyValue::request_info(char **ptr) { *ptr = page; return npage; } /* ---------------------------------------------------------------------- ready one page of KV data caller is looping over data in KV ------------------------------------------------------------------------- */ int KeyValue::request_page(int ipage, uint64_t &keysize_page, uint64_t &valuesize_page, uint64_t &alignsize_page) { // load page from file if necessary if (fileflag) read_page(ipage,0); // close file if last page if (ipage == npage-1 && fileflag) { fclose(fp); fp = NULL; } keysize_page = pages[ipage].keysize; valuesize_page = pages[ipage].valuesize; alignsize_page = pages[ipage].alignsize; return pages[ipage].nkey; } /* ---------------------------------------------------------------------- overwrite a reorganized page of KV data onto disk reset npage to ipage so write_page() will work page properties stay the same so no call to create_page() called by MR::sort_keys() and MR::sort_values() when KV is just one page ------------------------------------------------------------------------- */ void KeyValue::overwrite_page(int ipage) { int npage_save = npage; npage = ipage; if (fileflag || mr->outofcore > 0) write_page(); npage = npage_save; } /* 
---------------------------------------------------------------------- close disk file if open called by MR::sort_keys() and MR::sort_values() on one proc ------------------------------------------------------------------------- */ void KeyValue::close_file() { if (fp) { fclose(fp); fp = NULL; } } /* ---------------------------------------------------------------------- add a single key/value pair called by user appmap() or appreduce() or appcompress() ------------------------------------------------------------------------- */ void KeyValue::add(char *key, int keybytes, char *value, int valuebytes) { char *iptr = &page[alignsize]; char *kptr = iptr + twolenbytes; kptr = ROUNDUP(kptr,kalignm1); char *vptr = kptr + keybytes; vptr = ROUNDUP(vptr,valignm1); char *nptr = vptr + valuebytes; nptr = ROUNDUP(nptr,talignm1); int kvbytes = nptr - iptr; // size of KV pair cannot exceed int size if (nptr-iptr > INTMAX) error->one("Single key/value pair exceeds int size"); // page is full, write to disk // full page = pagesize exceeded or INTMAX KV pairs if (alignsize + kvbytes > pagesize || nkey == INTMAX) { if (alignsize == 0) { printf("KeyValue pair size/limit: %d %u\n",kvbytes,pagesize); error->one("Single key/value pair exceeds page size"); } create_page(); write_page(); npage++; init_page(); add(key,keybytes,value,valuebytes); return; } *((int *) iptr) = keybytes; *((int *) (iptr+sizeof(int))) = valuebytes; memcpy(kptr,key,keybytes); memcpy(vptr,value,valuebytes); nkey++; keysize += keybytes; valuesize += valuebytes; alignsize += kvbytes; msize = MAX(msize,kvbytes); } /* ---------------------------------------------------------------------- add N fixed-length key/value pairs called by user appmap() or appreduce() or appcomress() ------------------------------------------------------------------------- */ void KeyValue::add(int n, char *key, int keybytes, char *value, int valuebytes) { int koffset = 0; int voffset = 0; for (int i = 0; i < n; i++) { 
add(&key[koffset],keybytes,&value[voffset],valuebytes); koffset += keybytes; voffset += valuebytes; } } /* ---------------------------------------------------------------------- add N variable-length key/value pairs called by user appmap() or appreduce() or appcompress() ------------------------------------------------------------------------- */ void KeyValue::add(int n, char *key, int *keybytes, char *value, int *valuebytes) { uint64_t koffset = 0; uint64_t voffset = 0; for (int i = 0; i < n; i++) { add(&key[koffset],keybytes[i],&value[voffset],valuebytes[i]); koffset += keybytes[i]; voffset += valuebytes[i]; } } /* ---------------------------------------------------------------------- add key/value pairs from another KV input KV should never be self input KV may or may not have same alignment as me called by MR::add() ------------------------------------------------------------------------- */ void KeyValue::add(KeyValue *kv) { if (kv == this) error->all("Cannot perform KeyValue add on self"); int kalign_other = kv->kalign; int valign_other = kv->valign; // which add() to call depends on same or different alignment int nkey_other; uint64_t keysize_other,valuesize_other,alignsize_other; char *page_other; int npage_other = kv->request_info(&page_other); for (int ipage = 0; ipage < npage_other; ipage++) { nkey_other = kv->request_page(ipage,keysize_other,valuesize_other, alignsize_other); if (kalign == kalign_other && valign == valign_other) add(nkey_other,page_other,keysize_other,valuesize_other,alignsize_other); else add(nkey_other,page_other,kalign_other,valign_other); } msize = MAX(msize,kv->msize); } /* ---------------------------------------------------------------------- add N KV pairs from another buffer without specified sizes determine sizes and call add() with sizes input buf should never be own in-memory page input buf has same alignment as me called by MR::aggregate() ------------------------------------------------------------------------- */ void 
KeyValue::add(int n, char *buf) { int keybytes,valuebytes; uint64_t keysize_buf = 0; uint64_t valuesize_buf = 0; char *ptr = buf; for (int i = 0; i < n; i++) { keybytes = *((int *) ptr); valuebytes = *((int *) (ptr+sizeof(int)));; keysize_buf += keybytes; valuesize_buf += valuebytes; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1); } uint64_t alignsize_buf = ptr - buf; add(n,buf,keysize_buf,valuesize_buf,alignsize_buf); } /* ---------------------------------------------------------------------- add a single KV pair from another buffer without specified sizes determine 2 sizes and call add() with sizes input buf should never be own in-memory page input buf has same alignment as me called by MR::merge() ------------------------------------------------------------------------- */ void KeyValue::add(char *ptr) { int keybytes = *((int *) ptr); int valuebytes = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); char *key = ptr; ptr += keybytes; char *value = ROUNDUP(ptr,valignm1); add(key,keybytes,value,valuebytes); } /* ---------------------------------------------------------------------- add N KV pairs from another buffer with specified sizes input buf should never be own in-memory page input buf has same alignment as me so add in chunks called by MR::gather(), add(kv), add(n,buf) ------------------------------------------------------------------------- */ void KeyValue::add(int n, char *buf, uint64_t keysize_buf, uint64_t valuesize_buf, uint64_t alignsize_buf) { int nkeychunk,keybytes,valuebytes,kvbytes; uint64_t keychunk,valuechunk,chunksize; char *ptr,*ptr_begin,*ptr_end,*ptr_start; // break data into chunks that fit into current and successive pages // full page = pagesize exceeded or INTMAX KV pairs // search for breakpoint by scanning KV pairs ptr = buf; int nlimit = INTMAX - nkey; while (alignsize + alignsize_buf > pagesize || n > nlimit) { 
ptr_begin = ptr; ptr_end = ptr_begin + (pagesize-alignsize); nkeychunk = 0; keychunk = valuechunk = 0; while (1) { ptr_start = ptr; keybytes = *((int *) ptr); valuebytes = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1); kvbytes = ptr - ptr_start; if (ptr > ptr_end) break; if (nkeychunk == nlimit) break; nkeychunk++; keychunk += keybytes; valuechunk += valuebytes; } if (kvbytes > pagesize) { printf("KeyValue pair size/limit: %d %u\n",kvbytes,pagesize); error->one("Single key/value pair exceeds page size"); } ptr = ptr_start; chunksize = ptr - ptr_begin; memcpy(&page[alignsize],ptr_begin,chunksize); nkey += nkeychunk; keysize += keychunk; valuesize += valuechunk; alignsize += chunksize; create_page(); write_page(); npage++; init_page(); n -= nkeychunk; keysize_buf -= keychunk; valuesize_buf -= valuechunk; alignsize_buf -= chunksize; nlimit = INTMAX; } // add remainder to in-memory page memcpy(&page[alignsize],ptr,alignsize_buf); nkey += n; keysize += keysize_buf; valuesize += valuesize_buf; alignsize += alignsize_buf; } /* ---------------------------------------------------------------------- add N KV pairs from another buffer with specified sizes input buf should never be own in-memory page input buf has different alignment from me so must add one by one called by add(kv) ------------------------------------------------------------------------- */ void KeyValue::add(int n, char *buf, int kalign_buf, int valign_buf) { int keybytes,valuebytes; char *key,*value; int talign_buf = MAX(kalign_buf,valign_buf); talign_buf = MAX(talign_buf,sizeof(int)); int kalignm1_buf = kalign_buf-1; int valignm1_buf = valign_buf-1; int talignm1_buf = talign_buf-1; char *ptr = buf; for (int i = 0; i < n; i++) { keybytes = *((int *) ptr); valuebytes = *((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1_buf); key = ptr; ptr += keybytes; ptr = 
ROUNDUP(ptr,valignm1_buf); value = ptr; ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1_buf); add(key,keybytes,value,valuebytes); } } /* ---------------------------------------------------------------------- create virtual page entry for in-memory page ------------------------------------------------------------------------- */ void KeyValue::init_page() { nkey = 0; keysize = valuesize = 0; alignsize = 0; msize = 0; } /* ---------------------------------------------------------------------- create virtual page entry for in-memory page ------------------------------------------------------------------------- */ void KeyValue::create_page() { if (npage == maxpage) { maxpage += PAGECHUNK; pages = (Page *) memory->srealloc(pages,maxpage*sizeof(Page),"KV:pages"); } pages[npage].nkey = nkey; pages[npage].keysize = keysize; pages[npage].valuesize = valuesize; pages[npage].exactsize = ((uint64_t) nkey)*twolenbytes + keysize + valuesize; pages[npage].alignsize = alignsize; pages[npage].filesize = roundup(alignsize,ALIGNFILE); if (npage) pages[npage].fileoffset = pages[npage-1].fileoffset + pages[npage-1].filesize; else pages[npage].fileoffset = 0; } /* ---------------------------------------------------------------------- write in-memory page to disk do a seek since may be overwriting an arbitrary page due to append ------------------------------------------------------------------------- */ void KeyValue::write_page() { if (mr->outofcore < 0) error->one("Cannot create KeyValue file due to outofcore setting"); if (fp == NULL) { fp = fopen(filename,"wb"); if (fp == NULL) { char msg[1023]; sprintf(msg,"Cannot open KeyValue file %s for writing",filename); error->one(msg); } fileflag = 1; } uint64_t fileoffset = pages[npage].fileoffset; int seekflag = fseek(fp,fileoffset,SEEK_SET); int nwrite = fwrite(page,pages[npage].filesize,1,fp); mr->wsize += pages[npage].filesize; if (seekflag) { char str[128]; sprintf(str,"Bad KV fwrite/fseek on proc %d: %u",me,fileoffset); 
error->warning(str); } if (nwrite != 1 && pages[npage].filesize) { char str[128]; sprintf(str,"Bad KV fwrite on proc %d: %d %u", me,nwrite,pages[npage].filesize); error->warning(str); } } /* ---------------------------------------------------------------------- read ipage from disk do a seek since may be reading last page ------------------------------------------------------------------------- */ void KeyValue::read_page(int ipage, int writeflag) { if (fp == NULL) { if (writeflag) fp = fopen(filename,"r+b"); else fp = fopen(filename,"rb"); if (fp == NULL) error->one("Could not open KeyValue file for reading"); } uint64_t fileoffset = pages[ipage].fileoffset; int seekflag = fseek(fp,fileoffset,SEEK_SET); int nread = fread(page,pages[ipage].filesize,1,fp); mr->rsize += pages[ipage].filesize; if (seekflag) { char str[128]; sprintf(str,"Bad KV fread/fseek on proc %d: %u",me,fileoffset); error->warning(str); } if ((nread != 1 || ferror(fp)) && pages[ipage].filesize) { char str[128]; sprintf(str,"Bad KV fread on proc %d: %d %u", me,nread,pages[ipage].filesize); error->warning(str); clearerr(fp); } } /* ---------------------------------------------------------------------- round N up to multiple of nalign and return it ------------------------------------------------------------------------- */ uint64_t KeyValue::roundup(uint64_t n, int nalign) { if (n % nalign == 0) return n; n = (n/nalign + 1) * nalign; return n; } /* ---------------------------------------------------------------------- debug print of each KV pair with nstride ------------------------------------------------------------------------- */ void KeyValue::print(FILE *fp, int nstride, int kflag, int vflag) { int keybytes,valuebytes; uint64_t dummy1,dummy2,dummy3; char *ptr,*key,*value,*ptr_start; int istride = 0; for (int ipage = 0; ipage < npage; ipage++) { nkey = request_page(ipage,dummy1,dummy2,dummy3); ptr = page; for (int i = 0; i < nkey; i++) { ptr_start = ptr; keybytes = *((int *) ptr); valuebytes = 
*((int *) (ptr+sizeof(int)));; ptr += twolenbytes; ptr = ROUNDUP(ptr,kalignm1); key = ptr; ptr += keybytes; ptr = ROUNDUP(ptr,valignm1); value = ptr; ptr += valuebytes; ptr = ROUNDUP(ptr,talignm1); istride++; if (istride != nstride) continue; istride = 0; fprintf(fp,"KV pair: proc %d, sizes %d %d",me,keybytes,valuebytes); fprintf(fp,", key "); if (kflag == 0) fprintf(fp,"NULL"); else if (kflag == 1) fprintf(fp,"%d",*(int *) key); else if (kflag == 2) fprintf(fp,"%lu",*(uint64_t *) key); else if (kflag == 3) fprintf(fp,"%g",*(float *) key); else if (kflag == 4) fprintf(fp,"%g",*(double *) key); else if (kflag == 5) fprintf(fp,"%s",key); else if (kflag == 6) fprintf(fp,"%d %d", *(int *) key, *(int *) (key+sizeof(int))); else if (kflag == 7) fprintf(fp,"%lu %lu", *(uint64_t *) key, *(uint64_t *) (key+sizeof(uint64_t))); fprintf(fp,", value "); if (vflag == 0) fprintf(fp,"NULL"); else if (vflag == 1) fprintf(fp,"%d",*(int *) value); else if (vflag == 2) fprintf(fp,"%lu",*(uint64_t *) value); else if (vflag == 3) fprintf(fp,"%g",*(float *) value); else if (vflag == 4) fprintf(fp,"%g",*(double *) value); else if (vflag == 5) fprintf(fp,"%s",value); else if (vflag == 6) fprintf(fp,"%d %d", *(int *) value, *(int *) (value+sizeof(int))); else if (vflag == 7) fprintf(fp,"%lu %lu", *(uint64_t *) value, *(uint64_t *) (value+sizeof(uint64_t))); fprintf(fp,"\n"); } } } mrmpi-1.0~20131122/src/keymultivalue.h0000644000175000017500000001561211515062551017230 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. 
See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ #ifndef KEY_MULTIVALUE_H #define KEY_MULTIVALUE_H #include "mpi.h" #include "stdio.h" #include "stdint.h" namespace MAPREDUCE_NS { class KeyMultiValue { public: uint64_t nkmv; // # of KMV pairs in entire KMV on this proc uint64_t ksize; // exact size of all key data uint64_t vsize; // exact size of all multivalue data uint64_t esize; // total exact size of entire KMV uint64_t fsize; // size of KMV file char *page; // in-memory page int memtag; // memory page ID int npage; // # of pages in entire KMV KeyMultiValue(class MapReduce *, int, int, class Memory *, class Error *, MPI_Comm); ~KeyMultiValue(); void allocate(); void deallocate(int); void copy(KeyMultiValue *); void complete(); int request_info(char **); int request_page(int, int, uint64_t &, uint64_t &, uint64_t &); uint64_t multivalue_blocks(int, int &); void overwrite_page(int); void close_file(); void clone(class KeyValue *); void collapse(char *, int, class KeyValue *); void convert(class KeyValue *); void print(FILE *, int, int, int); private: MapReduce *mr; MPI_Comm comm; class Memory *memory; class Error *error; int me; uint64_t pagesize; // size of page int kalign,valign; // alignment for keys & multivalues int talign; // alignment of entire KMV pair int ualign; // alignment of Unique int kalignm1,valignm1; // alignments-1 for masking int talignm1,ualignm1; int twolenbytes; // size of key & value lengths int threelenbytes; // size of nvalue & key & value lengths // in-memory page int nkey; // # of KMV pairs in page uint64_t nvalue; // # of values in all KMV mvalues in page uint64_t keysize; // exact size of key data in page uint64_t valuesize; // exact size of multivalue data in page uint64_t alignsize; // current size of page with alignment // virtual pages struct Page { uint64_t keysize; // exact size of keys uint64_t valuesize; // exact size of multivalues uint64_t 
exactsize; // exact size of all data in page uint64_t alignsize; // aligned size of all data in page uint64_t filesize; // rounded-up alignsize for file I/O uint64_t fileoffset; // summed filesize of all previous pages uint64_t nvalue_total; // total # of values for multi-page KMV header int nkey; // # of KMV pairs int nblock; // # of value blocks for multi-page KMV header }; Page *pages; // list of pages int maxpage; // max # of pages currently allocated // unique keys int nunique; // current # of unique keys int ukeyoffset; // offset from start of Unique to where key starts struct Unique { uint64_t nvalue; // # of values associated with this key uint64_t mvbytes; // total size of values associated with this key int *soffset; // ptr to start of value sizes in KMV page char *voffset; // ptr to start of values in KMV page Unique *next; // ptr to next key in this hash bucket int keybytes; // size of this key int set; // which KMV set this key will be part of }; // hash of unique keys Unique **buckets; // ptr to 1st key in each hash bucket int hashmask; // bit mask for mapping hashed key into hash buckets // nbuckets = hashmask + 1 uint64_t bucketbytes; // byte size of hash buckets char *memunique; // ptr to where memory for hash+Uniques starts char *ustart; // ptr to where memory for Uniques starts char *ustop; // ptr to where memory for Uniques stops // file info int fileflag; // 1 if file exists, 0 if not char *filename; // filename to store KMV if needed FILE *fp; // file ptr // partitions of KV data per unique list struct Partition { class KeyValue *kv; // primary KV storing pairs for this partition class Spool *sp; // secondary Spool of pairs if re-partitioned class Spool *sp2; // tertiary Spool of pairs if re-partitioned int sortbit; // bit from hi-end that partitioning was done on }; Partition *partitions; int npartition,maxpartition; // sets of unique keys per KMV page struct Set { class KeyValue *kv; // KV pairs for set can be in KV and/or Spool(s) class 
Spool *sp; class Spool *sp2; Unique *first; // ptr to first Unique in set int nunique; // # of Uniques in set int extended; // 1 if set contains one Unique -> multi-page KMV }; Set *sets; int nset,maxset; // memory management for Spool pages char *readpage; // convert() does all reading from this page int minspool; // minimum allowed size for a spool page int npages_mr; // # of MR pages I have allocated int *tag_mr; // page IDs for MR pages char **page_mr; // ptrs to MR pages uint64_t sizespool; // size of spool page int spoolperpage; // # of spool pages per MR page int nquery; // # of requested spool pages on this iteration // private methods void add(char *, int, char *, int); void collapse_one(char *, int, class KeyValue *, uint64_t); void collapse_many(char *, int, class KeyValue *); void kv2unique(int); int unique2kmv_all(); void unique2kmv_extended(int); void unique2kmv_set(int); void partition2sets(int); void kv2kmv(int); void kv2kmv_extended(int); class Spool *augment_partition(int); class Spool *create_partition(int); char *chunk_allocate(); Unique *find(int, char *, int, Unique *&); int hash(char *, int); void init_page(); void create_page(); void write_page(); void read_page(int, int); uint64_t roundup(uint64_t, int); void spool_memory(class KeyValue *); void spool_request(int, int); char *spool_malloc(int, uint64_t &); void spool_free(); }; } #endif mrmpi-1.0~20131122/src/hash.h0000644000175000017500000000027111030550532015237 0ustar mathieumathieu// Hash function hashlittle() // from lookup3.c, by Bob Jenkins, May 2006, Public Domain // bob_jenkins@burtleburtle.net uint32_t hashlittle(const void *key, size_t length, uint32_t); mrmpi-1.0~20131122/src/hash.cpp0000644000175000017500000002656611347542056015626 0ustar mathieumathieu// Hash function hashlittle() // from lookup3.c, by Bob Jenkins, May 2006, Public Domain // bob_jenkins@burtleburtle.net #include "stddef.h" #include "stdint.h" #define HASH_LITTLE_ENDIAN 1 // Intel and AMD are little endian 
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) /* ------------------------------------------------------------------------------- mix -- mix 3 32-bit values reversibly. This is reversible, so any information in (a,b,c) before mix() is still in (a,b,c) after mix(). If four pairs of (a,b,c) inputs are run through mix(), or through mix() in reverse, there are at least 32 bits of the output that are sometimes the same for one pair and different for another pair. This was tested for: * pairs that differed by one bit, by two bits, in any combination of top bits of (a,b,c), or in any combination of bottom bits of (a,b,c). * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed the output delta to a Gray code (a^(a>>1)) so a string of 1's (as is commonly produced by subtraction) look like a single 1-bit difference. * the base values were pseudorandom, all zero but one bit set, or all zero plus a counter that starts at zero. Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that satisfy this are 4 6 8 16 19 4 9 15 3 18 27 15 14 9 3 7 17 3 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for "differ" defined as + with a one-bit base and a two-bit delta. I used http://burtleburtle.net/bob/hash/avalanche.html to choose the operations, constants, and arrangements of the variables. This does not achieve avalanche. There are input bits of (a,b,c) that fail to affect some output bits of (a,b,c), especially of a. The most thoroughly mixed value is c, but it doesn't really even achieve avalanche in c. This allows some parallelism. Read-after-writes are good at doubling the number of bits affected, so the goal of mixing pulls in the opposite direction as the goal of parallelism. I did what I could. Rotates seem to cost as much as shifts on every machine I could lay my hands on, and rotates are much kinder to the top and bottom bits, so I used rotates. 
------------------------------------------------------------------------------- */ #define mix(a,b,c) \ { \ a -= c; a ^= rot(c, 4); c += b; \ b -= a; b ^= rot(a, 6); a += c; \ c -= b; c ^= rot(b, 8); b += a; \ a -= c; a ^= rot(c,16); c += b; \ b -= a; b ^= rot(a,19); a += c; \ c -= b; c ^= rot(b, 4); b += a; \ } /* ------------------------------------------------------------------------------- final -- final mixing of 3 32-bit values (a,b,c) into c Pairs of (a,b,c) values differing in only a few bits will usually produce values of c that look totally different. This was tested for * pairs that differed by one bit, by two bits, in any combination of top bits of (a,b,c), or in any combination of bottom bits of (a,b,c). * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed the output delta to a Gray code (a^(a>>1)) so a string of 1's (as is commonly produced by subtraction) look like a single 1-bit difference. * the base values were pseudorandom, all zero but one bit set, or all zero plus a counter that starts at zero. These constants passed: 14 11 25 16 4 14 24 12 14 25 16 4 14 24 and these came close: 4 8 15 26 3 22 24 10 8 15 26 3 22 24 11 8 15 26 3 22 24 ------------------------------------------------------------------------------- */ #define final(a,b,c) \ { \ c ^= b; c -= rot(b,14); \ a ^= c; a -= rot(c,11); \ b ^= a; b -= rot(a,25); \ c ^= b; c -= rot(b,16); \ a ^= c; a -= rot(c,4); \ b ^= a; b -= rot(a,14); \ c ^= b; c -= rot(b,24); \ } /* ------------------------------------------------------------------------------- hashlittle() -- hash a variable-length key into a 32-bit value k : the key (the unaligned variable-length array of bytes) length : the length of the key, counting by bytes initval : can be any 4-byte value Returns a 32-bit value. Every bit of the key affects every bit of the return value. Two keys differing by one or two bits will have totally different hash values. The best hash table sizes are powers of 2. 
There is no need to do mod a prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask. For example, if you need only 10 bits, do h = (h & hashmask(10)); In which case, the hash table should have hashsize(10) elements. If you are hashing n strings (uint8_t **)k, do it like this: for (i=0, h=0; i 12) { a += k[0]; b += k[1]; c += k[2]; mix(a,b,c); length -= 12; k += 3; } /*----------------------------- handle the last (probably partial) block */ /* * "k[2]&0xffffff" actually reads beyond the end of the string, but * then masks off the part it's not allowed to read. Because the * string is aligned, the masked-off tail is in the same word as the * rest of the string. Every machine with memory protection I've seen * does it on word boundaries, so is OK with this. But VALGRIND will * still catch it and complain. The masking trick does make the hash * noticably faster for short strings (like English words). */ #ifndef VALGRIND switch(length) { case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; case 8 : b+=k[1]; a+=k[0]; break; case 7 : b+=k[1]&0xffffff; a+=k[0]; break; case 6 : b+=k[1]&0xffff; a+=k[0]; break; case 5 : b+=k[1]&0xff; a+=k[0]; break; case 4 : a+=k[0]; break; case 3 : a+=k[0]&0xffffff; break; case 2 : a+=k[0]&0xffff; break; case 1 : a+=k[0]&0xff; break; case 0 : return c; /* zero length strings require no mixing */ } #else /* make valgrind happy */ k8 = (const uint8_t *)k; switch(length) { case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ case 9 : c+=k8[8]; /* fall through */ case 8 : b+=k[1]; a+=k[0]; break; case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ case 5 : b+=k8[4]; /* fall through */ case 4 : a+=k[0]; break; case 3 : 
a+=((uint32_t)k8[2])<<16; /* fall through */ case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ case 1 : a+=k8[0]; break; case 0 : return c; } #endif /* !valgrind */ } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ const uint8_t *k8; /*--------------- all but last block: aligned reads and different mixing */ while (length > 12) { a += k[0] + (((uint32_t)k[1])<<16); b += k[2] + (((uint32_t)k[3])<<16); c += k[4] + (((uint32_t)k[5])<<16); mix(a,b,c); length -= 12; k += 6; } /*----------------------------- handle the last (probably partial) block */ k8 = (const uint8_t *)k; switch(length) { case 12: c+=k[4]+(((uint32_t)k[5])<<16); b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ case 10: c+=k[4]; b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; case 9 : c+=k8[8]; /* fall through */ case 8 : b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ case 6 : b+=k[2]; a+=k[0]+(((uint32_t)k[1])<<16); break; case 5 : b+=k8[4]; /* fall through */ case 4 : a+=k[0]+(((uint32_t)k[1])<<16); break; case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ case 2 : a+=k[0]; break; case 1 : a+=k8[0]; break; case 0 : return c; /* zero length requires no mixing */ } } else { /* need to read the key one byte at a time */ const uint8_t *k = (const uint8_t *)key; /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { a += k[0]; a += ((uint32_t)k[1])<<8; a += ((uint32_t)k[2])<<16; a += ((uint32_t)k[3])<<24; b += k[4]; b += ((uint32_t)k[5])<<8; b += ((uint32_t)k[6])<<16; b += ((uint32_t)k[7])<<24; c += k[8]; c += ((uint32_t)k[9])<<8; c += ((uint32_t)k[10])<<16; c += ((uint32_t)k[11])<<24; mix(a,b,c); length -= 12; k += 12; } /*-------------------------------- last block: affect all 32 bits of (c) */ 
switch(length) /* all the case statements fall through */ { case 12: c+=((uint32_t)k[11])<<24; case 11: c+=((uint32_t)k[10])<<16; case 10: c+=((uint32_t)k[9])<<8; case 9 : c+=k[8]; case 8 : b+=((uint32_t)k[7])<<24; case 7 : b+=((uint32_t)k[6])<<16; case 6 : b+=((uint32_t)k[5])<<8; case 5 : b+=k[4]; case 4 : a+=((uint32_t)k[3])<<24; case 3 : a+=((uint32_t)k[2])<<16; case 2 : a+=((uint32_t)k[1])<<8; case 1 : a+=k[0]; break; case 0 : return c; } } final(a,b,c); return c; #else /* PURIFY_HATES_HASHLITTLE */ /* I don't know what it is about Jenkins' hashlittle function, but * it drives purify insane, even with VALGRIND defined. It makes * purify unusable!! The code execution doesn't even make sense. * Below is a (probably) weaker hash function that at least allows * testing with purify. */ #define MAXINT_DIV_PHI 11400714819323198485U uint32_t h, rest, *p, bytes, num_bytes; char *byteptr; num_bytes = length; /* First hash the uint32_t-sized portions of the key */ h = 0; for (p = (uint32_t *)key, bytes=num_bytes; bytes >= (uint32_t) sizeof(uint32_t); bytes-=sizeof(uint32_t), p++){ h = (h^(*p))*MAXINT_DIV_PHI; } /* Then take care of the remaining bytes, if any */ rest = 0; for (byteptr = (char *)p; bytes > 0; bytes--, byteptr++){ rest = (rest<<8) | (*byteptr); } /* If extra bytes, merge the two parts */ if (rest) h = (h^rest)*MAXINT_DIV_PHI; return h; #endif /* PURIFY_HATES_HASHLITTLE */ } mrmpi-1.0~20131122/src/irregular.h0000644000175000017500000000462311513417541016325 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. 
See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ #ifndef IRREGULAR_H #define IRREGULAR_H #include "mpi.h" #include "stdint.h" namespace MAPREDUCE_NS { class Irregular { public: Irregular(int, class Memory *, class Error *, MPI_Comm); ~Irregular(); uint64_t cssize,crsize; // total send/recv bytes for one exchange int setup(int, int *, int *, int *, uint64_t, double &); void exchange(int, int *, char **, int *, int *, char *, char *); private: int me,nprocs; int all2all; class Memory *memory; class Error *error; MPI_Comm comm; // MPI communicator for all communication // all2all and custom settings uint64_t *bigsendbytes; // bytes to send to each proc, including self int *sendbytes; // bytes to send to each proc, including self int *sdispls; // proc offset into clumped send buffer int *recvbytes; // bytes to recv from each proc, including self int *rdispls; // proc offset into recv buffer int *senddatums; // # of datums to send each proc, including self int *one; // 1 for each proc, for MPI call int ndatum; // # of total datums I recv, including self // custom settings int self; // 0 = no data to copy to self, 1 = yes int nsend; // # of messages to send w/out self int nrecv; // # of messages to recv w/out self int *sendprocs; // list of procs to send to w/out self int *recvprocs; // list of procs to recv from w/out self MPI_Request *request; // MPI requests for posted recvs MPI_Status *status; // MPI statuses for Waitall void exchange_all2all(int, int *, char **, int *, char *, char *); void exchange_custom(int, int *, char **, int *, char *, char *); }; } #endif mrmpi-1.0~20131122/src/memory.h0000644000175000017500000000173611347542056015650 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories 
Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ #ifndef MEMORY_H #define MEMORY_H #include "mpi.h" namespace MAPREDUCE_NS { class Memory { public: Memory(MPI_Comm); ~Memory(); void *smalloc(size_t, const char *); void *smalloc_align(size_t, int, const char *); void sfree(void *); void *srealloc(void *, size_t, const char *); private: class Error *error; }; } #endif mrmpi-1.0~20131122/src/irregular.cpp0000644000175000017500000002236611513417541016664 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ #include "stdio.h" #include "stdlib.h" #include "string.h" #include "irregular.h" #include "mrtype.h" #include "memory.h" #include "error.h" using namespace MAPREDUCE_NS; #define MIN(A,B) ((A) < (B)) ? (A) : (B) #define MAX(A,B) ((A) > (B)) ? 
(A) : (B) #define INTMAX 0x7FFFFFFF /* ---------------------------------------------------------------------- */ Irregular::Irregular(int all2all_caller, Memory *memory_caller, Error *error_caller, MPI_Comm comm_caller) { all2all = all2all_caller; memory = memory_caller; error = error_caller; comm = comm_caller; MPI_Comm_rank(comm,&me); MPI_Comm_size(comm,&nprocs); bigsendbytes = new uint64_t[nprocs]; sendbytes = new int[nprocs]; sdispls = new int[nprocs]; recvbytes = new int[nprocs]; rdispls = new int[nprocs]; senddatums = new int[nprocs]; one = new int[nprocs]; for (int i = 0; i < nprocs; i++) one[i] = 1; sendprocs = new int[nprocs]; recvprocs = new int[nprocs]; request = new MPI_Request[nprocs]; status = new MPI_Status[nprocs]; } /* ---------------------------------------------------------------------- */ Irregular::~Irregular() { delete [] bigsendbytes; delete [] sendbytes; delete [] sdispls; delete [] recvbytes; delete [] rdispls; delete [] senddatums; delete [] one; delete [] sendprocs; delete [] recvprocs; delete [] request; delete [] status; } /* ---------------------------------------------------------------------- setup irregular communication for all2all or custom n = # of datums contributed by this proc proclist = which proc each datum is to be sent to sizes = byte count of each datum recvlimit = max allowed size of received data return # of datums I recv set fraction = 1.0 if can recv all datums without exceeding two limits else set it to estimated fraction I can recv limit #1 = total volume of send data exceeds INTMAX limit #2 = total volume of recv data exceeds min(recvlimit,INTMAX) 2nd limit also insures # of received datums cannot exceed INTMAX extra data is setup for custom communication: sendprocs = list of nsend procs to send to recvprocs = list of nrecv procs to recv from reorder = contiguous send indices for each send, self copy is last ------------------------------------------------------------------------- */ int Irregular::setup(int n, int 
*proclist, int *sizes, int *reorder, uint64_t recvlimit, double &fraction) { // compute sendbytes and sdispls for (int i = 0; i < nprocs; i++) bigsendbytes[i] = 0; for (int i = 0; i < n; i++) bigsendbytes[proclist[i]] += sizes[i]; // error return if any proc sending > INTMAX to a single proc uint64_t maxsend = 0; for (int i = 0; i < nprocs; i++) maxsend = MAX(maxsend,bigsendbytes[i]); uint64_t maxsendall; MPI_Allreduce(&maxsend,&maxsendall,1,MRMPI_BIGINT,MPI_MAX,comm); if (maxsendall > INTMAX) { fraction = ((double) INTMAX) / maxsendall; return 0; } for (int i = 0; i < nprocs; i++) sendbytes[i] = bigsendbytes[i]; // compute sdispls sdispls[0] = 0; uint64_t sendtotal = sendbytes[0]; for (int i = 1; i < nprocs; i++) { sdispls[i] = sdispls[i-1] + sendbytes[i-1]; sendtotal += sendbytes[i]; } // error return if any proc's send total > INTMAX uint64_t sendtotalmax; MPI_Allreduce(&sendtotal,&sendtotalmax,1,MRMPI_BIGINT,MPI_MAX,comm); if (sendtotalmax > INTMAX) { fraction = ((double) INTMAX) / sendtotal; return 0; } // compute recvbytes and rdispls MPI_Alltoall(sendbytes,1,MPI_INT,recvbytes,1,MPI_INT,comm); rdispls[0] = 0; uint64_t recvtotal = recvbytes[0]; for (int i = 1; i < nprocs; i++) { rdispls[i] = rdispls[i-1] + recvbytes[i-1]; recvtotal += recvbytes[i]; } // error return if any proc's recv total > min(recvlimit,INTMAX) recvlimit = MIN(recvlimit,INTMAX); uint64_t recvtotalmax; MPI_Allreduce(&recvtotal,&recvtotalmax,1,MRMPI_BIGINT,MPI_MAX,comm); if (recvtotalmax > recvlimit) { fraction = ((double) recvlimit) / recvtotal; return 0; } // successful setup // compute senddatums // nrecv = total # of datums I receive, guaranteed to be < INTMAX cssize = sendtotal - sendbytes[me]; crsize = recvtotal - recvbytes[me]; for (int i = 0; i < nprocs; i++) senddatums[i] = 0; for (int i = 0; i < n; i++) senddatums[proclist[i]]++; MPI_Reduce_scatter(senddatums,&ndatum,one,MPI_INT,MPI_SUM,comm); // if all2all, done if (all2all) { fraction = 1.0; return ndatum; } // if custom, setup 
additional data strucs // sendprocs,recvprocs = lists of procs to send to and recv from // begin lists with iproc > me and wrap around // reorder = contiguous send indices for each proc I send to // let s0 = senddatums[sendprocs[0]], s1 = senddatums[sendprocs[1]], etc // reorder[0:s0-1] = indices of datums in 1st message // reorder[s0:s0+s1-1] = indices of datums in 2nd message, etc // proc2send[i] = which send (0 to nsend-1) goes to proc I // offset[i] = running offset into reorder for each send (0 to nsend-1) int *proc2send = new int[nprocs]; nsend = nrecv = 0; int iproc = me; for (int i = 1; i < nprocs; i++) { iproc++; if (iproc == nprocs) iproc = 0; if (sendbytes[iproc]) { proc2send[iproc] = nsend; sendprocs[nsend++] = iproc; } if (recvbytes[iproc]) recvprocs[nrecv++] = iproc; } if (sendbytes[me]) { self = 1; proc2send[me] = nsend; } else self = 0; int *offset = new int[nprocs]; offset[0] = 0; for (int i = 1; i <= nsend; i++) offset[i] = offset[i-1] + senddatums[sendprocs[i-1]]; int j; for (int i = 0; i < n; i++) { j = proclist[i]; reorder[offset[proc2send[j]]++] = i; } delete [] proc2send; delete [] offset; fraction = 1.0; return ndatum; } /* ---------------------------------------------------------------------- perform irregular communication via all2all or custom n = # of datums contributed by this proc proclist (for all2all) = which proc each datum is to be sent to sizes = byte count of each datum reorder (for custom) = contiguous send indices for each send copy = buffer to pack send datums into recv = buffer to recv all datums into ------------------------------------------------------------------------- */ void Irregular::exchange(int n, int *proclist, char **ptrs, int *sizes, int *reorder, char *copy, char *recv) { if (all2all) exchange_all2all(n,proclist,ptrs,sizes,copy,recv); else exchange_custom(n,reorder,ptrs,sizes,copy,recv); } /* ---------------------------------------------------------------------- wrapper on MPI_Alltoallv() first copy datums from 
ptrs into copy buf in correct order via proclist ------------------------------------------------------------------------- */ void Irregular::exchange_all2all(int n, int *proclist, char **ptrs, int *sizes, char *copy, char *recv) { int i,iproc; char **cptrs = new char*[nprocs]; for (i = 0; i < nprocs; i++) cptrs[i] = ©[sdispls[i]]; for (int i = 0; i < n; i++) { iproc = proclist[i]; memcpy(cptrs[iproc],ptrs[i],sizes[i]); cptrs[iproc] += sizes[i]; } delete [] cptrs; MPI_Alltoallv(copy,sendbytes,sdispls,MPI_BYTE, recv,recvbytes,rdispls,MPI_BYTE,comm); } /* ---------------------------------------------------------------------- custom all2all communication post all receives copying datums for one send into copy buf in correct order via indices copy self data while waiting for receives indices are 0 to N-1, contiguous for each proc to send to, self copy is last ------------------------------------------------------------------------- */ void Irregular::exchange_custom(int n, int *indices, char **ptrs, int *sizes, char *copy, char *recv) { int i,j,iproc; char *ptr; // post all receives for (int irecv = 0; irecv < nrecv; irecv++) { iproc = recvprocs[irecv]; MPI_Irecv(&recv[rdispls[iproc]],recvbytes[iproc],MPI_BYTE, iproc,0,comm,&request[irecv]); } // barrier to insure receives are posted MPI_Barrier(comm); // send each message, packing copy buf with needed datums int index = 0; for (int isend = 0; isend < nsend; isend++) { iproc = sendprocs[isend]; ptr = copy; n = senddatums[iproc]; for (i = 0; i < n; i++) { j = indices[index++]; memcpy(ptr,ptrs[j],sizes[j]); ptr += sizes[j]; } MPI_Send(copy,sendbytes[iproc],MPI_BYTE,iproc,0,comm); } // copy self data directly to recv buf if (self) ptr = &recv[rdispls[me]]; n = senddatums[me]; for (i = 0; i < n; i++) { j = indices[index++]; memcpy(ptr,ptrs[j],sizes[j]); ptr += sizes[j]; } // wait on all incoming messages if (nrecv) MPI_Waitall(nrecv,request,status); } 
mrmpi-1.0~20131122/src/spool.cpp0000644000175000017500000001604611515375257016033 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ #include "stdlib.h" #include "string.h" #include "stdint.h" #include "spool.h" #include "mapreduce.h" #include "memory.h" #include "error.h" using namespace MAPREDUCE_NS; #define ALIGNFILE 512 // same as in mapreduce.cpp #define PAGECHUNK 16 //#define SPOOL_DEBUG 1 /* ---------------------------------------------------------------------- */ Spool::Spool(int style, MapReduce *mr_caller, Memory *memory_caller, Error *error_caller) { mr = mr_caller; memory = memory_caller; error = error_caller; filename = mr->file_create(style); fileflag = 0; fp = NULL; pages = NULL; npage = maxpage = 0; nkv = esize = fsize = 0; nkey = size = 0; } /* ---------------------------------------------------------------------- */ Spool::~Spool() { memory->sfree(pages); if (fileflag) { remove(filename); mr->hiwater(1,fsize); } delete [] filename; } /* ---------------------------------------------------------------------- directly assign a chunk of memory to be the im-memory page for the Spool ------------------------------------------------------------------------- */ void Spool::set_page(uint64_t memsize, char *memblock) { pagesize = memsize; page = memblock; } /* ---------------------------------------------------------------------- complete the Spool after data has been added to it 
always write page to disk, unlike KV and KMV which can stay in memory ------------------------------------------------------------------------- */ void Spool::complete() { create_page(); write_page(); fclose(fp); fp = NULL; npage++; nkey = size = 0; // set sizes for entire spool file nkv = esize = fsize = 0; for (int ipage = 0; ipage < npage; ipage++) { nkv += pages[ipage].nkey; esize += pages[ipage].size; fsize += pages[ipage].filesize; } mr->hiwater(0,fsize); #ifdef SPOOL_DEBUG printf("SP Created %s: %d pages, %u entries, %g Mb\n", filename,npage,nkv,esize/1024.0/1024.0); #endif } /* ---------------------------------------------------------------------- truncate Spool at ncut,pagecut entry called by KMV::convert() ------------------------------------------------------------------------- */ void Spool::truncate(int pagecut, int ncut, uint64_t sizecut) { if (ncut == 0) npage = pagecut; else { npage = pagecut+1; pages[pagecut].size = sizecut; pages[pagecut].filesize = roundup(sizecut,ALIGNFILE); pages[pagecut].nkey = ncut; } } /* ---------------------------------------------------------------------- return # of pages and ptr to in-memory page ------------------------------------------------------------------------- */ int Spool::request_info(char **ptr) { *ptr = page; return npage; } /* ---------------------------------------------------------------------- ready a page of entries caller is looping over data in Spool ------------------------------------------------------------------------- */ int Spool::request_page(int ipage) { read_page(ipage); // close file if last request if (ipage == npage-1) { fclose(fp); fp = NULL; } return pages[ipage].nkey; } /* ---------------------------------------------------------------------- add a single entry ------------------------------------------------------------------------- */ void Spool::add(int nbytes, char *entry) { // page is full, write to disk if (size+nbytes > pagesize) { create_page(); write_page(); npage++; nkey = 
size = 0; if (nbytes > pagesize) { printf("Spool size/limit: %d %d\n",nbytes,pagesize); error->one("Single entry exceeds Spool page size"); } } memcpy(&page[size],entry,nbytes); size += nbytes; nkey++; } /* ---------------------------------------------------------------------- add N entries of total nbytes ------------------------------------------------------------------------- */ void Spool::add(int n, uint64_t nbytes, char *entries) { // page is full, write to disk if (size+nbytes > pagesize) { create_page(); write_page(); npage++; nkey = size = 0; if (nbytes > pagesize) { printf("Spool size/limit: %d %d\n",nbytes,pagesize); error->one("Single entry exceeds Spool page size"); } } memcpy(&page[size],entries,nbytes); size += nbytes; nkey += n; } /* ---------------------------------------------------------------------- create virtual page entry for in-memory page ------------------------------------------------------------------------- */ void Spool::create_page() { if (npage == maxpage) { maxpage += PAGECHUNK; pages = (Page *) memory->srealloc(pages,maxpage*sizeof(Page),"SP:pages"); } pages[npage].nkey = nkey; pages[npage].size = size; pages[npage].filesize = roundup(size,ALIGNFILE); } /* ---------------------------------------------------------------------- write in-memory page to disk ------------------------------------------------------------------------- */ void Spool::write_page() { if (mr->outofcore < 0) error->one("Cannot create Spool file due to outofcore setting"); if (fp == NULL) { fp = fopen(filename,"wb"); if (fp == NULL) { char msg[1023]; sprintf(msg,"Cannot open Spool file %s for writing",filename); error->one(msg); } fileflag = 1; } int nwrite = fwrite(page,pages[npage].filesize,1,fp); mr->wsize += pages[npage].filesize; if (nwrite != 1 && pages[npage].filesize) { char str[128]; sprintf(str,"Bad SP fwrite: %d %u",nwrite,pages[npage].filesize); error->warning(str); } } /* ---------------------------------------------------------------------- read 
ipage from disk ------------------------------------------------------------------------- */ void Spool::read_page(int ipage) { if (fp == NULL) { fp = fopen(filename,"rb"); if (fp == NULL) error->one("Could not open Spool file for reading"); } int nread = fread(page,pages[ipage].filesize,1,fp); mr->rsize += pages[ipage].filesize; if ((nread != 1 && pages[ipage].filesize) || ferror(fp)) { char str[128]; sprintf(str,"Bad SP fread: %d %u",nread,pages[ipage].filesize); error->warning(str); clearerr(fp); } } /* ---------------------------------------------------------------------- round N up to multiple of nalign and return it ------------------------------------------------------------------------- */ uint64_t Spool::roundup(uint64_t n, int nalign) { if (n % nalign == 0) return n; n = (n/nalign + 1) * nalign; return n; } mrmpi-1.0~20131122/src/Makefile0000644000175000017500000000334312013025304015602 0ustar mathieumathieu# MR-MPI static library multiple-machine Makefile SHELL = /bin/sh #.IGNORE: # Definitions ROOT = mrmpi EXE = lib$(ROOT)_$@.a SRC = $(wildcard *.cpp) INC = $(wildcard *.h) OBJ = $(SRC:.cpp=.o) # List of all targets help: @echo '' @echo 'make clean-all delete all object files' @echo 'make clean-machine delete object files for one machine' @echo 'make tar mrmpi_src.tar.gz of src dir' @echo 'make stubs build dummy MPI library in mpistubs' @echo 'make install-python install MR-MPI wrapper in Python' @echo '' @echo 'make -f Makefile.shlib machine build MR-MPI shared library for machine' @echo 'make machine build MR-MPI static library for machine:' @echo '' @files="`ls MAKE/Makefile.*`"; \ for file in $$files; do head -1 $$file; done @echo '' # Build the code .DEFAULT: @test -f MAKE/Makefile.$@ @if [ ! 
-d Obj_$@ ]; then mkdir Obj_$@; fi @cp -p $(SRC) $(INC) Obj_$@ @cp MAKE/Makefile.$@ Obj_$@/Makefile @cd Obj_$@; \ $(MAKE) $(MFLAGS) "OBJ = $(OBJ)" "INC = $(INC)" "SHFLAGS =" \ "EXE = ../$(EXE)" lib @if [ -d Obj_$@ ]; then cd Obj_$@; rm -f $(SRC) $(INC) Makefile*; fi # Remove machine-specific object files clean: @echo 'make clean-all delete all object files' @echo 'make clean-machine delete object files for one machine' clean-all: rm -rf Obj_* clean-%: rm -rf Obj_$(@:clean-%=%) # Create a tarball of this dir tar: @cd ..; tar cvzf src/$(ROOT)_src.tar.gz \ src/Make* src/MAKE src/*.cpp src/*.h --exclude=*/.svn @echo "Created $(ROOT)_src.tar.gz" # Make MPI STUBS library stubs: @cd STUBS; make clean; make # install MR-MPI shared lib and Python wrapper for Python usage install-python: @python ../python/install.py mrmpi-1.0~20131122/LICENSE0000644000175000017500000000316411170664765014410 0ustar mathieumathieuProgram: MapReduce-MPI (MR-MPI) Library Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Sandia Corporation nor the names of contributors to this software may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mrmpi-1.0~20131122/mpistubs/0000755000175000017500000000000012243675121015233 5ustar mathieumathieumrmpi-1.0~20131122/mpistubs/mpi.h0000644000175000017500000001054511511420617016171 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. 
------------------------------------------------------------------------- */ #ifndef MPI_STUBS #define MPI_STUBS /* dummy defs for MPI stubs */ #define MPI_COMM_WORLD 0 #define MPI_INT 1 #define MPI_FLOAT 2 #define MPI_DOUBLE 3 #define MPI_CHAR 4 #define MPI_BYTE 5 #define MPI_UNSIGNED_LONG 6 #define MPI_UNSIGNED_LONG_LONG 6 #define MPI_DOUBLE_INT 7 #define MPI_SUM 1 #define MPI_MAX 2 #define MPI_MIN 3 #define MPI_MAXLOC 4 #define MPI_MINLOC 5 #define MPI_ANY_SOURCE -1 #define MPI_Comm int #define MPI_Request int #define MPI_Datatype int #define MPI_Op int #ifdef __cplusplus extern "C" { #endif /* MPI data structs */ typedef struct { int MPI_SOURCE; } MPI_Status; /* Function prototypes for MPI stubs */ void MPI_Init(int *argc, char ***argv); void MPI_Initialized(int *flag); void MPI_Finalize(); void MPI_Comm_rank(MPI_Comm comm, int *me); void MPI_Comm_size(MPI_Comm comm, int *nprocs); void MPI_Abort(MPI_Comm comm, int errorcode); void MPI_Type_size(MPI_Datatype datatype, int *size); void MPI_Get_processor_name(char *name, int *len); double MPI_Wtime(); void MPI_Send(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm); void MPI_Rsend(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm); void MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status); void MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request); void MPI_Wait(MPI_Request *request, MPI_Status *status); void MPI_Waitall(int n, MPI_Request *request, MPI_Status *status); void MPI_Waitany(int count, MPI_Request *request, int *index, MPI_Status *status); void MPI_Sendrecv(void *sbuf, int scount, MPI_Datatype sdatatype, int dest, int stag, void *rbuf, int rcount, MPI_Datatype rdatatype, int source, int rtag, MPI_Comm comm, MPI_Status *status); void MPI_Get_count(MPI_Status *status, MPI_Datatype datatype, int *count); void MPI_Comm_split(MPI_Comm 
comm, int color, int key, MPI_Comm *comm_out); void MPI_Comm_dup(MPI_Comm comm, MPI_Comm *comm_out); void MPI_Comm_free(MPI_Comm *comm); void MPI_Cart_create(MPI_Comm comm_old, int ndims, int *dims, int *periods, int reorder, MPI_Comm *comm_cart); void MPI_Cart_get(MPI_Comm comm, int maxdims, int *dims, int *periods, int *coords); void MPI_Cart_shift(MPI_Comm comm, int direction, int displ, int *source, int *dest); void MPI_Cart_rank(MPI_Comm comm, int *coords, int *rank); void MPI_Barrier(MPI_Comm comm); void MPI_Bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm); void MPI_Allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); void MPI_Scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); void MPI_Allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); void MPI_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm); void MPI_Reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); void MPI_Gather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); void MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); void MPI_Alltoallv(void *sendbuf, int *sendcounts, int *sdispls, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *rdispls, MPI_Datatype recvtype, MPI_Comm comm); #ifdef __cplusplus } #endif #endif mrmpi-1.0~20131122/mpistubs/mpi.cpp0000644000175000017500000003111611511420617016521 0ustar mathieumathieu/* ---------------------------------------------------------------------- MR-MPI = MapReduce-MPI library http://www.cs.sandia.gov/~sjplimp/mapreduce.html Steve 
Plimpton, sjplimp@sandia.gov, Sandia National Laboratories Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the modified Berkeley Software Distribution (BSD) License. See the README file in the top-level MapReduce directory. ------------------------------------------------------------------------- */ /* Single-processor "stub" versions of MPI routines */ #include "stdlib.h" #include "string.h" #include "stdio.h" #include "stdint.h" #include #include "mpi.h" /* lo-level function prototypes */ void mpi_copy_int(void *, void *, int); void mpi_copy_float(void *, void *, int); void mpi_copy_double(void *, void *, int); void mpi_copy_char(void *, void *, int); void mpi_copy_byte(void *, void *, int); /* lo-level data structure */ struct { double value; int proc; } double_int; /* ---------------------------------------------------------------------- */ /* MPI Functions */ /* ---------------------------------------------------------------------- */ void MPI_Init(int *argc, char ***argv) {} /* ---------------------------------------------------------------------- */ void MPI_Initialized(int *flag) { *flag = 1; } /* ---------------------------------------------------------------------- */ void MPI_Finalize() {} /* ---------------------------------------------------------------------- */ void MPI_Comm_rank(MPI_Comm comm, int *me) { *me = 0; } /* ---------------------------------------------------------------------- */ void MPI_Comm_size(MPI_Comm comm, int *nprocs) { *nprocs = 1; } /* ---------------------------------------------------------------------- */ void MPI_Abort(MPI_Comm comm, int errorcode) { exit(1); } /* ---------------------------------------------------------------------- */ void MPI_Type_size(MPI_Datatype datatype, int *size) { if (datatype == MPI_INT) *size = sizeof(int); else if (datatype == MPI_FLOAT) 
*size = sizeof(float); else if (datatype == MPI_DOUBLE) *size =sizeof(double); else if (datatype == MPI_CHAR) *size = sizeof(char); else if (datatype == MPI_BYTE) *size = sizeof(char); else if (datatype == MPI_UNSIGNED_LONG) *size = sizeof(uint64_t); else if (datatype == MPI_DOUBLE_INT) *size = sizeof(double_int); } /* ---------------------------------------------------------------------- */ void MPI_Get_processor_name(char *name, int *len) { strcpy(name,"Proc 0"); *len = strlen(name); } /* ---------------------------------------------------------------------- */ double MPI_Wtime() { double time; struct timeval tv; gettimeofday(&tv,NULL); time = 1.0 * tv.tv_sec + 1.0e-6 * tv.tv_usec; return time; } /* ---------------------------------------------------------------------- */ void MPI_Send(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) { printf("MPI Stub WARNING: Should not send message to self\n"); } /* ---------------------------------------------------------------------- */ void MPI_Rsend(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) { printf("MPI Stub WARNING: Should not rsend message to self\n"); } /* ---------------------------------------------------------------------- */ void MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status) { printf("MPI Stub WARNING: Should not recv message from self\n"); } /* ---------------------------------------------------------------------- */ void MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request) { printf("MPI Stub WARNING: Should not recv message from self\n"); } /* ---------------------------------------------------------------------- */ void MPI_Wait(MPI_Request *request, MPI_Status *status) { printf("MPI Stub WARNING: Should not wait on message from self\n"); } /* ---------------------------------------------------------------------- */ void 
MPI_Waitall(int n, MPI_Request *request, MPI_Status *status) { printf("MPI Stub WARNING: Should not wait on message from self\n"); } /* ---------------------------------------------------------------------- */ void MPI_Waitany(int count, MPI_Request *request, int *index, MPI_Status *status) { printf("MPI Stub WARNING: Should not wait on message from self\n"); } /* ---------------------------------------------------------------------- */ void MPI_Sendrecv(void *sbuf, int scount, MPI_Datatype sdatatype, int dest, int stag, void *rbuf, int rcount, MPI_Datatype rdatatype, int source, int rtag, MPI_Comm comm, MPI_Status *status) { printf("MPI Stub WARNING: Should not send message to self\n"); } /* ---------------------------------------------------------------------- */ void MPI_Get_count(MPI_Status *status, MPI_Datatype datatype, int *count) { printf("MPI Stub WARNING: Should not get count of message to self\n"); } /* ---------------------------------------------------------------------- */ void MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *comm_out) { *comm_out = comm; } /* ---------------------------------------------------------------------- */ void MPI_Comm_dup(MPI_Comm comm, MPI_Comm *comm_out) { *comm_out = comm; } /* ---------------------------------------------------------------------- */ void MPI_Comm_free(MPI_Comm *comm) { } /* ---------------------------------------------------------------------- */ void MPI_Cart_create(MPI_Comm comm_old, int ndims, int *dims, int *periods, int reorder, MPI_Comm *comm_cart) { *comm_cart = comm_old; } /* ---------------------------------------------------------------------- */ void MPI_Cart_get(MPI_Comm comm, int maxdims, int *dims, int *periods, int *coords) { dims[0] = dims[1] = dims[2] = 1; periods[0] = periods[1] = periods[2] = 1; coords[0] = coords[1] = coords[2] = 0; } /* ---------------------------------------------------------------------- */ void MPI_Cart_shift(MPI_Comm comm, int direction, int displ, 
int *source, int *dest) { *source = *dest = 0; } /* ---------------------------------------------------------------------- */ void MPI_Cart_rank(MPI_Comm comm, int *coords, int *rank) { *rank = 0; } /* ---------------------------------------------------------------------- */ void MPI_Barrier(MPI_Comm comm) {} /* ---------------------------------------------------------------------- */ void MPI_Bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) {} /* ---------------------------------------------------------------------- */ /* copy values from data1 to data2 */ void MPI_Allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int n; if (datatype == MPI_INT) n = count*sizeof(int); else if (datatype == MPI_FLOAT) n = count*sizeof(float); else if (datatype == MPI_DOUBLE) n = count*sizeof(double); else if (datatype == MPI_CHAR) n = count*sizeof(char); else if (datatype == MPI_BYTE) n = count*sizeof(char); else if (datatype == MPI_UNSIGNED_LONG) n = count*sizeof(uint64_t); else if (datatype == MPI_DOUBLE_INT) n = count*sizeof(double_int); memcpy(recvbuf,sendbuf,n); } /* ---------------------------------------------------------------------- */ void MPI_Scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int n; if (datatype == MPI_INT) n = count*sizeof(int); else if (datatype == MPI_FLOAT) n = count*sizeof(float); else if (datatype == MPI_DOUBLE) n = count*sizeof(double); else if (datatype == MPI_CHAR) n = count*sizeof(char); else if (datatype == MPI_BYTE) n = count*sizeof(char); else if (datatype == MPI_UNSIGNED_LONG) n = count*sizeof(uint64_t); else if (datatype == MPI_DOUBLE_INT) n = count*sizeof(double_int); memcpy(recvbuf,sendbuf,n); } /* ---------------------------------------------------------------------- */ /* copy values from data1 to data2 */ void MPI_Allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, 
MPI_Datatype recvtype, MPI_Comm comm) { int n; if (sendtype == MPI_INT) n = sendcount*sizeof(int); else if (sendtype == MPI_FLOAT) n = sendcount*sizeof(float); else if (sendtype == MPI_DOUBLE) n = sendcount*sizeof(double); else if (sendtype == MPI_CHAR) n = sendcount*sizeof(char); else if (sendtype == MPI_BYTE) n = sendcount*sizeof(char); else if (sendtype == MPI_UNSIGNED_LONG) n = sendcount*sizeof(uint64_t); else if (sendtype == MPI_DOUBLE_INT) n = sendcount*sizeof(double_int); memcpy(recvbuf,sendbuf,n); } /* ---------------------------------------------------------------------- */ /* copy values from data1 to data2 */ void MPI_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm) { int n; if (sendtype == MPI_INT) n = sendcount*sizeof(int); else if (sendtype == MPI_FLOAT) n = sendcount*sizeof(float); else if (sendtype == MPI_DOUBLE) n = sendcount*sizeof(double); else if (sendtype == MPI_CHAR) n = sendcount*sizeof(char); else if (sendtype == MPI_BYTE) n = sendcount*sizeof(char); else if (sendtype == MPI_UNSIGNED_LONG) n = sendcount*sizeof(uint64_t); else if (sendtype == MPI_DOUBLE_INT) n = sendcount*sizeof(double_int); memcpy(recvbuf,sendbuf,n); } /* ---------------------------------------------------------------------- */ /* copy values from data1 to data2 */ void MPI_Reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int n; if (datatype == MPI_INT) n = *recvcounts*sizeof(int); else if (datatype == MPI_FLOAT) n = *recvcounts*sizeof(float); else if (datatype == MPI_DOUBLE) n = *recvcounts*sizeof(double); else if (datatype == MPI_CHAR) n = *recvcounts*sizeof(char); else if (datatype == MPI_BYTE) n = *recvcounts*sizeof(char); else if (datatype == MPI_UNSIGNED_LONG) n = *recvcounts*sizeof(uint64_t); else if (datatype == MPI_DOUBLE_INT) n = *recvcounts*sizeof(double_int); memcpy(recvbuf,sendbuf,n); } /* 
---------------------------------------------------------------------- */ /* copy values from data1 to data2 */ void MPI_Gather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) { int n; if (sendtype == MPI_INT) n = sendcount*sizeof(int); else if (sendtype == MPI_FLOAT) n = sendcount*sizeof(float); else if (sendtype == MPI_DOUBLE) n = sendcount*sizeof(double); else if (sendtype == MPI_CHAR) n = sendcount*sizeof(char); else if (sendtype == MPI_BYTE) n = sendcount*sizeof(char); else if (sendtype == MPI_UNSIGNED_LONG) n = sendcount*sizeof(uint64_t); else if (sendtype == MPI_DOUBLE_INT) n = sendcount*sizeof(double_int); memcpy(recvbuf,sendbuf,n); } /* ---------------------------------------------------------------------- */ /* copy values from data1 to data2 */ void MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) { int n; if (sendtype == MPI_INT) n = sendcount*sizeof(int); else if (sendtype == MPI_FLOAT) n = sendcount*sizeof(float); else if (sendtype == MPI_DOUBLE) n = sendcount*sizeof(double); else if (sendtype == MPI_CHAR) n = sendcount*sizeof(char); else if (sendtype == MPI_BYTE) n = sendcount*sizeof(char); else if (sendtype == MPI_UNSIGNED_LONG) n = sendcount*sizeof(uint64_t); else if (sendtype == MPI_DOUBLE_INT) n = sendcount*sizeof(double_int); memcpy(recvbuf,sendbuf,n); } /* ---------------------------------------------------------------------- */ /* copy values from data1 to data2 */ void MPI_Alltoallv(void *sendbuf, int *sendcounts, int *sdispls, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *rdispls, MPI_Datatype recvtype, MPI_Comm comm) { int n; if (sendtype == MPI_INT) n = sendcounts[0]*sizeof(int); else if (sendtype == MPI_FLOAT) n = sendcounts[0]*sizeof(float); else if (sendtype == MPI_DOUBLE) n = sendcounts[0]*sizeof(double); else if (sendtype == MPI_CHAR) n = 
sendcounts[0]*sizeof(char); else if (sendtype == MPI_BYTE) n = sendcounts[0]*sizeof(char); else if (sendtype == MPI_UNSIGNED_LONG) n = sendcounts[0]*sizeof(uint64_t); else if (sendtype == MPI_DOUBLE_INT) n = sendcounts[0]*sizeof(double_int); memcpy(recvbuf,sendbuf,n); } mrmpi-1.0~20131122/mpistubs/Makefile0000644000175000017500000000114112012771122016660 0ustar mathieumathieu# Makefile for MPI stubs library # Syntax: # make # build lib as libmpi_stubs.a # make clean # remove *.o and lib files # edit System-specific settings as needed for your platform SHELL = /bin/sh .IGNORE: # Files SRC = mpi.c INC = mpi.h # Definitions EXE = libmpi_stubs.a OBJ = $(SRC:.c=.o) # System-specific settings CC = g++ CCFLAGS = -O -fPIC ARCHIVE = ar ARCHFLAG = rs # Targets lib: $(OBJ) $(ARCHIVE) $(ARCHFLAG) $(EXE) $(OBJ) clean: rm -f *.o libmpi_stubs.a # Compilation rules .c.o: $(CC) $(CCFLAGS) -c $< # Individual dependencies $(OBJ): $(INC) mrmpi-1.0~20131122/README0000644000175000017500000000475211723252470014255 0ustar mathieumathieuThis is the MapReduce-MPI (MR-MPI) library. Copyright (2009) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ---------------------------------------------------------------------- MapReduce is the operation popularized by Google for computing on large distributed data sets. See the Wikipedia entry on MapReduce for an overview of what a MapReduce is. The MR-MPI library is a simple, portable implementation of MapReduce that runs on any serial desktop machine or large parallel machine using MPI message passing. As a user, you write a program which calls the MR-MPI library and you provide functions that operate on your data such as a map() and a reduce(). These functions are invoked by the library on single processors, so that you typically do not need to write any parallel code to perform a MapReduce. 
You can also use a provided Python wrapper on the library. You can also use a provided script-language interface to the library called OINK. The library is written in C++ and can be called from C++ or from C or other hi-level languages such as Fortran or a scripting language. A Python wrapper for the library is provided. If you want to run on a single processor, a dummy MPI library is provided to link against. To perform MapReduces in parallel, you need to link against an installed MPI library. The MR-MPI library and the accompanying software is licensed under the Berkeley Software Distribution (BSD) License, which basically means it can be used by anyone for any purpose. See the LICENSE file in this directory for details. The most current version of the library including all bug fixes and new features can be downloaded at www.sandia.gov/~sjplimp/download.html. The authors of the library are Steve Plimpton and Karen Devine at Sandia National Laboratories who can be contacted at sjplimp, kddevin at sandia.gov. Or see www.sandia.gov/~sjplimp. This MR-MPI distribution includes the following files and directories: README this file LICENSE the BSD License doc documentation examples simple examples of MapReduce programs mpistubs dummy MPI library oink OINK scripting framework for MR-MPI library oinkdoc documentation for OINK python Python wrapper files on MR-MPI library src library source files user user-contributed MapReduce programs Point your browser at doc/Manual.html to get started. mrmpi-1.0~20131122/oinkdoc/0000755000175000017500000000000012252022520015000 5ustar mathieumathieumrmpi-1.0~20131122/oinkdoc/if.txt0000755000175000017500000001361411734437746016177 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line if command :h3 [Syntax:] if boolean then t1 t2 ...
elif boolean f1 f2 ... elif boolean f1 f2 ... else e1 e2 ... :pre boolean = a Boolean expression evaluated as TRUE or FALSE (see below) then = required word t1,t2,...,tN = one or more OINK commands to execute if condition is met, each enclosed in quotes elif = optional word, can appear multiple times f1,f2,...,fN = one or more OINK commands to execute if elif condition is met, each enclosed in quotes (optional arguments) else = optional argument e1,e2,...,eN = one or more OINK commands to execute if no condition is met, each enclosed in quotes (optional arguments) :ul [Examples:] if "$\{steps\} > 1000" then exit if "$x <= $y" then "print X is smaller = $x" else "print Y is smaller = $y" if "($\{flag\} == 0) || ($n < 1000)" then & "graph reduce myfunc" & elif $\{flag\} == 1 & "graph reduce myfunc2" & else & "graph kmv_stats 2" & "print 'Elapsed time = $t'" if "$\{niter\} > $\{niter_previous\}" then "jump file1" else "jump file2" :pre [Description:] This command provides an if-then-else capability within an input script. A Boolean expression is evaluated and the result is TRUE or FALSE. Note that as in the examples above, the expression can contain variables, as defined by the "variable"_variable.html command, which will be evaluated as part of the expression. Thus a user-defined formula that reflects the current state of the simulation can be used to issue one or more new commands. If the result of the Boolean expression is TRUE, then one or more commands (t1, t2, ..., tN) are executed. If it is FALSE, then Boolean expressions associated with successive elif keywords are evaluated until one is found to be true, in which case its commands (f1, f2, ..., fN) are executed. If no Boolean expression is TRUE, then the commands associated with the else keyword, namely (e1, e2, ..., eN), are executed. The elif and else keywords and their associated commands are optional. If they aren't specified and the initial Boolean expression is FALSE, then no commands are executed.
The syntax for Boolean expressions is described below. Each command (t1, f1, e1, etc) can be any valid OINK input script command. If the command is more than one word, it must be enclosed in quotes, so it will be treated as a single argument, as in the examples above. IMPORTANT NOTE: If a command itself requires a quoted argument (e.g. a "print"_print.html command), then double and single quotes can be used and nested in the usual manner, as in the examples above and below. See "this section"_Section_commands.html#3_2 of the manual for more details on using quotes in arguments. Only one level of nesting is allowed, but that should be sufficient for most use cases. Note that by using the line continuation character "&", the if command can be spread across many lines, though it is still a single command: if "$a < $b" then & "print 'Minimum value = $a'" & "graph ..." & else & 'print "Minimum value = $b"' & "graph ..." :pre Note that if one of the commands to execute is an invalid OINK command, such as "exit" in the first example above, then executing the command will cause OINK to halt. Note that by jumping to a label in the same input script, the if command can be used to break out of a loop. See the "variable delete"_variable.html command for info on how to delete the associated loop variable, so that it can be re-used later in the input script. Here is an example of a double loop which uses the if and "jump"_jump.html commands to break out of the inner loop when a condition is met, then continues iterating thru the outer loop. label loopa variable a loop 5 label loopb variable b loop 5 print "A,B = $a,$b" ... if '$b > 2' then "print 'Jumping to another script'" "jump in.script break" next b jump in.script loopb label break variable b delete :pre next a jump in.script loopa :pre :line The Boolean expressions for the if and elif keywords have a C-like syntax. Note that each expression is a single argument within the if command.
Thus if you want to include spaces in the expression for clarity, you must enclose the entire expression in quotes. An expression is built out of numbers: 0.2, 100, 1.0e20, -15.4, etc :pre and Boolean operators: A == B, A != B, A < B, A <= B, A > B, A >= B, A && B, A || B, !A :pre Each A and B is a number or a variable reference like $a or $\{abc\}, or another Boolean expression. If a variable is used it must produce a number when evaluated and substituted for in the expression, else an error will be generated. Expressions are evaluated left to right and have the usual C-style precedence: the unary logical NOT operator "!" has the highest precedence, the 4 relational operators "<", "<=", ">", and ">=" are next; the two remaining relational operators "==" and "!=" are next; then the logical AND operator "&&"; and finally the logical OR operator "||" has the lowest precedence. Parentheses can be used to group one or more portions of an expression and/or enforce a different order of evaluation than what would occur with the default precedence. The 6 relational operators return either a 1.0 or 0.0 depending on whether the relationship between A and B is TRUE or FALSE. The logical AND operator will return 1.0 if both its arguments are non-zero, else it returns 0.0. The logical OR operator will return 1.0 if either of its arguments is non-zero, else it returns 0.0. The logical NOT operator returns 1.0 if its argument is 0.0, else it returns 0.0. The overall Boolean expression produces a TRUE result if the result is non-zero. If the result is zero, the expression result is FALSE.
[Related commands:] "variable"_variable.html, "print"_print.html mrmpi-1.0~20131122/oinkdoc/degree.txt0000755000175000017500000000263011734437746017030 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line degree command :h3 [Syntax:] degree dupflag -i in1 -o out1.file out1.mr :pre dupflag = 1/2 for counting edge once/twice in1 = graph edges: Key = Vi Vj, Value = NULL out1 = degree of each vertex: Key = Vi, Value = degree :ul [Examples:] degree 1 -i mrv -o degree.list NULL [Description:] This is a named command which calculates the degree of each vertex in a graph. If the dupflag is 2, then each edge increments the degree of both of its vertices. If the dupflag is 1, then each edge only increments the degree of the first of its vertices. The former is usually more approrpriate for undirected graphs; the latter for directed graphs, in which case the out-degree of each vertex is being calculated. See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. In1 stores a set of edges. No assumption is made about duplicates or self edges, i.e. (Vi,Vj) may appear multiple times, both (Vi,Vj) or (Vj,Vi) may appear, as may (Vi,Vi). The input is unchanged by this command. Out1 will store the degree count of each vertex. 
[Related commands:] "neighbor"_neighbor.html, "degree_stats"_degree_stats.html mrmpi-1.0~20131122/oinkdoc/vertex_extract.txt0000755000175000017500000000176711734437746020656 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line vertex_extract command :h3 [Syntax:] vertex_extract -i in1 -o out1.file out1.mr :pre in1 = graph edges: Key = Vi Vj, Value = NULL out1 = all vertices in graph: Key = Vi, Value = NULL :ul [Examples:] vertex_degree -i mre -o vlist.txt mrv [Description:] This is a named command which extracts a list of all the vertices in a graph from the edge list. See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. In1 stores a set of edges. No assumption is made about duplicates or self edges, i.e. (Vi,Vj) may appear multiple times, both (Vi,Vj) or (Vj,Vi) may appear, as may (Vi,Vi). The input is unchanged by this command. Out1 will store the vertices in the graph. [Related commands:] none mrmpi-1.0~20131122/oinkdoc/Section_commands.txt0000755000175000017500000006150111734437746021064 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line 4. Adding Commands to OINK :h3 The purpose of this section is to give details of how to write new "named commands"_command.html that can be added to OINK and which will be invokable by your input scripts and will interact appropriately with other OINK commands. OINK is designed to make this easy to do with a minimum of special coding on your part. Several such named commands are included with the OINK distribution; more will be added over time. 
See "this section"_Section_script.html#comm of the manual for a list of the current named commands in OINK. We also invite OINK users to send email to the developers with new commands they have written and wish to share, so we can add them to the distribution, attributed to you. 4.1 "Source files for the new class"_#4_1 4.2 "Methods in the new class"_#4_2 4.3 "Calls to the OINK object manager"_#4_3 4.4 "Calling back to map() and reduce() functions"_#4_4 :all(b) :line :line 4.1 Source files for the new class :link(4_1),h4 In OINK a named command is a child class that derives from the Command parent class (see src/command.cpp and src/command.h), meaning that it contains several methods that can be called by the OINK framework. Adding a new named command to OINK is as simple as writing the code for it in two new files (e.g. foo.cpp and foo.h), dropping them into the src directory, and re-building OINK. It is easiest to understand the description that follows if you look at an example named command in OINK. In what follows we will use the "degree"_degree.html command, contained in src/degree.cpp and src/degree.h for illustration purposes. The *.h file for a new named command should have lines like these at the top (from the src/degree.h file): #ifdef COMMAND_CLASS CommandStyle(degree,Degree) #else :pre CommandStyle(arg1,arg2) is a macro that gets converted by the OINK build procedure into source code. Arg1 is the "name" of the named command, which is how you reference it in your input script, e.g. as degree -i graphdir -o out/outfile NULL :pre Arg2 is the class that implements that command. The list of all such named commands will appear in the style_command.h file after OINK is (re)built, via a make command. The remainder of the *.h file (between the #else and final pair of #endif) is the definition of your new class.
Note that you will need to include the mapreduce.h file (from the MR-MPI library src dir) and the MAPREDUCE_NS namespace if your class definition includes any map(), reduce(), etc callback functions since they have the "KeyValue" class name in their prototype. :line :line 4.2 Methods in the new class :link(4_2),h4 When a line in the input script starts with a "named command"_command.html, the associated class is instantiated, its params() and run() methods are called, and the instance of the class is destroyed. The constructor of your new class should set two variables, ninputs and noutputs, which are the number of input and output descriptors it requires you to list in the input script. For the "degree"_degree.html command, ninputs = noutputs = 1, as illustrated in the example above. Your new class is required to define only two methods: params() and run(). Params() is passed the list of arguments following the command name, excluding the "-i" and "-o" arguments, which are processed separately by the parent Command class. Your params() method should parse and check the arguments and generate an error message if the number of arguments is incorrect or any of their values is invalid. Note that the "degree"_degree.html command takes 0 arguments. Run() is called to invoke the meat of your command and it can perform any series of MapReduce or other operations you wish, using as many "MapReduce objects"_md (from the MR-MPI library) and "MR-MPI objects"_mr.html (managed by OINK) as you wish. The calls that run() can make to the OINK object manager (obj) are discussed in the next section. The destructor of your new class should free any memory it has allocated, including any local MapReduce objects that it allocated.
Note that this is different from "MR-MPI objects"_mr.html and the underlying MapReduce objects they wrap, and which are often associated with the input and output descriptors of your command in the input script; those objects are created/destroyed by OINK itself, as discussed in the next section. :line :line 4.3 Calls to the OINK object manager :link(4_3),h4 These are the calls that the run() method of your new class can make to the OINK object manager. Each is discussed below. Note that you make the calls via the "obj" pointer which is visible to your class, e.g. obj->cleanup(). This means you should add the line #include "object.h" :pre at the top of your *.cpp file. MapReduce *Object::create_mr(); MapReduce *Object::create_mr(int verbosity, int timer, int memsize, int outofcore); MapReduce *Object::copy_mr(MapReduce *mr); int Object::permanent(MapReduce *mr); :pre MapReduce *Object::input(int index); MapReduce *Object::input(int index, void (*map1)(int, char *, KeyValue *, void *), void *ptr); MapReduce *Object::input(int index, void (*map2)(int, char *, int, KeyValue *, void *), void *ptr); MapReduce *Object::input(int index, void (*map1)(int, char *, KeyValue *, void *), void (*map2)(int, char *, int, KeyValue *, void *), void *ptr); :pre void Object::output(int index, MapReduce *mr); void Object::output(int index, MapReduce *mr, void (*scankv)(char *, int, char *, int, void *), void *ptr, int disallow); void Object::output(int index, MapReduce *mr, void (*scankmv)(char *, int, char *, int, int *, void *), void *ptr, int disallow); void Object::output(int index, MapReduce *mr, void (*map)(uint64_t, char *, int, char *, int, KeyValue *, void *), void *ptr, int disallow); void Object::output(int index, MapReduce *mr, void (*reduce)(char *, int, char *, int, int *, KeyValue *, void *), void *ptr, int disallow); void Object::output(int index, MapReduce *mr, void (*scankv)(char *, int, char *, int, void *), void (*scankmv)(char *, int, char *, int, int *, void *), 
void (*map)(uint64_t, char *, int, char *, int, KeyValue *, void *), void (*reduce)(char *, int, char *, int, int *, KeyValue *, void *), void *ptr, int disallow); :pre void Object::cleanup(); :pre Here is a brief summary of the calls your run() method will typically make: 1 call to input() for each of its Ninput input descriptors calls to create_mr() or copy_mr() for additional MapReduce objects it uses 1 call to output() for each of its Noutput output descriptors final call to cleanup() at the end :ul The details are discussed below. :line Your run() method should call one of the 4 variants of the input() methods one time for each of its inputs. Which variant it calls depends on what forms of input you wish to support, which is related to the "-i" arguments specified with your "named command"_command.html in the input script and additional options set by the "input"_input.html command. Each call takes an "index" argument which is the index of the input descriptor being referenced, from 1 to Ninputs. Each call returns a pointer to a MapReduce object which will contain the desired input data as key/value (KV) pairs. As the "named command"_command.html doc page explains, each input descriptor for your command can be specified in the input script as one or more files or directories or as an existing MR-MPI object. For reading files, there are 2 kinds of map() methods that can be used to convert the file contents into KV pairs, one where a filename is passed to your callback function, and the other where a chunk of bytes is passed to your callback function. See the "map() method"_../doc/map.html doc page for details. If you invoke this method: MapReduce *Object::input(int index); :pre then the input descriptor must be specified in your input script as an existing MR-MPI object. No reading of files is allowed.
If you invoke this method: MapReduce *Object::input(int index, void (*map1)(int, char *, KeyValue *, void *), void *ptr); :pre then the input can be specified as either an MR-MPI object or as files which will be processed via the map1() callback function which receives a filename as an argument, so that it can open the file, read it, and generate KV pairs. If you invoke this method: MapReduce *Object::input(int index, void (*map2)(int, char *, int, KeyValue *, void *), void *ptr); :pre then the input can be specified as either an MR-MPI object or as files which will be processed via the map2() callback function which receives a chunk of bytes read from a file as an argument, so that it can convert the byte string into KV pairs. To use this map2() method, you would also need to specify an "input"_input.html command in your input script that sets up various options needed to call the "MR-MPI library map() method"_../doc/map.html that uses map2() as a callback function. If you invoke the 4th variant: MapReduce *Object::input(int index, void (*map1)(int, char *, KeyValue *, void *), void (*map2)(int, char *, int, KeyValue *, void *), void *ptr); :pre then both kinds of map() callback functions can be specified, map1() and map2(), and OINK will select which to use depending on what options have been setup via the "input"_input.html command for this input descriptor. Note that you have to provide the map1() and/or map2() callback functions to the input() calls, with the correct prototype. As discussed below and on "this doc page"_Section_functions.html, they can be static methods in your class, or they can be map() methods in separate files in the OINK src directory, which are named map_*.cpp. Also note that if you want to provide maximum flexibility for using your "named command"_command.html, then you should provide one or both flavors of callback map() functions for allowing input from files along with input from an existing MR-MPI object.
If you do not provide either callback or just one of the two, then input scripts will be limited in what forms of input descriptor they can define. :line Your run() method should call one of the 6 variants of the output() methods one time for each of its outputs. Which variant it calls depends on what forms of output you wish to support, which is related to the "-o" arguments specified with your "named command"_command.html in the input script and additional options set by the "output"_output.html command. Each call takes an "index" argument which is the index of the output descriptor being referenced, from 1 to Noutputs. Each call also takes a MapReduce object pointer "mr", which contains the data you wish to output. As the "named command"_command.html doc page explains, each output descriptor for your command is specified in the input script with 2 parts, either of which can be NULL. The first part is a filename for writing output to files. The second part is the ID of an MR-MPI object which will contain the output. For writing files, there are 4 kinds of callback methods that can be used to write the contents of "mr" to a file. Each of these 4 methods is called with a "FILE *" as its final "void *" argument. This is the file pointer to a file created and opened (and later closed) by OINK which the callback method can write its data to. If you pass your own non-NULL pointer to the callback method via the "void *ptr" argument to the output() calls, then it will be appended to the FILE *, so that it can be dereferenced as a 2nd pointer passed to the callback function. If you invoke this method: void Object::output(int index, MapReduce *mr); :pre then the output descriptor must be specified in your input script as only defining a MR-MPI object for output. No writing to files is allowed. This call will assign the ID specified in your input script to the MR-MPI object that wraps "mr". 
Also note, that this will remove the ID from any other MR-MPI object that has the same ID. They then become unnamed or temporary MR-MPI objects which will be deleted at the end of your run() method. See further discussion of temporary versus permanent MR-MPI objects in the next section. If you invoke one of these 2 methods: void Object::output(int index, MapReduce *mr, void (*scankv)(char *, int, char *, int, void *), void *ptr, int disallow); void Object::output(int index, MapReduce *mr, void (*scankmv)(char *, int, char *, int, int *, void *), void *ptr, int disallow); :pre then the output can be specified as either an MR-MPI object or as files which will be written to via the scankv() or scankmv() callback functions respectively. In the first case, the scankv() function will receive key/value (KV) pairs, one at a time from the "mr" MapReduce object. In the second case, the scankmv() function will receive key/multivalue (KMV) pairs, one at a time from the "mr" MapReduce object. The MapReduce object will be unaltered by this operation. See the "scan() method"_../doc/scan.html doc page in the MR-MPI library for details. The "disallow" flag is explained below. If you invoke one of these 2 methods: void Object::output(int index, MapReduce *mr, void (*map)(uint64_t, char *, int, char *, int, KeyValue *, void *), void *ptr, int disallow); void Object::output(int index, MapReduce *mr, void (*reduce)(char *, int, char *, int, int *, KeyValue *, void *), void *ptr, int disallow); :pre then the output can be specified as either an MR-MPI object or as files which will be written to via the map() or reduce() callback functions respectively. In the first case, the map() function will receive key/value (KV) pairs, one at a time from the "mr" MapReduce object. In the second case, the reduce() function will receive key/multivalue (KMV) pairs, one at a time from the "mr" MapReduce object.
For the first case, the MapReduce object will typically be unaltered by this operation, since the "MR-MPI library map() method"_../doc/map.html is called with addflag=1, so that the existing KV pairs are preserved. But your map() callback function should not emit any new KV pairs. For the second case, the MapReduce object will be altered by this operation, since the "MR-MPI library reduce() method"_../doc/reduce.html deletes the KMV pairs and replaces them with new KV pairs which your reduce() callback function may or may not emit. The "disallow" flag is explained below. If you invoke the 6th variant: void Object::output(int index, MapReduce *mr, void (*scankv)(char *, int, char *, int, void *), void (*scankmv)(char *, int, char *, int, int *, void *), void (*map)(uint64_t, char *, int, char *, int, KeyValue *, void *), void (*reduce)(char *, int, char *, int, int *, KeyValue *, void *), void *ptr, int disallow); :pre then any of the 4 kinds of callback functions can be specified, namely scankv(), scankmv(), map(), or reduce(). Those that you do not wish to provide or that are not compatible with the current state of the MapReduce object "mr" (which will contain either KV or KMV pairs, but not both), can be specified as NULL. Note that you have to provide each of these 4 callback functions to the output() calls, with the correct prototype. As discussed below and on "this doc page"_Section_functions.html, they can be static methods in your class, or they can be methods in separate files in the OINK src directory, which are named scan_*.cpp, map_*.cpp, and reduce_*.cpp respectively. Also note that if you want to provide maximum flexibility for using your "named command"_command.html, then you should provide at least one flavor of a callback function for allowing output to files along with output to an MR-MPI object. If you do not do this, then input scripts will be limited in what forms of output descriptor they can define.
All but the first of the output() variants can be called with an optional disallow flag which is set to 0 by default. If these methods are called with disallow=1, then no output to an MR-MPI object is allowed. This is useful if you expect the run() method of your "named command"_command.html to subsequently change the data stored in the MapReduce object, and thus make the data written to an output file differ from what is stored in the MapReduce object. :line Your run() method may need to use additional MapReduce objects as workspace, in addition to its inputs. Some of these may end up holding the data you wish to output. One key point to understand is that the OINK object manager keeps track of two kinds of MR-MPI objects, each of which is a thin wrapper on MapReduce objects which hold your key/value (KV) or key/multivalue (KMV) data. Each MR-MPI object can be "permanent" meaning it has an ID which can be referenced by input script commands. Or it can be "temporary", meaning it has no ID and was created to hold data input from a file or by the function calls discussed below. Permanent MR-MPI objects persist until they are explicitly deleted by your input script. Temporary MR-MPI objects are deleted at the end of your run() method; they can be thought of as workspace created and used by your run() method. These two calls create a new temporary MR-MPI object and return a pointer to the MapReduce object contained within it: MapReduce *Object::create_mr(); MapReduce *Object::create_mr(int verbosity, int timer, int memsize, int outofcore); :pre The first variant will use the default settings for the MapReduce object; see the "set"_set.html command and the "settings doc page"_../doc/settings.html of the MR-MPI library for details. The second variant allows you to override a few of the settings with specified values. 
This call makes a copy of the "mr" MapReduce object, wraps it in a new temporary MR-MPI object, and returns a pointer to the new MapReduce object: MapReduce *Object::copy_mr(MapReduce *mr); :pre There are two reasons to create new and copied MapReduce objects via these calls, rather than directly invoking MR-MPI library calls within your run() method. I.e. two reasons to do one of these: MapReduce *mr = obj->create_mr(); MapReduce *mr2 = obj->copy_mr(mr); :pre instead of one of these: MapReduce *mr = new MapReduce(); MapReduce *mr2 = mr->copy(); :pre The first reason is that OINK will manage the memory associated with the new MR-MPI objects and free them for you at the end of your run() method; see the cleanup() method discussion below. The second is that you can assign an ID to these temporary MR-MPI objects via the output() calls discussed above, which you cannot do if you create the MapReduce object directly yourself. I.e. you cannot pass to an output() method a pointer to a MapReduce object that you allocated yourself if that operation will assign an ID (specified in your input script) to the MR-MPI object. You are of course free to create additional MapReduce objects yourself via direct calls to the MR-MPI library. In this case you should ensure you free the objects yourself before the run() method ends, so as not to leak memory. One additional point is that it is fine to do this within your run() method, where mr is a pointer returned by obj->create_mr(): mr = obj->copy_mr(mr); :pre whereas you should not do this: mr = mr->copy(); :pre The former simply overwrites the local mr pointer, but OINK will manage and free the memory if necessary for the underlying MapReduce objects associated with both the original and new mr pointers. The latter will leak memory since the underlying MapReduce object associated with the original mr pointer is lost.
This call is useful for checking whether a MR-MPI object has been assigned a name or not, when it was used for input or output: int Object::permanent(MapReduce *mr); :pre It is called using a MapReduce object pointer and returns a 1 if the associated MR-MPI object that wraps it has a name, and a 0 if it does not. There are two uses for this call. First, it can be used after an input() call to determine whether the input was done from a file or an existing MR-MPI object. In the former case permanent() will return 0, since the new MR-MPI object holding the data is unnamed. In the latter case it will return 1, since the MR-MPI object holding the data was named in your input script as one of the "-i" arguments to the "named command"_command.html. If the run() method will subsequently alter the MapReduce object and it is permanent, you can make a copy of it, so as to not alter the original. Second, it can be used after an output() call to determine whether the MapReduce object was assigned a name. This will be the case if a MR-MPI ID was specified in your input script as one of the "-o" arguments to the "named command"_command.html. If this is the case, you typically do not want to alter the data in the MapReduce object after outputting it. If you wish to further process the data, you can make a copy. Finally, this method should be called at the end of your run() method to free all the temporary MR-MPI objects stored by OINK, and perform other internal cleanup: void Object::cleanup(); :pre :line 4.4 Calling back to map() and reduce() functions :link(4_4),h4 Your run() method will typically invoke various methods from the MR-MPI library which involve callback functions, e.g. for performing map() or reduce() operations. The "MR-MPI library manual"_../doc/Technical.html#callback discusses the general rules for passing a pointer to a callback function to a MR-MPI library method.
Since you will be doing this from within the class that encodes your "named command"_command.html you have two choices. First, you can pass a pointer to a static function declared within your class. This function cannot directly access any class variables, but you can pass it the "this" pointer for the class (as the void * argument to the map() or reduce() function) which the callback function can use to access class variables indirectly, through that pointer. If you do this, then the map() and reduce() methods defined in your class can only be used by that "named command"_command.html. An alternative is to put your callback functions in their own files, named map_*.cpp for map() functions, reduce_*.cpp for reduce() functions, compare_*.cpp for compare() functions, hash_*.cpp for hash() functions, and scan_*.cpp for scan() functions. By doing this the callback functions can be used by any "named command"_command.html or as arguments to the "MR-MPI library commands"_mrmpi.html used in an input script to invoke the MR-MPI library methods directly. See the oink/rmat.cpp file, which implements the "rmat"_rmat.html command, for an example of a "named command"_command.html which accesses several of its callback functions in this manner. Each map_*.cpp file (and reduce_*.cpp, compare_*.cpp, etc) can contain one or more map() (reduce(), compare(), etc) callback functions. These are not class methods, but stand-alone functions. See examples in the oink directory. The header files that contain the prototypes for these functions are named style_map.h, style_reduce.h, etc and are auto-generated when OINK is built. Your "named command"_command.html class, e.g. rmat.cpp, simply needs to include these style header files in order to use any of the callback functions in OINK. Likewise, any callback function included in one of these files can be accessed by name in your input script when using one of the "MR-MPI library commands"_mrmpi.html.
Documentation for the collection of map(), reduce(), etc functions is also auto-extracted and included in "this section"_Section_functions.html of the OINK documentation. Instructions on how to pass generic pointers to the callback functions is also discussed in "this section"_Section_functions.html. It is also possible in the run() method of your "named command"_command.html to select a callback function based on an input script parameter to your command. For example, the input script could list the name of a particular compare() function you wish to use to sort the data in a MapReduce object. By calling the appropriate lookup() method in the MRMPI class (oink/mrmpi.cpp), the parameter string can be converted into a matching function pointer. For example, consider these lines of code: MapReduce *mr = obj->create_mr(); ... CompareFnPtr compare = compare_lookup(userparam); mr->sort_keys(compare); :pre In this example "userparam" is a string, listed in the input script as a command parameter, which contains a function name, e.g. mySpecialCompare. Assuming that function is included in OINK in a compare_*.cpp file, the compare_lookup() method will be able to match the string to the function and return a pointer to the function which can then be used as an argument to the "sort_keys()"_../doc/sort.html MR-MPI library method. The definition of CompareFnPtr and all other callback function pointers is in the "typedefs.h" file, which can be included at the top of your "named command"_command.html *.cpp file.
mrmpi-1.0~20131122/oinkdoc/cc_stats.txt0000755000175000017500000000220511734437746017376 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line cc_stats command :h3 [Syntax:] cc_stats -i in1 :pre in1 = component assignment for each vertex: Key = Vi, Value = Ci [Examples:] cc_stats -i ccdir :pre [Description:] This is a named command which summarizes and prints out the statistics on connected components (CCs) that each vertex in a graph belongs to. For each component size n, one line is printed to the screen with the number of components of size n. This is done in sorted order. See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. In1 stores a set of vertices Vi and the component ID Ci which each is assigned to. Typically this will have been computed previously by the "cc_find"_cc_find.html command. The input is unchanged by this command. This command produces no output, other than what is written to the screen. [Related commands:] "cc_find"_cc_find.html mrmpi-1.0~20131122/oinkdoc/cc_find.txt0000755000175000017500000000352411734437746017165 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line cc_find command :h3 [Syntax:] cc_find nthresh -i in1 -o out1.file out1.mr :pre nthresh = threshhold size at which components are split across processors in1 = graph edges: Key = Vi Vj, Value = NULL out1 = components: Key = Vi, Value = Ci :ul [Examples:] cc_find 1000 -i mre -o cc.list cc :pre [Description:] This is a named command which finds all the connected components (CCs) in an undirected graph. 
A connected component is a set of vertices where each is connected to one or more other vertices in the set via an edge. The CCs are found via the MapReduce algorithm of "(Cohen)"_#Cohen discussed in his paper with extensions described in the paper of "(Plimpton)"_#Plimpton which attempt to load-balance the calculation across processors when one or a few very large components exist in the graph. See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. In1 stores a set of edges, assumed to have no duplicates or self-edges. This means that either (Vi,Vj) or (Vj,Vi) appears, but not both. Also (Vi,Vi) does not appear. The input is unchanged by this command. Out1 will store the assignment of each vertex in the graph to a component ID. The component ID is the smallest vertex ID of any vertex in the component. [Related commands:] "cc_stats"_cc_stats.html :line :link(Cohen) [(Cohen)] Cohen, "Graph Twiddling in a MapReduce World", Computing in Science and Engineering, 11, 29-41 (2009). :link(Plimpton) [(Plimpton)] Plimpton and Devine, "MapReduce in MPI for Large-Scale Graph Algorithms", to appear in Parallel Computing (2011). mrmpi-1.0~20131122/oinkdoc/label.txt0000755000175000017500000000152611734437746016657 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line label command :h3 [Syntax:] label ID :pre ID = string used as label name :ul [Examples:] label xyz label loop :pre [Description:] Label this line of the input script with the chosen ID. Unless a jump command was used previously, this does nothing. But if a "jump"_jump.html command was used with a label argument to begin invoking this script file, then all command lines in the script prior to this line will be ignored. I.e. 
execution of the script will begin at this line. This is useful for looping over a section of the input script as discussed in the "jump"_jump.html command. [Related commands:] none mrmpi-1.0~20131122/oinkdoc/mr.txt0000755000175000017500000000306311734437746016214 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line mr command :h3 [Syntax:] mr MR-ID verbosity timer memsize outofcore :pre MR-ID = ID of new MR-MPI object to create verbosity = verbosity setting (optional) timer = timer setting (optional) memsize = memsize setting (optional) outofcore = outofcore setting (optional) :ul [Examples:] mr edge mr edge 2 mr edge 1 1 16 0 :pre [Description:] Create a new MR-MPI object, which can be referenced by name elsewhere in your input script. In OINK, a MR-MPI object is simply a thin wrapper on a MapReduce object created via the "MR-MPI library"_md. The MR-MPI object has an ID which can be used elsewhere in the input script. For example it can be used as input to a "named command"_command.html or in a "MR-MPI library command"_mrmpi.html. The ID of an MR-MPI object can only contain alphanumeric characters and underscores. When the underlying MapReduce object is created, it will have default settings as described "here"_../doc/settings.html. Several of these settings can be overridden by the 4 options listed above. If none of them are specified, then the default settings are used. To reset one of the settings, you must specify all the settings that preceed it. E.g. if just two optional arguments are used, they are the verbosity and timer settings. 
[Related commands:] "MR-MPI library commands"_mrmpi.html, "named commands"_command.html mrmpi-1.0~20131122/oinkdoc/rmat.txt0000755000175000017500000000603411734437746016542 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line rmat command :h3 rmat2 command :h3 [Syntax:] rmat N Nz a b c d fraction seed -o out1.file out1.mr rmat2 N Nz a b c d fraction seed -o out1.file out1.mr :pre N = order of matrix, 2^N = number of rows in matrix Nz = average # of non-zeroes per row, Nz * 2^N = total # of non-zeroes a,b,c,d = R-MAT parameters which sum to 1.0 fraction = R-MAT twiddle factor seed = random number seed (positive integer) out1 = graph edges: Key = Vi Vj, Value = NULL :ul [Examples:] rmat 20 8 0.45 0.25 0.25 0.05 0.0 284958 -o NULL mre rmat2 20 8 0.45 0.25 0.25 0.05 0.0 284958 -o tmp.rmat NULL :pre [Description:] These are named commands which generate a sparse random matrix via the procedure defined for R-MAT matrices, as discussed in the paper by "(Chakrabarti)"_#Chakrabarti. Such matrices are often used to represent graphs where the vertices are numbered 1 to Nrows, and the non-zero matrix entries represent edges. The number of rows and non-zero entries are determined by the specified {N} and {Nz} arguments. Depending on the choice of the R-MAT parameters the degree distribution of the resulting graph can be roughly uniform or highly skewed, which is useful in modeling different kind of graphs, e.g. Internet connectivity. The a,b,c,d parameters must sum to 1.0 and represent weighting for the 4 different quadrants of the matrix. As non-zero entries are generated, they are assigned to each quadrant in a recursive manner using the a,b,c,d weightings and a random number generator. A fraction value of 0.0 means the a,b,c,d weightings are used as-is. 
A fraction value > 0.0 but < 1.0 means the weightings are randomly twiddled at each iteration of the recursion. The MapReduce algorithms used for performing the R-MAT generation are described in the paper by "(Plimpton)"_#Plimpton. The rmat command implements the first of the two algorithms discussed in the paper. The rmat2 command implements the second of the two algorithms. See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. These commands take no inputs. Out1 will store the list of edges of the R-MAT graph, or equivalently, the I,J indices of non-zeroes in the matrix. There will be exactly Nz * 2^N entries in out1. This may include some duplicate or self-edges. A duplicate edge is when both (Vi,Vj) or (Vj,Vi) appear. A self-edge is when (Vi,Vi) appears. If desired, these can be removed by further processing; see the "edge_upper"_edge_upper.html command. [Related commands:] "edge_upper"_edge_upper.html :line :link(Chakrabarti) [(Chakrabarti]) Chakrabarti, Zhan, Faloutsos, "R-MAT: A recursive model for graph mining", in SIAM Data Mining (2004). :link(Plimpton) [(Plimpton)] Plimpton and Devine, "MapReduce in MPI for Large-Scale Graph Algorithms", to appear in Parallel Computing (2011). mrmpi-1.0~20131122/oinkdoc/next.txt0000755000175000017500000001024411734437746016553 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line next command :h3 [Syntax:] next variables :pre variables = one or more variable names :ul [Examples:] next x next a t x myTemp :pre [Description:] This command is used with variables defined by the "variable"_variable.html command. It assigns the next value to the variable from the list of values defined for that variable by the "variable"_variable.html command. 
Thus when that variable is subsequently substituted for in an input script command, the new value is used. See the "variable"_variable.html command for info on how to define and use different kinds of variables in OINK input scripts. If a variable name is a single lower-case character from "a" to "z", it can be used in an input script command as $a or $z. If it is multiple letters, it can be used as $\{myTemp\}. If multiple variables are used as arguments to the {next} command, then all must be of the same variable style: {index}, {loop}, {universe}, or {uloop}. An exception is that {universe}- and {uloop}-style variables can be mixed in the same {next} command. All the variables specified with the next command are incremented by one value from their respective list of values. {String}- or {equal}- or {world}-style variables cannot be used with the next command, since they only store a single value. When any of the variables in the next command has no more values, a flag is set that causes the input script to skip the next "jump"_jump.html command encountered. This enables a loop containing a next command to exit. As explained in the "variable"_variable.html command, the variable that has exhausted its values is also deleted. This allows it to be used and re-defined later in the input script. When the next command is used with {index}- or {loop}-style variables, the next value is assigned to the variable for all processors. When the next command is used with {universe}- or {uloop}-style variables, the next value is assigned to whichever processor partition executes the command first. All processors in the partition are assigned the same value. Running OINK on multiple partitions of processors via the "-partition" command-line switch is described in "this section"_Section_build.html#1_4 of the manual. 
{Universe}- and {uloop}-style variables are incremented using the files "tmp.oink.variable" and "tmp.oink.variable.lock" which you will see in your directory during such a OINK run. Here is an example of running a series of simulations using the next command with an {index}-style variable. If this input script is named in.graph, 8 simulations would be run using data files from directories run1 thru run8. variable d index run1 run2 run3 run4 run5 run6 run7 run8 shell cd $d graph -i data.graph shell cd .. clear next d jump in.graph :pre If the variable "d" were of style {universe}, and the same in.graph input script were run on 3 partitions of processors, then the first 3 simulations would begin, one on each set of processors. Whichever partition finished first, it would assign variable "d" the 4th value and run another simulation, and so forth until all 8 simulations were finished. Jump and next commands can also be nested to enable multi-level loops. For example, this script will run 15 simulations in a double loop. variable i loop 3 variable j loop 5 clear ... print Running simulation $i.$j graph -i data.polymer.$i$j next j jump in.script next i jump in.script :pre Here is an example of a double loop which uses the "if"_if.html and "jump"_jump.html commands to break out of the inner loop when a condition is met, then continues iterating thru the outer loop. 
label loopa variable a loop 5 label loopb variable b loop 5 print "A,B = $a,$b" run 10000 if $b > 2 then "jump in.script break" next b jump in.script loopb label break variable b delete :pre next a jump in.script loopa :pre [Related commands:] "jump"_jump.html, "include"_include.html, "shell"_shell.html, "variable"_variable.html, mrmpi-1.0~20131122/oinkdoc/wordfreq.txt0000755000175000017500000000332511734437746017430 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line wordfreq command :h3 [Syntax:] wordfreq Ntop -i in1 -o out1.file out1.mr :pre Ntop = print Ntop of the most frequently occurring words to screen in1 = words: Key = word, Value = NULL out1 = frequency count of each word: Key = word, Value = count :ul [Examples:] wordfreq 10 -i v_files -o full.list NULL wordfreq 10 -i v_files -o NULL NULL :pre [Description:] This is a named command which calculates the frequency of word occurrence in an input data set, which is typically a set of files. See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. In1 stores a set of words. The input is unchanged by this command. If the input is one or more files then the files are read and each "word" is defined as separated by whitespace. Note that you can pass a list of files as the input argument after the "-i" argument by using a variable, which in turn can be initialized with a command-line argument to OINK. E.g. this line would work with the first example above: oink_linux -var files *.cpp < in.script :pre See "this section"_Section_build.html#1_4 of the manual and the "variable"_variable.html doc page for more details. Out1 will store the frequency count of all unique words. 
Additional statistics can be generated and printed via the {Ntop} setting. The highest frequency {Ntop} words will be printed to the screen with their count, in sorted order. If {Ntop} is 0, nothing is printed. [Related commands:] none mrmpi-1.0~20131122/oinkdoc/Manual.py0000644000175000017500000001001111524064716016576 0ustar mathieumathieu#!/usr/local/bin/python # Manaul.py # add per-function documentation to Section_functions.txt in tabular HTML format # extract the comment lines from OINK source files between C-style /* ... */ # invoked by PDFgen.sh when PDF of doc pages is created # Syntax: Manual.py import sys,os,glob,commands,re # mtxt, etc = contents of source files files = glob.glob("../oink/map_*.cpp") files.sort() mtxt = "" for file in files: mtxt += open(file,"r").read() files = glob.glob("../oink/reduce_*.cpp") files.sort() rtxt = "" for file in files: rtxt += open(file,"r").read() files = glob.glob("../oink/compare_*.cpp") files.sort() ctxt = "" for file in files: ctxt += open(file,"r").read() files = glob.glob("../oink/hash_*.cpp") files.sort() htxt = "" for file in files: htxt += open(file,"r").read() files = glob.glob("../oink/scan_*.cpp") files.sort() stxt = "" for file in files: stxt += open(file,"r").read() # mcomm, etc = comments in source files, with /* and */ lines pattern = re.compile("(/\*.*?\*/)",re.DOTALL) mcomm = re.findall(pattern,mtxt) rcomm = re.findall(pattern,rtxt) ccomm = re.findall(pattern,ctxt) hcomm = re.findall(pattern,htxt) scomm = re.findall(pattern,stxt) # mpair, etc = comments in source files, without /* and */ lines and whitespace mpair = [] for comm in mcomm: lines = comm.split("\n") lines = [line.strip() for line in lines] mpair.append([lines[1],lines[2:-1]]) rpair = [] for comm in rcomm: lines = comm.split("\n") lines = [line.strip() for line in lines] rpair.append([lines[1],lines[2:-1]]) cpair = [] for comm in ccomm: lines = comm.split("\n") lines = [line.strip() for line in lines] cpair.append([lines[1],lines[2:-1]]) 
hpair = [] for comm in hcomm: lines = comm.split("\n") lines = [line.strip() for line in lines] hpair.append([lines[1],lines[2:-1]]) spair = [] for comm in scomm: lines = comm.split("\n") lines = [line.strip() for line in lines] spair.append([lines[1],lines[2:-1]]) # re-create Section_functions.txt file below double :line location # txt2html does not know how to create multiline table entries # so write tabular HTML format directly with and txt = open("Section_functions.txt","r").read() separator = "\n:line\n:line\n" halves = txt.split(separator) half2 = "" half2 += "\nMap() functions :link(3_1),h4\n\n" half2 += '
' for pair in mpair: half2 += "\n" half2 += "\n" % pair[0] half2 += "\n" half2 += "\n" half2 += "
%s\n" for line in pair[1]: half2 += "%s
\n" % line half2 += "
\n" half2 += "\n:line\n" half2 += "\nReduce() functions :link(3_1),h4\n\n" half2 += '
' for pair in rpair: half2 += "\n" half2 += "\n" % pair[0] half2 += "\n" half2 += "\n" half2 += "
%s\n" for line in pair[1]: half2 += "%s
\n" % line half2 += "
\n" half2 += "\n:line\n" half2 += "\nCompare() functions :link(3_1),h4\n\n" half2 += '
' for pair in cpair: half2 += "\n" half2 += "\n" % pair[0] half2 += "\n" half2 += "\n" half2 += "
%s\n" for line in pair[1]: half2 += "%s
\n" % line half2 += "
\n" half2 += "\n:line\n" half2 += "\nHash() functions :link(3_1),h4\n\n" half2 += '
' for pair in hpair: half2 += "\n" half2 += "\n" % pair[0] half2 += "\n" half2 += "\n" half2 += "
%s\n" for line in pair[1]: half2 += "%s
\n" % line half2 += "
\n" half2 += "\n:line\n" half2 += "\nScan() functions :link(3_1),h4\n\n" half2 += '
' for pair in spair: half2 += "\n" half2 += "\n" % pair[0] half2 += "\n" half2 += "\n" half2 += "
%s\n" for line in pair[1]: half2 += "%s
\n" % line half2 += "
\n" half2 += "\n:line\n" txt = halves[0] + separator + half2 open("Section_functions.txt","w").write(txt) mrmpi-1.0~20131122/oinkdoc/histo.txt0000755000175000017500000000307311734437746016725 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line histo command :h3 [Syntax:] histo -i in1 -o out1.file out1.mr :pre in1 = anthing: Key = key, Value = value out1 = frequency count of each key: Key = key, Value = count :ul [Examples:] histo -i mrdata -o NULL out1.mr :pre [Description:] This is a named command which calculates the frequency of key occurrence in an input set of key/values. See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. In1 is any MR-MPI object containing key/value pairs. The input is unchanged by this command. In1 cannot be specified as input from file(s) since no assumption is made about how the files store key/value pairs. You would need to read the files into a MR-MPI object as a pre-processing step, using a map() function you provide, before passing that object to the histo command as an input. Out1 will store the frequency count of all unique keys. Out1 cannot be specified as output to a file since no assumption is made about how the key should be formatted for output. You would need to write the files from the output MR-MPI object as a post-processing step, using a map() or scan() function you provide. Statistics on the count of each key will be printed to the screen in sorted order. 
[Related commands:] "degree_stats"_degree_stats.html, "wordfreq"_wordfreq.html mrmpi-1.0~20131122/oinkdoc/Section_build.txt0000755000175000017500000002514411734437746020365 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line 1. Building OINK :h3 This section describes how to build and run OINK, which is a simple C++ program which wraps the MapReduce-MPI (MR-MPI) library. 1.1 "Making OINK"_#1_1 1.2 "Building OINK as a library"_#1_2 1.3 "Running OINK"_#1_3 1.4 "Command-line options"_#1_4 :all(b) :line 1.1 Making OINK :h4,link(1_1) All of the OINK source files are in the "oink" directory of the MR-MPI distribution tarball. The "src" directory contains the source files for the MR-MPI library itself. These are the 4 steps to building OINK: (1) Insure MPI is installed on your system. (2) Build the MR-MPI library. (3) Use or create a oink/MAKE/Makefile.machine file appropriate for your machine. (4) Type "make machine" :ul Here are more details on each step: (1) MPI installation MPI is the message passing interface library, which is likely already installed on your Linux box or Mac, and on most parallel machines. If not, it is freely available. The two most commonly used generic versions are "OpenMPI"_openmpi and "MPICH"_mpich. Download and install one of these if you need to. The default installation location on Linux is under /usr/local. Or if you do not plan to run the MR-MPI library or OINK in parallel, you can use the provided dummy MPI library in the mpistubs dir. From mpistubs, type "make" and you should get a libmpi.a file. If not, you may need to edit the mpistubs/Makefile. (2) Build the MR-MPI library See "this section"_../doc/Start.html of the MapReduce-MPI library doc pages for instructions on how to do this. 
When you have done this a file named src/libmrmpi_machine.a should exist. (3) Create a Makefile.machine appropriate for your machine. See the oink/MAKE dir for examples of these. You may be able to use one of these, or edit one that is close to create one for your machine. The only settings you need to worry about are those in the top section. Set the C++ compiler name and settings appropriate for your box. The only extra libraries used by OINK are MPI and MR-MPI. The settings for the latter are already present in the Makefile.machine files. You may need to change the MPI settings depending on how you did your installation. If you use the MPI compiler wrappers (mpiCC) for building an MPI-based program like OINK, then you likely need no additional -I or -L or LIB settings. If you use your system compilers directly, e.g. g++, then you will typically need these MPI-related settings: An -I setting for where to find the file mpi.h. A -L setting for where the MPI library is, libmpich.a A LIB setting for the MPI library, e.g. -lmpich :ul If you are using the provided dummy MPI library (no parallelism), then see MAKE/Makefile.serial for how to compile/link with it. Note that you should insure you build both OINK and the MR-MPI library with the same MPI. If not, confusion will ensue. (4) Type "make machine" Do this from the oink directory where its source files are. If you just type "make" you will see what machine options are available (first line of oink/MAKE/Makefile.machine files). Some other options are also listed, e.g. for cleaning up. If you type "make machine", an executable file oink_machine should be created, e.g. oink_linux or oink_mac. If that happens, you're done. If some error is generated, then you'll need to edit your oink/MAKE/Makefile.machine. Find a local make or machine expert to help if you have problems. 
If you build OINK on a new kind of machine, for which there isn't a similar Makefile in the oink/MAKE directory, send it to the developers and we'll add it to the OINK distribution. You can make OINK for multiple platforms from the same oink directory. Each target creates its own object sub-directory called Obj_name where it stores the system-specific *.o files. :line 1.2 Building OINK as a library :h4,link(1_2) OINK can be built as a library, which can then be called from another application or a scripting language. This is done by typing: make makelib make -f Makefile.lib foo :pre where foo is the machine name. The first "make" command will create a current Makefile.lib with all the file names in your src dir. The 2nd "make" command will use it to build OINK as a library. This requires that Makefile.foo have a library target (lib) and system-specific settings for ARCHIVE and ARFLAGS. See Makefile.linux for an example. The build will create the file liboink_foo.a which another application can link to. When used from a C++ program, the library allows one or more OINK objects to be instantiated. All of OINK is wrapped in an OINK_NS namespace; you can safely use any of its classes and methods from within your application code, as needed. When used from a C or Fortran program or a scripting language, the library has a simple C-style interface, provided in oink/library.cpp and oink/library.h. :line 1.3 Running OINK :h4,link(1_3) By default, OINK runs by reading commands from stdin; e.g. oink_linux < in.file. This means you first create an input script (e.g. in.file) containing the desired commands. "This section"_Section_script.html describes how input scripts are structured and what commands they contain. You can test OINK on any of the sample inputs provided in the examples directory. OINK input scripts are named in.*. 
Here is how you might run one of the tests on a Linux box, using mpirun to launch a parallel job: cd src make -f Makefile.linux # builds src/libmrmpi.a cd ../oink make linux # builds oink/oink_linux cd ../examples mpirun -np 4 ../oink/oink_linux ../doc/*.txt < in.wordcount :pre If OINK encounters errors in the input script or while running a command it will print an ERROR message and stop or a WARNING message and continue. See "this section"_Section_errors.html for a discussion of the various kinds of errors OINK can or can't detect, a list of all ERROR and WARNING messages, and what to do about them. OINK can run a MapReduce calculation on any number of processors, including a single processor. :line 1.4 Command-line options :h4,link(1_4) At run time, OINK recognizes several optional command-line switches which may be used in any order. Either the full word or the one-letter abbreviation can be used: -echo or -e -partition or -p -in or -i -log or -l -screen or -s -var or -v :ul For example, oink_ibm might be launched as follows: mpirun -np 16 oink_ibm -var file tmp.out -log my.log -screen none < in.graph :pre Here are the details on the options: -echo style :pre Set the style of command echoing. The style can be {none} or {screen} or {log} or {both}. Depending on the style, each command read from the input script will be echoed to the screen and/or logfile. This can be useful to figure out which line of your script is causing an input error. The default value is {log}. The echo style can also be set by using the "echo"_echo.html command in the input script itself. -partition 8x2 4 5 ... :pre Invoke OINK in multi-partition mode. When OINK is run on P processors and this switch is not used, OINK runs in one partition, i.e. all P processors run a single calculation. If this switch is used, the P processors are split into separate partitions and each partition runs its own calculation. The arguments to the switch specify the number of processors in each partition. 
Arguments of the form MxN mean M partitions, each with N processors. Arguments of the form N mean a single partition with N processors. The sum of processors in all partitions must equal P. Thus the command "-partition 8x2 4 5" has 10 partitions and runs on a total of 25 processors. Note that with MPI installed on a machine (e.g. your desktop), you can run on more (virtual) processors than you have physical processors. The input script specifies what simulation is run on which partition; see the "variable"_variable.html and "next"_next.html commands. -in file :pre Specify a file to use as an input script. This is an optional switch when running OINK in one-partition mode. If it is not specified, OINK reads its input script from stdin - e.g. oink_linux < in.run. This is a required switch when running OINK in multi-partition mode, since multiple processors cannot all read from stdin. -log file :pre Specify a log file for OINK to write status information to. In one-partition mode, if the switch is not used, OINK writes to the file log.oink. If this switch is used, OINK writes to the specified file. In multi-partition mode, if the switch is not used, a log.oink file is created with hi-level status information. Each partition also writes to a log.oink.N file where N is the partition ID. If the switch is specified in multi-partition mode, the hi-level logfile is named "file" and each partition also logs information to a file.N. For both one-partition and multi-partition mode, if the specified file is "none", then no log files are created. Using a "log"_log.html command in the input script will override this setting. -screen file :pre Specify a file for OINK to write its screen information to. In one-partition mode, if the switch is not used, OINK writes to the screen. If this switch is used, OINK writes to the specified file instead and you will see no screen output. In multi-partition mode, if the switch is not used, hi-level status information is written to the screen. 
Each partition also writes to a screen.N file where N is the partition ID. If the switch is specified in multi-partition mode, the hi-level screen dump is named "file" and each partition also writes screen information to a file.N. For both one-partition and multi-partition mode, if the specified file is "none", then no screen output is performed. -var name value1 value2 ... :pre Specify a variable that will be defined for substitution purposes when the input script is read. "Name" is the variable name which can be a single character (referenced as $x in the input script) or a full string (referenced as $\{abc\}). An "index-style variable"_variable.html will be created and populated with the subsequent values, e.g. a set of filenames. Using this command-line option is equivalent to putting the line "variable name index value1 value2 ..." at the beginning of the input script. Defining an index variable as a command-line argument overrides any setting for the same index variable in the input script, since index variables cannot be re-defined. See the "variable"_variable.html command for more info on defining index and other kinds of variables and "this section"_Section_commands.html#3_2 for more info on using variables in input scripts. mrmpi-1.0~20131122/oinkdoc/Manual.txt0000755000175000017500000001417611734437746017022 0ustar mathieumathieu OINK Users Manual "MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line

OINK Documentation :c,h3 Version info: :h4 The OINK "version" is the date when it was released, such as 1 Feb 2011. OINK and the MR-MPI library are updated continuously. Whenever we fix a bug or add a feature, we release it immediately, and post a notice on "this page of the WWW site"_bug. Each dated copy of OINK contains all the features and bug-fixes up to and including that version date. The version date is printed to the screen and log file every time you run OINK. It is also in the file oink/version.h and in the MR-MPI directory name created when you unpack a tarball. If you browse the HTML or PDF doc pages for OINK on the MR-MPI WWW site, they always describe the most current version of OINK. :ulb,l If you browse the HTML or PDF doc pages for OINK included in your tarball, they describe the version you have. :ule,l OINK is a simple scripting wrapper around the "MapReduce-MPI library"_mws, and also provides an easy-to-use development framework for writing new MapReduce algorithms and codes. Like the MR-MPI library, OINK will run on any platform (serial or parallel) that supports "MPI"_mpi. Note the MR-MPI library has its own "manual and doc pages"_md. The name OINK is meant to evoke the aroma of the "Apache Pig"_pig platform which wraps the "Hadoop"_hadoop MapReduce capabilities with its high-level Pig Latin language. Since OINK has only a small fraction of Pig's capability, it is more the sound of a pig, than the pig itself. Source code for OINK and the MR-MPI library were developed at Sandia National Laboratories, a US Department of Energy facility. They are freely available for download from the "MR-MPI web site"_mrmpi and are distributed under the terms of the modified "Berkeley Software Distribution (BSD) License"_bsd. This basically means they can be used by anyone for any purpose. See the LICENSE file provided with the distribution for more details. 
The authors of OINK and the MR-MPI library are "Steve Plimpton"_sjp and "Karen Devine"_kdd who can be contacted via email: sjplimp,kddevin at sandia.gov. :link(bsd,http://en.wikipedia.org/wiki/BSD_license) :link(kdd,http://www.cs.sandia.gov/~kddevin) :link(sjp,http://www.sandia.gov/~sjplimp) :link(mrmpi,http://mapreduce.sandia.gov) :link(bug,http://mapreduce.sandia.gov/bug.html) :link(pig,http://pig.apache.org) :link(hadoop,http://hadoop.apache.org) :line Goals of OINK :h4 (1) To allow MapReduce algorithms which call the MR-MPI library to be written with a minimum of extraneous code, to work with input/output in various forms, and to be chained together and driven via a simple, yet versatile scripting language. :ulb,l (2) To create an archive of map() and reduce() functions for re-use by different algorithms. :l (3) To provide a scripted interface to the low-level MR-MPI library calls that can speed development/debugging of new algorithms before coding them up in C++ or another language. :l,ule We think the first two goals are largely met. See the section on "Adding Commands to OINK"_Section_commands.html and the "named command"_command.html, "input"_input.html, and "output"_output.html doc pages for details of the first goal. See the section on "Adding Functions to OINK"_Section_functions.html for details of the second goal. The third goal, however, is only partially met. See the "MR-MPI library commands"_mrmpi.html doc page for its current status. The sticking point here is that in a real programming language you can pass a pointer to an arbitrary data structure to your map() or reduce() functions, but it is hard to do that from a scripting language using text input without re-inventing something like "Python"_python. :link(python,http://www.python.org) :line Contents of OINK Manual :c,h4 OINK aims to be a simple scripting interface and development environment and the lightweight documentation reflects that. 
Once you are familiar with OINK, you may want to bookmark "this page"_Section_script.html#comm at Section_script.html#comm since it gives quick access to documentation for all OINK commands. "PDF file"_Manual.pdf of the entire manual, generated by "htmldoc"_http://www.easysw.com/htmldoc "Building OINK"_Section_build.html :l 1.1 "Making OINK"_1_1 :ulb,b 1.2 "Building OINK as a library"_1_2 :b 1.3 "Running OINK"_1_3 :b 1.4 "Command-line options"_1_4 :ule,b "OINK Scripts"_Section_script.html :l 2.1 "Input script operation"_2_1 :ulb,b 2.2 "Parsing rules"_2_2 :b 2.3 "Input script commands"_2_3 :ule,b "Adding Functions to OINK"_Section_functions.html :l 3.1 "Map() functions"_3_1 :ulb,b 3.2 "Reduce() functions"_3_2 :b 3.3 "Compare() functions"_3_3 :b 3.4 "Hash() functions"_3_4 :b 3.5 "Scan() functions"_3_5 :ule,b "Adding Commands to OINK"_Section_commands.html :l 4.1 "Source files for the new class"_4_1 :ulb,b 4.2 "Methods in the new class"_4_2 :b 4.3 "Calls to the OINK object manager"_4_3 :b 4.4 "Calling back to map() and reduce() functions"_4_4 :ule,b "Errors"_Section_errors.html :l 5.1 "Error & warning messages"_5_1 :ulb,b,ule :ole :link(1_1,Section_build.html#1_1) :link(1_2,Section_build.html#1_2) :link(1_3,Section_build.html#1_3) :link(1_4,Section_build.html#1_4) :link(2_1,Section_script.html#2_1) :link(2_2,Section_script.html#2_2) :link(2_3,Section_script.html#2_3) :link(3_1,Section_functions.html#3_1) :link(3_2,Section_functions.html#3_2) :link(3_3,Section_functions.html#3_3) :link(3_4,Section_functions.html#3_4) :link(3_5,Section_functions.html#3_5) :link(4_1,Section_commands.html#4_1) :link(4_2,Section_commands.html#4_2) :link(4_3,Section_commands.html#4_3) :link(4_4,Section_commands.html#4_4) :link(5_1,Section_errors.html#5_1) mrmpi-1.0~20131122/oinkdoc/pagerank.txt0000755000175000017500000000435711734437746017375 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c 
:link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line pagerank command :h3 [Syntax:] pagerank tolerance Nmax alpha -i in1 -o out1.file out1.mr :pre tolerance = stopping tolerance on PageRank iterations Nmax = max # of matrix-vector iterations to allow alpha = ??? in1 = graph edges: Key = Vi Vj, Value = weight out1 = PageRank of each vertex: Key = Vi, Value = rank :ul [Examples:] pagerank 0.01 50 0.3 -i mre -o prank.txt NULL :pre [Description:] This is a named command which computes the PageRank numeric score for vertices in a directed graph. The PageRank of a vertex is a measure of how "important" it is in the graph, based on what vertices point to it, and the PageRank of those vertices. If the graph represents WWW pages linked to each other, then this is part of how Google ranks the relative importance of pages it shows you as the result of a search. See "this Google site"_#Google for more information. The PageRank calculation is performed via an iterative matrix-vector multiply operation, where the graph can be thought of as a sparse matrix. The MapReduce version of this PageRank implementation is described in the paper of "(Plimpton)"_#Plimpton. Describe what alpha and tolerance and maxiter do. See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. In1 stores a set of edges with weights, assumed to have no duplicates, meaning that (Vi,Vj) only appears once. Each edge is directed in the sense that Vi points to Vj. The weight is effectively the non-zero value of the (Vi,Vj) element of the matrix. Typically it should be 1/D where D is the out-degree of Vi, but this is not required. The input is unchanged by this command. Out1 will store the list of vertices and the numeric rank of each vertex. 
[Related commands:] none :line :link(Google) [(Google)] Some citation to a Google paper or WWW site, explaining PageRank, (2005). :link(Plimpton) [(Plimpton)] Plimpton and Devine, "MapReduce in MPI for Large-Scale Graph Algorithms", to appear in Parallel Computing (2011). mrmpi-1.0~20131122/oinkdoc/sssp.txt0000755000175000017500000000416111734437746016566 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line sssp command :h3 sssp2 command :h3 [Syntax:] sssp N seed -i in1 -o out1.file out1.mr :pre sssp2 N seed -i in1 -o out1.file out1.mr :pre N = # of random starting vertices to choose seed = random number seed (positive integer) in1 = graph edges: Key = Vi Vj, Value = weight out1 = distance to each vertex: Key = Vi, Value = distance :ul [Examples:] sssp 10 12345 -i mre -o sssp.dist mrdist sssp2 10 12345 -i mre -o sssp.dist mrdist :pre [Description:] These are named commands which compute the shortest path to each vertex from a source vertex in an undirected graph. This is called a single-source shortest-path (SSSP) calculation. The source vertex is selected randomly. Each edge in the graph has a weight. The shortest-path distance to any other vertex is the minimum summed weight of a list of consecutive edges that connect the two vertices. This calculation involves a breadth-first search on the graph. The MapReduce algorithms used for performing the SSSP calculation are described in the paper of "(Plimpton)"_#Plimpton. The sssp command implements the first of the two algorithms discussed in the paper. The sssp2 command implements the second of the two algorithms. See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. 
In1 stores a set of edges with floating point weights, assumed to have no duplicates or self-edges. This means that either (Vi,Vj) or (Vj,Vi) appears, but not both. Also (Vi,Vi) does not appear. The input is unchanged by this command. Out1 will store the list of vertices and the distance to each vertex. If the specified N > 1, then this will be the SSSP result for only the last source vertex randomly selected. [Related commands:] none :line :link(Plimpton) [(Plimpton)] Plimpton and Devine, "MapReduce in MPI for Large-Scale Graph Algorithms", to appear in Parallel Computing (2011). mrmpi-1.0~20131122/oinkdoc/set.txt0000755000175000017500000001156111734437746016373 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line set command :h3 [Syntax:] set keyword value ... :pre one or more keyword/value pairs may be appended :ulb,l keyword = {verbosity} or {timer} or {memsize} or {outofcore} or {scratch} or {prepend} or {substitute} :l {verbosity} value = setting for created MapReduce objects {timer} value = setting for created MapReduce objects {memsize} value = setting for created MapReduce objects {outofcore} value = setting for created MapReduce objects {minpage} value = setting for created MapReduce objects {maxpage} value = setting for created MapReduce objects {freepage} value = setting for created MapReduce objects {zeropage} value = setting for created MapReduce objects {scratch} value = setting for created MapReduce objects {prepend} value = string to prepend to file/directory path names {substitute} value = 0 or 1 = how to substitute for "%" in path name :pre :ule [Examples:] set verbosity 2 set verbosity 1 timer 1 memsize 16 set scratch /tmp/mr set prepend /scratch%/data substitute 1 [Description:] This command sets global settings which are used in the creation of 
MR-MPI objects and the underlying MapReduce objects they wrap. Note that many of these setting names have the same meaning they do in the MR-MPI library themselves, as discussed on "this doc page"_../doc/settings.html. The settings for the {verbosity}, {timer}, {memsize}, {outofcore}, {minpage}, {maxpage}, {freepage}, and {zeropage} keywords are used by the "mr"_mr.html command when it creates a MapReduce object to set its attributes. Note that the "mr"_mr.html command itself can override several of these global settings. "Named commands"_command.html can also create MapReduce objects, either when inputting and outputting data, or when the run() method in the named command class invokes certain methods, like create_mr() or copy_mr(). Each time a new MapReduce object is created, these same global settings are applied to it. See "this doc page"_Section_commands.html for more discussion of the input/output options and these methods. The {scratch} keyword is a directory pathname which all MapReduce objects will use for writing temporary files when they operate in out-of-core mode. Every MapReduce object created by OINK will have its scratch directory set to this value, via the fpath() call described on "this doc page"_../doc/settings.html. The {prepend} and {substitute} keywords affect how file and directory names are interpreted by OINK. File and directory names are used as input and output options to "named commands"_command.html via the "-i" and "-o" arguments in an input script. Before these path names are passed to the MR-MPI library, e.g. as part of a "map()"_../doc/map.html method, they can have a directory name prepended to them, and "%" characters in the path name substituted for with a processor ID. This is to enable flexible options for input/output of different files by different processors. If the {prepend} keyword is set, its value should be a directory name (without the trailing "/"). 
This will be prepended to every input and output pathname used by OINK, including the scratch directory noted above. This global setting can be overridden for a single input or output of the next-executed "named command"_command.html by setting the same {prepend} keyword in the "input"_input.html or "output"_output.html command. Input file or directory names can contain the wildcard character "%". Only the first occurrence of the wildcard character is replaced. If the {substitute} keyword is set to 0, then a "%" is replaced by the processor ID, 0 to Nprocs-1. If it is set to N > 0, then "%" is replaced by (proc-ID % N) + 1. I.e. for 8 processors and N = 4, then the 8 processors replace the "%" with (1,2,3,4,1,2,3,4). This can be useful for multi-core nodes where each core has its own local disk. E.g. you wish each core to read data from one disk. As with the {prepend} keyword, this substitution rule will be applied to every input and output pathname used by OINK, including the scratch directory noted above. This global setting can be overridden for a single input or output of the next-executed "named command"_command.html by setting the same {substitute} keyword in the "input"_input.html or "output"_output.html command. [Related commands:] "input"_input.html, "output"_output.html, "named commands"_command.html, "MR-MPI library commands"_mrmpi.html, "Section_commands"_Section_commands.html [Defaults:] The setting defaults are the same as for the MR-MPI library itself, namely verbosity = 0, timer = 0, memsize = 64, outofcore = 0, minpage = 0, maxpage = 0, freepage = 1, zeropage = 0, scratch = ".". There are additional default values: prepend = NULL, and substitute = 0. 
mrmpi-1.0~20131122/oinkdoc/echo.txt0000755000175000017500000000145211734437746016514 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line echo command :h3 [Syntax:] echo style :pre style = {none} or {screen} or {log} or {both} :ul [Examples:] echo both echo log :pre [Description:] This command determines whether OINK echoes each input script command to the screen and/or log file as it is read and processed. If an input script has errors, it can be useful to look at echoed output to see the last command processed. The "command-line switch"_Section_start.html#2_4 -echo can be used in place of this command. The default is echo log, i.e. commands are echoed to the log file. [Related commands:] none mrmpi-1.0~20131122/oinkdoc/neighbor.txt0000755000175000017500000000242011734437746017367 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line neighbor command :h3 [Syntax:] neighbor -i in1 -o out1.file out1.mr :pre in1 = graph edges: Key = Vi Vj, Value = NULL out1 = neighbors of each vertex: Key = Vi, Value = Vj Vk ... :ul [Examples:] degree -i mrv -o degree.list NULL [Description:] This is a named command which calculates the list of neighbors of each vertex in an undirected graph. A list of all the vertices each vertex shares an edge with is calculated. These are the first neighbors of each vertex. See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. In1 stores a set of edges, assumed to have no duplicates or self-edges. This means that either (Vi,Vj) or (Vj,Vi) appears, but not both. 
Also (Vi,Vi) does not appear. The input is unchanged by this command. Out1 will store the list of neighbor vertices for each vertex. The list is a single value in a key/value pair which is a vector of vertex IDs, one after the other. [Related commands:] "degree"_degree.html, "neigh_tri"_neigh_tri.html mrmpi-1.0~20131122/oinkdoc/Section_functions.txt0000755000175000017500000001631611734437746021277 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line 3. Adding Callback Functions to OINK :h3 In the oink directory, the files map_*.cpp, reduce_*.cpp, compare_*.cpp, hash_*.cpp, and scan_*.cpp each contain one or more functions which can be used as callback methods, passed to MR-MPI library calls, such as the "map()"_../doc/map.html and "reduce()"_../doc/reduce.html operations. This can be done either in "named commands"_command.html that you write, as described in "this section"_Section_commands.html of the documentation, or in "MR-MPI library commands"_mrmpi.html made directly from an OINK input script. The collection of these files and callback functions is effectively a library of tools that can be used by new "named commands"_command.html or your input script to speed the development of new MapReduce algorithms and workflows. Over time, we intend to add new callback functions to OINK, and also invite users to send their own functions to the developers for inclusion in OINK. The map(), reduce(), and scan() callback functions include a "void *ptr" as a final argument, which the caller can pass to the callback. This is typically done to enable the callback function to access additional parameters stored by the caller. 
When doing this with functions listed in the map_*.cpp, reduce_*.cpp, and scan_*.cpp files in OINK, you will want to make the data these pointers point to "portable", so that any "named command" can use it. Thus you should not typically encode class-specific or command-specific data in the structure pointed to. Instead, your caller should create the minimal data structure that the callback function needs to operate, and store the structure in a map_*.h file that corresponds to the specific map_*.cpp file that contains the function (or reduce_*.h or scan_*.h). See the oink/map_rmat_generate.h file as an example. It contains the definition of an RMAT_params structure, which is used by both the "rmat command"_rmat.html and the map() methods it uses, listed in map_rmat_generate.cpp. Both the rmat.h and map_rmat_generate.cpp files include the map_rmat_generate.h header file to accomplish this. Other commands or callback functions could use the same data structure by including that header file. The following sections list the various callback functions currently included in OINK, and a brief explanation of what each of them does. Note that map() functions come in 4 flavors, depending on what MR-MPI library "map() method"_../doc/map.html is being used. Similarly, scan() functions come in 2 flavors, as documented on the "scan() method"_../doc/scan.html page. Map_*.cpp and scan_*.cpp files within OINK can contain any of the 4 or 2 flavors of map() and scan() methods. 3.1 "Map() functions"_#3_1 3.2 "Reduce() functions"_#3_2 3.3 "Compare() functions"_#3_3 3.4 "Hash() functions"_#3_4 3.5 "Scan() functions"_#3_5 :all(b) The documentation below this double line is auto-generated when the OINK manual is created. This is done by extracting C-style documentation text from the map_*.cpp, reduce_*.cpp, compare_*.cpp, hash_*.cpp, and scan_*.cpp files in the oink directory. Thus you should not edit content below this double line. 
In the *.cpp files in the oink directory, the lines between a line with a "/*" and a line with a "*/" are extracted. In the tables below, the first such line of extracted text is assumed to be the function name and appears in the left column. The remaining lines appear in the right columns. :line :line Map() functions :link(3_1),h4
add_label add a default integer label to each key, key could be vertex or edge
input: key = anything, value = NULL
output: key = unchanged, value = 1
add_weight add a default floating point weight to each key, key could be vertex or edge
input: key = anything, value = NULL
output: key = unchanged, value = 1.0
edge_to_vertex emit 1 vertex for each edge, just first one
input: key = Vi Vj, value = NULL
output: key = Vi, value = NULL
edge_to_vertex_pair emit 1 vertex for each edge, just first one
input: key = Vi Vj, value = NULL
output: key = Vi, value = NULL
edge_to_vertices emit 2 vertices for each edge
input: key = Vi Vj, value = NULL
output:
key = Vi, value = NULL
key = Vj, value = NULL
edge_upper emit each edge with Vi < Vj, drop self-edges with Vi = Vj
input: key = Vi Vj, value = NULL
output: key = Vi Vj, value = NULL, with Vi < Vj
invert invert key and value
input: key, value
output: key = value, value = key
read_edge read edges from file, formatted with 2 vertices per line
output: key = Vi Vj, value = NULL
read_edge_label read edges and labels from file
file format = 2 vertices and integer label per line
output: key = Vi Vj, value = label
read_edge_weight read edges and weights from file
file format = 2 vertices and floating point weight per line
output: key = Vi Vj, value = weight
read_vertex_label read vertices and labels from file
file format = vertex and integer label per line
output: key = Vi, value = label
read_vertex_weight read vertices and weights from file
file format = vertex and floating point weight per line
output: key = Vi, value = weight
read_words read words from file, separated by whitespace
output: key = word, value = NULL
rmat_generate generate graph edges via recursive R-MAT algorithm
input: # to generate & R-MAT params extracted from RMAT_struct in ptr
output: key = Vi Vj, value = NULL
:line Reduce() functions :link(3_2),h4
count count number of values associated with key
input: KMV with key and one or more values
output: key = unchanged, value = count
cull eliminate duplicate values
input: KMV with key and one or more values (assumed to be duplicates)
output: key = unchanged, value = first value
:line Compare() functions :link(3_3),h4
:line Hash() functions :link(3_4),h4
:line Scan() functions :link(3_5),h4
print_edge print out an edge to a file
input: key = Vi Vj, value = NULL
print_string_int print out key as string and value as int, to a file
input: key = string, value = int
print_vertex print out a vertex to a file
input: key = Vi, value = NULL
:line mrmpi-1.0~20131122/oinkdoc/include.txt0000755000175000017500000000161711734437746017224 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line include command :h3 [Syntax:] include file :pre file = filename of new input script to switch to :ul [Examples:] include newfile include in.run2 :pre [Description:] This command opens a new input script file and begins reading OINK commands from that file. When the new file is finished, the original file is returned to. Include files can be nested as deeply as desired. If input script A includes script B, and B includes A, then OINK could run for a long time. If the filename is a variable (see the "variable"_variable.html command), different processor partitions can run different input scripts. [Related commands:] "variable"_variable.html, "jump"_jump.html mrmpi-1.0~20131122/oinkdoc/luby_find.txt0000755000175000017500000000341511734437746017552 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line luby_find command :h3 [Syntax:] luby_find seed -i in1 -o out1.file out1.mr :pre seed = random number seed (positive integer) in1 = graph edges: Key = Vi Vj, Value = NULL out1 = minimal independent set: Key = Vi, Value = NULL :ul [Examples:] luby_find 982938 -i mre -o mis.list mis :pre [Description:] This is a named command which computes a minimal independent set (MIS) of vertices for an undirected graph. A MIS contains vertices which do not share an edge and to which no additional vertex can be added. For a given graph there are typically many possible MIS's; the MIS that is computed is a function of the specified random number seed. 
The MIS is found by "Luby's algorithm"_#Luby, which is an iterative method. The MapReduce version of Luby's algorithm implemented by this command is discussed in the paper of "(Plimpton)"_#Plimpton. See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. In1 stores a set of edges, assumed to have no duplicates or self-edges. This means that either (Vi,Vj) or (Vj,Vi) appears, but not both. Also (Vi,Vi) does not appear. The input is unchanged by this command. Out1 will store the list of vertices in the MIS. [Related commands:] none :line :link(Luby) [(Luby)] Luby, "A Simple Parallel Algorithm for the Maximal Independent Set Problem", SIAM J Computing, 15, 1036-1055 (1986). :link(Plimpton) [(Plimpton)] Plimpton and Devine, "MapReduce in MPI for Large-Scale Graph Algorithms", to appear in Parallel Computing (2011). mrmpi-1.0~20131122/oinkdoc/clear.txt0000755000175000017500000000165711734437746016673 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line clear command :h3 [Syntax:] clear :pre [Examples:] (commands for 1st computation) clear (commands for 2nd computation) :pre [Description:] This command deletes all data structures and restores all settings to their default values. Once a clear command has been executed, it is as if OINK were starting over, with only the exceptions noted below. This command enables multiple jobs to be run sequentially from one input script. These settings are not affected by a clear command: the working directory ("shell"_shell.html command), log file status ("log"_log.html command), echo status ("echo"_echo.html command), and input script variables ("variable"_variable.html command). 
[Related commands:] none mrmpi-1.0~20131122/oinkdoc/log.txt0000755000175000017500000000173411734437746016362 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line log command :h3 [Syntax:] log file :pre file = name of new logfile :ul [Examples:] log log.graph :pre [Description:] This command closes the current OINK log file, opens a new file with the specified name, and begins logging information to it. If the specified file name is {none}, then no new log file is opened. If multiple processor partitions are being used, the file name should be a variable, so that different processors do not attempt to write to the same log file. The file "log.oink" is the default log file for an OINK run. The name of the initial log file can also be set by the command-line switch -log. See "this section"_Section_start.html#2_6 for details. The default OINK log file is named log.oink [Related commands:] none mrmpi-1.0~20131122/oinkdoc/output.txt0000755000175000017500000001016311734437746017135 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line output command :h3 [Syntax:] output N keyword value ... :pre N = which output to set options for :ulb,l one or more keyword/value pairs may be appended :l keyword = {prepend} or {substitute} {prepend} value = string to prepend to file path names {substitute} value = 0 or 1 = how to substitute for "%" in path name :ule [Examples:] output 1 substitute 4 output 2 substitute 4 prepend /scratch%/hadoop-datastore/local_files :pre [Description:] This command is used to control the writing of data that a "named command"_command.html performs as part of its output. 
It does this by setting options on specific outputs to "named commands"_command.html. The options set by this command are in effect for ONLY the next named command. After a named command is invoked, it restores all output options to their default values. Note that all of the options which can be set by this command have default values, so you don't need to set those you don't want to change. As described on the "named command"_command.html doc page, a named command may specify one or more output descriptors. Each descriptor is a pair of arguments, the first of which is an output filename (if it is not specified as NULL). OINK converts the specified argument into an actual filename which is opened by each processor. The purpose of the output command is to give you control over how that conversion takes place. The {N} value corresponds to a particular output descriptor, as defined by the "named command"_command.html. It should be an integer from 1 to Noutput, where Noutput is the number of output descriptors the command requires. The output command can be used multiple times with the same {N} to specify different parameters, e.g. one at a time. The remaining arguments are pairs of {keywords} and {values}. One or more can be specified. :line The {prepend} and {substitute} keywords alter the file and directory path names specified with the filename of an output descriptor in a named command. IMPORTANT NOTE: The {prepend} and {substitute} keywords can also be set globally so that their values will be applied to all output descriptors of all "named commands"_command.html. See the "set"_set.html command for details. If an output command is not used to override the global value, then the global value is used by the "named command"_command.html. The {prepend} keyword specifies a path name to prepend to the output file specified with the "named command"_command.html. 
The prepend string is presumed to be a directory name and should be specified without the trailing "/" character, since that is added when the prepending is done. Output file or directory names specified with the "named command"_command.html can contain either or both of two wildcard characters: "%" or "*". Only the first occurrence of each wildcard character is replaced. If the {substitute} keyword is set to 0, then a "%" is replaced by the processor ID, 0 to Nprocs-1. If it is set to N > 0, then "%" is replaced by (proc-ID % N) + 1. I.e. for 8 processors and N = 4, then the 8 processors replace the "%" with (1,2,3,4,1,2,3,4). This can be useful for multi-core nodes where each core has its own local disk. E.g. you wish each core to write data to one disk. IMPORTANT NOTE: The processor ID is also added as a suffix to the specified output file by each processor, so that one output file per processor is generated. This is in addition to any replacement of a "%" wildcard character. If a "*" appears, then it is replaced with a 1. Unlike for "input files"_input.html, this is not a particularly useful wildcard for output files. :line [Related commands:] "input"_input.html, "named commands"_command.html, "how to write named commands"_Section_commands.html, "set"_set.html [Defaults:] The option defaults are prepend = NULL, substitute = 0, multi = 1, mmode = 0, recurse = 0, self = 0, readfile = 0, nmap = 0, sepchar = newline character, sepstr = newline, delta = 80. mrmpi-1.0~20131122/oinkdoc/shell.txt0000755000175000017500000000370311734437746016706 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line shell command :h3 [Syntax:] shell style args :pre style = {cd} or {mkdir} or {mv} or {rm} or {rmdir} :ulb,l {cd} arg = dir dir = directory to change to {mkdir} args = dir1 dir2 ... 
dir1,dir2 = one or more directories to create {mv} args = old new old = old filename new = new filename {rm} args = file1 file2 ... file1,file2 = one or more filenames to delete {rmdir} args = dir1 dir2 ... dir1,dir2 = one or more directories to delete :pre :ule [Examples:] shell cd sub1 shell cd .. shell mkdir tmp1 tmp2 tmp3 shell rmdir tmp1 shell mv log.lammps hold/log.1 shell rm TMP/file1 TMP/file2 :pre [Description:] Execute a shell command. Only a few simple file-based shell commands are supported, in Unix-style syntax. With the exception of {cd}, all commands are executed by only a single processor, so that files/directories are not being manipulated by multiple processors. The {cd} style executes the Unix "cd" command to change the working directory. All subsequent OINK commands that read/write files will use the new directory. All processors execute this command. The {mkdir} style executes the Unix "mkdir" command to create one or more directories. The {mv} style executes the Unix "mv" command to rename a file and/or move it to a new directory. The {rm} style executes the Unix "rm" command to remove one or more files. The {rmdir} style executes the Unix "rmdir" command to remove one or more directories. A directory must be empty to be successfully removed. IMPORTANT NOTE: OINK does not detect errors or print warnings when any of these Unix commands execute. E.g. if the specified directory does not exist, executing the {cd} command will silently not do anything. 
[Related commands:] none mrmpi-1.0~20131122/oinkdoc/tri_find.txt0000755000175000017500000000305511734437746017375 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line tri_find command :h3 [Syntax:] tri_find -i in1 -o out1.file out1.mr :pre in1 = graph edges: Key = Vi Vj, Value = NULL out1 = triangles: Key = Vi Vj Vk, Value = NULL :ul [Examples:] tri_find -i mre -o tri.list mtri :pre [Description:] This is a named command which enumerates all the triangles in an undirected graph. A triangle is a set of 3 vertices I,J,K for which the edges IJ, JK, IK all exist. The triangles are found via the MapReduce algorithm of "(Cohen)"_#Cohen discussed in his paper and in the paper of "(Plimpton)"_#Plimpton. Note that even small graphs can have large numbers of triangles if there are very high-degree vertices. See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. In1 stores a set of edges, assumed to have no duplicates or self-edges. This means that either (Vi,Vj) or (Vj,Vi) appears, but not both. Also (Vi,Vi) does not appear. The input is unchanged by this command. Out1 will store the list of triangles. [Related commands:] none :line :link(Cohen) [(Cohen)] Cohen, "Graph Twiddling in a MapReduce World", Computing in Science and Engineering, 11, 29-41 (2009). :link(Plimpton) [(Plimpton)] Plimpton and Devine, "MapReduce in MPI for Large-Scale Graph Algorithms", to appear in Parallel Computing (2011). 
mrmpi-1.0~20131122/oinkdoc/input.txt0000755000175000017500000001470611734437746016743 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line input command :h3 [Syntax:] input N keyword value ... :pre N = which input to set options for :ulb,l one or more keyword/value pairs may be appended :l keyword = {prepend} or {substitute} or {multi} or {mmode} or {recurse} or {self} or {readfile} or {nmap} or {sepchar} or {sepstr} or {delta} {prepend} value = string to prepend to file/directory path names {substitute} value = 0 or 1 = how to substitute for "%" in path name {multi} value = Nmulti = multiplicity of path names to generate {mmode} value = 0 or 1 or 2 = which style of map() method to invoke {recurse} value = 0 or 1 = passed to map() method {self} value = 0 or 1 = passed to map() method {readfile} value = 0 or 1 = passed to map() method {nmap} value = number of map tasks = passed to map() method {sepchar} value = single character = passed to map() method {sepstr} value = string = passed to map() method {delta} value = Ndelta = passed to map() method :pre :ule [Examples:] input 1 multi 4 input 2 self 1 substitute 4 prepend /scratch%/hadoop-datastore/local_files :pre [Description:] This command is used to control the reading of input data that a "named command"_command.html performs as part of its input. It does this by setting options on specific inputs to "named commands"_command.html. The options set by this command are in effect for ONLY the next named commmand. After a named command is invoked, it restores all input options to their default values. Note that all of the options which can be set by this command have default values, so you don't need to set those you don't want to change. 
As described on the "named command"_command.html doc page, a named command may specify one or more input descriptors. If the descriptor is one or more file or directory names, then each of them is converted into a list of strings which is passed to a map() method of a created MR-MPI object, along with various other arguments needed by the map() method. The purpose of the input command is to give you control over how that conversion takes place and what the values of those additional arguments are. The {N} value corresponds to a particular input descriptor, as defined by the "named command"_command.html. It should be an integer from 1 to Ninput, where Ninput is the number of input descriptors the command requires. The input command can be used multiple times with the same {N} to specify different parameters, e.g. one at a time. The remaining arguments are pairs of {keywords} and {values}. One or more can be specified. :line The {prepend}, {substitute}, and {multi} keywords alter the file and directory path names specified with an input descriptor in a named command. IMPORTANT NOTE: The {prepend}, {substitute}, and {multi} keywords are applied to each file or directory name in the list of such names that the "named command"_command.html uses in its input descriptor. IMPORTANT NOTE: The {prepend} and {substitute} keywords can also be set globally so that their values will be applied to all input descriptors of all "named commands"_command.html. See the "set"_set.html command for details. If an input command is not used to override the global value, then the global value is used by the "named command"_command.html. The {prepend} keyword specifies a path name to prepend to each input file or directory name specified with the "named command"_command.html. The prepend string is presumed to be a directory name and should be specified without the trailing "/" character, since that is added when the prepending is done. 
Input file or directory names specified with the "named command"_command.html can contain either or both of two wildcard characters: "%" or "*". Only the first occurrence of each wildcard character is replaced. If the {substitute} keyword is set to 0, then a "%" is replaced by the processor ID, 0 to Nprocs-1. If it is set to N > 0, then "%" is replaced by (proc-ID % N) + 1. I.e. for 8 processors and N = 4, then the 8 processors replace the "%" with (1,2,3,4,1,2,3,4). This can be useful for multi-core nodes where each core has its own local disk. E.g. you wish each core to read data from one disk. If a "*" appears, then the file or directory name is duplicated N times where N is the value set by the {multi} keyword. In each of the N duplicates, the "*" is replaced by the number 1 to N. Again, this can be useful for multi-core nodes where each core has its own local disk. E.g. you want a single core to read data from each of several local disks on the node, presumably because you have launched an MPI job so that it runs on a single core per node. :line The {mmode} keyword stands for "map mode" and determines what form of the "MR-MPI library map() method"_../doc/map.html is invoked by the "named command"_command.html. It is up to the coding of the "named command" to determine which of these forms of data input it supports. There are 3 variants of the map() method which involve file input: mmode = 0 = map(int nstr, char **strings, int self, int recurse, int readfile, void (*mymap)(), void *ptr) mmode = 1 = map(int nmap, int nstr, char **strings, int recurse, int readfile, char sepchar, int delta, void (*mymap)(), void *ptr) mmode = 2 = map(int nmap, int nstr, char **strings, int recurse, int readfile, char *sepstr, int delta, void (*mymap)(), void *ptr) :pre The "nstr" and "strings" arguments to these methods are created by OINK, using the settings described above. The remaining arguments are set by the keywords of the input command, as needed. 
Note that some keywords have no meaning for certain map() method variants, in which case they are simply ignored. The meaning of the {self}, {recurse}, {readfile}, {nmap}, {sepchar}, {sepstr}, and {delta} keywords is the same as explained on the doc page for the "map() method"_../doc/map.html of the MR-MPI library. The value for the {sepchar} keyword will be treated as a single character. The value for the {sepstr} keyword will be treated as a string. :line [Related commands:] "output"_output.html, "named commands"_command.html, "how to write named commands"_Section_commands.html, "set"_set.html [Defaults:] The option defaults are prepend = NULL, substitute = 0, multi = 1, mmode = 0, recurse = 0, self = 0, readfile = 0, nmap = 0, sepchar = newline character, sepstr = newline, delta = 80. mrmpi-1.0~20131122/oinkdoc/edge_upper.txt0000755000175000017500000000220711734437746017714 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line edge_upper command :h3 [Syntax:] edge_upper -i in1 -o out1.file out1.mr :pre in1 = graph edges: Key = Vi Vj, Value = NULL out1 = graph edges: Key = Vi Vj, Value = NULL :ul [Examples:] edge_upper -i tmp.matrix -o mre :pre [Description:] This is a named command which eliminates duplicate and self-edges from a graph. A duplicate edge is when both (Vi,Vj) or (Vj,Vi) appear in the edge list. A self-edge is when (Vi,Vi) appears. See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. In1 stores a set of edges, which may have duplicates or self edges. The input is unchanged by this command. Out1 will store the edges of a new graph which has the duplicates and self edges removed. 
In the new graph, Vi < Vj for every edge, so it also represents the non-zeroes of an upper-triangular matrix. [Related commands:] "rmat"_rmat.html mrmpi-1.0~20131122/oinkdoc/jump.txt0000755000175000017500000000660611734437746016557 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line jump command :h3 [Syntax:] jump file label :pre file = filename of new input script to switch to label = optional label within file to jump to :ul [Examples:] jump newfile jump in.run2 runloop jump SELF runloop :pre [Description:] This command closes the current input script file, opens the file with the specified name, and begins reading OINK commands from that file. Unlike the "include"_include.html command, the original file is not returned to, although by using multiple jump commands it is possible to chain from file to file or back to the original file. If the word "SELF" is used for the filename, then the current input script is re-opened and read again. IMPORTANT NOTE: The SELF option is not guaranteed to work when the current input script is being read through stdin (standard input), e.g. lmp_g++ < in.script :pre since the SELF option invokes the C-library rewind() call, which may not be supported for stdin on some systems. This can be worked around by using the "-in command-line argument"_Section_start.html#2_6 or the "-var command-line argument"_Section_start.html#2_6 to pass the script name as a variable to the input script In the latter case, the "fname" "variable"_variable.html could be used in place of SELF. E.g. lmp_g++ -in in.script :pre lmp_g++ -var fname n.script < in.script :pre The 2nd argument to the jump command is optional. 
If specified, it is treated as a label and the new file is scanned (without executing commands) until the label is found, and commands are executed from that point forward. This can be used to loop over a portion of the input script, as in this example. These commands perform 10 runs, each of 10000 steps, and create 10 dump files named file.1, file.2, etc. The "next"_next.html command is used to exit the loop after 10 iterations. When the "a" variable has been incremented for the tenth time, it will cause the next jump command to be skipped. variable a loop 10 label loop dump 1 all atom 100 file.$a run 10000 undump 1 next a jump in.lj loop :pre If the jump {file} argument is a variable, the jump command can be used to cause different processor partitions to run different input scripts. In this example, LAMMPS is run on 40 processors, with 4 partitions of 10 procs each. An in.file containing the example variable and jump command will cause each partition to run a different simulation. mpirun -np 40 lmp_ibm -partition 4x10 -in in.file :pre variable f world script.1 script.2 script.3 script.4 jump $f :pre Here is an example of a double loop which uses the "if"_if.html and jump commands to break out of the inner loop when a condition is met, then continues iterating thru the outer loop. label loopa variable a loop 5 label loopb variable b loop 5 print "A,B = $a,$b" run 10000 if $b > 2 then "jump in.script break" next b jump in.script loopb label break variable b delete :pre next a jump in.script loopa :pre IMPORTANT NOTE: If you jump to a file and it does not contain the specified label, OINK will come to the end of the file and exit. 
[Related commands:] "variable"_variable.html, "include"_include.html, "label"_label.html, "next"_next.html mrmpi-1.0~20131122/oinkdoc/mrmpi.txt0000755000175000017500000001635011734437746016725 0ustar mathieumathieu "MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line library commands :h3 [Syntax:] MR-ID keyword args ... :pre MR-ID = ID of previously created MR object keyword = MR-MPI library function to invoke args = arguments to library function :ul [Examples:] "mr"_mr.html edge edge map/task 100 mymap edge map/task 100 mymap 1 edge collate NULL edge reduce myreduce edge kv_stats 1 edge set timer 1 :pre [Description:] Invoke a MR-MPI library function directly on a previously created MR-MPI objects. In OINK, an MR-MPI object is a thin wrapper on a MapReduce object created via the "MR-MPI library"_md. They can be created by the "mr"_mr.html command or can be output by a "named command"_command.html. Such an MR-MPI object has an ID which is the command name used in the input script to trigger the library calls, e.g. "edge" in the examples above. The keyword is the library function to invoke on the underlying MapReduce object wrapped by the MR-MPI object. These have a one-to-one correspondence with the methods available in the "MR-MPI library"_md. Here is the list of keywords and their arguments. The arguments used in the OINK input script correspond to the arguments used by each library method. Arguments in parentheses are optional. More details are discussed below. 
Keywords, Arguments, delete, none, copy, MR2-ID, add, MR2-ID, aggregate, NULL or hash-function, broadcast, root, clone, none, close, none, collapse, type key, collate, NULL or hash-function, compress, reduce-function, convert, none, gather, nprocs, map/task, nmap map-function (addflag), map/char, nmap strings recurse readfile sepchar delta map-function (addflag), map/string, nmap strings recurse readfile sepstr delta map-function (addflag), map/mr, MR2-ID map-function (addflag), open, none, print, (file) (fflag) proc nstride kflag vflag, reduce, reduce-function, scan/kv, scan-function, scan/kmv, scan-function, scrunch, nprocs type key, sort_keys, flag or compare-function, sort_values, flag or compare-function, sort_multivalues, flag or compare-function, kv_stats, level, kmv_stats, level, cummulative_stats, level reset, set, name value :tb(c=2) The MR2-ID used as an argument to the "copy", "add", and "map/mr" keywords should be the ID of another previously defined MR-MPI object. IMPORTANT NOTE: The syntax for the copy keyword in an OINK script is as follows: MR-ID copy MR2-ID. This creates a new MR-MPI object MR2-ID, which is a copy of the existing MR-MPI object MR-ID. The MR2-ID object cannot already exist. This corresponds to the following C++ calling syntax for the "copy() method of the MR-MPI library"_../doc/copy.html, but note that the OINK syntax is somewhat reversed: MapReduce *mr2 = mr->copy(); :pre The map-function, reduce-function, hash-function, compare-function, scan-function arguments to various keywords are the names of functions that will be called back to by the MR-MPI library. Within OINK, these must be names of functions defined in map_*.cpp, reduce_*.cpp, hash_*.cpp, compare_*.cpp, or scan_*.cpp files with the appropriate function prototype.
When you build OINK, these files are scanned, the function prototypes extracted, and the style_map.h, style_reduce.h, style_hash.h, style_compare.h, style_scan.h files are created which enables a function name you list in your input script to be recognized by OINK. Note that as new map(), reduce(), etc functions are added to the OINK src directory, they automatically become available to your script to use in MR-MPI library commands. Thus you can use OINK to accumulate a collection of useful map(), reduce(), etc functions. These functions can also be used with "named commands"_command.html as discussed "here"_Section_command.html. Note that map() functions come in 4 different flavors, with different prototypes, as "detailed here"_../doc/map.html. Which you should use depends on which map variant you invoke, i.e. map/task, map/char, map/string, or map/mr. Likewise, scan() functions come in 2 different flavors, as "detailed here"_../doc/scan.html, one for use with scan/kv and the other with scan/kmv. The "strings" argument to the {map/char} and {map/string} keywords can take one of two forms. It can be a single filename or directory. If the latter, then the "map() method in the MR-MPI library"_../doc/map.html reads the files in the directory. Or it can be a variable defined elsewhere in the OINK input script that contains one or more strings which are passed to the map() method as a collection of strings. In this case the "strings" argument should be specified as v_name, where name is the name of the variable. All the different styles of variables (except equal-style) store strings; see the "variable"_variable.html command for details. Also note that there is a "command-line option"_Section_build.txt#1_4 -var or -v which can be specified when OINK is executed to store a list of filenames in an index-style variable. The sepchar and sepstr arguments to the {map/char} and {map/string} keywords should be a single character or a string of characters.
The addflag argument to the various {map} keywords is optional. It should be 1 if you wish to add key/value pairs to those already contained in a MapReduce object. The type argument to the {collapse} and {scrunch} keywords should be one of the following: "int", "uint64", "double", or "str". The key that follows will be converted into that data type to use as the key argument to the MR-MPI library function. The {print} keyword takes either 4 or 6 arguments. If 6 are used, the first two are a file name and file flag, the same as is available with the "print() method in the MR-MPI library"_../doc/print.html. The flag argument to the various {sort} keywords is an integer (e.g. 1 or -1) that can be used in place of a compare-function. This is the same integer that the "sort methods in the MR-MPI library"_../doc/sort.html takes as a valid argument. The {set} keyword takes a "name" and "value" argument. These can be any of the options that are valid to set for a MapReduce object in the MR-MPI library, as "discussed here"_../doc/settings.html. E.g. the command "edge set verbosity 1" will set the verbosity level to 1 in the MapReduce object wrapped by the MR-MPI object named "edge". IMPORTANT NOTE: There is currently no way in OINK to pass a data pointer to the various MR-MPI library functions that accept it, e.g. to map() or reduce(). When using the library from a programming language, such as C++ or C, this is a powerful option for passing extra information to the user callback map() or reduce() function. We are still thinking about the best way to do this, at least in some limited fashion, from an OINK input script. :line When any MR-MPI library command is executed, its elapsed execution time is stored internally by OINK.
This value can be accessed by the keyword "time" in an "equal-style variable"_variable.html and printed out in the following manner: variable t equal time edge map/task 100 mymap print "Time for map/task = $t" :pre :line [Related commands:] "named commands"_command.html, "mr"_mr.html, "MR-MPI library documentation"_md, "map(), reduce(), etc functions"_Section_functions.txt mrmpi-1.0~20131122/oinkdoc/Section_script.txt0000755000175000017500000001406611734437746020573 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line 2. OINK Commands :h3 This section describes OINK input scripts and what commands are used to define an OINK calculation. 2.1 "Input script operation"_#2_1 2.2 "Parsing rules"_#2_2 2.3 "Input script commands"_#2_3 :all(b) :line 2.1 Input script operation :link(3_1),h4 OINK executes by reading commands from a input script (text file), one line at a time. When the input script ends, OINK exits. Each command causes OINK to take some action. It may set an internal variable, read in a file, or perform a MapReduce operation. Most commands have default settings, which means you only need to use the command if you wish to change the default. Note that OINK does not read your entire input script and then perform a calculation with all the settings. Rather, the input script is read one line at a time and each command takes effect when it is read. Thus this sequence of commands: set verbosity 1 mr foo :pre does something different than this sequence: mr foo set verbosity 1 :pre In the first case, the MR object created will have its verbosity set to 1. In the latter case it will have the default verbosity of 0, since the set command was not used until after the MR object was created. 
Many input script errors are detected by OINK and an ERROR or WARNING message is printed. "This section"_Section_errors.html gives more information on what errors mean. The documentation for each command gives additional information. :line 2.2 Parsing rules :link(2_2),h4 Each non-blank line in the input script is treated as a command. OINK commands are case sensitive. Pre-defined command names are lower-case, as are specified command arguments. Upper case letters may be used in file names or user-chosen ID strings. Here is how each line in the input script is parsed by OINK: (1) If the last printable character on the line is a "&" character (with no surrounding quotes), the command is assumed to continue on the next line. The next line is concatenated to the previous line by removing the "&" character and newline. This allows long commands to be continued across two or more lines. (2) All characters from the first "#" character onward are treated as comment and discarded. See an exception in (6). Note that a comment after a trailing "&" character will prevent the command from continuing on the next line. Also note that for multi-line commands a single leading "#" will comment out the entire command. (3) The line is searched repeatedly for $ characters, which indicate variables that are replaced with a text string. See an exception in (6). If the $ is followed by curly brackets, then the variable name is the text inside the curly brackets. If no curly brackets follow the $, then the variable name is the single character immediately following the $. Thus $\{myTemp\} and $x refer to variable names "myTemp" and "x". See the "variable"_variable.html command for details of how strings are assigned to variables and how they are substituted for in input script commands. (4) The line is broken into "words" separated by whitespace (tabs, spaces). Note that words can thus contain letters, digits, underscores, or punctuation characters. (5) The first word is the command name.
All successive words in the line are arguments. (6) If you want text with spaces to be treated as a single argument, it can be enclosed in either double or single quotes. E.g. print "Value = $t" print 'Value = $t' :pre The quotes are removed when the single argument is stored internally. See the "if"_if.html commands for examples. A "#" or "$" character that is between quotes will not be treated as a comment indicator in (2) or substituted for as a variable in (3). IMPORTANT NOTE: If the argument is itself a command that requires a quoted argument (e.g. using a "print"_print.html command as part of an "if"_if.html command), then the double and single quotes can be nested in the usual manner. See the doc pages for those commands for examples. Only one level of nesting is allowed, but that should be sufficient for most use cases. :line 2.3 Input script commands :h4,link(2_3) There are 4 kinds of OINK commands: (1) Set command to alter parameters: "set"_set.html :ul (2) MR-MPI library commands: "mr foo"_mr.html "foo map ..., foo reduce ..., etc"_mrmpi.html :ul (3) Named commands: "input"_input.html "output"_output.html "myfoo params ... -i ... -o ..."_command.html :ul (4) Miscellaneous commands that are part of the scripting language: "clear"_clear.html "echo"_echo.html "if"_if.html "include"_include.html "jump"_jump.html "label"_label.html "log"_log.html "next"_next.html "print"_print.html "shell"_shell.html "variable"_variable.html :ul :line :link(comm) Here is a list of all OINK input script commands alphabetically: "clear"_clear.html, "echo"_echo.html, "if"_if.html, "include"_include.html, "input"_input.html, "jump"_jump.html, "label"_label.html, "log"_log.html, "mr"_mr.html, "library commands"_mrmpi.html, "named commands"_command.html, "next"_next.html, "output"_output.html, "print"_print.html, "set"_set.html, "shell"_shell.html, "variable"_variable.html :tb(c=6,ea=c) These are the named commands currently included in OINK.
We will add to this list from time to time. If you write a useful new command, send it to us and we can include it in the distribution. "cc_find"_cc_find.html, "cc_stats"_cc_stats.html, "degree"_degree.html, "degree_stats"_degree_stats.html, "degree_weight"_degree_weight.html, "edge_upper"_edge_upper.html, "histo"_histo.html, "luby_find"_luby_find.html, "neigh_tri"_neigh_tri.html, "neighbor"_neighbor.html, "rmat"_rmat.html, "rmat2"_rmat.html, "sssp"_sssp.html, "sssp2"_sssp.html, "tri_find"_tri_find.html, "vertex_extract"_vertex_extract.html, "wordfreq"_wordfreq.html :tb(c=2,ea=c) Here is a link to the MR-MPI library commands that can be invoked directly from an OINK input script: "MR-MPI library commands"_mrmpi.html :tb(c=1,ea=c) mrmpi-1.0~20131122/oinkdoc/degree_stats.txt0000755000175000017500000000322611734437746020250 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line degree_stats command :h3 [Syntax:] degree_stats dupflag -i in1 :pre dupflag = 1/2 for counting edge once/twice in1 = graph edges: Key = Vi Vj, Value = NULL [Examples:] degree_stats 2 -i edges :pre [Description:] This is a named command which calculates and prints out the degree statistics of an undirected graph. For each degree d, one line is printed to the screen with the number of vertices of degree d. This is done in sorted order. If the dupflag is 2, then each edge increments the degree of both of its vertices. If the dupflag is 1, then each edge only increments the degree of the first of its vertices. The former is usually more approrpriate for undirected graphs; the latter for directed graphs, in which case the out-degree of each vertex is being calculated. The latter can also be useful if the graph represents a sparse matrix and you want the statistics on non-zeroes in each row of the matrix. 
See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. In1 stores a set of edges. No assumption is made about duplicates or self edges, i.e. (Vi,Vj) may appear multiple times, both (Vi,Vj) or (Vj,Vi) may appear, as may (Vi,Vi). The input is unchanged by this command. This command produces no output. Statistics on the degree count of each vertex will be printed to the screen in sorted order. [Related commands:] "degree"_degree.html mrmpi-1.0~20131122/oinkdoc/Section_errors.txt0000644000175000017500000002315611734437746020600 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line 5. Errors :h3 OINK tries to flag errors and print informative error messages so you can fix the problem. If you get an error message about an invalid command in your input script, you can determine what command is causing the problem by looking in the log.oink file or using the "echo command"_echo.html to see it on the screen. For a given command, OINK expects certain arguments in a specified order. If you mess this up, OINK will often flag the error. Generally, OINK will print a message to the screen and logfile and exit gracefully when it encounters a fatal error. Sometimes it will print a WARNING to the screen and logfile and continue on; you can decide if the WARNING is important or not. If OINK crashes or hangs without spitting out an error message first then it could be a bug If you think you have found a bug in OINK, please send an email to the "developers"_http://mapreduce.sandia.gov with info about the problem. Anything you can do to isolate the problem and reproduce it on a small data set will be helpful. 
:line 5.1 Error & warning messages :h4,link(5_1) These are two alphabetic lists of the "ERROR"_#error and "WARNING"_#warn messages OINK prints out and the reason why. If the explanation here is not sufficient, the documentation for the offending command may help. Grepping the source files for the text of the error message and staring at the source code and comments is also not a bad idea! Note that sometimes the same message can be printed from multiple places in the code. Errors: :h4,link(error) :dlb {All universe/uloop variables must have same # of values} :dt Self-explanatory. :dd {All variables in next command must be same style} :dt Self-explanatory. :dd Cannot attempt to open a 2nd input script, when the original file is still being processed. :dd {Arccos of invalid value in variable formula} :dt Argument of arccos() must be between -1 and 1. :dd {Arcsin of invalid value in variable formula} :dt Argument of arcsin() must be between -1 and 1. :dd {Cannot open input script %s} :dt Self-explanatory. :dd {Cannot open log.oink} :dt The default OINK log file cannot be opened. Check that the directory you are running in allows for files to be created. :dd {Cannot open logfile %s} :dt The OINK log file specified in the input script cannot be opened. Check that the path and name are correct. :dd {Cannot open logfile} :dt The OINK log file named in a command-line argument cannot be opened. Check that the path and name are correct. :dd {Cannot open screen file} :dt The screen file specified as a command-line argument cannot be opened. Check that the directory you are running in allows for files to be created. :dd {Cannot open universe log file} :dt For a multi-partition run, the master log file cannot be opened. Check that the directory you are running in allows for files to be created. :dd {Cannot open universe screen file} :dt For a multi-partition run, the master screen file cannot be opened. Check that the directory you are running in allows for files to be created. 
:dd {Cannot redefine variable as a different style} :dt An equal-style variable can be re-defined but only if it was originally an equal-style variable. :dd {Command input is equal-style variable} :dt Only variables that store strings can be used. :dd {Command input variable is unknown} :dt Self-explanatory. :dd {Command outputs must be specified in pairs} :dt Self-explanatory. :dd {Could not create dir for file %s\n} :dt Self-explanatory. :dd {Could not open file in print} :dt This comes from the output "print" routine of the neigh_tri command. :dd {Could not open output file %s for output object()} :dt Self-explanatory. :dd {Divide by 0 in variable formula} :dt Self-explanatory. :dd {Expected floating point parameter in variable definition} :dt The quantity being read is an integer or non-numeric value. :dd {Expected integer parameter in variable definition} :dt The quantity being read is a floating point or non-numeric value. :dd {Failed to allocate %d bytes for array %s} :dt Your OINK simulation has run out of memory. You need to run a smaller simulation or on more processors. :dd {Failed to reallocate %d bytes for array %s} :dt Your OINK simulation has run out of memory. You need to run a smaller simulation or on more processors. :dd {ID in mr command is already in use} :dt Self-explanatory. :dd {Illegal ... command} :dt Self-explanatory. Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running OINK to see the offending line. :dd {Input line too long after variable substitution} :dt This is a hard (very large) limit defined in the input.cpp file. :dd {Input line too long: %s} :dt This is a hard (very large) limit defined in the input.cpp file. :dd {Invalid Boolean syntax in if command} :dt Self-explanatory. :dd {Invalid named command switch} :dt Only -i and -o are allowed. :dd {Invalid command-line argument} :dt One or more command-line arguments is invalid.
Check the syntax of the command you are using to launch OINK. :dd {Invalid keyword in variable formula} :dt Self-explanatory. :dd {Invalid math function in variable formula} :dt Self-explanatory. :dd {Invalid seed for Marsaglia random # generator} :dt The initial seed for this random number generator must be a positive integer less than or equal to 900 million. :dd {Invalid syntax in variable formula} :dt Self-explanatory. :dd {Invalid variable evaluation in variable formula} :dt A variable used in a formula could not be evaluated. :dd {Invalid variable in next command} :dt Self-explanatory. :dd {Invalid variable name in variable formula} :dt Variable name is not recognized. :dd {Invalid variable name} :dt Variable name used in an input script line is invalid. :dd {Invalid variable style with next command} :dt Variable styles {equal} and {world} cannot be used in a next command. :dd {Label wasn't found in input script} :dt Self-explanatory. :dd {Log of zero/negative value in variable formula} :dt Self-explanatory. :dd {MR ID must be alphanumeric or underscore characters} :dt Self-explanatory. :dd {MR object map command variable is unknown} :dt Variable used as collection of strings is not recognized. :dd {MR object add comand MR object does not exist} :dt Second MR object is not recognized. :dd {MR object command input is equal-style variable} :dt Only variables that store strings can be used. :dd {MR object created by copy already exists} :dt This command creates a new MR object, which cannot already be defined. :dd {MR object map command MR object does not exist} :dt Second MR object is not recognized. :dd {Mismatch in command inputs} :dt Named command defines different number of inputs. :dd {Mismatch in command outputs} :dt Named command defines different number of outputs. :dd {Must use -in switch with multiple partitions} :dt A multi-partition simulation cannot read the input script from stdin. The -in command-line option must be used to specify a file. 
:dd {Object input() invoked with invalid index} :dt Index argument must be from 1 to Ninputs. :dd {Object input() map function does not match input mode} :dt Input command is used to select map mode (mmode) which must match map() function. :dd {Object input() with no map function} :dt Input script specified input from file, but no map() method was provided by named command. :dd {Object output() as MR object not allowed} :dt Input script specified output as MR object, but named command invoked disallow flag. :dd {Object output() called for unknown MR object} :dt A MapReduce object unknown to the object manager was passed to the output() method by a named command. :dd {Object output() invoked with invalid index} :dt Index argument must be from 1 to Noutputs. :dd {Object permanent() called for unknown MR object} :dt A MapReduce object unknown to the object manager was passed to the permanent() method by a named command. :dd {Ouptut MR ID must be alphanumeric or underscore characters} :dt Self-explanatory. :dd {Power by 0 in variable formula} :dt Self-explanatory. :dd {Processor partitions are inconsistent} :dt The total number of processors in all partitions must match the number of processors OINK is running on. :dd {RMAT a,b,c,d must sum to 1} :dt Self-explanatory. :dd {RMAT fraction must be < 1} :dt Self-explanatory. :dd {Sqrt of negative value in variable formula} :dt Self-explanatory. :dd {Substitution for illegal variable} :dt Input script line contained a variable that could not be substituted for. :dd {Too many edges for one vertex in reduce first_degree} :dt Number of edges of one vertex exceeds max integer in tri_find command. Will never be able to emit N^2 angles. :dd {Tour + vertex reduce exceeds one block} :dt No matching end double quote was found following a leading double quote. :dd {Universe/uloop variable count < # of partitions} :dt A universe or uloop style variable must specify a number of values >= to the number of processor partitions.
:dd {Unknown command: %s} :dt The command is not known to OINK. Check the input script. :dd {Variable name must be alphanumeric or underscore characters} :dt Self-explanatory. :dd {World variable count doesn't match # of partitions} :dt A world-style variable must specify a number of values equal to the number of processor partitions. :dd Warnings: :h4,link(warn) :dlb {Placeholder} :dt No warnings are yet defined in OINK. :dd :dle mrmpi-1.0~20131122/oinkdoc/command.txt0000755000175000017500000001627311734437746017223 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line named commands :h3 [Syntax:] command-name params ... -i input1 input2 ... -o output1.file output1.ID output2.file output2.ID ... :pre command-name = name of command params = zero or more params required by command -i = start of input definitions required by command inputN = list of 0 or more input objects -o = start of output definitions to command outputN.file = list of 0 or more output files outputN.ID = list of 0 or more output MR-MPI objects :ul [Examples:] wordfreq 5 -i v_files -o NULL NULL rmat 10 8 0.25 0.25 0.25 0.25 0.0 12345 -o tmp.rmat NULL degree -i graph/edges -o degree/degree degree :pre [Description:] Invoke a named command with the list of parameters it requires, as well as the list of input and output objects it expects. In OINK a named command is a child class that derives from the Command parent class, meaning that it contains several methods that can be called from the OINK framework. See "this section"_Section_commands.html of the manual for details on how to write new named commands and add them to OINK. The named commands currently included with OINK are listed on "this page"_Section_script.html#comm.
They are also listed in the source code in the file src/style_command.h which is auto-generated each time that OINK is built. Each named command has a "name", defined in the *.h file for the class, which is the command name used in the input script to invoke the command, e.g. wordfreq or rmat or degree in the examples above. Any arguments that follow the command name, up to a "-i" or "-o" argument are passed as {params} to the command before it is invoked, so that it can process and store them as needed. The number and nature of these parameters are defined by the command itself and it should generate errors if they are not specified correctly. The code that processes parameters can be written to allow for optional parameters and keywords within the list of {params}. The "-i" and "-o" arguments can be listed in either order. The arguments that follow each of them, either between them, or up to the end of the command, are passed to an "input" and "output" processing routine within the command class. Each command requires a specific number of input and output "definitions", as explained below. Input definitions are single arguments. Output definitions are pairs of arguments. If zero input (or output) definitions are required by the command, then the "-i" (or "-o") argument need not be specified. If 2 output definitions are required, then 4 arguments must follow the "-o". Typically each required input definition is a form of data input that the command requires. It can come from reading one or more files or from an MR-MPI object that already exists. Similarly, each required output definition is a form of data output that the command produces. It can be stored either in one or more files or in an MR-MPI object that the command creates. In OINK, an MR-MPI object is a thin wrapper on a MapReduce object created via the "MR-MPI library"_md. See "this doc page"_mrmpi.html for more discussion of MR-MPI objects and input script operations that can be performed on them directly.
:line Each input definition {inputN} is one of three things. First, it can be the ID of an existing MR-MPI object, which wraps a MapReduce object which contains key/value pairs. If {inputN} matches such an ID, then the second or third options are not considered. In this case, it is assumed that the data stored within the MapReduce object is already in the form that would be produced by the map() method that would read the input from one or more file or directory names. Second, the {inputN} can be the path name of a file or directory. Third, it can be a variable defined elsewhere in the OINK input script that contains one or more strings. In the third case {inputN} should be specified as v_name, where name is the name of the variable. All the different styles of variables (except equal-style) store strings; see the "variable"_variable.html command for details. Also note that there is a "command-line option"_Section_build.html#1_4 -var or -v which can be specified when OINK is executed to store a list of strings in an index-style variable. The strings are treated as a list of file or directory names. Thus in both the second and third cases the effect is that a list of one or more file/directory names is passed to the command. The command creates a temporary, unnamed MR-MPI object and invokes a map() method within it, as specified in the code of the command class, using the list of file/directory names as input. There are several options available which influence how the list of strings specified in the input script are converted into actual file/directory paths passed to the map() method. These include wildcard characters "%" and "*". See the "input"_input.html command for details. :line Each required output definition is a pair of arguments: {outputN.file} and {outputN.ID}. Either or both can be specified as NULL if no output in that form is desired. The {outputN.file} argument is the path name of a file.
A map() or reduce() or scan() method, as specified in the code of the command class, will be invoked which will write data to that file when the command is finished. A processor-ID (0 to Nprocs-1) will be appended to the filename, so that when running on multiple processors, multiple files will be created. If the specified path name does not entirely exist, additional directories in the path name will be created as needed. Also, there are several options available which influence how the file name specified in the input script is converted into the file name actually opened by OINK and written to by the map(), reduce(), or scan() method. These include wildcard characters "%" and "*". See the "output"_output.html command for details. The {outputN.ID} argument is the ID of an MR-MPI object which wraps a MapReduce object. The code in the command class will have created or altered the MR-MPI object and its associated MapReduce object and populated it with data. As a final step, the specified ID is assigned to that MR-MPI object. If the ID is already in use, then the name is removed from the other MR-MPI object. This means that if an {outputN.ID} is the same as an {inputN} to the command then the output will effectively overwrite that input. When the command completes, named MR-MPI objects persist so that they can be used in subsequent input script commands. All unnamed MR-MPI objects are deleted. :line When any named command is executed, its elapsed execution time is stored internally by OINK.
This value can be accessed by the keyword "time" in an "equal-style variable"_variable.html and printed out in the following manner: variable t equal time rmat 10 8 0.25 0.25 0.25 0.25 0.0 12345 -o tmp.rmat NULL print "Time for RMAT generation = $t" :pre :line [Related commands:] "MR-MPI library commands"_mrmpi.html, "mr"_mr.html, "MR-MPI library documentation"_md, "how to write named commands"_Section_commands.html, "input"_input.html, "output"_output.html mrmpi-1.0~20131122/oinkdoc/print.txt0000755000175000017500000000225511734437746016734 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line print command :h3 [Syntax:] print str :pre str = text string to print, which may contain variables :ul [Examples:] print "Done with first stage" print "Elapsed time = $t secs on $p procs" [Description:] Print a text string to the screen and logfile. One line of output is generated. If the string has white space in it (spaces, tabs, etc), then you must enclose it in quotes so that it is treated as a single argument. If variables are included in the string, they will be evaluated and their current values printed. See the "variable"_variable.html command for a description of various kinds of variables, any of which can be used with the print command. Note that there are keywords for the number of processors and elapsed time for a command or MR-MPI library call which can be accessed with variables, e.g. 
variable t equal time variable p equal nprocs print "Elapsed time = $t secs on $p procs" :pre [Related commands:] "variable"_variable.html mrmpi-1.0~20131122/oinkdoc/variable.txt0000755000175000017500000004114711734437746017370 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line variable command :h3 [Syntax:] variable name style args ... :pre name = name of variable to define :ulb,l style = {delete} or {index} or {loop} or {world} or {universe} or {uloop} or {string} or {equal} or {atom} :l {delete} = no args {index} args = one or more strings {loop} args = N N = integer size of loop, loop from 1 to N inclusive {loop} args = N pad N = integer size of loop, loop from 1 to N inclusive pad = all values will be same length, e.g. 001, 002, ..., 100 {loop} args = N1 N2 N1,N2 = loop from N1 to N2 inclusive {loop} args = N1 N2 pad N1,N2 = loop from N1 to N2 inclusive pad = all values will be same length, e.g. 050, 051, ..., 100 {world} args = one string for each partition of processors {universe} args = one or more strings {uloop} args = N N = integer size of loop {uloop} args = N pad N = integer size of loop pad = all values will be same length, e.g. 
001, 002, ..., 100 {string} arg = one string {equal} args = one formula containing numbers, keywords, math operations, variable references numbers = 0.0, 100, -5.4, 2.8e-4, etc constants = PI keywords = nprocs, time math operators = (), -x, x+y, x-y, x*y, x/y, x^y, x==y, x!=y, x<y, x<=y, x>y, x>=y, x&&y, x||y, !x math functions = sqrt(x), exp(x), ln(x), log(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x) ramp(x,y), stagger(x,y), logfreq(x,y,z), vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z) variable references = v_name :pre :ule [Examples:] variable x index run1 run2 run3 run4 run5 run6 run7 run8 variable LoopVar loop $n variable p equal nprocs variable b1 equal 0.5*v_flag variable b1 equal "10 + 0.5*v_flag" variable foo string myfile variable x universe 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 variable x uloop 15 pad variable x delete :pre [Description:] This command assigns one or more strings to a variable name for evaluation later in the input script or during a simulation. Variables can be used in several ways in OINK. A variable can be referenced elsewhere in an input script to become part of a new input command. For variable styles that store multiple strings, the "next"_next.html command can be used to increment which string is assigned to the variable. Variables of style {equal} store a formula which when evaluated produces a single numeric value which can be output via the "print"_print.html command. Variables that store a collection of strings can be used as input to a named command, e.g. to process a collection of filenames. See the "named command"_command.html doc page for details. In the discussion that follows, the "name" of the variable is the arbitrary string that is the 1st argument in the variable command. This name can only contain alphanumeric characters and underscores. The "string" is one or more of the subsequent arguments.
The "string" can be simple text as in the 1st example above, it can contain other variables as in the 2nd example, or it can be a formula as in the 3rd example. The "value" is the numeric quantity resulting from evaluation of the string. Note that the same string can generate different values when it is evaluated at different times during a simulation. IMPORTANT NOTE: When the input script line that defines a variable of style {equal} is encountered, the formula is NOT immediately evaluated and the result stored. See the discussion below about "Immediate Evaluation of Variables" if you want to do this. IMPORTANT NOTE: When a variable command is encountered in the input script and the variable name has already been specified, the command is ignored. This means variables can NOT be re-defined in an input script (with 2 exceptions, read further). This is to allow an input script to be processed multiple times without resetting the variables; see the "jump"_jump.html or "include"_include.html commands. It also means that using the "command-line switch"_Section_start.html#2_6 -var will override a corresponding index variable setting in the input script. There are two exceptions to this rule. First, variables of style {string} and {equal} ARE redefined each time the command is encountered. This allows these style of variables to be redefined multiple times in an input script. In a loop, this means the formula associated with an {equal}-style variable can change if it contains a substitution for another variable, e.g. $x. Second, as described below, if a variable is iterated on to the end of its list of strings via the "next"_next.html command, it is removed from the list of active variables, and is thus available to be re-defined in a subsequent variable command. The {delete} style does the same thing. :line "This section"_Section_script.html#2_2 of the manual explains how occurrences of a variable name in an input script line are replaced by the variable's string. 
The variable name can be referenced as $x if the name "x" is a single character, or as $\{LoopVar\} if the name "LoopVar" is one or more characters. As described below, for variable styles {index}, {loop}, {universe}, and {uloop}, which string is assigned to a variable can be incremented via the "next"_next.html command. When there are no more strings to assign, the variable is exhausted and a flag is set that causes the next "jump"_jump.html command encountered in the input script to be skipped. This enables the construction of simple loops in the input script that are iterated over and then exited from. As explained above, an exhausted variable can be re-used in an input script. The {delete} style also removes the variable, the same as if it were exhausted, allowing it to be redefined later in the input script or when the input script is looped over. This can be useful when breaking out of a loop via the "if"_if.html and "jump"_jump.html commands before the variable would become exhausted. For example, label loop variable a loop 5 print "A = $a" if $a > 2 then "jump in.script break" next a jump in.script loop label break variable a delete :pre :line For the {index} style, one or more strings are specified. Initially, the 1st string is assigned to the variable. Each time a "next"_next.html command is used with the variable name, the next string is assigned. All processors assign the same string to the variable. {Index} style variables with a single string value can also be set by using the command-line switch -var; see "this section"_Section_start.html#2_6 for details. The {loop} style is identical to the {index} style except that the strings are the integers from 1 to N inclusive, if only one argument N is specified. This allows generation of a long list of runs (e.g. 1000) without having to list N strings in the input script. Initially, the string "1" is assigned to the variable. 
Each time a "next"_next.html command is used with the variable name, the next string ("2", "3", etc) is assigned. All processors assign the same string to the variable. The {loop} style can also be specified with two arguments N1 and N2. In this case the loop runs from N1 to N2 inclusive, and the string N1 is initially assigned to the variable. For the {world} style, one or more strings are specified. There must be one string for each processor partition or "world". See "this section"_Section_build.html#1_4 of the manual for information on running OINK with multiple partitions via the "-partition" command-line switch. This variable command assigns one string to each world. All processors in the world are assigned the same string. The next command cannot be used with {world} style variables, since there is only one value per world. This style of variable is useful when you wish to perform different calculations on different partitions. For the {universe} style, one or more strings are specified. There must be at least as many strings as there are processor partitions or "worlds". See "this page"_Section_start.html#2_6 for information on running OINK with multiple partitions via the "-partition" command-line switch. This variable command initially assigns one string to each world. When a "next"_next.html command is encountered using this variable, the first processor partition to encounter it, is assigned the next available string. This continues until all the variable strings are consumed. Thus, this command can be used to run 50 simulations on 8 processor partitions. The simulations will be run one after the other on whatever partition becomes available, until they are all finished. {Universe} style variables are incremented using the files "tmp.oink.variable" and "tmp.oink.variable.lock" which you will see in your directory during such an OINK run. The {uloop} style is identical to the {universe} style except that the strings are the integers from 1 to N.
This allows generation of a long list of runs (e.g. 1000) without having to list N strings in the input script. All {universe}- and {uloop}-style variables defined in an input script must have the same number of values. :line For the {equal} style, a single string is specified which represents a formula that will be evaluated afresh each time the variable is used. If you want spaces in the string, enclose it in double quotes so the parser will treat it as a single argument. The formula computes a scalar quantity, which becomes the value of the variable whenever it is evaluated. Note that {equal} variables can produce different values at different stages of the input script or at different times during a run. For example, if the {equal} variable is printed during a loop, different values could be printed each time it was invoked. If you want a variable to be evaluated immediately, so that the result is stored by the variable instead of the string, see the section below on "Immediate Evaluation of Variables". The next command cannot be used with {equal} style variables, since there is only one string. The formula for an {equal} variable can contain a variety of quantities. The syntax for each kind of quantity is simple, but multiple quantities can be nested and combined in various ways to build up formulas of arbitrary complexity. Specifically, a formula can contain numbers, keywords, math operators, math functions, and references to other variables. Number: 0.2, 100, 1.0e20, -15.4, etc Constant: PI Keywords: nprocs, time Math operators: (), -x, x+y, x-y, x*y, x/y, x^y, x==y, x!=y, x<y, x<=y, x>y, x>=y, x&&y, x||y, !x Math functions: sqrt(x), exp(x), ln(x), log(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), ramp(x,y), stagger(x,y), logfreq(x,y,z), vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z) Other variables: v_name :tb(s=:) :line The keywords allowed in a formula are {nprocs} and {time}.
Nprocs is the number of processors being used. Time is the elapsed time of the most recently executed "named command"_command.html or "MR-MPI library command"_mrmpi.html. :line Math Operators :h4 Math operators are written in the usual way, where the "x" and "y" in the examples can themselves be arbitrarily complex formulas, as in the examples above. Operators are evaluated left to right and have the usual C-style precedence: unary minus and unary logical NOT operator "!" have the highest precedence, exponentiation "^" is next; multiplication and division are next; addition and subtraction are next; the 4 relational operators "<", "<=", ">", and ">=" are next; the two remaining relational operators "==" and "!=" are next; then the logical AND operator "&&"; and finally the logical OR operator "||" has the lowest precedence. Parentheses can be used to group one or more portions of a formula and/or enforce a different order of evaluation than what would occur with the default precedence. The 6 relational operators return either a 1.0 or 0.0 depending on whether the relationship between x and y is TRUE or FALSE. For example the expression x<10.0 in an atom-style variable formula will return 1.0 for all atoms whose x-coordinate is less than 10.0, and 0.0 for the others. The logical AND operator will return 1.0 if both its arguments are non-zero, else it returns 0.0. The logical OR operator will return 1.0 if either of its arguments is non-zero, else it returns 0.0. The logical NOT operator returns 1.0 if its argument is 0.0, else it returns 0.0. :line Math Functions :h4 Math functions are specified as keywords followed by one or more parenthesized arguments "x", "y", "z", each of which can themselves be arbitrarily complex formulas. Most of the math functions perform obvious operations. The ln() is the natural log; log() is the base 10 log. The random(x,y,z) function takes 3 arguments: x = lo, y = hi, and z = seed.
It generates a uniform random number between lo and hi. The normal(x,y,z) function also takes 3 arguments: x = mu, y = sigma, and z = seed. It generates a Gaussian variate centered on mu with variance sigma^2. In both cases the seed is used the first time the internal random number generator is invoked, to initialize it. For equal-style variables, every processor uses the same seed so that they each generate the same sequence of random numbers. IMPORTANT NOTE: Internally, there is just one random number generator for all equal-style variables. If you define multiple variables (of each style) which use the random() or normal() math functions, then the internal random number generators will only be initialized once, which means only one of the specified seeds will determine the sequence of generated random numbers. The ceil(), floor(), and round() functions are those in the C math library. Ceil() is the smallest integer not less than its argument. Floor() is the largest integer not greater than its argument. Round() is the nearest integer to its argument. :line Variable References :h4 Variable references access quantities calculated by other variables, which will cause those variables to be evaluated. The name in the reference should be replaced by the name of a variable defined elsewhere in the input script. IMPORTANT NOTE: If you define variables in a circular manner like this: variable a equal v_b variable b equal v_a print $a :pre then OINK may run for a while when the print statement is invoked! :line [Immediate Evaluation of Variables:] There is a difference between referencing a variable with a leading $ sign (e.g. $x or $\{abc\}) versus with a leading "v_" (e.g. v_x or v_abc). The former can be used in any command, including a variable command, to force the immediate evaluation of the referenced variable and the substitution of its value into the command. The latter is a required kind of argument to some commands (e.g.
the "fix ave/spatial"_fix_ave_spatial.html or "dump custom"_dump.html or "thermo_style"_thermo_style.html commands) if you wish it to evaluate a variable periodically during a run. It can also be used in a variable formula if you wish to reference a second variable. The second variable will be evaluated whenever the first variable is evaluated. As an example, suppose you use this command in your input script to define the variable "v" as variable v equal vol :pre before a run where the simulation box size changes. You might think this will assign the initial volume to the variable "v". That is not the case. Rather it assigns a formula which evaluates the volume (using the thermo_style keyword "vol") to the variable "v". If you use the variable "v" in some other command like "fix ave/time"_fix_ave_time.html then the current volume of the box will be evaluated continuously during the run. If you want to store the initial volume of the system, you can do it this way: variable v equal vol variable v0 equal $v :pre The second command will force "v" to be evaluated (yielding the initial volume) and assign that value to the variable "v0". Thus the command thermo_style custom step v_v v_v0 :pre would print out both the current and initial volume periodically during the run. Note that it is a mistake to enclose a variable formula in double quotes if it contains variables preceded by $ signs. For example, variable vratio equal "$\{vfinal\}/$\{v0\}" :pre This is because the quotes prevent variable substitution (see "this section"_Section_commands.html#3_2 on parsing input script commands), and thus an error will occur when the formula for "vratio" is evaluated later.
:line [Related commands:] "next"_next.html, "jump"_jump.html, "include"_include.html, "print"_print.html mrmpi-1.0~20131122/oinkdoc/neigh_tri.txt0000755000175000017500000000510211734437746017542 0ustar mathieumathieu"MR-MPI WWW Site"_mws -"MR-MPI Documentation"_md - "OINK Documentation"_od - "OINK Commands"_oc :c :link(mws,http://mapreduce.sandia.gov) :link(md,../doc/Manual.html) :link(od,Manual.html) :link(oc,Section_script.html#comm) :line neigh_tri command :h3 [Syntax:] neigh_tri dirname -i input1 input2 -o out1.file out1.mr :pre dirname = directory name to create set of output files in, one per vertex input1 = graph neighbors: Key = Vi, Value = Vj Vk ... input2 = triangles: Key = Vi Vj Vk, Value = NULL out1 = neighbors + triangle edges of each vertex: Key = Vi, MultiValue = Vj Vk ... (Vj Vk) (Vm Vn) ... :ul [Examples:] neigh_tri myneigh -i mrn mrtri -o NULL mrnplus [Description:] This is a named command which calculates a list of edges associated with each vertex in an undirected graph, which include all edges the vertex is in (its first neighbors) and also edges between pairs of its first neighbors (triangle edges). This set of data is written to a file per vertex as a list of edges. See the "named command"_command.html doc page for various ways in which the -i inputs and -o outputs for a named command can be specified. Input1 stores a set of neighbors of each vertex. See the "neighbor"_neighbor.html command, which can compute this data. Input2 stores a set of triangles. See the "tri_find"_tri_find.html command, which can compute this data. The two inputs are unchanged by this command. These 2 data sets are merged to identify the edges that exist between pairs of neighbors of each vertex. This information is written to a file per vertex. The name of each file is dirname/Vi where {dirname} is the specified argument (a directory name), and Vi is the vertex ID. Each file will contain a list of edges, one per line, written as Vm Vn. 
For some of the edges, Vm will equal Vi, which means they are edges containing Vi, i.e. they are the first neighbors of Vi. Other edges will have Vm != Vi. These are edges between pairs of first neighbors. Out1 will store the neighbor and triangle edge information as key/multivalue (KMV) pairs, not as key/value (KV) pairs (the usual form of output). Out1.file must be specified as NULL with the "-o" argument so that the output is only allowed as an MR-MPI object, not as a file. This is because the file would contain data for all the vertices together. The equivalent info is already output as one file per vertex, as described above. NOTE: alter the neigh_tri.cpp code so that it uses the input dirname with expandpath() to apply the global prepend and substitute settings ?? [Related commands:] "neighbor"_neighbor.html, "tri_find"_tri_find.html mrmpi-1.0~20131122/user/0000755000175000017500000000000012243675124014346 5ustar mathieumathieumrmpi-1.0~20131122/user/README0000644000175000017500000000054611347542401015226 0ustar mathieumathieuThis directory will eventually contain MapReduce programs written by the developers and users which call the MapReduce MPI library. For this release, it's empty. If you write a MapReduce program you'd like to contribute to the distribution, please send an email to Steve Plimpton: sjplimp at sandia.gov. See the examples dir for other MapReduce programs. mrmpi-1.0~20131122/txt2html/0000755000175000017500000000000011613342422015146 5ustar mathieumathieumrmpi-1.0~20131122/txt2html/README.txt0000600000175000017500000002343611613342422016644 0ustar mathieumathieutxt2html - a text to HTML conversion tool :h3 [txt2html] is a simple tool for converting text files into HTML files. Text files can contain simple formatting and mark-up commands that [txt2html] converts into HTML. [txt2html] was written by "Steve Plimpton"_sjp. I use it for "documentation"_doc and "WWW pages"_www. Anna Reese added the table formatting options.
See the "example.txt"_example.txt and "example.html"_example.html files in the [txt2html] directory for examples of what all the formatting commands and mark-up syntax end up looking like in HTML. :link(sjp,http://www.cs.sandia.gov/~sjplimp) :link(www,http://www.cs.sandia.gov/~sjplimp) :link(doc,http://www.cs.sandia.gov/~sjplimp/lammps.html) :line [Syntax:] txt2html file read from text file, write HTML to standard output txt2html file1 file2 file3 ... read each argument as text file, write one HTML file per argument :dl Input files are first opened with the specified name. If that fails, a ".txt" suffix is added. Output files are created with an ".html" suffix, which is either added or replaces the ".txt" suffix. :line [Compiling:] The source for [txt2html] is a single C++ file. Compile it by typing: g++ -o txt2html txt2html.cpp :pre :line [How the tool works:] [txt2html] reads a text file, one {paragraph} at a time. A paragraph ends with: a blank line a line whose final word starts with ":" (a format string) the end of the file :ul Any line in the paragraph which ends with "\" is concatenated to the following line by removing the "\" character and following newline. This can be useful for some of the formatting commands described below that operate on individual lines in the paragraph. If a paragraph starts with a "<" character and ends with a ">" character, it is treated as raw HTML and is written directly into the output file. If a paragraph does not end with a format string, then it is surrounded with HTML paragraph markers (<P> and </P>), "mark-up"_#markup is performed, and the paragraph is written to the output file. If the paragraph ends with a format string, then "formatting"_#format is performed, "mark-up"_#markup is performed, and the paragraph is written to the output file. :line [Formatting:] :link(format) A format string is the last word of a paragraph if it starts with a ":" character. 
A format string contains one or more comma-separated commands, like ":ulb,l" or ":c,h3". Note that a format string cannot contain spaces, else it would not be the last word. An individual command can have 0 or more arguments: {b} or {line()} = 0 arguments {image(file)} = 1 argument {link(alias,value)} = 2 or more comma-separated arguments :ul Format commands add HTML markers at the beginning or end of the paragraph and individual lines. Commands are processed in the order they appear in the format string. Thus if two commands add HTML markers to the beginning of the paragraph, the 2nd command's marker will appear 2nd. The reverse is true at the end of the paragraph; the 2nd command's marker will appear 1st. Some commands, like {line} or {image} make most sense if used as stand-alone commands without an accompanying paragraph. Commands that format the entire paragraph: p --> surround the paragraph with <P> </P> b --> put <BR> at the end of the paragraph pre --> surround the paragraph with <PRE> </PRE> c --> surround the paragraph with <CENTER> </CENTER> h1,h2,h3,h4,h5,h6 --> surround the paragraph with \ <H1> </H1>, etc :ul Commands that format the lines of the paragraph as a list: ul --> surround the paragraph with <UL> </UL>, \ put <LI> at start of every line ol --> surround the paragraph with <OL> </OL>, \ put <LI> at start of every line dl --> surround the paragraph with <DL> </DL>, \ alternate <DT> and <DD> at start of every line :ul Commands that treat the paragraph as one entry in a list: l --> put <LI> at the beginning of the paragraph dt --> put <DT> at the beginning of the paragraph dd --> put <DD> at the beginning of the paragraph ulb --> put <UL> at the beginning of the paragraph ule --> put </UL> at the end of the paragraph olb --> put <OL> at the beginning of the paragraph ole --> put </OL> at the end of the paragraph dlb --> put <DL> at the beginning of the paragraph dle --> put </DL> at the end of the paragraph :ul Commands applied to each line of the
paragraph: all(p) --> surround each line with <P> </P> all(c) --> surround each line with <CENTER> </CENTER> all(b) --> append a <BR> to each line all(l) --> prepend a <LI> to each line :ul Special commands (all HTML is inserted at beginning of paragraph): line --> insert a horizontal line = <HR> image(file) --> insert an image = <IMG SRC = "file"> image(file,link) --> insert an image that when clicked on goes to link link(name) --> insert a named link that can be referred to \ elsewhere (see "mark-up"_#markup) = <A NAME = "name"></A> link(alias,value) --> define a link alias that can be used \ elsewhere in this file (see "mark-up"_#markup) :ul Table command: tb(c=3,b=5,w=100%,a=c) --> format the paragraph as a table :ul Arguments within tb() can appear in any order and are all optional, since they each have default values. c=N --> Make an N-column table. Treat the paragraph as one long list of entries (separated by the separator character) and put them into N columns one after the other. If N = 0, treat each line of the paragraph as one row of the table with as many columns as there are maximum entries in any line. Default is c=0. :ulb,l s=: --> Use the character string following the equal sign as the separator between entries. Default separator is a comma "," which you cannot specify directly since the comma delimits the tb() arguments :l b=N --> Create a border N pixels wide. If N is 0, there is no border between or outside the cells. If N is 1, there is a minimal border between and outside all cells. For N > 1, the border between cells does not change but the outside border gets wider. Default is b=1. :l w=N or w=N% --> The first form makes each cell of the table at least N pixels wide. The second form makes the entire table take up N% of the width of the browser window. Default is w=0 which means each cell will be just as wide as the text it contains. 
:l a=X --> Align the entire table at the left, center, or right of the browser window, for X = "l", "c", or "r". Default is a=c. :l ea=X --> Align the text in each entry at the left, center, or right of its cell, for X = "l", "c", or "r". Default is browser's default (typically left). :l eva=X --> Vertically align the text in each entry at the top, middle, baseline, or bottom of its cell, for X = "t", "m", "ba", or "bo". Default is browser's default (typically middle). :l cwM=N or cwM=N% --> The first form makes column M be at least N pixels wide. The second form makes column M take up N% of the width of the browser window. This setting overrides the "w" argument for column M. Only one column per table can be tweaked with this argument. Default is no settings for any column. :l caM=X --> Align the text in each entry of column M at the left, center, or right of its cell, for X = "l", "c", or "r". This setting overrides the "ea" argument for column M. Only one column per table can be tweaked with this argument. Default is no settings for any column. :l cvaM=X --> Vertically align the text in each entry of column M at the top, middle, baseline, or bottom of its cell, for X = "t", "m", "ba", or "bo". This setting overrides the "eva" argument for column M. Only one column per table can be tweaked with this argument. Default is no settings for any column. :l,ule :line [Mark-up:] :link(markup) The text of the paragraph is scanned for special mark-up characters which are converted into HTML. Bold and italic characters:
  • "[" (left bracket) --> turn-on bold by inserting a <B>
  • "]" (right bracket) --> turn-off bold by inserting a </B>
  • "{" (left brace) --> turn-on italics by inserting a <I>
  • "}" (right brace) --> turn-off italics by inserting a </I>
If a backslash ESCAPE character '\' precedes any of the bold/italic mark-up characters, then mark-up is not performed; the mark-up character is simply left in the text. Links are inserted by enclosing a section of text in double quotes, and appending an underscore to the ending quote, followed by the link. The link ends when whitespace is found, except that trailing punctuation characters (comma, period, semi-colon, colon, question mark, exclamation point, parenthesis) are not considered part of the link.

A link of the form "text"_link becomes <A HREF = "link">text</A> in the HTML output. The only exception is if "link" is defined elsewhere in the file as an alias (see the link command above). In that case, the value is used instead of the alias name.

With these rules, links can take several forms.
  • "This links"_#abc to another part of this file which is labeled with a :link(abc) command.
  • "This links"_other.html to another file named other.html.
  • "This links"_other.html#abc to another file which has an "abc" location defined internally.
  • "This links"_http://www.google.com to a WWW site.
  • "This"_M12 could be used in place of any of the above forms. It requires an alias like :link(M12,http://www.google.com) to be defined elsewhere in the file.
mrmpi-1.0~20131122/txt2html/equation.gif0000600000175000017500000010701411613342422017455 0ustar mathieumathieuGIF89añ?÷ÿ  !!!"""###$$$%%%&&&'''((()))***+++,,,---...///000111222333444555666777888999:::;;;<<<===>>>???@@@AAABBBCCCDDDEEEFFFGGGHHHIIIJJJKKKLLLMMMNNNOOOPPPQQQRRRSSSTTTUUUVVVWWWXXXYYYZZZ[[[\\\]]]^^^___```aaabbbcccdddeeefffggghhhiiijjjkkklllmmmnnnooopppqqqrrrssstttuuuvvvwwwxxxyyyzzz{{{|||}}}~~~€€€‚‚‚ƒƒƒ„„„………†††‡‡‡ˆˆˆ‰‰‰ŠŠŠ‹‹‹ŒŒŒŽŽŽ‘‘‘’’’“““”””•••–––———˜˜˜™™™ššš›››œœœžžžŸŸŸ   ¡¡¡¢¢¢£££¤¤¤¥¥¥¦¦¦§§§¨¨¨©©©ªªª«««¬¬¬­­­®®®¯¯¯°°°±±±²²²³³³´´´µµµ¶¶¶···¸¸¸¹¹¹ººº»»»¼¼¼½½½¾¾¾¿¿¿ÀÀÀÁÁÁÂÂÂÃÃÃÄÄÄÅÅÅÆÆÆÇÇÇÈÈÈÉÉÉÊÊÊËËËÌÌÌÍÍÍÎÎÎÏÏÏÐÐÐÑÑÑÒÒÒÓÓÓÔÔÔÕÕÕÖÖÖ×××ØØØÙÙÙÚÚÚÛÛÛÜÜÜÝÝÝÞÞÞßßßàààáááâââãããäääåååæææçççèèèéééêêêëëëìììíííîîîïïïðððñññòòòóóóôôôõõõööö÷÷÷øøøùùùúúúûûûüüüýýýþþþÿÿÿ!ù!þ4 Image generated by GNU Ghostscript (device=pnmraw) ,ñ?þÿýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?€ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûþ÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïþß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷à¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûðß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþý
û÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýøïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþü÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷þïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïßþ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?€ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûþ÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïþß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷à¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûðß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿ
þþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýøïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþü÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷þïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþû÷ïß¿ÿþýû÷ïßþ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?€ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûþ÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïþß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷à¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûðß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ï
ß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýøïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþü÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷þïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþû÷ïßþ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?€ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûþ÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïþß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷à¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûðß¿ÿþýû÷ïß¿ÿþý
û÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýøïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþü÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷þïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïßþ¿ÿþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?€ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûþ÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïþß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷à¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß
¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûðß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýøïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþü÷ïß¿ÿþýû÷ïß¿ÿþýû÷þïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïßþ¿ÿþýû÷ïß¿ÿþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?€ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿]Ýþýû÷ïß¿ÿþýûþ÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿»ºýû÷ïß¿ÿþýû÷ïÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïþß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþíêöà¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû€"Dˆ!B„Hdú÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿ¡êß¿ÿþýû÷ïß¿ÿþý‹@"Dˆ!B$2ýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿ
þýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþÝQ5CÕ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþý‹€"Dˆ!B„Hdú÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþE ‚"Dˆ!™þýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûwGÕ Uÿþýû÷ïß¿ÿþýøïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýþû÷ïß?D"¸û÷ïß¿ÿþuË4CÄ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûG„Ù."ÿþýû÷ïß¿ÿþýû÷ï"Üýû÷ïß¿ÿºeš!âß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷³]Dþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ï"Üýû÷ïß¿ÿºÌ4CÄ¿ÿþýû÷þïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷€îþýû÷ïß¿Ý2Íñïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?"Ìvù÷ïßþ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþý# Ê¿ÿþýû÷ïß¿fTýû÷ïß¿ÿþíÚµ«Û¿n»výû÷ïß¿îîüû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþÝù÷ïß¿ÿˆ¨ú÷ïß¿ÿþýû÷ïß¿ÿþPåß¿ÿþýû÷ïß?3ªþýû÷ïß¿ÿþíÚµ«Û¿n»výû÷ïß¿ÿÜÝù÷]Ýþýû÷ïß¿ÿþýû÷ïß¿ÿþ¹ÛÕíß¿ÿþý»óïß¿þÿþQõïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþPåß¿ÿþýû÷ïß?3ªþýû÷ïß¿ÿýû÷ïß¿»víêö¯Û®]ÿþýû÷ïß¿ÿþýûÈ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþ¹cöïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ÏÝÿþýû÷ïß¿ÿþýû÷ïß¿Ýþý»óïß?"Dþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿˆ¨òïß¿ÿþýû÷ïŸUÿþýû÷ïß¿ÿþýû÷ïß¿ÿvíÚÕí_·]»þýû÷ïß¿ÿÜþÝù÷ïß¿Ýþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ï_·ÿþýû÷ïßÿþýûGDÕ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû×m€Dˆˆ¨òïß?€ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ï_7îüû÷ïß¿ÿþýû§êß¿ÿþýûç.DÝþ©"’éß¿ÿþý3³ëß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷OÕÿþý»“©Û¿ÿþýû÷ïß¿ÿþýû÷¯›wþýû÷ïß¿ÿþýSõïß¿ÿþýûç.DÝþ©"þ’éß¿ÿþýûgf׿"òïß¿ÿþýû÷ïß¿ÿþýû玈*"ÿþýû÷;ÿþýûw'S·ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷¯›wþýû÷ïß¿ÿþýSõïß¿ÿþýû÷ïß¿ÿÜe€¨Àªˆdú÷ïß¿ÿþýû÷ïß?"ÿþÝ!Bäß¿ÿþýû÷ïß¿ÿþýûÇL‘ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿fvýû÷ïß¿ÿþýû÷ïß¿ÿþý3ó;ÿþý#òïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû×Mþ€;ÿþýû÷ïß¿ÿþ©ú÷ïß¿ÿþýû÷ïß¿ÿþýs— ¢nÿTÉôïß¿ÿþýûgf׿ÿþý3óïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿÌüû÷ïß¿ÿTÝù÷ïßLÝþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿»"¸û÷ïßÿþýû÷ïß¿ÿþýû÷à¿ÿþýû÷ïß¿ÿþ1@Õ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ªDüû§êοÿþýû÷o×ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿˆ0û÷ÏL•þ;ˆþýû÷/³ÿþýû÷ïß¿ÿ˜ 
êß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ªDüû§êοÿþýû÷ïß®;ÿþý#’éfÿþýû÷ïß¿ÿþýû÷ïß?"™þ`öïß¿ÿþaö*wýû÷ï_&fÿþýû÷ïß¿ÿþýû÷ïß¿ÿ˜ êß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷O•ˆÿTÝù÷ïß¿ÿþýûðß¿ÿþù÷o×"ÿþýû÷ï_&fÿþýû÷ïß¿wîüû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþíºóïß¿ÿþþýû÷ïß¿ÿþýû÷¢ÿþaöï‘ÿþýû—‰Ù¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÌPõïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?U"þýSuçß¿ÿþýû÷ïß®;ÿþýû÷¢ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýc†èß¿ÿþýû÷³fªÜAôïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþý»Cä_·Lÿ2ýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýøï""ºýû÷ïß¿ÿþýû÷ïß¿ÿþý»SåßUÿþýþû÷ïß¿fvýûçŽÙ?fîþýû÷ïß¿ÿþýû÷ïß¿î˜ýcæîß¿ÿþýû§ÊÌ¿»víêöïß¿wvýû÷ïß¿ÿþýû÷Ýþýû÷ïß¿ÿþýû÷ïß¿ÿþý»SåßUÿþýû÷ïß¿ÿÌìú÷O•wÿìú÷ïß¿ÿþýû÷ïß¿ÿT‰p÷À®ÿþýû÷O•™ÿvíÚÕíß¿ÿîìú÷ïß¿ÿþýû÷ïß¿ÿþýû÷Ýþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿wªü»£êß¿ÿþýû÷ïß¿ÿþýûGäß¿wˆü÷þïß¿ÿîìú÷ïß¿ÿºu¡êß¿ÿþýû÷¯Û]ÿþýswg׿ÿîdböÏÌ®ÿÜ1û×Ù¿ÿþýû÷ïß¿wTýû÷O•™ÿˆüû÷ïß¿;»þýû÷ïß¿ÿþýû÷ïß¿ÿþýû‡€ˆnÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþÝ©òÿþýû÷ïß¿ÿÌìú÷ïß¿wTýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïßUÿþýû÷ïß¿ªÌüÛµkW·ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß 3»ˆtû÷ïß¿ÿþýû÷Ñþ;wîܹ“éß¿ÿþýû÷ïß¿ÿª òïß¿ÿþû÷ïß¿ÿþýû÷ï_„*UDüû÷ïß¿ÿþý›ñïß?3"ˆ!òïß¿ÿþýû÷ïß¿ÿþý3#‚"ÿþýû÷ïß?f3þýû÷ïß¿ÿþý»³ëß¿ÿþýû÷ïß¿ÿª òïß¿ÿþýû÷ïß¿ÿþýû÷ï_„*UDüû÷ïß?w»Üýû7ãß¿wˆìBÁÝ¿ÿþýû÷ïß¿ÿþýûw‡È.DÜýû÷ïß¿Ìfüû÷ïß¿ÿþýûwg׿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿª òïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþý‹P¥Šˆÿþýû÷ïß?wÿþýû÷ïß?"ÿþý»Cäß¿ÿþý»³ëß¿ÿ¹«rç€nÿþýû÷ïß¿L3ÜAôo× wwÌü«Â,Ó3þýûWE‘;fýû÷ïß¿ÿî˜!rÇŒ»ÌfüûGäß¿ÿþÝÙõïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿Uù÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷/B•*"þýû÷ïß¿îþýû7ãß¿ÿî˜!rÇŒ»ÿþýû÷ïß¿ÿþýû÷ïß¿;fˆÜ1ãîß¿ÿþþýc6ãß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿î ÚUå_7Dþýû÷ïß¿ÌvíÚµk×.fÿþýû÷ïß¿ÿþýû÷o€;ÿþýû÷ïß¿ÿþýû÷ïß?€ÝÜqAÕ¿ÿEø÷ï€ÿþÝ™‘I•ªÿþýÛµk×®]»v¹û÷ïß™T©úçÎÌwÿþýðïß¿ÿþýÛµk—D»v1û÷ïß¿ÿþýû÷o€;ÿþýû÷ïß¿ÿþýû÷ïß¿ÝÜqAÕ¿ÿþÉ„èß?ÿþý‹pgW"ÿþýû·k×®]»vír÷ïß¿wvU!þòÏ™;îþýûàß¿ÿþýû·k×.3ˆvíböïß¿ÿþýû÷ïß¿ÿþýû÷o€;ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷¯€;î"¨ú÷ïß¿ÿþ!ú÷ïß¿ÿþù÷ïß"ÿþýÛµk—D»v1ûgÆÌ?U"þýû÷ïÀÿþÍPõ/³výû'BUD»ºýðïß¿;32ý»sçß¿ÿþý»Så¢Dvýûàß?"ÿþíÚµË ¢]»˜ýû÷ïß¿ÿþýû÷ïß¿ÿþýû7Àÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿n êþß¿ÿþýCôïß?ÿþý»Så¢Dvýû÷o×®]»víÚåîß¿ÿîTù‡è‘Wfî¸û÷ïß?ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýSÀÝ¿]ÿî 
ºóïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿»Tù÷ïß¿ÿþýû÷ïß¿ÿþeðïßÿþýCtà¿ÿüû÷ïfÿþýû÷ïß;wîܹs§Û¿ÿþ`öïß¿̘Uù÷ï€ÿþýû÷ï¢;wˆ˜¹s'Ó¿ÿþýû÷ïß¿»Tù÷ïß¿ÿþýû÷ïß¿þÿþýËàß¿;3þýû‡(S·;ÿþø÷ï€]ÿˆdú÷ïß¿;wîܹsçN·ÿþØõH¦3˜1«òïß?ÿþýû÷ïß?Dwî1sçN¦ÿþýû÷ïß¿ÿþýû÷ïß¿»Tù÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿™üûwgÆ¿ÿþýs—)Ó¿ÿþýû÷È¿ÿîù÷ï¢;wˆ˜¹s'Ó¿ªþU¹óïß¿ÿþýû`׿ÿþØðß?"wTqç€ÿþýÀìߌLÿþýû÷ï_fîý›±ëß?ÿþù÷Ñ;DÌܹ“éß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿]ªüû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþeðïßÿþýûç.S¦ÿþø÷ï_fîý›±ëß¿wîܹsçÎnÿþýû™;Dÿf˜`Ƭʿÿþø÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïŸ"ÿþýÛeÆ ‘*Ýþýû÷ï_"Dˆ!Bäοÿþýû÷ïß¿ÿþýC$ÂÝ¿ÿþýû÷ïß¿ÿþýû÷¯Š™ÿ˜ØõïŸ"Uºýñïß¿]3þýøïß¿ÿþýû÷ïß¿ÿþýû·kÆ¿ÿþþø÷/¿ÌDüû÷ïß¿ÿþýûwg׿ÿþýû÷ïß¿ÿþýC$ÂÝ¿ÿþýû÷ïß¿ÿþýû÷ï_3ÿþ1°ëß¿fªÜù÷OÄ¿ÿ¸Û5ãß¿ÿþýû÷ïß¿ÿþýû÷€»]3þýðï_„ÿ˜‰ø÷ïß¿ÿþýû÷ïήÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýC$ÂÝ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþý«bæß?fvýû÷O•™*ˆîüû÷ïß¿Dþýûw‡È¿ÿþýûwg׿ÿþp÷oF¦L»ˆ˜©²ëß¿ªþýÛõ/‚ªÿD¨2óþÏ™"þü÷o׌Ý"¨"CÄL•]ÿ"¸ËÄ ‘ÿ˜‰ø÷È¿ÿþý»³ëß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþ!áîß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷¯Š™ÿ˜ØõïŸ*3UÝù÷OÄ¿ÿ"¸ËÄ ‘ÿþýû÷ïß¿ÿþýû÷ïß¿î21CDÀ¿þýûÇLÄ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿w"¹s§JU"ºÝÙõïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷O•wÿþýû÷ïß¿ÿþýû÷ïßþ¿;DÌìú—‰ˆ™]ÿDt»³ë_•]ÿþݹóïß¿ÿþýû÷ïß¿ÿþû÷ïß¿;wþýû÷À¿þýSUåß¿ÿþýû÷ïß¿;»þýû÷ïß¿ÿþýû÷O•wÿþýû÷ïß¿ÿþýû÷ïß¿wˆ˜Ùõ/3»þEøg†Ù¿Uvýûwç‘nÿþýû÷ïß¿ÿþýû÷ïß¿wîéöï€ÿüû§ªÊ¿ÿþýû÷ïß¿wvýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷O•wÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ï3»þe"bf׿"ÜÝéFÄ]7fþîþý#òïß*Dîüû÷ïß¿;»þýû÷¯Ê3ªìRE$“*"ÿþ1#¢ÊÌ®»ˆ¨2sç_„Lˆîü«²ëß¿;wþ1›QE‘LUù‡¨J•;DÜýû§ªÊ¿Dþýû÷ïß]ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû§J„»ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿;DÌìú—‰ˆ™]ÿD¸»ÓÈ¿Uvýû‡¨J•;DÜýû÷ïß¿ÿþýû÷ïß¿ÿþ!ªRåwþýðïß?UUþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþý›ñO„»ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿]"výû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû7ãŸwÿTÝù÷Ï»ÿþýû÷ïß¿ÿþýû÷ïß¿ÿܹû÷ïß?€ºu›ñï_fÿþýû÷ïß¿ÿþ!Úõïß¿ÿþýû÷ïß¿]"výû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷¯Ê®*ÿþýSuçß¿»þýû÷ïß¿ÿþýû÷ïß¿ÿþýû·ëß¿ÿ"të6ãß¿*Ìþýû÷ïß¿ÿþýC´ëß¿ÿþýû÷ïß¿ÿþýûþ÷ïß¿]"výû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþý‹Ð ÑUwD 
2óï‘ÿþýû÷ïß¿ÿþ!Úõïß¿ÿv¹sÇìß¿ÿÌìú÷]îþýû÷o—»ÿº1ëö;ÿþ¹s÷ï3wÿ"ü3³ëß?wDÜýû÷ïÀ*Ìþý#òïß¿ÿíú÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß.»þýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû¡¢;ªþýSuçß¿û÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïþŸ;"îþý‹Ð­ÛŒÿþUaöïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷;™þýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿº‰Èôïß¿ÿþýû÷ÿþýû÷ïß¿ÿþýû÷ïß?3w2ýû÷H·ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþ©ªbf×?ffþýû÷à¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿº‰Èôïß¿ÿþýû÷ÿþýû÷ïß¿ÿþýû÷ïß¿™î¸û÷ïß?"Ýþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû§ªŠ™]ÿ˜™ù÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿº‰Èôïß¿ÿþýû÷ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß."UvýëFd×ÿˆüû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿ˜E0sçß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþéöïß¿ÿþýcÁÌÿþ1Côïß¿ÌÌüû÷È¿ÿþýûðß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÝDdú÷ïß¿ÿþýû÷OÕ¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿]Dªìú÷ïß?"Ýþýc†èß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿ˜!ú÷*fvýûÇÌÌ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýë6#Ó¿ÿþýû÷ïß¿fTýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿îîüû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþ™ù÷ïß¿ÿþýû÷ïß¿ÿþýøïß¿ÿþýë6#Ó¿ÿþýþû÷ïß¿fTýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþ¹»óïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûgæß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýë6#Ó¿ÿþýû÷ïß¿fTýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþÝù÷ïŸ*3ˆÜýûG„È¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûwçοÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ÏÝÿþýû÷ïß;ÿþýû÷¯[·ÿþý3óïß?"þýû÷þïß¿ÿþýû÷ïß¿ÿþü÷ïß¿ÿþu›‘éß¿ÿþýû÷ïß?3ªþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþý»óïß¿ÿþ¹»óï_·nÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû×­Û¿ÿþýû÷ïß?3ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïߌ;ÿþýû÷ïß?f»Íñïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïßþ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷oÆÿþû÷ïß?f»Íñïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïߌ;ÿþýû÷ïß?f»Íñïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷¯Û¿ÿÌ 
ú÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿]»þýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿvíú÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû7ãοÿýû÷ï³]ˆfˆø÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ï_·ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿fþÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?3àÎ?€ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿fÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûþ÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïŸpçÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïþß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýÛµk×®]»víÚµk×®]»Üýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþíÚµk×®]»víÚµk×®]îþýû÷ïß¿ÿþýû÷à¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýÛµk×®]»víÚµk×®]»Üýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿvíÚµk×®]»víÚµk×.wÿþýû÷ïß¿ÿþýûðß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýøïß¿ÿþýþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷þïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþü÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïßþ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷
ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿Dîüû÷ïß¿ÿþýû÷ïß¿ÿvEø÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ï‘;ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?€ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýÛá‘;ÿˆÜù÷ïß¿ÿþýû÷oW„ÿþýû÷ïß¿Dîüû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûþ÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ï‘;ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþý»sçþß¿ÿ˜uû÷ïß¿ÿþýû÷È?w»Üýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿîÜùǬÛ?w»Üýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷à¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?"ÿþýûwçοÿþ1ëöïß¿ÿˆüû÷ïß¿ÿþý»s矻þ]îþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷Y·ÿþýû÷ïß¿̺ýû÷ïß?w»Üýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûðß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß*™ˆt3sçÎDˆd"òïÎL"þ‰Øeæß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷*Ýþ!’‰È?Dwˆ˜ùw玙;fv‰Øeæß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýþøïß¿ÿþýû÷ïß¿;w2‰øGäο;D2ùg掙]ÿîÜÉ$â"И©Òíß;ÿDì2óïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûGäÎ?"D2Í@T%Ó?"U2éF„H„,wîÜùG¤J&"Ýþ™¹cf"º‰Øeæß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷þïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþü÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿîÜùwç€ÿ"ü»sç‘Üý#òˆˆ;ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ª2™ÙõïÎDþUÙug׿;w¨Ê”‰ˆˆ;ÿþýû÷ïß¿ÿþýû÷ïßþ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿp÷È¿;wþݹóÈ?ª2eúÀÝ?"ÿîÜù§*“™]ÿîÜùGDÄÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþ
ýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿;wþݹóÈ¿;wþݹóïÎ;þwþø÷/¿;wþݹó€ªL™îÜù÷ˆˆ;ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûwçÎ?3ˆ"të&âß;ÿˆü‹Ðíߌw܉ø÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþý‘ÉL¦nwîü#òïÎLÿþݹÙ?fw܉ø÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû¡Û¿ÿîÜùwçÎ?€DþE`öÙ¿ÝþÍøwçÎ?™Ìdêvçο;îDüû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûþ÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþݹóïÎDþݹóïÎUݹó/B·n"þÝ¹ó¯ ¢˜ýcvçοw܉ø÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿]ˆî úÇìÎfÿ2eêv§³;ˆÌt»sÑ¿ÿþýû÷ïß¿ÿþýû÷ïþß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß Dwf0Ë”©ÛnDîÜ©ò/S&fw1»sÑ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?fw™é–)Ó¿L™ºÝéÆì"fÿ˜ÝAd`·L™ÜÍ@tg³L™þݹƒèß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷/S¦™2u»Ó-"wwªÜ¹ó/S&wÌîÜaöïN•;wþýcv³L™Üý»sÑ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûðß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþý›Ñ­ÛŒÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýþû÷ïß¿ÿþýøïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþý3cæß¿ÿþýû÷ïß¿fÌüû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷þïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþü÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ï_7DˆÜýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïßþ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ï
ß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß®]ÿþýû÷ïß¿ÿþýÛµëß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?€ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûþ÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïþß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷à¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûðß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýþû÷ïß¿ÿþýû÷ïß¿ÿþýøïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷þïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþü÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïßþ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþ
ýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?€ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûþ÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïþß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷à¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûðß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýøïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷þïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþü÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïßþ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþû÷ïß
¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?€ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûþ÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïþß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷à¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýûðß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýøïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷þïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþü÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïßþ¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþý
û÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿þÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß?€ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýûþ÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïÀÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïþß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷à¿ÿþýû÷ïß¿ÿþýû÷ïß¿<ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþýû÷ïß¿ÿþý ;mrmpi-1.0~20131122/txt2html/README.html0000600000175000017500000002460611613342422016771 0ustar mathieumathieu

txt2html - a text to HTML conversion tool

txt2html is a simple tool for converting text files into HTML files. Text files can contain simple formatting and mark-up commands that txt2html converts into HTML.

txt2html was written by Steve Plimpton. I use it for documentation and WWW pages. Anna Reese added the table formatting options.

See the example.txt and example.html files in the txt2html directory for examples of what all the formatting commands and mark-up syntax end up looking like in HTML.


Syntax:

txt2html file
read from text file, write HTML to standard output
txt2html file1 file2 file3 ...
read each argument as text file, write one HTML file per argument

Input files are first opened with the specified name. If that fails, a ".txt" suffix is added. Output files are created with an ".html" suffix, which is either added or replaces the ".txt" suffix.


Compiling:

The source for txt2html is a single C++ file. Compile it by typing:

g++ -o txt2html txt2html.cpp 

How the tool works:

txt2html reads a text file, one paragraph at a time. A paragraph ends with:

  • a blank line
  • a line whose final word starts with ":" (a format string)
  • the end of the file

Any line in the paragraph which ends with "\" is concatenated to the following line by removing the "\" character and following newline. This can be useful for some of the formatting commands described below that operate on individual lines in the paragraph.

If a paragraph starts with a "<" character and ends with a ">" character, it is treated as raw HTML and is written directly into the output file.

If a paragraph does not end with a format string, then it is surrounded with HTML paragraph markers (<P> and </P>), mark-up is performed, and the paragraph is written to the output file.

If the paragraph ends with a format string, then formatting is performed, mark-up is performed, and the paragraph is written to the output file.


Formatting:

A format string is the last word of a paragraph if it starts with a ":" character. A format string contains one or more comma-separated commands, like ":ulb,l" or ":c,h3". Note that a format string cannot contain spaces, else it would not be the last word. An individual command can have 0 or more arguments:

  • b or line() = 0 arguments
  • image(file) = 1 argument
  • link(alias,value) = 2 or more comma-separated arguments

Format commands add HTML markers at the beginning or end of the paragraph and individual lines. Commands are processed in the order they appear in the format string. Thus if two commands add HTML markers to the beginning of the paragraph, the 2nd command's marker will appear 2nd. The reverse is true at the end of the paragraph; the 2nd command's marker will appear 1st. Some commands, like line or image, make most sense if used as stand-alone commands without an accompanying paragraph.

Commands that format the entire paragraph:

  • p --> surround the paragraph with <P> </P>
  • b --> put <BR> at the end of the paragraph
  • pre --> surround the paragraph with <PRE> </PRE>
  • c --> surround the paragraph with <CENTER> </CENTER>
  • h1,h2,h3,h4,h5,h6 --> surround the paragraph with <H1> </H1>, etc

Commands that format the lines of the paragraph as a list:

  • ul --> surround the paragraph with <UL> </UL>, put <LI> at start of every line
  • ol --> surround the paragraph with <OL> </OL>, put <LI> at start of every line
  • dl --> surround the paragraph with <DL> </DL>, alternate <DT> and <DD> at start of every line

Commands that treat the paragraph as one entry in a list:

  • l --> put <LI> at the beginning of the paragraph
  • dt --> put <DT> at the beginning of the paragraph
  • dd --> put <DD> at the beginning of the paragraph
  • ulb --> put <UL> at the beginning of the paragraph
  • ule --> put </UL> at the end of the paragraph
  • olb --> put <OL> at the beginning of the paragraph
  • ole --> put </OL> at the end of the paragraph
  • dlb --> put <DL> at the beginning of the paragraph
  • dle --> put </DL> at the end of the paragraph

Commands applied to each line of the paragraph:

  • all(p) --> surround each line with <P> </P>
  • all(c) --> surround each line with <CENTER> </CENTER>
  • all(b) --> append a <BR> to each line
  • all(l) --> prepend a <LI> to each line

Special commands (all HTML is inserted at beginning of paragraph):

  • line --> insert a horizontal line = <HR>
  • image(file) --> insert an image = <IMG SRC = "file">
  • image(file,link) --> insert an image that when clicked on goes to link
  • link(name) --> insert a named link that can be referred to elsewhere (see mark-up) = <A NAME = "name"></A>
  • link(alias,value) --> define a link alias that can be used elsewhere in this file (see mark-up)

Table command:

  • tb(c=3,b=5,w=100%,a=c) --> format the paragraph as a table

Arguments within tb() can appear in any order and are all optional, since they each have default values.

  • c=N --> Make an N-column table. Treat the paragraph as one long list of entries (separated by the separator character) and put them into N columns one after the other. If N = 0, treat each line of the paragraph as one row of the table, with as many columns as the largest number of entries in any line. Default is c=0.
  • s=: --> Use the character string following the equal sign as the separator between entries. Default separator is a comma "," which you cannot specify directly since the comma delimits the tb() arguments.
  • b=N --> Create a border N pixels wide. If N is 0, there is no border between or outside the cells. If N is 1, there is a minimal border between and outside all cells. For N > 1, the border between cells does not change but the outside border gets wider. Default is b=1.
  • w=N or w=N% --> The first form makes each cell of the table at least N pixels wide. The second form makes the entire table take up N% of the width of the browser window. Default is w=0 which means each cell will be just as wide as the text it contains.
  • a=X --> Align the entire table at the left, center, or right of the browser window, for X = "l", "c", or "r". Default is a=c.
  • ea=X --> Align the text in each entry at the left, center, or right of its cell, for X = "l", "c", or "r". Default is browser's default (typically left).
  • eva=X --> Vertically align the text in each entry at the top, middle, baseline, or bottom of its cell, for X = "t", "m", "ba", or "bo". Default is browser's default (typically middle).
  • cwM=N or cwM=N% --> The first form makes column M be at least N pixels wide. The second form makes column M take up N% of the width of the browser window. This setting overrides the "w" argument for column M. Only one column per table can be tweaked with this argument. Default is no settings for any column.
  • caM=X --> Align the text in each entry of column M at the left, center, or right of its cell, for X = "l", "c", or "r". This setting overrides the "ea" argument for column M. Only one column per table can be tweaked with this argument. Default is no settings for any column.
  • cvaM=X --> Vertically align the text in each entry of column M at the top, middle, baseline, or bottom of its cell, for X = "t", "m", "ba", or "bo". This setting overrides the "eva" argument for column M. Only one column per table can be tweaked with this argument. Default is no settings for any column.

Mark-up:

The text of the paragraph is scanned for special mark-up characters which are converted into HTML.

Bold and italic characters:

  • "[" (left bracket) --> turn-on bold by inserting a <B>
  • "]" (right bracket) --> turn-off bold by inserting a </B>
  • "{" (left brace) --> turn-on italics by inserting a <I>
  • "}" (right brace) --> turn-off italics by inserting a </I>

If a backslash '\' precedes any of the bold/italic mark-up characters, then mark-up is not performed; the mark-up character is simply left in the text.

Links are inserted by enclosing a section of text in double quotes, and appending an underscore to the ending quote, followed by the link. The link ends when whitespace is found, except that trailing punctuation characters (comma, period, semi-colon, colon, question mark, exclamation point, parentheses) are not considered part of the link.

A link of the form "text"_link becomes <A HREF = "link">text</A> in the HTML output. The only exception is if "link" is defined elsewhere in the file as an alias (see the link command above). In that case, the value is used instead of the alias name.

With these rules, links can take several forms.

  • "This links"_#abc to another part of this file which is labeled with a :link(abc) command.
  • "This links"_other.html to another file named other.html.
  • "This links"_other.html#abc to another file which has an "abc" location defined internally.
  • "This links"_http://www.google.com to a WWW site.
  • "This"_M12 could be used in place of any of the above forms. It requires an alias like :link(M12,http://www.google.com) to be defined elsewhere in the file.
mrmpi-1.0~20131122/txt2html/txt2html.cpp0000600000175000017500000005721711613342422017444 0ustar mathieumathieu// txt2html - written by Steve Plimpton, May 2004 // table formatting by Anna Reese, Jul 2004 // Sandia National Labs, www.cs.sandia.gov/~sjplimp // // txt2html converts a text file with simple formatting & markup into HTML // formatting & markup specification is given in README // // Syntax: txt2html options file read one file, write to stdout // txt2html optoins file1 file2 ... read files, write files.html // // options: // -b = add a page-break comment to end of each HTML file // useful when set of HTML files will be converted to PDF // -x file = skip a file even if it appears in file list // specify full file name of input file // input files are first opened as-is // if that fails a .txt suffix is added // output files have an .html suffix added or replaced // (unless written to stdout) #include #include #include #include #include #include using namespace std; #define MAXLINE 1024 // function prototypes int next_paragraph(FILE *fp, string ¶graph); int index_of_first_char_of_last_word(string ¶graph); void process_commands(int flag, string &s, string &pre, string &post); void substitute(string &s); string td_tag(int currentc); int find_n(string &s, int nend, int &n1); void file_open(int npair, string &infile, FILE **in, FILE **out); // global variables for links, tables, lists, all command vector alias1; vector alias2; int nlink; int tableflag; // makes a table if tb command specified int rowquit; // number of cols per row if c=N specified (default = 0) string dwidth; // width for all of the columns string tabledelim; // speciallized separator string tablealign; // alignment for the table as an image string dataalign; // alignment for data in table string rowvalign; // vertical alignment for table int ncnum; // # of columns with specified width vector cnum; // column IDs vector cwidth; // column widths int ncalign; // # of columns with specified 
alignment vector acolnum; // column IDs vector colalign ; // column alignment int ncvalign; // # of columns with specified vertical alignment vector vacolnum; // column IDs vector colvalign ; // column vertical alignment string listflag; string allflag; // main program int main(int narg, char **arg) { int i,n,npair; string *infile; FILE *in,*out; int style,ifirst,ilast; string raw,pre,post,body,commands,final; // parse command-line options and args // setup list of files to process // npair = # of files to process // infile = input file names if (narg == 1) { fprintf(stderr,"Syntax: txt2html options file\n"); fprintf(stderr," txt2html options file1 file2 ...\n"); exit(1); } int breakflag = 0; int nskip = 0; char **skipfiles = NULL; int iarg = 1; while (arg[iarg][0] == '-') { if (strcmp(arg[iarg],"-b") == 0) breakflag = 1; else if (strcmp(arg[iarg],"-x") == 0) { skipfiles = (char **) realloc(skipfiles,(nskip+1)*sizeof(char *)); n = strlen(arg[iarg+1]) + 1; skipfiles[nskip] = new char[n]; strcpy(skipfiles[nskip],arg[iarg+1]); nskip++; iarg++; } else { fprintf(stderr,"Syntax: txt2html options file\n"); fprintf(stderr," txt2html options file1 file2 ...\n"); exit(1); } iarg++; } if (narg-iarg == 1) { npair = 1; infile = new string[npair]; infile[0] = arg[narg-1]; } else { npair = narg-iarg; infile = new string[npair]; for (int i = 0; i < npair; i++) infile[i] = arg[i+iarg]; } // loop over files for (int ipair = 0; ipair < npair; ipair++) { // skip file if matches -x switch int flag = 0; for (int i = 0; i < nskip; i++) if (strcmp(infile[ipair].c_str(),skipfiles[i]) == 0) flag = 1; if (flag) continue; // clear global variables before processing file alias1.clear(); alias2.clear(); nlink = 0; tableflag = 0; listflag = ""; allflag = ""; // open files & message to screen file_open(0,infile[ipair],&in,&out); fprintf(stderr,"Converting %s ...\n",infile[ipair].c_str()); // scan file for link definitions // read file one paragraph at a time // process commands, looking only for 
link definitions while (style = next_paragraph(in,raw)) { if (style == 2) { int n = index_of_first_char_of_last_word(raw); commands = raw.substr(n+1); process_commands(0,commands,pre,post); } raw.erase(); } // close & reopen files fclose(in); file_open(npair,infile[ipair],&in,&out); // write leading fprintf(out,"\n"); // process entire file // read file one paragraph at a time // delete newlines when line-continuation char at end-of-line // process commands for each paragraph // substitute text for each paragraph // write HTML to output file while (style = next_paragraph(in,raw)) { n = raw.find("\\\n"); while (n != string::npos) { raw.erase(n,2); n = raw.find("\\\n"); } ifirst = raw.find_first_not_of(" \t\n"); ilast = raw.find_last_not_of(" \t\n"); pre.erase(); post.erase(); if (raw[ifirst] == '<' && raw[ilast] == '>') { body = raw; } else if (style == 1) { body = raw; commands = "p\n"; process_commands(1,commands,pre,post); substitute(body); } else { int n = index_of_first_char_of_last_word(raw); body = raw.substr(0,n) + "\n"; commands = raw.substr(n+1); process_commands(1,commands,pre,post); substitute(body); } final = pre + body + post; fprintf(out,"%s\n",final.c_str()); raw.erase(); } // write trailing if (breakflag) fprintf(out,"\n"); fprintf(out,"\n"); // close files fclose(in); if (out != stdout) fclose(out); } // clean up memory for (i = 0; i < nskip; i++) delete [] skipfiles[i]; if (skipfiles) free(skipfiles); delete [] infile; } // return next paragraph as string // discard leading blank lines // paragraph is terminated by: // EOF or blank line or line ending with command that starts with ":" // return 0 if EOF and no paragraph // return 1 if no trailing command // return 2 if trailing command int next_paragraph(FILE *fp, string ¶graph) { char *ptr; char str[MAXLINE]; int first = 1; while (1) { ptr = fgets(str,MAXLINE,fp); if (ptr == NULL && first) return 0; if (ptr == NULL) return 1; if (strlen(str) == MAXLINE-1) { fprintf(stderr,"ERROR: File has 
too-long a string - increase MAXLINE\n"); exit(1); } if (strspn(str," \t\n") == strlen(str) && first) continue; if (strspn(str," \t\n") == strlen(str)) return 1; first = 0; paragraph += str; if (paragraph[index_of_first_char_of_last_word(paragraph)] == ':') return 2; } } // return index of first char in last word of paragraph string int index_of_first_char_of_last_word(string ¶graph) { int n = paragraph.find_last_not_of(" \t\n"); int m = paragraph.find_last_of(" \t\n",n); if (m == string::npos) return 0; else return m+1; } // apply commands one after the other to the paragraph void process_commands(int flag, string &s, string &pre, string &post) { int start,stop,last,narg; string command; vector arg; start = 0; last = s.find_last_not_of(" \t\n"); while (start <= last) { // grab a single command with optional arguments // command = name of command // narg = # of args // arg = list of argument strings stop = s.find_first_of(",( \t\n",start); if (s[stop] == '(') { command = s.substr(start,stop-start); start = stop+1; narg = 0; while (1) { stop = s.find_first_of(",)",start); if (stop == string::npos) { fprintf(stderr,"ERROR: No trailing parenthesis in %s\n",s.c_str()); exit(1); } arg.resize(narg+1); arg[narg] = s.substr(start,stop-start); narg++; start = stop+1; if (s[stop] == ')') { start++; break; } } } else { command = s.substr(start,stop-start); start = stop+1; narg = 0; } // if only in scan mode, just operate on link command if (flag == 0) { if (command == "link" && narg == 2) { // s.erase(s.length()-1,1); for (int i = 0; i < nlink; i++) if (alias1[i] == arg[0]) { fprintf(stderr,"ERROR: Link %s appears more than once\n", arg[0].c_str()); exit(1); } alias1.resize(nlink+1); alias2.resize(nlink+1); alias1[nlink] = arg[0]; alias2[nlink] = arg[1]; nlink++; } else continue; } // process the command if (command == "line") { pre.append("
"); } else if (command == "p") { pre.append("

"); post.insert(0,"

"); } else if (command == "pre") { pre.append("
");
      post.insert(0,"
"); } else if (command == "c") { pre.append("
"); post.insert(0,"
"); } else if (command == "h1") { pre.append("

"); post.insert(0,"

"); } else if (command == "h2") { pre.append("

"); post.insert(0,"

"); } else if (command == "h3") { pre.append("

"); post.insert(0,"

"); } else if (command == "h4") { pre.append("

"); post.insert(0,"

"); } else if (command == "h5") { pre.append("
"); post.insert(0,"
"); } else if (command == "h6") { pre.append("
"); post.insert(0,"
"); } else if (command == "b") { post.insert(0,"
"); } else if (command == "ulb") { pre.append("
    "); } else if (command == "ule") { post.insert(0,"
"); } else if (command == "olb") { pre.append("
    "); } else if (command == "ole") { post.insert(0,"
"); } else if (command == "dlb") { pre.append("
"); } else if (command == "dle") { post.insert(0,"
"); } else if (command == "l") { pre.append("
  • "); } else if (command == "dt") { pre.append("
    "); } else if (command == "dd") { pre.append("
    "); } else if (command == "ul") { listflag = command; pre.append("
      "); post.insert(0,"
    "); } else if (command == "ol") { listflag = command; pre.append("
      "); post.insert(0,"
    "); } else if (command == "dl") { listflag = command; pre.append("
    "); post.insert(0,"
    "); } else if (command == "link") { if (narg == 1) { string aname = ""; pre.append(aname); } } else if (command == "image") { if (narg == 1) { string img = ""; pre.append(img); } else if (narg == 2) { string img = "" + "" + ""; pre.append(img); } }else if (command == "tb") { // read the table command and set settings tableflag = 1; string tableborder = "1"; // these are the table defaults rowquit = 0; tablealign = "c"; dataalign = "0"; rowvalign = "0"; ncnum = 0; ncalign = 0; ncvalign = 0; cnum.clear(); acolnum.clear(); vacolnum.clear(); cwidth.clear(); colalign.clear(); colvalign.clear(); tabledelim = ","; string tw = ""; dwidth = "0"; for (int i = 0; i < narg; i++) { // loop through each tb() arg int tbstop; string tbcommand; tbstop = 0; tbstop = arg[i].find("="); tbcommand = arg[i].substr(0,tbstop); int n = arg[i].length(); if (tbstop == -1) { continue; } else if (tbcommand == "c") { string collumn= arg[i].substr (tbstop+1,n-(tbstop+1)); rowquit = atoi(collumn.c_str()); } else if (tbcommand == "s") { tabledelim= arg[i].substr (tbstop+1,n-(tbstop+1)); } else if (tbcommand == "b") { tableborder= arg[i].substr (tbstop+1,n-(tbstop+1)); } else if (tbcommand == "w") { string width = "0"; if (arg[i].substr (n-1,1) == "%") { string width = arg[i].substr (tbstop+1,n-(tbstop+1)); tw = " WIDTH=\"" + width + "\""; } else dwidth = arg[i].substr (tbstop+1,n-(tbstop+1)); } else if (tbcommand == "ea") { dataalign= arg[i].substr (tbstop+1,n-(tbstop+1)); } else if (tbcommand == "eva") { rowvalign= arg[i].substr (tbstop+1,n-(tbstop+1)); } else if (tbcommand == "a") { tablealign= arg[i].substr (tbstop+1,n-(tbstop+1)); } else if (tbcommand.substr(0,2) == "cw") { string cwnum= tbcommand.substr(2,tbstop-1); cnum.resize(ncnum+1); cnum[ncnum] = atoi(cwnum.c_str()); cwidth.resize(ncnum+1); cwidth[ncnum]= arg[i].substr(tbstop+1,n-(tbstop+1)); ncnum++; } else if (tbcommand.substr(0,2) == "ca") { string canum= tbcommand.substr(2,tbstop-1); acolnum.resize(ncalign+1); acolnum[ncalign] = 
atoi(canum.c_str()); colalign.resize(ncalign+1); colalign[ncalign]= arg[i].substr(tbstop+1,n-(tbstop+1)); ncalign++; } else if (tbcommand.substr(0,3) == "cva") { string cvanum= tbcommand.substr(2,tbstop-1); vacolnum.resize(ncvalign+1); vacolnum[ncvalign] = atoi(cvanum.c_str()); colvalign.resize(ncvalign+1); colvalign[ncvalign]= arg[i].substr(tbstop+1,n-(tbstop+1)); ncvalign++; } else { fprintf(stderr, "ERROR: Unrecognized table command %s\n",tbcommand.c_str()); exit(1); } tbstop = s.find("="); } string align; if (tablealign=="c") align="center"; else if (tablealign=="r") align="right "; else if (tablealign=="l") align="left "; else align="center"; string tablea = "
    " ; pre.append(tablea); pre.append("\n"; pre.append(border); post.insert(0,"
    \n"); } else if (command == "all") { if (narg == 1) allflag = arg[0]; } else { fprintf(stderr,"ERROR: Unrecognized command: %s\n",command.c_str()); exit(1); } } } // perform substitutions within text of paragraph void substitute(string &s) { int n,m,p; char c; string text,link,href; string punctuation = ".,?!;:()"; // substitute for bold & italic markers // if preceeded by \ char, then leave markers in text n = s.find_first_of("[]{}"); while (n != string::npos) { c = s[n]; if (n > 0 && s[n-1] == '\\') s.erase(n-1,1); else { s.erase(n,1); if (c == '[') s.insert(n,""); else if (c == ']') s.insert(n,""); else if (c == '{') s.insert(n,""); else if (c == '}') s.insert(n,""); } n = s.find_first_of("[]{}",n); } // substitute for links n = s.find("\"_"); while (n != string::npos) { m = s.rfind("\"",n-1); if (m == string::npos) { fprintf(stderr,"ERROR: Could not find matching \" for \"_ in %s\n", s.c_str()); exit(1); } p = s.find_first_of(" \t\n",n) - 1; if (p == string::npos) { fprintf(stderr,"ERROR: Could not find end-of-link in %s\n",s.c_str()); exit(1); } while (s.find_first_of(".,?!;:()",p) == p) p--; text = s.substr(m+1,n-m-1); link = s.substr(n+2,p-n-1); for (int i = 0; i < nlink; i++) if (alias1[i] == link) { link = alias2[i]; break; } s.erase(m,p-m+1); href = "" + text + ""; s.insert(m,href); n = s.find("\"_"); } // format the paragraph as a table if (tableflag) { tableflag = 0; string DT; // set up tag // alignment for data in rows string tbalign; if (dataalign != "0"){ string align; if (dataalign=="c") align="\"center\""; else if (dataalign=="r") align="\"right\""; else if (dataalign=="l") align="\"left\""; else { fprintf(stderr, "ERROR: Unrecognized table alignment argument %s for ea=X\n", dataalign.c_str()); exit(1); } tbalign = " ALIGN=" + align; } else tbalign=""; // set up vertical alignment for particular columns string va; if (rowvalign != "0"){ string valign; if (rowvalign == "t") valign= "top"; else if (rowvalign == "m") valign= "middle"; else if 
(rowvalign == "ba") valign= "baseline"; else if (rowvalign == "bo") valign= "bottom"; else { fprintf(stderr, "ERROR: Unrecognized table alignment argument %s for eva=X\n", rowvalign.c_str()); exit(1); } va = " VALIGN =\"" + valign + "\""; } else va=""; //tr_tag is keyword for data in rows string tr_tag= ""; //declare integers to help with counting and finding position int currentc=0; // current column int nend = 0; int n1=0; int n = find_n(s,nend,n1); // if there are no separators, go to the end of the stringx if (n < 0) n = s.length(); // while n exists: while (n != string::npos) { // ignore = 0 when pass by \n because looking for delimiters only // when ignore==0 do not put in a int ignore=1; // For each loop starts nend at n nend=n; // current column is 0, (very first loop), insert first if (currentc == 0){ currentc++; DT=td_tag(currentc); s.insert(0,tr_tag); s.insert(tr_tag.length(),DT); nend=nend+tr_tag.length()+DT.length(); n = find_n(s,nend,n1); if (n==n1) currentc++; else { // currentc will remain one if rowquit==0 if (rowquit>0){ s.erase(n,1); n = find_n(s,nend,n1); currentc++; } } } else { // if n is separator if (n == n1){ s.erase(n,tabledelim.length()); if(currentc==(rowquit+1)&& rowquit!=0){ s.insert(nend,"\n"); nend=nend+11; // set current column back to one to start new line currentc=1; }else{ DT= td_tag(currentc); s.insert (nend,""); nend=nend+5; s.insert (nend,DT); nend=nend+DT.length(); // add one so current column is updated currentc++; n = find_n(s,nend,n1); } } //if n is newline character else{ s.erase(n,1); // if columns == 0 means ARE searching for newlines // else erase and ignore insert later and // search for next separator if (rowquit==0){ s.insert(nend,"\n"); nend=nend+11; // set current column back to one to start new line currentc=1; }else{ ignore=0; n = find_n(s,nend,n1); } } // if we are at the beginning of the row then insert if (currentc==1&&ignore) { DT = td_tag(currentc); // find DT for currentc=1 s.insert(nend,tr_tag); 
nend=nend+tr_tag.length(); s.insert(nend,DT); n = find_n(s,nend,n1); // search for next separator currentc++; } } // end to else statement } // end to while loop } // end to if tableflag // if listflag is set, put list marker at beginning of every line if (listflag != "") { string marker; int toggle = 0; n = s.find('\n'); while (n != string::npos) { m = s.rfind('\n',n-1); if (listflag == "dl" && toggle == 0) marker = "
    "; else if (listflag == "dl" && toggle == 1) marker = "
    "; else marker = "
  • "; if (m == string::npos) s.insert(0,marker); else s.insert(m+1,marker); n = s.find('\n',m+1); n = s.find('\n',n+1); if (toggle) toggle = 0; else toggle = 1; } listflag = ""; } // if allflag is set, add markers to every line if (allflag != "") { string marker1,marker2; if (allflag == "p") { marker1 = "

    "; marker2 = "

    "; } else if (allflag == "c") { marker1 = "
    "; marker2 = "
    "; } else if (allflag == "b") { marker1 = ""; marker2 = "
    "; } else if (allflag == "l") { marker1 = "
  • "; marker2 = ""; } else marker1 = marker2 = ""; n = s.find('\n'); while (n != string::npos) { m = s.rfind('\n',n-1); if (m == string::npos) s.insert(0,marker1); else s.insert(m+1,marker1); n = s.find('\n',m+1); s.insert(n,marker2); n = s.find('\n',n); n = s.find('\n',n+1); } allflag = ""; } } // open input file as-is or as file.txt // if npair = 0, don't open output file (is just initial pass thru input) // if npair = 1, open output file as stdout // if npair > 1, open output file with .html suffix // either replace .txt in input file, or append .html void file_open(int npair, string &infile, FILE **in, FILE **out) { *in = fopen(infile.c_str(),"r"); if (*in == NULL) { string root = infile; infile = infile + ".txt"; *in = fopen(infile.c_str(),"r"); if (*in == NULL) { fprintf(stderr,"ERROR: Could not open %s or %s\n", root.c_str(),infile.c_str()); exit(1); } } if (npair == 0) return; else if (npair == 1) *out = stdout; else { string outfile; int pos = infile.rfind(".txt"); if (pos == infile.length()-4) outfile = infile.substr(0,pos) + ".html"; else outfile = infile + ".html"; *out = fopen(outfile.c_str(),"w"); if (*out == NULL) { fprintf(stderr,"ERROR: Could not open %s\n",outfile.c_str()); exit(1); } } } // for tables: // build string (DT) based on current column string td_tag(int currentc) { // eacolumn gives the alignment printout of a specific column string eacolumn; // va gives vertical alignment to a specific column string va; // DT is the complete tag, with width and align string DT; // dw is the width for tables. It is also the
    tag beginning string dw; // set up alignment for particular columns for (int counter=0; counter < ncalign; counter++){ if (ncalign != 0 && acolnum[counter] == currentc){ string align; if (colalign[counter] == "l") align= "left"; else if (colalign[counter] == "r") align= "right"; else if (colalign[counter] == "c") align= "center"; else { fprintf(stderr, "ERROR: Unrecognized table alignment argument %s for caM=X\n", colalign[counter].c_str()); exit(1); } eacolumn= " ALIGN =\"" + align +"\""; }else eacolumn= ""; } // set up vertical alignment for particular columns for (int counter=0; counter < ncvalign; counter++){ if (ncvalign != 0 && vacolnum[counter] == currentc){ string valign; if (colvalign[counter] == "t") valign= "top"; else if (colvalign[counter] == "m") valign= "middle"; else if (colvalign[counter] == "ba") valign= "baseline"; else if (colvalign[counter] == "bo") valign= "bottom"; else { fprintf(stderr, "ERROR: Unrecognized table alignment argument %s for cvaM=X\n", colvalign[counter].c_str()); exit(1); } va = " VALIGN =\"" + valign + "\""; } else va = " "; } // put in special width if specified // new code // if dwidth has not been set, dw is blank // if dwidth has been set, dw has that... unless if (dwidth=="0") dw = " "; else dw =" WIDTH=\""+ dwidth + "\""; for (int counter = 0; counter < ncnum; counter++){ // if it is the right column, dw = cwidth property if (cnum[counter] == currentc) dw= " WIDTH=\"" + cwidth[counter] + "\""; } // DT is set for all of this particular separator : reset next separator DT = ""; return DT; } // for tables: // find the next separator starting at nend(the end of the last .insert) // if there is either a delim or newline // decide which is first // set n = to that position // nsep is position of the next separator. changes in here. 
int find_n(string &s, int nend, int &nsep) { int n; nsep = s.find(tabledelim,nend); int n2 = s.find('\n',nend); int m = s.length() - 1; if (nsep >= 0 && n2 >= 0) { if (nsep <= n2) n = nsep; else n = n2; } else { if (nsep >= 0) n = nsep; else{ if (n2 < m) n = n2; else n = string::npos; } } return n; } mrmpi-1.0~20131122/python/0000755000175000017500000000000012243675123014710 5ustar mathieumathieumrmpi-1.0~20131122/python/install.py0000644000175000017500000000374412013170664016733 0ustar mathieumathieu#!/usr/local/bin/python # copy MR-MPI src/libmrmpi.so and mrmpi.py to system dirs instructions = """ Syntax: python install.py [-h] [libdir] [pydir] libdir = target dir for src/libmrmpi.so, default = /usr/local/lib pydir = target dir for mrmpi.py, default = Python site-packages dir """ import sys,os,commands if (len(sys.argv) > 1 and sys.argv[1] == "-h") or len(sys.argv) > 3: print instructions sys.exit() if len(sys.argv) >= 2: libdir = sys.argv[1] else: libdir = "/usr/local/lib" if len(sys.argv) == 3: pydir = sys.argv[2] else: pydir = "" # copy C lib to libdir if it exists # warn if not in LD_LIBRARY_PATH or LD_LIBRARY_PATH is undefined if not os.path.isdir(libdir): print "ERROR: libdir %s does not exist" % libdir sys.exit() if "LD_LIBRARY_PATH" not in os.environ: print "WARNING: LD_LIBRARY_PATH undefined, cannot check libdir %s" % libdir else: libpaths = os.environ['LD_LIBRARY_PATH'].split(':') if libdir not in libpaths: print "WARNING: libdir %s not in LD_LIBRARY_PATH" % libdir str = "cp ../src/libmrmpi.so %s" % libdir print str outstr = commands.getoutput(str) if len(outstr.strip()): print outstr # copy mrmpi.py to pydir if it exists # if pydir not specified, install in site-packages via distutils setup() if pydir: if not os.path.isdir(pydir): print "ERROR: pydir %s does not exist" % pydir sys.exit() str = "cp ../python/mrmpi.py %s" % pydir print str outstr = commands.getoutput(str) if len(outstr.strip()): print outstr sys.exit() print "installing mrmpi.py in 
Python site-packages dir" os.chdir('../python') # in case invoked via make in src dir from distutils.core import setup sys.argv = ["setup.py","install"] # as if had run "python setup.py install" setup(name = "mrmpi", version = "15Aug12", author = "Steve Plimpton", author_email = "sjplimp@sandia.gov", url = "http://mapreduce.sandia.gov", description = "MR-MPI MapReduce library", py_modules = ["mrmpi"]) mrmpi-1.0~20131122/python/README0000644000175000017500000000206412013025610015553 0ustar mathieumathieuThis directory contains Python code which wraps MR-MPI as a library and allows the MR-MPI library interface to be invoked from Python, either from a script or interactively. Details on the Python interface to MR-MPI and how to build MR-MPI as a shared library, for use with Python, are given in doc/Interface_python.html and in doc/Start.html. Basically you need to follow these steps in the src directory: % make -f Makefile.shlib g++ # or whatever machine target you wish % make install-python # may need to do this via sudo You can replace the last step with running the python/install.py script directly to give you more control over where two relevant files are installed, or by setting environment variables in your shell script. See doc/Interface_python.html for details. You can then launch Python and instantiate an instance of MR-MPI: % python >>> from mrmpi import mrmpi >>> mr = mrmpi() If that gives no errors, you have succesfully wrapped MR-MPI with Python. You should then be able to run the Python scripts in the examples sub-directory. mrmpi-1.0~20131122/python/mrmpi.py0000644000175000017500000003445712243674342016425 0ustar mathieumathieu# ---------------------------------------------------------------------- # MR-MPI = MapReduce-MPI library # http://www.cs.sandia.gov/~sjplimp/mapreduce.html # Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories # # Copyright (2009) Sandia Corporation. 
# Under the terms of Contract
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
# certain rights in this software.  This software is distributed under
# the modified Berkeley Software Distribution (BSD) License.
#
# See the README file in the top-level MapReduce directory.
# -------------------------------------------------------------------------

# Python wrapper on MapReduce-MPI library via ctypes

import types
from ctypes import *
try:
  from cPickle import dumps,loads
except ImportError:
  # cPickle only exists on Python 2; pickle provides the same dumps/loads
  from pickle import dumps,loads

# exception class raised when no MR-MPI instance can be created
# StandardError is the historical choice; it does not exist on Python 3

try:
  _CreateError = StandardError
except NameError:
  _CreateError = Exception

# NOTE(review): only syntax and builtins were made Python 2/3 neutral here;
# str-vs-bytes handling of file names and separators handed to ctypes has
# not been audited for Python 3

def _argcount(func):
  # number of positional parameters a caller of func has to supply
  # a bound method's code object also counts its implicit first argument,
  # so that one is discounted
  count = func.__code__.co_argcount
  if getattr(func,"__self__",None) is not None: count -= 1
  return count

def _unpack_multivalue(multivalue,nvalues,valuesizes):
  # split a packed multivalue buffer into a list of unpickled values
  # valuesizes[i] is the byte length of the i-th pickled value
  values = []
  start = 0
  for i in range(nvalues):
    stop = start + valuesizes[i]
    values.append(loads(multivalue[start:stop]))
    start = stop
  return values

class mrmpi:

  # marks "value argument not supplied" in add(); None is a legitimate value
  _NO_VALUE = object()

  def __init__(self,comm=None,name=""):
    # comm = None  -> MR_create_mpi()
    # comm = int   -> MR_create(comm)
    # comm = float -> MR_create_mpi_finalize()
    # anything else raises StandardError (Exception on Python 3)
    # raises OSError if the shared library cannot be loaded

    # load libmrmpi.so by default
    # if name = "g++", load libmrmpi_g++.so

    try:
      if not name: self.lib = CDLL("libmrmpi.so")
      else: self.lib = CDLL("libmrmpi_%s.so" % name)
    except Exception:
      import sys,traceback
      # do not unpack into a local called "type": that would make type a
      # local of the whole function and break the type(comm) tests below
      exc_type,exc_value,exc_tb = sys.exc_info()
      traceback.print_exception(exc_type,exc_value,exc_tb)
      raise OSError("Could not load MR-MPI dynamic library")

    # create an instance of MR-MPI
    # exact type tests (not isinstance) so bool and long stay excluded

    if comm is None:
      self.mr = self.lib.MR_create_mpi()
    elif type(comm) is int:
      self.mr = self.lib.MR_create(comm)
    elif type(comm) is float:
      self.mr = self.lib.MR_create_mpi_finalize()
    else:
      raise _CreateError("Could not create an MR-MPI library instance")

    # hardwire keyalign and valuealign to 1 because of pickling

    self.lib.MR_set_keyalign(self.mr,1)
    self.lib.MR_set_valuealign(self.mr,1)

    # setup callbacks
    # NOTE(review): these restypes are assigned after MR_create*() has
    # already run (and never for MR_create_mpi*), so self.mr above went
    # through ctypes' default int return conversion - confirm that is
    # safe on 64-bit platforms

    self.lib.MR_create.restype = c_void_p
    self.lib.MR_copy.restype = c_void_p

    # the *_def objects are kept on self so they stay referenced for as
    # long as the library may call back through them

    COMPAREFUNC = CFUNCTYPE(c_void_p,POINTER(c_char),c_int,
                            POINTER(c_char),c_int)
    self.compare_def = COMPAREFUNC(self.compare_callback)

    COMPRESSFUNC = CFUNCTYPE(c_void_p,POINTER(c_char),c_int,
                             POINTER(c_char),c_int,POINTER(c_int),
                             c_void_p,c_void_p)
    self.compress_def = COMPRESSFUNC(self.compress_callback)

    HASHFUNC = CFUNCTYPE(c_void_p,POINTER(c_char),c_int)
    self.hash_def = HASHFUNC(self.hash_callback)

    MAPFUNC = CFUNCTYPE(c_void_p,c_int,c_void_p,c_void_p)
    self.map_def = MAPFUNC(self.map_callback)

    MAP_FILE_FUNC = CFUNCTYPE(c_void_p,c_int,c_char_p,c_void_p,c_void_p)
    self.map_file_def = MAP_FILE_FUNC(self.map_file_callback)

    MAP_STR_FUNC = CFUNCTYPE(c_void_p,c_int,POINTER(c_char),c_int,
                             c_void_p,c_void_p)
    self.map_str_def = MAP_STR_FUNC(self.map_str_callback)

    MAP_MR_FUNC = CFUNCTYPE(c_void_p,c_int,POINTER(c_char),c_int,
                            POINTER(c_char),c_int,c_void_p,c_void_p)
    self.map_mr_def = MAP_MR_FUNC(self.map_mr_callback)

    REDUCEFUNC = CFUNCTYPE(c_void_p,POINTER(c_char),c_int,
                           POINTER(c_char),c_int,POINTER(c_int),
                           c_void_p,c_void_p)
    self.reduce_def = REDUCEFUNC(self.reduce_callback)

    SCANKVFUNC = CFUNCTYPE(c_void_p,POINTER(c_char),c_int,
                           POINTER(c_char),c_int,c_void_p)
    self.scankv_def = SCANKVFUNC(self.scankv_callback)

    SCANKMVFUNC = CFUNCTYPE(c_void_p,POINTER(c_char),c_int,
                            POINTER(c_char),c_int,POINTER(c_int),c_void_p)
    self.scankmv_def = SCANKMVFUNC(self.scankmv_callback)

  def __del__(self):
    # self.mr is missing if __init__ failed before creating the instance
    if getattr(self,"mr",None): self.lib.MR_destroy(self.mr)

  def destroy(self):
    # destroy the library instance now; __del__ then has nothing to do
    self.lib.MR_destroy(self.mr)
    self.mr = None

  def copy(self,mr):
    # wrap the handle returned by MR_copy in a new mrmpi object
    # the instance the new wrapper created for itself is destroyed first
    cmr = self.lib.MR_copy(self.mr,mr.mr)
    pymr = mrmpi()
    self.lib.MR_destroy(pymr.mr)
    pymr.mr = cmr
    return pymr

  def add(self,key,value=_NO_VALUE):
    # two call forms share this name:
    #   add(mr)        -> MR_add() with another mrmpi object, returns its result
    #   add(key,value) -> pickle key and value and MR_kv_add() them to the
    #                     KeyValue handle of the callback currently running
    # previously the second definition silently replaced the first
    if value is mrmpi._NO_VALUE:
      n = self.lib.MR_add(self.mr,key.mr)
      return n
    ckey = dumps(key,1)
    cvalue = dumps(value,1)
    self.lib.MR_kv_add(self.kv,ckey,len(ckey),cvalue,len(cvalue))

  def aggregate(self,hash=None):
    # hash = optional Python function called as hash(key,mr)
    if hash:
      self.hash_caller = hash
      n = self.lib.MR_aggregate(self.mr,self.hash_def)
    else:
      n = self.lib.MR_aggregate(self.mr,None)
    return n

  def broadcast(self,root):
    n = self.lib.MR_broadcast(self.mr,root)
    return n

  def clone(self):
    n = self.lib.MR_clone(self.mr)
    return n

  def close(self):
    n = self.lib.MR_close(self.mr)
    return n

  def collapse(self,key):
    # key is pickled before being handed to the library
    ckey = dumps(key,1)
    n = self.lib.MR_collapse(self.mr,ckey,len(ckey))
    return n

  def collate(self,hash=None):
    # hash = optional Python function called as hash(key,mr)
    if hash:
      self.hash_caller = hash
      n = self.lib.MR_collate(self.mr,self.hash_def)
    else:
      n = self.lib.MR_collate(self.mr,None)
    return n

  def compress(self,compress,ptr=None):
    # compress = Python function called as compress(key,mvalue,mr)
    #            or compress(key,mvalue,mr,ptr)
    # (the parameter used to be misspelled, so every call raised NameError)
    self.compress_caller = compress
    self.compress_argcount = _argcount(compress)
    self.compress_ptr = ptr
    n = self.lib.MR_compress(self.mr,self.compress_def,None)
    return n

  def compress_callback(self,ckey,keybytes,multivalue,nvalues,valuesizes,
                        kv,dummy):
    # invoked by the library; unpickle the data and call the user function
    self.kv = kv
    key = loads(ckey[:keybytes])
    mvalue = _unpack_multivalue(multivalue,nvalues,valuesizes)
    if self.compress_argcount == 3: self.compress_caller(key,mvalue,self)
    else: self.compress_caller(key,mvalue,self,self.compress_ptr)

  def convert(self):
    n = self.lib.MR_convert(self.mr)
    return n

  def gather(self,nprocs):
    n = self.lib.MR_gather(self.mr,nprocs)
    return n

  def hash_callback(self,ckey,keybytes):
    # invoked by the library; result of the user hash function is returned
    key = loads(ckey[:keybytes])
    return self.hash_caller(key,self)

  def map(self,nmap,map,ptr=None,addflag=0):
    # map = Python function called as map(itask,mr) or map(itask,mr,ptr)
    # non-zero addflag selects the MR_map_add() entry point
    self.map_caller = map
    self.map_argcount = _argcount(map)
    self.map_ptr = ptr
    if not addflag:
      n = self.lib.MR_map(self.mr,nmap,self.map_def,None)
    else:
      n = self.lib.MR_map_add(self.mr,nmap,self.map_def,None,addflag)
    return n

  def map_callback(self,itask,kv,dummy):
    self.kv = kv
    if self.map_argcount == 2: self.map_caller(itask,self)
    else: self.map_caller(itask,self,self.map_ptr)

  def map_file(self,files,selfflag,recurse,readfile,map,ptr=None,addflag=0):
    # map = Python function called as map(itask,file,mr)
    #       or map(itask,file,mr,ptr)
    self.map_caller = map
    self.map_argcount = _argcount(map)
    self.map_ptr = ptr
    cfiles = (c_char_p*len(files))(*files)    # array of C strings from list
    if not addflag:
      n = self.lib.MR_map_file(self.mr,len(cfiles),cfiles,
                               selfflag,recurse,readfile,
                               self.map_file_def,None)
    else:
      n = self.lib.MR_map_file_add(self.mr,len(cfiles),cfiles,
                                   selfflag,recurse,readfile,
                                   self.map_file_def,None,addflag)
    return n

  def map_file_callback(self,itask,file,kv,dummy):
    self.kv = kv
    if self.map_argcount == 3: self.map_caller(itask,file,self)
    else: self.map_caller(itask,file,self,self.map_ptr)

  def map_file_char(self,nmap,files,recurse,readfile,sepchar,delta,map,
                    ptr=None,addflag=0):
    # map = Python function called as map(itask,str,mr)
    #       or map(itask,str,mr,ptr)
    # sepchar is handed to the library as its integer code, ord(sepchar)
    self.map_caller = map
    self.map_argcount = _argcount(map)
    self.map_ptr = ptr
    cfiles = (c_char_p*len(files))(*files)    # array of C strings from list
    if not addflag:
      n = self.lib.MR_map_file_char(self.mr,nmap,len(files),cfiles,
                                    recurse,readfile,
                                    ord(sepchar),delta,self.map_str_def,None)
    else:
      n = self.lib.MR_map_file_char_add(self.mr,nmap,len(files),cfiles,
                                        recurse,readfile,
                                        ord(sepchar),delta,
                                        self.map_str_def,None,addflag)
    return n

  def map_file_str(self,nmap,files,recurse,readfile,sepstr,delta,map,
                   ptr=None,addflag=0):
    # same as map_file_char() except the separator is passed as a string
    self.map_caller = map
    self.map_argcount = _argcount(map)
    self.map_ptr = ptr
    cfiles = (c_char_p*len(files))(*files)    # array of C strings from list
    if not addflag:
      n = self.lib.MR_map_file_str(self.mr,nmap,len(files),cfiles,
                                   recurse,readfile,
                                   sepstr,delta,self.map_str_def,None)
    else:
      n = self.lib.MR_map_file_str_add(self.mr,nmap,len(files),cfiles,
                                       recurse,readfile,
                                       sepstr,delta,
                                       self.map_str_def,None,addflag)
    return n

  def map_str_callback(self,itask,cstr,size,kv,dummy):
    # only the first size bytes of cstr are passed on to the user function
    self.kv = kv
    chunk = cstr[:size]
    if self.map_argcount == 3: self.map_caller(itask,chunk,self)
    else: self.map_caller(itask,chunk,self,self.map_ptr)

  def map_mr(self,mr,map,ptr=None,addflag=0):
    # map = Python function called as map(itask,key,value,mr)
    #       or map(itask,key,value,mr,ptr)
    self.map_caller = map
    self.map_argcount = _argcount(map)
    self.map_ptr = ptr
    if not addflag:
      n = self.lib.MR_map_mr(self.mr,mr.mr,self.map_mr_def,None)
    else:
      n = self.lib.MR_map_mr_add(self.mr,mr.mr,self.map_mr_def,None,addflag)
    return n

  def map_mr_callback(self,itask,ckey,keybytes,cvalue,valuebytes,kv,dummy):
    self.kv = kv
    key = loads(ckey[:keybytes])
    value = loads(cvalue[:valuebytes])
    if self.map_argcount == 4: self.map_caller(itask,key,value,self)
    else: self.map_caller(itask,key,value,self,self.map_ptr)

  def open(self,addflag=0):
    if not addflag: self.lib.MR_open(self.mr)
    else: self.lib.MR_open_add(self.mr,addflag)

  def print_screen(self,proc,nstride,kflag,vflag):
    self.lib.MR_print(self.mr,proc,nstride,kflag,vflag)

  def print_file(self,file,fflag,proc,nstride,kflag,vflag):
    self.lib.MR_print_file(self.mr,file,fflag,proc,nstride,kflag,vflag)

  def reduce(self,reduce,ptr=None):
    # reduce = Python function called as reduce(key,mvalue,mr)
    #          or reduce(key,mvalue,mr,ptr)
    self.reduce_caller = reduce
    self.reduce_argcount = _argcount(reduce)
    self.reduce_ptr = ptr
    n = self.lib.MR_reduce(self.mr,self.reduce_def,None)
    return n

  def reduce_callback(self,ckey,keybytes,multivalue,nvalues,valuesizes,
                      kv,dummy):
    self.kv = kv
    key = loads(ckey[:keybytes])
    mvalue = _unpack_multivalue(multivalue,nvalues,valuesizes)
    if self.reduce_argcount == 3: self.reduce_caller(key,mvalue,self)
    else: self.reduce_caller(key,mvalue,self,self.reduce_ptr)

  def scan_kv(self,scan,ptr=None):
    # scan = Python function called as scan(key,value) or scan(key,value,ptr)
    self.scan_caller = scan
    self.scan_argcount = _argcount(scan)
    self.scan_ptr = ptr
    n = self.lib.MR_scan_kv(self.mr,self.scankv_def,None)
    return n

  def scankv_callback(self,ckey,keybytes,cvalue,valuebytes,dummy):
    # a 2-argument scanner gets no ptr (this test used to compare against 3,
    # which called every scanner with the wrong number of arguments)
    key = loads(ckey[:keybytes])
    value = loads(cvalue[:valuebytes])
    if self.scan_argcount == 2: self.scan_caller(key,value)
    else: self.scan_caller(key,value,self.scan_ptr)

  def scan_kmv(self,scan,ptr=None):
    # scan = Python function called as scan(key,mvalue)
    #        or scan(key,mvalue,ptr)
    self.scan_caller = scan
    self.scan_argcount = _argcount(scan)
    self.scan_ptr = ptr
    n = self.lib.MR_scan_kmv(self.mr,self.scankmv_def,None)
    return n

  def scankmv_callback(self,ckey,keybytes,multivalue,nvalues,valuesizes,dummy):
    # same argument-count rule as scankv_callback()
    key = loads(ckey[:keybytes])
    mvalue = _unpack_multivalue(multivalue,nvalues,valuesizes)
    if self.scan_argcount == 2: self.scan_caller(key,mvalue)
    else: self.scan_caller(key,mvalue,self.scan_ptr)

  def scrunch(self,nprocs,key):
    # key is pickled before being handed to the library
    # library entry points carry the MR_ prefix; plain "scrunch" did not
    # follow that naming and could not be looked up
    ckey = dumps(key,1)
    n = self.lib.MR_scrunch(self.mr,nprocs,ckey,len(ckey))
    return n

  def multivalue_blocks(self):
    # NOTE(review): unlike every other call here this symbol has no MR_
    # prefix, and multivalue_block() below is marked unfinished - confirm
    # the intended entry point and arguments before relying on either
    n = self.lib.multivalue_blocks(self.mr)
    return n

  def multivalue_block(self,iblock,multivalue,valuesizes):
    # might need to pass back a tuple of 3 things?
    # doc these 2 funcs in Python interface
    # check that reduce and compress callbacks can handle NULL case and MR ptr
    n = self.lib.multivalue_block(self.mr,iblock,multivalue,valuesizes)
    return n

  def sort_keys(self,compare):
    # compare = Python function called as compare(obj1,obj2)
    self.compare_caller = compare
    n = self.lib.MR_sort_keys(self.mr,self.compare_def)
    return n

  def sort_keys_flag(self,flag):
    n = self.lib.MR_sort_keys_flag(self.mr,flag)
    return n

  def sort_values(self,compare):
    # compare = Python function called as compare(obj1,obj2)
    self.compare_caller = compare
    n = self.lib.MR_sort_values(self.mr,self.compare_def)
    return n

  def sort_values_flag(self,flag):
    n = self.lib.MR_sort_values_flag(self.mr,flag)
    return n

  def sort_multivalues(self,compare):
    # compare = Python function called as compare(obj1,obj2)
    self.compare_caller = compare
    n = self.lib.MR_sort_multivalues(self.mr,self.compare_def)
    return n

  def sort_multivalues_flag(self,flag):
    n = self.lib.MR_sort_multivalues_flag(self.mr,flag)
    return n

  def compare_callback(self,cobj1,len1,cobj2,len2):
    # invoked by the library; both objects are unpickled before comparing
    obj1 = loads(cobj1[:len1])
    obj2 = loads(cobj2[:len2])
    return self.compare_caller(obj1,obj2)

  def kv_stats(self,level):
    n = self.lib.MR_kv_stats(self.mr,level)
    return n

  def kmv_stats(self,level):
    n = self.lib.MR_kmv_stats(self.mr,level)
    return n

  # settings, each forwarded to the matching MR_set_*() call

  def mapstyle(self,value):
    self.lib.MR_set_mapstyle(self.mr,value)

  def all2all(self,value):
    self.lib.MR_set_all2all(self.mr,value)

  def verbosity(self,value):
    self.lib.MR_set_verbosity(self.mr,value)

  def timer(self,value):
    self.lib.MR_set_timer(self.mr,value)

  def memsize(self,value):
    self.lib.MR_set_memsize(self.mr,value)

  def minpage(self,value):
    self.lib.MR_set_minpage(self.mr,value)

  def maxpage(self,value):
    self.lib.MR_set_maxpage(self.mr,value)

  def add_multi_static(self,keys,values):
    # add len(keys) pairs in one call, passing a single per-key and a
    # single per-value byte count
    # assumes every pickled key has the same length, likewise every pickled
    # value - the counts are just total/n and this is not verified here
    # an empty keys list adds nothing (it used to divide by zero)
    n = len(keys)
    if n == 0: return
    ckeys = b"".join([dumps(keys[i],1) for i in range(n)])
    cvalues = b"".join([dumps(values[i],1) for i in range(n)])
    keybytes = len(ckeys)//n
    valuebytes = len(cvalues)//n
    # the dynamic entry point was called here before, but it is given
    # arrays of sizes by add_multi_dynamic(), not single integers
    self.lib.MR_kv_add_multi_static(self.kv,n,
                                    ckeys,keybytes,cvalues,valuebytes)

  def add_multi_dynamic(self,keys,values):
    # add len(keys) pairs in one call, passing one byte count per key and
    # per value as C int arrays
    n = len(keys)
    keybytes = (c_int*n)()
    valuebytes = (c_int*n)()
    keyparts = []
    valueparts = []
    for i in range(n):
      ckey = dumps(keys[i],1)
      cvalue = dumps(values[i],1)
      keybytes[i] = len(ckey)
      valuebytes[i] = len(cvalue)
      keyparts.append(ckey)
      valueparts.append(cvalue)
    ckeys = b"".join(keyparts)
    cvalues = b"".join(valueparts)
    self.lib.MR_kv_add_multi_dynamic(self.kv,n,
                                     ckeys,keybytes,cvalues,valuebytes)