kmer-code-2013-trunk/0000755000000000000000000000000012641613361013151 5ustar rootrootkmer-code-2013-trunk/sim4db/0000755000000000000000000000000012641613355014336 5ustar rootrootkmer-code-2013-trunk/sim4db/sim4th.C0000644000000000000000000004347612322046702015660 0ustar rootroot// This file is part of sim4db. // Copyright (c) 2005 Brian Walenz // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bio++.H" #include "sim4.H" #include "sweatShop.H" // XXX Both loader and loaderAll leave the last gen sequence undeleted! 
readBuffer *scriptFile = 0L; seqCache *GENs = 0L; seqCache *ESTs = 0L; uint32 lastGENiid = ~uint32ZERO; uint32 lastESTiid = ~uint32ZERO; seqInCore *lastGENseq = 0L; int fOutput = 0; int fYesNo = 0; char *cdnaFileName = 0L; char *scriptFileName = 0L; char *databaseFileName = 0L; char *outputFileName = 0L; char *yesnoFileName = 0L; char *touchFileName = 0L; bool pairwise = false; bool beVerbose = false; bool beYesNo = false; uint32 numThreads = 2; uint32 loaderCacheSize = 1024; sim4parameters sim4params; // Parse the command line to create a sim4command object // // [-f|-r] -e ESTid -D GENid GENlo GENhi // // -f Forward only // -r Reverse only // -D genSeqIID genLo genHi // -e estSeqIID // // char* getNextScript(uint32 &ESTiid, uint32 &GENiid, uint32 &GENlo, uint32 &GENhi, bool &doForward, bool &doReverse) { char x = scriptFile->read(); // Skip any white space in the file // while ((scriptFile->eof() == false) && (whitespaceSymbol[x])) x = scriptFile->read(); // Exit if we're all done. // if (scriptFile->eof()) return(0L); uint32 linePos = 0; uint32 lineMax = 128; char *line = new char [lineMax]; // Copy the line from the readBuffer into our storage // while ((scriptFile->eof() == false) && (x != '\n')) { line[linePos++] = x; x = scriptFile->read(); } line[linePos] = 0; // Decode the line // uint32 argWords = 0; splitToWords words(line); while (words.getWord(argWords)) { switch (words.getWord(argWords)[1]) { case 'f': doForward = true; doReverse = false; break; case 'r': doForward = false; doReverse = true; break; case 'D': GENiid = strtouint32(words.getWord(++argWords), 0L); GENlo = strtouint32(words.getWord(++argWords), 0L); GENhi = strtouint32(words.getWord(++argWords), 0L); break; case 'e': ESTiid = strtouint32(words.getWord(++argWords), 0L); break; default: //fprintf(stderr, "Unknown option '%s'\n", words.getWord(argWords)); break; } argWords++; } return(line); } class sim4thWork { public: sim4command *input; char *script; sim4polishList *output; seqInCore 
*gendelete; seqInCore *estdelete; sim4thWork() { input = 0L; script = 0L; output = 0L; gendelete = 0L; estdelete = 0L; }; }; void* loader(void *U) { bool doForward = true; bool doReverse = true; uint32 ESTiid = 0; uint32 GENiid = 0; uint32 GENlo = 0; uint32 GENhi = 0; sim4thWork *p = new sim4thWork(); p->script = getNextScript(ESTiid, GENiid, GENlo, GENhi, doForward, doReverse); if (p->script) { seqInCore *ESTseq = 0L; seqInCore *GENseq = 0L; // If we already have the GENseq, use that, otherwise, register it for deletion. // if (lastGENiid == GENiid) { GENseq = lastGENseq; } else { // Register it for deletion. Technically, we're deleting this // on the state AFTER it's used, but we can't guarantee that // that state is still around. The writer is deleting this, so // by the time it gets here, it already wrote everyone that // used this, which kind of implies that everyone that needs // this is already computed. // p->gendelete = lastGENseq; GENseq = GENs->getSequenceInCore(GENiid); lastGENiid = GENiid; lastGENseq = GENseq; } // The cache can, and does, overwrite the EST sequence we care // about. For now, we just copy the EST from the cache. // ESTseq = ESTs->getSequenceInCore(ESTiid)->copy(); p->estdelete = ESTseq; p->input = new sim4command(ESTseq, GENseq, GENlo, GENhi, doForward, doReverse); } else { delete p; p = 0L; } return(p); } void* loaderPairwise(void *) { // Align cDNA i to genomic i. if (lastGENiid == ~uint32ZERO) // happens on the first time through lastGENiid = 0; if (lastESTiid == ~uint32ZERO) // happens on the first time through lastESTiid = 0; // If we've run out of sequences, we're done! 
if ((lastGENiid >= GENs->getNumberOfSequences()) || (lastESTiid >= ESTs->getNumberOfSequences())) return(0L); sim4thWork *p = new sim4thWork(); // Grab the GEN sequence p->gendelete = GENs->getSequenceInCore(lastGENiid++); // Grab the EST sequence p->estdelete = ESTs->getSequenceInCore(lastESTiid++)->copy(); // build the command p->input = new sim4command(p->estdelete, p->gendelete, 0, p->gendelete->sequenceLength(), true, true); return(p); } void* loaderAll(void *) { sim4thWork *p = new sim4thWork(); // Previous implementations "Ping-pong'd" through the ESTs. The // idea being we would use the cache on the ends. We can't easily // do that here, so we always go forward. // Flip around the end, if needed. if (lastESTiid >= ESTs->getNumberOfSequences()) { lastESTiid = 0; p->gendelete = lastGENseq; lastGENseq = 0L; if (lastGENiid == ~uint32ZERO) // happens on the first time through lastGENiid = 0; else lastGENiid++; } // If we've run out of sequences, we're done! if (lastGENiid >= GENs->getNumberOfSequences()) { delete p; return(0L); } // Update the genomic sequence? 
if (lastGENseq == 0L) { lastGENseq = GENs->getSequenceInCore(lastGENiid); } // Grab the EST sequence p->estdelete = ESTs->getSequenceInCore(lastESTiid++)->copy(); // build the command p->input = new sim4command(p->estdelete, lastGENseq, 0, lastGENseq->sequenceLength(), true, true); return(p); } void worker(void *U, void *T, void *S) { sim4thWork *p = (sim4thWork *)S; Sim4 *sim = new Sim4(&sim4params); p->output = sim->run(p->input); delete sim; } void writer(void *U, void *S) { sim4thWork *p = (sim4thWork *)S; sim4polishList &L4 = *(p->output); for (uint32 i=0; L4[i]; i++) { char *o = L4[i]->s4p_polishToString(sim4params.getOutputFormat()); errno = 0; write(fOutput, o, strlen(o) * sizeof(char)); if (errno) fprintf(stderr, "Couldn't write the output file '%s': %s\n", outputFileName, strerror(errno)), exit(1); delete [] o; } if (yesnoFileName) { char str[128]; if (L4[0]) sprintf(str, "%s -Y "uint32FMT" "uint32FMT"\n", p->script, L4[0]->_percentIdentity, L4[0]->_querySeqIdentity); else sprintf(str, "%s -N 0 0\n", p->script); write(fYesNo, str, strlen(str) * sizeof(char)); } // Release this compute delete p->input; delete [] p->script; delete p->output; delete p->gendelete; delete p->estdelete; delete p; } int openOutputFile(char *name) { int f = 0; if (name == 0L) return(0); if (strcmp(name, "-") == 0) { f = fileno(stdout); } else { errno = 0; f = open(name, O_WRONLY | O_LARGEFILE | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (errno) fprintf(stderr, "Couldn't open the output file '%s': %s\n", name, strerror(errno)), exit(1); } return(f); } int main(int argc, char **argv) { int arg = 1; int err = 0; while (arg < argc) { if (strncmp(argv[arg], "-alignments", 4) == 0) { sim4params.setPrintAlignments(true); } else if (strncmp(argv[arg], "-alwaysprint", 4) == 0) { sim4params.setFindAllExons(true); sim4params.setAlwaysReport(atoi(argv[++arg])); } else if (strncmp(argv[arg], "-cdna", 3) == 0) { cdnaFileName = argv[++arg]; } else if 
(strncmp(argv[arg], "-cut", 3) == 0) { double x = atof(argv[++arg]); if (x < 0.0) { fprintf(stderr, "WARNING: -cut adjusted to 0.0 (you gave %f)!\n", x); x = 0.0; } if (x > 1.0) { fprintf(stderr, "WARNING: -cut adjusted to 1.0 (you gave %f)!\n", x); x = 1.0; } sim4params.setPolyTailPercent(x); } else if (strncmp(argv[arg], "-genomic", 4) == 0) { databaseFileName = argv[++arg]; } else if (strncmp(argv[arg], "-minc", 5) == 0) { sim4params.setFindAllExons(true); sim4params.setMinCoverage(atoi(argv[++arg]) / 100.0); } else if (strncmp(argv[arg], "-mini", 5) == 0) { sim4params.setFindAllExons(true); sim4params.setMinPercentExonIdentity(atoi(argv[++arg])); } else if (strncmp(argv[arg], "-minl", 5) == 0) { sim4params.setFindAllExons(true); sim4params.setMinCoverageLength(atoi(argv[++arg])); } else if (strncmp(argv[arg], "-nod", 4) == 0) { sim4params.setIncludeDefLine(false); } else if (strncmp(argv[arg], "-non", 4) == 0) { sim4params.setDontForceCanonicalSplicing(true); } else if (strncmp(argv[arg], "-f", 2) == 0) { sim4params.setForceStrandPrediction(true); } else if (strncmp(argv[arg], "-o", 2) == 0) { outputFileName = argv[++arg]; } else if (strncmp(argv[arg], "-po", 3) == 0) { sim4params.setIgnorePolyTails(false); } else if (strncmp(argv[arg], "-sc", 3) == 0) { scriptFileName = argv[++arg]; } else if (strncmp(argv[arg], "-sp", 3) == 0) { sim4params.setSpliceModel(atoi(argv[++arg])); } else if (strncmp(argv[arg], "-pa", 3) == 0) { pairwise = true; } else if (strncmp(argv[arg], "-to", 3) == 0) { touchFileName = argv[++arg]; } else if (strncmp(argv[arg], "-verbose", 2) == 0) { beVerbose = true; } else if (strncmp(argv[arg], "-YN", 3) == 0) { yesnoFileName = argv[++arg]; } else if (strncmp(argv[arg], "-threads", 3) == 0) { numThreads = strtouint32(argv[++arg], 0L); } else if (strncmp(argv[arg], "-H", 2) == 0) { sim4params.setRelinkWeight(atoi(argv[++arg])); } else if (strncmp(argv[arg], "-K", 2) == 0) { sim4params.setMSPThreshold1(atoi(argv[++arg])); } else if 
(strncmp(argv[arg], "-C", 2) == 0) { sim4params.setMSPThreshold2(atoi(argv[++arg])); } else if (strncmp(argv[arg], "-Z", 2) == 0) { sim4params.setSpacedSeed(argv[++arg]); } else if (strncmp(argv[arg], "-Ma", 3) == 0) { sim4params.setMSPLimitAbsolute(atoi(argv[++arg])); } else if (strncmp(argv[arg], "-Mp", 3) == 0) { sim4params.setMSPLimitPercent(atof(argv[++arg])); } else if (strncmp(argv[arg], "-interspecies", 2) == 0) { sim4params.setInterspecies(true); } else if (strcmp(argv[arg], "-gff3") == 0) { sim4params.setOutputFormat(S4P_POLISH_GFF3); } else { fprintf(stderr, "Unknown option '%s'.\n", argv[arg]); err++; } arg++; } if ((err) || (cdnaFileName == 0L) || (databaseFileName == 0L) || (outputFileName == 0L)) { fprintf(stderr, "usage: %s -genomic g.fasta -cdna c.fasta -output o.sim4db [options]\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, " -v print status to stderr while running\n"); fprintf(stderr, " -V print script lines (stderr) as they are processed\n"); fprintf(stderr, " -YN print script lines (to given file) as they are processed, annotated with yes/no\n"); fprintf(stderr, "\n"); fprintf(stderr, " -cdna use these cDNA sequences\n"); fprintf(stderr, " -genomic use these genomic sequences\n"); fprintf(stderr, " -script use this script file\n"); fprintf(stderr, " -pairwise do pairs of sequences\n"); fprintf(stderr, " -output write output to this file\n"); fprintf(stderr, " -touch create this file when the program finishes execution\n"); fprintf(stderr, "\n"); fprintf(stderr, " -threads Use n threads.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -mincoverage iteratively find all exon models with the specified\n"); fprintf(stderr, " minimum PERCENT COVERAGE\n"); fprintf(stderr, " -minidentity iteratively find all exon models with the specified\n"); fprintf(stderr, " minimum PERCENT EXON IDENTITY\n"); fprintf(stderr, " -minlength iteratively find all exon models with the specified\n"); fprintf(stderr, " minimum ABSOLUTE COVERAGE (number of bp 
matched)\n"); fprintf(stderr, " -alwaysreport always report exon models, even if they\n"); fprintf(stderr, " are below the quality thresholds\n"); fprintf(stderr, "\n"); fprintf(stderr, " If no mincoverage or minidentity or minlength is given, only\n"); fprintf(stderr, " the best exon model is returned.\n"); fprintf(stderr, "\n"); fprintf(stderr, " You will probably want to specify ALL THREE of mincoverage,\n"); fprintf(stderr, " minidentity and minlength! Don't assume the default values\n"); fprintf(stderr, " are what you want!\n"); fprintf(stderr, "\n"); fprintf(stderr, " You will DEFINITELY want to specify at least one of mincoverage,\n"); fprintf(stderr, " minidentity and minlength with alwaysreport! If you don't, mincoverage\n"); fprintf(stderr, " will be set to 90 and minidentity to 95 -- to reduce the number of\n"); fprintf(stderr, " spurious matches when a good match is found.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -nodeflines don't include the defline in the output\n"); fprintf(stderr, " -alignments print alignments\n"); fprintf(stderr, "\n"); fprintf(stderr, " -polytails DON'T mask poly-A and poly-T tails.\n"); fprintf(stderr, " -cut Trim marginal exons if A/T %% > x (poly-AT tails)\n"); fprintf(stderr, "\n"); fprintf(stderr, " -noncanonical Don't force canonical splice sites\n"); fprintf(stderr, " -splicemodel Use the following splice model: 0 - original sim4;\n"); fprintf(stderr, " 1 - GeneSplicer; 2 - Glimmer (default: 0)\n"); fprintf(stderr, "\n"); fprintf(stderr, " -forcestrand Force the strand prediction to always be\n"); fprintf(stderr, " 'forward' or 'reverse'\n"); fprintf(stderr, "\n"); fprintf(stderr, " -interspecies Use sim4cc for inter-species alignments\n"); fprintf(stderr, "\n"); fprintf(stderr, " The following are for use only by immortals.\n"); fprintf(stderr, " -Z set the (spaced) seed pattern\n"); fprintf(stderr, " -H set the relink weight factor\n"); fprintf(stderr, " -K set the first MSP threshold\n"); fprintf(stderr, " -C set 
the second MSP threshold\n"); fprintf(stderr, " -Ma set the limit of the number of MSPs allowed\n"); fprintf(stderr, " -Mp same, as percentage of bases in cDNA\n"); fprintf(stderr, " NOTE: If used, both -Ma and -Mp must be specified!\n"); exit(1); } // Open input files // GENs = new seqCache(databaseFileName); ESTs = new seqCache(cdnaFileName, loaderCacheSize, false); // Open the output file fOutput = openOutputFile(outputFileName); fYesNo = openOutputFile(yesnoFileName); sweatShop *ss = 0L; err = sim4params.setSpliceMutex(); if (err) { fprintf(stderr, "sim4th::main()-- Failed to initialize splice mutex: %s.\n", strerror(err)); exit(1); } // If we have a script, read work from there, otherwise, // do an all-vs-all. // if (scriptFileName) { scriptFile = new readBuffer(scriptFileName); ss = new sweatShop(loader, worker, writer); } else if (pairwise) { ss = new sweatShop(loaderPairwise, worker, writer); } else { ss = new sweatShop(loaderAll, worker, writer); } ss->setNumberOfWorkers(numThreads); ss->run(0L, beVerbose); delete ss; // Only close the file if it isn't stdout // if (strcmp(outputFileName, "-") != 0) close(fOutput); if (yesnoFileName) close(fYesNo); delete scriptFile; delete ESTs; delete GENs; exit(0); } kmer-code-2013-trunk/sim4db/Make.include0000644000000000000000000000071511512763666016572 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../libutil/)/ LIBBIO/ :=$(realpath $/../libbio/)/ LIBSEQ/ :=$(realpath $/../libseq/)/ LIBSIM4/ :=$(realpath $/../libsim4/)/ $/.CXX_SRCS := $/sim4th.C $/.CXX_EXES := $/sim4db $/.CLEAN := $/*.o $/sim4db: $/sim4th.o ${$/.CXX_EXES}: ${LIBSIM4/}libsim4.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $(eval $/%.d $/%.o: CXXFLAGS+= -I${LIBSIM4/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) kmer-code-2013-trunk/ESTmapper GSAC.ppt0000644000000000000000000037200012431447644016244 0ustar rootrootࡱ> za.Hy]_{`@U( f3f3nŐ { DWDPNG  IHDRPLTEUUUUUUQHRbKGDH cmPPJCmp0712Om@IDATx[D@{йk*1` S;v ØCofgwݱXJ,p% \Wb+ y9?J]8 fg%S 
>Ӏi~+ޠ ;:^q&*R?t&V|%5ǦϒnKQ;^K+e0~1#0#({v4ؤ㤈D]RpwEȉ׵kH])XlP`5op_tv;E/#0%#|sڊ;e9\1We]ӘAS_!rxa+glãltiM7vt=;[*0gxxN5`Z$c?8e)̛DK9AϋA*dC1ٴ_Ox0zΩFs6{m^ zKE#дi^M& J)Gwd`@ 0NpFbFS@)K`Np۵.~e;KRD9ˍ& f3a V0b^>; & S飈& PƯR%K?˚VJ%'^Y6bhw~R<=1,p:YSg S>Ǭx@`ĝ;-M8݋NP` ~Q \wpɽZ;q) ` lؔ Kv5YWz}'؄0}FQ7ibg$뺿f `wތٷ ~_XkI{if}zS$M`U`isbZ jvW^-P`Nqu͒=i+Gw˹GlT`Wձ[ N|%7ȅE2~V.)Az.0Icusw:C nxD銶=?fM( G Lwh~7\ғIr$68MV}hjK>iiOgsWߑ'A~`"- L7xl'~G+u<[l9}tsb%`̰|[/`ӣ>0C`ũ? 9^%~y`fO,o+XshC~ΡLXoo}L3~1LOxV'l7O˳ϩ'݃}gL6 4o.}~`Sk7&`I7-6Gnr AϲrxOI&S@Ok_{~GWޫ:'xD0@^_dKӻ |-Lo&Ud0+KGwLo*Wq{?>~{ s t>d`ޚϵ!+`@;y[;NٚyAL+܍mv_o"mӷ`kohR`xJrY#`~Ӝ& lXn^ٱ `ג~h?f~X15G0Y`.mj<o,PL(& J%d`@ 0Y,PL(& J%d`@ 0Y,PL(& J%d`@ 0%w`>a|g4|#p0 Fs<L  @ 0Y,PL(& J%d`@ 0Y,PL(& J%d`@ 0Y,PL(& J%d`@ 0Y,PL(& J%d`@ 0Y,PL(& J%d`@ 0Y,PL(& J%d`@ 0Y,PL(& J%d`@ 0 x͋N8n~~(3l<'MN @ 0Y,PL(& J%d`@ 0Y,PL(& J%d`@ 0Y,PL(& J%d`@ 0Y,PL(& J%d`@ 0Y,PL(& J%d`@ 0Y,PL(& J%d`@ 0Y,PL(& JiF!5I%dWY\i(0zΕ-s6\.qj`[`oZY5LDUJT \J%O;lv<ۿsZSLW4j-Չx}?ݢR=)9KÙØb:gM/>X*{3v6J,p% \ɜrʗ9,])úDx:3KQgucmmKsMX>\7VE nfFbe̠7gי.\. XF4+}DX>tD^qaT :hRDADAz:eV zrՓtK,yzMy٢XC -3:3:3fי˶eN`CXwYh]T6V.o]Qguт:3Jagu]Ң}!t]JjGK$[ KY 袣pt<~FG!^Goή.: KAgsǗu&1'몋1Ga3Wb+XJ,p%0,RFIENDB`n4 ?x洗*оPNG  IHDRPLTEUUUUUUQHRbKGDH cmPPJCmp0712Om IDATx[rE]LRLK+5\IH @G`{{;Z pr;E{' I$Bp! 
\H.zWI3\vaF{ӰnɭNm x.{Nm%­ad h,-%@YZ K@ d h,-%@YZ K@ d h,-%@YZ K@ d h,-%@YZ K@ d h,-%@D׵GQMWku )8c$&_DǺIMYZ K@ dӟDE%@ȦUdd hl`UMNY;.{3GA6cNCXK%`w /WKQ+`/2gt??̀z,2--{ѻ7zmSd'rm>̣;2hHd _޾7@iٽl}ol6,-%@YZ K@ d h,-%@63`N K@ d h,-%@YZ K@ d h,- %Y+`NgCv+i.pdʁ 8d\¿:w(n!_|[ <Ϋ.³@YZ K@ d h,- YZ K@ d h,-%@YZ K@ d h,-%@67`Ψ K@ d h,-%@YZ K@ d h,-%@9YZ K@ d h,-%@fR}ReZ K@ d h,-%s> @YZ x\( %(Ȇ< »m63<&`\&`@OZ @YZ K@ d h,-%@YZ K@ d h,-%@YZ K@ d h^_M dQ%@YZ { `~Ԁ,-%@YZ K@ d h,-%@YZ K@ d h,-%@@TYZ X_:Tk{; /wol!3pdK{3GA_./"3@_.g,-=Qn@ d h,-=)~~, K@k 6p D$B,`d h,-- 8UQݽvxӷ/F,g y[{sOSd hզE=<~7Wd9-XrUqa,-O#LRdoP Njdop ܿY);OuI*<5^V إ3!W#L|Z#,y+?q"hIo=fwZg:k׏5Փ+wP%B4|Yod h|d% %?sG 8@3o>} _ݳ@)^#};/&΁ x]?pw fO/>]qO:k^} XKQe>YZ K@ d h,-%@YZ K@ d h,-%@YZ K@ d h,-%@YZ K@ d h,-%@YZ K@ d h,-%@YZ K@ d h,-%@YZ K@ d h,-%+Wb[ pX.pjk e^o߾9R_wcs%?~wVfx~qwgc/v^o_S5W6``$3[7:|80 sU8$ I$B/)b{c{01)6)Wn[oH1޾{Ǽ,6{ۚ Xٵ:8QyR3޾W6k߮5<]4[o˛_Ku[waMP|Vk0 `(=4c H'`}Iuho]~{Zߣ;╯L+o9J3ԾZ_ \H.$ I$B)°#IENDB`n{\4>5hHPNG  IHDRNjBgAMAcPLTE GR)^8iHuWufΔԲڲKzIDATxi{Jm0,@$&Y>*I9_f)W٥גJz(*m+H g*e8Q.(*#vaPEe$N"씽|d;-&N8(N)(HVCl+g)(H<,{8EQ8e/(*#vPEe$N||DSϲS@Q;yYq (2a'Oq>^>NEQTF")g)(H<,{ݕW=WY>ķra'Oq>^Gw}a)'T?E|7y|zzۑ֕DN 8e/_΃{5o9|;{j*j+_<,{rtCq{Axz_{T3NY")g˗.a縮牪v=xvxlƦ;8<<IvESϲ/;G;?Z:LQ>vwhW˛XwL ;ՖV;yY;_֦+Cؑx㤽utvwhV+8-y-EmTpvvd#y4wF9QMB`)ʥ8e/_#{7甆g<]cM0Exvvdr<6;\'-)-#-`qa'Oq>^\`1!aCc.u\;yY:{ Q!=yGe LQ.[<,{rإΞkv y$v5hvt\.@~(VvSM9yE8e/_Z^~;?tȷ,L`ar")g˗簞} /Vs󂝋 PWSa'Oq>^<~\ǂqvZXp֩jN_")g˗簮l.R\޺瓖 ar")g˗娼*\c?W.6LVhaq;He,N||Y#՛#-YXsT{`")g˗娮-K M!>צ{:ؙsa'Oq>^flW]/ưCAֱSKa'Oq>^w\ L -u8oyvjSm8Ŋ8e/_` iYFN^:ՌX` E){2g(ԫځv..XwgXv\`apL+G+{ 5)d]P+VUD;-gTvaoHϗFA|zb΀6z֩&RES/!z>Q!t3qX׌ev\;yS7W ƱB51,X)Ŋ;e/_v#zu6/@PBؑ-M`8e;e/_v#s@ YfG:w~jb;Kd")N˗݈n HęX>ΝammX)'v"S6WwWr;ߴ0j˹,Ri>O|%$2E册.XaBX(e2dݦmZ3K^4t7MȲM9Nhabۻ,"f& 晢mhd<+ba1d`ewl}C)Z-L 4^Lôs ]"O |7c":A^M6Vqv6#~`g/Jz'E)Z,s|$@`Xgxt7ߙ΁kU, b>n9wL᱂ƹ[Ot.|R[V6#IpNZxrv*O&rIQ"(.Է ;][;SK"`wYAO\M8Yy1?s\طxsC@ BcUam*8kvQ6 &I:̣9:`OF#VaG6.`G:[n'SY$T:J;X'h53mKs;|:vRiT/ Ҏ cSgYFUob'l:=p 2>xFP_օ!n^i5 |Xb``I>l7Տ9rYd=!ݓ_Y\g8 8VeE 
;$vx;Φ rbT]\$ÀĐ_2od ,9ZW ;Vq9N&6 qF;64,;f[GOY,L݀Pˏт/b=˛B)BYWV e agk洨 SE ~+De-'L&e|/]4XE9b7ݰ?eT݋ vֆYV.jfaM$LfҞsyV0KaIK294F_SEc:! agm;^;+Lˌpy'<9= q|31K iu/"j#$NQe  쬭+aVn㕴vqcale09 kwdY"q6E/vDvZ 39N皯=擊&\z.;f:@x\頹3lSAY xq1arV%"O0yt [9(tځD'?NQ`giTDSŽЀg ;*,"V:{zXؙ@cvb'Y" Wm]N6G̹0marvv%UidsJ0ϙԕ}f*S'I1(8eaU'gp:_f^B,qrJyR Q+dv ``;&ͼP~=ݘw{!ފ,aG^s>nDzf'2Ƥ"O%_RYV.^zݧ~X]47=zc6lEοF0AM ;-2DN#:5ie $Qot۹:"O^SYVDT1ITp-ٰU栞 Pې4=ЯtkDaGp9'AJ}cn>sݩLza~{x5p׎|V; +d*vtY;`0dls[qۂvvK= ؍WBve`ˁNڑNQ(%^\;ִÃ6:Yvإ(ZXϣ ;XN1ؙqR$zk̕xaivlC;sFN&)eeJQ4 !2} ;i;(JZ<=;+JM! ~P/Lg._,响tre'FF԰!+ vZ,`go:] NS誁%OWڅɹҞ%ĬxÇr5>0˝XGL;Ph.vP-1Bv3?rfDig9BNJ0yWڳ"D:Ezywbȸ9K;vv%C`72[` "Wډ u:J0; uNt :ׯO8b:Hؑo`sN>Chf-huBJ0Zs>|'OWRdr j%:Ivlor`;C9dF4`^uJ;))䟶{ebZBwZ*eL?!1&EYlEFbةe#q`gkqO <"4S)P SB#ؿ/]B޹=ݣ!;DZjegvb%uIOMX0AA@(;!v Pʅy(vLeE(_r' 4 qإ@\G|KaG\jZd;[;ЍޅGY "a'_\BZ(e=)M4B#- ۩ U2Bؙ_,ü@>46(+(0Piȓ٠NL  gF/-5NfC'Rv!3 wV a'$[_6y\7E"Pcϫ3Nf:1%\l[E6M.r+bD4ݟqwEl?*8O<8NP"/AL}  \/Ag>Ԕpamә/q`s㩜B  "EYvы`'Q11`bhE9FkdOd:9[ؙ,*]j%Q/.dX֩Bĸ' EYЈ`'QHV<,z;;NJ0[,AyދJ;'q;Xz>IU**;T96Dp)lOT SBҞA4s EQǸ:E9DrCcX  v&s?N xmJRvT^hFPcPMI;qQ:E9D7(2P8P 1#A|kZ7Tx~%HQ>$ja iS8L`I# t@̐>e4AX4;(:l,1"3'kP^J0XƑ#}Ȼ`HX7}B]A8.)aR`Y!R>ųԪh2N8iddaⷊ5N'Y'ٽ[f$-8L$YxK!,s5QfE">`{$en5 guiDcޜNWI&~{IΈ/%JkT- 23m5k_2#Q2Q"`G%USI,{s:]X"^ٸM\=QqEM;LO(D mRH&Np!;Q$n+|R baⷊxOt'YEQۡ^&z(' >;MwYxJޅDmxb*=GAaG{BI%T2KYaN j)v` AKbB^61F5xFK;ٰqLoG>M ZEX 9:@Vѭ@@d\ibY*GYŽv=ʖ7,LV#:3ҋWmG*JJVɅvr|s IF'~AKD~㓟 BykI잩J wg%ĝ4E)KOA ,"E6t\FoC;^]&#l{I+$hc(+ێ |\')gÄ#"vaⷊ{P<_ TecıVrpi U7ં×ag/aE& {O*/LVsOy8V%фAk#jʨ[$€PvR*o8*rW@Hu>M˅d vCr }:؁s0[Eu: 㑦|z",`g;eNnι+; 7:|_]X||(4EsDb/LV,͆Pyl aC 'GMxű|uT ;}AQ.F{N9݄sW䅉*K_36d}(9SԢVv ʅV9݄sW䅉*KN #[ٖv Vi*eTR L4؁v,`I+$h2̬$%:|N9iփ˕>+noz.`g^٥z>0[E| 4_)BYAITN:2{@*vua4xHucr@'x//Ž[sP̅I*GQqt8%vw>WM{=2vc4\"73P̅IWB\WaF;/yٗ&#E!XӰ5vvr*vek|R)saqN\6ŦJx(ˊvV *-rn=@uv tlQG[vpr+vl9($hq%,Op(˨r ~wmί51ᦑT2`G,?l9(GXϽ݃OeSNpn?e2;'+ E|vvĬ"``ؙ+Zt0 ZE8t"V?k: G"T(yaX&Rm3U;cN* 23[t0/ug> ̥BMh'58Ui3L:΀'*i#̘RptI e}Q.0 Ƣs0A5_gy*BnV.*eNs#IֱX`'0? 
䉧oa8huٳPHg@Ⱐ؇#4@SNmHiqXx\-%؁Fa6k;-LVUt碡{=z݀v"A%'Oq2|e<,n]g|inڴXonJY($hx~8UUK9إʀv>Q6omfy1=ºU]XL>]Bӭ8?ST0)ZE)1X1iA;zy#*J_L{4Er6<*4@m*fgNz4.p{: Z"V1X[(,qGM;1bX?9U -~eMK:aGe2#ؙ{^sUr5[t0WuģPLyVV(xv%J]_ 9L~^$:y2 gk)p͖="-LVG ʝ~H/}$=i~yz M c.^S8v W͓(^t0wu( >%DbэxãxkSf;Xݠq,K"|OH Uq/"UPv"帼D**5>=Yn?5vxkLfUS^V@T=,LV[ ԰66(352u +8JF"JU+ 1 ݜ`㷤 q?/n:Y"\ю8["ʿ9.{˲h%=$:=NhAN2 ڑ8ц,+ߔ$B\;sѳST0)ZE 2B@mg>VWWW)^d H%>N(Q;!6QC;JR,3A!Uu&07 Lp#(' zN2|,yPI:v:rp:ɶT`s5jHL "v@cŰSlSrf&ÛՅ$R2(=|ۣC UjdPAWL vFQ_ sJ% jN#Î( e':<ҙv_y nm*$I*b\2@#isAx!2v\bF E ?V:7shg:adpNmR!I*bePJċrI*c'Px6m ^vd`n*rYa':6 $9?t3,ȯ (9$c'L˨f$쀞  Y;^; Yg|.πV M0Ij+0t o8)hra#d,(v v$'jQKۑB,a'o/L (T:`GފBL;g\c,/:= 6 U=zK k-d3`.-# ;kŽ^Z ݄0I2}Ĵs t>0$ uUZ :6vi ;U3|;¦AEN&Qva!,N^"b,!u8!MFŴs+,؁'iۣ0 (t agk5eGO/Ԧ^XNr^$V:pJ̰֎'ȴ*+޺CTjJz~z16j4^ !f;\݅1v>ѿ]b,?svr$98NQ{)<'9џT/}yc2{߁H-J|<Jlt7_j K_Eҩs9KLC_jo6pat'._s΢G9\;|(؁3ش#]'yqaҴF2$+sLؑxR6q$1E b%,bEr\4*%pau$/RxLiDF5L#8xr4:?2յ|R0iB,ǓŽZf᧟F$:2:(:n)D ʩ 5눑~i;c>H[4"drMypّ4Y.hFӓw p7tcyN(8љ((o؉;n| ;%oaҔ<)q`Gx$봋YMWdBR;cD!u1Sq v|lesxxRNT;^| ;EoaWdE@A̓N:GYZT:elcܐ:Ўup;{S.ca'^ EO)[n _) AbQF|9No]QoGRv+da5 X;"bEr[4&XZjW~B.i;Q.uEQ Đ۞֑uF:%N4@ر!"V)IcQdd _Ind(Z`]sX0ҰKdHMmOJ`6L#g`Ǩ\ ;ցV1Od-IsHw<܉(mD~{u\0 !?XʙU߹ ;}gX1^3"0IkȪ4sgAn+kk4y]6- UPEEЁp;/SuJpq}lxPC 937 3eX5ad,LvMw!cbxB's :7E w?ʙPd>QJޙν[KGv#,Iԭ5h/;#A)<5T Fkrw'y6b[9]0{ʆO~Mk* DGu Uy\46l2ζ<>*=]v`N 1kƻ_-vڭ"?Jݴ(pwv0e9\3MYe@б%(-xïמ*`PHͼ,)84۶y/qbΎ2O`rfVsb8R]<6 ݛUl7}2kۍC|xO]o 8=mm&ۨ?M\sز/^F#Pi~6H)巠7w=?#nZMa(T/wxJeIQ bI{xrJ>O;arrߤ1FQ q4hu{ȘV~;ardr'r0\WLM=|#'sv(*G/wVK!-uENa\'CQT9Ӵ&Qy,dg^'̈!dsv(*G/w4YŐ%# e_=ywEQNtF3+߱u஝SMQcO%9;E#;Qވ{ D7w* ƗeSʑ˝D*!Y%ܵsBUtU|wwEQN*V7j#``ޯ̧?{; wEQNZӍZXcHɴb4+Aء(ܩF^ !.3cHsv(*G/w@(( DZ;98֟!$9;E#ۗ9\P%DZY"$9;E#ۗ;UK7I8BXq;wEQN*Z='n݀Z3cAN䝃CQT92}S+;cʑtdVߙ_J6`̝CQT92}4 vDwuqz)H-CQT9|_%Q'R.YeAjզCQTA|SƚD/hXzOv;uV|Sz6 ||r ;"v(dr:t{h*}&b.aCQg%×;9#9S⼓v>sgDE^dg N1Kv@;]İ;:+ٽܩZE_OƱlc{·zj@E^T]/q$yJ+D!PYN*¬'!/iItP]2u\E^TifS!ɰANOԇz;:+܉ZEX4#Iz̖;@;XߔhVM7vCء˝]7$I Ί=Rg~5 r d;5mlE^Dp.n8)Mu5AC!PYNuye8`P_}lyGpkc$v(drjqa@`ڱujdiްZ;E^T /M'a`zL %+VߩV1FaJ6/w?7!u 7[ 
EII;`&`aCQg%;U .YgyLi#`6w;:+ټɎ1Qgbv2_NvqCء˝,Gx6D'e2:TU#/6H!]9$2yXپͧ],,IݱRNtةV:Fadmji$x ٩f,7y<%$~v;jߗCdrjP.NvY`2&!mNC:J*;YIG^ܵcYwUw* T(OC)Ozi;H/w! \;f,.dv`NaJ:J*;U@2&v&,\`Zc$vJ~9TJ$;Ya>d+&nv(Fd#8E!rzh;H;YMKލS|Zvb7?!˝ţ˄ $JLΟc?ȪG=%{lN$%N*gBE$E-vB;靕v)6Fa'k+Cءl~oOf=:`;jyJ ;`2a!PRi_t H!x}џ\Ê v,@iav;j-aIr'kqz!M52f@NCVCڗ;Y''kDǤ! J ;sa'[v()_nKd 5m'5L{B9;;/vSXG`*LI ;;*ݟr>_1.ʰ$IOQ劰CyR#D9#YSadv(U,DDر_ +=4=E+J4%<ˬJر_ IsP{*UCd,aHn,v:mv-QŊCy--_fNqc%@n9 L{*UCo_i/(o8vtNλva*VCoh/IZ7v|`P7Wo**;e ;oZOx:v(Uߜk9 ΥtF>bϥO9Ό[U;?}!|ыlBi)=?ݭZܮ\;56Tv(UV_%M 'عwԞJTLkYvREءϽ`_3C:v(UV$tݧc)_7; KREء> K_ۯ81tU/Uİ$z`WF7i8~ ;!vvЎwfEMN5] ~FtV5wJĹ,'`TFf͝,͑o'EO7B~ G Uv0Qب/Y´ l)No;-vC 4•wvRi8Xhs/ /+?ϱxd[EP0w0 ;{#_n)*jܷPڭv*bT!m5tQ{/ᙲuiv~ BW(Ф.ބPr[;;)]e \Ѓ .za@c0)`'qE ?Ba>줥eŽ9R46.'tab@c]m;3,/z|ag, {Gju=@Te;HRDY?dhvЫwyNY: :D3ǕQ ѐ!5vSQGωfe}Gfj!?_![E CY`nx*ai<<;}ccc{L#\-,'0r ;藧2|.}0{Nt[S;+A;A0Rt '~קT6薔LrumgMYq,0ZڮZA1EsCZTQ1&]^U.NSRq4[OWҒ򲀝&=}ܕI%sC{ ָi}c=;B`kXxG0la0`;c_ By<WRIіq, a/WSAgYeֵN5ذ#͡Q >ThaP[ExnQX%Emҥr[I ;K{[Xa`V߷R`kaŊC2WD3j⌚z$!ogOjWKJ ;v cAO ;Nn?{ƹVG hո@[=S/*"<J0&y.%̎O5Nt# `g/75Z]m`Q#܀DZ72( (.9}.K̩6 v*" a]:)/a'Q*~;{RZowptp il~/IN ʫҕa"T~31ZRi牢Vm;[fU8jfx.6?Ʊ0ڧ!|a'ZDd;^ݑ|6AH`QRq/ TU ekYQOsYTLiT8jY3 ;~3 HH;WXA䰓U eSW;*D?%Mfz}v}צm$h`B` ̵~|۷Icvm4)O; Vݴ˷t;&b!';cB‰k6_y35K*ri|)XefNY .2UriT1ƍ˱^l\sigפ*4U&Iljqn? 
Wgp;]z77:xvT ZCk]LnbaĴ+ySQpư懞,]kmgaX6Lt,ް̧kű(,ƈlLaG4#Lzv|⍩6wmCN ;}1ꁊN5[Zʐ-zXvE Aq,6f)-޶âނ@nrv կ?Fo-\<(6̧nB`"hǷʼn>ҬxʂYv;;#~wB~oMBXA;Q(VŇխ;x;p ~naD_#LzpjB`'M ֓vL"@w}bv|Yv)1#_g]+ݢ+pyg< G-ӆI[E]v`īb$7vt8$o`t]K;x'v*J׎.W}bY`Gb,aGZg' Z,n‡X'ivл fY!oc( 133c ӥ_MQ%:W64[d;"6ECq cAQ]I:vw <8@&v#^9+:p%[deÔyٰ# aG5^篑.Vb)EHH)v]w\=qԓ'l 󉖶qaS|G*Qn)`zx`g6̵e4w\OWL@#>-v8@Y|{`ac]lwv*%4 v$NZ*\(-9ǭt!''sJBXWYM%O}rÆrISU)vjyH;4#ED;{4r2ɻKQ!'oNɣ&3c;X3`KM7Ȍ[ExȪ2;2w'TimV#q Tp.XQ<ɐ ^j/PTK;=Fz Odo?JdRX[ >SÎB/(p1Nܹ3kNBN[f.Er-'}3piXpo`[}<94iӠK7Րo‚C ŽApҭmr&<|_iS/#/F#3 )SuΰC382ॏDg&%^N4]fzswAҒ* 3ndJ2- vfZ#.E2f2Bi9_A=́wj;P,%ZCzÆ~26LvIxcĔ: v*SvA-]aJp ؙ.;m`OY{n|u"cS`8͂& "|Dv$ ^J߭[EʬUO$tz.vEU_%<1UU]m ;ߥmf;V#y[AÎ%;lx,3Dsn ٩VXXkR20NJ'nG+B#3+JG)\7<.'},44S—vY=Sʆٸkgg{8OV"v\a٠aOiF)ؕd>Ů aYl!ݥ`a6 4rFa^`gڻ >V`cv pH.m`G6J|W'vt=f҇M!c˲W :rM`޵v];pEȂ 쐫h!#9Mq` |,oRHkÙ_O).}+u4ť-{ƬS(m{n ؁)=C>}mOp(hl( ɩcv9%vBWvN*o ^M.qv*Plu}NVuPiڃZ4o=})tlCsaLf baRs{\ay-ޭV ;ӤӳJA6 ;lHCl+O܎Y>kLv΂a= 9ER9Sve ;TwkEA_@3Xyֳ3e;ߴF;Is[b%?v /l:"ﮰ[EF֣z2h,bIag w4ZOwVJ1vavy_Yu14gi'8sl ;3ð؅ll7|ΰ cvi'}=FRsSnb>;}`rT ;nLvhޢaXFigH-Oa;վnu8,L}NN.ެ3~mZnNV.z.}H;Z;$O8AypZ79_GbH+a v1&j9x-ϒ[Jq=;p4!޼vhaR09YN:٦p735;W)OvQ}ϣv2;=aȭ vPZ=AD dvv<|qI*"(VUI;1vrSմ+Ic5h8R*XGʳ1vO#Ƿ 3bů'd 2-`;%Žq/Q8dΓY.l%ߏ*EF>\ziqxN;Uc;KὠX]؟%13C-G`'mjz||rv`e?q#V\ze0K";~a?GǰN9.mezfη΢KGޤ.KM9aر0$maVq,z~v#vj[YzQxhrCX;/ N\sҰ*NVYF"WK["|q6|A-;.a6 k- #@7~n;$"a667vl;K<(VP7ue<Nvcq3y? Un[Ŀ#/?i^E ;37ga'mLX}3>MG#NnL{;zCv0Hk[ϴ4(B@!|ծ8줵Ffmϟ@D7NVxMYB`dс3ҬEOaQ($mxk+y|л aʬd;Sj,MЅ>ֽK+HP|P`=;;i[EdŊyF<렀y3ٜ=(jm.tC-#(\Tx:$C9zuG`'mƊN}.C|,.9Yh ѻy\ 2՗8Z;,'}RvXk}vB6 Fބ]?$;(Xs|O: bӺ pQakv 5; ;i[E|$қĝrC-ĜTMU^^@sX@kGZq-8g>O}^J aI*"(V_e5 rn HcL`go2lw^^L>XEvADS~I'Ŋ̈́yj>E.ҔӞBH؁u`>eXH̠@c&Gr4Ȼ9S\,m|cH>WgZ HSf3SeG؁ذvd\።@@ƧX0 ;;C`,YGby@#5XN;瞤|R)F,/ gƑlI~X>v+8Oț Eؑh-xsR w+ bSyR`Gk;wx B#:p?5i$IhvO(Ž;rTU0Ã~κ#v `>ny^[N9-#_ ='^|"^ EؑIziV=_($Uvxi;>O?loYL8rYVn/7Yi>6iYAŽLO\+ Kq5 8mh  ؁v}g.'tD 'H`v 8K"%d۴"c~\ 'Kz=!g4J>8 AEqWa=tLWF";vpr֧a%:~kdŊ$(;?}`;. 
H\lcDZ=g糐v"8D bŠK@|o=*v]FǑ|tR~8rIEIvLLq,mX1f*)ev] 0&2Մ~j]u 37K"M 8f;sv 3'mQ@g˜=(R9ö~3~=vq7씠 H7ti@M&}=s sm!ŽӔΞҚ}N&a3 dyR/·rel`cMI]? 9ʢX#]H ߥ-2З 9†1;R|>\aGTP6:%7|p}0yB/`'0}n<$aBa'7k_<=xD x:l`G>,H:1LUBT+櫃H@-M'ai~i?o") 8R_*u|8Ž ``njӅJ ;~{ D9yT\/Y3͐;>BӯYVD %kp.9ʿܟ- 0o;"׎vDحz}G0,ȎQ! 8aE;Q"x5/YVDc}'(7~?Uĵ#n9+9WPoD0 Jo~do ENjlgiTb97X AM+VJ($bTr]+8ECda;"׎v6c9Sl 'l86SVI5j:c+|&F涒k"Q~ضCU v/Ss!|[QHOФeD]g<-[;_98Gi2*5DMyD晒oUC ;v]m_a^ 6d?BOt^cMKpn^r}`Q\.=ӎsXYCRPkҶplCm/S$w<cS8ؑaj0/aƟz!L Lت?Bǵudn/p$h~fvy&IJοNZ[WP5k  dBxX5bL`g+ڨa-Nށ#,Z\/kbi!Bdm: ^Wh:03z(g ;WKϰLo<_2P4vF"[NΞxFM^rQ:ya7,\aGi`g_'Y dÔN귕#؄vF?<*;H/M^)NX:Y`aGRj]+l)۽''45vI{xUxu3{S O.r𕰓h~E:y.6 'UX2E'ht$Qa,MxHeedX/ GvpVŽ@A*?pGS:}~H ;nͺNhm܃a;K"d ; $^8 t@65z[|yH ;Gb+ضkd\8dW; OOWJ% B?}-i(['11NlAͨyS43L+K[qv;[Q,\cC @]#!0t„W\񁝸 33Z&tWbv; `6{;K*"`yԜPıI;5bpw& l3ǰw]ZsT̠ba'>|,W˔ +vm4S~+Y+)L v@庞7bw}\U>evGŽj ;#W7ǴakB\L>}.\;qLGٴ2b]PeMo1a'6|2nF\CZw$.0ԏEؙv)G؉a:;M:aZMbߊ}\3Nd[r9%/Jq:i`g.< ;}=fC if;zѧPͫ"+>%/5r|r0k7m!Q'4b ;ovӹSKA>k&eDYevstP"("togi[E; Npc@Vyʤ6eli,:NYɂLwf "9#ο :a'"?~};K(7q `7NÎs N툠۹՝D|U[E<X`'mb{穈 ;@;>%w>l ~v,APqAk zxIK(ejXBr~ǩv&mpy Z"~ɑEhi?;ScT t^e&h~a@aJ[08cf -gqt]8S/4&% Nzsػ<8b/EҐydm>/%yisB׳J3gñ[dKP\=fr{y9ڎEe/uziOÎi5!?f}gJ&(:eʋ#?P8d[h;QMU#,`I0t53BMЏ[Gf/vt`9MT='Xy+X1@w2^9tvڈR"hM5ZW++X&[Xf4?19>;n90XV[E2_˫|A].A]n mTy-K{Q]&Nߒ`a7>3g ÝFsk%L!3sPtW[\2?;+_pĜ9CF]Ɔ;fK{o&r6ƿ].]<t~k-E2ynjc u ^SXVWI ׳|`3*'?<=\h;Cm[Hƿ*4OTx>|ǖZFL|\fpr<@zSmg `Yqv)r1}^ oݴ 76ࡓ mƝ(2Fcno3}g@fnv=nΛ z~y(*ujތEǦ+m5Ha'flߠ}[jڼ٘6U߽Wi>v7~#6r)_JQҔSEQULV%Zr(JS@Qv 0hBQ"U\q(JS@QR2NETq (r;5牢d8EQ9牢d8EQ*u%T)(E,CQ.o(*Yd8EQ9hdrR)(A ӓ)I(*-Y=Tq ( bU EPE%׾F`gljU4( R)(Jש) R)(J5:SEPE~VaZs(*L(QD`U(GmjrS)(vΛٚ6MnZqyIENDB`@=f zϔE_ O #\M-}o\>4 x pUǿ/yI ` dA HBTR KNTmZk;v7((X)*l ((KAR0, wn,i=,v|TDty@CTNQGw|$/Fb~Or M܁rЄśa-`\VVEs]<勋j˙Qܠ"ιm ~0身H+Qǿ('&rMMv)GH>ٷxVuOLo9J+/g<NRUщ~/i/H%2)x%+c_D,<ٟr-Ի9wsVΔ̩Dg->BH#mWi9bǵ-G[h؉~1:9l/Իث\֟?[+޾ 友^d5Gdy>d`66yT;&~ >Sg}y Sd-X?J l[V>;Na؇OyD:/6ÖTm'[U+{/]*q']#=in"'6Q{bu.1;}&D$5q5q$4x2 +t-X'kѸFߗwYUh^ eUT?t%O^O\Ky`,5xXas`z 6:Y[miN38-yRrU}UE tUktÆrĶ/~}?>×i;# 
߷Jى˃x`Bu]y<_]N},p;9yts<N$=!8c!óQv1Da QM!za(X> Vd Y. Y " ~ ~ېy;{t c ʃct,Dq0AOpuL׉pwiyIgLeNe(oL8[XrK+pq>\\ x.o%ٓ'g'mD&dnFv%:k:w{6Ɩش>z~t>Ǘt~t_ًjyke{ws:8K\#ܮف9Ԕow e]rA^b{@ aCٷb]|NbCk:~Dw1clr{&uIZ^css_'~轻@3F?m9Gj[4‚iK9޴O}։D=R:4zڂx1@ZRԵ#9XGL$OٝDk9ڹt :  +j: !^{ >vrٟ25}XU:g@ɼ2cyKͰftOt|Mt|OSצ-ʱΙ)5p,`e\&|/xNq7Xhģp>ݬ;m` `V@|3&CV2{#7:rѕt`C/l-5 /L=Qg#v=8u{Yz8_#qzGyH7sS;舯|\L Egb҅t!F)*u%v]a*L%i64;A:s'ߛo9"~IˣlwQr!uZ~QZZy0ck׏pK|[wQp=<9 t2wq|wD~_[6'~p_ TuL3Ns6VCgCdm14,/Ocsݠ]LyTs}ccQVO{rj8̲gMF ׆i-O9ȻThn[l\Щ9%Ahϳsﶅv^ܜri]aQL-\N13ēSusrüS8n?Mɭ|"dMmxrjS;_x<"m jo3VFlmiso{]3~9FZE<-|Mc5hH{"$zϔE_ O #\Mn ! 0AAP`@ g4!d!dz[ 0Fp}LpvcUʚ;op8ʚ;<4dddd@|- 0,<4!d!d@|- 0,r0___PPT10 2___PPT9/ 0? % ` ̙33` ` ff3333f` 333MMM` f` f` 3>?" dd |?" dd  y p" &  [r  n?" dd@   @@``PR  p   [ ` p>ree>re[[ *" (  vB  ND`11Ȝ?\ ]]}L`  s *`"`Mw vB   ND`11Ȝ?S 'H'HL^   6`1{g[LXB   0D`V gV XB   0D`xgH  0| ? ̙33 $Blank Presentationd  /p8( @` 0   HYYep  ^Brian Walenz and Liliana Florea, Informatics Research, Applied Biosystems, Rockville MD, 20850__"P Y..YY- _ Tx(a(a cG EESTmapper: A Tool for Fast Genome Mapping of Large cDNA Sequence SetsFFQ$ * ` Z(a(a? ? =& aABSTRACT Mapping large sets of cDNA and other sequence features to a genome is an essential precursor to a number of critical bioinformatics workflows, including genome annotation, SNP discovery and detection of alternative splicing. ESTmapper is a software package developed for high-throughput mapping of large cDNA data sets to a eukaryotic genome. It uses an efficient 20-mer index to rapidly locate genomic regions potentially containing the query, then generates a nucleotide-level spliced alignment between the query and each of the selected regions using an optimized version of the sim4 [1] algorithm. We evaluated ESTmapper against BLAT [2] using the dbEST sequence set (http://www.ncbi.nih.gov/dbEST/), and found the results comparable both in terms of sensitivity and computational performance. 
With the amount of genomic data ever increasing, ESTmapper can become an essential tool for large genome sequencing and analysis projects. INTRODUCTION Mapping an expressed DNA sequence on a target genome entails determining its exon model and location on the genome, including clear delimitation of the exon and intron boundaries, alignment quality indicators, and the orientation of the gene from which the sequence was sampled. Sim4 [1], EST_GENOME [3] and Spidey [4] were all designed to align a cDNA and a genomic sequence containing that gene, however, they are not capable of handling the massive mapping tasks required to produce an up-to-date annotation. x  (" ((  F4  G"{{{77 b Z`ƞ(a(a? ?KA I+e%IPH@___PPT9" REFERENCES Florea L, Hartzell G, Zhang Z, Rubin GM, Miller W. A computer program for aligning a cDNA sequence with a genomic sequence . Genome Res., 1998 8(9):964-974. Kent WJ.  BLAT the BLAST-like alignment tool. Genome Res., 2002 12(4):656-665. Mott R.  EST_GENOME: a program to align spliced DNA sequence to unspliced genomic Dna , CABIOS, 1997 13(4):477-478. Wheelan SJ, Church DM, Ostell JM.  Spidey: A tool for mRNA-to-Genomic Alignments , Genome Res., 2001 11(11):1952-1957.>  (( D  N{   # y Z(a(a? ? cG B METHOD ESTmapper generates a spliced alignment between the query and the target genome in three stages. Stage one detects genomic regions potentially containing the query, starting from exactly matching 20-mers and grouping them in chains consistent with the feature model, for instance allowing for introns. Stage two selects a subset of the spanned regions, based on the extent and depth of coverage of the query. Stage three produces nucleotide-level alignments of the query and selected regions, using an optimized version of the sim4 algorithm. Stage 1: Signal Finding Hash Table: ESTmapper uses a position index of all words of size k (k-mers) in the genome to quickly locate genomic regions of potential interest. 
A second table stores a list of k-mers that occur more than T times in the genome. K-mers in this list are ignored, as they lead to numerous false positives. Varying the parameters k and T does not dramatically affect ESTmapper s results, but has a significant effect on memory utilization and CPU time.f(($   /j0 K{{{77   HϞ|j>|j>6 cGO;  Stage 2: Signal Filtering Candidate regions are scored by the portion of the query contained in exact 20-mer matches, and the top scoring regions are selected for alignment in Stage 3. Examples of filtering of candidate regions for two repetitive ESTs are shown below.,=== ww  H|j>|j> If <<RESULTS We evaluated the performance of ESTmapper and BLAT [2] by mapping NCBI dbEST (http://www.ncbi.nih.gov/dbEST/) sequences to the human genome assembly Build 34. While BLAT reports 5% more sequences mapped than ESTmapper, a large portion (~2.2%) of the additional matches is due to BLAT favoring and retaining selected high-similarity sections of the match over the entire alignment, which would have <95% sequence identity. Another significant percentage (~2.4%) is regained when ESTmapper s thresholds are decreased to 90% identity. . (`) e  ,U 0Wu=== wwl  0A ??U'E|\6 5 |!@E& / #"6* $ |!E%   ZȞ\,\,\,\, ??%E& X83.675, 14.341p [   Z\,\,\,\, ?9%?& X83.646, 11.232p [   Z\,\,\,\, ?3%9& W 83.609, 7.260p [   Z(\,\,\,\, ?-%3& X83.606`, 1.782p [   Z*\,\,\,\, ?'%-& W 83.505, 1.170p [   Z"\,\,\,\, ?|!%'& PT=4000p [   ZC\,\,\,\, ??$E% X83.670, 13.358p [   Z M\,\,\,\, ?9$?% X83.645, 10.865p [   Zi.2  T< jJ?"`7,:5.b  ZG})HEI`2jJ?)6f.7/r  B?+3?3   ZX\,X\,3263 `Candidate Region     ZXX\,X\,t2,H5l- w Exon Grouping     ZLX\,X\,x)1,_2 `5-50Kb extension fB  6D=w,=3fB  6D.X2.3`B   0D4 -7-`B   0D3@-c4K/`B   0D+42,3    fDX\,X\,?&3+3 LQ    f X\,X\,??3@3 LQ    f(X\,X\,?&=,&3 LQxB b HD?v9-9f-  s  TX\,X\,)5.J,. } K-mer Match  ZB t s *Dx)@-*P.A  Z(a(a? 
?88F Salient features: Position index of all 20-mers in the genome for efficient match detection Filtering of genomic regions to identify likely matches Detection of multiple occurrences of the query in the genomic sequence Optional identification of best-matches Large-scale computing capability applicable to whole chromosomes and even whole genomes High-throughput processing of sequences Parallel operation in multi-processor environments C++ and Perl modules, portability to IBM AIX, HP Tru64, Sun Solaris, FreeBSD, OS-X and LinuxH}Q{{{778 I-3 u)K0f 7  6./f 8  6 .N/f 9  6N./f :  6 . /f ;  6|12 <  vA.Dark downward diagonal"  . / =  vA.Dark downward diagonal" ./f >  6|1 2f ?  6' |12f @  6|12f A  6|12ZB B s *D/ |1ZB C s *D / |1ZB D s *D' / |1ZB E s *D/N|1ZB F s *DN/|1ZB G s *D/|1ZB H s *D/|1ZB I s *D/|1  <> I-i. J EST sequence   <dH 23 NGenomic sequence  <Q 1 h2 BGT  <t[$ 1Q h2 BAG  <en1( h2 Sexon1$   <g' 1h2 Sexon2$   <pzL1h2 Sexon3$   <|=1h2 Sexon4$   < 1d2 BGT  <1h2 BGT  <p1h2 BAG  <1h2 BAG  Hp|j>|j>N"mIcfl( #We analyzed the nature of differences between the two sets of matches using a set of 100,000 randomly selected EST sequences. Of the 5709 sequences found by BLAT but not by ESTmapper, our method produces more complete alignments that result in sequence identity lower than the 95% in 4652 of the cases. Additionally, each tool has a relatively small set that it maps exclusively (327 for ESTmapper, 1057 for BLAT). 
,  === wwW ' <o**(Ye1 A Mapped by both B Mapped by ESTmapper only C Mapped by neither D Mapped by BLAT only E Mapped by BLAT and also by ESTmapper < 95% identity F Mapped by BLAT (but internal gaps reduced identity < 95%) and also by ESTmapper < 95% identity > N Z   H  0| ?/@ ̙33Px\ tJa;d" L@ @i!т@OՒQqAET0* "AA0 K)숀w$ _޽-Cܶ9K#Q+½0n\.zL&ԍq4|jS{lC usw{GMxϩbԭtO'O0c@MwwXg;x -0jWXLÕ Ljc:>ki-i[8DOZ7@2HR4slz{gz_?7:qaۑ(D.#~uY4@)49bz=MWW@SO|wm$te ^M`/>"k@,pHO72C~Tc6d46MQv E3rBpchP&<\; EkkqQ7f Mz6vd.#CΣrsoVIO՘3\{4aB?.>YfRSR&u頼0j%bmrDj1jh:8]A?1J.KHyq|]3wJ}&g8$dlUĵS9C14I-Iz+9gD6R_̴l%)ѼXfg{HEMbj7`9٦| GZ\t̬ U 7EhF*rlԹ&o^ q'dO2[ ̋շn,UX^w=R}Ke MK^Ky^{#.+RۼJ 9+SU)2 TiZT$ cĴKToB|("**rQAA'W׹}zs)Ck}\"4S|| \9Pނ| siƌ8Z3?W@"8R~; '2TdOs{wHg @fbj;u%C:M]R.uӥp]nÎ5My;4vYesW\gVUL3U^1<# t`IGL9R.`A?- [nA#yUpm7'v6D̦4\1d#cE,mYCWv uwn6|^lwԣm{8ޭPpĨN$h% Q =R[-}V b[9X-ĕe^*ƛ^A4Jm=ofxlVCoi|"gȉ3!M0-P۽ɩ▚ҭ#%j0?2cr>䗏poQIEm@ל[eȟUM#Z#'o4~v80FejTf֮Rn]r\& ,m,ȑ'ʊMxm_9uȓֲjFG%u-ڑݐFJDʠ -EN7Ls#dnC? Hgk-=`\g]O: B?~hF- J0?S_R'[s;D|s`ۧ ;4(Ow@tPm~?r9!Y^ o{սyC }LC\+ 5";񉊡#Ng[ ɳ*qe}XFԂ %X=68fj9 u7j]~yܫ; 4È?iwBt3Z^oy=TotdJkHm_s=㉆ty}< %CԭVS|=p4jNshvi]tvЏ]ttY*ӴN*:I+8-cަoh!7Am>^չ Is@y6}I/8̢mVz\^ .`5}10hN:߃"d.Cb肬̫j_Ha"cM[o*;{M ب&lU 6 Ձ caX6.]P6F`s }ۀOio/\6Ww>}Ӈ|ւ>Ly%w;ZǩOb>E !|9 i&U̳8p4q$.f;18Tb?EpLC4d=O@(t9CCA ]<ڮ>O;(˅Qo?Ú8VrX#%FˀoiQcE!do[dǜ@x!t>OQr>|> R$J6ቅ5%'h9"e)"e w"e!"Mxl+J^*R^٠.Q2})Efl2#e=mkw5Ƭ4z"\H ,BHRH =C\5I}=|:΄3C`Ih$4}9b*0Vy {{aÈ)ThCi@>@? :#@]? $Jqz)Ttz JE@̤8Q<-mzlD+"*^:}o:^XXDy1Qb9bb%b"b5X C$| ;l$;lǘv. _^<އ 7[t! 
,4t8]AM2z :KV3+B(/PlS[moQC,XܧZa2>Pu*].OJ/2LuYIMJbR$ƥ$;C@G)=MG ?@ABCDEFGHIJKLMNOPQRTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~Root EntrydO)PicturesCurrent UserSummaryInformation(SPowerPoint Document(S+DocumentSummaryInformation8kmer-code-2013-trunk/PACKAGING0000644000000000000000000000314212533710731014357 0ustar rootroot ALL --- rm -rf .svn ATAC ---- rm -rf ESTmapper rm -rf ESTmapper\ GSAC.pdf rm -rf ESTmapper\ GSAC.ppt rm -rf ESTmapper\ LaTeX rm -rf Makefile.wiki rm -rf PACKAGING rm -rf README.leaff rm -rf README.meryl rm -rf README.sim4db rm -rf developer-doc rm -rf libsim4 rm -rf seagen rm -rf sim4db rm -rf sim4dbutils rm -rf snapper rm -rf tapper rm -rf trie MERYL ----- rm -rf ESTmapper rm -rf ESTmapper\ GSAC.pdf rm -rf ESTmapper\ GSAC.ppt rm -rf ESTmapper\ LaTeX rm -rf Makefile.wiki rm -rf PACKAGING rm -rf README.atac rm -rf README.leaff rm -rf README.sim4db rm -rf atac-driver rm -rf developer-doc rm -rf leaff rm -rf libsim4 rm -rf seagen rm -rf seatac rm -rf sim4db rm -rf sim4dbutils rm -rf snapper rm -rf tapper rm -rf trie SIM4DB ------ rm -rf ESTmapper rm -rf ESTmapper\ GSAC.pdf rm -rf ESTmapper\ GSAC.ppt rm -rf ESTmapper\ LaTeX rm -rf Makefile.wiki rm -rf PACKAGING rm -rf README.atac rm -rf README.meryl rm -rf atac-driver rm -rf developer-doc rm -rf libkmer rm -rf libmeryl rm -rf meryl rm -rf seagen rm -rf seatac rm -rf snapper rm -rf tapper rm -rf trie ESTmapper --------- rm -rf ESTmapper\ GSAC.pdf rm -rf ESTmapper\ GSAC.ppt rm -rf ESTmapper\ LaTeX rm -rf Makefile.wiki rm -rf PACKAGING rm -rf README.atac rm -rf README.leaff rm -rf README.meryl rm -rf README.sim4db rm -rf atac-driver rm -rf developer-doc rm -rf seatac rm -rf snapper rm -rf tapper rm -rf trie rm -rf ATAC-r2008/.svn ESTmapper-r2008/.svn meryl-r2008/.svn sim4db-r2008/.svn tar -cf ATAC-r2008.tar ATAC-r2008 tar -cf ESTmapper-r2008.tar ESTmapper-r2008 tar -cf meryl-r2008.tar meryl-r2008 tar -cf sim4db-r2008.tar sim4db-r2008 
kmer-code-2013-trunk/libutil/0000755000000000000000000000000012641613360014614 5ustar rootrootkmer-code-2013-trunk/libutil/readBuffer.H0000644000000000000000000000344512322046702016774 0ustar rootroot#ifndef READ_BUFFER_H #define READ_BUFFER_H class readBuffer { public: readBuffer(const char *filename, uint64 bufferMax = 32 * 1024); readBuffer(FILE *F, uint64 bufferMax = 32 * 1024); ~readBuffer(); bool eof(void) { return(_eof); }; char peek(void); char read(void); uint64 read(void *buf, uint64 len); uint64 read(void *buf, uint64 maxlen, char stop); void seek(uint64 pos); uint64 tell(void) { return(_filePos); }; const char *filename(void) { return(_filename); }; private: void fillBuffer(void); void init(int fileptr, const char *filename, uint64 bufferMax); char *_filename; int _file; uint64 _filePos; bool _mmap; bool _stdin; bool _eof; // If bufferMax is zero, then we are using the mmapped interface, otherwise, // we are using a open()/read() and a small buffer. uint64 _bufferPos; uint64 _bufferLen; uint64 _bufferMax; char *_buffer; }; // Returns the next letter in the buffer, but DOES NOT advance past // it. Might have some wierd interaction with EOF -- if you peek() // and the next thing is eof , the _eof flag might get set. // inline char readBuffer::peek(void) { if ((_eof == false) && (_bufferPos >= _bufferLen)) fillBuffer(); if (_eof) return(0); return(_buffer[_bufferPos]); } // Returns the next letter in the buffer. Returns EOF (0) if there // is no next letter. 
// inline char readBuffer::read(void) { if ((_eof == false) && (_bufferPos >= _bufferLen)) fillBuffer(); if (_eof) return(0); _bufferPos++; _filePos++; return(_buffer[_bufferPos-1]); } #endif // READ_BUFFER_H kmer-code-2013-trunk/libutil/bitPackedArray.H0000644000000000000000000001564012322046702017614 0ustar rootroot#ifndef BITPACKEDARRAY_H #define BITPACKEDARRAY_H #undef DEBUG_BPH_ADD #undef DEBUG_BPH_GET //////////////////////////////////////// // // bitPackedArray // // implements an integer array using bit-widths less than word-sizes, // e.g., a memory efficient way to store 23 bit numbers. Numbers may // be up to 64 bits wide. // // The array is variable length, and it is implemented as an array, // not a list or tree -- accessing element 1,000,000 will allocate // elements 0 through 999,999. // class bitPackedArray { public: // Create a bitpacked array with elements of width 'width' using // 'segmentSize' KB per segment. If you know your array is going // to be much bigger or smaller, crank this value. // bitPackedArray(uint32 valueWidth, uint32 segmentSize = 1024); ~bitPackedArray(); // No array operator is provided, because we cannot return a // reference to a value that is split across two words (or even a // reference to a value that is not bit aligned in the word). // uint64 get(uint64 idx); void set(uint64 idx, uint64 val); // Clear the array. Since the array is variable sized, you must add // things to a new array before clearing it. void clear(void); private: uint32 _valueWidth; uint32 _segmentSize; uint64 _nextElement; // the first invalid element uint64 _valuesPerSegment; uint64 _numSegments; uint64 _maxSegments; uint64 **_segments; }; // An array of bits. Exactly the same as the bitPackedArray, but // optimized for width=1. 
// class bitArray { public: bitArray(uint32 segmentSize = 1024); ~bitArray(); uint64 get(uint64 idx); uint64 getAndSet(uint64 idx); void set(uint64 idx); void clr(uint64 idx); void clear(void); private: void resize(uint64 s); uint32 _segmentSize; uint64 _valuesPerSegment; uint64 _numSegments; uint64 _maxSegments; uint64 **_segments; }; // Uses the bitPackedArray to implement a heap. The bitPackedArray is dynamically sized, // so this can be too. // class bitPackedHeap { public: bitPackedHeap(uint32 width, uint64 size=16) { _array = new bitPackedArray(width, size); _array->set(0, 0); _lastVal = 0; }; ~bitPackedHeap() { delete _array; }; uint64 get(void) { uint64 biggestVal = ~uint64ZERO; if (_lastVal == 0) return(biggestVal); biggestVal = _array->get(0); _lastVal--; if (_lastVal == 0) return(biggestVal); uint64 t = _array->get(_lastVal); _array->set(0, t); uint64 pidx = 0; uint64 pval = t; uint64 cidx = 1; uint64 cval = 0; // set below while (cidx < _lastVal) { // Set cval here, so we can first test if cidx is in range. cval = _array->get(cidx); // Pick the smallest of the two kids if (cidx+1 < _lastVal) { t = _array->get(cidx+1); if (cval > t) { cidx++; cval = t; } } #ifdef DEBUG_BPH_GET fprintf(stderr, "test c="uint64FMT" and p="uint64FMT" lastVal="uint64FMT"\n", cidx, pidx, _lastVal); fprintf(stderr, "test c="uint64FMT"="uint64FMT"\n", cidx, cval); fprintf(stderr, "test p="uint64FMT"="uint64FMT"\n", pidx, pval); fprintf(stderr, "test c="uint64FMT"="uint64FMT" and p="uint64FMT"="uint64FMT"\n", cidx, cval, pidx, pval); #endif if (cval < pval) { #ifdef DEBUG_BPH_GET fprintf(stderr, "swap c="uint64FMT"="uint64FMT" and p="uint64FMT"="uint64FMT"\n", cidx, cval, pidx, pval); #endif // Swap p and c _array->set(pidx, cval); _array->set(cidx, pval); // Move down the tree -- pval doesn't change, we moved it into cidx! 
pidx = cidx; cidx = cidx * 2 + 1; } else { cidx = _lastVal; } } return(biggestVal); }; void add(uint64 value) { uint64 cidx = _lastVal; uint64 cval = value; uint64 pidx = 0; uint64 pval = 0; bool more = false; #ifdef DEBUG_BPH_ADD fprintf(stderr, "add c="uint64FMT"="uint64FMT" -- lastVal="uint64FMT"\n", cidx, cval, _lastVal); #endif _array->set(cidx, cval); if (cidx > 0) more = true; while (more) { pidx = (cidx-1) / 2; #ifdef DEBUG_BPH_ADD fprintf(stderr, "more c="uint64FMT" and p="uint64FMT"\n", cidx, pidx); #endif pval = _array->get(pidx); #ifdef DEBUG_BPH_ADD fprintf(stderr, "test c="uint64FMT"="uint64FMT" and p="uint64FMT"="uint64FMT"\n", cidx, cval, pidx, pval); #endif if (pval > cval) { #ifdef DEBUG_BPH_ADD fprintf(stderr, "swap c="uint64FMT"="uint64FMT" and p="uint64FMT"="uint64FMT"\n", cidx, cval, pidx, pval); #endif // Swap p and c _array->set(cidx, pval); _array->set(pidx, cval); // Move up the tree -- cval doesn't change, we moved it into pidx! cidx = pidx; } else { more = false; } if (cidx == 0) more = false; } _lastVal++; //dump(); }; void dump(void) { for (uint32 i=0; i<_lastVal; i++) fprintf(stderr, "HEAP["uint32FMT"]="uint64FMT"\n", i, _array->get(i)); } void clear(void) { _array->clear(); _lastVal = 0; }; private: bitPackedArray *_array; uint64 _lastVal; }; inline uint64 bitArray::get(uint64 idx) { uint64 s = idx / _valuesPerSegment; uint64 p = idx % _valuesPerSegment; uint64 wrd = (p >> 6) & 0x0000cfffffffffffllu; uint64 bit = (p ) & 0x000000000000003fllu; return((_segments[s][wrd] >> bit) & 0x0000000000000001llu); } inline void bitArray::resize(uint64 s) { if (s < _numSegments) return; if (s > _maxSegments) { _maxSegments = s + 16; uint64 **S = new uint64 * [_maxSegments]; for (uint32 i=0; i<_numSegments; i++) S[i] = _segments[i]; delete [] _segments; _segments = S; } while (_numSegments <= s) _segments[_numSegments++] = new uint64 [_segmentSize * 1024 / 8]; } inline uint64 bitArray::getAndSet(uint64 idx) { uint64 s = idx / _valuesPerSegment; 
uint64 p = idx % _valuesPerSegment; uint64 wrd = (p >> 6) & 0x0000cfffffffffffllu; uint64 bit = (p ) & 0x000000000000003fllu; uint64 ret = (_segments[s][wrd] >> bit) & 0x0000000000000001llu; _segments[s][wrd] |= uint64ONE << bit; return(ret); } inline void bitArray::set(uint64 idx) { uint64 s = idx / _valuesPerSegment; uint64 p = idx % _valuesPerSegment; resize(s); uint64 wrd = (p >> 6) & 0x0000cfffffffffffllu; uint64 bit = (p ) & 0x000000000000003fllu; _segments[s][wrd] |= uint64ONE << bit; } inline void bitArray::clr(uint64 idx) { uint64 s = idx / _valuesPerSegment; uint64 p = idx % _valuesPerSegment; resize(s); uint64 wrd = (p >> 6) & 0x0000cfffffffffffllu; uint64 bit = (p ) & 0x000000000000003fllu; _segments[s][wrd] &= ~(0x0000000000000001llu << bit); } #endif // BITPACKEDARRAY_H kmer-code-2013-trunk/libutil/bzipBuffer.C0000644000000000000000000001464612322046702017025 0ustar rootroot#include "util++.H" #include #include #include #include #include #include // This is probably correct, it just cannot read a normal *.bz file; // it probably reads an unpackaged raw bzip stream. 
bzipBuffer::bzipBuffer(const char *filename, uint32 bufferMax) { _filename = new char [strlen(filename) + 1]; strcpy(_filename, filename); if (bufferMax == 0) bufferMax = 32 * 1024; errno = 0; _file = open(filename, O_RDONLY | O_LARGEFILE); if (errno) { fprintf(stderr, "bzipBuffer()-- couldn't open the file '%s': %s\n", filename, strerror(errno)); exit(1); } _filePos = 0; _eof = false; _bzip2bufferMax = bufferMax; _bzip2inPos = 0; _bzip2outPos = 0; _bzip2in = new char [_bzip2bufferMax]; _bzip2out = new char [_bzip2bufferMax]; _bzip2streamEnd = false; _bzip2stream.next_in = _bzip2in; _bzip2stream.avail_in = 0; _bzip2stream.total_in_lo32 = 0; _bzip2stream.total_in_hi32 = 0; _bzip2stream.next_out = _bzip2out; _bzip2stream.avail_out = 0; _bzip2stream.total_out_lo32 = 0; _bzip2stream.total_out_hi32 = 0; _bzip2stream.state = 0L; _bzip2stream.bzalloc = 0L; _bzip2stream.bzfree = 0L; _bzip2stream.opaque = 0L; int res = BZ2_bzDecompressInit(&_bzip2stream, 0, 0); if (res != BZ_OK) { // BZ_CONFIG_ERROR, BZ_PARAM_ERROR, BZ_MEM_ERROR fprintf(stderr, "bzipBuffer::bzipBuffer()-- Failed to initialize the decompressor.\n"); exit(1); } fillBuffer(); } bzipBuffer::~bzipBuffer() { delete [] _bzip2in; delete [] _bzip2out; close(_file); } void bzipBuffer::fillBuffer(void) { if (_bzip2streamEnd) { _eof = true; return; } // Scream and holler if the bzip2 buffer isn't exhausted! // if (_bzip2outPos < _bzip2stream.avail_out) { fprintf(stderr, "bzipBuffer::fillBuffer()-- Buffer isn't empty! Still %d bytes!\n", (int)(_bzip2stream.avail_out - _bzip2outPos)); return; } _bzip2outPos = 0; again: // If there is stuff in the input, run the decompressor. If it // decompresses anything, return. 
// if (_bzip2stream.avail_in > 0) { fprintf(stderr, "about to decompress %d bytes in input\n", (int)_bzip2stream.avail_in); fprintf(stderr, "in is bzip2:%p and real:%p (diff %d)\n", _bzip2stream.next_in, _bzip2in, _bzip2stream.next_in - _bzip2in); fprintf(stderr, "out is bzip2:%p and real:%p (diff %d)\n", _bzip2stream.next_out, _bzip2out, _bzip2stream.next_out - _bzip2out); int res = BZ2_bzDecompress(&_bzip2stream); if (res == BZ_STREAM_END) { fprintf(stderr, "GOT STREAM END!\n"); BZ2_bzDecompressEnd(&_bzip2stream); _bzip2streamEnd = true; res = BZ_OK; } if (res != BZ_OK) { fprintf(stderr, "bzipBuffer::fillBuffer()-- Failed to decompress.\n"), exit(1); } fprintf(stderr, "decompressed %d bytes; still have %d in input\n", (int)_bzip2stream.avail_out, (int)_bzip2stream.avail_in); fprintf(stderr, "in is bzip2:%p and real:%p (diff %d)\n", _bzip2stream.next_in, _bzip2in, _bzip2stream.next_in - _bzip2in); fprintf(stderr, "out is bzip2:%p and real:%p (diff %d)\n", _bzip2stream.next_out, _bzip2out, _bzip2stream.next_out - _bzip2out); if (_bzip2stream.avail_out > 0) { fprintf(stderr, "----------------------------------------\n"); fwrite(_bzip2stream.next_out, sizeof(char), _bzip2stream.avail_out, stderr); fprintf(stderr, "\n----------------------------------------\n"); return; } } // If we're here and _bzip2streamEnd is true, we hit the end of the // stream at the same time we hit the end of the input data. // if (_bzip2streamEnd) { _eof = true; return; } // Otherwise, we need to read some input. 
// errno = 0; _bzip2stream.next_in = _bzip2in; _bzip2stream.avail_in = (uint32)::read(_file, _bzip2in, sizeof(char) * _bzip2bufferMax); _bzip2stream.next_out = _bzip2out; _bzip2stream.avail_out = _bzip2bufferMax; if (errno) { fprintf(stderr, "bzipBuffer::fillBuffer()-- read failed: %s\n", strerror(errno)); exit(1); } fprintf(stderr, "read %d bytes\n", (int)_bzip2stream.avail_in); if (_bzip2stream.avail_in == 0) { fprintf(stderr, "bzipBuffer::fillBuffer()-- hit end of file?\n"); _eof = true; return; } // And now try to decompress it again // goto again; } bool bzipBuffer::seek(off_t pos) { fprintf(stderr, "bzipBuffer()-- seek() not available for file '%s'.\n", _filename); return(false); } size_t bzipBuffer::read(char *buf, size_t len) { #if 0 if (_fileType == 2) { size_t c = 0; while ((_bufferPos < _bufferLen) && (c < len)) buf[c++] = _buffer[_bufferPos++]; return(c); } else { // The trick here is to use the existing buffered input first, // then do a direct read to get the rest. // // We fill the buffer again if it is empty. // // The number of bytes actually put into buf is returned. size_t bCopied = 0; // Number of bytes copied into the buffer size_t bRead = 0; // Number of bytes read into the buffer size_t bAct = 0; // Number of bytes actually read from disk // Easy case; the next len bytes are already in the buffer; just // copy and move the position. // // XXX: Check the zero-left-in-buffer case // if (_bufferLen - _bufferPos > len) { bCopied = len; bRead = 0; memcpy(buf, _buffer + _bufferPos, sizeof(char) * len); _bufferPos += (uint32)len; } else { // Existing buffer not big enough. Copy what's there, then finish // with a read. 
// memcpy(buf, _buffer + _bufferPos, (_bufferLen - _bufferPos) * sizeof(char)); bCopied = _bufferLen - _bufferPos; _bufferPos = _bufferLen; while (bCopied + bRead < len) { errno = 0; bAct = (uint32)::read(_file, buf + bCopied + bRead, (len - bCopied - bRead) * sizeof(char)); if (errno) { fprintf(stderr, "bzipBuffer()-- couldn't read %d bytes from '%s': n%s\n", (uint32)len * sizeof(char), _filename, strerror(errno)); exit(1); } // If we hit EOF, return a short read if (bAct == 0) { len = 0; } bRead += bAct; } } if (_bufferPos == _bufferLen) fillBuffer(); return(bCopied + bRead); } #endif return(0); } kmer-code-2013-trunk/libutil/unaryEncodingTester.C0000644000000000000000000001151112322046702020707 0ustar rootroot#include "util++.H" uint64 numLoops = 1; uint64 numNums = 4000000; uint64 numSize = 300; // The space in bits that we can play with, and the pointer to said space. // uint64 spa = 128 * 1024 * 1024 * 8; uint64 *ptr = 0L; uint64 *rnd = 0L; void testUnary(void) { uint64 pos = uint64ZERO; uint64 siz = uint64ZERO; uint64 val = uint64ZERO; uint64 i = uint64ZERO; for (i=0; i= spa) { fprintf(stderr, "ERROR: Ran out of space in testUnary at number "uint64FMT" out of "uint64FMT"\n", i, numNums); exit(1); } } //fprintf(stderr, "unaryEncodedNumbers used "uint64FMT"MB of storage out of "uint64FMT"MB.\n", pos >> 23, spa >> 23); pos = uint64ZERO; for (i=0; i= spa) { fprintf(stderr, "ERROR: Ran out of space in testGeneralizedUnary at number "uint64FMT" out of "uint64FMT"\n", i, numNums); exit(1); } } //fprintf(stderr, "generalizedUnaryEncodedNumbers used "uint64FMT"MB of storage out of "uint64FMT"MB.\n", pos >> 23, spa >> 23); pos = uint64ZERO; for (i=0; i= spa) { fprintf(stderr, "ERROR: Ran out of space in testGeneralizedUnary at number "uint64FMT" out of "uint64FMT"\n", i, numNums); exit(1); } } //fprintf(stderr, "eliasGammaEncodedNumbers used "uint64FMT"MB of storage out of "uint64FMT"MB.\n", pos >> 23, spa >> 23); pos = uint64ZERO; for (i=0; i= spa) { fprintf(stderr, 
"ERROR: Ran out of space in testGeneralizedUnary at number "uint64FMT" out of "uint64FMT"\n", i, numNums); exit(1); } } //fprintf(stderr, "eliasDeltaEncodedNumbers used "uint64FMT"MB of storage out of "uint64FMT"MB.\n", pos >> 23, spa >> 23); pos = uint64ZERO; for (i=0; i \n", argv[0]); fprintf(stderr, " -> DEFAULTS USED <-\n"); } else { numLoops = strtouint32(argv[1], 0L); numNums = strtouint32(argv[2], 0L); } rnd = new uint64 [numNums]; ptr = new uint64 [spa >> 6]; mt_s *ctx = mtInit(time(NULL)); // Generate some random numbers to store // while (numLoops--) { // Test out unary encodings on small numbers // for (uint64 i=0; i #include #include #include #include #include // If bufferMax is zero, then the file is accessed using memory // mapped I/O. Otherwise, a small buffer is used. // readBuffer::readBuffer(const char *filename, uint64 bufferMax) { _filename = 0L; _file = 0; _filePos = 0; _mmap = false; _stdin = false; _eof = false; _bufferPos = 0; _bufferLen = 0; _bufferMax = 0; _buffer = 0L; if (((filename == 0L) && (isatty(fileno(stdin)) == 0)) || ((filename != 0L) && (filename[0] == '-') && (filename[1] == 0))) { _filename = new char [32]; strcpy(_filename, "(stdin)"); _stdin = true; if (bufferMax == 0) bufferMax = 32 * 1024; } else if (filename == 0L) { fprintf(stderr, "readBuffer()-- no filename supplied, and I will not use the terminal for input.\n"), exit(1); } else { _filename = new char [strlen(filename) + 1]; strcpy(_filename, filename); } if (bufferMax == 0) { _mmap = true; _buffer = (char *)mapFile(_filename, &_bufferLen, 'r'); } else { errno = 0; _file = (_stdin) ? 
fileno(stdin) : open(_filename, O_RDONLY | O_LARGEFILE); if (errno) fprintf(stderr, "readBuffer()-- couldn't open the file '%s': %s\n", _filename, strerror(errno)), exit(1); _bufferMax = bufferMax; _buffer = new char [_bufferMax]; } fillBuffer(); if (_bufferLen == 0) _eof = true; } readBuffer::readBuffer(FILE *file, uint64 bufferMax) { if (bufferMax == 0) fprintf(stderr, "readBuffer()-- WARNING: mmap() not supported in readBuffer(FILE *)\n"); _filename = new char [32]; _file = fileno(file); _filePos = 0; _mmap = false; _stdin = false; _eof = false; _bufferPos = 0; _bufferLen = 0; _bufferMax = (bufferMax == 0) ? 32 * 1024 : bufferMax; _buffer = new char [_bufferMax]; strcpy(_filename, "(hidden file)"); // Just be sure that we are at the start of the file. errno = 0; lseek(_file, 0, SEEK_SET); if ((errno) && (errno != ESPIPE)) fprintf(stderr, "readBuffer()-- '%s' couldn't seek to position 0: %s\n", _filename, strerror(errno)), exit(1); fillBuffer(); if (_bufferLen == 0) _eof = true; } readBuffer::~readBuffer() { delete [] _filename; if (_mmap) unmapFile(_buffer, _bufferLen); else delete [] _buffer; if (_stdin == false) close(_file); } void readBuffer::fillBuffer(void) { // If there is still stuff in the buffer, no need to fill. if (_bufferPos < _bufferLen) return; // No more stuff in the buffer. But if mmap'd, ths means we're EOF. 
if (_mmap) { _eof = true; return; } _bufferPos = 0; _bufferLen = 0; again: errno = 0; _bufferLen = (uint64)::read(_file, _buffer, _bufferMax); if (errno == EAGAIN) goto again; if (errno) fprintf(stderr, "readBuffer::fillBuffer()-- only read "uint64FMT" bytes, couldn't read "uint64FMT" bytes from '%s': %s\n", _bufferLen, _bufferMax, _filename, strerror(errno)), exit(1); if (_bufferLen == 0) _eof = true; } void readBuffer::seek(uint64 pos) { if (_stdin == true) { if (_filePos < _bufferLen) { _filePos = 0; _bufferPos = 0; return; } else { fprintf(stderr, "readBuffer()-- seek() not available for file 'stdin'.\n"); exit(1); } return; } assert(_stdin == false); if (_mmap) { _bufferPos = pos; _filePos = pos; } else { errno = 0; lseek(_file, pos, SEEK_SET); if (errno) fprintf(stderr, "readBuffer()-- '%s' couldn't seek to position "int64FMT": %s\n", _filename, pos, strerror(errno)), exit(1); _bufferLen = 0; _bufferPos = 0; _filePos = pos; fillBuffer(); } _eof = (_bufferPos >= _bufferLen); } uint64 readBuffer::read(void *buf, uint64 len) { char *bufchar = (char *)buf; // Handle the mmap'd file first. if (_mmap) { uint64 c = 0; while ((_bufferPos < _bufferLen) && (c < len)) { bufchar[c++] = _buffer[_bufferPos++]; _filePos++; } if (c == 0) _eof = true; return(c); } // Easy case; the next len bytes are already in the buffer; just // copy and move the position. if (_bufferLen - _bufferPos > len) { memcpy(bufchar, _buffer + _bufferPos, len); _bufferPos += len; fillBuffer(); _filePos += len; return(len); } // Existing buffer not big enough. Copy what's there, then finish // with a read. 
uint64 bCopied = 0; // Number of bytes copied into the buffer uint64 bRead = 0; // Number of bytes read into the buffer uint64 bAct = 0; // Number of bytes actually read from disk memcpy(bufchar, _buffer + _bufferPos, _bufferLen - _bufferPos); bCopied = _bufferLen - _bufferPos; _bufferPos = _bufferLen; while (bCopied + bRead < len) { errno = 0; bAct = (uint64)::read(_file, bufchar + bCopied + bRead, len - bCopied - bRead); if (errno) fprintf(stderr, "readBuffer()-- couldn't read "uint64FMT" bytes from '%s': n%s\n", len, _filename, strerror(errno)), exit(1); // If we hit EOF, return a short read if (bAct == 0) len = 0; bRead += bAct; } fillBuffer(); _filePos += bCopied + bRead; return(bCopied + bRead); } uint64 readBuffer::read(void *buf, uint64 maxlen, char stop) { char *bufchar = (char *)buf; uint64 c = 0; // We will copy up to 'maxlen'-1 bytes into 'buf', or stop at the first occurrence of 'stop'. // This will reserve space at the end of any string for a zero-terminating byte. maxlen--; if (_mmap) { // Handle the mmap'd file first. while ((_bufferPos < _bufferLen) && (c < maxlen)) { bufchar[c++] = _buffer[_bufferPos++]; if (bufchar[c-1] == stop) break; } if (_bufferPos >= _bufferLen) _eof = true; } else { // And the usual case. while ((_eof == false) && (c < maxlen)) { bufchar[c++] = _buffer[_bufferPos++]; if (_bufferPos >= _bufferLen) fillBuffer(); if (bufchar[c-1] == stop) break; } } bufchar[c] = 0; return(c); } kmer-code-2013-trunk/libutil/file.c0000644000000000000000000002262112322046702015676 0ustar rootroot#include #include #include #include #include #include #include #include #include #include #include #include "util.h" int isHuman(FILE *F) { return(isatty(fileno(F))); } #ifdef __alpha unsigned long __sbrk_override = 1; // See malloc(3) for details. 
#define MMAPFLAGS (MAP_FILE | MAP_VARIABLE | MAP_SHARED) #endif #ifdef _AIX #define MMAPFLAGS (MAP_FILE | MAP_VARIABLE | MAP_SHARED) #endif #ifdef __CYGWIN__ #define MMAPFLAGS (MAP_FILE | MAP_SHARED) #endif #ifdef __linux #define MMAPFLAGS (MAP_FILE | MAP_SHARED) #endif #ifdef __FreeBSD__ #define MMAPFLAGS (MAP_FILE | MAP_SHARED) #endif #ifdef __sun #define MMAPFLAGS (MAP_SHARED) #endif #ifdef __APPLE__ #define MMAPFLAGS (MAP_FILE | MAP_SHARED) #endif FILE* makeTempFile(char *path) { char template[PATH_MAX + 1]; int fildes; FILE *F; if (path) { strcpy(template, path); strcat(template, "/XXXXXX"); } else { strcpy(template, "XXXXXX"); } errno = 0; fildes = mkstemp(template); if (errno) { fprintf(stderr, "Failed to create temporary file '%s': %s\n", template, strerror(errno)); exit(1); } errno = 0; F = fdopen(fildes, "w+"); if (errno) { fprintf(stderr, "Failed to open temporary file '%s': %s\n", template, strerror(errno)); exit(1); } errno = 0; unlink(template); if (errno) { fprintf(stderr, "Failed to hide temporary file '%s': %s\n", template, strerror(errno)); exit(1); } return(F); } void* mapFile(const char *filename, uint64 *length, char mode) { void *ptr = 0L; struct stat sb; int f; int openMode = O_RDONLY | O_LARGEFILE; int mapMode = O_RDWR | O_LARGEFILE; switch (mode) { case 'r': openMode = O_RDONLY | O_LARGEFILE; mapMode = PROT_READ; break; case 'w': openMode = O_RDWR | O_LARGEFILE; mapMode = PROT_READ | PROT_WRITE; break; default: fprintf(stderr, "Invalid mode to mapFile; must be 'r' or 'w'\n"); exit(1); break; } errno = 0; f = open(filename, openMode); if (errno) { fprintf(stderr, "Couldn't open() '%s'\n%s\n", filename, strerror(errno)); exit(1); } fstat(f, &sb); if (errno) { fprintf(stderr, "Couldn't fstat() '%s'\n%s\n", filename, strerror(errno)); exit(1); } *length = sb.st_size; ptr = mmap(0L, *length, mapMode, MMAPFLAGS, f, (off_t)0); if (errno) { fprintf(stderr, "Couldn't mmap() '%s'\n%s\n", filename, strerror(errno)); exit(1); } close(f); return(ptr); } 
void unmapFile(void *addr, uint64 length) { #ifdef __sun // This might work in general, but sun definitely needs the cast. // (void)munmap((caddr_t)addr, length); #else (void)munmap(addr, length); #endif } // Copies all of srcFile to dstFile, returns the number of bytes written // off_t copyFile(char *srcName, FILE *dstFile) { off_t srcSize = 0; off_t bytesRemain = 0; off_t bytesRead = 0; int bufferSize = 1024 * 1024; char *buffer = 0L; FILE *srcFile = 0L; buffer = (char *)malloc(sizeof(char) * bufferSize); if (buffer == 0L) { fprintf(stderr, "copyFile()-- Can't allocate buffer.\n"); exit(1); } srcSize = sizeOfFile(srcName); bytesRemain = srcSize; errno = 0; srcFile = fopen(srcName, "r"); if (errno) { fprintf(stderr, "copyFile()-- failed to open the '%s' during merge: %s\n", srcName, strerror(errno)); exit(1); } while (bytesRemain > 0) { errno = 0; if (bytesRemain > bufferSize) bytesRead = fread(buffer, sizeof(char), (size_t)bufferSize, srcFile); else bytesRead = fread(buffer, sizeof(char), (size_t)bytesRemain, srcFile); if (errno) { fprintf(stderr, "copyFile()-- Error reading source: %s\n", strerror(errno)); exit(1); } if (bytesRead == 0) { fprintf(stderr, "copyFile()-- Short read (%d bytes) on source: %s\n", (int)bytesRead, strerror(errno)); exit(1); } if (bytesRead > 0) { fwrite(buffer, sizeof(char), (size_t)bytesRead, dstFile); if (errno) { fprintf(stderr, "copyFile()-- Error writing %d bytes to destination: %s\n", (int)bytesRead, strerror(errno)); exit(1); } } bytesRemain -= bytesRead; } fclose(srcFile); free(buffer); return(srcSize); } // Takes a path to a file (that possibly doesn't exist) and returns // the number of MB (1048576 bytes) free in the directory of that // file. // uint32 freeDiskSpace(char *path) { char *p, *t; struct statvfs dst; struct stat fst; uint64 ret = 0; // Stat the path; if it exists, we're golden. // if (stat(path, &fst) == 0) { if (statvfs(path, &dst) == -1) { perror("statvfs"); exit(1); } } else { // Doesn't exist. 
Try to find the directory that the file goes into. // // Copy the input path to a temporary string. Strip off // the last component (probably a file prefix, but it could also // be a directory -- see below) and return the free space on // that device. // p = (char *)malloc(sizeof(char) * (strlen(path) + 1)); strcpy(p, path); t = strrchr(p, '/'); if (t) { *t = 0; } else { p[0] = '.'; p[1] = 0; } if (statvfs(p, &dst) == -1) { perror("statvfs"); exit(1); } free(p); } ret = dst.f_frsize; ret *= dst.f_bavail; ret >>= 20; return((uint32)ret); } // Split writes/reads into smaller pieces, check the result of each // piece. Really needed by OSF1 (V5.1). // void safeWrite(int filedes, const void *buffer, const char *desc, size_t nbytes) { size_t position = 0; size_t length = 32 * 1024 * 1024; size_t towrite = 0; size_t written = 0; while (position < nbytes) { towrite = length; if (position + towrite > nbytes) towrite = nbytes - position; errno = 0; written = write(filedes, ((char *)buffer) + position, towrite); if ((errno) || (towrite != written)) { fprintf(stderr, "safeWrite()-- Write failure on %s: %s\n", desc, strerror(errno)); fprintf(stderr, "safeWrite()-- Wanted to write "int64FMT" bytes, wrote "int64FMT".\n", (int64)towrite, (int64)written); exit(1); } position += written; } } int safeRead(int filedes, const void *buffer, const char *desc, size_t nbytes) { size_t position = 0; size_t length = 32 * 1024 * 1024; size_t toread = 0; size_t written = 0; // readen? 
int failed = 0; while (position < nbytes) { toread = length; if (position + toread > nbytes) toread = nbytes - position; errno = 0; written = read(filedes, ((char *)buffer) + position, toread); failed = errno; #ifdef VERY_SAFE if (toread != written) failed = 1; #endif if ((failed) && (errno != EINTR)) { fprintf(stderr, "safeRead()-- Read failure on %s: %s.\n", desc, strerror(errno)); fprintf(stderr, "safeRead()-- Wanted to read "int64FMT" bytes, read "int64FMT".\n", (int64)toread, (int64)written); exit(1); } if (written == 0) break; position += written; } return(position); } void closeFile(FILE *F, const char *path) { // If we're given the path name, see if we need to pclose(), // otherwise just fclose() the file. if ((path) && ((strcmp(path + strlen(path) - 4, ".bz2") == 0) || (strcmp(path + strlen(path) - 3, ".gz") == 0))) { pclose(F); } else { fclose(F); } } FILE* openFile(const char *path, const char *mode) { FILE *F = 0L; int isBz = 0; int isGz = 0; int isRead = 0; int isWrite = 0; int isRW = 1; char cmd[1024] = { 0 };; // Yes, one could make this significantly simpler by saving the // compression command into a variable, instead of the isBz and // isGz flags. Maybe instead we should find a compression command // that uses different flags. if (strcmp(path + strlen(path) - 4, ".bz2") == 0) isBz = 1; if (strcmp(path + strlen(path) - 3, ".gz") == 0) isGz = 1; if (strcmp(mode, "w") == 0) { isRead = 0; isWrite = 1; isRW = 0; } if (strcmp(mode, "r") == 0) { isRead = 1; isWrite = 0; isRW = 0; } if (isBz) { if (isRead) { sprintf(cmd, "bzip2 -dc %s", path); } else if (isWrite) { sprintf(cmd, "bzip2 -9c > %s", path); } else { fprintf(stderr, "openFile()-- Error! Requested mode '%s' unavailable for bzip2 file '%s'\n", mode, path); exit(1); } } else if (isGz) { if (isRead) { sprintf(cmd, "gzip -dc %s", path); } else if (isWrite) { sprintf(cmd, "gzip -9c > %s", path); } else { fprintf(stderr, "openFile()-- Error! 
Requested mode '%s' unavailable for gzip file '%s'\n", mode, path); exit(1); } } else { // Must be a normal file! } if (cmd[0]) { errno = 0; F = popen(cmd, mode); // popen doesn't reliably set errnoman //if (errno) // fprintf(stderr, "openFile()-- Failed to open pipe '%s': %s\n", cmd, strerror(errno)), exit(1); if (F == 0L) fprintf(stderr, "openFile()-- Failed to open pipe '%s'\n", cmd), exit(1); } else { errno = 0; F = fopen(path, mode); if (errno) fprintf(stderr, "openFile()-- Failed to open '%s': %s\n", path, strerror(errno)), exit(1); } return(F); } kmer-code-2013-trunk/libutil/fibonacciEncoding.h0000644000000000000000000000674612322046702020362 0ustar rootroot#ifndef FIBONACCI_ENCODING_H #define FIBONACCI_ENCODING_H #include "bitPacking.h" // Routines to store and retrieve a Fibonacci encoded number to/from a // bit packed word array based at 'ptr' and currently at location // 'pos'. Both routines return the size of the encoded number in // 'siz'. // // FibEncoding can store values up to 17,167,680,177,565 (slightly // below 2^45, so at most a 44-bit number) in a 64-bit quantity. // // 93 bits (92 + 1) are needed to store up to 64-bit values. // // Remember that since we can't store 0, we increment all incoming // values, so the actual space used is: // // #### bits // 0 2 // 1 3 // 2 4 // 3 4 // 4 5 // 5 5 // 6 5 // 7 6 // 8 6 // 9 6 // 10 6 // 11 6 // 12 7 // 20 8 // 33 9 // 54 10 // 88 11 // 143 12 // 232 13 // 376 14 // 609 15 // 986 16 // 1596 17 // 2583 18 // 4180 19 // 6764 20 // 10945 21 // 17710 22 // 28656 23 // 46387 24 // 75024 25 // 121392 26 extern uint32 fibonacciValuesLen; extern uint64 fibonacciValues[92]; inline void setFibonacciEncodedNumber(uint64 *ptr, uint64 pos, uint64 *siz, uint64 val) { uint64 out1 = uint64ZERO; uint64 out2 = uint64ZERO; uint32 fib = fibonacciValuesLen; uint32 fibmax = uint64ZERO; // We cannot store zero as a fibonacci number, so we simply // increase everything by one. 
// val++; // Estimate a starting point for our search; we need a function // that is always slightly more than fib() // // Find the highest bit set, do a lookup // // XXX: Still need this! while (fib-- > 0) { if (val >= fibonacciValues[fib]) { if (fib >= 64) out2 |= uint64ONE << (127 - fib); else out1 |= uint64ONE << (63 - fib); val -= fibonacciValues[fib]; if (fibmax == uint64ZERO) { fibmax = fib + 1; if (fibmax >= 64) out2 |= uint64ONE << (127 - fibmax); else out1 |= uint64ONE << (63 - fibmax); } } } fibmax++; // Write the encoded numbers to the stream // if (fibmax > 64) { setDecodedValue(ptr, pos, 64, out1); pos += 64; out2 >>= (128 - fibmax); setDecodedValue(ptr, pos, fibmax - 64, out2); } else { out1 >>= (64 - fibmax); setDecodedValue(ptr, pos, fibmax, out1); } *siz = fibmax; } inline uint64 getFibonacciEncodedNumber(uint64 *ptr, uint64 pos, uint64 *siz) { uint64 wrd = (pos >> 6) & 0x0000cfffffffffffllu; uint64 sft = 0x8000000000000000llu >> (pos & 0x000000000000003fllu); uint64 val = 0; uint32 fib = 0; uint64 newbit; uint64 oldbit; oldbit = ptr[wrd] & sft; sft >>= 1; if (sft == uint64ZERO) { wrd++; sft = 0x8000000000000000llu; } newbit = ptr[wrd] & sft; sft >>= 1; if (sft == uint64ZERO) { wrd++; sft = 0x8000000000000000llu; } while (!oldbit || !newbit) { if (oldbit) val += fibonacciValues[fib]; fib++; oldbit = newbit; newbit = ptr[wrd] & sft; sft >>= 1; if (sft == uint64ZERO) { wrd++; sft = 0x8000000000000000llu; } } val += fibonacciValues[fib]; (*siz) = fib + 2; // We stored val+1, remember? Probably not, because the encoder is // next. // return(val - 1); } #endif // FIBONACCI_ENCODING_H kmer-code-2013-trunk/libutil/bitPackedFile.C0000644000000000000000000003074712322046702017415 0ustar rootroot#include "util++.H" #include #include #include #include #include #include // N.B. any read() / write() pair (either order) must have a seek (or // a fflush) in between. 
bitPackedFile::bitPackedFile(char const *name, uint64 offset, bool forceTruncate) { _file = 0; _name = new char [strlen(name) + 1]; strcpy(_name, name); #ifdef WITH_BZIP2 _bzFILE = 0L; _bzerr = 0; _bzfile = 0L; #endif _bfrmax = 1048576 / 8; _bfr = new uint64 [_bfrmax]; _pos = uint64ZERO; _bit = uint64ZERO; memset(_bfr, 0, sizeof(uint64) * _bfrmax); _inCore = false; _bfrDirty = false; _forceFirstLoad = false; _isReadOnly = false; _isBzip2 = false; stat_seekInside = uint64ZERO; stat_seekOutside = uint64ZERO; stat_dirtyFlushes = uint64ZERO; file_offset = 0; endianess_offset = 0; endianess_flipped = false; // Try to open the original name -- we don't support compressed // files for rewrite. We just fail with a can't open message. // // To get read/write and create we have to use open(2), as mode // "r+" of fopen(3) will not create. (Yes, but w+ does, sigh.) // if (forceTruncate) { errno = 0; _file = open(_name, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (errno) fprintf(stderr, "bitPackedFile::bitPackedFile()-- failed to open and truncate '%s': %s\n", _name, strerror(errno)), exit(1); } else if (fileExists(_name)) { errno = 0; _file = open(_name, O_RDONLY | O_LARGEFILE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (errno) fprintf(stderr, "bitPackedFile::bitPackedFile()-- failed to open '%s': %s\n", _name, strerror(errno)), exit(1); _isReadOnly = true; } else { errno = 0; _file = open(_name, O_RDWR | O_CREAT | O_LARGEFILE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (errno) fprintf(stderr, "bitPackedFile::bitPackedFile()-- failed to open '%s': %s\n", _name, strerror(errno)), exit(1); } // Move to the correct position in the file. // file_offset = offset; if (file_offset > 0) lseek(_file, file_offset, SEEK_SET); // Deal with endianess. We write out some bytes (or read back some bytes) to the start of // the file, and then hide them from the user. 
// endianess_offset = 32 + file_offset; endianess_flipped = false; char t[16] = { 'b', 'i', 't', 'P', 'a', 'c', 'k', 'e', 'd', 'F', 'i', 'l', 'e', 0, 0, 1 }; char c[16] = { 0 }; uint64 at = uint64NUMBER(0xdeadbeeffeeddada ); uint64 bt = uint64NUMBER(0x0abeadedbabed8f8); uint64 ac = uint64NUMBER(0); uint64 bc = uint64NUMBER(0); size_t nr = 0; errno = 0; nr += read(_file, c, sizeof(char) * 16); nr += read(_file, &ac, sizeof(uint64)); nr += read(_file, &bc, sizeof(uint64)); if (nr == 0) { // Empty file! Write the magic number and our endianess check. errno = 0; write(_file, t, sizeof(char) * 16); write(_file, &at, sizeof(uint64)); write(_file, &bt, sizeof(uint64)); if (errno) fprintf(stderr, "bitPackedFile::bitPackedFile()-- '%s' failed to write the header: %s\n", _name, strerror(errno)), exit(1); return; } if ((c[0] == 'B') && (c[1] == 'Z') && (c[2] == 'h')) { #ifdef WITH_BZIP2 // Looks like a bzip2 file! errno = 0; _bzFILE = fopen(_name, "r"); if (errno) { fprintf(stderr, "bitPackedFile::bitPackedFile()-- failed to open bzip2 file '%s'\n", _name); exit(1); } _bzerr = 0; _bzfile = BZ2_bzReadOpen(&_bzerr, _bzFILE, 0, 0, 0L, 0); if ((_bzfile == 0L) || (_bzerr != BZ_OK)) { fprintf(stderr, "bitPackedFile::bitPackedFile()-- failed to init bzip2 file '%s'\n", _name); exit(1); } BZ2_bzRead(&_bzerr, _bzfile, c, sizeof(char) * 16); BZ2_bzRead(&_bzerr, _bzfile, &ac, sizeof(uint64)); BZ2_bzRead(&_bzerr, _bzfile, &bc, sizeof(uint64)); // XXX should check bzerr! _isReadOnly = true; _isBzip2 = true; #else fprintf(stderr, "bitPackedFile::bitPackedFile()-- '%s' looks like a bzip2 file, but bzip2 support not available!\n", _name); exit(1); #endif } // Check the magic number, decide on an endianess to use. 
// if (strncmp(t, c, 16) == 0) { if ((at == ac) && (bt == bc)) { endianess_flipped = false; } else if ((at == uint64Swap(ac)) && (bt == uint64Swap(bc))) { endianess_flipped = true; } else { fprintf(stderr, "bitPackedFile::bitPackedFile()-- '%s' looked like a bitPackedFile, but failed the endianess check, not opened.\n", _name); exit(1); } } else { fprintf(stderr, "bitPackedFile::bitPackedFile()-- '%s' doesn't appear to be a bitPackedFile, not opened.\n", _name); fprintf(stderr, "bitPackedFile::bitPackedFile()-- found "); for (uint32 i=0; i<16; i++) fprintf(stderr, "%c", isascii(c[i]) ? c[i] : '.'); fprintf(stderr, " at position "uint64HEX"\n", file_offset); exit(1); } _forceFirstLoad = true; seek(0); } bitPackedFile::~bitPackedFile() { flushDirty(); delete [] _bfr; delete [] _name; close(_file); #ifdef WITH_BZIP2 if (_bzFILE) fclose(_bzFILE); if (_bzfile) BZ2_bzReadClose(&_bzerr, _bzfile); #endif } // If the page is dirty, flush it to disk // void bitPackedFile::flushDirty(void) { if (_bfrDirty == false) return; if (_isReadOnly) { fprintf(stderr, "bitPackedFile::bitPackedFile()-- '%s' is readonly, but is dirty!\n", _name); exit(1); } stat_dirtyFlushes++; errno = 0; lseek(_file, _pos * sizeof(uint64) + endianess_offset, SEEK_SET); if (errno) { fprintf(stderr, "bitPackedFile::seek()-- '%s' failed: %s\n", _name, strerror(errno)); exit(1); } // If we need to, flip all the words we are going to write // if (endianess_flipped) for (uint32 i=0; i<_bfrmax; i++) _bfr[i] = uint64Swap(_bfr[i]); // We should only write bits up to _bit, the position we are // currently at. However, we don't know if the block is being // flushed because we're totally finished with it, or because we // are moving on to the next block. If we're done with it, we // want to flush the word that contains _bit, and if we're moving // on to the next one, we'll flush that word again. So, in // either case, we flush the word that contains _bit. 
// errno = 0; write(_file, _bfr, sizeof(uint64) * _bfrmax); if (errno) { fprintf(stderr, "bitPackedFile::write()-- '%s' failed: %s\n", _name, strerror(errno)); exit(1); } // And then flip them back // if (endianess_flipped) for (uint32 i=0; i<_bfrmax; i++) _bfr[i] = uint64Swap(_bfr[i]); _bfrDirty = false; } void bitPackedFile::seekBzip2(uint64 bitpos) { #ifdef WITH_BZIP2 // All we can do here is check that bitpos is // a) in our current buffer // b) would be in the next buffer once we read it uint64 newpos = bitpos >> 6; if (_pos + _bfrmax < newpos) { // nope, not in the buffer -- we could probably handle this by just reading and // discarding from the file until we get to the correct bitpos. fprintf(stderr, "bitPackedFile::seekBzip2()-- '%s' seek was not contiguous!\n", _name); exit(1); } // Copy the remaining bits of the current buffer to the start. Or // not, if this is the first load. uint64 lastpos = _bit >> 6; // The word we are currently in uint64 lastlen = (_bfrmax - lastpos); // The number of words left in the buffer if (_forceFirstLoad == true) { lastpos = 0; lastlen = 0; } else { memcpy(_bfr, _bfr + lastpos, sizeof(uint64) * lastlen); } // Update _bit and _pos -- lastlen is now the first invalid word // _bit = bitpos & 0x3f; // 64 * lastlen; _pos = bitpos >> 6; // Fill the buffer size_t wordsread = 0; if (_bzfile) { _bzerr = 0; wordsread = BZ2_bzRead(&_bzerr, _bzfile, _bfr + lastlen, sizeof(uint64) * (_bfrmax - lastlen)); if (_bzerr == BZ_STREAM_END) { //fprintf(stderr, "bitPackedFile::seekBzip2() file ended.\n"); BZ2_bzReadClose(&_bzerr, _bzfile); fclose(_bzFILE); _bzfile = 0L; _bzFILE = 0L; } else if (_bzerr != BZ_OK) { fprintf(stderr, "bitPackedFile::seekBzip2() '%s' read failed.\n", _name); exit(1); } } //fprintf(stderr, "Filled buffer with %d words!\n", wordsread); // Adjust to make wordsread be the index of the last word we actually read. 
// wordsread += lastlen; // Flip all the words we just read, if needed // if (endianess_flipped) for (uint32 i=lastlen; i> 6) is just before the old // position (_pos), assume that we are being accessed iteratively // backwards and load a full buffer so that the position we want to // access is at the end. // // Easy to think of bone-headed ways to break this (e.g., seek to // the second element in a structure, access the first, then access // the third). Not so easy to think of a logical reason someone // would want to do that. // if (((bitpos >> 6) < _pos) && (_pos <= (bitpos >> 6) + 32)) { _pos = bitpos >> 6; if (_pos > _bfrmax) _pos = _pos - _bfrmax + 32; else _pos = 0; } else { _pos = bitpos >> 6; } _bit = bitpos - (_pos << 6); errno = 0; lseek(_file, _pos * 8 + endianess_offset, SEEK_SET); if (errno) { fprintf(stderr, "bitPackedFile::seekNormal() '%s' seek to pos="uint64FMT" failed: %s\n", _name, _pos * 8 + endianess_offset, strerror(errno)); exit(1); } errno = 0; size_t wordsread = read(_file, _bfr, sizeof(uint64) * _bfrmax); if (errno) { fprintf(stderr, "bitPackedFile::seekNormal() '%s' read of "uint64FMT" bytes failed': %s\n", _name, sizeof(uint64) * _bfrmax, strerror(errno)); exit(1); } // Flip all the words we just read, if needed // if (endianess_flipped) for (uint32 i=0; i> 6; if ((_pos <= np) && (np <= _pos + _bfrmax - 32)) { _bit = bitpos - (_pos << 6); stat_seekInside++; //fprintf(stderr, "SEEK INSIDE to _bit="uint64FMT"\n", _bit); return; } } if (_inCore) { fprintf(stderr, "bitPackedFile::seek()-- file '%s' is in core, but still needed to seek??\n", _name); exit(1); } stat_seekOutside++; flushDirty(); if (_isBzip2) seekBzip2(bitpos); else seekNormal(bitpos); _forceFirstLoad = false; //fprintf(stderr, "SEEK OUTSIDE to _pos="uint64FMT" _bit="uint64FMT"\n", _pos, _bit); } uint64 bitPackedFile::loadInCore(void) { struct stat sb; // Convert this disk-based, read/write bitPackedFile to memory-based read-only. 
flushDirty(); fstat(_file, &sb); // The extra 1024 words is to keep seek() from attempting to grab // the next block (there isn't a next block, we've got it all!) // when we're near the end of this block. We just make the block // a little bigger than it really is. delete [] _bfr; _bfrmax = sb.st_size / 8 + 1024; _bfr = new uint64 [_bfrmax]; _pos = 0; _bit = 0; // Tada! All we need to do now is load the block! _forceFirstLoad = true; seek(0); _inCore = true; return(_bfrmax * 8); } kmer-code-2013-trunk/libutil/test/0000755000000000000000000000000012641613357015601 5ustar rootrootkmer-code-2013-trunk/libutil/test/test-md5.c0000644000000000000000000000316611042143776017412 0ustar rootroot#include #include #include #include "util.h" // Performs the md5 test suite using libbri. MD5 itself is tested in // external/md5. // // Appendix 5 of RFC 1321; // // MD5 test suite: // MD5 ("") = d41d8cd98f00b204e9800998ecf8427e // MD5 ("a") = 0cc175b9c0f1b6a831c399e269772661 // MD5 ("abc") = 900150983cd24fb0d6963f7d28e17f72 // MD5 ("message digest") = f96b697d7cb7938d525a2f31aaf161d0 // MD5 ("abcdefghijklmnopqrstuvwxyz") = c3fcd3d76192e4007dfb496cca67e13b // MD5 ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") = d174ab98d277d9f5a5611c2c9f419d9f // MD5 ("12345678901234567890123456789012345678901234567890123456789012345678901234567890") = 57edf4a22be3c955ac49da2e2107b67a // int testit(char *str, char *ans) { md5_s m; char r[33]; int ret = 0; md5_toascii(md5_string(&m, str, strlen(str)), r); ret = strcmp(r, ans); if (ret) printf("ERROR: expect %s, got %s for %s\n", ans, r, str); return(ret == 0); } int main(int argc, char **argv) { int ret = 7; ret -= testit("", "d41d8cd98f00b204e9800998ecf8427e"); ret -= testit("a", "0cc175b9c0f1b6a831c399e269772661"); ret -= testit("abc", "900150983cd24fb0d6963f7d28e17f72"); ret -= testit("message digest", "f96b697d7cb7938d525a2f31aaf161d0"); ret -= testit("abcdefghijklmnopqrstuvwxyz", "c3fcd3d76192e4007dfb496cca67e13b"); ret -= 
testit("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", "d174ab98d277d9f5a5611c2c9f419d9f"); ret -= testit("12345678901234567890123456789012345678901234567890123456789012345678901234567890", "57edf4a22be3c955ac49da2e2107b67a"); exit(ret); } kmer-code-2013-trunk/libutil/test/tcat.C0000644000000000000000000000315711061606335016637 0ustar rootroot#include #include #include #include #include #include "sweatShop.H" // Reads stdin, writes stdout. Uses threads. int blockSize = 8192; struct tcat_s { int dataLen; char *data; }; void* tcatReader(void *) { tcat_s *s = new tcat_s; s->data = new char [blockSize]; s->dataLen = safeRead(STDIN_FILENO, s->data, "tcatReader", sizeof(char) * blockSize); if (s->dataLen == 0) { delete [] s->data; delete s; return(0L); } return(s); } void tcatWorker(void *, void *, void *) { // Noop! } void tcatWriter(void *, void *S) { tcat_s *s = (tcat_s *)S; safeWrite(STDOUT_FILENO, s->data, "tcatWriter", sizeof(char) * s->dataLen); delete [] s->data; delete s; } int main(int argc, char **argv) { int readBuf = 64; int writBuf = 64; int arg = 1; int err = 0; while (arg < argc) { if (strcmp(argv[arg], "-r") == 0) { readBuf = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-w") == 0) { writBuf = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-b") == 0) { blockSize = atoi(argv[++arg]); } else { fprintf(stderr, "unknown option '%s'\n", argv[arg]); err++; } arg++; } if (err) { fprintf(stderr, "usage: %s [-b blockSizeBytes] [-r readBufferSizeMB] [-w writeBufferSizeMB]\n", argv[0]); exit(1); } sweatShop *ss = new sweatShop(tcatReader, tcatWorker, tcatWriter); ss->setLoaderQueueSize(readBuf * 1024 * 1024 / blockSize); ss->setWriterQueueSize(writBuf * 1024 * 1024 / blockSize); ss->run(); exit(0); } kmer-code-2013-trunk/libutil/test/order.C0000644000000000000000000000332012322046702017004 0ustar rootroot#include #include #include "../util.h" //#include union u64 { uint64 u; unsigned char c[8]; }; union u32 { uint32 u; unsigned char c[4]; }; 
union u16 { uint16 u; unsigned char c[2]; }; uint64 uint64Swap(uint64 x) { x = ((x >> 8) & uint64NUMBER(0x00ff00ff00ff00ff)) | ((x << 8) & uint64NUMBER(0xff00ff00ff00ff00)); x = ((x >> 16) & uint64NUMBER(0x0000ffff0000ffff)) | ((x << 16) & uint64NUMBER(0xffff0000ffff0000)); x = ((x >> 32) & uint64NUMBER(0x00000000ffffffff)) | ((x << 32) & uint64NUMBER(0xffffffff00000000)); return(x); } uint32 uint32Swap(uint32 x) { x = ((x >> 8) & uint32NUMBER(0x00ff00ff)) | ((x << 8) & uint32NUMBER(0xff00ff00)); x = ((x >> 16) & uint32NUMBER(0x0000ffff)) | ((x << 16) & uint32NUMBER(0xffff0000)); return(x); } uint16 uint16Swap(uint16 x) { x = ((x >> 8) & 0x00ff) | ((x << 8) & 0xff00); return(x); } int main(int argc, char **argv) { u64 u64v; u32 u32v; u16 u16v; u64v.u = 0x1234567890abcdefLLU; u32v.u = 0x12345678; u16v.u = 0x1234; for (int i=0; i<8; i++) fprintf(stderr, "%02x", u64v.c[i]); fprintf(stderr, "\n"); for (int i=0; i<4; i++) fprintf(stderr, "%02x", u32v.c[i]); fprintf(stderr, "\n"); for (int i=0; i<2; i++) fprintf(stderr, "%02x", u16v.c[i]); fprintf(stderr, "\n"); u64v.u = uint64Swap(u64v.u); u32v.u = uint32Swap(u32v.u); u16v.u = uint16Swap(u16v.u); for (int i=0; i<8; i++) fprintf(stderr, "%02x", u64v.c[i]); fprintf(stderr, "\n"); for (int i=0; i<4; i++) fprintf(stderr, "%02x", u32v.c[i]); fprintf(stderr, "\n"); for (int i=0; i<2; i++) fprintf(stderr, "%02x", u16v.c[i]); fprintf(stderr, "\n"); } kmer-code-2013-trunk/libutil/test/test-logMsg.C0000644000000000000000000001362510453342204020106 0ustar rootroot#include "util++.H" int main(int argc, char **argv) { logMsg M; M.add("this is a simple test\n"); M.add("%s %s %s %s %s\n", "1", "2", "3", "4", "5"); M.add("%s%s%s%s%s", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n", 
"cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc\n", "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\n", "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee\n"); for (int a=0; a<1024; a++) { M.add("%s%s%s%s%s", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n", 
"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n", 
"cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc\n", 
"dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\n", 
"eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee\n"); } M.fwrite(stdout); } kmer-code-2013-trunk/libutil/test/test-bitPackedArray.C0000644000000000000000000000731612322046702021544 0ustar rootroot#include #include #include #include "util++.H" uint32 wordSize = 41; uint32 testSize = 1 * 1024 * 1024; uint32 arrySize = 1 * 1024 * 1024; int uint64compare(const void *a, const void *b) { const uint64 A = *(const uint64 *)a; const uint64 B = *(const uint64 *)b; if (AB) return(1); return(0); } int main(int argc, char **argv) { mt_s *mtctx = mtInit(time(NULL)); // Test the bitPackedArray by writing a bunch of random gibberish // to it, and see if it's the same. 
uint32 *pos = new uint32 [testSize]; uint64 *val = new uint64 [testSize]; uint64 *ans = new uint64 [arrySize]; bitPackedArray *ARR = new bitPackedArray(wordSize, 16); uint32 fail = uint32ZERO; #if 1 fprintf(stderr, "Touching the end of the array and clearing.\n"); //ARR->set(arrySize, 0); //ARR->clear(); fprintf(stderr, "Generating random test data.\n"); // Hit every element first, just to do it for (uint32 i=0; iset(pos[i], val[i]); fprintf(stderr, "Validating array.\n"); for (uint32 i=0; iget(i) != ans[i]) { fprintf(stderr, "FAIL at i="uint32FMT"\n", i); fail++; if (fail > 1024) { fprintf(stderr, "bitPackedArray has errors, aborting!\n"); return(1); } } if (fail) { fprintf(stderr, "bitPackedArray had "uint32FMT" errors.\n", fail); return(1); } fprintf(stderr, "OK!\n"); #endif delete ARR; delete [] pos; delete [] val; delete [] ans; // // // for (uint32 testNum=0; testNum<32; testNum++) { uint32 thisTestSize = 0; uint32 thisWordSize = 0; // Test a BIG heap the first iteration. if (testNum == 0) { thisTestSize = 857353; //23987153; thisWordSize = 63; fprintf(stderr, "Building heap "uint32FMT" (wordsize="uint32FMT" testsize="uint32FMT").\n", testNum, thisWordSize, thisTestSize); } else { thisTestSize = (mtRandom64(mtctx) % (2 * testNum)) * 1024 + 1024; thisWordSize = (mtRandom64(mtctx) % 63) + 1; } uint32 blockSize = mtRandom64(mtctx) % 32 + 1; bitPackedHeap *HEAP = new bitPackedHeap(thisWordSize, blockSize); val = new uint64 [thisTestSize]; for (uint32 i=0; iadd(val[i]); } fprintf(stderr, "Testing heap "uint32FMT" (wordsize="uint32FMT" testsize="uint32FMT").\n", testNum, thisWordSize, thisTestSize); qsort(val, thisTestSize, sizeof(uint64), uint64compare); for (uint32 i=0; iget(); //fprintf(stderr, "val["uint32FMT"]="uint64FMT" -- HEAP="uint64FMT"\n", i, val[i], h); if (val[i] != h) { fprintf(stderr, "val["uint32FMT"]="uint64FMT" !! 
HEAP="uint64FMT"\n", i, val[i], h); fail++; if (fail > 25) { fprintf(stderr, "bitPackedHeap has errors, aborting!\n"); return(1); } } } if (fail) { fprintf(stderr, "bitPackedHeap had "uint32FMT" errors.!\n", fail); return(1); } delete HEAP; delete [] val; } fprintf(stderr, "OK!\n"); return(fail); } kmer-code-2013-trunk/libutil/test/test-freeDiskSpace.c0000644000000000000000000000041410132173315021415 0ustar rootroot#include "util.h" int main(int argc, char **argv) { int i; if (argc == 1) { fprintf(stderr, "usage: %s file [...]\n", argv[0]); exit(1); } for (i=1; i #include "util.h" int main(void) { uint32 errors = 0; uint32 u3 = -1; int32 s3 = -1; uint64 u6 = -1; int64 s6 = -1; if (sizeof(uint32) != 4) fprintf(stderr, "uint32 has %d bytes (should be 4)!\n", (int)sizeof(uint32)), errors++; if (sizeof(uint64) != 8) fprintf(stderr, "uint64 has %d bytes (should be 8)!\n", (int)sizeof(uint64)), errors++; if (u3 < 0) fprintf(stderr, "uint32 is signed (should be unsigned)!\n"), errors++; if (s3 > 0) fprintf(stderr, "int32 is unsigned (should be signed)!\n"), errors++; if (u6 < 0) fprintf(stderr, "uint64 is signed (should be unsigned)!\n"), errors++; if (s6 > 0) fprintf(stderr, "int64 is unsigned (should be signed)!\n"), errors++; return(errors); } kmer-code-2013-trunk/libutil/test/test-readBuffer.C0000644000000000000000000000657712322046702020734 0ustar rootroot#include #include "util++.H" char *filename = 0L; md5_s *full = 0L; md5_s *part = 0L; int doTest(readBuffer *B, md5_s *correct, const char *description) { int error = 0; md5_increment_s *testing = 0L; int bread = 0; fprintf(stderr, "readBuffer test %s.\n", description); for (char x = B->read(); !B->eof(); x = B->read()) { testing = md5_increment_char(testing, x); bread++; } md5_increment_finalize(testing); if ((testing->a != correct->a) || (testing->b != correct->b)) { fprintf(stderr, "readBuffer test %s failed (read %d bytes).\n", description, bread); fprintf(stderr, "Got correct md5 of "uint64HEX" "uint64HEX"\n", 
correct->a, correct->b); fprintf(stderr, "Got testing md5 of "uint64HEX" "uint64HEX"\n", testing->a, testing->b); error = 1; } md5_increment_destroy(testing); return(error); } int doTestRead(readBuffer *B, md5_s *correct, size_t bufferSize, const char *description) { int error = 0; char *buffer = new char [bufferSize]; size_t bufferLen = 0; md5_increment_s *testing = 0L; fprintf(stderr, "readBuffer test %s.\n", description); while (!B->eof()) { bufferLen = B->read(buffer, bufferSize); //fprintf(stderr, "Read bufferLen=%d bufferSize=%d\n", bufferLen, bufferSize); testing = md5_increment_block(testing, buffer, bufferLen); } md5_increment_finalize(testing); if ((testing->a != correct->a) || (testing->b != correct->b)) { fprintf(stderr, "readBuffer test %s failed.\n", description); fprintf(stderr, "Got correct md5 of "uint64HEX" "uint64HEX"\n", correct->a, correct->b); fprintf(stderr, "Got testing md5 of "uint64HEX" "uint64HEX"\n", testing->a, testing->b); error = 1; } md5_increment_destroy(testing); return(error); } int main(int argc, char **argv) { int error = 0; readBuffer *B = 0L; size_t L = 0; size_t H = 0; size_t R = 0; // If we are given a file, use that, otherwise, use ourself. 
// filename = argv[argc-1]; L = sizeOfFile(filename); H = L/2; R = L - H; fprintf(stderr, "L=%d H=%d R=%d\n", L, H, R); // Suck in the whole file, compute the correct md5 checksum on it // char *c = new char [L]; FILE *F = fopen(filename, "r"); fread(c, sizeof(char), L, F); fclose(F); full = md5_string(0L, c, L); part = md5_string(0L, c+H, R); delete [] c; B = new readBuffer(filename, 999); error += doTest(B, full, "#1 (read)"); B->seek(0); error += doTest(B, full, "#2 (seek)"); B->seek(H); error += doTest(B, part, "#2 (seek half)"); delete B; B = new readBuffer(filename, 0); error += doTest(B, full, "#3 (mmap)"); B->seek(0); error += doTest(B, full, "#2 (mmap seek)"); B->seek(H); error += doTest(B, part, "#2 (mmap seek half)"); delete B; B = new readBuffer(filename, 0); error += doTestRead(B, full, 10000, "#4 (read buffer=mmap readsize=10000)"); delete B; B = new readBuffer(filename, 100); error += doTestRead(B, full, 10000, "#4 (read buffer=100 readsize=10000)"); delete B; B = new readBuffer(filename, 2000); error += doTestRead(B, full, 1000, "#4 (read buffer=2000 readsize=1000)"); delete B; B = new readBuffer(filename, L); error += doTestRead(B, full, L+1000, "#5 (read buffer=filesize readsize=filesize+1000)"); delete B; return(error); } kmer-code-2013-trunk/libutil/test/Makefile0000644000000000000000000001006311061606335017232 0ustar rootrootPROG = test-bigQueue \ test-bitPackedArray \ test-bitPackedFile \ test-bitPacking \ test-freeDiskSpace \ test-intervalList \ test-logMsg \ test-md5 \ test-mmap \ test-palloc \ test-readBuffer \ test-recordFile \ test-types \ tcat # Broken, don't test. #test-bzipBuffer INCLUDE = -I.. LIBS = -L.. -lutil -lm OBJS = include ../../Make.compilers all: $(PROG) @echo Tests passed! 
test-bigQueue: test-bigQueue.C ../libutil.a ../util.h ../util++.H $(CXX) $(CXXFLAGS_COMPILE) -c -o test-bigQueue.o test-bigQueue.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o test-bigQueue test-bigQueue.o $(LIBS) time ./test-bigQueue time sort -k2n < junk-bigQueue-out-2 > junk-bigQueue-out-2.s diff junk-bigQueue-out-1 junk-bigQueue-out-2.s test-bitPackedArray: test-bitPackedArray.C ../libutil.a ../util.h ../util++.H $(CXX) $(CXXFLAGS_COMPILE) -c -o test-bitPackedArray.o test-bitPackedArray.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o test-bitPackedArray test-bitPackedArray.o $(LIBS) ./test-bitPackedArray test-bitPackedFile: test-bitPackedFile.C ../libutil.a ../util.h ../util++.H $(CXX) $(CXXFLAGS_COMPILE) -c -o test-bitPackedFile.o test-bitPackedFile.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o test-bitPackedFile test-bitPackedFile.o $(LIBS) -lbz2 ./test-bitPackedFile test-bitPacking: test-bitPacking.C ../libutil.a ../util.h ../util++.H $(CXX) $(CXXFLAGS_COMPILE) -c -o test-bitPacking.o test-bitPacking.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o test-bitPacking test-bitPacking.o $(LIBS) ./test-bitPacking test-freeDiskSpace: test-freeDiskSpace.c ../libutil.a $(CC) $(CFLAGS_COMPILE) -c -o test-freeDiskSpace.o test-freeDiskSpace.c $(INCLUDE) $(CC) $(CLDFLAGS) -o test-freeDiskSpace test-freeDiskSpace.o $(LIBS) ./test-freeDiskSpace test-freeDiskSpace.c test-intervalList: test-intervalList.C ../libutil.a ../util.h ../util++.H $(CXX) $(CXXFLAGS_COMPILE) -c -o test-intervalList.o test-intervalList.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o test-intervalList test-intervalList.o $(LIBS) ./test-intervalList test-logMsg: test-logMsg.C ../libutil.a ../util.h ../util++.H $(CXX) $(CXXFLAGS_COMPILE) -c -o test-logMsg.o test-logMsg.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o test-logMsg test-logMsg.o $(LIBS) ./test-logMsg | wc test-md5: test-md5.c ../libutil.a ../util.h ../util++.H $(CC) $(CFLAGS_COMPILE) -c -o test-md5.o test-md5.c $(INCLUDE) $(CC) $(CLDFLAGS) -o test-md5 test-md5.o $(LIBS) ./test-md5 test-mmap: test-mmap.c 
../libutil.a ../util.h ../util++.H $(CC) $(CFLAGS_COMPILE) -c -o test-mmap.o test-mmap.c $(INCLUDE) $(CC) $(CLDFLAGS) -o test-mmap test-mmap.o $(LIBS) ./test-mmap 16 test-palloc: test-palloc.c ../libutil.a ../util.h ../util++.H $(CC) $(CFLAGS_COMPILE) -c -o test-palloc.o test-palloc.c $(INCLUDE) $(CC) $(CLDFLAGS) -o test-palloc test-palloc.o $(LIBS) ./test-palloc test-readBuffer: test-readBuffer.C ../libutil.a ../util.h ../util++.H $(CXX) $(CXXFLAGS_COMPILE) -c -o test-readBuffer.o test-readBuffer.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o test-readBuffer test-readBuffer.o $(LIBS) ./test-readBuffer test-readBuffer test-recordFile: test-recordFile.C ../libutil.a ../util.h ../util++.H $(CXX) $(CXXFLAGS_COMPILE) -c -o test-recordFile.o test-recordFile.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o test-recordFile test-recordFile.o $(LIBS) ./test-recordFile test-bzipBuffer: test-bzipBuffer.C ../libutil.a ../util.h ../util++.H $(CXX) $(CXXFLAGS_COMPILE) -c -o test-bzipBuffer.o test-bzipBuffer.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o test-bzipBuffer test-bzipBuffer.o -lbz2 $(LIBS) bzip2 -9vc ./test-bzipBuffer > ./test-bzipBuffer.bz2 ./test-bzipBuffer ./test-bzipBuffer.bz2 test-types: test-types.c ../libutil.a ../util.h ../util++.H $(CC) $(CFLAGS_COMPILE) -c -o test-types.o test-types.c $(INCLUDE) $(CC) $(CLDFLAGS) -o test-types test-types.o $(LIBS) ./test-types tcat: tcat.C ../libutil.a ../util.h ../util++.H $(CXX) $(CXXFLAGS_COMPILE) -c -o tcat.o tcat.C $(INCLUDE) $(THREADS) $(CXX) $(CXXLDFLAGS) -o tcat tcat.o $(LIBS) $(THREADL) clean: rm -f $(PROG) *.o *junk* kmer-code-2013-trunk/libutil/test/test-mmap.c0000644000000000000000000000423512322046702017646 0ustar rootroot#include #include #include #include #include #include "util.h" // Does a quick test of memory mapped files. First, writes a small // file, then it reads it back, checking the data. // // Takes one optional argument, the size in MB of the file to map. 
int main(int argc, char **argv) { size_t lw; uint32 *ww = 0L; uint32 idx = 0; uint32 err = 0; FILE *out; uint32 blockSize = 1048576; uint32 numBlocks = 32; if (argc == 2) numBlocks = strtouint32(argv[1], 0L); // The file must exist, and it must be large enough to contain all // that we want to write. So, we create the file and fill it with // junk. // ww = (uint32 *)malloc(sizeof(uint32) * blockSize); if (ww == NULL) { fprintf(stderr, "can't allocate %d uint32's for clearing the file.\n", blockSize); exit(1); } errno = 0; out = fopen("mmap.test.junk", "w"); if (errno) { fprintf(stderr, "can't open 'mmap.test.junk' to fill with junk: %s\n", strerror(errno)); exit(1); } for (idx=0; idx #include "util++.H" char *filename = 0L; md5_s *correct = 0L; int doTest(bzipBuffer *B, char *description) { int error = 0; md5_increment_s *testing = 0L; while (!B->eof()) testing = md5_increment_char(testing, B->getnext()); md5_increment_finalize(testing); if ((testing->a != correct->a) || (testing->b != correct->b)) { fprintf(stderr, "bzipBuffer test %s failed.\n", description); fprintf(stderr, "Got correct md5 of "uint64HEX" "uint64HEX"\n", correct->a, correct->b); fprintf(stderr, "Got testing md5 of "uint64HEX" "uint64HEX"\n", testing->a, testing->b); error = 1; } md5_increment_destroy(testing); return(error); } int doTestRead(bzipBuffer *B, size_t bufferSize, char *description) { int error = 0; char *buffer = new char [bufferSize]; size_t bufferLen = 0; md5_increment_s *testing = 0L; while (!B->eof()) { bufferLen = B->read(buffer, bufferSize); testing = md5_increment_block(testing, buffer, bufferLen); } md5_increment_finalize(testing); if ((testing->a != correct->a) || (testing->b != correct->b)) { fprintf(stderr, "bzipBuffer test %s failed.\n", description); fprintf(stderr, "Got correct md5 of "uint64HEX" "uint64HEX"\n", correct->a, correct->b); fprintf(stderr, "Got testing md5 of "uint64HEX" "uint64HEX"\n", testing->a, testing->b); error = 1; } md5_increment_destroy(testing); 
return(error); } int main(int argc, char **argv) { int error = 0; bzipBuffer *B = 0L; // If we are given a file, use that, otherwise, use ourself. // filename = argv[argc-1]; // Suck in the whole file, compute the correct md5 checksum on it // char *c = new char [sizeOfFile(filename)]; FILE *F = fopen(filename, "r"); fread(c, sizeof(char), sizeOfFile(filename), F); fclose(F); correct = md5_string(0L, c, sizeOfFile(filename)); delete [] c; // Test just reading, with a small buffer // B = new bzipBuffer(filename, 999); error += doTest(B, "#1 (read)"); exit(1); // Test read() with a small buffer, reading large chunks // B = new bzipBuffer(filename, 100); error += doTestRead(B, 10000, "#4 (read)"); delete B; // Test read() with a small buffer, reading small chunks that are a // factor of the buffersize. // B = new bzipBuffer(filename, 2000); error += doTestRead(B, 1000, "#4 (read)"); delete B; // Test read() with a large buffer, reading even larger pieces // B = new bzipBuffer(filename, sizeOfFile(filename)); error += doTestRead(B, sizeOfFile(filename) + 100000, "#5 (read)"); delete B; return(error); } kmer-code-2013-trunk/libutil/test/test-palloc.c0000644000000000000000000000156310132173315020165 0ustar rootroot#include #include #include "util.h" int main(int argc, char **argv) { mt_s *mtctx; int i; psetdebug(2); psetblocksize(1024); palloc(2048); palloc(128); palloc(999); palloc(1); palloc(2); palloc(3); palloc(4); palloc(2056); palloc(8); palloc(2064); palloc(8); palloc(2072); palloc(8); pdumppalloc(); pfree(); fprintf(stderr, "----------------------------------------\n"); psetblocksize(10240); palloc(2048); palloc(128); palloc(999); palloc(8); palloc(8); palloc(8); palloc(8); palloc(2056); palloc(8); palloc(2064); palloc(8); palloc(2072); palloc(8); pdumppalloc(); pfree(); psetdebug(0); psetblocksize(16 * 1024 * 1024); mtctx = mtInit(time(NULL)); for (i=0; i<512 * 1024; i++) palloc(mtRandom32(mtctx) & 0xfff); psetdebug(1); pfree(); return(0); } 
kmer-code-2013-trunk/libutil/test/endianess.c0000644000000000000000000000554212322046702017712 0ustar rootroot#include #include #include // BYTE_ORDER #include "/home/work/src/genomics/libutil/util.h" // Reports the byte order, writes words to files for testing. #if 0 begin 644 test-alpha .`@$$`P(!"`<&!00#`@$` ` end begin 644 test-i386 .`@$$`P(!"`<&!00#`@$` ` end begin 644 test-opteron .`@$$`P(!"`<&!00#`@$` ` end begin 644 test-power .`0(!`@,$`0(#!`4&!P@` ` end #endif int isBig1(void) { uint64 l = uint64ONE; if (*((char *)(&l)) == 1) return(0); return(1); } // supposedly due to Harbison and Steele int isBig2(void) { union { uint64 l; char c[sizeof(uint64)]; } u; u.l = uint64ONE; #if 0 fprintf(stderr, "%d%d%d%d%d%d%d%d\n", u.c[0], u.c[1], u.c[2], u.c[3], u.c[4], u.c[5], u.c[6], u.c[7]); #endif if (u.c[0] == 1) // LSB is first return(0); return(1); // MSB is first } int main(int argc, char **argv) { uint16 u16 = 0x0102; uint32 u32 = uint32NUMBER(0x01020304); uint64 u64 = uint64NUMBER(0x0102030405060708); fprintf(stderr, "BYTE_ORDER = %d\n", BYTE_ORDER); fprintf(stderr, " BIG_ENDIAN = %d\n", BIG_ENDIAN); fprintf(stderr, " LITTLE_ENDIAN = %d\n", LITTLE_ENDIAN); fprintf(stderr, " PDP_ENDIAN = %d\n", PDP_ENDIAN); fprintf(stderr, "isBig1() = %d\n", isBig1()); fprintf(stderr, "isBig2() = %d\n", isBig2()); if (argc == 1) { fprintf(stderr, "usage: %s [ write | read ] < source > check\n", argv[0]); exit(1); } if (strcmp(argv[1], "write") == 0) { fwrite(&u16, sizeof(uint16), 1, stdout); fwrite(&u32, sizeof(uint32), 1, stdout); fwrite(&u64, sizeof(uint64), 1, stdout); return(0); } fread(&u16, sizeof(uint16), 1, stdin); fread(&u32, sizeof(uint32), 1, stdin); fread(&u64, sizeof(uint64), 1, stdin); #if 0 // swap bytes to convert u16 u16 = (((u16 >> 8) & 0x00ff) | ((u16 << 8) & 0xff00)); // swap bytes, then swap words to convert u32 u32 = (((u32 >> 24) & 0x000000ff) | ((u32 >> 8) & 0x0000ff00) | ((u32 << 8) & 0x00ff0000) | ((u32 << 24) & 0xff000000)); // swap bytes, then flip words 
[0<->3, 1<->2] to convert u64 u64 = (((u64 >> 24) & uint64NUMBER(0x000000ff000000ff)) | ((u64 >> 8) & uint64NUMBER(0x0000ff000000ff00)) | ((u64 << 8) & uint64NUMBER(0x00ff000000ff0000)) | ((u64 << 24) & uint64NUMBER(0xff000000ff000000))); u64 = (((u64 >> 32) & uint64NUMBER(0x00000000ffffffff)) | ((u64 << 32) & uint64NUMBER(0xffffffff00000000))); #endif if (u16 != 0x1234) fprintf(stderr, "u16 -- 0x%04x correct=0x%04x\n", u16, 0x1234); if (u32 != 0x12345678) fprintf(stderr, "u32 -- "uint32HEX" correct="uint32HEX"\n", u32, 0x12345678); if (u64 != uint64NUMBER(0x1234567890abcdef)) fprintf(stderr, "u64 -- "uint64HEX" correct="uint64HEX"\n", u64, uint64NUMBER(0x1234567890abcdef)); return(0); } kmer-code-2013-trunk/libutil/test/test-intervalList.C0000644000000000000000000002105512417326046021342 0ustar rootroot#include #include "util++.H" mt_s *mt = 0L; void test(void) { int e = 0; intervalList I; I.add(71, 3); I.add( 5, 3); I.add(32, 5); I.add(73, 3); I.add(55, 10); I.add( 5, 3); I.add(10, 5); I.add(20, 10); I.add(30, 10); I.add(50, 10); I.add(70, 3); I.add(72, 3); I.add( 5, 3); I.add(15, 5); #if 0 for (uint32 i=0; i A; intervalList B; // // Build two interval lists // // type == 0 --> all pairwise // type == 1 --> A sequence is solid // type == 2 --> B sequence is solid // if (type == 1) A.add(1, 1500000000); if (type == 2) B.add(1, 1500000000); for (uint32 i=0; i I; I.intersect(A, B); // // Check the result. 
// for (uint32 i=0, j=0; i "uint32FMT","uint32FMT" ("uint32FMT","uint32FMT") (should have been "uint32FMT","uint32FMT")\n", i, beg[i] - abegh[i], beg[i] - abegh[i] + abegh[i] + end[i] - beg[i] + aendh[i], beg[i] - bbegh[i], beg[i] - bbegh[i] + bbegh[i] + end[i] - beg[i] + bendh[i], b, e, (uint32)I.lo(j), (uint32)I.hi(j), beg[i], end[i]); errors++; } else { passed++; } j++; } } fprintf(stderr, "intersection test had "uint32FMT" successes and "uint32FMT" errors.\n", passed, errors); } void testMerge(void) { intervalList IL; intervalList ID; // Test 1: one long sequence containing lots of little non-overlapping sequences // Test 2: three long overlapping sequences, containing lots of non-overlapping sequences // Test 3: dense random // Test 4: special cases fprintf(stderr, "Merge test 1\n"); IL.clear(); IL.add(0, 100000); for (uint32 i=0; i<999; i++) IL.add(100 + 100 * i, 50); IL.merge(); for (uint32 i=0; i hi) hi = end; IL.add(beg, end - beg); } IL.merge(); if ((IL.lo(0) != lo) || (IL.hi(0) != hi)) fprintf(stderr, "ERROR!\n"); for (uint32 i=0; i #include #include #include #include #include #include #include // Tests if add is atomic. 
long int count = 0; long int counts[8] = { 0 }; pthread_t threadID[8]; double getTime(void) { struct timeval tp; gettimeofday(&tp, NULL); return(tp.tv_sec + (double)tp.tv_usec / 1000000.0); } void* workerThread(void *idx) { long int &c = (*(long int *)idx); double start = getTime(); while (getTime() - start < 5) { c++; count++; c++; count++; c++; count++; c++; count++; c++; count++; c++; count++; } } int main(int argc, char **argv) { pthread_attr_t threadAttr; pthread_attr_init(&threadAttr); pthread_attr_setscope(&threadAttr, PTHREAD_SCOPE_SYSTEM); pthread_attr_setschedpolicy(&threadAttr, SCHED_OTHER); int numThreads = 5; int sum = 0; for (int i=0; i #include #include #include "util++.H" struct header_s { uint64 t1; char s1[570]; uint64 t2; }; struct record_s { uint64 t1; char s1[123]; }; int main(int argc, char **argv) { header_s h; record_s r; h.t1 = 0x0123456789abcdefllu; memset(h.s1, 0x66, 570); strcpy(h.s1, "this is the header"); h.t2 = 0xdeadbeefdeadbeefllu; recordFile *RF = new recordFile("test", sizeof(header_s), sizeof(record_s), 'w'); memcpy(RF->header(), &h, sizeof(header_s)); r.t1 = 1; memset(r.s1, 0x66, 123); strcpy(r.s1, "record1"); RF->putRecord(&r); r.t1 = 2; memset(r.s1, 0x66, 123); strcpy(r.s1, "record2"); RF->putRecord(&r); r.t1 = 3; memset(r.s1, 0x66, 123); strcpy(r.s1, "record3"); RF->putRecord(&r); r.t1 = 4; memset(r.s1, 0x66, 123); strcpy(r.s1, "record4"); RF->putRecord(&r); r.t1 = 5; memset(r.s1, 0x66, 123); strcpy(r.s1, "record5"); RF->putRecord(&r); delete RF; RF = new recordFile("test", sizeof(header_s), sizeof(record_s), 'r'); header_s *hh = (header_s *)RF->header(); fprintf(stderr, "header t1 "uint64HEX" '%s' t2 "uint64HEX"\n", hh->t1, hh->s1, hh->t2); RF->getRecord(&r); fprintf(stderr, "record "uint64FMT" '%s'\n", r.t1, r.s1); RF->getRecord(&r); fprintf(stderr, "record "uint64FMT" '%s'\n", r.t1, r.s1); RF->getRecord(&r); fprintf(stderr, "record "uint64FMT" '%s'\n", r.t1, r.s1); RF->getRecord(&r); fprintf(stderr, "record "uint64FMT" 
'%s'\n", r.t1, r.s1); RF->getRecord(&r); fprintf(stderr, "record "uint64FMT" '%s'\n", r.t1, r.s1); RF->getRecord(&r); fprintf(stderr, "record "uint64FMT" '%s'\n", r.t1, r.s1); delete RF; return(0); } kmer-code-2013-trunk/libutil/test/test-bigQueue.C0000644000000000000000000000230410213527362020420 0ustar rootroot#include #include #include #include "util++.H" // // mbri && CC -g -o test-bigQueue test-bigQueue.C -L. -lutil && ./bigQueue-test | & more // struct thing_s { int a; int b; double c; int d; }; int sortthing(const void *a, const void *b) { thing_s *A = *((thing_s **)a); thing_s *B = *((thing_s **)b); if (A->a < B->a) return(-1); if (A->a > B->a) return(1); if (A->b < B->b) return(-1); if (A->b > B->b) return(1); return(0); } int main(int argc, char **argv) { bigQueue *T = new bigQueue(sortthing, 0L, 0L, 0L, sizeof(thing_s), 1, 0L); mt_s *mtctx = mtInit(3); int testSize = 2000000; FILE *out = fopen("junk-bigQueue-out-1", "w"); for (int i=0; ia = mtRandom32(mtctx) / 4; t->b = i; t->c = (double)i; t->d = -i; fprintf(out, "%012d %08d %12.3f %08d\n", t->a, t->b, t->c, t->d); T->add(t); } fclose(out); out = fopen("junk-bigQueue-out-2", "w"); T->sort(); while (T->next()) { thing_s *t = (thing_s *)T->get(); fprintf(out, "%012d %08d %12.3f %08d\n", t->a, t->b, t->c, t->d); } delete T; fclose(out); } kmer-code-2013-trunk/libutil/test/test-bitPackedFile.C0000644000000000000000000001373712322046702021351 0ustar rootroot#include #include #include #include "util++.H" // This will perform various tests on the bitPackedFile class, // returning 0 if OK and 1 if error. 
// // testSize -- the number of words to use in a write then read test // testIter -- the number of random access tests to do uint32 testSize = 2000000; uint32 testIter = 50; mt_s *mtctx; // Generate a list of random 64-bit numbers, remember the number and the size // void generateRandom(uint32 *siz, uint64 *val) { for (uint32 i=0; iputBits(val[i], siz[i]); F->putNumber(val[i]); } delete F; // Open the file and check what we just wrote. // F = new bitPackedFile("bittest.junk"); for (i=0; igetBits(siz[i]); if (v != val[i]) { fprintf(stderr, uint32FMT"] ERROR in getBits() -- retrieved "uint64HEX" != expected "uint64HEX" ("uint32FMT" bits).\n", i, v, val[i], siz[i]); errs++; } v = F->getNumber(); if (v != val[i]) { fprintf(stderr, uint32FMT"] ERROR in getNumber() -- retrieved "uint64HEX" != expected "uint64HEX".\n", i, v, val[i]); errs++; } } delete F; delete [] val; delete [] siz; if (errs > 0) { fprintf(stderr, "There are "uint32FMT" errors in the stream test.\n", errs); exit(1); } else { fprintf(stderr, "The stream test PASSED.\n"); } unlink("bittest.junk"); } void testRandomReading(bool inCore) { bitPackedFile *F = 0L; uint32 i; uint32 *siz = new uint32 [testSize + 1]; uint64 *val = new uint64 [testSize]; uint32 errs = 0; fprintf(stderr, "BUILDING random test set.\n"); generateRandom(siz, val); // Create a new bitpacked file, writing just numbers as binary encoded. 
// fprintf(stderr, "SAVING random test set.\n"); F = new bitPackedFile("bittest.junk"); for (i=0; iputBits(val[i], siz[i]); delete F; // Covert the siz[] into offsets // uint32 t = siz[0]; siz[0] = 0; for (uint32 i=1; iloadInCore(); fprintf(stderr, "Begin INCORE seek test!\n"); } else { fprintf(stderr, "Begin DISKBASED seek test!\n"); } double startTime = getTime(); for (i=0; iseek(siz[idx]); uint64 r = F->getBits(siz[idx+1] - siz[idx]); if (r != val[idx]) { fprintf(stderr, uint32FMT"] ERROR in seek()/getBits() -- retrieved "uint64HEX" != expected "uint64HEX" ("uint32FMT" bits).\n", i, r, val[i], siz[i]); errs++; } } delete F; if (errs > 0) { fprintf(stderr, "There are "uint32FMT" errors in the %s random access.\n", errs, (inCore) ? "inCore" : "disk"); exit(1); } else { fprintf(stderr, "The %s seek test PASSED (%f seconds).\n", (inCore) ? "inCore" : "disk", getTime() - startTime); } delete [] val; delete [] siz; unlink("bittest.junk"); } void testReWrite(void) { bitPackedFile *F = 0L; uint32 i; uint32 *siz = new uint32 [testSize]; uint64 *val = new uint64 [testSize]; uint32 errs = 0; uint64 pos = uint64ZERO; generateRandom(siz, val); // First, write zeros to the file // F = new bitPackedFile("bittest.junk"); for (i=0; iputBits(uint64ZERO, siz[i]); delete F; fprintf(stderr, "WRITING FORWARDS!\n"); // Now, write every other number to the file // F = new bitPackedFile("bittest.junk"); for (i=0; iseek(pos); F->putBits(val[i], siz[i]); } pos += siz[i]; } F->showStats(stderr); delete F; fprintf(stderr, "WRITING BACKWARDS!\n"); // And go backwards and write the other set of numbers to the file // F = new bitPackedFile("bittest.junk"); for (i=testSize; i--; ) { pos -= siz[i]; if ((i % 2) == 0) { F->seek(pos); F->putBits(val[i], siz[i]); } } F->showStats(stderr); delete F; // Now, stream through the file and see if we wrote what we should have // F = new bitPackedFile("bittest.junk"); for (i=0; igetBits(siz[i]); if (v != val[i]) { fprintf(stderr, uint32FMT"] ERROR in 
seekstream/getBits() -- retrieved "uint64HEX" != expected "uint64HEX" ("uint32FMT" bits).\n", i, v, val[i], siz[i]); errs++; } } F->showStats(stderr); delete F; delete [] val; delete [] siz; if (errs > 0) { fprintf(stderr, "There are "uint32FMT" errors in the rewrite test.\n", errs); exit(1); } else { fprintf(stderr, "The rewrite test PASSED.\n"); } unlink("bittest.junk"); } int main(int argc, char **argv) { mtctx = mtInit(time(NULL)); testSize = 30000000; testIter = 2000; //testStreaming(); //testReWrite(); testSize = 40000000; testIter = 10000; testRandomReading(false); testRandomReading(true); } kmer-code-2013-trunk/libutil/test/test-bitPacking.C0000644000000000000000000002035012322046702020723 0ustar rootroot#include #include #include #include "util++.H" // An integer multiplier on the test length. 1 is pretty quick, but // 10 is the default. // #define TEST_LENGTH (10 * 1024 * 1024) // We test // // 1) binary encoding/decoding // // 2) pre/post increment of binary encoding // // 3) Perform some testing on the fibonacci encoded bit-packed stream // -- encode a bunch of random 64-bit numbers, make sure we can // decode back to the same number. // // NOTES: pre/post increment/decrement work modulo whatever size they // are. So, if you have a 6-bit value of zero, and you decrement, // you end up with a 6-bit value of all 1's, or 63. 
void testBinaryEncoding(void) { time_t mtseed = time(0L); mt_s *mtctx = 0L; uint32 iterations = TEST_LENGTH; uint64 *bits = new uint64 [iterations + 2]; uint64 bpos = uint64ZERO; uint64 *V = new uint64 [iterations]; uint64 *C = new uint64 [iterations]; uint64 *S = new uint64 [iterations]; uint32 failed = 0; uint32 errors = 0; fprintf(stderr, "Starting test of binary encoding\n"); bpos = uint64ZERO; mtctx = mtInit(mtseed); // Build some values to stuff into the bits for (uint32 j=0; j < iterations; j++) { S[j] = (mtRandom32(mtctx) % 63) + 1; V[j] = mtRandom64(mtctx) & uint64MASK(S[j]); //fprintf(stderr, "[%2d] S="uint64FMT" V="uint64HEX"\n", j, S[j], V[j]); } // Stuff them in, in blocks of some size. At the same time, decode // (this has found bugs in the past). failed = 0; for (uint32 j=0; j < iterations; ) { uint64 num = (mtRandom32(mtctx) % 8); if (j + num > iterations) num = iterations - j; if (num == 0) { setDecodedValue(bits, bpos, S[j], V[j]); C[j] = getDecodedValue(bits, bpos, S[j]); //fprintf(stderr, "[%2d] V="uint64HEX" C="uint64HEX" single\n", j, V[j], C[j]); bpos += S[j]; } else { uint64 newp1 = setDecodedValues(bits, bpos, num, S+j, V+j); uint64 newp2 = getDecodedValues(bits, bpos, num, S+j, C+j); if (newp1 != newp2) { // not perfect; we should be checking the values too, but we do that later. 
for (uint32 x=0; x iterations) num = iterations - j; if (num == 0) { C[j] = getDecodedValue(bits, bpos, S[j]); bpos += S[j]; } else { bpos = getDecodedValues(bits, bpos, num, S+j, C+j); } j += num; if (num == 0) j++; } // Check that V == C failed = 0; for (uint32 j=0; j #include #include #include "util.h" class bzipBuffer { public: bzipBuffer(const char *filename, uint32 bufferMax = 32 * 1024); ~bzipBuffer(); bool eof(void); bool next(void); char get(void); char getnext(void); bool seek(off_t pos); size_t read(char *buf, size_t len); // read the next len bytes into the user buffer buf off_t tell(void); private: void fillBuffer(void); void init(int fileptr, const char *filename, uint32 bufferMax); char *_filename; int _file; off_t _filePos; bool _eof; uint32 _bzip2bufferMax; uint32 _bzip2inPos; uint32 _bzip2outPos; char *_bzip2in; char *_bzip2out; bool _bzip2streamEnd; bz_stream _bzip2stream; }; inline bool bzipBuffer::eof(void) { return(_eof); } inline bool bzipBuffer::next(void) { if (_eof) return(true); _bzip2outPos++; _filePos++; if (_bzip2outPos >= _bzip2stream.avail_out) fillBuffer(); return(_eof); } inline char bzipBuffer::get(void) { return(_bzip2out[_bzip2outPos]); } inline char bzipBuffer::getnext(void) { char x = _bzip2out[_bzip2outPos]; next(); return(x); } inline off_t bzipBuffer::tell(void) { return(_filePos); } #endif // BZIP_BUFFER_H kmer-code-2013-trunk/libutil/palloc.c0000644000000000000000000001313711512512020016222 0ustar rootroot#include #include #include #include #include "util.h" typedef struct pallocroot pallocroot; typedef struct pallocnode pallocnode; // _dbg: 0 -- print nothing // 1 -- print block allocation // 2 -- print all allocations struct pallocroot { size_t _bs; // size of block pallocnode *_nl; // nodeList pallocnode *_cn; // currentNode int _dbg; // if set, debug information is printed }; struct pallocnode { size_t _cp; // cuurentPosition char *_dt; // data pallocnode *_nx; // next pallocnode }; extern pallocroot _palloc_stuff; 
pallocroot _palloc_stuff = { 128 * 1024 * 1024, NULL, NULL, 0 }; static void * really_allocate(size_t size) { void *ret = malloc(size); if (ret == 0L) { fprintf(stderr, "palloc()-- can't allocate "sizetFMT" bytes: %s.\n", size, strerror(errno)); exit(1); } return(ret); } void psetblocksize(size_t size) { if (_palloc_stuff._nl == 0L) _palloc_stuff._bs = size; } size_t pgetblocksize(void) { return(_palloc_stuff._bs); } void psetdebug(int on) { _palloc_stuff._dbg = on; } void* pallochandle(size_t size) { pallocroot *root = (pallocroot *)malloc(sizeof(pallocroot)); if (root == NULL) fprintf(stderr, "pallochandle()-- can't allocate a handle!\n"), exit(1); if (size == 0) size = 128 * 1024 * 1024; root->_bs = size; root->_nl = NULL; root->_cn = NULL; root->_dbg = 0; return(root); } // Release a palloc handle, does not release the memory in the handle! void pfreehandle(void *handle) { free((pallocroot *)handle); } // Clear out memory inside the handle. The handle remains valid after this. void pfree2(void *handle) { pallocroot *root = (pallocroot *)handle; pallocnode *n; size_t r = 0; size_t b = 0; if (root == NULL) root = &_palloc_stuff; while ((n = root->_nl) != 0L) { r += n->_cp; b++; root->_nl = n->_nx; free(n->_dt); free(n); } if (root->_dbg > 0) fprintf(stderr, "palloc()-- "sizetFMT" bytes in "sizetFMT" blocks returned to free store.\n", r, b); root->_nl = 0L; root->_cn = 0L; } void pfree(void) { pfree2(&_palloc_stuff); } void * palloc2(size_t size, void *handle) { pallocroot *root = (pallocroot *)handle; if (root == NULL) root = &_palloc_stuff; // Make size a multiple of 8 // if (size & 0x7) { size >>= 3; size++; size <<= 3; } if (size == 0) return(0L); // Allocate the initial block if it doesn't exist. 
// if (root->_nl == NULL) { root->_nl = (pallocnode *)really_allocate(sizeof(pallocnode)); root->_cn = root->_nl; if (root->_dbg > 0) fprintf(stderr, "palloc()-- Inital block of "sizetFMT" bytes at %p.\n", root->_bs, root->_cn); root->_cn->_cp = 0; root->_cn->_dt = (char *)really_allocate(root->_bs); root->_cn->_nx = NULL; } // If the requested space is larger than our block size, allocate a // new node with the required amount of space. The new node is // placed on the start of the alloc'd list. // // We also place blocks that are bigger than the amount free in the // current block, AND bigger than the amount used in the current // block here. Since the new block is larger than the free space, // it won't fit in the current block. Since the new block is // larger than the current block, it is wasteful to throw out the // current block and replace it with a new block. // // The tests read: // new block is bigger than our block size // new block won't fit in current block // new block is larger than current block // if ((size > root->_bs) || ((size > root->_bs - root->_cn->_cp) && (size > root->_cn->_cp))) { pallocnode *n; n = (pallocnode *)really_allocate(sizeof(pallocnode)); n->_cp = size; n->_dt = (char *)really_allocate(size); n->_nx = root->_nl; if (root->_dbg > 0) fprintf(stderr, "palloc()-- New needs "sizetFMT" bytes: custom new block at %p.\n", size, n); root->_nl = n; if (root->_cn == 0L) root->_cn = n; return(n->_dt); } // Need more space? 
// if (size + root->_cn->_cp > root->_bs) { root->_cn->_nx = (pallocnode *)really_allocate(sizeof(pallocnode)); if (root->_dbg > 0) fprintf(stderr, "palloc()-- Old block %.3f%% used ("sizetFMT" bytes remaining), new needs "sizetFMT" bytes: new block of "sizetFMT" bytes at %p.\n", 100.0 * root->_cn->_cp / root->_bs, root->_bs - root->_cn->_cp, size, root->_bs, root->_cn->_nx); root->_cn = root->_cn->_nx; root->_cn->_cp = 0; root->_cn->_dt = (char *)really_allocate(root->_bs); root->_cn->_nx = NULL; } // OK, grab the space, and return it. // root->_cn->_cp += size; if (root->_dbg > 1) fprintf(stderr, "palloc()-- Old block %.3f%% used ("sizetFMT" bytes remaining): returning "sizetFMT" bytes at %p.\n", 100.0 * root->_cn->_cp / root->_bs, root->_bs - root->_cn->_cp, size, root->_cn->_dt + root->_cn->_cp - size); return(root->_cn->_dt + root->_cn->_cp - size); } void * palloc(size_t size) { return(palloc2(size, &_palloc_stuff)); } void pdumppalloc(void *handle) { pallocroot *root = (pallocroot *)handle; pallocnode *n = root->_nl; fprintf(stderr, "palloc dump\n"); fprintf(stderr, ""sizetFMT" bytes per block\n", root->_bs); while (n != 0L) { fprintf(stderr, "%p: currentPosition: "sizetFMT" bytes used%s\n", n, n->_cp, (n == root->_cn) ? ", current block" : ""); n = n->_nx; } } kmer-code-2013-trunk/libutil/uint32List.H0000644000000000000000000000175412322046702016710 0ustar rootroot#ifndef UINT32LIST_H #define UINT32LIST_H #include // A very simple integer list. Hopefully lighter weight than a // vector. // It might be useful to extend this to have 'undef' values, // and to allow shift(), pop(). 
class uint32List { public: uint32List(uint32 max=16) { _len = 0; _max = max; _lst = new uint32 [_max]; }; ~uint32List() { delete [] _lst; }; private: void resize(uint32 idx) { if (idx >= _max) { _max *= 2; uint32 *L = new uint32 [_max]; memcpy(L, _lst, sizeof(uint32) * _len); delete [] _lst; _lst = L; } if (idx >= _len) _len = idx + 1; } public: uint32 &operator[](uint32 idx) { resize(idx); return(_lst[idx]); } void push(uint32 val) { resize(_len); _lst[_len++] = val; } uint32 length(void) { return(_len); }; void clear(void) { _len = 0; } private: uint32 _len; uint32 _max; uint32 *_lst; }; #endif // UINT32LIST_H kmer-code-2013-trunk/libutil/util++.H0000644000000000000000000000200012322046702016014 0ustar rootroot#ifndef UTIL_PLUS_PLUS_H #define UTIL_PLUS_PLUS_H #include "util.h" // These are all inlined, and C doesn't want to listen to that, so // they're here. // #include "bitOperations.h" #include "bitPacking.h" #include "endianess.H" // Various methods for encoding numbers into a bitstream. // // Still missing: // minimal binary // golomb (actually rice, since power of two) // teuhola exponential golomb // // And a nice way of getting parameters to those (and generalizedUnary) // #include "unaryEncoding.h" #include "generalizedUnaryEncoding.h" #include "eliasGammaEncoding.h" #include "eliasDeltaEncoding.h" #include "fibonacciEncoding.h" // Lists? #include "uint32List.H" // Now the good stuff! 
// #include "speedCounter.H" //#include "bzipBuffer.H" #include "readBuffer.H" #include "splitToWords.H" #include "bitPackedArray.H" #include "bitPackedFile.H" #include "recordFile.H" #include "intervalList.H" #include "bigQueue.H" #include "sweatShop.H" #include "logMsg.H" #endif // UTIL_PLUS_PLUS_H kmer-code-2013-trunk/libutil/bitPackedArray.C0000644000000000000000000000450712322046702017607 0ustar rootroot#include #include #include #include #include #include #include "util++.H" bitPackedArray::bitPackedArray(uint32 valueWidth, uint32 segmentSize) { _valueWidth = valueWidth; _segmentSize = segmentSize; _nextElement = 0; _valuesPerSegment = (uint64)_segmentSize * 1024 * 8 / (uint64)_valueWidth; _numSegments = 0; _maxSegments = 16; _segments = new uint64 * [_maxSegments]; } bitPackedArray::~bitPackedArray() { for (uint32 i=0; i<_numSegments; i++) delete [] _segments[i]; delete [] _segments; } uint64 bitPackedArray::get(uint64 idx) { uint64 s = idx / _valuesPerSegment; uint64 p = _valueWidth * (idx % _valuesPerSegment); if (idx >= _nextElement) { fprintf(stderr, "bitPackedArray::get()-- element index "uint64FMT" is out of range, only "uint64FMT" elements.\n", idx, _nextElement-1); return(0xdeadbeefdeadbeefULL); } return(getDecodedValue(_segments[s], p, _valueWidth)); } void bitPackedArray::set(uint64 idx, uint64 val) { uint64 s = idx / _valuesPerSegment; uint64 p = _valueWidth * (idx % _valuesPerSegment); //fprintf(stderr, "s="uint64FMT" p="uint64FMT" segments="uint64FMT"/"uint64FMT"\n", s, p, _numSegments, _maxSegments); if (idx >= _nextElement) _nextElement = idx+1; if (s >= _maxSegments) { _maxSegments = s + 16; uint64 **S = new uint64 * [_maxSegments]; for (uint32 i=0; i<_numSegments; i++) S[i] = _segments[i]; delete [] _segments; _segments = S; } while (_numSegments <= s) _segments[_numSegments++] = new uint64 [_segmentSize * 1024 / 8]; setDecodedValue(_segments[s], p, _valueWidth, val); } void bitPackedArray::clear(void) { for (uint32 s=0; s<_numSegments; s++) 
bzero(_segments[s], _segmentSize * 1024); } //////////////////////////////////////// bitArray::bitArray(uint32 segmentSize) { _segmentSize = segmentSize; _valuesPerSegment = (uint64)_segmentSize * 1024 * 8; _numSegments = 0; _maxSegments = 16; _segments = new uint64 * [_maxSegments]; } bitArray::~bitArray() { for (uint32 i=0; i<_numSegments; i++) delete [] _segments[i]; delete [] _segments; } void bitArray::clear(void) { for (uint32 s=0; s<_numSegments; s++) bzero(_segments[s], _segmentSize * 1024); } kmer-code-2013-trunk/libutil/bigQueue.C0000644000000000000000000001764012322046702016472 0ustar rootroot#include "bigQueue.H" #include #include #include #include // Kaz Kylheku library. #include "kazlib/dict.h" #include "kazlib/except.h" #include "kazlib/hash.h" #include "kazlib/list.h" #include "kazlib/sfx.h" // qsort and kazlib are incombatible. qsort passes a pointer to the data, kaz lib passes // the data (which it assumes is a pointer to begin with). void bigQueue::_initialize(int (*sortfcn)(const void *a, const void *b), bool (*readfcn)(FILE *f, void *a), bool (*writfcn)(FILE *f, void *a), void (*killfcn)(void *a), uint32 objectSize, uint32 memoryToUse, char *tmppath, char *filename) { _saveFile = 0L; _tmpPath = 0L; if (filename) { _saveFile = new char [strlen(filename) + 1]; strcpy(_saveFile, filename); } if (tmppath) { _tmpPath = new char [strlen(tmppath) + 1]; strcpy(_tmpPath, tmppath); } _sortFunction = sortfcn; _writFunction = writfcn; _readFunction = readfcn; _killFunction = killfcn; _objectSize = objectSize; _memoryToUse = memoryToUse; _maxOpenFiles = getdtablesize() - 8; _numTemporaryFiles = 0; _numMergeFiles = 0; _temporaryFiles = new FILE* [_maxOpenFiles]; for (uint32 i=0; i<_maxOpenFiles; i++) _temporaryFiles[i] = 0L; // Open the first temporary file for writing. 
// _temporaryFiles[_numTemporaryFiles++] = makeTempFile(_tmpPath); // XXX: It would be rather convenient if we could get another file // handle given an existing handle (no, dup(2) doesn't do that). // In particular, we want two file pointers, one for read, one for // write. // //_inputFile = fdopen(dup(fileno(_temporaryFiles[0])), "w+"); _thingBuffer = new uint64 [_objectSize / 8 + 1]; _bufferMax = 0; _bufferLen = 0; _buffer = 0L; if (_sortFunction) { _bufferMax = (uint64)memoryToUse * 1024 * 1024 / ((uint64)sizeof(void *) + objectSize); _bufferLen = 0; _buffer = new void* [_bufferMax]; } } bigQueue::~bigQueue() { delete [] _saveFile; delete [] _tmpPath; for (uint32 i=0; i<_numTemporaryFiles; i++) fclose(_temporaryFiles[i]); delete [] _temporaryFiles; //fclose(_inputFile); clearBuffer(); } // Add elements to the end of the array. void bigQueue::add(void *thing) { if (_buffer == 0L) { if (_writFunction) (*_writFunction)(_temporaryFiles[_numTemporaryFiles-1], thing); else fwrite(thing, _objectSize, 1, _temporaryFiles[_numTemporaryFiles-1]); } else { // No space in the buffer? Sort it, write it out and make a new // one. // if (_bufferLen >= _bufferMax) { sortAndWriteBuffer(); if (_numTemporaryFiles+1 >= _maxOpenFiles) mergeTemporaryFiles(); _temporaryFiles[_numTemporaryFiles++] = makeTempFile(_tmpPath); } _buffer[_bufferLen++] = thing; } } void bigQueue::sortAndWriteBuffer(void) { if (_bufferLen > 0) { // Sort! // qsort(_buffer, _bufferLen, sizeof(void *), _sortFunction); // Write! // if (_writFunction) { for (uint32 i=0; i<_bufferLen; i++) (*_writFunction)(_temporaryFiles[_numTemporaryFiles-1], _buffer[i]); } else { for (uint32 i=0; i<_bufferLen; i++) fwrite(_buffer[i], _objectSize, 1, _temporaryFiles[_numTemporaryFiles-1]); } // Flush and rewind the file! 
// fflush(_temporaryFiles[_numTemporaryFiles-1]); ::rewind(_temporaryFiles[_numTemporaryFiles-1]); clearBuffer(); } } void bigQueue::clearBuffer(void) { if (_killFunction) for (uint32 i=0; i<_bufferLen; i++) (*_killFunction)(_buffer[i]); else for (uint32 i=0; i<_bufferLen; i++) free(_buffer[i]); _bufferLen = 0; } void bigQueue::mergeTemporaryFiles(void) { if (_numTemporaryFiles > 1) { dict_t *sorted; dnode_t *nodes = new dnode_t [_maxOpenFiles]; // To be efficient, we need to maintain a sorted queue of the head // elements of each temporary file. A red-black tree would do // nicely, eh? // sorted = dict_create(DICTCOUNT_T_MAX, _sortFunction); // Grab the first thing off each file, insert it into the dictionary. // The 'key' is our chunk of data, and the 'value' is the file number // it came from. // for (uint32 i=0; i<_numTemporaryFiles; i++) { if (_temporaryFiles[i]) { // Rewind all the temporary files. XXXX This is probably done // already. // ::rewind(_temporaryFiles[i]); void *thing = malloc(_objectSize); if (_readFunction) (*_readFunction)(_temporaryFiles[i], thing); else fread(thing, _objectSize, 1, _temporaryFiles[i]); if (feof(_temporaryFiles[i])) { fclose(_temporaryFiles[i]); _temporaryFiles[i] = 0L; } else { // initialize the node with the value dnode_init(&nodes[i], (void *)(unsigned long)i); // insert the node into the tree using the key dict_insert(sorted, &nodes[i], thing); } } } FILE *mergeFile = makeTempFile(_tmpPath); // while there is stuff in the tree while (dict_isempty(sorted) == 0) { // pop the head element off, and print it dnode_t *head = dict_first(sorted); // XXX: should be const thing void *thing = (void *)dnode_getkey(head); long fileid = (long)dnode_get(head); if (_writFunction) (*_writFunction)(mergeFile, thing); else fwrite(thing, _objectSize, 1, mergeFile); // delete the node from the tree dict_delete(sorted, head); // destroy the thing if (_killFunction) (*_killFunction)(thing); else free(thing); // load the next element from the 
same file that the head was // from (that's stored as the value of the head element) thing = malloc(_objectSize); if (_readFunction) (*_readFunction)(_temporaryFiles[fileid], thing); else fread(thing, _objectSize, 1, _temporaryFiles[fileid]); // if there was a next element in that file, insert it // into the tree. if not, close the temporary file. // if (feof(_temporaryFiles[fileid])) { fclose(_temporaryFiles[fileid]); _temporaryFiles[fileid] = 0; free(thing); } else { // initialize the node with the value dnode_init(&nodes[fileid], (void *)fileid); // insert the node into the tree using the key dict_insert(sorted, &nodes[fileid], thing); } } dict_free(sorted); delete [] nodes; _numTemporaryFiles = 1; _temporaryFiles[0] = mergeFile; } ::rewind(_temporaryFiles[0]); #if 0 fclose(_inputFile); errno = 0; _inputFile = fdopen(dup(fileno(_temporaryFiles[0])), "w+"); if (errno) fprintf(stderr, "bigQueue::mergeTemporaryFiles()-- _inputFile = fdopen() failed: %s\n", strerror(errno)), exit(1); ::rewind(_inputFile); #endif } bool bigQueue::next(void) { if (_readFunction) { //(*_readFunction)(_inputFile, _thingBuffer); (*_readFunction)(_temporaryFiles[0], _thingBuffer); } else { //fread(_thingBuffer, _objectSize, 1, _inputFile); fread(_thingBuffer, _objectSize, 1, _temporaryFiles[0]); } #if 0 if (feof(_inputFile)) return(false); #endif if (feof(_temporaryFiles[0])) return(false); return(true); } void* bigQueue::get(void) { return(_thingBuffer); } void bigQueue::rewind(void) { //::rewind(_inputFile); ::rewind(_temporaryFiles[0]); next(); } void bigQueue::save(char *filepath) { fprintf(stderr, "bigQueue::save()-- not implemented.\n"); } void bigQueue::sort(void) { sortAndWriteBuffer(); mergeTemporaryFiles(); } void bigQueue::flush(void) { fflush(_temporaryFiles[_numTemporaryFiles-1]); } kmer-code-2013-trunk/libutil/speedCounter.H0000644000000000000000000000373412322046702017370 0ustar rootroot#ifndef SPEEDCOUNTER_H #define SPEEDCOUNTER_H #include class speedCounter { public: // 
fmt specifies the status format. An example: // " %8f [unit]things (%8.5f [unit]things/sec)\r" // speedCounter(char const *fmt, double unit, uint64 freq, bool enabled=true); ~speedCounter(); void enableSpinner(void) { _spin = true; }; void enableLiner(void) { _line = true; }; bool tick(void) { if (_enabled && ((++_count & _freq) == uint64ZERO)) { double v = _count / _unit; if (_spin) fputs(_spinr[_draws % 4], stderr); if (_line) fputs(_liner[_draws % 19], stderr); _draws++; fprintf(stderr, _fmt, v, v / (getTime() - _startTime)); fflush(stderr); return(true); } return(false); }; bool tick(uint64 increment) { if (_enabled == false) return(false); _count += increment; if ((_count & _freq) == uint64ZERO) { double v = _count / _unit; if (_spin) fputs(_spinr[_draws % 4], stderr); if (_line) fputs(_liner[_draws % 19], stderr); _draws++; fprintf(stderr, _fmt, v, v / (getTime() - _startTime)); fflush(stderr); return(true); } return(false); }; void finish(void) { if (_enabled && (_count >= _freq)) { double v = _count / _unit; if (_spin) fputs(_spinr[_draws % 4], stderr); if (_line) fputs(_liner[_draws % 19], stderr); fprintf(stderr, _fmt, v, v / (getTime() - _startTime)); fprintf(stderr, "\n"); fflush(stderr); } _count = 0; }; private: static const char *_spinr[4]; static const char *_liner[19]; uint64 _count; uint64 _draws; double _unit; uint64 _freq; double _startTime; char const *_fmt; bool _spin; bool _line; bool _enabled; }; #endif // SPEEDCOUNTER_H kmer-code-2013-trunk/libutil/fibonacciNumbers.C0000644000000000000000000000735212322046702020174 0ustar rootroot#include "util.h" // // Argh, 64-bit guys use LU as their modifier, but 32-bit guys use LLU. 
// #ifdef TRUE64BIT #define _(VAL) VAL ## LU #else #define _(VAL) VAL ## LLU #endif uint32 fibonacciValuesLen = 92; uint64 fibonacciValues[92] = { _(1), _(2), _(3), _(5), _(8), _(13), _(21), _(34), _(55), _(89), _(144), _(233), _(377), _(610), _(987), _(1597), _(2584), _(4181), _(6765), _(10946), _(17711), _(28657), _(46368), _(75025), _(121393), _(196418), _(317811), _(514229), _(832040), _(1346269), _(2178309), _(3524578), _(5702887), _(9227465), _(14930352), _(24157817), _(39088169), _(63245986), _(102334155), _(165580141), _(267914296), _(433494437), _(701408733), _(1134903170), _(1836311903), _(2971215073), _(4807526976), _(7778742049), _(12586269025), _(20365011074), _(32951280099), _(53316291173), _(86267571272), _(139583862445), _(225851433717), _(365435296162), _(591286729879), _(956722026041), _(1548008755920), _(2504730781961), _(4052739537881), _(6557470319842), _(10610209857723), _(17167680177565), _(27777890035288), _(44945570212853), _(72723460248141), _(117669030460994), _(190392490709135), _(308061521170129), _(498454011879264), _(806515533049393), _(1304969544928657), _(2111485077978050), _(3416454622906707), _(5527939700884757), _(8944394323791464), _(14472334024676221), _(23416728348467685), _(37889062373143906), _(61305790721611591), _(99194853094755497), _(160500643816367088), _(259695496911122585), _(420196140727489673), _(679891637638612258), _(1100087778366101931), _(1779979416004714189), _(2880067194370816120), _(4660046610375530309), _(7540113804746346429), _(12200160415121876738) }; kmer-code-2013-trunk/libutil/sweatShop.C0000644000000000000000000004013412322046702016673 0ustar rootroot#include "sweatShop.H" #include #include #include #include #include #include #include // pthread scheduling stuff class sweatShopWorker { public: sweatShopWorker() { shop = 0L; threadUserData = 0L; numComputed = 0; workerQueue = 0L; workerQueueLen = 0L; }; sweatShop *shop; void *threadUserData; pthread_t threadID; uint32 numComputed; sweatShopState 
**workerQueue; uint32 workerQueueLen; }; // This gets created by the loader, passed to the worker, and printed // by the writer. userData is controlled by the user. // class sweatShopState { public: sweatShopState(void *userData) { _user = userData; _computed = false; _next = 0L; }; ~sweatShopState() { }; void *_user; bool _computed; sweatShopState *_next; }; // Simply forwards control to the class void* _sweatshop_loaderThread(void *ss_) { sweatShop *ss = (sweatShop *)ss_; return(ss->loader()); } void* _sweatshop_workerThread(void *sw_) { sweatShopWorker *sw = (sweatShopWorker *)sw_; return(sw->shop->worker(sw)); } void* _sweatshop_writerThread(void *ss_) { sweatShop *ss = (sweatShop *)ss_; return(ss->writer()); } void* _sweatshop_statusThread(void *ss_) { sweatShop *ss = (sweatShop *)ss_; return(ss->status()); } sweatShop::sweatShop(void*(*loaderfcn)(void *G), void (*workerfcn)(void *G, void *T, void *S), void (*writerfcn)(void *G, void *S)) { _userLoader = loaderfcn; _userWorker = workerfcn; _userWriter = writerfcn; _globalUserData = 0L; _writerP = 0L; _workerP = 0L; _loaderP = 0L; _showStatus = false; _loaderQueueSize = 1024; _loaderQueueMax = 10240; _loaderQueueMin = 4; // _numberOfWorkers * 2, reset when that changes _loaderBatchSize = 1; _workerBatchSize = 1; _writerQueueSize = 4096; _writerQueueMax = 10240; _numberOfWorkers = 2; _workerData = 0L; _numberLoaded = 0; _numberComputed = 0; _numberOutput = 0; } sweatShop::~sweatShop() { delete [] _workerData; } void sweatShop::setThreadData(uint32 t, void *x) { if (_workerData == 0L) _workerData = new sweatShopWorker [_numberOfWorkers]; if (t >= _numberOfWorkers) fprintf(stderr, "sweatShop::setThreadData()-- worker ID "uint32FMT" more than number of workers="uint32FMT"\n", t, _numberOfWorkers), exit(1); _workerData[t].threadUserData = x; } // Build a list of states to add in one swoop // void sweatShop::loaderSave(sweatShopState *&tail, sweatShopState *&head, sweatShopState *thisState) { thisState->_next = 0L; 
if (tail) { head->_next = thisState; head = thisState; } else { tail = head = thisState; } _numberLoaded++; } // Add a bunch of new states to the queue. // void sweatShop::loaderAppend(sweatShopState *&tail, sweatShopState *&head) { int err; if ((tail == 0L) || (head == 0L)) return; err = pthread_mutex_lock(&_stateMutex); if (err != 0) fprintf(stderr, "sweatShop::loaderAppend()-- Failed to lock mutex (%d). Fail.\n", err), exit(1); if (_loaderP == 0L) { _writerP = tail; _workerP = tail; _loaderP = head; } else { _loaderP->_next = tail; } _loaderP = head; err = pthread_mutex_unlock(&_stateMutex); if (err != 0) fprintf(stderr, "sweatShop::loaderAppend()-- Failed to unlock mutex (%d). Fail.\n", err), exit(1); tail = 0L; head = 0L; } void* sweatShop::loader(void) { struct timespec naptime; naptime.tv_sec = 0; naptime.tv_nsec = 166666666ULL; // 1/6 second // We can batch several loads together before we push them onto the // queue, this should reduce the number of times the loader needs to // lock the queue. // // But it also increases the latency, so it's disabled by default. // sweatShopState *tail = 0L; // The first thing loaded sweatShopState *head = 0L; // The last thing loaded uint32 numLoaded = 0; bool moreToLoad = true; while (moreToLoad) { // Zzzzzzz.... while (_numberLoaded > _numberComputed + _loaderQueueSize) nanosleep(&naptime, 0L); sweatShopState *thisState = new sweatShopState((*_userLoader)(_globalUserData)); // If we actually loaded a new state, add it // if (thisState->_user) { loaderSave(tail, head, thisState); numLoaded++; if (numLoaded >= _loaderBatchSize) loaderAppend(tail, head); } else { // Didn't read, must be all done! Push on the end-of-input marker state. 
// loaderSave(tail, head, new sweatShopState(0L)); loaderAppend(tail, head); moreToLoad = false; delete thisState; } } //fprintf(stderr, "sweatShop::reader exits.\n"); return(0L); } void* sweatShop::worker(sweatShopWorker *workerData) { struct timespec naptime; naptime.tv_sec = 0; naptime.tv_nsec = 50000000ULL; bool moreToCompute = true; int err; while (moreToCompute) { // Usually beacuse some worker is taking a long time, and the // output queue isn't big enough. // while (_numberOutput + _writerQueueSize < _numberComputed) nanosleep(&naptime, 0L); // Grab the next state. We don't grab it if it's the last in the // queue (else we would fall off the end) UNLESS it really is the // last one. // err = pthread_mutex_lock(&_stateMutex); if (err != 0) fprintf(stderr, "sweatShop::worker()-- Failed to lock mutex (%d). Fail.\n", err), exit(1); for (workerData->workerQueueLen = 0; ((workerData->workerQueueLen < _workerBatchSize) && (_workerP) && ((_workerP->_next != 0L) || (_workerP->_user == 0L))); workerData->workerQueueLen++) { workerData->workerQueue[workerData->workerQueueLen] = _workerP; _workerP = _workerP->_next; } if (_workerP == 0L) moreToCompute = false; err = pthread_mutex_unlock(&_stateMutex); if (err != 0) fprintf(stderr, "sweatShop::worler()-- Failed to lock mutex (%d). Fail.\n", err), exit(1); if (workerData->workerQueueLen == 0) { // No work, sleep a bit to prevent thrashing the mutex and resume. nanosleep(&naptime, 0L); continue; } // Execute // for (uint32 x=0; xworkerQueueLen; x++) { sweatShopState *ts = workerData->workerQueue[x]; if (ts && ts->_user) { (*_userWorker)(_globalUserData, workerData->threadUserData, ts->_user); ts->_computed = true; workerData->numComputed++; } else { // When we really do run out of stuff to do, we'll end up here // (only one thread will end up in the other case, with // something to do and moreToCompute=false). If it's actually // the end, skip the sleep and just get outta here. 
// if (moreToCompute == true) { fprintf(stderr, "WARNING! Worker is sleeping because the reader is slow!\n"); nanosleep(&naptime, 0L); } } } } //fprintf(stderr, "sweatShop::worker exits.\n"); return(0L); } void* sweatShop::writer(void) { sweatShopState *deleteState = 0L; // Wait for output to appear, then write. // while (_writerP && _writerP->_user) { if (_writerP->_computed == false) { // Wait for a slow computation. struct timespec naptime; naptime.tv_sec = 0; naptime.tv_nsec = 5000000ULL; //fprintf(stderr, "Writer waits for slow thread at "uint64FMT".\n", _numberOutput); nanosleep(&naptime, 0L); } else if (_writerP->_next == 0L) { // Wait for the input. struct timespec naptime; naptime.tv_sec = 0; naptime.tv_nsec = 5000000ULL; //fprintf(stderr, "Writer waits for all threads at "uint64FMT".\n", _numberOutput); nanosleep(&naptime, 0L); } else { (*_userWriter)(_globalUserData, _writerP->_user); _numberOutput++; deleteState = _writerP; _writerP = _writerP->_next; delete deleteState; } } // Tell status to stop. _writerP = 0L; //fprintf(stderr, "sweatShop::writer exits.\n"); return(0L); } // This thread not only shows a status message, but it also updates the critical shared variable // _numberComputed. Worker threads use this to throttle themselves. Thus, even if _showStatus is // not set, and this thread doesn't _appear_ to be doing anything useful....it is. 
// void* sweatShop::status(void) { struct timespec naptime; naptime.tv_sec = 0; naptime.tv_nsec = 250000000ULL; double startTime = getTime() - 0.001; double thisTime = 0; uint64 deltaOut = 0; uint64 deltaCPU = 0; double cpuPerSec = 0; uint64 readjustAt = 16384; while (_writerP) { uint32 nc = 0; for (uint32 i=0; i<_numberOfWorkers; i++) nc += _workerData[i].numComputed; _numberComputed = nc; deltaOut = deltaCPU = 0; thisTime = getTime(); if (_numberComputed > _numberOutput) deltaOut = _numberComputed - _numberOutput; if (_numberLoaded > _numberComputed) deltaCPU = _numberLoaded - _numberComputed; cpuPerSec = _numberComputed / (thisTime - startTime); if (_showStatus) { fprintf(stderr, " %6.1f/s - "uint64FMTW(8)" loaded; "uint64FMTW(8)" queued for compute; "uint64FMTW(8)" finished; "uint64FMTW(8)" written; "uint64FMTW(8)" queued for output)\r", cpuPerSec, _numberLoaded, deltaCPU, _numberComputed, _numberOutput, deltaOut); fflush(stderr); } // Readjust queue sizes based on current performance, but don't let it get too big or small. // In particular, don't let it get below 2*numberOfWorkers. 
// if (_numberComputed > readjustAt) { readjustAt += (uint64)(2 * cpuPerSec); _loaderQueueSize = (uint32)(5 * cpuPerSec); } if (_loaderQueueSize < _loaderQueueMin) _loaderQueueSize = _loaderQueueMin; if (_loaderQueueSize < 2 * _numberOfWorkers) _loaderQueueSize = 2 * _numberOfWorkers; if (_loaderQueueSize > _loaderQueueMax) _loaderQueueSize = _loaderQueueMax; nanosleep(&naptime, 0L); } if (_showStatus) { thisTime = getTime(); if (_numberComputed > _numberOutput) deltaOut = _numberComputed - _numberOutput; if (_numberLoaded > _numberComputed) deltaCPU = _numberLoaded - _numberComputed; cpuPerSec = _numberComputed / (thisTime - startTime); fprintf(stderr, " %6.1f/s - "uint64FMTW(8)" queued for compute; "uint64FMTW(8)" finished; "uint64FMTW(8)" queued for output)\n", cpuPerSec, deltaCPU, _numberComputed, deltaOut); } //fprintf(stderr, "sweatShop::status exits.\n"); return(0L); } void sweatShop::run(void *user, bool beVerbose) { pthread_attr_t threadAttr; pthread_t threadIDloader; pthread_t threadIDwriter; pthread_t threadIDstats; #if 0 int threadSchedPolicy = 0; struct sched_param threadSchedParamDef; struct sched_param threadSchedParamMax; #endif int err = 0; _globalUserData = user; _showStatus = beVerbose; // Configure everything ahead of time. if (_workerBatchSize < 1) _workerBatchSize = 1; if (_workerData == 0L) _workerData = new sweatShopWorker [_numberOfWorkers]; for (uint32 i=0; i<_numberOfWorkers; i++) { _workerData[i].shop = this; _workerData[i].workerQueue = new sweatShopState * [_workerBatchSize]; } // Open the doors. 
errno = 0; err = pthread_mutex_init(&_stateMutex, NULL); if (err) fprintf(stderr, "sweatShop::run()-- Failed to configure pthreads (state mutex): %s.\n", strerror(err)), exit(1); err = pthread_attr_init(&threadAttr); if (err) fprintf(stderr, "sweatShop::run()-- Failed to configure pthreads (attr init): %s.\n", strerror(err)), exit(1); err = pthread_attr_setscope(&threadAttr, PTHREAD_SCOPE_SYSTEM); if (err) fprintf(stderr, "sweatShop::run()-- Failed to configure pthreads (set scope): %s.\n", strerror(err)), exit(1); err = pthread_attr_setdetachstate(&threadAttr, PTHREAD_CREATE_JOINABLE); if (err) fprintf(stderr, "sweatShop::run()-- Failed to configure pthreads (joinable): %s.\n", strerror(err)), exit(1); #if 0 err = pthread_attr_getschedparam(&threadAttr, &threadSchedParamDef); if (err) fprintf(stderr, "sweatShop::run()-- Failed to configure pthreads (get default param): %s.\n", strerror(err)), exit(1); err = pthread_attr_getschedparam(&threadAttr, &threadSchedParamMax); if (err) fprintf(stderr, "sweatShop::run()-- Failed to configure pthreads (get max param): %s.\n", strerror(err)), exit(1); #endif // SCHED_RR needs root privs to run on FreeBSD. 
// //err = pthread_attr_setschedpolicy(&threadAttr, SCHED_RR); //if (err) // fprintf(stderr, "sweatShop::run()-- Failed to configure pthreads (sched policy): %s.\n", strerror(err)), exit(1); #if 0 err = pthread_attr_getschedpolicy(&threadAttr, &threadSchedPolicy); if (err) fprintf(stderr, "sweatShop::run()-- Failed to configure pthreads (sched policy): %s.\n", strerror(err)), exit(1); errno = 0; threadSchedParamMax.sched_priority = sched_get_priority_max(threadSchedPolicy); if (errno) fprintf(stderr, "sweatShop::run()-- WARNING: Failed to configure pthreads (set max param priority): %s.\n", strerror(errno)); // Fire off the loader err = pthread_attr_setschedparam(&threadAttr, &threadSchedParamMax); if (err) fprintf(stderr, "sweatShop::run()-- Failed to set loader priority: %s.\n", strerror(err)), exit(1); #endif err = pthread_create(&threadIDloader, &threadAttr, _sweatshop_loaderThread, this); if (err) fprintf(stderr, "sweatShop::run()-- Failed to launch loader thread: %s.\n", strerror(err)), exit(1); // Wait for it to actually load something (otherwise all the // workers immediately go home) while (!_writerP && !_workerP && !_loaderP) { struct timespec naptime; naptime.tv_sec = 0; naptime.tv_nsec = 250000ULL; nanosleep(&naptime, 0L); } // Start the statistics and writer #if 0 err = pthread_attr_setschedparam(&threadAttr, &threadSchedParamMax); if (err) fprintf(stderr, "sweatShop::run()-- Failed to set status and writer priority: %s.\n", strerror(err)), exit(1); #endif err = pthread_create(&threadIDstats, &threadAttr, _sweatshop_statusThread, this); if (err) fprintf(stderr, "sweatShop::run()-- Failed to launch status thread: %s.\n", strerror(err)), exit(1); err = pthread_create(&threadIDwriter, &threadAttr, _sweatshop_writerThread, this); if (err) fprintf(stderr, "sweatShop::run()-- Failed to launch writer thread: %s.\n", strerror(err)), exit(1); // And some labor #if 0 err = pthread_attr_setschedparam(&threadAttr, &threadSchedParamDef); if (err) fprintf(stderr, 
"sweatShop::run()-- Failed to set worker priority: %s.\n", strerror(err)), exit(1); #endif for (uint32 i=0; i<_numberOfWorkers; i++) { err = pthread_create(&_workerData[i].threadID, &threadAttr, _sweatshop_workerThread, _workerData + i); if (err) fprintf(stderr, "sweatShop::run()-- Failed to launch worker thread "uint32FMT": %s.\n", i, strerror(err)), exit(1); } // Now sit back and relax. err = pthread_join(threadIDloader, 0L); if (err) fprintf(stderr, "sweatShop::run()-- Failed to join loader thread: %s.\n", strerror(err)), exit(1); err = pthread_join(threadIDwriter, 0L); if (err) fprintf(stderr, "sweatShop::run()-- Failed to join writer thread: %s.\n", strerror(err)), exit(1); err = pthread_join(threadIDstats, 0L); if (err) fprintf(stderr, "sweatShop::run()-- Failed to join status thread: %s.\n", strerror(err)), exit(1); for (uint32 i=0; i<_numberOfWorkers; i++) { err = pthread_join(_workerData[i].threadID, 0L); if (err) fprintf(stderr, "sweatShop::run()-- Failed to join worker thread "uint32FMT": %s.\n", i, strerror(err)), exit(1); } // Cleanup. delete _loaderP; _loaderP = _workerP = _writerP = 0L; } kmer-code-2013-trunk/libutil/recordFile.H0000644000000000000000000000311612322046702017000 0ustar rootroot#ifndef RECORDFILE_H #define RECORDFILE_H #include #include #include #include "util.h" // A file of fixed size records, with an optional header at the // start. Derived from the bitPackedFile at SVN-1533, but heavily // modified. Records can only be added, not updated (probably // trivial to fix). Records must be dense (also probably trivial to // fix). class recordFile { public: recordFile(char const *name, uint32 headerSize, uint32 recordSize, char mode); ~recordFile(); void *header(void) { return(_header); }; uint64 numRecords(void) { return(_numRecords); }; // Read/write records. uint32 getRecord(void *record, uint32 num=1); void putRecord(void *record, uint32 num=1); // Seek to record rec, optionally repositioning the buffer to that // record. 
void seek(uint64 rec, bool forced=false); // Set an artificial EOF at record rec. void limit(uint64 rec) { _limit = rec; }; private: void flushDirty(void); int _file; char *_name; uint64 _numRecords; uint32 _recordSize; uint32 _headerSize; char *_header; uint64 _bfrmax; // Number of records in the buffer char *_bfr; // A chunk of the bitPackedFile in core uint64 _limit; // An artificial EOF uint64 _pos; // The location this chunk is from (in records) uint64 _rec; // The record we're modifying relative to _pos bool _bfrDirty; bool _isReadOnly; }; #endif // RECORDFILE_H kmer-code-2013-trunk/libutil/eliasGammaEncoding.h0000644000000000000000000000127612322046702020476 0ustar rootroot#ifndef ELIAS_GAMMA_ENCODING_H #define ELIAS_GAMMA_ENCODING_H #include "bitPacking.h" inline void setEliasGammaEncodedNumber(uint64 *ptr, uint64 pos, uint64 *siz, uint64 val) { uint64 b = logBaseTwo64(val); setUnaryEncodedNumber(ptr, pos, siz, b); pos += *siz; setDecodedValue(ptr, pos, b, val); *siz += b; } inline uint64 getEliasGammaEncodedNumber(uint64 *ptr, uint64 pos, uint64 *siz) { uint64 b = getUnaryEncodedNumber(ptr, pos, siz); pos += *siz; *siz += b; return(getDecodedValue(ptr, pos, b)); } #endif // ELIAS_GAMMA_ENCODING_H kmer-code-2013-trunk/libutil/recordFile.C0000644000000000000000000002070212322046702016773 0ustar rootroot#include "util++.H" #include #include #include #include #include #include // N.B. any read() / write() pair (either order) must have a seek (or // a fflush) in between. 
uint64 recordFileMagic1 = 0x694664726f636572llu; uint64 recordFileMagic2 = 0x000000000000656cllu; recordFile::recordFile(char const *name, uint32 headerSize, uint32 recordSize, char mode) { _file = 0; _name = new char [strlen(name) + 1]; strcpy(_name, name); _numRecords = 0; _recordSize = recordSize; _headerSize = headerSize; _header = new char [_headerSize]; memset(_header, 0, sizeof(char) * _headerSize); _bfrmax = MAX(1048576 / _recordSize, 16); _bfr = new char [_bfrmax * _recordSize]; _limit = ~uint32ZERO; _pos = uint64ZERO; _rec = 0; memset(_bfr, 0, sizeof(char) * _bfrmax * _recordSize); _bfrDirty = false; _isReadOnly = true; if ((mode != 'r') && (mode != 'w') && (mode |= 'a')) { fprintf(stderr, "recordFile::recordFile()-- Invalid mode '%c'.\n", mode); exit(1); } // If the file doesn't exist, or we're opening for write, we're // basically done. Do that first. // Write the magic. // Write the metadata. // Write the header. if (((mode == 'w')) || ((mode == 'a') && (fileExists(_name) == false))) { errno = 0; _file = open(_name, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (errno) fprintf(stderr, "recordFile::recordFile()-- failed to open '%s': %s\n", _name, strerror(errno)), exit(1); _isReadOnly = false; write(_file, &recordFileMagic1, sizeof(uint64)); write(_file, &recordFileMagic2, sizeof(uint64)); write(_file, &_numRecords, sizeof(uint64)); write(_file, &_recordSize, sizeof(uint32)); write(_file, &_headerSize, sizeof(uint32)); write(_file, _header, sizeof(char) * _headerSize); if (errno) fprintf(stderr, "recordFile::recordFile()-- failed to write header to '%s': %s\n", _name, strerror(errno)), exit(1); return; } // File does exist. If we're not appending, open it read-only. // Otherwise, open read-write. 
if (mode == 'r') { errno = 0; _file = open(_name, O_RDONLY | O_LARGEFILE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (errno) fprintf(stderr, "recordFile::recordFile()-- failed to open '%s': %s\n", _name, strerror(errno)), exit(1); _isReadOnly = true; } else { errno = 0; _file = open(_name, O_RDWR | O_LARGEFILE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (errno) fprintf(stderr, "recordFile::recordFile()-- failed to open for write '%s': %s\n", _name, strerror(errno)), exit(1); _isReadOnly = false; } // Read the magic, metadata and header. { uint64 m1, m2; errno = 0; read(_file, &m1, sizeof(uint64)); read(_file, &m2, sizeof(uint64)); read(_file, &_numRecords, sizeof(uint64)); read(_file, &_recordSize, sizeof(uint32)); read(_file, &_headerSize, sizeof(uint32)); read(_file, _header, sizeof(char) * _headerSize); if (errno) fprintf(stderr, "recordFile::recordFile()-- failed to read header from '%s': %s\n", _name, strerror(errno)), exit(1); if ((m1 != recordFileMagic1) || (m2 != recordFileMagic2)) fprintf(stderr, "recordFile::recordFile()-- magic number disagreement; '%s' not a recordFile?\n", _name), exit(1); } if (mode == 'a') { _pos = _numRecords; _rec = 0; errno = 0; lseek(_file, 0, SEEK_END); if (errno) fprintf(stderr, "recordFile::recordFile()-- seek to end of '%s' failed: %s\n", _name, strerror(errno)), exit(1); } else { seek(0, true); } } recordFile::~recordFile() { flushDirty(); if (_isReadOnly == false) { errno = 0; lseek(_file, 0, SEEK_SET); if (errno) fprintf(stderr, "recordFile::~recordFile()-- seek to start of '%s' failed: %s\n", _name, strerror(errno)), exit(1); write(_file, &recordFileMagic1, sizeof(uint64)); write(_file, &recordFileMagic2, sizeof(uint64)); write(_file, &_numRecords, sizeof(uint64)); write(_file, &_recordSize, sizeof(uint32)); write(_file, &_headerSize, sizeof(uint32)); write(_file, _header, sizeof(char) * _headerSize); if (errno) fprintf(stderr, "recordFile::~recordFile()-- failed to write header to 
'%s': %s\n", _name, strerror(errno)), exit(1); } close(_file); if (errno) fprintf(stderr, "recordFile::~recordFile()-- failed to close '%s': %s\n", _name, strerror(errno)), exit(1); delete [] _bfr; delete [] _name; delete [] _header; } // If the page is dirty, flush it to disk // void recordFile::flushDirty(void) { if (_bfrDirty == false) return; if (_isReadOnly) fprintf(stderr, "recordFile::recordFile()-- '%s' is readonly, but is dirty!\n", _name), exit(1); errno = 0; lseek(_file, 32 + _headerSize + _pos * _recordSize, SEEK_SET); if (errno) fprintf(stderr, "recordFile::seek()-- '%s' failed: %s\n", _name, strerror(errno)), exit(1); // Write records up to, not including, _rec. Unlike the // bitPackedFile, there is no issue with partially filled words // here. // errno = 0; write(_file, _bfr, _recordSize * _rec); if (errno) fprintf(stderr, "recordFile::write()-- '%s' failed: %s\n", _name, strerror(errno)), exit(1); _bfrDirty = false; } // Seeks to rec in the file, reads in a new block. // void recordFile::seek(uint64 rec, bool forced) { // If we are seeking to somewhere in the current block, don't do a // real seek, just move our position within the block. // if ((forced == false) && (_pos <= rec) && (rec < _pos + _bfrmax)) { _rec = rec - _pos; return; } flushDirty(); _pos = rec; // Root of buffer is now here _rec = 0; // See? 
errno = 0; lseek(_file, 32 + _headerSize + _pos * _recordSize, SEEK_SET); if (errno) fprintf(stderr, "recordFile::seek() '%s' seek to record="uint64FMT" at fileposition="uint64FMT" failed: %s\n", _name, _pos, _headerSize + _pos * _recordSize, strerror(errno)), exit(1); errno = 0; read(_file, _bfr, _recordSize * _bfrmax); if (errno) fprintf(stderr, "recordFile::seek() '%s' read of "uint64FMT" bytes failed at record "uint64FMT", fileposition "uint64FMT"': %s\n", _name, _recordSize * _bfrmax, _pos, _headerSize + _pos * _recordSize, strerror(errno)), exit(1); } uint32 recordFile::getRecord(void *record, uint32 num) { uint32 maxnum = _bfrmax / 2; // Reading large blocks -- bigger than the in-core size? Loop and // recurse. // if (num > maxnum) { uint32 numread = 0; uint32 pos = 0; uint32 len = 0; while (num > 0) { len = MIN(maxnum, num); len = getRecord((char *)record + pos * _recordSize, len); if (len == 0) return(numread); num -= len; pos += len; numread += len; } return(numread); } // If asked to read too many records, read whatever is left. // if (_numRecords < _pos + _rec + num) num = _numRecords - _pos - _rec; if (_limit < _pos + _rec + num) num = _limit - _pos - _rec; // If the current position is already past eof, return without // reading. The previous 'if' ensures we will never read a block // past eof. 
// if ((_numRecords < _pos + _rec) || (_limit < _pos + _rec)) return(0); if (_bfrmax < _rec + num + 1) seek(_pos + _rec, true); memcpy(record, _bfr + _rec * _recordSize, _recordSize * num); _rec += num; return(num); } void recordFile::putRecord(void *record, uint32 num) { uint32 maxnum = _bfrmax / 2; if (num > maxnum) { uint32 pos = 0; uint32 len = 0; while (num > 0) { len = MIN(maxnum, num); putRecord((char *)record + pos * _recordSize, len); num -= len; pos += len; } } else { if (_bfrmax < _rec + num + 1) seek(_pos + _rec, true); memcpy(_bfr + _rec * _recordSize, record, _recordSize * num); _rec += num; _numRecords += num; _bfrDirty = true; } } kmer-code-2013-trunk/libutil/sweatShop.H0000644000000000000000000000513612322046702016703 0ustar rootroot#ifndef SWEATSHOP_H #define SWEATSHOP_H #include #include #include "util++.H" class sweatShopWorker; class sweatShopState; class sweatShop { public: sweatShop(void*(*loaderfcn)(void *G), void (*workerfcn)(void *G, void *T, void *S), void (*writerfcn)(void *G, void *S)); ~sweatShop(); void setNumberOfWorkers(uint32 x) { _numberOfWorkers = x; _loaderQueueMin = x * 2; }; void setThreadData(uint32 t, void *x); void setLoaderBatchSize(uint32 batchSize) { _loaderBatchSize = batchSize; }; void setLoaderQueueSize(uint32 queueSize) { _loaderQueueSize = queueSize; _loaderQueueMax = queueSize; }; void setWorkerBatchSize(uint32 batchSize) { _workerBatchSize = batchSize; }; void setWriterQueueSize(uint32 queueSize) { _writerQueueSize = queueSize; _writerQueueMax = queueSize; }; void run(void *user=0L, bool beVerbose=false); private: // Stubs that forward control from the c-based pthread to this class friend void *_sweatshop_loaderThread(void *ss); friend void *_sweatshop_workerThread(void *ss); friend void *_sweatshop_writerThread(void *ss); friend void *_sweatshop_statusThread(void *ss); // The threaded routines void *loader(void); void *worker(sweatShopWorker *workerData); void *writer(void); void *status(void); // Utilities for 
the loader thread //void loaderAdd(sweatShopState *thisState); void loaderSave(sweatShopState *&tail, sweatShopState *&head, sweatShopState *thisState); void loaderAppend(sweatShopState *&tail, sweatShopState *&head); pthread_mutex_t _stateMutex; void *(*_userLoader)(void *global); void (*_userWorker)(void *global, void *thread, void *thing); void (*_userWriter)(void *global, void *thing); void *_globalUserData; sweatShopState *_writerP; // Where output takes stuff from, the tail sweatShopState *_workerP; // Where computes happen, the middle sweatShopState *_loaderP; // Where input is put, the head bool _showStatus; uint32 _loaderQueueSize, _loaderQueueMin, _loaderQueueMax; uint32 _loaderBatchSize; uint32 _workerBatchSize; uint32 _writerQueueSize, _writerQueueMax; uint32 _numberOfWorkers; sweatShopWorker *_workerData; uint64 _numberLoaded; uint64 _numberComputed; uint64 _numberOutput; }; #endif // SWEATSHOP_H kmer-code-2013-trunk/libutil/speedCounter.C0000644000000000000000000000344112322046702017356 0ustar rootroot#include #include #include "util++.H" const char* speedCounter::_spinr[4] = { "[|]", "[/]", "[-]", "[\\]" }; const char* speedCounter::_liner[19] = { "[- ]", "[-- ]", "[ -- ]", "[ -- ]", "[ -- ]", "[ -- ]", "[ -- ]", "[ -- ]", "[ -- ]", "[ --]", "[ -]", "[ --]", "[ -- ]", "[ -- ]", "[ -- ]", "[ -- ]", "[ -- ]", "[ -- ]", "[ -- ]" }; speedCounter::speedCounter(char const *fmt, double unit, uint64 freq, bool enabled) { _count = 0; _draws = 0; _unit = unit; _freq = freq; _startTime = getTime(); _fmt = fmt; _spin = false; _line = false; _enabled = enabled; // We use _draws instead of shifting _count just because it's // simpler, and both methods need another variable anyway. // Set all the bits below the hightest set in _freq -- // this allows us to do a super-fast test in tick(). 
// _freq |= _freq >> 1; _freq |= _freq >> 2; _freq |= _freq >> 4; _freq |= _freq >> 8; _freq |= _freq >> 16; _freq |= _freq >> 32; } speedCounter::~speedCounter() { finish(); } kmer-code-2013-trunk/libutil/md5.c0000644000000000000000000003063712322046702015452 0ustar rootroot#include #include #include #include #include "util.h" // The RSA MD5 implementation. Functions md5_* (at the end) are glue // to kmer libutil. // See RFC1321, "The MD5 Message-Digest Algorithm", R. Rivest. // Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All // rights reserved. // // License to copy and use this software is granted provided that it // is identified as the "RSA Data Security, Inc. MD5 Message-Digest // Algorithm" in all material mentioning or referencing this software // or this function. // // License is also granted to make and use derivative works provided // that such works are identified as "derived from the RSA Data // Security, Inc. MD5 Message-Digest Algorithm" in all material // mentioning or referencing the derived work. // // RSA Data Security, Inc. makes no representations concerning either // the merchantability of this software or the suitability of this // software for any particular purpose. It is provided "as is" // without express or implied warranty of any kind. // // These notices must be retained in any copies of any part of this // documentation and/or software. typedef struct { uint32 state[4]; // state (ABCD) uint32 count[2]; // number of bits, modulo 2^64 (lsb first) unsigned char buffer[64]; // input buffer } MD5_CTX; static void MD5Init(MD5_CTX *); static void MD5Update(MD5_CTX *, unsigned char const *, size_t); static void MD5Final(unsigned char [16], MD5_CTX *); static void MD5Transform(uint32 [4], unsigned char const [64]); static void Encode(unsigned char *, uint32 *, unsigned int); static void Decode(uint32 *, unsigned char const *, unsigned int); // Constants for MD5Transform routine. 
#define S11 7 #define S12 12 #define S13 17 #define S14 22 #define S21 5 #define S22 9 #define S23 14 #define S24 20 #define S31 4 #define S32 11 #define S33 16 #define S34 23 #define S41 6 #define S42 10 #define S43 15 #define S44 21 static unsigned char PADDING[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; // F, G, H and I are basic MD5 functions. #define F(x, y, z) (((x) & (y)) | ((~x) & (z))) #define G(x, y, z) (((x) & (z)) | ((y) & (~z))) #define H(x, y, z) ((x) ^ (y) ^ (z)) #define I(x, y, z) ((y) ^ ((x) | (~z))) // ROTATE_LEFT rotates x left n bits. #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) // FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4. // Rotation is separate from addition to prevent recomputation. #define FF(a, b, c, d, x, s, ac) { \ (a) += F ((b), (c), (d)) + (x) + (uint32)(ac); \ (a) = ROTATE_LEFT ((a), (s)); \ (a) += (b); \ } #define GG(a, b, c, d, x, s, ac) { \ (a) += G ((b), (c), (d)) + (x) + (uint32)(ac); \ (a) = ROTATE_LEFT ((a), (s)); \ (a) += (b); \ } #define HH(a, b, c, d, x, s, ac) { \ (a) += H ((b), (c), (d)) + (x) + (uint32)(ac); \ (a) = ROTATE_LEFT ((a), (s)); \ (a) += (b); \ } #define II(a, b, c, d, x, s, ac) { \ (a) += I ((b), (c), (d)) + (x) + (uint32)(ac); \ (a) = ROTATE_LEFT ((a), (s)); \ (a) += (b); \ } // MD5 initialization. Begins an MD5 operation, writing a new context. // void MD5Init (MD5_CTX *context) { context->count[0] = context->count[1] = 0; // Load magic initialization constants. context->state[0] = 0x67452301; context->state[1] = 0xefcdab89; context->state[2] = 0x98badcfe; context->state[3] = 0x10325476; } // MD5 block update operation. Continues an MD5 message-digest // operation, processing another message block, and updating the // context. 
// void MD5Update (MD5_CTX *context, unsigned char const *input, size_t inputLen) { unsigned int i, index, partLen; // Compute number of bytes mod 64 index = (unsigned int)((context->count[0] >> 3) & 0x3F); // Update number of bits if ((context->count[0] += ((uint32)inputLen << 3)) < ((uint32)inputLen << 3)) context->count[1]++; context->count[1] += ((uint32)inputLen >> 29); partLen = 64 - index; // Transform as many times as possible. if (inputLen >= partLen) { memcpy(&context->buffer[index], input, partLen); MD5Transform(context->state, context->buffer); for (i = partLen; i + 63 < inputLen; i += 64) MD5Transform(context->state, &input[i]); index = 0; } else i = 0; // Buffer remaining input memcpy(&context->buffer[index], &input[i], inputLen-i); } // MD5 finalization. Ends an MD5 message-digest operation, writing the // the message digest and zeroizing the context. // void MD5Final (unsigned char digest[16], MD5_CTX *context) { unsigned char bits[8]; unsigned int index, padLen; // Save number of bits Encode (bits, context->count, 8); // Pad out to 56 mod 64. index = (unsigned int)((context->count[0] >> 3) & 0x3f); padLen = (index < 56) ? (56 - index) : (120 - index); MD5Update (context, PADDING, padLen); // Append length (before padding) MD5Update (context, bits, 8); // Store state in digest Encode (digest, context->state, 16); // Zeroize sensitive information. memset(context, 0, sizeof(*context)); } // MD5 basic transformation. Transforms state based on block. 
//
//  state -- the four 32-bit chaining words (A, B, C, D); updated in place.
//  block -- exactly 64 bytes of message, decoded below into sixteen
//           32-bit words x[0..15].
//
//  The 64 steps are grouped into four rounds of sixteen.  Within a
//  round the roles of a, b, c, d rotate on every step, the shift
//  amounts cycle through that round's four S* constants, and each step
//  adds its own additive constant.  The trailing comment on each line
//  is the step number.
//
static
void
MD5Transform(uint32 state[4], unsigned char const block[64]) {
  uint32 a = state[0], b = state[1], c = state[2], d = state[3], x[16];

  Decode(x, block, 64);

  // Round 1
  FF (a, b, c, d, x[ 0], S11, 0xd76aa478); // 1
  FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); // 2
  FF (c, d, a, b, x[ 2], S13, 0x242070db); // 3
  FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); // 4
  FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); // 5
  FF (d, a, b, c, x[ 5], S12, 0x4787c62a); // 6
  FF (c, d, a, b, x[ 6], S13, 0xa8304613); // 7
  FF (b, c, d, a, x[ 7], S14, 0xfd469501); // 8
  FF (a, b, c, d, x[ 8], S11, 0x698098d8); // 9
  FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); // 10
  FF (c, d, a, b, x[10], S13, 0xffff5bb1); // 11
  FF (b, c, d, a, x[11], S14, 0x895cd7be); // 12
  FF (a, b, c, d, x[12], S11, 0x6b901122); // 13
  FF (d, a, b, c, x[13], S12, 0xfd987193); // 14
  FF (c, d, a, b, x[14], S13, 0xa679438e); // 15
  FF (b, c, d, a, x[15], S14, 0x49b40821); // 16

  // Round 2
  GG (a, b, c, d, x[ 1], S21, 0xf61e2562); // 17
  GG (d, a, b, c, x[ 6], S22, 0xc040b340); // 18
  GG (c, d, a, b, x[11], S23, 0x265e5a51); // 19
  GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); // 20
  GG (a, b, c, d, x[ 5], S21, 0xd62f105d); // 21
  GG (d, a, b, c, x[10], S22, 0x2441453); // 22
  GG (c, d, a, b, x[15], S23, 0xd8a1e681); // 23
  GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); // 24
  GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); // 25
  GG (d, a, b, c, x[14], S22, 0xc33707d6); // 26
  GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); // 27
  GG (b, c, d, a, x[ 8], S24, 0x455a14ed); // 28
  GG (a, b, c, d, x[13], S21, 0xa9e3e905); // 29
  GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); // 30
  GG (c, d, a, b, x[ 7], S23, 0x676f02d9); // 31
  GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); // 32

  // Round 3
  HH (a, b, c, d, x[ 5], S31, 0xfffa3942); // 33
  HH (d, a, b, c, x[ 8], S32, 0x8771f681); // 34
  HH (c, d, a, b, x[11], S33, 0x6d9d6122); // 35
  HH (b, c, d, a, x[14], S34, 0xfde5380c); // 36
  HH (a, b, c, d, x[ 1], S31, 0xa4beea44); // 37
  HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); // 38
  HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); // 39
  HH (b, c, d, a, x[10], S34, 0xbebfbc70); // 40
  HH (a, b, c, d, x[13], S31, 0x289b7ec6); // 41
  HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); // 42
  HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); // 43
  HH (b, c, d, a, x[ 6], S34, 0x4881d05); // 44
  HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); // 45
  HH (d, a, b, c, x[12], S32, 0xe6db99e5); // 46
  HH (c, d, a, b, x[15], S33, 0x1fa27cf8); // 47
  HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); // 48

  // Round 4
  II (a, b, c, d, x[ 0], S41, 0xf4292244); // 49
  II (d, a, b, c, x[ 7], S42, 0x432aff97); // 50
  II (c, d, a, b, x[14], S43, 0xab9423a7); // 51
  II (b, c, d, a, x[ 5], S44, 0xfc93a039); // 52
  II (a, b, c, d, x[12], S41, 0x655b59c3); // 53
  II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); // 54
  II (c, d, a, b, x[10], S43, 0xffeff47d); // 55
  II (b, c, d, a, x[ 1], S44, 0x85845dd1); // 56
  II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); // 57
  II (d, a, b, c, x[15], S42, 0xfe2ce6e0); // 58
  II (c, d, a, b, x[ 6], S43, 0xa3014314); // 59
  II (b, c, d, a, x[13], S44, 0x4e0811a1); // 60
  II (a, b, c, d, x[ 4], S41, 0xf7537e82); // 61
  II (d, a, b, c, x[11], S42, 0xbd3af235); // 62
  II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); // 63
  II (b, c, d, a, x[ 9], S44, 0xeb86d391); // 64

  // Fold this block's result into the running chaining value.
  state[0] += a;
  state[1] += b;
  state[2] += c;
  state[3] += d;

  // Zeroize sensitive information.
  // NOTE(review): x is a dead local after this point, so a compiler is
  // free to drop this memset -- confirm whether that matters here.
  memset (x, 0, sizeof(x));
}

// Encodes input (uint32) into output (unsigned char). Assumes len is
// a multiple of 4.
//
//  output -- receives 'len' bytes.
//  input  -- len/4 words; each is written least-significant byte first.
//  len    -- number of OUTPUT bytes, not the number of input words.
//
static
void
Encode (unsigned char *output, uint32 *input, unsigned int len) {
  unsigned int i, j;

  // i indexes words of input, j indexes bytes of output.
  for (i = 0, j = 0; j < len; i++, j += 4) {
    output[j] = (unsigned char)(input[i] & 0xff);
    output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
    output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
    output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
  }
}

// Decodes input (unsigned char) into output (uint32). Assumes len is
// a multiple of 4.
// static void Decode (uint32 *output, unsigned char const *input, unsigned int len) { unsigned int i, j; for (i = 0, j = 0; j < len; i++, j += 4) output[i] = ((uint32)input[j]) | (((uint32)input[j+1]) << 8) | (((uint32)input[j+2]) << 16) | (((uint32)input[j+3]) << 24); } //////////////////////////////////////////////////////////////////////////////// // // kmer glue functions // //////////////////////////////////////////////////////////////////////////////// int md5_compare(void const *a, void const *b) { md5_s const *A = (md5_s const *)a; md5_s const *B = (md5_s const *)b; if (A->a < B->a) return(-1); if (A->a > B->a) return(1); if (A->b < B->b) return(-1); if (A->b > B->b) return(1); return(0); } static const char *md5_letters = "0123456789abcdef"; char* md5_toascii(md5_s *m, char *s) { int i; for (i=0; i<16; i++) { s[15-i ] = md5_letters[(m->a >> 4*i) & 0x0f]; s[15-i+16] = md5_letters[(m->b >> 4*i) & 0x0f]; } s[32] = 0; return(s); } md5_s* md5_string(md5_s *m, char *s, uint32 l) { MD5_CTX ctx; unsigned char dig[16]; int i = 0; if (m == NULL) { errno = 0; m = (md5_s *)malloc(sizeof(md5_s)); if (errno) { fprintf(stderr, "md5_string()-- Can't allocate a md5_s.\n%s\n", strerror(errno)); exit(1); } } MD5Init(&ctx); MD5Update(&ctx, (unsigned char*)s, l); MD5Final(dig, &ctx); m->a = dig[0]; while (i<8) { m->a <<= 8; m->a |= dig[i++]; } m->b = dig[i++]; while (i<16) { m->b <<= 8; m->b |= dig[i++]; } return(m); } static md5_increment_s* md5_increment_initialize(void) { md5_increment_s *m; errno = 0; m = (md5_increment_s *)malloc(sizeof(md5_increment_s)); if (errno) { fprintf(stderr, "md5_increment_*()-- Can't allocate a md5_increment_s.\n%s\n", strerror(errno)); exit(1); } m->context = (MD5_CTX *)malloc(sizeof(MD5_CTX)); if (errno) { fprintf(stderr, "md5_increment_*()-- Can't allocate a md5 context.\n%s\n", strerror(errno)); exit(1); } MD5Init((MD5_CTX *)m->context); m->bufferPos = 0; return(m); } md5_increment_s* md5_increment_char(md5_increment_s *m, char s) { if (m 
== NULL) m = md5_increment_initialize(); m->buffer[m->bufferPos++] = s; if (m->bufferPos == MD5_BUFFER_SIZE) { MD5Update((MD5_CTX *)m->context, m->buffer, m->bufferPos); m->bufferPos = 0; } return(m); } md5_increment_s* md5_increment_block(md5_increment_s *m, char *s, uint32 l) { if (m == NULL) m = md5_increment_initialize(); MD5Update((MD5_CTX *)m->context, (unsigned char*)s, l); return(m); } void md5_increment_finalize(md5_increment_s *m) { MD5_CTX *ctx = (MD5_CTX *)m->context; unsigned char dig[16]; int i = 0; if (m->bufferPos > 0) { MD5Update((MD5_CTX *)m->context, m->buffer, m->bufferPos); m->bufferPos = 0; } MD5Final(dig, ctx); m->a = dig[0]; while (i<8) { m->a <<= 8; m->a |= dig[i++]; } m->b = dig[i++]; while (i<16) { m->b <<= 8; m->b |= dig[i++]; } m->context = 0L; free(ctx); } void md5_increment_destroy(md5_increment_s *m) { free(m); } kmer-code-2013-trunk/libutil/splitToWords.H0000644000000000000000000000530612322046702017402 0ustar rootroot#ifndef SPLITTOWORDS_H #define SPLITTOWORDS_H class splitToWords { public: splitToWords() { _argWords = 0; _maxWords = 0; _arg = 0L; _maxChars = 0; _cmd = 0L; }; splitToWords(char *cmd) { _argWords = 0; _maxWords = 0; _arg = 0L; _maxChars = 0; _cmd = 0L; split(cmd); }; ~splitToWords() { delete [] _cmd; delete [] _arg; }; void split(char *cmd) { // Step Zero: // // Count the length of the string, in words and in characters. // For simplicity, we overcount words, by just counting white-space. // // Then, allocate space for a temporary copy of the string, and a // set of pointers into the temporary copy (much like argv). 
// uint32 cmdChars = 1; // 1 == Space for terminating 0 uint32 cmdWords = 2; // 2 == Space for first word and terminating 0L for (char *tmp=cmd; *tmp; tmp++) { cmdWords += *tmp == ' '; cmdWords += *tmp == '\t'; cmdChars++; } if (cmdChars > _maxChars) { delete [] _cmd; _cmd = new char [cmdChars]; _maxChars = cmdChars; } if (cmdWords > _maxWords) { delete [] _arg; _arg = new char * [cmdWords]; _maxWords = cmdWords; } _argWords = 0; // Step One: // // Determine where the words are in the command string, copying the // string to _cmd and storing words in _arg. // bool isFirst = true; char *cmdI = cmd; char *cmdO = _cmd; while (*cmdI) { // If we are at a non-space character, we are in a word. If // this is the first character in the word, save the word in // the args list. // // Otherwise we are at a space and thus not in a word. Make // all spaces be string terminators, and declare that we are // at the start of a word. // if ((*cmdI != ' ') && (*cmdI != '\t')) { *cmdO = *cmdI; if (isFirst) { _arg[_argWords++] = cmdO; isFirst = false; } } else { *cmdO = 0; isFirst = true; } cmdI++; cmdO++; } // Finish off the list by terminating the last arg, and // terminating the list of args. // *cmdO = 0; _arg[_argWords] = 0L; }; uint32 numWords(void) { return(_argWords); }; char *getWord(uint32 i) { return(_arg[i]); }; char *operator[](uint32 i) { return(_arg[i]); }; int64 operator()(uint32 i) { return(strtoull(_arg[i], NULL, 10)); }; private: uint32 _argWords; uint32 _maxWords; char **_arg; uint32 _maxChars; char *_cmd; }; #endif // SPLITTOWORDS_H kmer-code-2013-trunk/libutil/qsort_mt.c0000644000000000000000000002531311042110757016630 0ustar rootroot/*- * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * Multithread implementation Copyright (c) 2006, 2007 Diomidis Spinellis. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ //static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93"; //__FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.12 2002/09/10 02:04:49 wollman Exp $"); //#include #include #include #include #include #include #include #include #include #ifdef __FreeBSD__ #include #endif typedef int cmp_t(const void *, const void *); static inline char *med3(char *, char *, char *, cmp_t *); static inline void swapfunc(char *, char *, int, int); #define min(a, b) (a) < (b) ? a : b /* * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function". 
*/ #define swapcode(TYPE, parmi, parmj, n) { \ long i = (n) / sizeof (TYPE); \ TYPE *pi = (TYPE *) (parmi); \ TYPE *pj = (TYPE *) (parmj); \ do { \ TYPE t = *pi; \ *pi++ = *pj; \ *pj++ = t; \ } while (--i > 0); \ } static inline void swapfunc(a, b, n, swaptype) char *a, *b; int n, swaptype; { if(swaptype <= 1) swapcode(long, a, b, n) else swapcode(char, a, b, n) } #define swap(a, b) \ if (swaptype == 0) { \ long t = *(long *)(a); \ *(long *)(a) = *(long *)(b); \ *(long *)(b) = t; \ } else \ swapfunc(a, b, es, swaptype) #define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n, swaptype) #define CMP(x, y) (cmp((x), (y))) static inline char * med3(char *a, char *b, char *c, cmp_t *cmp) { return CMP(a, b) < 0 ? (CMP(b, c) < 0 ? b : (CMP(a, c) < 0 ? c : a )) :(CMP(b, c) > 0 ? b : (CMP(a, c) < 0 ? a : c )); } /* * We use some elaborate condition variables and signalling * to ensure a bound of the number of active threads at * 2 * maxthreads and the size of the thread data structure * to maxthreads. */ /* Condition of starting a new thread. */ enum thread_state { ts_idle, /* Idle, waiting for instructions. */ ts_work, /* Has work to do. */ ts_term /* Asked to terminate. */ }; /* Variant part passed to qsort invocations. */ struct qsort { enum thread_state st; /* For coordinating work. */ struct common *common; /* Common shared elements. */ void *a; /* Array base. */ size_t n; /* Number of elements. */ pthread_t id; /* Thread id. */ pthread_mutex_t mtx_st; /* For signalling state change. */ pthread_cond_t cond_st; /* For signalling state change. */ }; /* Invariant common part, shared across invocations. */ struct common { int swaptype; /* Code to use for swapping */ size_t es; /* Element size. */ cmp_t *cmp; /* Comparison function */ int nthreads; /* Total number of pool threads. */ int idlethreads; /* Number of idle threads in pool. */ int forkelem; /* Minimum number of elements for a new thread. */ struct qsort *pool; /* Fixed pool of threads. 
*/ pthread_mutex_t mtx_al; /* For allocating threads in the pool. */ }; static void *qsort_thread(void *p); /* The multithreaded qsort public interface */ void qsort_mt(void *a, size_t n, size_t es, cmp_t *cmp, int maxthreads, int forkelem) { struct qsort *qs; struct common c; int i, islot; int bailout = 1; if (n < forkelem) goto f1; errno = 0; if (maxthreads <= 1) goto f1; /* Try to initialize the resources we need. */ if (pthread_mutex_init(&c.mtx_al, NULL) != 0) goto f1; if ((c.pool = (struct qsort *)calloc(maxthreads, sizeof(struct qsort))) ==NULL) goto f2; for (islot = 0; islot < maxthreads; islot++) { qs = &c.pool[islot]; if (pthread_mutex_init(&qs->mtx_st, NULL) != 0) goto f3; if (pthread_cond_init(&qs->cond_st, NULL) != 0) { pthread_mutex_destroy(&qs->mtx_st); goto f3; } qs->st = ts_idle; qs->common = &c; if (pthread_create(&qs->id, NULL, qsort_thread, qs) != 0) { pthread_mutex_destroy(&qs->mtx_st); pthread_cond_destroy(&qs->cond_st); goto f3; } } /* All systems go. */ bailout = 0; /* Initialize common elements. */ c.swaptype = ((char *)a - (char *)0) % sizeof(long) || \ es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1; c.es = es; c.cmp = cmp; c.forkelem = forkelem; c.idlethreads = c.nthreads = maxthreads; /* Hand out the first work batch. */ qs = &c.pool[0]; pthread_mutex_lock(&qs->mtx_st); qs->a = a; qs->n = n; qs->st = ts_work; c.idlethreads--; pthread_cond_signal(&qs->cond_st); pthread_mutex_unlock(&qs->mtx_st); /* * Wait for all threads to finish, and * free acquired resources. 
*/ f3: for (i = 0; i < islot; i++) { qs = &c.pool[i]; if (bailout) { pthread_mutex_lock(&qs->mtx_st); qs->st = ts_term; pthread_cond_signal(&qs->cond_st); pthread_mutex_unlock(&qs->mtx_st); } pthread_join(qs->id, NULL); pthread_mutex_destroy(&qs->mtx_st); pthread_cond_destroy(&qs->cond_st); } free(c.pool); f2: pthread_mutex_destroy(&c.mtx_al); if (bailout) { /* XXX should include a syslog call here */ fprintf(stderr, "Resource initialization failed; bailing out.\n"); f1: qsort(a, n, es, cmp); } } /* * Allocate an idle thread from the pool, lock its * mutex, change its state to work, decrease the number * of idle threads, and return a * pointer to its data area. * Return NULL, if no thread is available. */ static struct qsort * allocate_thread(struct common *c) { int i; pthread_mutex_lock(&c->mtx_al); for (i = 0; i < c->nthreads; i++) if (c->pool[i].st == ts_idle) { c->idlethreads--; c->pool[i].st = ts_work; pthread_mutex_lock(&c->pool[i].mtx_st); pthread_mutex_unlock(&c->mtx_al); return (&c->pool[i]); } pthread_mutex_unlock(&c->mtx_al); return (NULL); } /* Thread-callable quicksort. */ static void qsort_algo(struct qsort *qs) { char *pa, *pb, *pc, *pd, *pl, *pm, *pn; long d, r, swaptype, swap_cnt; void *a; /* Array of elements. */ size_t n, es; /* Number of elements; size. */ cmp_t *cmp; long nl, nr; struct common *c; struct qsort *qs2; pthread_t id; /* Initialize qsort arguments. */ id = qs->id; c = qs->common; es = c->es; cmp = c->cmp; swaptype = c->swaptype; a = qs->a; n = qs->n; top: /* From here on qsort(3) business as usual. 
*/ swap_cnt = 0; if (n < 7) { for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) for (pl = pm; pl > (char *)a && CMP(pl - es, pl) > 0; pl -= es) swap(pl, pl - es); return; } pm = (char *)a + (n / 2) * es; if (n > 7) { pl = a; pn = (char *)a + (n - 1) * es; if (n > 40) { d = (n / 8) * es; pl = med3(pl, pl + d, pl + 2 * d, cmp); pm = med3(pm - d, pm, pm + d, cmp); pn = med3(pn - 2 * d, pn - d, pn, cmp); } pm = med3(pl, pm, pn, cmp); } swap(a, pm); pa = pb = (char *)a + es; pc = pd = (char *)a + (n - 1) * es; for (;;) { while (pb <= pc && (r = CMP(pb, a)) <= 0) { if (r == 0) { swap_cnt = 1; swap(pa, pb); pa += es; } pb += es; } while (pb <= pc && (r = CMP(pc, a)) >= 0) { if (r == 0) { swap_cnt = 1; swap(pc, pd); pd -= es; } pc -= es; } if (pb > pc) break; swap(pb, pc); swap_cnt = 1; pb += es; pc -= es; } if (swap_cnt == 0) { /* Switch to insertion sort */ for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) for (pl = pm; pl > (char *)a && CMP(pl - es, pl) > 0; pl -= es) swap(pl, pl - es); return; } pn = (char *)a + n * es; r = min(pa - (char *)a, pb - pa); vecswap(a, pb - r, r); r = min(pd - pc, pn - pd - es); vecswap(pb, pn - r, r); nl = (pb - pa) / es; nr = (pd - pc) / es; /* Now try to launch subthreads. */ if (nl > c->forkelem && nr > c->forkelem && (qs2 = allocate_thread(c)) != NULL) { qs2->a = a; qs2->n = nl; pthread_cond_signal(&qs2->cond_st); pthread_mutex_unlock(&qs2->mtx_st); } else if (nl > 0) { qs->a = a; qs->n = nl; qsort_algo(qs); } if (nr > 0) { a = pn - nr * es; n = nr; goto top; } } /* Thread-callable quicksort. */ static void * qsort_thread(void *p) { struct qsort *qs, *qs2; int i; struct common *c; pthread_t id; qs = p; id = qs->id; c = qs->common; again: /* Wait for work to be allocated. 
*/ pthread_mutex_lock(&qs->mtx_st); while (qs->st == ts_idle) pthread_cond_wait(&qs->cond_st, &qs->mtx_st); pthread_mutex_unlock(&qs->mtx_st); if (qs->st == ts_term) { return(NULL); } assert(qs->st == ts_work); qsort_algo(qs); pthread_mutex_lock(&c->mtx_al); qs->st = ts_idle; c->idlethreads++; if (c->idlethreads == c->nthreads) { for (i = 0; i < c->nthreads; i++) { qs2 = &c->pool[i]; if (qs2 == qs) continue; pthread_mutex_lock(&qs2->mtx_st); qs2->st = ts_term; pthread_cond_signal(&qs2->cond_st); pthread_mutex_unlock(&qs2->mtx_st); } pthread_mutex_unlock(&c->mtx_al); return(NULL); } pthread_mutex_unlock(&c->mtx_al); goto again; } kmer-code-2013-trunk/libutil/bitOperations.h0000644000000000000000000001057312322046702017611 0ustar rootroot#ifndef BRI_BITS_H #define BRI_BITS_H // For dealing with the bits in bytes. // I wish I could claim these. // // Freed, Edwin E. 1983. "Binary Magic Number" Dr. Dobbs Journal // Vol. 78 (April) pp. 24-37 // // Supposedly tells us how to reverse the bits in a word, count the number // of set bits in a words and more. // // A bit of verbage on counting the number of set bits. The naive way // is to loop and shift: // // uint32 r = uint32ZERO; // while (x) { // r++; // x >>= 1; // } // return(r); // // http://remus.rutgers.edu/~rhoads/Code/bitcount3.c has an optimized // method: // // x -= (0xaaaaaaaa & x) >> 1; // x = (x & 0x33333333) + ((x >> 2) & 0x33333333); // x += x >> 4; // x &= 0x0f0f0f0f; // x += x >> 8; // x += x >> 16; // x &= 0x000000ff; // return(x); // // No loops! // // Freed's methods are easier to understand, and just as fast. // // Using our bit counting routines, Ross Lippert suggested a nice // way of computing log2 -- use log2 shifts to fill up the lower // bits, then count bits. 
See logBaseTwo*() // inline uint32 reverseBits32(uint32 x) { x = ((x >> 1) & uint32NUMBER(0x55555555)) | ((x << 1) & uint32NUMBER(0xaaaaaaaa)); x = ((x >> 2) & uint32NUMBER(0x33333333)) | ((x << 2) & uint32NUMBER(0xcccccccc)); x = ((x >> 4) & uint32NUMBER(0x0f0f0f0f)) | ((x << 4) & uint32NUMBER(0xf0f0f0f0)); x = ((x >> 8) & uint32NUMBER(0x00ff00ff)) | ((x << 8) & uint32NUMBER(0xff00ff00)); x = ((x >> 16) & uint32NUMBER(0x0000ffff)) | ((x << 16) & uint32NUMBER(0xffff0000)); return(x); } inline uint64 reverseBits64(uint64 x) { x = ((x >> 1) & uint64NUMBER(0x5555555555555555)) | ((x << 1) & uint64NUMBER(0xaaaaaaaaaaaaaaaa)); x = ((x >> 2) & uint64NUMBER(0x3333333333333333)) | ((x << 2) & uint64NUMBER(0xcccccccccccccccc)); x = ((x >> 4) & uint64NUMBER(0x0f0f0f0f0f0f0f0f)) | ((x << 4) & uint64NUMBER(0xf0f0f0f0f0f0f0f0)); x = ((x >> 8) & uint64NUMBER(0x00ff00ff00ff00ff)) | ((x << 8) & uint64NUMBER(0xff00ff00ff00ff00)); x = ((x >> 16) & uint64NUMBER(0x0000ffff0000ffff)) | ((x << 16) & uint64NUMBER(0xffff0000ffff0000)); x = ((x >> 32) & uint64NUMBER(0x00000000ffffffff)) | ((x << 32) & uint64NUMBER(0xffffffff00000000)); return(x); } #if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) #define PREFETCH(x) __builtin_prefetch((x), 0, 0) #else #define PREFETCH(x) #endif // Amazingingly, this is slower. From what I can google, the builtin // is using the 2^16 lookup table method - so a 64-bit popcount does // 4 lookups in the table and sums. Bad cache performance in codes // that already have bad cache performance, I'd guess. 
// //#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) //#define BUILTIN_POPCOUNT //#endif #ifdef BUILTIN_POPCOUNT inline uint32 countNumberOfSetBits32(uint32 x) { return(__builtin_popcount(x)); } inline uint64 countNumberOfSetBits64(uint64 x) { return(__builtin_popcountll(x)); } #else inline uint32 countNumberOfSetBits32(uint32 x) { x = ((x >> 1) & uint32NUMBER(0x55555555)) + (x & uint32NUMBER(0x55555555)); x = ((x >> 2) & uint32NUMBER(0x33333333)) + (x & uint32NUMBER(0x33333333)); x = ((x >> 4) & uint32NUMBER(0x0f0f0f0f)) + (x & uint32NUMBER(0x0f0f0f0f)); x = ((x >> 8) & uint32NUMBER(0x00ff00ff)) + (x & uint32NUMBER(0x00ff00ff)); x = ((x >> 16) & uint32NUMBER(0x0000ffff)) + (x & uint32NUMBER(0x0000ffff)); return(x); } inline uint64 countNumberOfSetBits64(uint64 x) { x = ((x >> 1) & uint64NUMBER(0x5555555555555555)) + (x & uint64NUMBER(0x5555555555555555)); x = ((x >> 2) & uint64NUMBER(0x3333333333333333)) + (x & uint64NUMBER(0x3333333333333333)); x = ((x >> 4) & uint64NUMBER(0x0f0f0f0f0f0f0f0f)) + (x & uint64NUMBER(0x0f0f0f0f0f0f0f0f)); x = ((x >> 8) & uint64NUMBER(0x00ff00ff00ff00ff)) + (x & uint64NUMBER(0x00ff00ff00ff00ff)); x = ((x >> 16) & uint64NUMBER(0x0000ffff0000ffff)) + (x & uint64NUMBER(0x0000ffff0000ffff)); x = ((x >> 32) & uint64NUMBER(0x00000000ffffffff)) + (x & uint64NUMBER(0x00000000ffffffff)); return(x); } #endif inline uint32 logBaseTwo32(uint32 x) { x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; return(countNumberOfSetBits32(x)); } inline uint64 logBaseTwo64(uint64 x) { x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; x |= x >> 32; return(countNumberOfSetBits64(x)); } #endif // BRI_BITS_H kmer-code-2013-trunk/libutil/logMsg.H0000644000000000000000000000504712322046702016157 0ustar rootroot#ifndef LOGMSG_H #define LOGMSG_H #include #include #include #include #include #include #include #include #include "util.h" class logMsg { public: logMsg(bool toScreen=false, uint32 r=8192) { _logLen = 0; _logMax = 
r; _log = new char [_logMax]; _resize = r; _toScreenToo = toScreen; }; ~logMsg() { delete [] _log; }; void setResize(uint32 r) { _resize = r; }; // Ensure that the string has at least 'moreSpace' available. // void resize(uint32 moreSpace) { if (_logLen + moreSpace < _logMax) return; _logMax += _logMax + moreSpace + 1; char *ll = new char [_logMax]; memcpy(ll, _log, sizeof(char) * _logLen); delete [] _log; _log = ll; }; // Add a message to the log, assume the message is less than 8192 bytes. Would be nice to parse // the fmt string (and any args) but that's a lot of work (and already done if you have // vsnprintf. // // It warns if you overwrote memory. // void add(char const *fmt, ...) { va_list ap; resize(_resize); if (_toScreenToo) { va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); } // Reinit the ap, since it seems to get 'used up' if _toScreenToo is set. va_start(ap, fmt); _logLen += vsprintf(_log + _logLen, fmt, ap); va_end(ap); if (_logLen > _logMax) fprintf(stderr, "logMsg::add()-- HEY! I wrote "uint32FMT" bytes beyond the end of the buffer!\n" "logMsg::add()-- This program will probably crash soon....\n\n%s\n\n", _logLen - _logMax, _log); }; // Dump the message to a file, taking care of errors. // void write(int file, char const *name=0L) { errno = 0; ::write(file, _log, sizeof(char) * _logLen); if (errno) { fprintf(stderr, "logMsg::write()-- Couldn't write to the log message file '%s': %s\n", name ? name : "(unknown)", strerror(errno)); exit(1); } }; void fwrite(FILE *file, char const *name=0L) { errno = 0; ::fwrite(_log, sizeof(char), _logLen, file); if (errno) { fprintf(stderr, "logMsg::fwrite()-- Couldn't write to the log message file '%s': %s\n", name ? 
name : "(unknown)", strerror(errno)); exit(1); } }; private: uint32 _logLen; uint32 _logMax; char *_log; uint32 _resize; bool _toScreenToo; }; #endif // LOGMSG_H kmer-code-2013-trunk/libutil/kazlib/0000755000000000000000000000000012641613360016070 5ustar rootrootkmer-code-2013-trunk/libutil/kazlib/dict.h0000644000000000000000000001070010541426140017155 0ustar rootroot/* * Dictionary Abstract Data Type * Copyright (C) 1997 Kaz Kylheku * * Free Software License: * * All rights are reserved by the author, with the following exceptions: * Permission is granted to freely reproduce and distribute this software, * possibly in exchange for a fee, provided that this copyright notice appears * intact. Permission is also granted to adapt this software to produce * derivative works, as long as the modified versions carry this copyright * notice and additional notices stating that the work has been modified. * This source code may be translated into executable form and incorporated * into proprietary software; there is no requirement for such software to * contain a copyright notice related to this source. 
* */ #ifndef DICT_H #define DICT_H #include #ifdef KAZLIB_SIDEEFFECT_DEBUG #include "sfx.h" #endif /* * Blurb for inclusion into C++ translation units */ #ifdef __cplusplus extern "C" { #endif typedef unsigned long dictcount_t; #define DICTCOUNT_T_MAX ULONG_MAX /* * The dictionary is implemented as a red-black tree */ typedef enum { dnode_red, dnode_black } dnode_color_t; typedef struct dnode_t { #if defined(DICT_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) struct dnode_t *dict_left; struct dnode_t *dict_right; struct dnode_t *dict_parent; dnode_color_t dict_color; const void *dict_key; void *dict_data; #else int dict_dummy; #endif } dnode_t; typedef int (*dict_comp_t)(const void *, const void *); typedef dnode_t *(*dnode_alloc_t)(void *); typedef void (*dnode_free_t)(dnode_t *, void *); typedef struct dict_t { #if defined(DICT_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) dnode_t dict_nilnode; dictcount_t dict_nodecount; dictcount_t dict_maxcount; dict_comp_t dict_compare; dnode_alloc_t dict_allocnode; dnode_free_t dict_freenode; void *dict_context; int dict_dupes; #else int dict_dummmy; #endif } dict_t; typedef void (*dnode_process_t)(dict_t *, dnode_t *, void *); typedef struct dict_load_t { #if defined(DICT_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) dict_t *dict_dictptr; dnode_t dict_nilnode; #else int dict_dummmy; #endif } dict_load_t; extern dict_t *dict_create(dictcount_t, dict_comp_t); extern void dict_set_allocator(dict_t *, dnode_alloc_t, dnode_free_t, void *); extern void dict_destroy(dict_t *); extern void dict_free_nodes(dict_t *); extern void dict_free(dict_t *); extern dict_t *dict_init(dict_t *, dictcount_t, dict_comp_t); extern void dict_init_like(dict_t *, const dict_t *); extern int dict_verify(dict_t *); extern int dict_similar(const dict_t *, const dict_t *); extern dnode_t *dict_lookup(dict_t *, const void *); extern dnode_t *dict_lower_bound(dict_t *, const void *); extern dnode_t *dict_upper_bound(dict_t *, const void *); extern 
void dict_insert(dict_t *, dnode_t *, const void *); extern dnode_t *dict_delete(dict_t *, dnode_t *); extern int dict_alloc_insert(dict_t *, const void *, void *); extern void dict_delete_free(dict_t *, dnode_t *); extern dnode_t *dict_first(dict_t *); extern dnode_t *dict_last(dict_t *); extern dnode_t *dict_next(dict_t *, dnode_t *); extern dnode_t *dict_prev(dict_t *, dnode_t *); extern dictcount_t dict_count(dict_t *); extern int dict_isempty(dict_t *); extern int dict_isfull(dict_t *); extern int dict_contains(dict_t *, dnode_t *); extern void dict_allow_dupes(dict_t *); extern int dnode_is_in_a_dict(dnode_t *); extern dnode_t *dnode_create(void *); extern dnode_t *dnode_init(dnode_t *, void *); extern void dnode_destroy(dnode_t *); extern void *dnode_get(dnode_t *); extern const void *dnode_getkey(dnode_t *); extern void dnode_put(dnode_t *, void *); extern void dict_process(dict_t *, void *, dnode_process_t); extern void dict_load_begin(dict_load_t *, dict_t *); extern void dict_load_next(dict_load_t *, dnode_t *, const void *); extern void dict_load_end(dict_load_t *); extern void dict_merge(dict_t *, dict_t *); #if defined(DICT_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) #ifdef KAZLIB_SIDEEFFECT_DEBUG #define dict_isfull(D) (SFX_CHECK(D)->dict_nodecount == (D)->dict_maxcount) #else #define dict_isfull(D) ((D)->dict_nodecount == (D)->dict_maxcount) #endif #define dict_count(D) ((D)->dict_nodecount) #define dict_isempty(D) ((D)->dict_nodecount == 0) #define dnode_get(N) ((N)->dict_data) #define dnode_getkey(N) ((N)->dict_key) #define dnode_put(N, X) ((N)->dict_data = (X)) #endif #ifdef __cplusplus } #endif #endif kmer-code-2013-trunk/libutil/kazlib/dict.c0000644000000000000000000007451510541426140017166 0ustar rootroot/* * Dictionary Abstract Data Type * Copyright (C) 1997 Kaz Kylheku * * Free Software License: * * All rights are reserved by the author, with the following exceptions: * Permission is granted to freely reproduce and distribute this 
software, * possibly in exchange for a fee, provided that this copyright notice appears * intact. Permission is also granted to adapt this software to produce * derivative works, as long as the modified versions carry this copyright * notice and additional notices stating that the work has been modified. * This source code may be translated into executable form and incorporated * into proprietary software; there is no requirement for such software to * contain a copyright notice related to this source. * */ #define NDEBUG #include #include #include #include #define DICT_IMPLEMENTATION #include "dict.h" // bpw 20050309 define this to use a qsort(3) compatible sort function, // requiring two dereferences to get the data instead of one. // #define BE_QSORT_COMPATIBLE /* * These macros provide short convenient names for structure members, * which are embellished with dict_ prefixes so that they are * properly confined to the documented namespace. It's legal for a * program which uses dict to define, for instance, a macro called ``parent''. * Such a macro would interfere with the dnode_t struct definition. * In general, highly portable and reusable C modules which expose their * structures need to confine structure member names to well-defined spaces. * The resulting identifiers aren't necessarily convenient to use, nor * readable, in the implementation, however! 
*/

/* Short aliases for the dict_-prefixed members of dnode_t, dict_t and dict_load_t. */
#define left dict_left
#define right dict_right
#define parent dict_parent
#define color dict_color
#define key dict_key
#define data dict_data

#define nilnode dict_nilnode
#define nodecount dict_nodecount
#define maxcount dict_maxcount
#define compare dict_compare
#define allocnode dict_allocnode
#define freenode dict_freenode
#define context dict_context
#define dupes dict_dupes

#define dictptr dict_dictptr

/* The root of the tree is the left child of the sentinel nil node. */
#define dict_root(D) ((D)->nilnode.left)
/* Address of the sentinel nil node embedded in the dictionary. */
#define dict_nil(D) (&(D)->nilnode)
/* Size of the per-level node array used by dict_load_end(). */
#define DICT_DEPTH_MAX 64

/* Default node allocator and deallocator, defined near the end of the file. */
static dnode_t *dnode_alloc(void *context);
static void dnode_free(dnode_t *node, void *context);

/*
 * Perform a ``left rotation'' adjustment on the tree.  The given node P and
 * its right child C are rearranged so that the P instead becomes the left
 * child of C.   The left subtree of C is inherited as the new right subtree
 * for P.  The ordering of the keys within the tree is thus preserved.
 */

static void rotate_left(dnode_t *upper)
{
    dnode_t *lower, *lowleft, *upparent;

    lower = upper->right;
    /* the left subtree of the lower node becomes the upper node's right subtree */
    upper->right = lowleft = lower->left;
    /* written unconditionally, whatever node lowleft is */
    lowleft->parent = upper;

    lower->parent = upparent = upper->parent;

    /* don't need to check for root node here because root->parent is
       the sentinel nil node, and root->parent->left points back to root */

    if (upper == upparent->left) {
        upparent->left = lower;
    } else {
        assert (upper == upparent->right);
        upparent->right = lower;
    }

    lower->left = upper;
    upper->parent = lower;
}

/*
 * This operation is the ``mirror'' image of rotate_left. It is
 * the same procedure, but with left and right interchanged.
*/

static void rotate_right(dnode_t *upper)
{
    dnode_t *lower, *lowright, *upparent;

    lower = upper->left;
    /* the right subtree of the lower node becomes the upper node's left subtree */
    upper->left = lowright = lower->right;
    lowright->parent = upper;

    lower->parent = upparent = upper->parent;

    /* hook the lower node into whichever slot of the parent held the upper node */
    if (upper == upparent->right) {
        upparent->right = lower;
    } else {
        assert (upper == upparent->left);
        upparent->left = lower;
    }

    lower->right = upper;
    upper->parent = lower;
}

/*
 * Do a postorder traversal of the tree rooted at the specified
 * node and free everything under it.  Used by dict_free().
 * Each node is released with the dictionary's installed freenode
 * routine; the recursion stops at the nil sentinel.
 */

static void free_nodes(dict_t *dict, dnode_t *node, dnode_t *nil)
{
    if (node == nil)
        return;
    free_nodes(dict, node->left, nil);
    free_nodes(dict, node->right, nil);
    dict->freenode(node, dict->context);
}

/*
 * This procedure performs a verification that the given subtree is a binary
 * search tree. It performs an inorder traversal of the tree using the
 * dict_next() successor function, verifying that the key of each node is
 * strictly lower than that of its successor, if duplicates are not allowed,
 * or lower or equal if duplicates are allowed. This function is used for
 * debugging purposes.
 *
 * Returns 1 if the ordering holds, 0 otherwise.  With BE_QSORT_COMPATIBLE
 * defined the comparison function receives the addresses of the keys
 * rather than the keys themselves.
 */
static int verify_bintree(dict_t *dict)
{
    dnode_t *first, *next;

    first = dict_first(dict);

    if (dict->dupes) {
        /* duplicates allowed: only a strictly decreasing pair is an error */
        while (first && (next = dict_next(dict, first))) {
#ifdef BE_QSORT_COMPATIBLE
            if (dict->compare(&first->key, &next->key) > 0)
                return 0;
#else
            if (dict->compare(first->key, next->key) > 0)
                return 0;
#endif
            first = next;
        }
    } else {
        /* no duplicates: equal neighbours are an error too */
        while (first && (next = dict_next(dict, first))) {
#ifdef BE_QSORT_COMPATIBLE
            if (dict->compare(&first->key, &next->key) >= 0)
                return 0;
#else
            if (dict->compare(first->key, next->key) >= 0)
                return 0;
#endif
            first = next;
        }
    }
    return 1;
}


/*
 * This function recursively verifies that the given binary subtree satisfies
 * three of the red black properties. It checks that every red node has only
 * black children. It makes sure that each node is either red or black. And it
 * checks that every path has the same count of black nodes from root to leaf.
* It returns the blackheight of the given subtree; this allows blackheights to * be computed recursively and compared for left and right siblings for * mismatches. It does not check for every nil node being black, because there * is only one sentinel nil node. The return value of this function is the * black height of the subtree rooted at the node ``root'', or zero if the * subtree is not red-black. */ static unsigned int verify_redblack(dnode_t *nil, dnode_t *root) { unsigned height_left, height_right; if (root != nil) { height_left = verify_redblack(nil, root->left); height_right = verify_redblack(nil, root->right); if (height_left == 0 || height_right == 0) return 0; if (height_left != height_right) return 0; if (root->color == dnode_red) { if (root->left->color != dnode_black) return 0; if (root->right->color != dnode_black) return 0; return height_left; } if (root->color != dnode_black) return 0; return height_left + 1; } return 1; } /* * Compute the actual count of nodes by traversing the tree and * return it. This could be compared against the stored count to * detect a mismatch. */ static dictcount_t verify_node_count(dnode_t *nil, dnode_t *root) { if (root == nil) return 0; else return 1 + verify_node_count(nil, root->left) + verify_node_count(nil, root->right); } /* * Verify that the tree contains the given node. This is done by * traversing all of the nodes and comparing their pointers to the * given pointer. Returns 1 if the node is found, otherwise * returns zero. It is intended for debugging purposes. */ static int verify_dict_has_node(dnode_t *nil, dnode_t *root, dnode_t *node) { if (root != nil) { return root == node || verify_dict_has_node(nil, root->left, node) || verify_dict_has_node(nil, root->right, node); } return 0; } /* * Dynamically allocate and initialize a dictionary object. 
*/ dict_t *dict_create(dictcount_t maxcount, dict_comp_t comp) { dict_t *new = malloc(sizeof *new); if (new) { new->compare = comp; new->allocnode = dnode_alloc; new->freenode = dnode_free; new->context = NULL; new->nodecount = 0; new->maxcount = maxcount; new->nilnode.left = &new->nilnode; new->nilnode.right = &new->nilnode; new->nilnode.parent = &new->nilnode; new->nilnode.color = dnode_black; new->dupes = 0; } return new; } /* * Select a different set of node allocator routines. */ void dict_set_allocator(dict_t *dict, dnode_alloc_t al, dnode_free_t fr, void *context) { assert (dict_count(dict) == 0); assert ((al == NULL && fr == NULL) || (al != NULL && fr != NULL)); dict->allocnode = al ? al : dnode_alloc; dict->freenode = fr ? fr : dnode_free; dict->context = context; } /* * Free a dynamically allocated dictionary object. Removing the nodes * from the tree before deleting it is required. */ void dict_destroy(dict_t *dict) { assert (dict_isempty(dict)); free(dict); } /* * Free all the nodes in the dictionary by using the dictionary's * installed free routine. The dictionary is emptied. */ void dict_free_nodes(dict_t *dict) { dnode_t *nil = dict_nil(dict), *root = dict_root(dict); free_nodes(dict, root, nil); dict->nodecount = 0; dict->nilnode.left = &dict->nilnode; dict->nilnode.right = &dict->nilnode; } /* * Obsolescent function, equivalent to dict_free_nodes */ void dict_free(dict_t *dict) { #ifdef KAZLIB_OBSOLESCENT_DEBUG assert ("call to obsolescent function dict_free()" && 0); #endif dict_free_nodes(dict); } /* * Initialize a user-supplied dictionary object. 
*/ dict_t *dict_init(dict_t *dict, dictcount_t maxcount, dict_comp_t comp) { dict->compare = comp; dict->allocnode = dnode_alloc; dict->freenode = dnode_free; dict->context = NULL; dict->nodecount = 0; dict->maxcount = maxcount; dict->nilnode.left = &dict->nilnode; dict->nilnode.right = &dict->nilnode; dict->nilnode.parent = &dict->nilnode; dict->nilnode.color = dnode_black; dict->dupes = 0; return dict; } /* * Initialize a dictionary in the likeness of another dictionary */ void dict_init_like(dict_t *dict, const dict_t *template) { dict->compare = template->compare; dict->allocnode = template->allocnode; dict->freenode = template->freenode; dict->context = template->context; dict->nodecount = 0; dict->maxcount = template->maxcount; dict->nilnode.left = &dict->nilnode; dict->nilnode.right = &dict->nilnode; dict->nilnode.parent = &dict->nilnode; dict->nilnode.color = dnode_black; dict->dupes = template->dupes; assert (dict_similar(dict, template)); } /* * Remove all nodes from the dictionary (without freeing them in any way). */ static void dict_clear(dict_t *dict) { dict->nodecount = 0; dict->nilnode.left = &dict->nilnode; dict->nilnode.right = &dict->nilnode; dict->nilnode.parent = &dict->nilnode; assert (dict->nilnode.color == dnode_black); } /* * Verify the integrity of the dictionary structure. This is provided for * debugging purposes, and should be placed in assert statements. Just because * this function succeeds doesn't mean that the tree is not corrupt. Certain * corruptions in the tree may simply cause undefined behavior. 
*/ int dict_verify(dict_t *dict) { dnode_t *nil = dict_nil(dict), *root = dict_root(dict); /* check that the sentinel node and root node are black */ if (root->color != dnode_black) return(0 * fprintf(stderr, "dict_verify()-- Root node not black!\n")); if (nil->color != dnode_black) return(0 * fprintf(stderr, "dict_verify()-- Nil node not black!\n")); if (nil->right != nil) return(0 * fprintf(stderr, "dict_verify()-- Nul->right not Nil!\n")); /* nil->left is the root node; check that its parent pointer is nil */ if (nil->left->parent != nil) return(0 * fprintf(stderr, "dict_verify()-- Nul->left->parent is not Nil!\n")); /* perform a weak test that the tree is a binary search tree */ if (!verify_bintree(dict)) return(0 * fprintf(stderr, "dict_verify()-- Not a binary search tree!\n")); /* verify that the tree is a red-black tree */ if (!verify_redblack(nil, root)) return(0 * fprintf(stderr, "dict_verify()-- Not a red-black tree!\n")); if (verify_node_count(nil, root) != dict_count(dict)) return(0 * fprintf(stderr, "dict_verify()-- Node count is wrong!\n")); return 1; } /* * Determine whether two dictionaries are similar: have the same comparison and * allocator functions, and same status as to whether duplicates are allowed. */ int dict_similar(const dict_t *left, const dict_t *right) { if (left->compare != right->compare) return 0; if (left->allocnode != right->allocnode) return 0; if (left->freenode != right->freenode) return 0; if (left->context != right->context) return 0; if (left->dupes != right->dupes) return 0; return 1; } /* * Locate a node in the dictionary having the given key. * If the node is not found, a null a pointer is returned (rather than * a pointer that dictionary's nil sentinel node), otherwise a pointer to the * located node is returned. 
*/

dnode_t *dict_lookup(dict_t *dict, const void *key)
{
    dnode_t *root = dict_root(dict);
    dnode_t *nil = dict_nil(dict);
    dnode_t *saved;
    int result;

    /* simple binary search adapted for trees that contain duplicate keys */

    while (root != nil) {
        /* with BE_QSORT_COMPATIBLE the comparison gets the addresses of the keys */
#ifdef BE_QSORT_COMPATIBLE
        result = dict->compare(&key, &root->key);
#else
        result = dict->compare(key, root->key);
#endif
        if (result < 0)
            root = root->left;
        else if (result > 0)
            root = root->right;
        else {
            if (!dict->dupes) { /* no duplicates, return match */
                return root;
            } else {            /* could be dupes, find leftmost one */
                do {
                    /* remember the latest match, then look for an
                       earlier one in its left subtree */
                    saved = root;
                    root = root->left;
                    /* skip right past nodes that compare unequal to the key */
#ifdef BE_QSORT_COMPATIBLE
                    while (root != nil && dict->compare(&key, &root->key))
                        root = root->right;
#else
                    while (root != nil && dict->compare(key, root->key))
                        root = root->right;
#endif
                } while (root != nil);
                return saved;
            }
        }
    }

    return NULL;
}

/*
 * Look for the node corresponding to the lowest key that is equal to or
 * greater than the given key.  If there is no such node, return null.
 */

dnode_t *dict_lower_bound(dict_t *dict, const void *key)
{
    dnode_t *root = dict_root(dict);
    dnode_t *nil = dict_nil(dict);
    dnode_t *tentative = 0;     /* best candidate seen so far */

    while (root != nil) {
#ifdef BE_QSORT_COMPATIBLE
        int result = dict->compare(&key, &root->key);
#else
        int result = dict->compare(key, root->key);
#endif
        if (result > 0) {
            /* node is below the key: it cannot be the answer */
            root = root->right;
        } else if (result < 0) {
            /* node is above the key: a candidate, but look for a lower one */
            tentative = root;
            root = root->left;
        } else {
            if (!dict->dupes) {
                return root;
            } else {
                /* with duplicates, keep going left for the first equal node */
                tentative = root;
                root = root->left;
            }
        }
    }

    return tentative;
}

/*
 * Look for the node corresponding to the greatest key that is equal to or
 * lower than the given key.  If there is no such node, return null.
*/

dnode_t *dict_upper_bound(dict_t *dict, const void *key)
{
    dnode_t *root = dict_root(dict);
    dnode_t *nil = dict_nil(dict);
    dnode_t *tentative = 0;     /* best candidate seen so far */

    while (root != nil) {
#ifdef BE_QSORT_COMPATIBLE
        int result = dict->compare(&key, &root->key);
#else
        int result = dict->compare(key, root->key);
#endif
        if (result < 0) {
            /* node is above the key: it cannot be the answer */
            root = root->left;
        } else if (result > 0) {
            /* node is below the key: a candidate, but look for a higher one */
            tentative = root;
            root = root->right;
        } else {
            if (!dict->dupes) {
                return root;
            } else {
                /* with duplicates, keep going right for the last equal node */
                tentative = root;
                root = root->right;
            }
        }
    }

    return tentative;
}

/*
 * Insert a node into the dictionary. The node should have been
 * initialized with a data field. All other fields are ignored.
 * The behavior is undefined if the user attempts to insert into
 * a dictionary that is already full (for which the dict_isfull()
 * function returns true).
 */

void dict_insert(dict_t *dict, dnode_t *node, const void *key)
{
    dnode_t *where = dict_root(dict), *nil = dict_nil(dict);
    dnode_t *parent = nil, *uncle, *grandpa;
    /* starts negative so that, in an empty tree, the node is attached as
       nil->left, which is the root slot */
    int result = -1;

    node->key = key;

    assert (!dict_isfull(dict));
    assert (!dict_contains(dict, node));
    assert (!dnode_is_in_a_dict(node));

    /* basic binary tree insert */

    while (where != nil) {
        parent = where;
#ifdef BE_QSORT_COMPATIBLE
        result = dict->compare(&key, &where->key);
#else
        result = dict->compare(key, where->key);
#endif
        /* trap attempts at duplicate key insertion unless it's explicitly allowed */
        assert (dict->dupes || result != 0);
        /* equal keys descend to the right */
        if (result < 0)
            where = where->left;
        else
            where = where->right;
    }

    assert (where == nil);

    if (result < 0)
        parent->left = node;
    else
        parent->right = node;

    node->parent = parent;
    node->left = nil;
    node->right = nil;

    dict->nodecount++;

    /* red black adjustments */

    node->color = dnode_red;

    while (parent->color == dnode_red) {
        grandpa = parent->parent;
        if (parent == grandpa->left) {
            uncle = grandpa->right;
            if (uncle->color == dnode_red) {    /* red parent, red uncle */
                /* recolor and continue the loop from the grandparent */
                parent->color = dnode_black;
                uncle->color = dnode_black;
                grandpa->color = dnode_red;
                node = grandpa;
                parent = grandpa->parent;
            } else {                            /* red parent, black uncle */
                if (node == parent->right) {
                    rotate_left(parent);
                    parent = node;
                    assert (grandpa == parent->parent);
                    /* rotation between parent and child preserves grandpa */
                }
                parent->color = dnode_black;
                grandpa->color = dnode_red;
                rotate_right(grandpa);
                break;
            }
        } else {        /* symmetric cases: parent == parent->parent->right */
            uncle = grandpa->left;
            if (uncle->color == dnode_red) {
                parent->color = dnode_black;
                uncle->color = dnode_black;
                grandpa->color = dnode_red;
                node = grandpa;
                parent = grandpa->parent;
            } else {
                if (node == parent->left) {
                    rotate_right(parent);
                    parent = node;
                    assert (grandpa == parent->parent);
                }
                parent->color = dnode_black;
                grandpa->color = dnode_red;
                rotate_left(grandpa);
                break;
            }
        }
    }

    /* the recoloring above may have turned the root red */
    dict_root(dict)->color = dnode_black;

    assert (dict_verify(dict));
}

/*
 * Delete the given node from the dictionary. If the given node does not belong
 * to the given dictionary, undefined behavior results.  A pointer to the
 * deleted node is returned.
 */

dnode_t *dict_delete(dict_t *dict, dnode_t *delete)
{
    dnode_t *nil = dict_nil(dict), *child, *delparent = delete->parent;

    /* basic deletion */

    assert (!dict_isempty(dict));
    assert (dict_contains(dict, delete));

    /*
     * If the node being deleted has two children, then we replace it with its
     * successor (i.e. the leftmost node in the right subtree.) By doing this,
     * we avoid the traditional algorithm under which the successor's key and
     * value *only* move to the deleted node and the successor is spliced out
     * from the tree. We cannot use this approach because the user may hold
     * pointers to the successor, or nodes may be inextricably tied to some
     * other structures by way of embedding, etc. So we must splice out the
     * node we are given, not some other node, and must not move contents from
     * one node to another behind the user's back.
     */

    if (delete->left != nil && delete->right != nil) {
        dnode_t *next = dict_next(dict, delete);
        dnode_t *nextparent = next->parent;
        dnode_color_t nextcolor = next->color;

        assert (next != nil);
        assert (next->parent != nil);
        assert (next->left == nil);

        /*
         * First, splice out the successor from the tree completely, by
         * moving up its right child into its place.
         */

        child = next->right;
        /* written unconditionally, so when child is the nil sentinel the
           sentinel's parent link is overwritten */
        child->parent = nextparent;

        if (nextparent->left == next) {
            nextparent->left = child;
        } else {
            assert (nextparent->right == next);
            nextparent->right = child;
        }

        /*
         * Now that the successor has been extricated from the tree, install it
         * in place of the node that we want deleted.
         */

        next->parent = delparent;
        next->left = delete->left;
        next->right = delete->right;
        next->left->parent = next;
        next->right->parent = next;
        /* swap colors: the successor takes over the deleted node's color,
           and the deleted node carries the color that left the tree */
        next->color = delete->color;
        delete->color = nextcolor;

        if (delparent->left == delete) {
            delparent->left = next;
        } else {
            assert (delparent->right == delete);
            delparent->right = next;
        }

    } else {
        assert (delete != nil);
        assert (delete->left == nil || delete->right == nil);

        /* at most one real child: move it (or nil) up into the node's place */
        child = (delete->left != nil) ? delete->left : delete->right;

        child->parent = delparent = delete->parent;

        if (delete == delparent->left) {
            delparent->left = child;
        } else {
            assert (delete == delparent->right);
            delparent->right = child;
        }
    }

    /* null links are what dnode_is_in_a_dict() tests for */
    delete->parent = NULL;
    delete->right = NULL;
    delete->left = NULL;

    dict->nodecount--;

    assert (verify_bintree(dict));

    /* red-black adjustments */

    if (delete->color == dnode_black) {
        dnode_t *parent, *sister;

        /* NOTE(review): presumably the root is made red here so that the
           loop below stops if child climbs all the way to the root; it is
           set black again after the loop */
        dict_root(dict)->color = dnode_red;

        while (child->color == dnode_black) {
            parent = child->parent;
            if (child == parent->left) {
                sister = parent->right;
                assert (sister != nil);
                if (sister->color == dnode_red) {
                    sister->color = dnode_black;
                    parent->color = dnode_red;
                    rotate_left(parent);
                    sister = parent->right;
                    assert (sister != nil);
                }
                if (sister->left->color == dnode_black
                        && sister->right->color == dnode_black) {
                    /* both of the sister's children are black: recolor
                       and continue from the parent */
                    sister->color = dnode_red;
                    child = parent;
                } else {
                    if (sister->right->color == dnode_black) {
                        assert (sister->left->color == dnode_red);
                        sister->left->color = dnode_black;
                        sister->color = dnode_red;
                        rotate_right(sister);
                        sister = parent->right;
                        assert (sister != nil);
                    }
                    sister->color = parent->color;
                    sister->right->color = dnode_black;
                    parent->color = dnode_black;
                    rotate_left(parent);
                    break;
                }
            } else {    /* symmetric case: child == child->parent->right */
                assert (child == parent->right);
                sister = parent->left;
                assert (sister != nil);
                if (sister->color == dnode_red) {
                    sister->color = dnode_black;
                    parent->color = dnode_red;
                    rotate_right(parent);
                    sister = parent->left;
                    assert (sister != nil);
                }
                if (sister->right->color == dnode_black
                        && sister->left->color == dnode_black) {
                    sister->color = dnode_red;
                    child = parent;
                } else {
                    if (sister->left->color == dnode_black) {
                        assert (sister->right->color == dnode_red);
                        sister->right->color = dnode_black;
                        sister->color = dnode_red;
                        rotate_left(sister);
                        sister = parent->left;
                        assert (sister != nil);
                    }
                    sister->color = parent->color;
                    sister->left->color = dnode_black;
                    parent->color = dnode_black;
                    rotate_right(parent);
                    break;
                }
            }
        }

        child->color = dnode_black;
        dict_root(dict)->color = dnode_black;
    }

    assert (dict_verify(dict));

    return delete;
}

/*
 * Allocate a node using the dictionary's allocator routine, give it
 * the data item.
 * Returns 1 if the node was allocated and inserted, 0 if the allocator
 * returned a null pointer.
 */

int dict_alloc_insert(dict_t *dict, const void *key, void *data)
{
    dnode_t *node = dict->allocnode(dict->context);

    if (node) {
        dnode_init(node, data);
        dict_insert(dict, node, key);
        return 1;
    }
    return 0;
}

/*
 * Delete the node from the dictionary and release it with the
 * dictionary's installed free routine.
 */
void dict_delete_free(dict_t *dict, dnode_t *node)
{
    dict_delete(dict, node);
    dict->freenode(node, dict->context);
}

/*
 * Return the node with the lowest (leftmost) key. If the dictionary is empty
 * (that is, dict_isempty(dict) returns 1) a null pointer is returned.
 */

dnode_t *dict_first(dict_t *dict)
{
    dnode_t *nil = dict_nil(dict), *root = dict_root(dict), *left;

    if (root != nil)
        while ((left = root->left) != nil)
            root = left;

    return (root == nil) ? NULL : root;
}

/*
 * Return the node with the highest (rightmost) key. If the dictionary is empty
 * (that is, dict_isempty(dict) returns 1) a null pointer is returned.
 */

dnode_t *dict_last(dict_t *dict)
{
    dnode_t *nil = dict_nil(dict), *root = dict_root(dict), *right;

    if (root != nil)
        while ((right = root->right) != nil)
            root = right;

    return (root == nil) ? NULL : root;
}

/*
 * Return the given node's successor node---the node which has the
 * next key in the left to right ordering. If the node has
 * no successor, a null pointer is returned rather than a pointer to
 * the nil node.
 */

dnode_t *dict_next(dict_t *dict, dnode_t *curr)
{
    dnode_t *nil = dict_nil(dict), *parent, *left;

    /* with a right subtree, the successor is that subtree's leftmost node */
    if (curr->right != nil) {
        curr = curr->right;
        while ((left = curr->left) != nil)
            curr = left;
        return curr;
    }

    /* otherwise climb until we arrive from a left child */
    parent = curr->parent;

    while (parent != nil && curr == parent->right) {
        curr = parent;
        parent = curr->parent;
    }

    return (parent == nil) ? NULL : parent;
}

/*
 * Return the given node's predecessor, in the key order.
 * A null pointer (not the nil sentinel node) is returned if there is
 * no predecessor.
*/ dnode_t *dict_prev(dict_t *dict, dnode_t *curr) { dnode_t *nil = dict_nil(dict), *parent, *right; if (curr->left != nil) { curr = curr->left; while ((right = curr->right) != nil) curr = right; return curr; } parent = curr->parent; while (parent != nil && curr == parent->left) { curr = parent; parent = curr->parent; } return (parent == nil) ? NULL : parent; } void dict_allow_dupes(dict_t *dict) { dict->dupes = 1; } #undef dict_count #undef dict_isempty #undef dict_isfull #undef dnode_get #undef dnode_put #undef dnode_getkey dictcount_t dict_count(dict_t *dict) { return dict->nodecount; } int dict_isempty(dict_t *dict) { return dict->nodecount == 0; } int dict_isfull(dict_t *dict) { return dict->nodecount == dict->maxcount; } int dict_contains(dict_t *dict, dnode_t *node) { return verify_dict_has_node(dict_nil(dict), dict_root(dict), node); } static dnode_t *dnode_alloc(void *context) { return malloc(sizeof *dnode_alloc(NULL)); } static void dnode_free(dnode_t *node, void *context) { free(node); } dnode_t *dnode_create(void *data) { dnode_t *new = malloc(sizeof *new); if (new) { new->data = data; new->parent = NULL; new->left = NULL; new->right = NULL; } return new; } dnode_t *dnode_init(dnode_t *dnode, void *data) { dnode->data = data; dnode->parent = NULL; dnode->left = NULL; dnode->right = NULL; return dnode; } void dnode_destroy(dnode_t *dnode) { assert (!dnode_is_in_a_dict(dnode)); free(dnode); } void *dnode_get(dnode_t *dnode) { return dnode->data; } const void *dnode_getkey(dnode_t *dnode) { return dnode->key; } void dnode_put(dnode_t *dnode, void *data) { dnode->data = data; } int dnode_is_in_a_dict(dnode_t *dnode) { return (dnode->parent && dnode->left && dnode->right); } void dict_process(dict_t *dict, void *context, dnode_process_t function) { dnode_t *node = dict_first(dict), *next; while (node != NULL) { /* check for callback function deleting */ /* the next node from under us */ assert (dict_contains(dict, node)); next = dict_next(dict, node); 
function(dict, node, context); node = next; } } /* Prepare a loader: remember the target dictionary and make the loader's private nil node point at itself on both sides (an empty list). */ static void load_begin_internal(dict_load_t *load, dict_t *dict) { load->dictptr = dict; load->nilnode.left = &load->nilnode; load->nilnode.right = &load->nilnode; } /* Begin a bulk load into dict, which must be empty (asserted). */ void dict_load_begin(dict_load_t *load, dict_t *dict) { assert (dict_isempty(dict)); load_begin_internal(load, dict); } /* Append newnode, with the given key, to the loader's list and bump the dictionary's node count. The list is chained through the left pointers: the loader nil's left is the first node appended, its right is the most recent one. The node must not already be in a dictionary and the dictionary must not be full (both asserted). */ void dict_load_next(dict_load_t *load, dnode_t *newnode, const void *key) { dict_t *dict = load->dictptr; dnode_t *nil = &load->nilnode; assert (!dnode_is_in_a_dict(newnode)); assert (dict->nodecount < dict->maxcount); /* NOTE(review): the ordering check below compares the new key with nil->left, the first node loaded, not with nil->right, the most recently appended node --- confirm this weaker check is intended. */ #ifndef NDEBUG if (dict->nodecount > 0) { #ifdef BE_QSORT_COMPATIBLE if (dict->dupes) assert (dict->compare(&nil->left->key, &key) <= 0); else assert (dict->compare(&nil->left->key, &key) < 0); #else if (dict->dupes) assert (dict->compare(nil->left->key, key) <= 0); else assert (dict->compare(nil->left->key, key) < 0); #endif } #endif newnode->key = key; nil->right->left = newnode; nil->right = newnode; newnode->left = nil; dict->nodecount++; } /* Finish a bulk load: walk the loader's list in the order the nodes were appended and link them into the dictionary's tree. tree[] holds, per level, a node whose right subtree is still to be attached; complete is the most recently finished subtree. Node colors are derived from the level parity, which relies on dnode_red == 0 and dnode_black == 1 (asserted). */ void dict_load_end(dict_load_t *load) { dict_t *dict = load->dictptr; dnode_t *tree[DICT_DEPTH_MAX] = { 0 }; dnode_t *curr, *dictnil = dict_nil(dict), *loadnil = &load->nilnode, *next; dnode_t *complete = 0; dictcount_t fullcount = DICTCOUNT_T_MAX, nodecount = dict->nodecount; dictcount_t botrowcount; unsigned baselevel = 0, level = 0, i; assert (dnode_red == 0 && dnode_black == 1); /* halve fullcount until it drops below nodecount (or reaches zero); presumably DICTCOUNT_T_MAX is all one bits, making fullcount the node count of the largest full tree smaller than nodecount --- TODO confirm */ while (fullcount >= nodecount && fullcount) fullcount >>= 1; /* the nodes beyond fullcount form the bottom row */ botrowcount = nodecount - fullcount; for (curr = loadnil->left; curr != loadnil; curr = next) { next = curr->left; /* once the bottom row is used up, raise the base level to 1 */ if (complete == NULL && botrowcount-- == 0) { assert (baselevel == 0); assert (level == 0); baselevel = level = 1; complete = tree[0]; if (complete != 0) { tree[0] = 0; complete->right = dictnil; while (tree[level] != 0) { tree[level]->right = complete; complete->parent = tree[level]; complete = tree[level]; tree[level++] = 0; } } } if (complete == NULL) { curr->left = dictnil; curr->right = dictnil; curr->color = level % 2; 
complete = curr; assert (level == baselevel); while (tree[level] != 0) { tree[level]->right = complete; complete->parent = tree[level]; complete = tree[level]; tree[level++] = 0; } } else { curr->left = complete; curr->color = (level + 1) % 2; complete->parent = curr; tree[level] = curr; complete = 0; level = baselevel; } } if (complete == NULL) complete = dictnil; /* attach whatever partial subtrees remain, lowest level first */ for (i = 0; i < DICT_DEPTH_MAX; i++) { if (tree[i] != 0) { tree[i]->right = complete; complete->parent = tree[i]; complete = tree[i]; } } /* install the result as the root and force it (and the nil node) black */ dictnil->color = dnode_black; dictnil->right = dictnil; complete->parent = dictnil; complete->color = dnode_black; dict_root(dict) = complete; assert (dict_verify(dict)); } /* Merge every node of source into dest, leaving source cleared. Both dictionaries are traversed in key order and their nodes are fed to the bulk loader for dest; when the dest key does not compare less than the source key, the source node is taken first. The dictionaries must satisfy dict_similar (asserted). Merging a dictionary with itself does nothing. */ void dict_merge(dict_t *dest, dict_t *source) { dict_load_t load; dnode_t *leftnode = dict_first(dest), *rightnode = dict_first(source); assert (dict_similar(dest, source)); if (source == dest) return; /* dict_load_next counts the nodes again as they are re-loaded */ dest->nodecount = 0; load_begin_internal(&load, dest); for (;;) { if (leftnode != NULL && rightnode != NULL) { #ifdef BE_QSORT_COMPATIBLE if (dest->compare(&leftnode->key, &rightnode->key) < 0) goto copyleft; else goto copyright; #else if (dest->compare(leftnode->key, rightnode->key) < 0) goto copyleft; else goto copyright; #endif } else if (leftnode != NULL) { goto copyleft; } else if (rightnode != NULL) { goto copyright; } else { assert (leftnode == NULL && rightnode == NULL); break; } copyleft: { /* fetch the successor before the node is handed to the loader */ dnode_t *next = dict_next(dest, leftnode); #ifndef NDEBUG leftnode->left = NULL; /* suppress assertion in dict_load_next */ #endif dict_load_next(&load, leftnode, leftnode->key); leftnode = next; continue; } copyright: { /* fetch the successor before the node is handed to the loader */ dnode_t *next = dict_next(source, rightnode); #ifndef NDEBUG rightnode->left = NULL; /* suppress assertion in dict_load_next */ #endif dict_load_next(&load, rightnode, rightnode->key); rightnode = next; continue; } } dict_clear(source); dict_load_end(&load); } kmer-code-2013-trunk/libutil/kazlib/sfx.h0000644000000000000000000000262710541426140017043 0ustar rootroot/* * SideChk---A utility which tries to determine whether a given C 
expression * is free of side effects. This can be used for verifying that macros which * expand their arguments more than once are not being accidentally misused. * * Copyright (C) 1999 Kaz Kylheku * * Free Software License: * * All rights are reserved by the author, with the following exceptions: * Permission is granted to freely reproduce and distribute this software, * possibly in exchange for a fee, provided that this copyright notice appears * intact. Permission is also granted to adapt this software to produce * derivative works, as long as the modified versions carry this copyright * notice and additional notices stating that the work has been modified. * This source code may be translated into executable form and incorporated * into proprietary software; there is no requirement for such software to * contain a copyright notice related to this source. * */ #ifndef SFX_H #define SFX_H #include #ifdef __cplusplus extern "C" { #endif /* Side-effect rating of an expression; presumably "none", "potential" and "certain" side effects, going by the enumerator names. */ typedef enum { sfx_none, sfx_potential, sfx_certain } sfx_rating_t; /* The string arguments are presumably expression text (SFX_CHECK passes #E); sfx_check also receives a file name and a line number. */ int sfx_determine(const char *, sfx_rating_t *); int sfx_declare(const char *, sfx_rating_t); void sfx_check(const char *, const char *, unsigned long); #ifdef __cplusplus } #endif /* SFX_CHECK(E) passes the source text of E, together with the current file name and line number, to sfx_check() and then yields the value of E. SFX_STRING(E) yields the source text of E as a string literal. */ #define SFX_CHECK(E) (sfx_check(#E, __FILE__, __LINE__), (E)) #define SFX_STRING(E) #E #endif kmer-code-2013-trunk/libutil/kazlib/list.h0000644000000000000000000001143610541426140017214 0ustar rootroot/* * List Abstract Data Type * Copyright (C) 1997 Kaz Kylheku * * Free Software License: * * All rights are reserved by the author, with the following exceptions: * Permission is granted to freely reproduce and distribute this software, * possibly in exchange for a fee, provided that this copyright notice appears * intact. Permission is also granted to adapt this software to produce * derivative works, as long as the modified versions carry this copyright * notice and additional notices stating that the work has been modified. 
* This source code may be translated into executable form and incorporated * into proprietary software; there is no requirement for such software to * contain a copyright notice related to this source. * */ #ifndef LIST_H #define LIST_H #include /* LIST_SFX_CHECK(E) expands to SFX_CHECK(E) when KAZLIB_SIDEEFFECT_DEBUG is defined, and to plain (E) otherwise. */ #ifdef KAZLIB_SIDEEFFECT_DEBUG #include "sfx.h" #define LIST_SFX_CHECK(E) SFX_CHECK(E) #else #define LIST_SFX_CHECK(E) (E) #endif /* * Blurb for inclusion into C++ translation units */ #ifdef __cplusplus extern "C" { #endif /* Type used for node counts, and the largest value it can hold. */ typedef unsigned long listcount_t; #define LISTCOUNT_T_MAX ULONG_MAX /* List node: links to the next and previous nodes plus the user's data pointer. When KAZLIB_OPAQUE_DEBUG is defined and LIST_IMPLEMENTATION is not, the members are replaced by a single dummy member. The same arrangement applies to the two structures that follow. */ typedef struct lnode_t { #if defined(LIST_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) struct lnode_t *list_next; struct lnode_t *list_prev; void *list_data; #else int list_dummy; #endif } lnode_t; /* Pool of list nodes. */ typedef struct lnodepool_t { #if defined(LIST_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) struct lnode_t *list_pool; struct lnode_t *list_free; listcount_t list_size; #else int list_dummy; #endif } lnodepool_t; /* List: a nil (sentinel) node, the current node count and the maximum node count. */ typedef struct list_t { #if defined(LIST_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) lnode_t list_nilnode; listcount_t list_nodecount; listcount_t list_maxcount; #else int list_dummy; #endif } list_t; /* Operations on individual nodes. */ lnode_t *lnode_create(void *); lnode_t *lnode_init(lnode_t *, void *); void lnode_destroy(lnode_t *); void lnode_put(lnode_t *, void *); void *lnode_get(lnode_t *); int lnode_is_in_a_list(lnode_t *); /* Macro forms of lnode_put and lnode_get: store or fetch the node's data pointer directly. */ #if defined(LIST_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) #define lnode_put(N, D) ((N)->list_data = (D)) #define lnode_get(N) ((N)->list_data) #endif /* Operations on node pools. */ lnodepool_t *lnode_pool_init(lnodepool_t *, lnode_t *, listcount_t); lnodepool_t *lnode_pool_create(listcount_t); void lnode_pool_destroy(lnodepool_t *); lnode_t *lnode_borrow(lnodepool_t *, void *); void lnode_return(lnodepool_t *, lnode_t *); int lnode_pool_isempty(lnodepool_t *); int lnode_pool_isfrom(lnodepool_t *, lnode_t *); /* Operations on lists. */ list_t *list_init(list_t *, listcount_t); list_t *list_create(listcount_t); void list_destroy(list_t *); void list_destroy_nodes(list_t *); void 
list_return_nodes(list_t *, lnodepool_t *); /* Size queries and membership test. */ listcount_t list_count(list_t *); int list_isempty(list_t *); int list_isfull(list_t *); int list_contains(list_t *, lnode_t *); /* Insertion. */ void list_append(list_t *, lnode_t *); void list_prepend(list_t *, lnode_t *); void list_ins_before(list_t *, lnode_t *, lnode_t *); void list_ins_after(list_t *, lnode_t *, lnode_t *); /* Traversal. */ lnode_t *list_first(list_t *); lnode_t *list_last(list_t *); lnode_t *list_next(list_t *, lnode_t *); lnode_t *list_prev(list_t *, lnode_t *); /* Deletion. */ lnode_t *list_del_first(list_t *); lnode_t *list_del_last(list_t *); lnode_t *list_delete(list_t *, lnode_t *); void list_process(list_t *, void *, void (*)(list_t *, lnode_t *, void *)); int list_verify(list_t *); /* Macro forms of some of the functions above, defined when the structure members are visible. list_next and list_prev yield NULL when the neighbouring node is the list's nil node. list_isfull, list_next and list_prev expand one of their arguments twice; one occurrence is wrapped in LIST_SFX_CHECK. */ #if defined(LIST_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) #define lnode_pool_isempty(P) ((P)->list_free == 0) #define list_count(L) ((L)->list_nodecount) #define list_isempty(L) ((L)->list_nodecount == 0) #define list_isfull(L) (LIST_SFX_CHECK(L)->list_nodecount == (L)->list_maxcount) #define list_next(L, N) (LIST_SFX_CHECK(N)->list_next == &(L)->list_nilnode ? NULL : (N)->list_next) #define list_prev(L, N) (LIST_SFX_CHECK(N)->list_prev == &(L)->list_nilnode ? 
NULL : (N)->list_prev) /* list_first and list_last are the nil node's next and previous neighbours, obtained through list_next and list_prev. */ #define list_first(L) list_next(LIST_SFX_CHECK(L), &(L)->list_nilnode) #define list_last(L) list_prev(LIST_SFX_CHECK(L), &(L)->list_nilnode) #endif /* list_append inserts before the nil node and list_prepend inserts after it; list_del_first and list_del_last delete the node yielded by list_first and list_last. Each of these macros expands L more than once. */ #if defined(LIST_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) #define list_append(L, N) list_ins_before(LIST_SFX_CHECK(L), N, &(L)->list_nilnode) #define list_prepend(L, N) list_ins_after(LIST_SFX_CHECK(L), N, &(L)->list_nilnode) #define list_del_first(L) list_delete(LIST_SFX_CHECK(L), list_first(L)) #define list_del_last(L) list_delete(LIST_SFX_CHECK(L), list_last(L)) #endif /* destination list on the left, source on the right */ void list_extract(list_t *, list_t *, lnode_t *, lnode_t *); void list_transfer(list_t *, list_t *, lnode_t *first); /* The function parameters below take two const void pointers and return int. */ void list_merge(list_t *, list_t *, int (const void *, const void *)); void list_sort(list_t *, int (const void *, const void *)); lnode_t *list_find(list_t *, const void *, int (const void *, const void *)); int list_is_sorted(list_t *, int (const void *, const void *)); #ifdef __cplusplus } #endif #endif kmer-code-2013-trunk/libutil/kazlib/docs/0000755000000000000000000000000012641613360017020 5ustar rootrootkmer-code-2013-trunk/libutil/kazlib/docs/docs.ltx0000644000000000000000000051202010541426140020474 0ustar rootroot\documentclass{article} \usepackage{makeidx} \usepackage[margin=1.0in]{geometry} \makeatletter \newcommand{\defsubsection}{\@startsection {subsection} {2} {0pt} {2.0ex plus 0.1ex minus 0.05ex} {-0pt} {\normalfont\normalsize\bfseries}} \newcommand{\defsubsubsection}{\@startsection {subsection} {3} {0ex} {2.0ex plus 0.1ex minus 0.05ex} {1.0ex} {\normalfont\normalsize\bfseries}} \renewcommand{\paragraph}{\@startsection {paragraph} {4} {0ex} {2.0ex plus 0.1ex minus 0.05ex} {1.0ex} {\normalsize\bfseries}} \makeatother \title{Kazlib---Reusable Components\\for C Programming} \author{Kaz Kylheku} \date{Release 1.20\\July 24, 2001} \makeindex \setcounter{tocdepth}{1} \setcounter{secnumdepth}{4} \begin{document} \catcode`\_=11 
\def\indextype#1{\index{#1@{\tt #1} type}} \def\indexmacro#1{\index{#1@{\tt #1} macro}} \def\indexobject#1{\index{#1@{\tt #1} object}} \def\indexfunc#1{\index{#1@{\tt #1} function}} \def\indexenum#1{\index{#1@{\tt #1} enum constant}} \def\synopsis{\paragraph*{Synopsis}} \def\constraints{\paragraph*{Constraints}} \def\description{\paragraph*{Description}} \def\example{\paragraph*{Example}} \maketitle \abstract{The aim of the Kazlib project is to provide a well-documented programming interface featuring commonly needed programming abstractions, accompanied by a high quality, portable reference implementation. Kazlib consists of four independent components: a list module, a hash table module, a dictionary module and an exception handling module. The reference implementations of the first three of these are based on, respectively, the following algorithms: doubly linked circular list with sentinel node, extendible hashing, and red-black tree.} \tableofcontents \section{Introduction} This document establishes the provisions required of an implementation of the Kazlib library, and describes a reference implementation thereof. This document specifies \begin{itemize} \item the names and types of identifiers and preprocessor symbols made available by each component; \item identifier name spaces reserved for future use by each component; \item the interface syntax and semantics of each component operation; \item the conditions required for the well-defined execution of each operation; \item the externally visible behavior of each component, including global side effects and the effects on the subject data structures; \item and the implementation language of Kazlib. 
\end{itemize} Furthermore, this document describes, but does not specify \begin{itemize} \item the implementation details of structure objects manipulated by the operations of each component; \item objects and functions that are defined by the implementation of each component but are not externally visible; \item the algorithms and implementation details of the operations. \end{itemize} Finally, this document does {\em not\/} specify or describe \begin{itemize} \item the specific choices for parameters which may be adjusted by an installation or implementation of Kazlib. \item the size of any data structure which will exceed the capacity of a particular installation. \item the mechanisms or procedures for the translation of Kazlib and their integration with other translation units. \end{itemize} \section{References} \label{sec:references} \begin{trivlist} \item ISO 9899:1990, {\it Programming Languages---C.} \item {\it Introduction to Algorithms}, Thomas H. Cormen, Charles E. Leiserson, Ronald L. Rivest, eighth printing, 1992. \end{trivlist} \section{Definitions and conventions} The following terms shall be interpreted in accordance with the definitions below. Other terms appearing in this document shall be defined upon their first mention, indicated by {\it italic\/} type. Any terms not explicitly defined in this document should be interpreted according to ISO 9899-1990, clause 3. Failing that, they should be interpreted according to other works listed in section \ref{sec:references}. \nobreak \defsubsection{implementation}: A library and set of C language headers which conforms to the specifications of this Document. \index{production mode} \indexmacro{NDEBUG} \defsubsection{production mode}: A mode of operating the implementation in such a way that maximum efficiency of execution is achieved at the expense of the verification of constraints. 
An implementation shall provide a production mode, which is enabled in an implementation-defined manner.\footnote{An implementation may have to supply a separate set of libraries for production and for verification use, for instance. The manner of selecting libraries varies with each programming environment.} Each translation unit of the program which includes a Kazlib header shall ensure that the macro {\tt NDEBUG} is defined prior to the inclusion of that header, otherwise the implementation is not said to be operated in production mode. \index{verification mode} \defsubsection{verification mode}: A mode of operating the implementation in such a way that maximum error checking is obtained at the cost of execution efficiency. An implementation shall provide a verification mode, which is enabled in an implementation-defined manner. If any translation unit which includes a Kazlib header defines the macro name {\tt NDEBUG}\footnote{The intent is that the standard {\tt assert} macro may be exploited by the implementation's headers for the purpose of provisioning verification mode.} prior to including that header, the implementation is not said to be in verification mode. The least requirements of a Kazlib implementation operated in verification mode, is that it shall stop translation or execution of any program which violates a constraint. \index{undefined behavior} \defsubsection{undefined behavior}: Behavior of a program, upon violation of a requirement with respect to the use of Kazlib, or upon use of corrupt or incorrect data, for which this document does not impose any requirements. 
Additional undefined behaviors are: \begin{itemize} \item any behavior that is undefined by the C language standard; \item evaluation of an object whose contents are indeterminate; \item a violation of any explicit constraint stated in this document, if that program was built using Kazlib in production mode;\footnote{The intent is that violations of constraints are diagnosed by the implementation in verification mode, and hence do not lead to undefined behavior.} \item a violation of any requirement stated in this document that is not designated as a constraint, and is introduced using the word {\it shall}; and \item any other construct for which no definition of behavior can be deduced from this document. \end{itemize} If a program invokes undefined behavior of any kind, the Kazlib implementation is absolved from any requirements as to what events should ensue. The implementation may respond by invoking undefined behavior in the C language sense, or it may detect the behavior and terminate with a diagnostic message. \defsubsection{implementation-defined}: An adjective which, when appearing in the description of a feature, represents a requirement that the implementor must supply a definition, and document that definition. This adjective is applied to both behavior and to results. Implementation-defined behavior is behavior which depends on the characteristics of an implementation.\footnote{It is not considered adequate for the implementor to allow implementation-defined behavior to produce unpredictable effects or to terminate the program when such behavior is invoked.} When said of a result, implementation-defined means that a value is successfully computed, but depends on the characteristics of the implementation. It is possible for the presence of a requirement on a program to be described as implementation-defined, giving the implementor a choice whether to make that requirement or not. 
If a program violates a requirement whose presence is implementation-defined, that program's behavior is undefined in any implementation which elects to in fact impose that requirement. \index{implementation-defined} \defsubsection{unpredictable result}: A successfully computed value which is unreliable because some procedure or data failed to satisfy a property required by the computation. \defsubsection{constraint}: A semantic restriction with which a program must comply. Some sections of this Document contain paragraphs under the heading {\it Constraints\/} which list all constraints pertaining to the described feature. When operated in production mode, the Kazlib implementation is not required to diagnose constraint violations. When operated in verification mode, the Kazlib implementation must halt translation or execution of a program which violates a constraint. \index{constraint} \defsubsection{comparison function}: A function which accepts two arguments \index{comparison function} of type \verb|const void *| and returns a value of type int based on a ranking comparison of these arguments, and which satisfies the following additional semantic properties. If the two arguments are deemed to be equal, the function must return zero. If the first argument is determined to have a greater rank than the second, a positive value is returned. Otherwise if the first argument is determined to have a lesser rank than the second, a negative value is returned. The rank is computed as if each value has associated with it an integer, not necessarily unique, and as if these integers are compared for ordinary equality or inequality when values are said to be compared. 
The assignment of integers is up to the designer of the comparison function, and does not change between successive invocations of the function.\footnote{Of course, an actual comparison function need not assign actual integer ranks to data items, but it must behave as if such ranks were assigned.} If a comparison function is invoked in the context of an operation on some data structure, it shall not invoke any operation on any component of that same structure.\footnote{Thus, if a comparison function is invoked from, for instance, {\tt list_sort}, it must not call any list operations that inspect or modify the list being sorted, or any of its constituent nodes.} \defsubsection{opaque data type}: A data type whose precise definition is not documented, and which is intended to be manipulated only using the documented interface, which consists of a set of functions. Many data types in Kazlib are described as opaque. A program which bypasses the documented interfaces in inspecting or manipulating these data types invokes undefined behavior, and is not portable among Kazlib implementations. \defsubsection{user}: \index{user} The program which uses Kazlib. \defsubsection{user data}: \index{user data} Data provided by the program to which Kazlib stores a pointer, but otherwise does not inspect or modify. \section{Environment} \label{sec:environment} The translation and use of Kazlib requires a conforming, hosted implementation of the C language which meets the following additional minimal requirements: \begin{enumerate} \item The C implementation distinguishes external names by at least their initial 15 characters\footnote{The ISO 9899:1990 standard demands only that external names be distinguished by their initial six characters.}. External names that are distinct in their first 15 characters are treated by the implementation as distinct names. Upper and lower case letters in external identifiers need not be treated as distinct. 
\item The C implementation does not claim the identifier \verb|__cplusplus| for its internal use as a preprocessor symbol or keyword. \end{enumerate} If Kazlib headers are used by a C++ program, the C++ implementation meets these additional requirements: \begin{enumerate} \item the C++ implementation identifies itself by predefining the preprocessor symbol \verb|__cplusplus|; \item the C++ implementation is capable of linkage against the C implementation with which the Kazlib source files were translated. \end{enumerate} The Kazlib headers shall not make use of any names that are claimed by the C++ programming language, and shall ensure that the \verb|extern "C"| mechanism is used for all declarations when they are included into a C++ translation unit, or otherwise provide compatibility with C++.\footnote{The intent is that the Kazlib implementation could, in principle, provide a separate set of headers for use with each language.} In programming environments that support the programming mechanism of multiple threads of execution an implementation of Kazlib may be designated as {\it thread safe}. To be called thread safe, it must guarantee that the use of an object by one thread cannot visibly interact or interfere with the concurrent or interleaved use of another object by another thread. If a Kazlib implementation that is not thread safe is provided for an environment which supports threads, it shall be accompanied by documentation which describes the extent of this limitation. A Kazlib implementation can also be designated as being {\it async safe}. The minimum requirement for this designation is that an operation on an object can be interrupted by delivery of an asynchronous signal and from within the catching function for that signal, it is safe to perform an operation on another object. An implementation shall document that it is async safe, or the extent to which it fails to be async safe. 
\section{General restrictions} \subsection{Headers} The Kazlib headers may be included in any order, and may be included more than once. Prior to the inclusion of a Kazlib header, the translation unit shall not define any macro name that has the same spelling as a C language keyword. The Kazlib headers may behave as though they include arbitrary standard C headers, so any requirements related to the inclusion of standard headers apply to Kazlib headers. A header shall be included before the first reference to any of the functions, types or macros that it defines. If one or more preprocessor symbols whose names begin with the sequence \verb|KAZLIB_| are defined prior to the inclusion of a Kazlib header, the behavior is implementation-defined. \subsection{Reserved macros} A Kazlib header defines all of the macros explicitly listed in the section of this document that defines the contents of that header. It may also define additional macros that belong to the macro namespace reserved by that header. The translation unit that includes the header shall not \verb|#define| or \verb|#undef| any of these macros. A header may define function-like macros that supplement existing functions, provided that such macros do not cause multiple evaluation of arguments except as explicitly permitted, and are safe to use wherever the corresponding function call would be. These function-like macros may be subject to \verb|#undef|.\footnote{In principle, an implementation may provide, within the reserved namespaces, additional functions not specified in this document, and function-like macro equivalents of these functions. A program that uses such identifiers in a block or function scope should use {\tt \#undef} on these identifiers prior to their use.} \subsection{Reserved symbols} Each Kazlib header provides file scope declarations for the typedef names, struct tags, enum constants and function names listed in its corresponding section in this document. 
Moreover, each header may define additional such names that fall into the documented reserved namespaces. The behavior is undefined if a translation unit that includes a Kazlib header defines any identifier that is the same as an identifier reserved by the header in the same scope and namespace.\footnote{Therefore, it is permitted to redeclare or redefine the identifiers reserved by a previously included Kazlib header, provided that the declarations or definitions are in a different namespace or scope. Reserved names may be redeclared in a block scope, or used as statement labels which have function scope and are in their own namespace.} The behavior is also undefined if the program contains a definition of an object or function with external linkage whose name matches an external object or function defined by a Kazlib component that is used as part of the program, or whose name is in a namespace reserved by that component.\footnote{This restriction exists whether or not the corresponding Kazlib header is included.} Lastly, the behavior is undefined if a translation unit defines a macro whose name is in the space of reserved symbols of a Kazlib header that is included in that translation unit. \subsection{Argument aliasing} Kazlib provides functions that operate on objects of various types. Pointers to objects are passed to these functions, thereby giving rise to the possibility of {\it aliasing}---passing of objects that wholly or partially overlap. The program shall not present aliased objects to any Kazlib function. Objects of distinct types shall not be aliased in a function call under any circumstances. 
The aliasing of two or more objects of compatible type is permitted only as explicitly documented in the description of a function; in all such circumstances, only exact overlapping is permitted.\footnote{That is to say, where explicitly allowed, a pointer to the same object may be specified for two (or more) parameters of like type.} \subsection{Object initialization} The Kazlib opaque data types can only be initialized with the initialization functions provided by the Kazlib library, or by implementation-defined initialization functions.\footnote{Of course, the use of implementation-defined functions results in programs that are not portable among library implementations.} An opaque object that is initialized by a method other than by being passed to an appropriate initialization function, or that is not initialized at all, has indeterminate contents. A pointer to an object having indeterminate contents may be passed to an initialization function; the object then has well-determined contents. An object whose initialization function is capable of indicating failure is considered indeterminate if the attempt to initialize that object using that function does in fact fail. The program shall not attempt to deinitialize such an object. The implementation shall reclaim any resources that were allocated for an object whose initialization failed. This reclamation need not be immediate, but may be delayed; however, the delay shall not give rise to the possibility of resource leaks in any correct program. Those objects for which deinitialization operations are defined should be subject to these operations when these objects are no longer needed. Failure to apply the deinitialization functions may result in the leakage of resources. \subsection{Object copying} Certain data types may be sensitive to their own location in memory. This means that copying their values by assignment or \verb|memcpy| results in the copy having an indeterminate value which cannot be used. 
All opaque types in Kazlib are assumed to have this property; copying the value of an opaquely typed object to another suitably typed object causes the destination object to have indeterminate contents. \section{List component} The List component provides a set of functions, macros and type declarations which together provide a library for maintaining a possibly empty ordered set of elements, called a {\it list}. This list has the following properties: \index{List}\begin{enumerate} \item If the list is not empty, a first and last element can be identified. In a list having only one element, that one element is both the first and last element. \item Each element that is not the last element has another element as its {\it successor}. \index{successor!of a list element} \index{List!successor of an element} \item Each element that is not the first element has a {\it predecessor}. \index{predecessor!of a list element} \index{List!predecessor of an element} \item No element is the predecessor or successor of more than one element. \item If one element is the successor of another, the other is necessarily the predecessor of the first. \item Each element is associated with arbitrary {\it satellite\/} data. \end{enumerate} The {\it size} of a list, also known as the {\it list count}, is simply the number of elements contained in it.\index{size!of a list}\index{List!count} A list imposes a maximum value on the number of nodes that may be in it simultaneously. This is known as the list's {\it capacity}. A list that has the maximum number of nodes is said to be full. \subsection{Interface} \subsubsection{The {\tt list.h} header} Each C or C++ translation unit that is to use the functionality of the List component shall include the header \verb|list.h|. This header shall contain declarations of types and external functions, and definitions of macros. 
The following typedef names shall be defined:\index{List!typedef names} \index{typedefs!defined by List} \begin{verbatim} list_t listcount_t lnode_t lnodepool_t \end{verbatim} In addition, the following structure tags may be defined:\index{List!tag names} \index{tags!defined by List} \begin{verbatim} struct list_t struct lnode_t struct lnodepool_t \end{verbatim} The following external function names shall be declared: \index{List!function names}\index{functions!defined by List} \begin{verbatim} list_append list_prev list_contains list_process list_count list_return_nodes list_create list_sort list_del_first list_find list_del_last list_transfer list_delete list_verify list_destroy lnode_borrow list_destroy_nodes lnode_create list_extract lnode_destroy list_first lnode_get list_init lnode_init list_ins_after lnode_is_in_a_list list_ins_before lnode_pool_create list_is_sorted lnode_pool_destroy list_isempty lnode_pool_init list_isfull lnode_pool_isempty list_last lnode_pool_isfrom list_merge lnode_put list_next lnode_return list_prepend \end{verbatim} The following preprocessor symbols (macros) shall be defined: \index{List!macro names}\index{macros!defined by List} \indexmacro{LISTCOUNT_T_MAX} \indexmacro{LIST_H} \begin{verbatim} LISTCOUNT_T_MAX LIST_H\end{verbatim} \index{symbols!reserved by List}\index{List!reserved symbols} Macro identifiers which begin with the upper-case prefix \verb|LIST| are reserved for future extensions to the \verb|list.h| header, as are names in the ordinary and tag namespaces which begin with \verb|list_| or \verb|lnode_|. External names which begin with \verb|list_| or \verb|lnode_| are reserved by the Kazlib library regardless of what header files are included. \subsubsection{The {\tt list_t} type} \indextype{list_t} The type \verb|list_t| is an opaque data type which maintains information about the current state of a single list. 
A list consists of an instance of the \verb|list_t| type, plus zero or more instances of the type \verb|lnode_t|. An instance of the \verb|list_t| type can be dynamically created using the \verb|list_create| function, and destroyed by the \verb|list_destroy| function. Alternately, the program can declare an object of type \verb|list_t| and have it initialized via the \verb|list_init| function. \subsubsection{The {\tt listcount_t} type} \indextype{listcount_t} \indexmacro{LISTCOUNT_T_MAX} The type \verb|listcount_t| is an unsigned integral type which represents the number of nodes in a list. The specific choice of unsigned integral type is implementation defined. The \verb|LISTCOUNT_T_MAX| macro expands to a constant expression of type \verb|listcount_t| which specifies the maximum value of that type.\footnote{For example, if the implementation defines {\tt listcount_t} as an alias for the type unsigned long, then {\tt LISTCOUNT_T_MAX} must have the same value as {\tt ULONG_MAX}.} \subsubsection{The {\tt lnode_t} type} \indextype{lnode_t} The type \verb|lnode_t| is an opaque type that represents a single node of a list. A node contains a reference to satellite data provided by the user, and also stores the key that is associated with the node when it is inserted. Nodes may be dynamically created by the \verb|lnode_create| function. Alternately, the program may supply an \verb|lnode_t| object that can be initialized by the \verb|lnode_init| function. \subsubsection{The {\tt lnodepool_t} type} \indextype{lnodepool_t} The \verb|lnodepool_t| type provides an alternate method for supplying list nodes to the application. A user-supplied or dynamically allocated fixed size array of nodes is converted into a {\it pool\/} of nodes from which free nodes may be obtained and to which they may be returned.
A user-supplied node pool is created by the function \verb|lnode_pool_init| which requires a pointer to an object of type \verb|lnodepool_t|, a pointer to the first element of an array of \verb|lnode_t| objects, as well as an integer representing the size of the array. Alternately, the function \verb|lnode_pool_create| will dynamically allocate an object of type \verb|lnodepool_t| containing the specified number of list nodes. \subsubsection{The {\tt list_append} function} \indexfunc{list_append} \index{List!appending a node} \index{append node to list} \synopsis \begin{verbatim} void list_append(list_t *, lnode_t *);\end{verbatim} \constraints The second argument shall not refer to a node that is already in a list or in a list node pool. The first argument shall not refer to a list that is full. \description The append operation causes the node pointed at by the second argument to become the last node in the list pointed at by the first argument.\footnote{That is to say, after the operation, the {\tt list_last} function, when applied to the list, shall return a pointer to that node.} If the first argument is an expression with side effects, the behavior is undefined.\footnote{Thus, the implementation may provide a macro version of {\tt list_append} which evaluates the first argument more than once.} \index{macros!and side effects} \subsubsection{The {\tt list_contains} function} \indexfunc{list_contains} \index{List!testing for presence of node} \nobreak \synopsis \begin{verbatim} int list_contains(list_t *, lnode_t *node);\end{verbatim} \nobreak \description \nobreak The \verb|list_contains| function shall return 1 if the node pointed at by the second argument is in the list pointed at by the first argument. Otherwise, it shall return 0.
\subsubsection{The {\tt list_count} function} \indexfunc{list_count} \index{List!count} \index{List!size} \synopsis \begin{verbatim} listcount_t list_count(list_t *);\end{verbatim} \description The \verb|list_count| function returns a value which represents the number of nodes currently stored in the list pointed at by the argument. \subsubsection{The {\tt list_create} function} \indexfunc{list_create} \index{List!creation of} \index{create!list object} \synopsis \begin{verbatim} list_t *list_create(listcount_t);\end{verbatim} \description The \verb|list_create| function instantiates and initializes an object of type \verb|list_t|, and returns a pointer to it unless insufficient resources exist for the creation of the object, in which case a null pointer is returned. The value of the function's argument establishes, for the entire duration of the list object, its capacity. The newly created list object is empty. \subsubsection{The {\tt list_del_first} function} \index{List!first node} \indexfunc{list_del_first} \index{List!deletion} \index{delete!first node of a list} \synopsis \begin{verbatim} lnode_t *list_del_first(list_t *);\end{verbatim} \constraints The argument shall not point to an empty list. \description The \verb|list_del_first| function removes the first node from the list pointed at by the argument and returns a pointer to that node. If the argument is an expression with side effects, the behavior is undefined.\index{macros!and side effects} \subsubsection{The {\tt list_del_last} function} \index{List!last node} \indexfunc{list_del_last} \index{List!deletion} \index{delete!last node of a list} \synopsis \begin{verbatim} lnode_t *list_del_last(list_t *);\end{verbatim} \constraints The argument shall not point to an empty list. \description The \verb|list_del_last| function removes the last node from the list specified by the argument, and returns a pointer to that node. 
If, prior to the operation, that node had a predecessor, that predecessor shall become the new last node of the list. Otherwise, the list shall become empty. The new value of the list count shall be one less than its value prior to the call to this function. If the argument is an expression with side effects, the behavior is undefined.\index{macros!and side effects} \subsubsection{The {\tt list_delete} function} \indexfunc{list_delete} \index{List!deletion} \index{delete!arbitrary node of a list} \synopsis \begin{verbatim} lnode_t *list_delete(list_t *, lnode_t *);\end{verbatim} \constraints The second argument shall point to a node that is inside the list pointed at by the first argument. \description The \verb|list_delete| function removes the node pointed at by its second argument from the list pointed at by its first argument. A pointer to the deleted node is returned. \subsubsection{The {\tt list_destroy} function} \indexfunc{list_destroy} \index{List!destruction of} \synopsis \begin{verbatim} void list_destroy(list_t *);\end{verbatim} \constraints The argument shall point to an empty list. \description The empty list pointed at by the argument is destroyed. If the list has not been created by a call to the \verb|list_create| function, the behavior is undefined. A pointer that previously referred to a list that has been disposed by \verb|list_destroy| has an indeterminate value. \subsubsection{The {\tt list_destroy_nodes} function} \indexfunc{list_destroy_nodes} \synopsis \begin{verbatim} void list_destroy_nodes(list_t *);\end{verbatim} \description The nodes, if any, contained in the list pointed at by the argument are disposed of as if by a call to the \verb|lnode_destroy| function. If any node contained in the list was created by means other than the \verb|lnode_create| function, the behavior is undefined. After the operation, the list is empty. Any pointer that referred to any of the destroyed nodes takes on an indeterminate value. 
\subsubsection{The {\tt list_extract} function} \index{List!node range extraction} \indexfunc{list_extract} \synopsis \begin{verbatim} void list_extract(list_t *, list_t *, lnode_t *, lnode_t *);\end{verbatim} \constraints The second argument points to the {\it source list}. The third argument is either null, or points to a node that is an occupant of the source list. This node is called the {\it starting node}. The fourth argument is either null, or points to a node that is an occupant of the source list. This node is called the {\it ending node}. If the starting node and ending node are both specified, and are distinct nodes, then the starting node shall appear earlier in the source list than the ending node. The transfer request shall not call for the capacity of the destination list to be exceeded. \description The \verb|list_extract| function moves nodes from the source list to the {\it destination list\/} pointed at by the first argument.\footnote{This right-to-left direction of transfer is consistent with the semantics of standard C library functions such as {\tt memmove} or {\tt strcpy}.} If the third and fourth arguments are not null, the entire range of nodes from the starting node to the ending node, inclusive, is transferred from the source list to the end of the destination list, where they appear in their original order. Other nodes in the source list, if any, are unaffected. If the third and fourth arguments both point to the same node, that node alone is transferred to the end of the destination list. If either the third argument or the fourth argument is null, or both are null, no transfer of nodes takes place. The source and destination list may be the same object. \subsubsection{The {\tt list_first} function} \index{List!first node} \indexfunc{list_first} \synopsis \begin{verbatim} lnode_t *list_first(list_t *);\end{verbatim} \description If the list pointed at by the argument is an empty list, a null pointer is returned.
Otherwise, a pointer to the first node in that list is returned. If the argument is an expression with side effects, the behavior is undefined.\index{macros!and side effects} \subsubsection{The {\tt list_init} function} \indexfunc{list_init} \synopsis \begin{verbatim} list_t *list_init(list_t *, listcount_t);\end{verbatim} \constraints The second argument shall not have a zero value. \description The \verb|list_init| function initializes the list object pointed at by the first argument, turning it into a valid, empty list. If the object is an already initialized list, the behavior is undefined. A list returned by \verb|list_create| is considered initialized. The second argument specifies the maximum number of nodes that may simultaneously occupy the list. The value returned is that of the first argument. \subsubsection{The {\tt list_ins_after} function} \indexfunc{list_ins_after} \index{insert!node into list} \index{List!insertion} \synopsis \begin{verbatim} void list_ins_after(list_t *, lnode_t *, lnode_t *);\end{verbatim} \constraints The first argument shall point to a list that is not already full. The second argument shall point to a node, called the {\it new node}, that is not already an occupant of the list pointed at by the first argument, nor of any other list or node pool object. The third argument shall point to a node, called the {\it reference node}, that is an occupant of the list. \description The new node becomes an occupant of the list, such that its predecessor is the reference node. If the reference node has a successor, the new node is inserted between the reference node and that successor. Otherwise, the new node becomes the last node of the list. 
\subsubsection{The {\tt list_ins_before} function} \indexfunc{list_ins_before} \index{insert!node into list} \index{List!insertion} \synopsis \begin{verbatim} void list_ins_before(list_t *, lnode_t *, lnode_t *);\end{verbatim} \constraints The first argument shall point to a list that is not already full. The second argument shall point to a node, called the {\it new node}, that is not already an occupant of the list pointed at by the first argument, nor of any other list or node pool object. The third argument shall point to a node, called the {\it reference node}, that is an occupant of the list. \description The new node becomes an occupant of the list, such that its successor is the reference node. If the reference node has a predecessor, the new node is inserted between the reference node and that predecessor. Otherwise, the new node becomes the first node of the list. \subsubsection{The {\tt list_is_sorted} function} \label{list:is:sorted} \indexfunc{list_is_sorted} \synopsis \begin{verbatim} int list_is_sorted(list_t *, int (const void *, const void *));\end{verbatim} \description The first argument points to a list object. The second is assumed to point to a comparison function. If the list has exactly one node or is empty, $1$ is returned unconditionally. Otherwise, nodes of the list are examined to determine whether they are in a sorted order according to the comparison function. This is true if the integer ranks of their data items, examined from the first node of the list through to the last node, form a monotonically increasing sequence. If the nodes are in order, the value $1$ is returned. Otherwise $0$ is returned. If the list has two or more nodes, and the second argument is a pointer to a function that has the correct type, but does not satisfy the semantic properties of a comparison function, the result is unpredictable, but is guaranteed to be one of the values~$0$~or~$1$. 
\subsubsection{The {\tt list_isempty} function} \indexfunc{list_isempty} \synopsis \begin{verbatim} int list_isempty(list_t *);\end{verbatim} \description The \verb|list_isempty| function returns $1$ if the list pointed at by the first argument is empty. Otherwise it returns $0$. \subsubsection{The {\tt list_isfull} function} \indexfunc{list_isfull} \synopsis \begin{verbatim} int list_isfull(list_t *);\end{verbatim} \description The \verb|list_isfull| function returns $1$ if the list pointed at by the first argument is full. Otherwise it returns $0$. A list is considered full when it contains the maximum number of nodes that was specified upon its initialization. If the argument is an expression with side effects, the behavior is undefined.\index{macros!and side effects} \subsubsection{The {\tt list_last} function} \index{List!last node} \indexfunc{list_last} \synopsis \begin{verbatim} lnode_t *list_last(list_t *);\end{verbatim} \description If the list pointed at by its first argument is empty, the \verb|list_last| function returns a null pointer. Otherwise it returns a pointer to the last node. If the argument is an expression with side effects, the behavior is undefined.\index{macros!and side effects} \subsubsection{The {\tt list_merge} function} \index{List!merge operation} \indexfunc{list_merge} \synopsis \begin{verbatim} void list_merge(list_t *, list_t *, int (const void *, const void *));\end{verbatim} \constraints The list pointed at by the first argument is called the {\it destination list}. The second argument points to the {\it source list}. The third argument points to a comparison function. The sum of the number of nodes occupying the source list and the destination list shall not exceed the maximum number of nodes that are permitted to occupy the destination list. 
Furthermore, both the source and destination list shall be sorted such that a call to \verb|list_is_sorted| given a pointer to either list as a first argument, and the pointer to the comparison function as its second argument, shall yield the value $1$. \description Nodes from the sorted source list are merged into the sorted destination list. After the operation, the source list is empty and the destination list contains all of the nodes it contained prior to the operation, as well as all of the nodes that the source list contained. The nodes are in sorted order according to the comparison function. If the third argument is a pointer to a function that has the correct type, but does not fulfill the semantic properties of a comparison function, the order of the nodes in the destination list is unpredictable. If the source and destination list are the same object, the \verb|list_merge| operation has no effect. \subsubsection{The {\tt list_next} function} \indexfunc{list_next} \synopsis \begin{verbatim} lnode_t *list_next(list_t *, lnode_t *);\end{verbatim} \constraints The node pointed at by the second argument is an occupant of the list pointed at by the first argument. \description If the node pointed at by the second argument has a successor, a pointer to that successor is returned. Otherwise, a null pointer is returned. If the second argument is an expression which has side effects, the behavior is undefined.\index{macros!and side effects} \subsubsection{The {\tt list_prepend} function} \indexfunc{list_prepend} \index{List!prepending a node} \index{prepend node to list} \synopsis \begin{verbatim} void list_prepend(list_t *, lnode_t *);\end{verbatim} \constraints The second argument shall not refer to a node that is already in a list or in a list node pool. The first argument shall not refer to a list that is full. 
\description The prepend operation causes the node pointed at by the second argument to become the first node in the list pointed at by the first argument. After the operation, the \verb|list_first| function, when applied to the list, shall return a pointer to that node. If, prior to the operation, the list is empty, then the prepended node shall become the first node in that list, otherwise, the prepended node becomes the predecessor of what was previously the first node. If the first argument is an expression with side effects, the behavior is undefined.\index{macros!and side effects} \subsubsection{The {\tt list_prev} function} \indexfunc{list_prev} \synopsis \begin{verbatim} lnode_t *list_prev(list_t *, lnode_t *);\end{verbatim} \constraints The node pointed at by the second argument is an occupant of the list pointed at by the first argument. \description If the node pointed at by the second argument has a predecessor, a pointer to that predecessor is returned. Otherwise, a null pointer is returned. If the second argument is an expression which has side effects, the behavior \index{macros!and side effects} is undefined. \subsubsection{The {\tt list_process} function} \indexfunc{list_process} \synopsis \begin{verbatim} void list_process(list_t *, void *, void (*)(list_t *, lnode_t *, void *));\end{verbatim} \nobreak \description The \verb|list_process| function iterates over the nodes of a list, and for each node invokes a callback function.\footnote{In most cases, it is more convenient and preferable to iterate over the list using explicit calls to {\tt list_first} and {\tt list_next}.} The second argument is a {\it context pointer\/} which can have any value. The third argument of \verb|list_process| shall be a pointer to a function which is compatible with the specified type. If the list contains one or more nodes, then the function is invoked once for each node, in order from first to last.
On each invocation, the first argument of the callback is a pointer to the list; the second argument is a pointer to a node, called the {\it subject node}; and the third argument repeats the context pointer value that was originally passed to \verb|list_process|. The callback function may delete the subject node by, for instance, calling \verb|list_delete|. It may insert new nodes to any place in the list; however, if such an insertion causes the subject node to acquire a new successor, it is implementation-defined whether upon returning from the callback function, the traversal shall continue with the new successor, or with the original successor. The callback function, and any function invoked from the callback function, shall not destroy the list or make any modifications other than the insertion of new nodes, or the deletion of the subject node. The callback function may recursively invoke \verb|list_process| for the same list or for a different list; the callback invocations arising out of the nested call inherit all of the restrictions of the outer callback in addition to being subject to the usual restrictions.\footnote{This means, for instance, that if two callbacks are in progress for different subject nodes from the same list, the inner callback may not delete its subject node, because it inherits the restriction that the only permitted deletion is the outer callback's subject node.} The callback function may freely operate on a different list, subject to any inherited restrictions. \subsubsection{The {\tt list_return_nodes} function} \indexfunc{list_return_nodes} \synopsis \begin{verbatim} void list_return_nodes(list_t *, lnodepool_t *);\end{verbatim} \description Every node in the list specified by the first argument is returned to the node pool specified by the second argument. If the list contains a node that has not been allocated from that node pool, the behavior is undefined.
\subsubsection{The {\tt list_sort} function} \index{List!sort operation} \indexfunc{list_sort} \synopsis \begin{verbatim} void list_sort(list_t *, int (const void *, const void *));\end{verbatim} \description The \verb|list_sort| function changes the order of the nodes of the list specified by the first argument according to the comparison function pointed at by the second argument. If the list is empty, or contains only one node, the comparison function is not called. Whenever the comparison function is invoked, its arguments are the data pointers stored in two distinct nodes of the list. \subsubsection{The {\tt list_find} function} \index{List!find operation} \indexfunc{list_find} \synopsis \begin{verbatim} lnode_t *list_find(list_t *, const void *, int (const void *, const void *));\end{verbatim} \description The \verb|list_find| function exhaustively searches the list for a node whose satellite data matches a search key according to the comparison function. The first argument is the list to be searched, the second argument specifies the search key and the third argument is a pointer to the comparison function. The comparison function is invoked to compare the key against the satellite data of successive nodes of the list, starting with the first node. A pointer to the first node for which the comparison function returns zero is returned. If the list is empty, or the comparison function returns non-zero for each item, a null pointer is returned. \subsubsection{The {\tt list_transfer} function} \index{List!node transfer} \indexfunc{list_transfer} \synopsis \begin{verbatim} void list_transfer(list_t *, list_t *, lnode_t *);\end{verbatim} \constraints The third argument is either null, or it points at a node which is an occupant of the list pointed at by the second argument. The transfer request shall not call for the capacity of the destination list to be exceeded.
\description The \verb|list_transfer| function moves nodes from the list pointed at by the second argument to the list pointed at by the first argument. If the third argument is not null, it specifies the node in the source list at which the transfer begins. That node, its successor, and all subsequent nodes, are transferred to the end of the destination list where they appear in their original order. Other nodes in the source list are unaffected. If the third argument is null, no transfer of nodes takes place. The source and destination list may be the same object. If \verb|DL|, \verb|SL| and \verb|SN| are appropriately typed expressions, the function call \begin{verbatim} list_transfer(DL, SL, SN); \end{verbatim} is equivalent to \begin{verbatim} list_extract(DL, SL, SN, list_last(SL)); \end{verbatim} except that \verb|SL| is evaluated only once. \subsubsection{The {\tt list_verify} function} \indexfunc{list_verify} \synopsis \begin{verbatim} int list_verify(list_t *list);\end{verbatim} \description The intent of the \verb|list_verify| function is to perform a verification on the list object, regardless of whether the Kazlib implementation is operated in verification or production mode. If the list object and its constituent nodes have been correctly manipulated, and the program has not caused any undefined behaviors, the value $1$ is returned. Otherwise, the function may be able to, but is not guaranteed to, detect corruption, and return the value zero. \subsubsection{The {\tt lnode_borrow} function} \indexfunc{lnode_borrow} \synopsis \begin{verbatim} lnode_t *lnode_borrow(lnodepool_t *, void *);\end{verbatim} \description The \verb|lnode_borrow| function allocates a node from the pool managed by the given \verb|lnodepool_t| object. If the request succeeds, a pointer to the node is returned. If the object has run out of nodes, the return value is a null pointer.
\subsubsection{The {\tt lnode_create} function} \indexfunc{lnode_create} \synopsis \begin{verbatim} lnode_t *lnode_create(void *);\end{verbatim} \description The \verb|lnode_create| function dynamically allocates a list node, stores in it the data value specified in the argument and returns a pointer to it. The allocation is performed by a call to the standard \verb|malloc| function. If the allocation fails, a null pointer is returned. \subsubsection{The {\tt lnode_destroy} function} \indexfunc{lnode_destroy} \synopsis \begin{verbatim} void lnode_destroy(lnode_t *);\end{verbatim} \description The \verb|lnode_destroy| function destroys a list node that has been allocated with the \verb|lnode_create| function. The value of any pointer that referred to the node that was thus freed is indeterminate. If the node is currently the occupant of a list, the behavior is undefined if the list is subsequently used. \subsubsection{The {\tt lnode_get} function} \indexfunc{lnode_get} \synopsis \begin{verbatim} void *lnode_get(lnode_t *);\end{verbatim} \description The \verb|lnode_get| function retrieves the \verb|void *| data value associated with a node.\footnote{This is the {\bf only} interface for retrieving the data element.} \subsubsection{The {\tt lnode_init} function} \indexfunc{lnode_init} \synopsis \begin{verbatim} lnode_t *lnode_init(lnode_t *, void *);\end{verbatim} \description The \verb|lnode_init| function initializes the contents of the specified list node object, assigning it the data value specified as the second argument. The first argument is a pointer which refers to a data object that has a suitable size and alignment for the representation of an \verb|lnode_t| type. After initialization with \verb|lnode_init|, the object is subsequently eligible as an operand to the functions of the List component.
\subsubsection{The {\tt lnode_is_in_a_list} function} \indexfunc{lnode_is_in_a_list} \synopsis \begin{verbatim} int lnode_is_in_a_list(lnode_t *);\end{verbatim} \description The \verb|lnode_is_in_a_list| function determines whether the given node is an occupant of some list. If the node is in a list, the function returns the value $1$. If the node is not in any list, the return value is zero. \subsubsection{The {\tt lnode_pool_create} function} \indexfunc{lnode_pool_create} \synopsis \begin{verbatim} lnodepool_t *lnode_pool_create(listcount_t);\end{verbatim} \constraints The value of the argument shall not be zero. \description The \verb|lnode_pool_create| function dynamically allocates, by means of the standard library function \verb|malloc|, a node pool object containing the number of nodes specified as the first argument. If not enough resources are available, a null pointer is returned, otherwise a pointer to the \verb|lnodepool_t| object is returned. \subsubsection{The {\tt lnode_pool_destroy} function} \indexfunc{lnode_pool_destroy} \synopsis \begin{verbatim} void lnode_pool_destroy(lnodepool_t *);\end{verbatim} \description The \verb|lnode_pool_destroy| function deallocates a node pool that was allocated by \verb|lnode_pool_create|. The value of any pointer which referred to the node pool object becomes indeterminate. \subsubsection{The {\tt lnode_pool_init} function} \indexfunc{lnode_pool_init} \synopsis \begin{verbatim} lnodepool_t *lnode_pool_init(lnodepool_t *, lnode_t *, listcount_t);\end{verbatim} \constraints The third argument, which specifies the node count, shall not be zero. \description The \verb|lnode_pool_init| function initializes a data object that has a suitable size and alignment to represent an \verb|lnodepool_t| type. A pointer to this object is passed as the first argument.
The node pool thus created draws nodes from an array specified by the second argument, which shall be a pointer to an object that can behave like an array of \verb|lnode_t| objects. The third argument specifies the number of elements in this array. After this function, the object pointed at by the \verb|lnodepool_t *| argument is eligible for use with the node pool management functions of the List component. Nodes may be drawn from the pool and returned to it. As long as the pool continues to be used, the program should not directly manipulate the node array. In particular, if the program modifies any part of the array, then the behavior is undefined if the \verb|lnodepool_t| object or any nodes drawn from it are subsequently passed to a List function. The program shall not directly use the array elements as independent \verb|lnode_t| objects while the array is associated with the pool; in particular, it shall not pass these elements to Kazlib functions that operate on \verb|lnode_t|. The behavior is undefined if the same array is associated with more than one node pool object, or if two node pool objects are given overlapping arrays. The node array is managed in a manner that is specific to the implementation; the intent is that each element of the array represents a distinct node object, a pointer to which can be returned in response to an allocation request. The \verb|lnode_pool_init| function returns a copy of the first argument. \subsubsection{The {\tt lnode_pool_isempty} function} \indexfunc{lnode_pool_isempty} \synopsis \begin{verbatim} int lnode_pool_isempty(lnodepool_t *);\end{verbatim} \description The \verb|lnode_pool_isempty| function tests the specified \verb|lnodepool_t| object for ability to supply nodes. If the object has been subject to so many requests that it is no longer capable of supplying additional list nodes, the value $1$ is returned. Otherwise the value returned is zero.
\subsubsection{The {\tt lnode_pool_isfrom} function} \indexfunc{lnode_pool_isfrom} \synopsis \begin{verbatim} int lnode_pool_isfrom(lnodepool_t *, lnode_t *);\end{verbatim} \description The function \verb|lnode_pool_isfrom|, intended to serve as a software verification aid, determines whether a list node originates from a particular node pool. The return value is $1$ if this relationship is true, otherwise zero. \subsubsection{The {\tt lnode_put} function} \indexfunc{lnode_put} \synopsis \begin{verbatim} void lnode_put(lnode_t *, void *);\end{verbatim} \description The function \verb|lnode_put| replaces the data element associated with the list node. \subsubsection{The {\tt lnode_return} function} \indexfunc{lnode_return} \synopsis \begin{verbatim} void lnode_return(lnodepool_t *, lnode_t *);\end{verbatim} \constraints The node pointed at by the second argument was derived by an allocation request from the pool pointed at by the first argument.\footnote{In other words, the {\tt lnode_pool_isfrom} function, were it called with the same two arguments, would return $1$ if this constraint is met.} Furthermore, the node must not be the occupant of a list. \description The \verb|lnode_return| function returns a node back to the node pool from which it came. The node must not be subsequently used as an argument to any List functions, until it happens to be allocated again. The pointer to the node object remains valid, and may be returned by a subsequent allocation request from the same node pool. \subsection{Implementation} \index{List!reference implementation} This section describes the elements of the reference implementation of the List component. No requirement is imposed that an implementation should follow the reference implementation. The same is true of the implementation notes for the other components. 
\subsubsection{Types} \index{implementation!List types} \index{typedefs!implementation of List} The reference List implementation is a doubly-linked circular list \index{sentinel node!of linked list} with a {\it sentinel node}. The node structure type is defined like this: \begin{verbatim} typedef struct lnode_t { struct lnode_t *list_next; struct lnode_t *list_prev; void *list_data; } lnode_t; \end{verbatim} and the list structure is defined like this: \begin{verbatim} typedef struct list_t { lnode_t list_nilnode; listcount_t list_nodecount; listcount_t list_maxcount; } list_t; \end{verbatim} The \verb|list_nilnode| member of the list object is the sentinel. It is always present in the list, never deleted. When the list is empty, the sentinel node's \verb|list_next| and \verb|list_prev| pointers simply point back at the sentinel node. The \verb|list_maxcount| member of the list tells how many nodes may be inserted and \verb|list_nodecount| keeps track of the actual count. The reason the sentinel node is called \verb|list_nilnode| is that it acts as the successor of a list's tail node, if there is one, and as the predecessor of the first node. In a linked list implementation that does not use a sentinel node, the \verb|list_next| pointer of the tail node and the \verb|list_prev| pointer of the first node would be null. Note that prefixed names are used for all of the structure members. This is so that the header file conforms to the documented namespace. If, for example, the \verb|list_nilnode| member were simply called \verb|nilnode|, then if the program contained somewhere a macro called \verb|nilnode|, there would be a potential clash. If the program defined \verb|nilnode| prior to including the \verb|list.h| header, the declaration of \verb|struct list_t| would be confounded.
If the program defined \verb|nilnode| after including \verb|list.h|, the definition would interfere with \verb|list.h| macros whose replacement text refers to the \verb|nilnode| member. For programming convenience, the list implementation source file defines short macro names for the structure members: \begin{verbatim} #define next list_next #define prev list_prev #define data list_data \end{verbatim} ... and so forth. These names are private to the translation unit, which includes only standard ANSI C headers. Some of the examples in this section make use of the short names; it is assumed that these macros are in effect. \subsubsection{Selected operations} \index{implementation!List operations} \paragraph{Retrieving the first node} \index{List!first node} Given a pointer \verb|P| to a \verb|list_t| type, the \verb|list_first| function examines the value of \verb|P->nilnode.next| which points at the head node if the list is not empty. If the list is empty, then this expression points back at the sentinel node. In other words, the comparison \begin{verbatim} P->nilnode.next == &P->nilnode \end{verbatim} yields true when the list is empty. In this case, the interface requires that a null pointer be returned by \verb|list_first|. The implementation actually uses the above test, though a test for \verb|P->nodecount| being equal to zero is also possible. In general, any operation which produces a pointer to the nilnode that must be returned back to the calling program must test for that case and return a null pointer instead to satisfy the interface requirements. \paragraph{Node deletion} \index{List!deletion} Thanks to the use of the sentinel node, the list deletion operation doesn't have to test for special cases. 
A node in the middle of the list is deleted in exactly the same way as the first or the last node: \begin{verbatim} lnode_t *list_delete(list_t *list, lnode_t *del) { lnode_t *next = del->next; lnode_t *prev = del->prev; assert (list_contains(list, del)); prev->next = next; next->prev = prev; list->nodecount--; del->next = del->prev = NULL; return del; } \end{verbatim} Quite simply, the successor and predecessor of the deleted node are connected together so that the deleted node is spliced out from the list. If the node is the last remaining one, then the sentinel node serves as both the successor and the predecessor. The effect of the deletion then is to set the sentinel's next and previous links to point to itself, as they did initially when the list was previously empty. The next and prev pointers are set to null not only for enhanced error checking in language implementations that trap dereferences of null pointers, but also to indicate that the node is not on any list. The interface function \verb|lnode_is_in_a_list| makes use of this. It's worth discussing in some detail why the values of expressions \verb|del->next| and \verb|del->prev| are cached in local variables. The actual statements that splice the node out of the list could instead have been written: \begin{verbatim} del->prev->next = del->next; del->next->prev = del->prev; \end{verbatim} However, this causes some compilers to generate less than optimal code because they fail to apply common subexpression elimination to the double occurrence of \verb|del->next|. Caching this expression in a local variable helps to get better code by making the semantics more obvious. In any case, modern compilers tend to do a good job of caching locals in high speed storage, particularly on architectures generously endowed with registers, so using a few extra locals is unlikely to lead to worse target code. 
The principle of using local variables to perform ``manual CSE'' is applied throughout the Kazlib reference implementation. \paragraph{Node insertion} Node insertion is also simple, thanks to the sentinel node which makes the doubly linked list circular. All insertions are done using the functions \verb|list_ins_before| and \verb|list_ins_after|. These are very similar, so it suffices to show \verb|list_ins_before|: \begin{verbatim} void list_ins_before(list_t *list, lnode_t *new, lnode_t *this) { lnode_t *that = this->prev; assert (new != NULL); assert (!list_contains(list, new)); assert (!lnode_is_in_a_list(new)); assert (this == list_nil(list) || list_contains(list, this)); assert (list->nodecount + 1 > list->nodecount); new->next = this; new->prev = that; that->next = new; this->prev = new; list->nodecount++; assert (list->nodecount <= list->maxcount); } \end{verbatim} The node \verb|this| is the one before which the new node is being inserted. Internally, the pointer \verb|that| points to the node after which the insertion takes place. In other words, the function inserts the node \verb|new| in between \verb|this| and \verb|that|. Note the copious assertions which verify all of the documented constraints: that the node is not already on the list, or any other list, that the reference node \verb|this| is in the list, and that the list capacity won't be exceeded, and that the node count doesn't overflow its type. \index{List!insertion} \section{Hash component} The Hash component provides a means to manage collections of elements, called hashes, that are not ordered. Each element in the collection has a unique key, which is used for searching and inserting. The intent is that the implementation is based on extendible hashing, and the interface allows for user-defined hashing functions. The number of elements that can be stored in a hash is limited; maximum number of entries in a hash is known as its {\it capacity}. 
\subsection{Interface} \subsubsection{The {\tt hash.h} header} Each C or C++ translation unit that is to use the functionality of the Hash component shall include the header \verb|hash.h|. This header shall contain declarations of types and external functions, and definitions of macros. The following typedef names shall be defined:\index{Hash!typedef names} \index{typedefs!defined by Hash} \begin{verbatim} hash_t hashcount_t hnode_t hash_val_t hash_comp_t hnode_alloc_t hscan_t hnode_free_t hash_fun_t \end{verbatim} In addition, the following structure tags may be defined:\index{Hash!tag names} \index{tags!defined by Hash} \begin{verbatim} struct hash_t struct hnode_t struct hscan_t \end{verbatim} The following external function names shall be declared: \index{Hash!function names}\index{functions!defined by Hash} \begin{verbatim} hash_create hash_count hash_set_allocator hash_size hash_destroy hash_isfull hash_free_nodes hash_isempty hash_init hash_scan_begin hash_insert hash_scan_next hash_lookup hash_scan_delete hash_delete hash_scan_delfree hash_alloc_insert hash_verify hash_delete_free hnode_create hnode_put hnode_init hnode_get hnode_destroy hnode_getkey hash_free \end{verbatim} \index{Hash!external objects} In addition, the external object name \begin{verbatim} hash_val_t_bit \end{verbatim} shall be declared. The following preprocessor symbols (macros) shall be defined: \index{Hash!macro names}\index{macros!defined by Hash} \indexmacro{HASHCOUNT_T_MAX} \indexmacro{HASH_VAL_T_BIT} \indexmacro{HASH_VAL_T_MAX} \indexmacro{HASH_H} \begin{verbatim} HASHCOUNT_T_MAX HASH_VAL_T_BIT HASH_H\end{verbatim} \index{symbols!reserved by Hash}\index{Hash!reserved symbols} Macro identifiers which begin with the upper-case prefix \verb|HASH| are reserved for future extensions to the \verb|hash.h| header, as are names in the ordinary and tag namespaces which begin with \verb|hash_|, \verb|hnode_| or \verb|hscan_|. 
External names which begin with \verb|hash_|, \verb|hnode_| or \verb|hscan_| are reserved by the Kazlib library regardless of what headers are included. \subsubsection{The {\tt hash_t} type} \indextype{hash_t} The type \verb|hash_t| is an opaque data type which maintains information about the current state of a single hash. From the programmer's viewpoint, a hash consists of an instance of the \verb|hash_t| type, plus zero or more instances of the type \verb|hnode_t|. An instance of the \verb|hash_t| type can be dynamically created using the \verb|hash_create| function, and destroyed by the \verb|hash_destroy| function. Alternately, the program can declare an object of type \verb|hash_t| and have it initialized via the \verb|hash_init| function. When initializing a hash this way, the user must also provide a fixed-size array of \verb|hnode_t *| objects which serves as the hash table. \footnote{A hash initialized this way does not support extendible hashing, because there is no mechanism for growing the user-supplied array.} \subsubsection{The {\tt hnode_t} type} \indextype{hnode_t} The \verb|hnode_t| type is an opaque type that represents a single element that can be inserted into a hash. A hash node contains a reference to satellite data provided by the user. Nodes may be dynamically created by the \verb|hnode_create| function. Alternately, the program may supply an \verb|hnode_t| object that can be initialized by the \verb|hnode_init| function. \subsubsection{The {\tt hash_comp_t} type} \indextype{hash_comp_t} The \verb|hash_comp_t| type is a typedef name for the pointer-to-function type \begin{verbatim} int (*)(const void *, const void *); \end{verbatim} In the context of the Hash component, this type denotes pointers to comparison functions. \subsubsection{The {\tt hscan_t} type} \indextype{hscan_t} The \verb|hscan_t| typedef stands for an opaque type which represents context information for traversing a hash. 
It is initialized by the \verb|hash_scan_begin| function, which specifies a hash to be traversed. Successive elements are retrieved using the \verb|hash_scan_next| function, which eventually indicates that no more elements remain. Inserting to, or deleting from a hash other than using the function \verb|hash_scan_delete| causes any \verb|hscan_t| objects that refer to it to become indeterminate. \subsubsection{The {\tt hashcount_t} type} \indextype{hashcount_t} \indexmacro{HASHCOUNT_T_MAX} This is an unsigned integral type which is capable of representing the number of nodes in a hash. The \verb|HASHCOUNT_T_MAX| macro expands to a constant expression of type \verb|hashcount_t| which specifies the maximum value of that type. \subsubsection{The {\tt hash_val_t} type} \indextype{hash_val_t} \indexmacro{HASH_VAL_T_MAX} The \verb|hash_val_t| type is an unsigned integral type capable of holding at least 32 bits. The purpose of this type is to represent the output values of hashing functions. The \verb|HASH_VAL_T_MAX| macro expands to a constant expression of type \verb|hash_val_t| which specifies the maximum value of that type. \subsubsection{The {\tt hnode_alloc_t} type} \index{Hash!allocator function} The \verb|hnode_alloc_t| identifier is a typedef name for the pointer-to-function type \begin{verbatim} hnode_t *(*)(void *); \end{verbatim} In other words, a pointer to a function that takes a \verb|void *| argument and returns a pointer to \verb|hnode_t|. A function of this type which meets certain behavior criteria may be registered with a \verb|hash_t| object as node allocator, together with a compatible deallocator function. The \verb|void *| argument passes user-specified context information through to the allocator routines (see section \ref{section:hash_set_allocator}). 
\subsubsection{The {\tt hnode_free_t} type} \index{Hash!deallocator function} The \verb|hnode_free_t| identifier is a typedef name for the pointer-to-function type \begin{verbatim} void (*)(hnode_t *, void *); \end{verbatim} A function of this type which meets certain behavior criteria may be registered with a \verb|hash_t| object as node deallocator together with a compatible allocator function. \subsubsection{The {\tt hash_fun_t} type} \index{hashing function} The \verb|hash_fun_t| identifier is a typedef name for the pointer-to-function type \begin{verbatim} hash_val_t (*)(const void *); \end{verbatim} A function of this type which behaves a certain way is called a {\it hashing function}. To be a viable hashing function, such a function must take a pointer to a key object, and produce an integer value that depends only on the contents of the key, and possibly on information that does not change over the lifetime of any hash for which that hashing function is used. Additional requirements for hashing functions are introduced later. \subsubsection{The {\tt hash_val_t_bit} object} \indexobject{hash_val_t_bit} \synopsis \begin{verbatim} extern int hash_val_t_bit;\end{verbatim} \description The \verb|hash_val_t_bit| object of type int has a fixed value which counts the number of bits in the \verb|hash_val_t| object. The program shall not store a value into this object. The value of \verb|hash_val_t_bit| need not be correct until the first successful call to \verb|hash_create| or to \verb|hash_init| completes. The implementation shall provide the macro \verb|HASH_VAL_T_BIT| which expands to a non-lvalue expression that has the same value and type as the object, but which may be a constant expression.\footnote{The intent of providing these values is to ease the implementation of portable hashing functions that take advantage of all of the available bits of a given Kazlib implementation. 
Alternately, hashing functions may be constructed to only use the lower 32 bits of the type.} \subsubsection{The {\tt hash_create} function} \indexfunc{hash_create} \index{Hash!creation of} \index{create!hash object} \synopsis \begin{verbatim} hash_t *hash_create(hashcount_t, hash_comp_t, hash_fun_t);\end{verbatim} \description If sufficient resources exist, the \verb|hash_create| function instantiates and initializes an object of type \verb|hash_t| and returns a pointer to it. Otherwise it returns a null pointer. The first argument establishes the capacity of the hash, which is initially empty. The second argument is a pointer to a comparison function that will be associated with the \verb|hash_t| object for its entire duration. \index{hashing function} The third argument is either null or a pointer to a hashing function that is permanently associated with the object. If it is null, a {\it default hashing function\/} is assigned by the implementation. The hashing function shall be invoked with an argument that is one of the keys that are being inserted into, or sought after, in the hash. The hashing function must produce the same value each time it is called for a given key. It is up to the hash user to define the representation of keys, to manage their storage, and to provide a matching hashing function. The hash stores only generic \verb|void *| pointers to keys. The default hashing function assumes that keys are null terminated strings. That is to say, it behaves as though its \verb|void *| argument points to the first element of an array of \verb|unsigned| \verb|char|, the last of which is a null character. The use of the default hashing function with keys that do not have this representation results in undefined behavior. 
\subsubsection{The {\tt hash_set_allocator} function} \indexfunc{hash_set_allocator} \label{section:hash_set_allocator} \synopsis \begin{verbatim} void hash_set_allocator(hash_t *, hnode_alloc_t, hnode_free_t, void *);\end{verbatim} \constraints The second and third arguments---the function pointers---shall either both be null, or both be non-null. The hash pointed at by the first argument shall be empty. \description When a hash is initialized, it is outfitted with a pair of default node allocation functions. These functions may be replaced with functions supplied by the program by calling the \verb|hash_set_allocator| function and specifying two suitable pointers. If these pointers are null, the default functions are restored. These functions are called to allocate and free \verb|hnode_t| objects by the functions \verb|hash_alloc_insert| and \verb|hash_delete_free| (see sections \ref{section:hash_delete_free} and \ref{section:hash_alloc_insert}). If sufficient resources exist, the allocation function shall return a pointer to a unique storage object that is large enough and suitably aligned to represent an object of type \verb|hnode_t|. Otherwise, the function shall return a null pointer. The deallocation function shall be capable of disposing of the objects created by the matching allocator function. \subsubsection{The {\tt hash_destroy} function} \indexfunc{hash_destroy} \synopsis \begin{verbatim} void hash_destroy(hash_t *);\end{verbatim} \constraints The hash pointed at by the first argument shall be empty. \description The \verb|hash_destroy| function deinitializes and deallocates a hash that was created with \verb|hash_create|. All pointers and \verb|hscan_t| objects that referred to the hash become indeterminate. 
\subsubsection{The {\tt hash_free_nodes} function} \indexfunc{hash_free_nodes} \synopsis \begin{verbatim} void hash_free_nodes(hash_t *);\end{verbatim} \description The \verb|hash_free_nodes| function removes each node from the hash and destroys it as if by calling \verb|hash_delete_free| (Section \ref{section:hash_delete_free}). The order in which the nodes are destroyed is unspecified. \subsubsection{The {\tt hash_free} function} \indexfunc{hash_free} \synopsis \begin{verbatim} void hash_free(hash_t *);\end{verbatim} \description Every node in the hash is removed from the hash and is then subject to the deallocation function. The overall effect is as if the function \verb|hash_delete_free| (Section \ref{section:hash_delete_free}) were invoked on each node, and then \verb|hash_destroy| invoked on the hash itself. This function is obsolescent, and will be removed from some future revision of this document. \subsubsection{The {\tt hash_init} function} \indexfunc{hash_init} \synopsis \begin{verbatim} hash_t *hash_init(hash_t *, hashcount_t, hash_comp_t, hash_fun_t, hnode_t **, hashcount_t); \end{verbatim} \constraints The last argument, which specifies the size of the program-supplied table, shall be an integral power of two that is greater than one---that is to say, an integer of the form $2^k$ where $k$ is a positive integer. \description The \verb|hash_init| function configures the specified \verb|hash_t| object to use a specified array of \verb|hnode_t *| pointer objects as a table. The user is responsible for providing storage for the \verb|hash_t| object and the array. As in the \verb|hash_create| interface, the second parameter specifies the capacity, and the subsequent arguments specify the comparison and hashing function, respectively. The last two arguments specify the table of pointers. The array object shall have at least as many elements as indicated by the last parameter, otherwise the behavior is undefined. 
The call to \verb|hash_init| is said to register the array with the hash. The program shall not register the same array with more than one hash. More specifically, once the program modifies a registered array, or registers it with another hash, it must discontinue use of the first hash. \footnote{Note that no explicit deinitialization function is provided to dissociate the array. A program disposes of a hash created by {\tt hash_init} by discontinuing its use.} \subsubsection{The {\tt hash_insert} function} \indexfunc{hash_insert} \label{section:hash_insert} \synopsis \begin{verbatim} void hash_insert(hash_t *, hnode_t *, const void *);\end{verbatim} \constraints The hash is not full. The key specified by the \verb|void *| parameter does not already exist in the specified hash. The node specified by the second parameter is not already inserted into a hash. \description The \verb|hash_insert| function adds a new node to a hash. The user must supply a node object that was initialized with \verb|hnode_init| or dynamically created with \verb|hnode_create|. If the node is already inserted into the same hash or any other hash, the behavior is undefined. A program may modify a key or node that has been inserted into a hash, or cause the storage of the key or the node to become invalid. However, any subsequent use of the hash invokes undefined behavior, with the following exception: the data pointer stored within a node may be modified using the \verb|hnode_put| function. The \verb|hash_insert| function invokes the hashing function callback with the key pointer as the argument. The \verb|hash_insert| function may need to acquire additional storage in order to support hash table growth. If the storage allocation fails, the function shall fully recover, and insert the node without growing the table. The Hash implementation shall not modify the storage referenced by a key, and shall not access it other than indirectly through the supplied hashing and comparison functions. 
\subsubsection{The {\tt hash_lookup} function} \indexfunc{hash_lookup} \synopsis \begin{verbatim} hnode_t *hash_lookup(hash_t *, const void *);\end{verbatim} \description The \verb|hash_lookup| function searches the given hash for a node matching the given key. Unless the hash is empty, the key shall be compared against one or more keys that are already in the hash, using the comparison function. The key pointer may be identical to one that has already been inserted into the hash.\footnote {In that case, the comparison function must correctly cope with aliased parameters}. If the key is found in the hash, a pointer to the corresponding node is returned.\footnote{The corresponding node is the one that was specified in the call to {\tt hash_insert} together with the matching key.} If the key is not found, a null pointer is returned. \subsubsection{The {\tt hash_delete} function} \indexfunc{hash_delete} \synopsis \begin{verbatim} hnode_t *hash_delete(hash_t *, hnode_t *);\end{verbatim} \constraints The specified node is an occupant of the given hash. \description The \verb|hash_delete| function removes from the given hash a node that has previously been inserted into it. The key under which the node was inserted is also removed from the hash.\footnote{Thus the program may arbitrarily manipulate the removed key without destroying the integrity of the hash.} Any existing \verb|hscan_t| iterator which is associated with the hash becomes indeterminate.\footnote{To delete the current node during hash table traversal, the {\tt hash_scan_delete} function must be used instead.} \subsubsection{The {\tt hash_alloc_insert} function} \label{section:hash_alloc_insert} \indexfunc{hash_alloc_insert} \synopsis \begin{verbatim} int hash_alloc_insert(hash_t *, const void *, void *);\end{verbatim} \constraints The second argument specifies the insertion key. The hash shall not already contain this key. 
\description The \verb|hash_alloc_insert| function dynamically allocates and initializes a \verb|hnode_t| object and inserts it into the given hash. The second and third arguments are pointers to the key and user data objects, respectively, either of which may be null. The allocation is performed by a call to the default allocation function, or to the function that was configured using \verb|hash_set_allocator| (Section \ref{section:hash_set_allocator}). If the allocation succeeds, the insertion is performed and the value 1 is returned. If the allocation fails, no insertion is performed and 0 is returned. \subsubsection{The {\tt hash_delete_free} function} \label{section:hash_delete_free} \indexfunc{hash_delete_free} \synopsis \begin{verbatim} void hash_delete_free(hash_t *, hnode_t *); \end{verbatim} \constraints The given node can be found within the given hash. \description The \verb|hash_delete_free| function is the reverse of \verb|hash_alloc_insert|. It removes the given node from the hash as if by a call to \verb|hash_delete| and then deletes it using the default or user-defined allocator (Section \ref{section:hash_set_allocator}). If the given node had not been created using \verb|hash_alloc_insert|, the behavior is undefined. \subsubsection{The {\tt hnode_put} function} \indexfunc{hnode_put} \synopsis \begin{verbatim} void hnode_put(hnode_t *, void *);\end{verbatim} \description The function \verb|hnode_put| replaces the data element associated with the hash node. \subsubsection{The {\tt hnode_get} function} \indexfunc{hnode_get} \synopsis \begin{verbatim} void *hnode_get(hnode_t *);\end{verbatim} \description The \verb|hnode_get| function retrieves the \verb|void * | data value associated with the given hash node. 
\subsubsection{The {\tt hnode_getkey} function} \indexfunc{hnode_getkey} \synopsis \begin{verbatim} const void *hnode_getkey(hnode_t *);\end{verbatim} \description The \verb|hnode_getkey| function retrieves the \verb|void *| key value associated with the given node. A node acquires an associated key when it is inserted into a hash (see section \ref{section:hash_insert}). Invoking \verb|hnode_getkey| on a node that has not been inserted into a hash results in undefined behavior. \subsubsection{The {\tt hash_count} function} \indexfunc{hash_count} \synopsis \begin{verbatim} hashcount_t hash_count(hash_t *);\end{verbatim} \description The \verb|hash_count| function returns a value which represents the number of nodes currently stored in the hash pointed at by the argument. \subsubsection{The {\tt hash_size} function} \indexfunc{hash_size} \synopsis \begin{verbatim} hashcount_t hash_size(hash_t *hash)\end{verbatim} \description The \verb|hash_size| function returns an implementation-defined value that depends on the number of entries in the given hash. The intent is that the value represent the size of the internal hash table managed by the given hash. \subsubsection{The {\tt hash_isfull} function} \indexfunc{hash_isfull} \synopsis \begin{verbatim} int hash_isfull(hash_t *);\end{verbatim} \description The \verb|hash_isfull| function returns 1 if the hash is full, otherwise it returns 0. If the argument is an expression with side effects, the behavior is undefined.\index{macros!and side effects} \subsubsection{The {\tt hash_isempty} function} \indexfunc{hash_isempty} \synopsis \begin{verbatim} int hash_isempty(hash_t *);\end{verbatim} \description The \verb|hash_isempty| function returns 1 if the given hash is empty, otherwise it returns 0. 
\subsubsection{The {\tt hash_scan_begin} function} \indexfunc{hash_scan_begin} \synopsis \begin{verbatim} void hash_scan_begin(hscan_t *, hash_t *);\end{verbatim} \description The \verb|hash_scan_begin| function initializes the \verb|hscan_t| iterator object, preparing it for a traversal of the given hash. After this initialization, if the hash is modified in any way by the performance of an insertion or deletion operation, the value of the \verb|hscan_t| object becomes indeterminate, with one exception: the \verb|hash_scan_delete| function or the \verb|hash_scan_delfree| function may be used to delete the current node. \subsubsection{The {\tt hash_scan_next} function} \indexfunc{hash_scan_next} \synopsis \begin{verbatim} hnode_t *hash_scan_next(hscan_t *);\end{verbatim} \description If any unvisited nodes remain, the \verb|hash_scan_next| function advances to the next one and returns a pointer to it. Otherwise, it returns a null pointer. Repeated invocations of \verb|hash_scan_next| return a pointer to every node that has been inserted into the table, in no particular order, such that no node is reported twice. \subsubsection{The {\tt hash_scan_delete} function} \indexfunc{hash_scan_delete} \synopsis \begin{verbatim} hnode_t *hash_scan_delete(hash_t *, hnode_t *); \end{verbatim} \constraints The specified node is an occupant of the given hash. \description This function is almost exactly like \verb|hash_delete| except that it may be used to delete a node that has been most recently obtained from \verb|hash_scan_next| without destroying the validity of the \verb|hscan_t| iterator from which the node was obtained. \subsubsection{The {\tt hash_scan_delfree} function} \label{section:hash_scan_delfree} \indexfunc{hash_scan_delfree} \synopsis \begin{verbatim} void hash_scan_delfree(hash_t *, hnode_t *); \end{verbatim} \constraints The given node can be found within the given hash. \description The \verb|hash_scan_delfree| function is similar to \verb|hash_delete_free|. 
It removes the given node from the hash and then deletes it using the default or user-defined allocator (Section \ref{section:hash_set_allocator}). If the given node had not been created using \verb|hash_alloc_insert|, the behavior is undefined. The deletion from the hash is performed as if by a call to \verb|hash_scan_delete|, thus it is safe to delete a node that was most recently obtained from a \verb|hash_scan_next| without destroying the validity of the \verb|hscan_t| iterator. \subsubsection{The {\tt hash_verify} function} \indexfunc{hash_verify} \synopsis \begin{verbatim} int hash_verify(hash_t *hash);\end{verbatim} \description The intent of the \verb|hash_verify| function is to perform a verification on the hash object, regardless of whether the Kazlib implementation is operated in verification or production mode. If the hash object and its constituent nodes have been correctly manipulated, and the program has not caused any undefined behaviors, the value $1$ is returned. Otherwise, the function may be able to, but is not guaranteed to, detect corruption, and return the value zero. \subsubsection{The {\tt hnode_create} function} \indexfunc{hnode_create} \synopsis \begin{verbatim} hnode_t *hnode_create(void *);\end{verbatim} \description The \verb|hnode_create| function dynamically allocates a hash node, stores in it the data value specified in the argument and returns a pointer to it. The allocation is performed by a call to the standard \verb|malloc| function. If the allocation fails, a null pointer is returned. The node's key pointer remains indeterminate until it is the subject of a \verb|hash_insert| operation. \subsubsection{The {\tt hnode_init} function} \indexfunc{hnode_init} \synopsis \begin{verbatim} hnode_t *hnode_init(hnode_t *, void *);\end{verbatim} \description The \verb|hnode_init| function initializes the contents of the specified hash node object, assigning it the data value specified as the second argument. 
The first argument is a pointer which refers to a data object that has a suitable size and alignment for the representation of an \verb|hnode_t| type. After initialization with \verb|hnode_init|, the object is subsequently eligible as an operand to the functions of the hash component, other than \verb|hnode_getkey|. The node's key pointer remains indeterminate until it is the subject of a \verb|hash_insert| operation. \subsubsection{The {\tt hnode_destroy} function} \indexfunc{hnode_destroy} \synopsis \begin{verbatim} void hnode_destroy(hnode_t *);\end{verbatim} \description The \verb|hnode_destroy| function destroys a hash node that has been allocated with the \verb|hnode_create| function. The value of any pointer that referred to the node that was thus freed is indeterminate. If the node is currently the occupant of a hash, the behavior is undefined if the hash is subsequently used. \subsection{Implementation} TODO \section{Dictionary component} \index{Dictionary} The Dictionary component provides a means to manage ordered sequences of elements, having the following properties: \begin{enumerate} \item If the dictionary is not empty, a first and last element can be identified. In a dictionary having only one element, that one element is both the first and last element. \item Each element that is not the last element has another element as its {\it successor}. \index{successor!of a dictionary element} \index{Dictionary!successor of an element} \item Each element that is not the first element has a {\it predecessor}. \index{predecessor!of a dictionary element} \index{Dictionary!predecessor of an element} \item No element is the predecessor or successor of more than one element. \item If one element is the successor of another, the other is necessarily the predecessor of the first. \item Each element is associated with a piece of information known as the key. 
The sequence is ordered according to the relation imposed by the comparison function: the key of an element compares greater than or equal to the key of its predecessor. \item If duplicate keys are present, then elements having the same key form a subsequence with no other keys in it, which follows from the previous property. No additional ordering is imposed within such subsequences. \item Each element is associated with arbitrary satellite data. \end{enumerate} The Dictionary component supports efficient operations over such ordered sequences: such as insertion, deletion, ordered traversal, as well as exact and range searches.\footnote{The implicit association of keys and satellite data, together with the ability to efficiently search by key to retrieve data, gives rise to the term {\it dictionary}. A dictionary need not be ordered; a hash can therefore also be considered to be a kind of dictionary; the Kazlib nomenclature is somewhat unfortunate in that regard.} The number of elements that can be stored in a dictionary is limited; the maximum number of entries in a dictionary is known as its {\it capacity}. \subsection{Interface} \subsubsection{The {\tt dict.h} header} Each C or C++ translation unit that is to use the functionality of the Dict component shall include the header \verb|dict.h|. This header shall contain declarations of types and external functions, and definitions of macros. 
The following typedef names shall be defined:\index{Dict!typedef names} \index{typedefs!defined by Dict} \begin{verbatim} dict_t dnode_process_t dnode_t dnode_alloc_t dictcount_t dnode_free_t dict_comp_t dict_load_t \end{verbatim} In addition, the following structure tags may be defined:\index{Dict!tag names} \index{tags!defined by Dict} \begin{verbatim} struct dict_t struct dnode_t \end{verbatim} The following external function names shall be declared: \index{Dict!function names}\index{functions!defined by Dict} \begin{verbatim} dict_create dict_count dict_set_allocator dict_isempty dict_destroy dict_isfull dict_free_nodes dict_contains dict_init dict_allow_dupes dict_verify dnode_is_in_a_dict dict_lookup dnode_create dict_lower_bound dnode_init dict_upper_bound dnode_destroy dict_insert dnode_get dict_delete dnode_getkey dict_alloc_insert dnode_put dict_delete_free dict_process dict_first dict_load_begin dict_last dict_load_next dict_next dict_load_end dict_prev dict_free \end{verbatim} The following preprocessor symbols shall be defined: \index{Dict!macro names}\index{macros!defined by Dict} \indexmacro{DICTCOUNT_T_MAX} \indexmacro{DICT_H} \begin{verbatim} DICTCOUNT_T_MAX DICT_H\end{verbatim} \index{symbols!reserved by Dict}\index{Dict!reserved symbols} Macro identifiers which begin with the upper-case prefix \verb|DICT| are reserved for future extensions to the \verb|dict.h| header, as are names in the ordinary and tag namespaces which begin with \verb|dict_| or \verb|dnode_|. External names which begin with \verb|dict_| or \verb|dnode_| are reserved by the Kazlib library regardless of what headers are included. \subsubsection{The {\tt dict_t} type} \indextype{dict_t} The type \verb|dict_t| is an opaque data type which represents a single dictionary. A dictionary consists of an instance of the \verb|dict_t| type, plus zero or more instances of the type \verb|dnode_t|. An object of type \verb|dict_t| can be initialized by the \verb|dict_init| function. 
Alternately, the \verb|dict_create| function will dynamically allocate and initialize a dictionary. An empty dictionary created by \verb|dict_create| may be disposed of using \verb|dict_destroy|. \subsubsection{The {\tt dnode_t} type} \indextype{dnode_t} The \verb|dnode_t| type represents a single entry in a dictionary called a dictionary node. The object stores a pointer to user data, and a key pointer that is assigned to the dictionary node at the time when it is inserted into the dictionary. A \verb|dnode_t| may be dynamically created using \verb|dnode_create| and destroyed using \verb|dnode_destroy|. Alternately, the program may supply storage for a \verb|dnode_t| object and initialize it using the \verb|dnode_init| function. \subsubsection{The {\tt dictcount_t} type} \indextype{dictcount_t} \indexmacro{DICTCOUNT_T_MAX} This is an unsigned integral type which is capable of representing the number of nodes in a dictionary. The \verb|DICTCOUNT_T_MAX| macro expands to a constant expression of type \verb|dictcount_t| which specifies the maximum value of that type. \subsubsection{The {\tt dict_comp_t} type} \indextype{dict_comp_t} The \verb|dict_comp_t| type is a typedef name for the pointer-to-function type \begin{verbatim} int (*)(const void *, const void *); \end{verbatim} In the context of the Dictionary component, this type denotes pointers to comparison functions. \subsubsection{The {\tt dnode_process_t} type} \indextype{dnode_process_t} The type \verb|dnode_process_t| is a typedef name for the pointer-to-function type \begin{verbatim} void (*)(dict_t *, dnode_t *, void *); \end{verbatim} In the context of the Dictionary component, this is the type of a dictionary node processing function (See section \ref{section:dict_process}). The first two parameters identify a dictionary and the node within that dictionary that is being processed. The third argument is a context pointer. 
\subsubsection{The {\tt dnode_alloc_t} type} \indextype{dnode_alloc_t} The type \verb|dnode_alloc_t| is a typedef name for the pointer-to-function type \begin{verbatim} dnode_t *(*)(void *); \end{verbatim} A function compatible with this type which meets certain other criteria may be registered with a \verb|dict_t| object as a node allocator function (See section \ref{section:dict_set_allocator}). \subsubsection{The {\tt dnode_free_t} type} \indextype{dnode_free_t} The type \verb|dnode_free_t| is a typedef name for the pointer-to-function type \begin{verbatim} void (*)(dnode_t *, void *); \end{verbatim} A function compatible with this type which meets certain other criteria may be registered with a \verb|dict_t| object as a node deallocator function. (See section \ref{section:dict_set_allocator}). \subsubsection{The {\tt dict_load_t} type} \indextype{dict_load_t} The \verb|dict_load_t| type is opaque, and represents a context structure used during the process of constructing a dictionary from an ordered list of nodes. (See sections \ref{section:dict_load_begin} to \ref{section:dict_load_end}). \subsubsection{The {\tt dict_create} function} \indexfunc{dict_create} \index{Dictionary!creation of} \index{create!dictionary object} \synopsis \begin{verbatim} dict_t *dict_create(dictcount_t, dict_comp_t);\end{verbatim} \description The \verb|dict_create| function allocates a new object of type \verb|dict_t| and initializes it to act as a dictionary. If insufficient resources exist for the allocation, a null pointer is returned, otherwise a pointer to the dictionary is returned. The first argument specifies the capacity of the dictionary, which is initially empty. The second argument is a comparison function that is used for comparing keys during insertion and searching operations, and is associated with the dictionary for its entire duration. 
\subsubsection{The {\tt dict_set_allocator} function} \label{section:dict_set_allocator} \indexfunc{dict_set_allocator} \synopsis \begin{verbatim} void dict_set_allocator(dict_t *, dnode_alloc_t, dnode_free_t, void *);\end{verbatim} \constraints The second and third arguments---the function pointers---shall either both be null, or both be non-null. The dictionary pointed at by the first argument shall be empty. \description When a dictionary is initialized, it is outfitted with a pair of default node allocation functions. These functions may be replaced with functions supplied by the program by calling the \verb|dict_set_allocator| function and specifying two suitable pointers. If these pointers are null, the default functions are restored. These functions are called to allocate and free \verb|dnode_t| objects by the functions \verb|dict_alloc_insert| and \verb|dict_delete_free| (see sections \ref{section:dict_delete_free} and \ref{section:dict_alloc_insert}). If sufficient resources exist, the allocation function shall return a pointer to a unique storage object that is large enough and suitably aligned to represent an object of type \verb|dnode_t|. Otherwise, the function shall return a null pointer. The deallocation function shall be capable of disposing of the objects created by the matching allocator function. \subsubsection{The {\tt dict_destroy} function} \indexfunc{dict_destroy} \synopsis \begin{verbatim} void dict_destroy(dict_t *);\end{verbatim} \constraints The dictionary pointed at by the first argument shall be empty. \description The \verb|dict_destroy| function deinitializes and deallocates a dictionary object that was created by \verb|dict_create|. All pointers that referred to the dictionary become indeterminate. 
\subsubsection{The {\tt dict_free_nodes} function} \indexfunc{dict_free_nodes} \synopsis \begin{verbatim} void dict_free_nodes(dict_t *);\end{verbatim} \description Every node in the dictionary is removed from the dictionary and is then subject to the deallocation function, as if the function \verb|dict_delete_free| (Section \ref{section:dict_delete_free}) were invoked on each node, in some unspecified order. \subsubsection{The {\tt dict_free} function} \indexfunc{dict_free} \synopsis \begin{verbatim} void dict_free(dict_t *);\end{verbatim} \description This function is obsolescent, and will be removed from some future revision of this document. It is equivalent to \verb|dict_free_nodes|. \subsubsection{The {\tt dict_init} function} \indexfunc{dict_init} \synopsis \begin{verbatim} dict_t *dict_init(dict_t *, dictcount_t, dict_comp_t);\end{verbatim} \description The \verb|dict_init| function prepares the specified \verb|dict_t| object to behave as a dictionary that may subsequently be used with the other dictionary functions. The first argument points to the \verb|dict_t| object to be initialized. The second argument specifies the capacity of the dictionary. The third argument is a pointer to the comparison function which shall be associated with the dictionary for its entire duration. \subsubsection{The {\tt dict_verify} function} \indexfunc{dict_verify} \synopsis \begin{verbatim} int dict_verify(dict_t *);\end{verbatim} \description The intent of the \verb|dict_verify| function is to perform a verification on the dictionary object, regardless of whether the Kazlib implementation is operated in verification or production mode. If the dictionary object and its constituent nodes have been correctly manipulated, and the program has not caused any undefined behaviors, the value $1$ is returned. Otherwise, the function may be able to, but is not guaranteed to, detect corruption, and return the value zero. 
\subsubsection{The {\tt dict_lookup} function} \indexfunc{dict_lookup} \synopsis \begin{verbatim} dnode_t *dict_lookup(dict_t *, const void *);\end{verbatim} \description The \verb|dict_lookup| function searches the given dictionary for a node matching the given key. Unless the dictionary is empty, the key shall be compared against one or more keys that are already in the dictionary, using the comparison function. The key pointer may be identical to one that has already been inserted into the dictionary. If the key is found in the dictionary, a pointer to the corresponding node is returned. If the key is not found, a null pointer is returned. If the dictionary contains more than one key which matches the search key, then the first key in the subsequence of duplicate keys is returned. \subsubsection{The {\tt dict_lower_bound} function} \indexfunc{dict_lower_bound} \synopsis \begin{verbatim} dnode_t *dict_lower_bound(dict_t *, const void *);\end{verbatim} \description The \verb|dict_lower_bound| function searches the dictionary in a manner similar to \verb|dict_lookup|. If the given key exists in the dictionary, the behavior is exactly the same as \verb|dict_lookup|. However, if the key is not found, then the node which has the smallest key that is greater than the search key is returned. If no such key exists (because the search key is higher than any other key in the dictionary or the dictionary is empty) then a null pointer is returned. \example Suppose that pointer \verb|d| refers to a dictionary whose registered comparison function performs lexicographic comparisons on ordinary C strings, similar to \verb|strcmp|. 
To iterate over all keys that begin with the letter \verb|d|, the following idiom can be used: \begin{verbatim} dict_t *d; dnode_t *n, *start, *end; /*...*/ start = dict_lower_bound(d, "d"); end = dict_lower_bound(d, "e"); for (n = start; n != end; n = dict_next(d, n)) { /* n points to each node in turn whose key starts with 'd' */ } \end{verbatim} Note that if the dictionary is empty, or has keys which are all lower than \verb|"d"|, then both \verb|start| and \verb|end| shall be null pointers, and the loop body will never execute since the two are equal. Also note that if there are keys that begin with \verb|d| and the dictionary's last node has a key that starts with \verb|d|, then \verb|end| is null, otherwise \verb|end| points to the first key that doesn't begin with \verb|d|. In both cases, the loop will terminate after processing the last \verb|d| key, because \verb|dict_next| shall produce a pointer that is equal to \verb|end|. \subsubsection{The {\tt dict_upper_bound} function} \indexfunc{dict_upper_bound} \synopsis \begin{verbatim} dnode_t *dict_upper_bound(dict_t *, const void *);\end{verbatim} \description The \verb|dict_upper_bound| function searches the dictionary in a manner similar to \verb|dict_lookup|. If the given key exists in the dictionary, the behavior is exactly the same as \verb|dict_lookup| with one difference: If the dictionary contains more than one key which matches the search key, then the last key in the sequence of duplicates is returned, rather than the first. However, if the key is not found, then the node which has the greatest key that is lower than the search key is returned. If no such key exists (because the search key is lower than any other key in the dictionary or the dictionary is empty) then a null pointer is returned. \example The following idiom can be used to iterate over a sequence of duplicate keys without the overhead of performing a full comparison before each iteration to detect the first non-matching key. 
\begin{verbatim} dict_t *d; void *key; dnode_t *n, *start, *end; /* ... Initialize d, and key. ...*/ start = dict_lower_bound(d, key); end = dict_upper_bound(d, key); /* advance end to first non-matching key */ if (end != 0) end = dict_next(d, end); else end = start; /* start == dict_first(d) in this case */ for (n = start; n != end; n = dict_next(d, n)) { /* n points to duplicate keys in turn */ } \end{verbatim} Immediately prior to the execution of the if statement, exactly one of the following conditions is true: \begin{itemize} \item The key was found in the dictionary; \verb|start| points to the first duplicate node and \verb|end| points to the last. \item The dictionary has only higher keys than the search key; \verb|start| points to the first node in the dictionary and \verb|end| is null. \item The dictionary has only lower keys than the search key; \verb|end| points to the last node in the dictionary, and \verb|start| is null. \item The dictionary has both lower and higher keys; \verb|end| and \verb|start| point to two consecutive nodes, respectively, such that the node pointed at by \verb|end| has a lower key than the search key and the node pointed at by \verb|start| has a higher key. \item The dictionary is empty; \verb|start| and \verb|end| are null. \end{itemize} The if statement ensures that if the dictionary contains no matching keys, then \verb|start| and \verb|end| are equal, and if the dictionary contains one or more matching keys, then \verb|end| points to the first non-matching node, or is null if there is no such node. Thus the loop performs correctly in all circumstances. \subsubsection{The {\tt dict_insert} function} \label{section:dict_insert} \indexfunc{dict_insert} \synopsis \begin{verbatim} void dict_insert(dict_t *, dnode_t *, const void *);\end{verbatim} \constraints The dictionary is not full. 
If the dictionary has not been configured to allow duplicate keys, the key specified by the \verb|void *| parameter does not already exist in the dictionary. \description The \verb|dict_insert| function adds a new node to a dictionary. The user must supply a node object that was initialized with \verb|dnode_init| or dynamically created with \verb|dnode_create|. If the node is already inserted into the same dictionary or any other dictionary, the behavior is undefined. Duplicate keys may be inserted into a dictionary only if the dictionary has been configured to permit duplicate keys (see section \ref{section:dict_allow_dupes}). If this is the case, it is also permissible to insert the same key more than once: the implementation shall not distinguish between distinct keys that are declared equal by a correctly designed comparison function, and two key pointers that refer to the same key. A program may modify a key or node that has been inserted into a dictionary, or cause the storage of the key or the node to become invalid. However, any subsequent use of the dictionary invokes undefined behavior, with the following exception: the data pointer stored within a node may be modified using the \verb|dnode_put| function. The Dictionary implementation shall not modify the storage referenced by a key, and shall not access it other than indirectly through the supplied comparison function. \subsubsection{The {\tt dict_delete} function} \indexfunc{dict_delete} \synopsis \begin{verbatim} dnode_t *dict_delete(dict_t *, dnode_t *);\end{verbatim} \constraints The specified node is an occupant of the given dictionary. \description The \verb|dict_delete| function removes from the given dictionary a node that has previously been inserted into it. The key under which the node was inserted is also removed from the dictionary. 
\subsubsection{The {\tt dict_alloc_insert} function} \label{section:dict_alloc_insert} \indexfunc{dict_alloc_insert} \synopsis \begin{verbatim} int dict_alloc_insert(dict_t *, const void *, void *);\end{verbatim} \constraints The second argument specifies the insertion key. The dictionary shall not already contain this key unless it has been configured as allowing duplicates. \description The \verb|dict_alloc_insert| function dynamically allocates and initializes a \verb|dnode_t| object and inserts it into the given dictionary. The second and third arguments are pointers to the key and user data objects, respectively, either of which may be null. The allocation is performed by a call to the default allocation function, or to the function that was configured using \verb|dict_set_allocator| (Section \ref{section:dict_set_allocator}). If the allocation succeeds, the insertion is performed and the value 1 is returned. If the allocation fails, no insertion is performed and 0 is returned. \subsubsection{The {\tt dict_delete_free} function} \label{section:dict_delete_free} \indexfunc{dict_delete_free} \synopsis \begin{verbatim} void dict_delete_free(dict_t *, dnode_t *);\end{verbatim} \constraints The given node can be found within the given dictionary. \description The \verb|dict_delete_free| function is the reverse of \verb|dict_alloc_insert|. It removes the given node from the dictionary and then deletes it using the default or user-defined allocator (Section \ref{section:dict_set_allocator}). If the given node had not been created using \verb|dict_alloc_insert|, the behavior is undefined. \subsubsection{The {\tt dict_first} function} \indexfunc{dict_first} \synopsis \begin{verbatim} dnode_t *dict_first(dict_t *);\end{verbatim} \description If the dictionary pointed at by the argument is empty, a null pointer is returned. Otherwise, a pointer to the first node in that dictionary is returned. 
\subsubsection{The {\tt dict_last} function} \indexfunc{dict_last} \synopsis \begin{verbatim} dnode_t *dict_last(dict_t *);\end{verbatim} \description If the dictionary pointed at by the argument is empty, a null pointer is returned. Otherwise, a pointer to the last node in that dictionary is returned. \subsubsection{The {\tt dict_next} function} \indexfunc{dict_next} \synopsis \begin{verbatim} dnode_t *dict_next(dict_t *, dnode_t *);\end{verbatim} \constraints The node pointed at by the second argument is an occupant of the dictionary pointed at by the first argument. \description If the node pointed at by the second argument has a successor, a pointer to that successor is returned. Otherwise, a null pointer is returned. \example The \verb|dict_first| and \verb|dict_next| functions can be used together to iterate over all of the elements of the dictionary, as in the following idiom: \begin{verbatim} dict_t *d; dnode_t *n; /*...*/ for (n = dict_first(d); n != 0; n = dict_next(d, n)) { /* n points to each node in turn */ } \end{verbatim} \subsubsection{The {\tt dict_prev} function} \indexfunc{dict_prev} \synopsis \begin{verbatim} dnode_t *dict_prev(dict_t *, dnode_t *);\end{verbatim} \constraints The node pointed at by the second argument is an occupant of the dictionary pointed at by the first argument. \description If the node pointed at by the second argument has a predecessor, a pointer to that predecessor is returned. Otherwise, a null pointer is returned. \subsubsection{The {\tt dict_count} function} \indexfunc{dict_count} \synopsis \begin{verbatim} dictcount_t dict_count(dict_t *);\end{verbatim} \description The \verb|dict_count| function returns a value which represents the number of nodes currently stored in the dictionary pointed at by the argument. 
\subsubsection{The {\tt dict_isempty} function} \indexfunc{dict_isempty} \synopsis \begin{verbatim} int dict_isempty(dict_t *);\end{verbatim} \description The \verb|dict_isempty| function returns 1 if the given dictionary is empty, otherwise it returns 0. \subsubsection{The {\tt dict_isfull} function} \indexfunc{dict_isfull} \synopsis \begin{verbatim} int dict_isfull(dict_t *);\end{verbatim} \description The \verb|dict_isfull| function returns 1 if the dictionary is full, otherwise it returns 0. If the argument is an expression with side effects, the behavior is undefined.\index{macros!and side effects} \subsubsection{The {\tt dict_contains} function} \indexfunc{dict_contains} \synopsis \begin{verbatim} int dict_contains(dict_t *, dnode_t *);\end{verbatim} \description The \verb|dict_contains| function searches the given dictionary to determine whether the given node is an occupant. If the node is found, 1 is returned, otherwise 0 is returned.\footnote{The intent is to support verification. The search may be inefficient compared to {\tt dict_lookup}.} \subsubsection{The {\tt dict_allow_dupes} function} \label{section:dict_allow_dupes} \indexfunc{dict_allow_dupes} \synopsis \begin{verbatim} void dict_allow_dupes(dict_t *);\end{verbatim} \constraints The dictionary specified by the first argument shall be empty. \description The \verb|dict_allow_dupes| function configures the given dictionary to support duplicate keys. This can only be done when the dictionary is empty, and the change cannot be reverted. \subsubsection{The {\tt dnode_is_in_a_dict} function} \indexfunc{dnode_is_in_a_dict} \synopsis \begin{verbatim} int dnode_is_in_a_dict(dnode_t *);\end{verbatim} \description The \verb|dnode_is_in_a_dict| function reports whether the given node is currently the occupant of some dictionary. If so, 1 is returned. Otherwise 0 is returned. 
\subsubsection{The {\tt dnode_create} function} \indexfunc{dnode_create} \synopsis \begin{verbatim} dnode_t *dnode_create(void *);\end{verbatim} \description The \verb|dnode_create| function dynamically allocates a dictionary node, stores in it the data value specified in the argument and returns a pointer to it. The allocation is performed by a call to the standard \verb|malloc| function. If the allocation fails, a null pointer is returned. The node's key pointer remains indeterminate until it is the subject of a \verb|dict_insert| operation. \subsubsection{The {\tt dnode_init} function} \indexfunc{dnode_init} \synopsis \begin{verbatim} dnode_t *dnode_init(dnode_t *, void *);\end{verbatim} \description The \verb|dnode_init| function initializes the contents of the specified dictionary node object, assigning it the data value specified as the second argument. The first argument is a pointer which refers to a data object that has a suitable size and alignment for the representation of a \verb|dnode_t| type. After initialization with \verb|dnode_init|, the object is subsequently eligible as an operand to the functions of the dictionary component, other than \verb|dnode_getkey|. The node's key pointer remains indeterminate until it is the subject of a \verb|dict_insert| operation. \subsubsection{The {\tt dnode_destroy} function} \indexfunc{dnode_destroy} \synopsis \begin{verbatim} void dnode_destroy(dnode_t *);\end{verbatim} \description The \verb|dnode_destroy| function destroys a dictionary node that has been allocated with \verb|dnode_create|. The value of any pointer that referred to the node that was thus freed is indeterminate. If the node is currently the occupant of a dictionary, the behavior is undefined if the dictionary is subsequently used. 
\subsubsection{The {\tt dnode_get} function} \indexfunc{dnode_get} \synopsis \begin{verbatim} void *dnode_get(dnode_t *);\end{verbatim} \description The \verb|dnode_get| function retrieves the \verb|void * | data value associated with the given dictionary node. \subsubsection{The {\tt dnode_getkey} function} \indexfunc{dnode_getkey} \synopsis \begin{verbatim} const void *dnode_getkey(dnode_t *);\end{verbatim} \description The \verb|dnode_getkey| function retrieves the \verb|void *| key value associated with the given node. A node acquires an associated key when it is inserted into a dictionary (see section \ref{section:dict_insert}). Invoking \verb|dnode_getkey| on a node that has not been inserted into a dictionary results in undefined behavior. \subsubsection{The {\tt dnode_put} function} \indexfunc{dnode_put} \synopsis \begin{verbatim} void dnode_put(dnode_t *, void *);\end{verbatim} \description The function \verb|dnode_put| replaces the data element associated with the dictionary node. \subsubsection{The {\tt dict_process} function} \label{section:dict_process} \indexfunc{dict_process} \synopsis \begin{verbatim} void dict_process(dict_t *, void *, dnode_process_t);\end{verbatim} \description The \verb|dict_process| function iterates over the nodes of a dict, and for each node invokes a callback function.\footnote{In most cases, it is more convenient and preferable to iterate over the dict using explicit calls to {\tt dict_first} and {\tt dict_next}.} The second argument is a {\it context pointer\/} which can have any value. The third argument of \verb|dict_process| shall be a pointer to a function which is compatible with the specified type. If the dict contains one or more nodes, then the function is invoked once for each node, in order from first to last. 
On each invocation, the first argument of the callback is a pointer to the dict; the second argument is a pointer to a node, called the {\it subject node}; and the third argument repeats the context pointer value that was originally passed to \verb|dict_process|. The callback function may delete the subject node by, for instance, calling \verb|dict_delete|. It may insert new nodes into the dictionary; however, if such an insertion causes the subject node to acquire a new successor, it is implementation-defined whether upon returning from the callback function, the traversal shall continue with the new successor, or with the original successor. The callback function, and any function invoked from the callback function, shall not destroy the dictionary or make any modifications other than the insertion of new nodes, or the deletion of the subject node. The callback function may recursively invoke \verb|dict_process| for the same dictionary or for a different dictionary; the callback invocations arising out of the nested call inherit all of the restrictions of the outer callback in addition to being subject to the usual restrictions.\footnote{This means, for instance, that if two callbacks are in progress for different subject nodes from the same dictionary, the inner callback may not delete its subject node, because it inherits the restriction that the only permitted deletion is the outer callback's subject node.} The callback function may freely operate on a different dictionary, subject to any inherited restrictions. \subsubsection{The {\tt dict_load_begin} function} \label{section:dict_load_begin} \indexfunc{dict_load_begin} \synopsis \begin{verbatim} void dict_load_begin(dict_load_t *, dict_t *);\end{verbatim} \constraints The dictionary specified by the second argument is empty. 
\description The \verb|dict_load_begin| function prepares a context object for the task of constructing the contents of a dictionary out of a sequence of elements which is already sorted according to the sorting function of the dictionary.\footnote{This process is more efficient than inserting all of the elements into a dictionary using {\tt dict_insert}. In the reference implementation, this process runs in linear time, or $O(n)$ whereas construction by repeated insertions runs in $O(n\log n)$ time.} The actual construction is performed by zero or more calls to \verb|dict_load_next| and is finalized by \verb|dict_load_end|. The \verb|dict_load_begin| function is said to bind the dictionary and context object together; the only way to unbind the two is by calling \verb|dict_load_end| on the context object. The program shall not manipulate a dictionary that is bound to a context object, other than by calling \verb|dict_load_next|. The program shall not attempt to bind a dictionary to more than one context object simultaneously, or a context object to more than one dictionary simultaneously. \subsubsection{The {\tt dict_load_next} function} \label{section:dict_load_next} \indexfunc{dict_load_next} \synopsis \begin{verbatim} void dict_load_next(dict_load_t *, dnode_t *, const void *);\end{verbatim} \constraints The node pointed at by the second argument is not an occupant of any dictionary. The key specified by the third argument is greater than or equal to all keys specified in previous calls to \verb|dict_load_next| in the context of the same construction, according to the comparison function of the dictionary that is being constructed. That is to say, successive calls specify monotonically increasing keys. The dictionary is not full. \description The \verb|dict_load_next| function continues the construction of a dictionary from an ordered list of elements by specifying the next node in the sequence, along with its key. 
After this call, the node is considered to be inserted into the dictionary as if by \verb|dict_insert|. \subsubsection{The {\tt dict_load_end} function} \label{section:dict_load_end} \indexfunc{dict_load_end} \synopsis \begin{verbatim} void dict_load_end(dict_load_t *);\end{verbatim} \description The \verb|dict_load_end| function finalizes the construction of a dictionary from an ordered sequence. It breaks the binding between the \verb|dict_load_t| context object and the dictionary. \subsection{Implementation} TODO \section{Exception component} \label{section:exception_component} \index{Exception} The Exception component provides distributed error handling in the form of exceptions, behind an interface designed to be implementable using only the portable features of standard C. The features of this interface are: \begin{itemize} \item the ability to set up nested try-catch regions which declare specific exceptions that they can handle; \item grouped exceptions, allowing handlers to catch specific exceptions, or any exception within a group; \item the ability to designate a function that is called in the event that an exception is thrown that has no handler; \item a mechanism for releasing resources acquired by code that is terminated by an exception; \item the ability to pass dynamically allocated data from the throw site to the catch site. \end{itemize} An exception is simply a means of returning to a prior place in the program's execution. The ANSI C language provides crude, but portable, exception handling consisting of the \verb|jmp_buf| type, the \verb|setjmp| macro and the \verb|longjmp| function. The Kazlib Exception component can be implemented in terms of these primitives. The constraint to implementability in standard C leads to a number of concessions: \begin{itemize} \item A program can leave cleanup regions and try-catch regions by improper means, such as using \verb|goto|, \verb|return| or \verb|break|. 
This is difficult to diagnose, and is simply documented as undefined behavior. There is no support in the standard language for designating code that is executed whenever a statement block terminates by any means. \item For the same reason, the exception handling interface described here has an explicit mechanism for deallocation of resources associated with statement blocks that are terminated by exceptions. This interface is not as convenient as language support for automatic cleanup. Correct management of temporary dynamic resources using this interface requires programmer discipline. \item The requirement to be able to use \verb|setjmp| to save a context to be later returned to during exception processing brings in restrictions related to non-volatile objects. If non-volatile objects are modified between the time an exception handling region is initiated and the time an exception is caught in the region, these objects have indeterminate values.\footnote{This liberty in ANSI C allows compiler or library writers to implement {\tt setjmp} as a simple mechanism that takes a snapshot of the machine context. Objects that are optimized into special storage---such as registers---and whose values change since the context saving operation will be clobbered when the context is restored by {\tt longjmp}.} \end{itemize} \subsection{Interface} \subsubsection{The {\tt except.h} header} Each C or C++ translation unit that is to use the functionality of the Exception component shall include the header \verb|except.h|. This header shall contain declarations of types and external functions, and definitions of macros. 
The following typedef names shall be defined:\index{Exception!typedef names} \begin{verbatim} except_id_t except_t \end{verbatim} The following external function names shall be declared: \index{Exception!function names}\index{functions!defined by Exception} \begin{verbatim} except_init except_group except_deinit except_message except_rethrow except_data except_throw except_take_data except_throwd except_set_allocator except_throwf except_alloc except_unhandled_catcher except_free except_code \end{verbatim} The following preprocessor symbols shall be defined: \index{Exception!macro names}\index{macros!defined by Exception} \indexmacro{XCEPT_H} \begin{verbatim} XCEPT_H except_cleanup_pop XCEPT_GROUP_ANY except_checked_cleanup_pop XCEPT_CODE_ANY except_try_push XCEPT_BAD_ALLOC except_try_pop except_cleanup_push \end{verbatim} Finally, these two enum constants are defined: \begin{verbatim} except_no_call except_call \end{verbatim} \index{symbols!reserved by Exception}\index{Exception!reserved symbols} Macro identifiers which begin with the upper-case prefix \verb|XCEPT|\footnote{The prefix {\tt XCEPT} is used rather than {\tt EXCEPT} because ISO 9899 reserves preprocessor symbols beginning with {\tt E} followed by a digit or capital letter for future extensions to the {\tt <errno.h>} header.} are reserved for future extensions to the \verb|except.h| header, as are names in the ordinary and tag namespaces which begin with \verb|except_|. External names which begin with \verb|except_| are reserved by the Kazlib library regardless of what headers are included. 
\subsubsection{The {\tt except_id_t} type} \label{section:except_id_t} \indextype{except_id_t} \indexmacro{XCEPT_GROUP_ANY} \indexmacro{XCEPT_CODE_ANY} The type \verb|except_id_t| is an aggregate consisting of two unsigned long values which represent an {\it exception group\/} and {\it exception code}, respectively, in that order.\footnote{Thus, the program may initialize an {\tt except_id_t} object using two brace-enclosed initializers which specify the group and code.} An exception group is a value which identifies a group of related exceptions. An exception code is a value which identifies a specific exception uniquely within a group. The codes are assigned by the program designer. The Exception component reserves only the group and code values of zero, which, when used to specify a catch, match any value. The preprocessor symbols \verb|XCEPT_GROUP_ANY| and \verb|XCEPT_CODE_ANY| each expand to a constant integral expression having the value zero. These symbols are intended, in a catch specification, to clearly convey that any exception or any group is being caught. The preprocessor symbol \verb|XCEPT_BAD_ALLOC| expands to an integral constant expression having the value 1. This symbol is intended to represent the standard exception group for failed memory allocations. (See section \ref{section:except_throwf}). The exception groups from 1 to 15 are reserved for implementation use. \subsubsection{The {\tt except_t} type} \indextype{except_t} An object of type \verb|except_t| keeps track of all of the information that is passed when an exception is thrown, and is known as an {\it exception descriptor}. The type is opaque, hence the program shall manipulate this type using only the interface functions provided. \subsubsection{The {\tt except_init} function} \indexfunc{except_init} \synopsis \begin{verbatim} int except_init(void);\end{verbatim} \description The \verb|except_init| function allocates resources needed by the Exception component. 
Before using any of the other exception interface functions or macros, the program shall perform at least one successful call to \verb|except_init|. If the initialization succeeds, \verb|except_init| returns 1. Otherwise it returns 0. The \verb|except_init| function may be called more than once. After a successful call, every subsequent call shall be successful up to an implementation-defined maximum number of repetitions, which shall be at least as large as the \verb|INT_MAX| from \verb|limits.h|. \footnote{ The intent is to support, but not enforce, a style of global initialization whereby each module which requires the use of another module calls its initialization function from its own initialization function. Only the first such call performs the initialization of the module; subsequent calls merely increment a counter. During deinitialization, the counter is decremented and cleanup takes place when the counter reaches zero.} \subsubsection{The {\tt except_deinit} function} \indexfunc{except_deinit} \synopsis \begin{verbatim} void except_deinit(void);\end{verbatim} \description The \verb|except_deinit| function releases the resources that were allocated by \verb|except_init|. For the resource deallocation to actually take place, the \verb|except_deinit| must be called as many times as the number of times \verb|except_init| was successfully called. If \verb|except_deinit| is called more times than \verb|except_init| is successfully called, the behavior is undefined. \subsubsection{The {\tt except_rethrow} function} \indexfunc{except_rethrow} \synopsis \begin{verbatim} void except_rethrow(except_t *);\end{verbatim} \description The rethrow function is used to rethrow a caught exception. The argument shall not be null. An exception shall not be rethrown from outside of the {\it try-catch region\/} in which it was caught. An exception shall not be rethrown from a try-catch region other than the one in which it was caught. 
It shall not be rethrown from a try-catch or cleanup region enclosed within the one in which it was caught. When an exception is rethrown, the search for a handler does not begin with the region in which the exception was caught. Instead, this region is terminated, and the search continues with the enclosing one, if one exists. \subsubsection{The {\tt except_throw} function} \indexfunc{except_throw} \synopsis \begin{verbatim} void except_throw(long, long, const char *);\end{verbatim} \constraints The first two arguments specify the exception group and code, respectively. Neither of these arguments shall be zero. \description The \verb|except_throw| function causes an exception to be thrown. If the throw takes place in a try-catch region where an exception was just caught, this original exception is considered handled. In this case, the new exception is still eligible for handling by the same try-catch region. The third argument points to the first character of a string which becomes the {\it exception message}. Because the throwing of the exception may cause the current statement block to terminate, this string data shall be non-local. It may be a string literal, since the implementation shall not modify the message, or it may be an ordinary object of static duration. If it is dynamic data, it becomes the handler's responsibility to extract the message from the caught exception and free the data.\footnote{The programmer should consider using {\tt except_throwd} to pass arbitrary dynamic data from the throw site to the try-catch region.} The \verb|except_throw| function does not return. The implementation searches for a suitable try-catch region starting with the one initiated by the most recent \verb|except_try_push|. If there is no enclosing region, the search fails. 
Otherwise if a match is found, execution continues at the start of the target try-catch region, appearing to be a second return from \verb|except_try_push| distinguished by a non-null value of the \verb|except_t *| object. If no match is found during exception processing, the exception is handled internally by the implementation. The implementation then calls the currently registered function for catching unhandled exceptions (see section \ref{section:except_unhandled_catcher}). The default catcher for unhandled exceptions shall terminate the program with a diagnostic which identifies the code, group and exception message. During the search for an exception handler, cleanup handlers may be encountered. They are removed from the inside out and called with their registered arguments. This process is called {\it unwinding}. \index{unwinding} \subsubsection{The {\tt except_throwd} function} \indexfunc{except_throwd} \synopsis \begin{verbatim} void except_throwd(long, long, const char *, void *);\end{verbatim} \constraints The first two arguments specify the exception group and code, respectively. Neither of these arguments shall be zero. \description The \verb|except_throwd| function is the same as \verb|except_throw| in every respect except that it has an additional \verb|void *| parameter. A null argument may be used for this parameter, or it may be any valid pointer value. When the exception is handled, and the handler does not remove this pointer using \verb|except_take_data| then the implementation shall automatically invoke the function \verb|except_free| on this pointer. \subsubsection{The {\tt except_throwf} function} \indexfunc{except_throwf} \label{section:except_throwf} \synopsis \begin{verbatim} void except_throwf(long, long, const char *, ...);\end{verbatim} \constraints The first two arguments specify the exception group and code, respectively. Neither of these arguments shall be zero. 
\description This function is almost exactly the same as \verb|except_throw| except that the exception message is not directly specified. Instead, the \verb|char *| argument specifies a format string which may be followed by trailing arguments. The format string and trailing arguments are interpreted as the format string and arguments of the standard C function \verb|printf| and are subject to the same requirements. The format string is interpreted, and the results of formatting are placed into a buffer provided by the implementation. The implementation shall provide space for at least 1024 bytes of storage for the result of the formatting, including the null terminator byte. If the formatting requires more space than the implementation provides, the behavior is undefined. The results of the formatted print shall become the exception message of the thrown exception. If the implementation is unable to allocate resources for the formatted message, it shall throw an exception having code 1 in group \verb|XCEPT_BAD_ALLOC| with an implementation-defined message. (See section \ref{section:except_id_t}). \subsubsection{The {\tt except_unhandled_catcher} function} \label{section:except_unhandled_catcher} \indexfunc{except_unhandled_catcher} \synopsis \begin{verbatim} void (*except_unhandled_catcher(void (*)(except_t *))) (except_t *);\end{verbatim} \description The \verb|except_unhandled_catcher| function installs a new function for catching unhandled exceptions. The argument is a pointer to a catching function that returns nothing, and accepts a pointer of type \verb|except_t *|. A pointer to the previously installed catching function is returned. If the program did not previously install a catching function, then a pointer to the default catching function is returned. The program may retain this pointer and use it to reinstall the default function. A function for catching unhandled exceptions should not return. 
If it returns, the implementation shall terminate the program with a diagnostic. \subsubsection{The {\tt except_code} function} \indexfunc{except_code} \synopsis \begin{verbatim} unsigned long except_code(except_t *);\end{verbatim} \description The \verb|except_code| is an accessor function which returns the exception code of the given exception descriptor. \subsubsection{The {\tt except_group} function} \indexfunc{except_group} \synopsis \begin{verbatim} unsigned long except_group(except_t *);\end{verbatim} \description The \verb|except_group| is an accessor function which returns the exception group of the given exception descriptor. \subsubsection{The {\tt except_message} function} \indexfunc{except_message} \synopsis \begin{verbatim} const char *except_message(except_t *);\end{verbatim} \description The \verb|except_message| is an accessor function which returns a pointer to the string of text that was specified when the exception was thrown (the exception message). \subsubsection{The {\tt except_data} function} \indexfunc{except_data} \synopsis \begin{verbatim} void *except_data(except_t *);\end{verbatim} \description The \verb|except_data| returns the data pointer that was specified in the \verb|except_throwd| call. If the exception was not thrown by \verb|except_throwd| the return value is unspecified. \subsubsection{The {\tt except_take_data} function} \indexfunc{except_take_data} \synopsis \begin{verbatim} void *except_take_data(except_t *);\end{verbatim} \description The \verb|except_take_data| returns the data pointer that was specified in the \verb|except_throwd| call, and updates the exception descriptor so that the pointer is set to null. If the exception was not thrown by \verb|except_throwd| the result is unspecified. 
\subsubsection{The {\tt except_cleanup_push} macro} \indexmacro{except_cleanup_push} \synopsis \begin{verbatim} void except_cleanup_push(void (*)(void *), void *);\end{verbatim} \description The call to \verb|except_cleanup_push| shall be matched with a call to \verb|except_cleanup_pop| which must occur in the same statement block at the same level of nesting.\footnote{This requirement allows an implementation to provide an {\tt except_cleanup_push} macro which opens up a statement block and a {\tt except_cleanup_pop} which closes the statement block. The space for the registered pointers can then be efficiently allocated from automatic storage.} The \verb|except_cleanup_push| macro registers a cleanup handler that will be called if an exception subsequently occurs before the matching \verb|except_cleanup_pop| is executed, and is not intercepted and handled by a try-catch region that is nested between the two. The first argument to \verb|except_cleanup_push| is a pointer to the cleanup handler, a function that returns nothing and takes a single argument of type \verb|void *|. The second argument is a \verb|void *| value that is registered along with the handler. This value is what is passed to the registered handler, should it be called. Cleanup handlers are called in the reverse order of their nesting: inner handlers are called before outer handlers. The program shall not leave the cleanup region between the call to the macro \verb|except_cleanup_push| and the matching call to \verb|except_cleanup_pop| by means other than throwing an exception, or calling \verb|except_cleanup_pop|. Within the call to the cleanup handler, it is possible that new exceptions may happen. Such exceptions must be handled before the cleanup handler terminates. 
If the call to the cleanup handler is terminated by an exception, the behavior is undefined.\footnote{The exception which triggered the cleanup is not yet caught; thus the program would be effectively trying to replace an exception with one that isn't in a well-defined state.} \subsubsection{The {\tt except_cleanup_pop} macro} \indexmacro{except_cleanup_pop} \label{section:except_cleanup_pop} \synopsis \begin{verbatim} void except_cleanup_pop(int);\end{verbatim} \description A call to the \verb|except_cleanup_pop| macro shall match each call to \verb|except_cleanup_push| which shall be in the same statement block at the same nesting level. It shall match the most recent such a call that is not matched by a previous \verb|except_cleanup_pop| at the same level. This macro causes the registered cleanup handler to be removed. If, and only if the argument is other than zero, the cleanup handler is called. In that case, the registered context pointer is passed to the cleanup handler. \indexenum{except_no_call} \indexenum{except_call} The enumeration constants \verb|except_no_call| and \verb|except_call| may be used as arguments to this function instead of the equivalent constants \verb|0| and \verb|1|. The program shall not leave the region between the call to the macro \verb|except_cleanup_push| and the matching call to \verb|except_cleanup_pop| other than by throwing an exception, or by executing the \verb|except_cleanup_pop|. \subsubsection{The {\tt except_checked_cleanup_pop} macro} \indexmacro{except_checked_cleanup_pop} \synopsis \begin{verbatim} void except_checked_cleanup_pop(void (*)(void *), int);\end{verbatim} \constraints The first pointer-to-function argument shall match the pointer value that was registered by the matching \verb|except_cleanup_push| macro. \description The \verb|except_checked_cleanup_pop| macro may be used as an alternative to \verb|except_cleanup_pop|. 
In verification mode, the constraint serves to provide additional safety by making an explicit declaration regarding which handler is being called (or ignored, as the case may be). The program shall not leave the region between the call to the macro \verb|except_cleanup_push| and the call to \verb|except_checked_cleanup_pop| by means other than throwing an exception, or executing the latter macro. \subsubsection{The {\tt except_try_push} macro} \indexmacro{except_try_push} \label{section:except_try_push} \synopsis \begin{verbatim} void except_try_push(const except_id_t [], size_t, except_t **);\end{verbatim} \description The \verb|except_try_push| marks the beginning of a try-catch region of the program. It must be matched by a \verb|except_try_pop| written in the same statement block at the same level of nesting, which terminates the try-catch region. Regions may be nested. The program shall not leave a try-catch region other than by throwing an exception or by executing the \verb|except_try_pop|.\footnote{Thus, leaving the try-catch region using {\tt goto}, {\tt return}, {\tt break} or {\tt continue} leads to undefined behavior.} The first argument is a pointer to the first element of an array of \verb|except_id_t| objects, the number of elements of which is specified by the second argument. The array specifies which exceptions are caught. The implementation shall treat this array as read-only.\footnote{Thus, the program may allocate the array in static storage.} The third argument of \verb|except_try_push| shall point to an object of type \verb|except_t *|. After the call to \verb|except_try_push|, the program shall inspect the value of this object. A null value indicates that no exception has been thrown. A non-null value indicates that an exception was thrown, and is now caught. 
In other words, when an exception is caught by a try-catch region, then control passes from the throw site back to the first statement after the \verb|except_try_push| statement of the try-catch region. This case is distinguished from an ordinary return by the non-null value of the pointer object that was specified by the third argument of the earlier call to \verb|except_try_push|. An exception is considered handled if it is caught in a try-catch region which subsequently terminates by executing its \verb|except_try_pop| or by throwing another exception. When an exception is considered handled, any dynamic data that was associated with that exception is freed.\footnote{Dynamic data may be explicitly associated with an exception using {\tt except_throwd}. Other types of throw may associate unspecified dynamic data.} It's possible for more than one exception to be active at once. During the processing of one exception, a try-catch region which catches the exception may execute a nested try-catch region in which independent exception processing takes place. Provided that no exception escapes from the inner try-catch region, the original exception remains pending. But if an exception escapes from the inner region, it causes the original exception to be handled.\footnote{Thus, a given try-catch region cannot catch multiple exceptions concurrently.} The caught exception may be rethrown by calling \verb|except_rethrow|, specifying the value of the caught exception descriptor as the argument. Rethrowing a caught exception causes the innermost try-catch region to terminate, but the exception is not considered handled. The search for a handler continues with the second most enclosing region. Throwing a new exception during the handling of a caught exception may cause the {\it same\/} try-catch region to catch that exception; the try-catch region is not terminated until it is determined that it doesn't catch the new exception. 
Each entry in the array of \verb|except_id_t| objects specifies what exceptions are caught by the try-catch region. When an exception is thrown, the implementation searches for the inner-most try-catch region which has at least one match for the thrown exception in its catch specification array. A match occurs when a specification exactly matches the group and code of the thrown exception. If a catch specification is for group 0, then it matches any group. If a catch specification is for code 0, then it matches any exception code. A catch specification of group 0 and code 0 catches all exceptions. Non-volatile automatic variables that are local to the function containing the try-catch region, and that are modified after \verb|except_try_push| begins the try-catch region have indeterminate values when an exception is caught. Once a caught exception is handled or re-thrown, the value of the \verb|except_t *| pointer which referenced it becomes indeterminate. If a re-thrown exception is caught again, the implementation shall produce a valid \verb|except_t *| pointer. \example The following example illustrates the use of \verb|except_try_push| and related macros and functions. \begin{verbatim} #include #include #include "except.h" #define MY_GROUP 42 #define MY_CODE 1 static void func_that_throws(void) { except_throw(MY_GROUP, MY_CODE, "this is an exception"); } static void func_that_cleans_up(void) { void *local_data = malloc(10); except_cleanup_push(free, local_data); func_that_throws(); except_checked_cleanup_pop(free, except_call); } void func_that_catches(void) { /* catch specification */ static const except_id_t catch_spec[] = { { MY_GROUP, XCEPT_CODE_ANY } }; /* exception handle */ except_t *exc; except_try_push(catch_spec, 1, &exc); /* * Start of try-catch region: when exception is * thrown, control returns here. 
*/ if (exc == 0) { /* try code that may throw an exception */ func_that_cleans_up(); } else { /* handle exception that was thrown */ assert (except_group(exc) == MY_GROUP); printf("exception caught: %s %ld %ld\n", except_message(exc), except_group(exc), except_code(exc)); goto terminate; /* ERROR! jumping out of try-catch */ } /* end of try-catch region */ except_try_pop(); terminate: ; } \end{verbatim} In this example, the function \verb|func_that_catches| is intended to be called first. It sets up a try-catch region which traps exceptions having the group identification \verb|MY_GROUP| (or 42). Any code within that group is caught because the code catch was specified as \verb|XCEPT_CODE_ANY|. When the \verb|except_try_push| macro is executed, it sets the value of \verb|exc| to null. Then \verb|func_that_cleans_up| is called, which throws an exception in the \verb|MY_GROUP| group. This exception is caught, so control resumes at the top of the try-catch region, with \verb|exc| set to a non-null value. Thus the else clause of the if statement is now executed. The handling code simply prints the exception message on standard output, as well as the numeric group and code. The subsequent goto statement demonstrates a serious programming error. The \verb|func_that_cleans_up| function illustrates the use of cleanup regions. Dynamic memory is allocated which must not be allowed to leak when an exception is thrown, so a cleanup handler is set up to free the memory in that event. The standard C function \verb|free| happens to have the right type signature and semantics, so it can be used directly as a cleanup handler. Should no exception be thrown, the cleanup pop macro will perform the call to the cleanup handler, because it is invoked with argument \verb|except_call|. 
\subsubsection{The {\tt except_try_pop} macro} \indexmacro{except_try_pop} \synopsis \begin{verbatim} void except_try_pop(void);\end{verbatim} \description The \verb|except_try_pop| macro terminates a try-catch region. It must match a previous \verb|except_try_push| macro in the same statement block at the same level of nesting which is not already matched by an earlier \verb|except_try_pop|. \subsubsection{The {\tt except_set_allocator} function} \indexfunc{except_set_allocator} \label{section:except_set_allocator} \synopsis \begin{verbatim} void except_set_allocator(void *(*)(size_t), void (*)(void *));\end{verbatim} \description The \verb|except_set_allocator| function installs a pair of allocator routines that will be used by the Exception component for future allocation and deallocation requests. The first argument points to a function that resembles the standard C \verb|malloc| in type and semantics. The second argument points to a function that similarly resembles the standard C function \verb|free|. The default allocators are \verb|malloc| and \verb|free|. The call \begin{verbatim} except_set_allocator(malloc, free); \end{verbatim} may be used to restore these default allocator functions. The program shall not call \verb|except_set_allocator| if an exception was thrown and has not yet been handled.\footnote{Doing so could, for example, create a mismatch whereby a pointer to data allocated with the previously installed allocator function would be passed to the new deallocator function.} The allocator function shall create a unique object consisting of at least as many bytes of storage as indicated by the value of the argument. The pointer returned shall be suitably aligned to represent an object of any type. If insufficient resources exist, the pointer returned shall be null. Requesting an object of zero size may produce a unique pointer that shall be acceptable to the deallocator function, or a null pointer. 
The deallocator function shall be capable of destroying objects created by the corresponding allocator function. Passing a null pointer to the deallocator shall have no effect. \subsubsection{The {\tt except_alloc} function} \indexfunc{except_alloc} \synopsis \begin{verbatim} void *except_alloc(size_t);\end{verbatim} \description The \verb|except_alloc| function allocates memory using the default memory allocator or one installed by the program. (See section \ref{section:except_set_allocator}). If the allocation succeeds, a non-null pointer to the allocated object is returned. If the allocator indicates failure by returning a null pointer, then instead of returning, \verb|except_alloc| throws exception code 1 in the group \verb|XCEPT_BAD_ALLOC| (See section \ref{section:except_id_t}). If a zero size request is specified, then an exception is thrown or a non-null pointer is returned, depending on the treatment of such requests by the underlying allocator. \subsubsection{The {\tt except_free} function} \indexfunc{except_free} \synopsis \begin{verbatim} void except_free(void *);\end{verbatim} \description The \verb|except_free| function releases memory that was allocated using \verb|except_alloc|. The deallocation is performed using the default allocator or one installed by the program. If an object is allocated by \verb|except_alloc|, then a different allocator is installed, and the object is freed using \verb|except_free|, the behavior is undefined. \subsection{Implementation} \index{Exception component!reference implementation} Described here is a reference implementation of the exception handling interface that is covered in section \ref{section:exception_component}. The reference implementation requires only a conforming ANSI C implementation. In particular, the actual mechanism for passing control from an exception throw to a catch handler is based on the standard C \verb|setjmp| macro and \verb|longjmp| function. 
\subsubsection{Overview} The core structure in the exception handling implementation is a stack that is composed of a mixture of two types of nodes: cleanup nodes and catch nodes. When an exception is thrown, the stack nodes are popped and processed starting with the topmost one. The nodes are efficiently allocated in automatic storage by the macros \verb|except_cleanup_push| and \verb|except_try_push|. These macros open up a new statement block and declare the node information in automatic storage. These objects are then pushed onto the stack. The corresponding macros \verb|except_cleanup_pop| and \verb|except_try_pop| pop the node off the stack and close the statement block. A static variable keeps track of the stack top. In the multi-threaded variant of the code which is based on the POSIX threading interface, there is a thread-specific stack top created using the thread-specific function \verb|pthread_key_create|. Using global variables is a compromise that simplifies the interface; the throw functions simply ``know'' where the thread's exception stack is, so the context information doesn't have to be passed around. \subsubsection{Stack nodes} A node in the exception handling stack contains a pointer to the next node below, followed by a type field and a union which together keep track of the appropriate type-specific data: \begin{verbatim} enum except_stacktype { XCEPT_CLEANUP, XCEPT_CATCHER }; struct except_stacknode { struct except_stacknode *except_down; enum except_stacktype except_type; union { struct except_catch *except_catcher; struct except_cleanup *except_cleanup; } except_info; }; \end{verbatim} The union overlaps pointers to structures instead of structures in order to save space: there is a disparity in size between a cleanup node and a catch node, so making them both use the same amount of space would be wasteful. The space saving comes at a price, because the pointers themselves take up extra space and time is spent initializing them. 
Some casting trickery could be used to create a stack having two different kinds of structures without the use of unions. \paragraph{Cleanup nodes} Cleanup nodes act as placeholders for a pointer to a cleanup handler function and a context pointer to be passed to that function. The type-dependent component of the cleanup node is declared like this: \begin{verbatim} struct except_cleanup { void (*except_func)(void *); void *except_context; }; \end{verbatim} The cleanup handler is invoked when the node is popped during exception processing. A cleanup handler may also be invoked when the cleanup node is removed by executing \verb|except_cleanup_pop| or \verb|except_checked_cleanup_pop|. Whether or not this happens depends on the integer parameter that is documented in section \ref{section:except_cleanup_pop}. \paragraph{Catch nodes} The catch node structure is more complicated than the cleanup node. Its definition depends on two additional types, \verb|except_id_t| and \verb|except_t|, both of which also play a role in the exception component's interface. \begin{verbatim} typedef struct { unsigned long except_group; unsigned long except_code; } except_id_t; typedef struct { except_id_t except_id; const char *except_message; void *except_dyndata; } except_t; struct except_catch { const except_id_t *except_id; size_t except_size; except_t except_obj; jmp_buf except_jmp; }; \end{verbatim} The \verb|except_id| member of the \verb|except_catch| structure is a pointer to the array of \verb|except_id_t| objects which specify what exceptions the node catches. The \verb|except_size| member specifies the number of elements in the array. Both of these values are derived directly from the arguments of the \verb|except_try_push| macro (see section \ref{section:except_try_push}). The \verb|except_obj| member provides storage for the caught exception. This member is the means by which the thrown exception is communicated to the try-catch region where it is caught. 
It contains the group and code identifiers, the exception message and, optionally, the pointer to arbitrary exception data. The \verb|except_jmp| member is the standard C \verb|jmp_buf|---a place for saving the execution context so that it's possible to pass control, via \verb|longjmp| from the place where an exception is thrown to the place where it is caught. If, during the search for an exception handler, a catch node is encountered which matches the thrown exception, the node remains on the stack. The exception information is stored into the node's \verb|except_obj| member and a \verb|longjmp| is executed to return to the try-catch region in which the node was allocated and pushed. Because the node is still on the stack, it's possible to throw another exception which is caught again by the same node. When an exception is thus caught, control resumes just after the \verb|except_try_push| which placed the node onto the stack. The pointer passed into \verb|except_try_push| is updated to point to the \verb|except_obj| member of the catch structure. The program can then use the portable accessor functions such as \verb|except_code| to gain information about the caught exception and handle it accordingly. 
\index{external names|see {functions}} \index{reference implementation|see {implementation}} \index{names|see {symbols}} \index{identifiers|see {symbols}} \index{structure names|see{tags}} \index{preprocessor symbols|see{macros}} \index{defines|see{macros}} \index{reserved symbols|see{symbols}} \index{symbols!preprocessor|see{macros}} \index{symbols!type names|see{typedefs}} \index{symbols!function names|see{functions}} \printindex \end{document} kmer-code-2013-trunk/libutil/kazlib/docs/docs.ist0000644000000000000000000000014710213513255020466 0ustar rootrootpreamble "\\begin{theindex}\n\\addcontentsline{toc}{section}{Index}\n" postamble "\n\\end{theindex}\n" kmer-code-2013-trunk/libutil/kazlib/docs/CHANGES0000644000000000000000000003711510541426140020015 0ustar rootrootNew in 1.20 1. Bugfix in except.h. Modified non-volatile auto variables were being accessed after longjmp. New in 1.19 1. Rewrite of broken dict_free. 2. Fixed embarassing build breakages that accidentally went into 1.18 3. Function hash_scan_delete_free renamed to hash_scan_delfree to be distinct from hash_scan_delete in the first 14 characters. 4. To resolve inconsistencies between hash_free and dict_free, and a difference between the actual behavior of hash_free and the documented behavior, these two functions are marked obsolescent. The functions dict_free_nodes and hash_free_nodes are provided. The obsolescent functions continue to work as before, for now. 5. Documentation of hash_free is fixed to say that it also subjects the hash to hash_destroy, which is what the implementation does. 6. Documentation states what release it is for. New in 1.18 1. Error in assert expression in list_merge fixed. 2. Semantics of list_merge extended to allow list to be merged onto itself (which is a noop). 3. Clarified interface specification of list_transfer and list_extract; the source and destination list may be the same object. 4. 
New functions: dict_init_like: create a dictionary similar to another one; dict_similar: determine whether two dictionaries are similar; dict_merge: merge contents of one dictionary to another. 5. Dictionary test main can juggle multiple dictionaries, and test dict_merge. 6. If a hash node is inserted into some hash, it is now a constraint violation to insert it again into some hash. 7. The hash_scan_delete_free function has been implemented; it is to hash_scan_delete what hash_delete_free is to hash_delete. New in 1.17 Carl van Tast : 1. Removed references to ``safe malloc'' from some comments. 2. Swapped ``allowed'' and ``not allowed'' in comment to verify_bintree. 3. Fixed comment to list_next: this function never returns the sentinel. 4. lnode_pool_init: nodes[i].prev = nodes instead of nodes + 1. This saves one or two CPU cycles :-) and it gives a valid address even if we have a (somewhat pathological) pool with just one element. Kaz: 5. Dropped extra parameter from tree rotation functions in dict.c. Should shave a few cycles. 6. Fixed error in the duplicate key iteration idiom example in the documentation (see the section on dict_upper_bound). 7. Forgotten #include added to hash.c New in 1.16 1. Added an interface for loading the contents of a dictionary from an ordered sequence. This is done in O(n) time by a direct bottom-up construction of the red-black tree, making it much faster than the O(n log n) process of inserting each element. 2. Miscellaneous cleanup: missing const qualifiers were added to key pointer parameters, some incorrect comments fixed; spelling errors corrected in documentation. New in 1.15 1. Another potential exception handling memory leak fixed. This one has to do with throwing an exception from within a try-catch region in which an exception was just caught. The new exception replaces the old without the old's dynamic memory being disposed of. 2. Restrictions added on except_rethrow. 3. 
Exception module must now be explicitly initialized with except_init. 4. Structure members in exception header renamed to adhere to documented namespace. 5. The exwrap.[ch] source files are gone. There is support for memory allocation with exception handling in except.c, which supports user defined allocators. 6. Three bugfixes to sfx parser. First, unary operators take a cast expression, not a unary expression. Secondly, sizeof doesn't throw a syntax error anymore on things that look like casts, but maybe are not. Thirdly, empty parentheses weren't handled right in treatment of ambiguous expressions, e.g. (a)() was declared a syntax error. 7. Changed the representation of hash table chains. They are now singly linked lists, which means that the overhead of managing back pointers is gone. Only deletion is slightly more complicated now because it has to search from the beginning of the chain. [Rationale: this is okay, since chains are supposed to be short in a hash table!] 8. Rewritten test main() in list.c. It's now more like the others with a menu. Previously it was essentially a file sorting program. 9. New function: list_find. Exhaustively searches the list for a matching entry, returns pointer to node if found. New in 1.14 1. Got rid of some overbearing copyright restrictions. There is no need for executables to contain copyright notices. In fact, there are no restrictions on the use, or distribution in executable form. 2. Tiny tweak in red-black fixup code of dict_insert. 3. Keys in hash and dict are declared const void * now in all functions rather than plain void *. This means that casts are no longer necessary when calling insert or lookup functions with const data as the key. But casts of the return value of hnode_getkey or dnode_getkey may be required. 4. Fixed compile breakage of except.c when posix thread support enabled. 5. Side effect assertion interface now performs caching, to avoid parsing the same expressions over and over again. 
Thus debugging with KAZLIB_SIDEEFFECT_DEBUG incurs a smaller performance hit. 6. Major bugfix to sfx expression parser. The function dealing with disambiguating casts had to be rewritten to do more sophisticated lookahead and backtracking. It all started when Mark Brady discovered that (a++)+b was being incorrectly diagnosed as a syntax error. 7. Added documentation: more examples for uses of dictionaries, and exception handling. Some documentation about the internals of exception handling added. Changed document format for narrower margins, reducing page count and increasing readability. 8. Bugfix in except_rethrow. It was freeing the dynamic data of the exception even though it's not handled yet. New in 1.13 1. Fixed some potential memory leaks in except.c. 2. Finished all interface documentation. All that is left now is to flesh out the implementation notes. 3. Fixed a bug in POSIX threaded variant of except.c. Null function pointer dereference in unhandled exception case. 4. Macros beginning with E[A-Z] have been renamed to stay out of space reserved for <errno.h>. 5. Identifiers in exwrap.[ch] have been renamed from having ex_ prefixed to having exwrap_ prefixes. New in 1.12 1. COOL! New module for detecting side effects in C expressions. 2. Serious bugfix in hash_init(). The computation of the initial hash mask was completely botched up. Historically this code has seen little testing because hashing over a user supplied table is not extendible. Users of hash_create() are not affected. 3. Tried to make computation of hash_val_t_bit more threadsafe. It should be okay if writes to int objects are atomic, and concurrent writes of the same int value to a given object are safe. 4. Makefile renamed to Makefile.gcc. Makefile.vc added. The rename is retroactive to all prior releases. 5. OPAQUE_DEBUG becomes KAZLIB_OPAQUE_DEBUG and TEST_MAIN becomes KAZLIB_TEST_MAIN. In general, macros that affect how the modules build should be confined to a special namespace. 6. 
New KAZLIB_SIDEEFFECT_DEBUG feature to enable diagnosis of side effect expressions being passed to macros that evaluate their arguments more than once. New in 1.11 1. Improvements in experimental exception handling module: except_throwf has been added which takes printf-like arguments; except_checked_cleanup_pop has been added to provide a measure of safety; there is now a way to pass arbitrary data from the throw site to the catch. 2. Improvements in dict_insert. A redundant call to the comparison function has been eliminated, resulting in one fewer comparisons per insert operation! Also a redundant test has been removed from the controlling expression of the fixup loop, taking advantage of the fact that nil is always black, and hence the root node always has a black parent. 3. Small change in dict_delete. A test in the fixup loop has been eliminated by temporarily coloring the root node red. See comment and diff between dict.c revision 1.25 and 1.26. 4. Test program blast.pl deletes keys out of order; to get in order delete, initialize $factor_d to 1. New in 1.10 1. The dict_init function now correctly initializes allocator-related members of the dict structure. 2. Tiny optimization in dict_lookup---less frequent cases tested last. 3. Added list_extract, for extracting list slices (more general than list_transfer). 4. Incorporated changes from Loic Dachary: hash_free() has been added for deleting all nodes; hash and compare functions from the hash.c test code are now available to the user as defaults if null pointers are given to hash_init() or hash_create(); and hash_set_allocator restores the default allocator routines if null pointers are given to it. 5. Changes to dict analogous to hash: dict_free() added, etc. 6. New exception handling module added (experimental). 7. Much new documentation. New in 1.9 1. Third argument of list_transfer may be null, in which case no nodes are transferred. 
[Rationale: allows empty source list to be treated without special case testing when all nodes are being transferred.] 2. Two new functions added to dict: dict_upper_bound and dict_lower_bound. These allow for inexact and range searches. New in 1.8 1. New improved hashing function in the hash.c test code. It turns out that when I changed the hash table algorithm, the blast.pl testcase was hashing all to a single chain due to the pathologically bad hashing function. The new hashing function should be good enough for general use. It uses each nybble of the key to index a table of 16 random 32 bit integers. These integers are XOR-ed into the hash value which is rotated after each XOR. 2. Spurious semicolon removed from the #define of HASH_VAL_T_BIT. 3. I fixed some incorrect comments in hash.c which still talked about the old algorithm from release 1.5 and older. 4. The smalloc.c module is no longer supported. It's still in RCS but it's not tagged as being part of release 1.8, and is not used by any of the other sources. The standard library memory allocation functions are now used directly. [Rationale: smalloc.c is overkill and interferes with integration of the other source files into projects. Conscientious programmers already have their own tools for debugging allocator corruption, anyway.] New in 1.7 1. Missing #include added to smalloc.h 2. The dict_delete() functions internals have been changed to make it much more sane. This function no longer has the potential to return a node other than the one that is passed to it. 3. The changes to dict_delete() also fix a serious bug in dict_process(). The dict_process computes a pointer to a node's successor before invoking the user callback to process a node. If the user callback calls dict_delete() on the node, under the old dict_delete() semantics it was possible for the successor to get deleted instead. Thus dict_process() could end up with an invalid pointer. 4. 
The changes to dict_delete() also mean that key and value information will never be relocated from one node to another. User code can now rely on this convenient assumption. New in 1.6 1. The extendible hashing algorithm internals have changed. This has a potential impact on the behavior with respect to hashing functions which were written to work well specifically with the old hashing scheme. For a silly reason, in the old hashing scheme, the top N bits were always taken from the results of a hashing function, for a hash table size of 2^N chains. In the new scheme, the bottom N bits are taken instead. [Rationale: This change makes it easier to write portable hashing functions and simplifies the functions that expand or contract the table, making them more efficient.] 2. Added const qualifiers to the rcsid[] and right[] char arrays, which shuts up the GCC compiler from complaining that these are unused statics. New in 1.5 1. First two arguments to list_prune_graft() are reversed. The leftmost argument is now the destination list. Moreover, the function has been renamed list_transfer(). [Rationale: this ordering of parameters is consistent with list_merge(), and the standard C functions also pass destination pointers on the left. Renaming the function protects against incorrect use.] 2. Red-Black tree dictionaries now support duplicate keys. [Rationale: duplicate keys could be useful in some applications.] When a dictionary is created or initialized, it does not allow duplicate keys. The function dict_allow_dupes() is used to set a flag in a dictionary to henceforth allow duplicates. Once made, the decision to allow duplicates cannot be reversed. [Rationale: toggling between allowing and disallowing duplicates does not seem useful. Once duplicates are admitted, there is no point in disallowing duplicates.] 
When a key is sought in tree that currently allows duplicates, the leftmost node containing that key is chosen from among the nodes that contain duplicates of the key. Then dict_next() can be used to fetch the remaining duplicates one by one. No particular order among the duplicates may be assumed. However, for what it may be worth, the order between any two duplicates is preserved for as long as they both remain in the dictionary. 3. The function prototypes in the header files have been modified to eliminate parameter names. [Rationale: parameter names in prototypes have only documentary value, and may clash with macro identifiers defined in other headers.] 4. Dictionary and hash table now has support for automatic allocation of nodes in the insert and delete operations, which means that the user can add items in one operation instead of the two operations of allocating a node and inserting it. [Rationale: ease of use.] There is support for user-defined allocators; the default allocators use the smalloc.c routines. For any instance of a dict_t or hash_t object, the user can override the allocator functions by supplying his or her own pointers to suitable functions, and a context pointer that will be passed to these functions when they are called through that particular dict_t or hash_t instance. [Rationale: flexibility, ease of use, promotes good design.] The funtion pointers can only be set when the data structure is empty. [Rationale: it is undesirable to switch to a different allocator when there are nodes in the dictionary; it might lead to the error of freeing a node with an incorrect allocator.] kmer-code-2013-trunk/libutil/kazlib/docs/README0000644000000000000000000000622710541426140017702 0ustar rootrootThis collection of data structures is maintained by Kaz Kylheku INSTRUCTIONS Simply add the necessary .c and .h files to your project. Include the appropriate .h file in any translation unit that interfaces with one or more of the kazlib modules. 
Then compile and link the modules together with your program. To use kazlib in a C++ project, don't compile them with a C++ compiler. Compile with a C compiler, and include the header files in your C++ translation units. Then link together the translated C and C++. As of release 1.2, the header files should work with C++. IMPORTANT NOTES 1. Self checks The modules in this collection perform extensive self-checks, some of which make the performance really poor (by actually raising the overall asymptotic complexity of an operation, for example from O(log N) to O(N)). The instrumentation assertions can be disabled by compiling with the NDEBUG macro defined. You can check that your project does not violate the principles of implementation hiding in connection with its use of the kazlib modules. This is accomplished by defining the macro KAZLIB_OPAQUE_DEBUG at the beginning of any translation unit which includes the kazlib header files. Note that whereas this will detect violations, it will not result in a translation that can be linked against the kazlib. When you are done checking, turn off KAZLIB_OPAQUE_DEBUG and recompile. If your compiler has a special ``check only'' mode which enables it to perform syntax and type checking without doing an actual translation (similar to lint), it may be a time-saving idea to use it in conjunction with KAZLIB_OPAQUE_DEBUG. 2. Macros with side effects Some of the kazlib header files define macros that evaluate their arguments more than once. This means that if expressions with side effects are passed to these macros, undesirable and undefined behavior will happen. There is support in Kazlib for catching these kinds of bugs: compile with KAZLIB_SIDEEFFECT_DEBUG, and add the except.c and sfx.c modules to your object. The macros will now parse their expressions at run time to diagnose the presence of side effects and function calls. It's easy to add this support to your own code! 3. 
Thread support POSIX thread support is enabled by predefining KAZLIB_POSIX_THREADS. Currently only the exception-handling module has any need for this. When compiled that way, it provides thread-safe exception handling. Threads can independently throw exceptions and each thread can install its own specific catcher for unhandled exceptions. Moreover, each thread can register its own memory allocator functions. Note: this variant of the code also depends on the ability to cast between void * and function pointers, which is a common language extension. 4. CVS identification The source files contain declarations of a static char array variable called rcsid. This contains an expansion of the CVS identification of each module, making it possible to determine the ``bill of materials'' that went into an executable build. I have now wrapped the declarations of these rcsid[] arrays so they are conditional on KAZLIB_RCSID being defined. For many users, these are just a waste of space. kmer-code-2013-trunk/libutil/kazlib/docs/MUST_READ0000644000000000000000000000216510213513255020325 0ustar rootrootGreetings, Programmer! I gather that because you are reading this, you are probably considering using the C language translation units included here in your own software. If that is the case, I would like to know who you are and urge you to contact me. Here is why: I rove over this code periodically looking for defects. In fact, I use it in my own programming projects. If I discover a defect, I will notify everyone who I know is a user of this software. If there is a serious defect in some code that you are using in your software project, wouldn't you want to be informed? In fact, there is no question that you _need_ to be informed! Here is what you do: simply send an e-mail message to kaz@ashi.footprints.net with the subject "kazlib" and the body "I am a user". Be sure that your message has a good return address. 
I will manually add your e-mail address to a list which I will use only for the purpose of notifications regarding Kazlib. You will receive a reply to the effect that you are added. If ever you should wish to be removed from this list, simply ask and it shall be done. Yours in earnest, Kaz Kylheku kmer-code-2013-trunk/libutil/kazlib/except.h0000644000000000000000000001011110541426140017516 0ustar rootroot/* * Portable Exception Handling for ANSI C. * Copyright (C) 1999 Kaz Kylheku * * Free Software License: * * All rights are reserved by the author, with the following exceptions: * Permission is granted to freely reproduce and distribute this software, * possibly in exchange for a fee, provided that this copyright notice appears * intact. Permission is also granted to adapt this software to produce * derivative works, as long as the modified versions carry this copyright * notice and additional notices stating that the work has been modified. * This source code may be translated into executable form and incorporated * into proprietary software; there is no requirement for such software to * contain a copyright notice related to this source. 
* */ #ifndef XCEPT_H #define XCEPT_H #include #include #include #define XCEPT_GROUP_ANY 0 #define XCEPT_CODE_ANY 0 #define XCEPT_BAD_ALLOC 1 #ifdef __cplusplus extern "C" { #endif /* Two flag values, 0 and 1; presumably meant for the integer argument of except_cleanup_pop(), which runs the handler only when that argument is nonzero -- TODO confirm intended use. */ enum { except_no_call, except_call }; /* Exception identifier: a (group, code) pair. */ typedef struct { unsigned long except_group; unsigned long except_code; } except_id_t; /* Exception object: identifier, message string and a pointer to optional dynamic data; every member is volatile-qualified. */ typedef struct { except_id_t volatile except_id; const char *volatile except_message; void *volatile except_dyndata; } except_t; /* Cleanup node payload: the handler and the context pointer it is called with. */ struct except_cleanup { void (*except_func)(void *); void *except_context; }; /* Catch node payload: the identifier array and its element count, storage for the exception object, and the jmp_buf that except_try_push() hands to setjmp. */ struct except_catch { const except_id_t *except_id; size_t except_size; except_t except_obj; jmp_buf except_jmp; }; /* Discriminator for the union inside struct except_stacknode. */ enum except_stacktype { XCEPT_CLEANUP, XCEPT_CATCHER }; /* One stack entry: link to the node below, the node kind, and a pointer to the matching payload. */ struct except_stacknode { struct except_stacknode *except_down; enum except_stacktype except_type; union { struct except_catch *except_catcher; struct except_cleanup *except_cleanup; } except_info; }; /* private functions made external so they can be used in macros */ void except_setup_clean(struct except_stacknode *, struct except_cleanup *, void (*)(void *), void *); void except_setup_try(struct except_stacknode *, struct except_catch *, const except_id_t [], size_t); struct except_stacknode *except_pop(void); /* public interface functions */ int except_init(void); void except_deinit(void); void except_rethrow(except_t *); void except_throw(long, long, const char *); void except_throwd(long, long, const char *, void *); void except_throwf(long, long, const char *, ...); void (*except_unhandled_catcher(void (*)(except_t *)))(except_t *); unsigned long except_code(except_t *); unsigned long except_group(except_t *); const char *except_message(except_t *); void *except_data(except_t *); void *except_take_data(except_t *); void except_set_allocator(void *(*)(size_t), void (*)(void *)); void *except_alloc(size_t); void except_free(void *); /* Macro forms of the accessors declared above; each reads a member of *(E) directly. */ #define except_code(E) ((E)->except_id.except_code) #define except_group(E) ((E)->except_id.except_group) #define except_message(E) ((E)->except_message) 
#define except_data(E) ((E)->except_dyndata) #ifdef __cplusplus } #endif /* * void except_cleanup_push(void (*)(void *), void *); * void except_cleanup_pop(int); * void except_checked_cleanup_pop(void (*)(void *), int); * void except_try_push(const except_id_t [], size_t, except_t **); * void except_try_pop(void); */ /* Each *_push macro opens a brace scope and declares locals (except_sn plus except_cl or except_ch) that the matching *_pop macro uses before closing the scope, so push and pop must be paired within one block. */ #define except_cleanup_push(F, C) \ { \ struct except_stacknode except_sn; \ struct except_cleanup except_cl; \ except_setup_clean(&except_sn, &except_cl, F, C) #define except_cleanup_pop(E) \ except_pop(); \ if (E) \ except_cl.except_func(except_cl.except_context); \ } #define except_checked_cleanup_pop(F, E) \ except_pop(); \ assert (except_cl.except_func == (F)); \ if (E) \ except_cl.except_func(except_cl.except_context); \ } /* *(PPE) is set to null when setjmp returns zero (the initial pass) and to the address of the node's except_obj when setjmp returns nonzero. */ #define except_try_push(ID, NUM, PPE) \ { \ struct except_stacknode except_sn; \ struct except_catch except_ch; \ except_setup_try(&except_sn, &except_ch, ID, NUM); \ if (setjmp(except_ch.except_jmp)) \ *(PPE) = &except_ch.except_obj; \ else \ *(PPE) = 0 #define except_try_pop() \ except_free(except_ch.except_obj.except_dyndata); \ except_pop(); \ } #endif kmer-code-2013-trunk/libutil/kazlib/drivers/0000755000000000000000000000000012641613360017546 5ustar rootrootkmer-code-2013-trunk/libutil/kazlib/drivers/list-main.c0000644000000000000000000000542710213514156021613 0ustar rootroot#include #include #include #include typedef char input_t[256]; static int tokenize(char *string, ...) 
{ char **tokptr; va_list arglist; int tokcount = 0; va_start(arglist, string); tokptr = va_arg(arglist, char **); while (tokptr) { while (*string && isspace((unsigned char) *string)) string++; if (!*string) break; *tokptr = string; while (*string && !isspace((unsigned char) *string)) string++; tokptr = va_arg(arglist, char **); tokcount++; if (!*string) break; *string++ = 0; } va_end(arglist); return tokcount; } static int comparef(const void *key1, const void *key2) { return strcmp(key1, key2); } static char *dupstring(char *str) { int sz = strlen(str) + 1; char *new = malloc(sz); if (new) memcpy(new, str, sz); return new; } int main(void) { input_t in; list_t *l = list_create(LISTCOUNT_T_MAX); lnode_t *ln; char *tok1, *val; int prompt = 0; char *help = "a append value to list\n" "d delete value from list\n" "l lookup value in list\n" "s sort list\n" "c show number of entries\n" "t dump whole list\n" "p turn prompt on\n" "q quit"; if (!l) puts("list_create failed"); for (;;) { if (prompt) putchar('>'); fflush(stdout); if (!fgets(in, sizeof(input_t), stdin)) break; switch(in[0]) { case '?': puts(help); break; case 'a': if (tokenize(in+1, &tok1, (char **) 0) != 1) { puts("what?"); break; } val = dupstring(tok1); ln = lnode_create(val); if (!val || !ln) { puts("allocation failure"); if (ln) lnode_destroy(ln); free(val); break; } list_append(l, ln); break; case 'd': if (tokenize(in+1, &tok1, (char **) 0) != 1) { puts("what?"); break; } ln = list_find(l, tok1, comparef); if (!ln) { puts("list_find failed"); break; } list_delete(l, ln); val = lnode_get(ln); lnode_destroy(ln); free(val); break; case 'l': if (tokenize(in+1, &tok1, (char **) 0) != 1) { puts("what?"); break; } ln = list_find(l, tok1, comparef); if (!ln) puts("list_find failed"); else puts("found"); break; case 's': list_sort(l, comparef); break; case 'c': printf("%lu\n", (unsigned long) list_count(l)); break; case 't': for (ln = list_first(l); ln != 0; ln = list_next(l, ln)) puts(lnode_get(ln)); break; case 
'q': exit(0); break; case '\0': break; case 'p': prompt = 1; break; default: putchar('?'); putchar('\n'); break; } } return 0; } kmer-code-2013-trunk/libutil/kazlib/drivers/sfx-main.c0000644000000000000000000000144110213514156021430 0ustar rootroot#include int main(int argc, char **argv) { char expr_buf[256]; char *expr, *ptr; sfx_rating_t eff; for (;;) { if (argc < 2) { expr = expr_buf; if (fgets(expr_buf, sizeof expr_buf, stdin) == 0) break; if ((ptr = strchr(expr_buf, '\n')) != 0) *ptr = 0; } else { expr = (argv++)[1]; if (!expr) break; } if (!sfx_determine(expr, &eff)) { printf("expression '%s' has a syntax error\n", expr); return EXIT_FAILURE; } switch (eff) { case sfx_none: printf("expression '%s' has no side effects\n", expr); break; case sfx_potential: printf("expression '%s' may have side effects\n", expr); break; case sfx_certain: printf("expression '%s' has side effects\n", expr); break; } } return 0; } kmer-code-2013-trunk/libutil/kazlib/drivers/hash-main.c0000644000000000000000000000723110213514156021556 0ustar rootroot#include #include #include typedef char input_t[256]; static int tokenize(char *string, ...) 
{ char **tokptr; va_list arglist; int tokcount = 0; va_start(arglist, string); tokptr = va_arg(arglist, char **); while (tokptr) { while (*string && isspace((unsigned char) *string)) string++; if (!*string) break; *tokptr = string; while (*string && !isspace((unsigned char) *string)) string++; tokptr = va_arg(arglist, char **); tokcount++; if (!*string) break; *string++ = 0; } va_end(arglist); return tokcount; } static char *dupstring(char *str) { int sz = strlen(str) + 1; char *new = malloc(sz); if (new) memcpy(new, str, sz); return new; } static hnode_t *new_node(void *c) { static hnode_t few[5]; static int count; if (count < 5) return few + count++; return NULL; } static void del_node(hnode_t *n, void *c) { } int main(void) { input_t in; hash_t *h = hash_create(HASHCOUNT_T_MAX, 0, 0); hnode_t *hn; hscan_t hs; char *tok1, *tok2, *val; const char *key; int prompt = 0; char *help = "a add value to hash table\n" "d delete value from hash table\n" "l lookup value in hash table\n" "n show size of hash table\n" "c show number of entries\n" "t dump whole hash table\n" "+ increase hash table (private func)\n" "- decrease hash table (private func)\n" "b print hash_t_bit value\n" "p turn prompt on\n" "s switch to non-functioning allocator\n" "q quit"; if (!h) puts("hash_create failed"); for (;;) { if (prompt) putchar('>'); fflush(stdout); if (!fgets(in, sizeof(input_t), stdin)) break; switch(in[0]) { case '?': puts(help); break; case 'b': printf("%d\n", hash_val_t_bit); break; case 'a': if (tokenize(in+1, &tok1, &tok2, (char **) 0) != 2) { puts("what?"); break; } key = dupstring(tok1); val = dupstring(tok2); if (!key || !val) { puts("out of memory"); free((void *) key); free(val); } if (!hash_alloc_insert(h, key, val)) { puts("hash_alloc_insert failed"); free((void *) key); free(val); break; } break; case 'd': if (tokenize(in+1, &tok1, (char **) 0) != 1) { puts("what?"); break; } hn = hash_lookup(h, tok1); if (!hn) { puts("hash_lookup failed"); break; } val = 
hnode_get(hn); key = hnode_getkey(hn); hash_scan_delfree(h, hn); free((void *) key); free(val); break; case 'l': if (tokenize(in+1, &tok1, (char **) 0) != 1) { puts("what?"); break; } hn = hash_lookup(h, tok1); if (!hn) { puts("hash_lookup failed"); break; } val = hnode_get(hn); puts(val); break; case 'n': printf("%lu\n", (unsigned long) hash_size(h)); break; case 'c': printf("%lu\n", (unsigned long) hash_count(h)); break; case 't': hash_scan_begin(&hs, h); while ((hn = hash_scan_next(&hs))) printf("%s\t%s\n", (char*) hnode_getkey(hn), (char*) hnode_get(hn)); break; case '+': grow_table(h); /* private function */ break; case '-': shrink_table(h); /* private function */ break; case 'q': exit(0); break; case '\0': break; case 'p': prompt = 1; break; case 's': hash_set_allocator(h, new_node, del_node, NULL); break; default: putchar('?'); putchar('\n'); break; } } return 0; } kmer-code-2013-trunk/libutil/kazlib/drivers/dict-main.c0000644000000000000000000001336610213514156021564 0ustar rootroot#include #include #include #include typedef char input_t[256]; static int tokenize(char *string, ...) 
{ char **tokptr; va_list arglist; int tokcount = 0; va_start(arglist, string); tokptr = va_arg(arglist, char **); while (tokptr) { while (*string && isspace((unsigned char) *string)) string++; if (!*string) break; *tokptr = string; while (*string && !isspace((unsigned char) *string)) string++; tokptr = va_arg(arglist, char **); tokcount++; if (!*string) break; *string++ = 0; } va_end(arglist); return tokcount; } static int comparef(const void *key1, const void *key2) { return strcmp(key1, key2); } static char *dupstring(char *str) { int sz = strlen(str) + 1; char *new = malloc(sz); if (new) memcpy(new, str, sz); return new; } static dnode_t *new_node(void *c) { static dnode_t few[5]; static int count; if (count < 5) return few + count++; return NULL; } static void del_node(dnode_t *n, void *c) { } static int prompt = 0; static void construct(dict_t *d) { input_t in; int done = 0; dict_load_t dl; dnode_t *dn; char *tok1, *tok2, *val; const char *key; char *help = "p turn prompt on\n" "q finish construction\n" "a add new entry\n"; if (!dict_isempty(d)) puts("warning: dictionary not empty!"); dict_load_begin(&dl, d); while (!done) { if (prompt) putchar('>'); fflush(stdout); if (!fgets(in, sizeof(input_t), stdin)) break; switch (in[0]) { case '?': puts(help); break; case 'p': prompt = 1; break; case 'q': done = 1; break; case 'a': if (tokenize(in+1, &tok1, &tok2, (char **) 0) != 2) { puts("what?"); break; } key = dupstring(tok1); val = dupstring(tok2); dn = dnode_create(val); if (!key || !val || !dn) { puts("out of memory"); free((void *) key); free(val); if (dn) dnode_destroy(dn); } dict_load_next(&dl, dn, key); break; default: putchar('?'); putchar('\n'); break; } } dict_load_end(&dl); } int main(void) { input_t in; dict_t darray[10]; dict_t *d = &darray[0]; dnode_t *dn; int i; char *tok1, *tok2, *val; const char *key; char *help = "a add value to dictionary\n" "d delete value from dictionary\n" "l lookup value in dictionary\n" "( lookup lower bound\n" ") lookup 
upper bound\n" "# switch to alternate dictionary (0-9)\n" "j merge two dictionaries\n" "f free the whole dictionary\n" "k allow duplicate keys\n" "c show number of entries\n" "t dump whole dictionary in sort order\n" "m make dictionary out of sorted items\n" "p turn prompt on\n" "s switch to non-functioning allocator\n" "q quit"; for (i = 0; i < sizeof darray / sizeof *darray; i++) dict_init(&darray[i], DICTCOUNT_T_MAX, comparef); for (;;) { if (prompt) putchar('>'); fflush(stdout); if (!fgets(in, sizeof(input_t), stdin)) break; switch(in[0]) { case '?': puts(help); break; case 'a': if (tokenize(in+1, &tok1, &tok2, (char **) 0) != 2) { puts("what?"); break; } key = dupstring(tok1); val = dupstring(tok2); if (!key || !val) { puts("out of memory"); free((void *) key); free(val); } if (!dict_alloc_insert(d, key, val)) { puts("dict_alloc_insert failed"); free((void *) key); free(val); break; } break; case 'd': if (tokenize(in+1, &tok1, (char **) 0) != 1) { puts("what?"); break; } dn = dict_lookup(d, tok1); if (!dn) { puts("dict_lookup failed"); break; } val = dnode_get(dn); key = dnode_getkey(dn); dict_delete_free(d, dn); free(val); free((void *) key); break; case 'f': dict_free(d); break; case 'l': case '(': case ')': if (tokenize(in+1, &tok1, (char **) 0) != 1) { puts("what?"); break; } dn = 0; switch (in[0]) { case 'l': dn = dict_lookup(d, tok1); break; case '(': dn = dict_lower_bound(d, tok1); break; case ')': dn = dict_upper_bound(d, tok1); break; } if (!dn) { puts("lookup failed"); break; } val = dnode_get(dn); puts(val); break; case 'm': construct(d); break; case 'k': dict_allow_dupes(d); break; case 'c': printf("%lu\n", (unsigned long) dict_count(d)); break; case 't': for (dn = dict_first(d); dn; dn = dict_next(d, dn)) { printf("%s\t%s\n", (char *) dnode_getkey(dn), (char *) dnode_get(dn)); } break; case 'q': exit(0); break; case '\0': break; case 'p': prompt = 1; break; case 's': dict_set_allocator(d, new_node, del_node, NULL); break; case '#': if 
(tokenize(in+1, &tok1, (char **) 0) != 1) { puts("what?"); break; } else { int dictnum = atoi(tok1); if (dictnum < 0 || dictnum > 9) { puts("invalid number"); break; } d = &darray[dictnum]; } break; case 'j': if (tokenize(in+1, &tok1, &tok2, (char **) 0) != 2) { puts("what?"); break; } else { int dict1 = atoi(tok1), dict2 = atoi(tok2); if (dict1 < 0 || dict1 > 9 || dict2 < 0 || dict2 > 9) { puts("invalid number"); break; } dict_merge(&darray[dict1], &darray[dict2]); } break; default: putchar('?'); putchar('\n'); break; } } return 0; } kmer-code-2013-trunk/libutil/kazlib/drivers/except-main.c0000644000000000000000000000233310213514156022121 0ustar rootroot#include #include static void cleanup(void *arg) { printf("cleanup(\"%s\") called\n", (char *) arg); } static void bottom_level(void) { char buf[256]; printf("throw exception? "); fflush(stdout); fgets(buf, sizeof buf, stdin); if (buf[0] >= 0 && toupper(buf[0]) == 'Y') except_throw(1, 1, "nasty exception"); } static void top_level(void) { except_cleanup_push(cleanup, "argument"); bottom_level(); except_cleanup_pop(0); } int main(int argc, char **argv) { static const except_id_t catch[] = { { 1, 1 }, { 1, 2 } }; except_t *ex; /* * Nested exception ``try blocks'' */ /* outer */ except_try_push(catch, 2, &ex); if (!ex) { /* inner */ except_try_push(catch, 2, &ex); if (!ex) { top_level(); } else { /* inner catch */ printf("caught exception (inner): \"%s\", s=%ld, c=%ld\n", except_message(ex), except_group(ex), except_code(ex)); except_rethrow(ex); } except_try_pop(); } else { /* outer catch */ printf("caught exception (outer): \"%s\", s=%ld, c=%ld\n", except_message(ex), except_group(ex), except_code(ex)); } except_try_pop(); except_throw(99, 99, "exception in main"); return 0; } kmer-code-2013-trunk/libutil/kazlib/hash.c0000644000000000000000000005727410541426140017171 0ustar rootroot/* * Hash Table Data Type * Copyright (C) 1997 Kaz Kylheku * * Free Software License: * * All rights are reserved by the author, with 
the following exceptions: * Permission is granted to freely reproduce and distribute this software, * possibly in exchange for a fee, provided that this copyright notice appears * intact. Permission is also granted to adapt this software to produce * derivative works, as long as the modified versions carry this copyright * notice and additional notices stating that the work has been modified. * This source code may be translated into executable form and incorporated * into proprietary software; there is no requirement for such software to * contain a copyright notice related to this source. * */ #include #include #include #include #define HASH_IMPLEMENTATION #include "hash.h" #define INIT_BITS 6 #define INIT_SIZE (1UL << (INIT_BITS)) /* must be power of two */ #define INIT_MASK ((INIT_SIZE) - 1) #define next hash_next #define key hash_key #define data hash_data #define hkey hash_hkey #define table hash_table #define nchains hash_nchains #define nodecount hash_nodecount #define maxcount hash_maxcount #define highmark hash_highmark #define lowmark hash_lowmark #define compare hash_compare #define function hash_function #define allocnode hash_allocnode #define freenode hash_freenode #define context hash_context #define mask hash_mask #define dynamic hash_dynamic #define table hash_table #define chain hash_chain static hnode_t *hnode_alloc(void *context); static void hnode_free(hnode_t *node, void *context); static hash_val_t hash_fun_default(const void *key); static int hash_comp_default(const void *key1, const void *key2); int hash_val_t_bit; /* * Compute the number of bits in the hash_val_t type. We know that hash_val_t * is an unsigned integral type. Thus the highest value it can hold is a * Mersenne number (power of two, less one). We initialize a hash_val_t * object with this value and then shift bits out one by one while counting. * Notes: * 1. HASH_VAL_T_MAX is a Mersenne number---one that is one less than a power * of two. 
This means that its binary representation consists of all one * bits, and hence ``val'' is initialized to all one bits. * 2. While bits remain in val, we increment the bit count and shift it to the * right, replacing the topmost bit by zero. */ static void compute_bits(void) { hash_val_t val = HASH_VAL_T_MAX; /* 1 */ int bits = 0; while (val) { /* 2 */ bits++; val >>= 1; } hash_val_t_bit = bits; } /* * Verify whether the given argument is a power of two. */ static int is_power_of_two(hash_val_t arg) { if (arg == 0) return 0; while ((arg & 1) == 0) arg >>= 1; return (arg == 1); } /* * Compute a shift amount from a given table size */ static hash_val_t compute_mask(hashcount_t size) { assert (is_power_of_two(size)); assert (size >= 2); return size - 1; } /* * Initialize the table of pointers to null. */ static void clear_table(hash_t *hash) { hash_val_t i; for (i = 0; i < hash->nchains; i++) hash->table[i] = NULL; } /* * Double the size of a dynamic table. This works as follows. Each chain splits * into two adjacent chains. The shift amount increases by one, exposing an * additional bit of each hashed key. For each node in the original chain, the * value of this newly exposed bit will decide which of the two new chains will * receive the node: if the bit is 1, the chain with the higher index will have * the node, otherwise the lower chain will receive the node. In this manner, * the hash table will continue to function exactly as before without having to * rehash any of the keys. * Notes: * 1. Overflow check. * 2. The new number of chains is twice the old number of chains. * 3. The new mask is one bit wider than the previous, revealing a * new bit in all hashed keys. * 4. Allocate a new table of chain pointers that is twice as large as the * previous one. * 5. If the reallocation was successful, we perform the rest of the growth * algorithm, otherwise we do nothing. * 6. 
The exposed_bit variable holds a mask with which each hashed key can be
 *     AND-ed to test the value of its newly exposed bit.
 * 7.  Now loop over each chain in the table and sort its nodes into two
 *     chains based on the value of each node's newly exposed hash bit.
 *     Nodes are pushed onto the front of the low or high chain, so their
 *     relative order within a chain is reversed.
 * 8.  The low chain replaces the current chain.  The high chain goes
 *     into the corresponding sister chain in the upper half of the table.
 * 9.  We have finished dealing with the chains and nodes. We now update
 *     the various bookkeeping fields of the hash structure.
 */

static void grow_table(hash_t *hash)
{
    hnode_t **newtable;

    assert (2 * hash->nchains > hash->nchains);	/* 1 */

    newtable = realloc(hash->table,
            sizeof *newtable * hash->nchains * 2);	/* 4 */

    if (newtable) {	/* 5 */
        hash_val_t mask = (hash->mask << 1) | 1;	/* 3 */
        hash_val_t exposed_bit = mask ^ hash->mask;	/* 6 */
        hash_val_t chain;

        assert (mask != hash->mask);

        for (chain = 0; chain < hash->nchains; chain++) { /* 7 */
            hnode_t *low_chain = 0, *high_chain = 0, *hptr, *next;

            /* save the successor first: hptr->next is overwritten below */
            for (hptr = newtable[chain]; hptr != 0; hptr = next) {
                next = hptr->next;

                if (hptr->hkey & exposed_bit) {
                    hptr->next = high_chain;
                    high_chain = hptr;
                } else {
                    hptr->next = low_chain;
                    low_chain = hptr;
                }
            }

            newtable[chain] = low_chain; 	/* 8 */
            newtable[chain + hash->nchains] = high_chain;
        }

        hash->table = newtable;			/* 9 */
        hash->mask = mask;
        hash->nchains *= 2;
        hash->lowmark *= 2;
        hash->highmark *= 2;
    }
    assert (hash_verify(hash));
}

/*
 * Cut a table size in half. This is done by folding together adjacent chains
 * and populating the lower half of the table with these chains. The chains are
 * simply spliced together. Once this is done, the whole table is reallocated
 * to a smaller object.
 * Notes:
 * 1.  It is illegal to have a hash table with one slot. This would mean that
 *     hash->shift is equal to hash_val_t_bit, an illegal shift value.
 *     Also, other things could go wrong, such as hash->lowmark becoming zero.
 *     (NOTE(review): the code in this file tracks hash->mask, not a shift
 *     field; the sentence above presumably predates that -- confirm.)
 * 2.
Looping over each pair of sister chains, the low_chain is set to
 *     point to the head node of the chain in the lower half of the table,
 *     and high_chain points to the head node of the sister in the upper half.
 * 3.  The intent here is to compute a pointer to the last node of the
 *     lower chain into the low_tail variable. If this chain is empty,
 *     low_tail ends up with a null value.
 * 4.  If the lower chain is not empty, we simply tack the upper chain onto it.
 *     If the upper chain is a null pointer, nothing happens.
 * 5.  Otherwise, if the lower chain is empty but the upper one is not, then
 *     the high chain is simply transferred to the lower half of the table.
 * 6.  Otherwise if both chains are empty, there is nothing to do.
 * 7.  All the chain pointers are in the lower half of the table now, so
 *     we reallocate it to a smaller object. This, of course, invalidates
 *     all pointer-to-pointers which reference into the table from the
 *     first node of each chain.
 * 8.  Though it's unlikely, the reallocation may fail. In this case we
 *     pretend that the table _was_ reallocated to a smaller object.
 * 9.  Finally, update the various table parameters to reflect the new size.
*/

static void shrink_table(hash_t *hash)
{
    hash_val_t chain, nchains;
    hnode_t **newtable, *low_tail, *low_chain, *high_chain;

    assert (hash->nchains >= 2);			/* 1 */
    nchains = hash->nchains / 2;

    for (chain = 0; chain < nchains; chain++) {
        low_chain = hash->table[chain];		/* 2 */
        high_chain = hash->table[chain + nchains];
        /* empty loop body: only walks low_tail to the last node */
        for (low_tail = low_chain; low_tail && low_tail->next; low_tail = low_tail->next)
            ;	/* 3 */
        if (low_chain != 0)				/* 4 */
            low_tail->next = high_chain;
        else if (high_chain != 0)			/* 5 */
            hash->table[chain] = high_chain;
        else
            assert (hash->table[chain] == NULL);	/* 6 */
    }
    newtable = realloc(hash->table,
            sizeof *newtable * nchains);		/* 7 */
    if (newtable)					/* 8 */
        hash->table = newtable;
    hash->mask >>= 1;			/* 9 */
    hash->nchains = nchains;
    hash->lowmark /= 2;
    hash->highmark /= 2;
    assert (hash_verify(hash));
}


/*
 * Create a dynamic hash table. Both the hash table structure and the table
 * itself are dynamically allocated. Furthermore, the table is extendible in
 * that it will automatically grow as its load factor increases beyond a
 * certain threshold.
 * Notes:
 * 1. If the number of bits in the hash_val_t type has not been computed yet,
 *    we do so here, because this is likely to be the first function that the
 *    user calls.
 * 2. Allocate a hash table control structure.
 * 3. If a hash table control structure is successfully allocated, we
 *    proceed to initialize it. Otherwise we return a null pointer.
 * 4. We try to allocate the table of hash chains.
 * 5. If we were able to allocate the hash chain table, we can finish
 *    initializing the hash structure and the table. Otherwise, we must
 *    backtrack by freeing the hash structure.
 * 6. INIT_SIZE should be a power of two. The high and low marks are always set
 *    to be twice the table size and half the table size respectively. When the
 *    number of nodes in the table grows beyond the high size (beyond load
 *    factor 2), it will double in size to cut the load factor down to
 *    about 1.
If the table shrinks down to or beneath load factor 0.5, * it will shrink, bringing the load up to about 1. However, the table * will never shrink beneath INIT_SIZE even if it's emptied. * 7. This indicates that the table is dynamically allocated and dynamically * resized on the fly. A table that has this value set to zero is * assumed to be statically allocated and will not be resized. * 8. The table of chains must be properly reset to all null pointers. */ hash_t *hash_create(hashcount_t maxcount, hash_comp_t compfun, hash_fun_t hashfun) { hash_t *hash; if (hash_val_t_bit == 0) /* 1 */ compute_bits(); hash = malloc(sizeof *hash); /* 2 */ if (hash) { /* 3 */ hash->table = malloc(sizeof *hash->table * INIT_SIZE); /* 4 */ if (hash->table) { /* 5 */ hash->nchains = INIT_SIZE; /* 6 */ hash->highmark = INIT_SIZE * 2; hash->lowmark = INIT_SIZE / 2; hash->nodecount = 0; hash->maxcount = maxcount; hash->compare = compfun ? compfun : hash_comp_default; hash->function = hashfun ? hashfun : hash_fun_default; hash->allocnode = hnode_alloc; hash->freenode = hnode_free; hash->context = NULL; hash->mask = INIT_MASK; hash->dynamic = 1; /* 7 */ clear_table(hash); /* 8 */ assert (hash_verify(hash)); return hash; } free(hash); } return NULL; } /* * Select a different set of node allocator routines. */ void hash_set_allocator(hash_t *hash, hnode_alloc_t al, hnode_free_t fr, void *context) { assert (hash_count(hash) == 0); assert ((al == 0 && fr == 0) || (al != 0 && fr != 0)); hash->allocnode = al ? al : hnode_alloc; hash->freenode = fr ? fr : hnode_free; hash->context = context; } /* * Free every node in the hash using the hash->freenode() function pointer, and * cause the hash to become empty. 
*/ void hash_free_nodes(hash_t *hash) { hscan_t hs; hnode_t *node; hash_scan_begin(&hs, hash); while ((node = hash_scan_next(&hs))) { hash_scan_delete(hash, node); hash->freenode(node, hash->context); } hash->nodecount = 0; clear_table(hash); } /* * Obsolescent function for removing all nodes from a table, * freeing them and then freeing the table all in one step. */ void hash_free(hash_t *hash) { #ifdef KAZLIB_OBSOLESCENT_DEBUG assert ("call to obsolescent function hash_free()" && 0); #endif hash_free_nodes(hash); hash_destroy(hash); } /* * Free a dynamic hash table structure. */ void hash_destroy(hash_t *hash) { assert (hash_val_t_bit != 0); assert (hash_isempty(hash)); free(hash->table); free(hash); } /* * Initialize a user supplied hash structure. The user also supplies a table of * chains which is assigned to the hash structure. The table is static---it * will not grow or shrink. * 1. See note 1. in hash_create(). * 2. The user supplied array of pointers hopefully contains nchains nodes. * 3. See note 7. in hash_create(). * 4. We must dynamically compute the mask from the given power of two table * size. * 5. The user supplied table can't be assumed to contain null pointers, * so we reset it here. */ hash_t *hash_init(hash_t *hash, hashcount_t maxcount, hash_comp_t compfun, hash_fun_t hashfun, hnode_t **table, hashcount_t nchains) { if (hash_val_t_bit == 0) /* 1 */ compute_bits(); assert (is_power_of_two(nchains)); hash->table = table; /* 2 */ hash->nchains = nchains; hash->nodecount = 0; hash->maxcount = maxcount; hash->compare = compfun ? compfun : hash_comp_default; hash->function = hashfun ? hashfun : hash_fun_default; hash->dynamic = 0; /* 3 */ hash->mask = compute_mask(nchains); /* 4 */ clear_table(hash); /* 5 */ assert (hash_verify(hash)); return hash; } /* * Reset the hash scanner so that the next element retrieved by * hash_scan_next() shall be the first element on the first non-empty chain. * Notes: * 1. Locate the first non empty chain. * 2. 
If an empty chain is found, remember which one it is and set the next * pointer to refer to its first element. * 3. Otherwise if a chain is not found, set the next pointer to NULL * so that hash_scan_next() shall indicate failure. */ void hash_scan_begin(hscan_t *scan, hash_t *hash) { hash_val_t nchains = hash->nchains; hash_val_t chain; scan->table = hash; /* 1 */ for (chain = 0; chain < nchains && hash->table[chain] == 0; chain++) ; if (chain < nchains) { /* 2 */ scan->chain = chain; scan->next = hash->table[chain]; } else { /* 3 */ scan->next = NULL; } } /* * Retrieve the next node from the hash table, and update the pointer * for the next invocation of hash_scan_next(). * Notes: * 1. Remember the next pointer in a temporary value so that it can be * returned. * 2. This assertion essentially checks whether the module has been properly * initialized. The first point of interaction with the module should be * either hash_create() or hash_init(), both of which set hash_val_t_bit to * a non zero value. * 3. If the next pointer we are returning is not NULL, then the user is * allowed to call hash_scan_next() again. We prepare the new next pointer * for that call right now. That way the user is allowed to delete the node * we are about to return, since we will no longer be needing it to locate * the next node. * 4. If there is a next node in the chain (next->next), then that becomes the * new next node, otherwise ... * 5. We have exhausted the current chain, and must locate the next subsequent * non-empty chain in the table. * 6. If a non-empty chain is found, the first element of that chain becomes * the new next node. Otherwise there is no new next node and we set the * pointer to NULL so that the next time hash_scan_next() is called, a null * pointer shall be immediately returned. 
*/ hnode_t *hash_scan_next(hscan_t *scan) { hnode_t *next = scan->next; /* 1 */ hash_t *hash = scan->table; hash_val_t chain = scan->chain + 1; hash_val_t nchains = hash->nchains; assert (hash_val_t_bit != 0); /* 2 */ if (next) { /* 3 */ if (next->next) { /* 4 */ scan->next = next->next; } else { while (chain < nchains && hash->table[chain] == 0) /* 5 */ chain++; if (chain < nchains) { /* 6 */ scan->chain = chain; scan->next = hash->table[chain]; } else { scan->next = NULL; } } } return next; } /* * Insert a node into the hash table. * Notes: * 1. It's illegal to insert more than the maximum number of nodes. The client * should verify that the hash table is not full before attempting an * insertion. * 2. The same key may not be inserted into a table twice. * 3. If the table is dynamic and the load factor is already at >= 2, * grow the table. * 4. We take the bottom N bits of the hash value to derive the chain index, * where N is the base 2 logarithm of the size of the hash table. */ void hash_insert(hash_t *hash, hnode_t *node, const void *key) { hash_val_t hkey, chain; assert (hash_val_t_bit != 0); assert (node->next == NULL); assert (hash->nodecount < hash->maxcount); /* 1 */ assert (hash_lookup(hash, key) == NULL); /* 2 */ if (hash->dynamic && hash->nodecount >= hash->highmark) /* 3 */ grow_table(hash); hkey = hash->function(key); chain = hkey & hash->mask; /* 4 */ node->key = key; node->hkey = hkey; node->next = hash->table[chain]; hash->table[chain] = node; hash->nodecount++; assert (hash_verify(hash)); } /* * Find a node in the hash table and return a pointer to it. * Notes: * 1. We hash the key and keep the entire hash value. As an optimization, when * we descend down the chain, we can compare hash values first and only if * hash values match do we perform a full key comparison. * 2. To locate the chain from among 2^N chains, we look at the lower N bits of * the hash value by anding them with the current mask. * 3. 
Looping through the chain, we compare the stored hash value inside each
 *    node against our computed hash. If they match, then we do a full
 *    comparison between the unhashed keys. If these match, we have located the
 *    entry.
 */

hnode_t *hash_lookup(hash_t *hash, const void *key)
{
    const hash_val_t wanted = hash->function(key);	/* 1 */
    hnode_t *cursor = hash->table[wanted & hash->mask];	/* 2 */

    while (cursor != NULL) {				/* 3 */
        if (cursor->hkey == wanted && hash->compare(cursor->key, key) == 0)
            return cursor;
        cursor = cursor->next;
    }

    return NULL;
}

/*
 * Delete the given node from the hash table.  Since the chains
 * are singly linked, we must locate the start of the node's chain
 * and traverse.
 * Notes:
 * 1. The node must belong to this hash table, and its key must not have
 *    been tampered with.
 * 2. If this deletion will take the node count below the low mark, we
 *    shrink the table now.
 * 3. Determine which chain the node belongs to, and fetch the pointer
 *    to the first node in this chain.
 * 4. If the node being deleted is the first node in the chain, then
 *    simply update the chain head pointer.
 * 5. Otherwise advance to the node's predecessor, and splice out
 *    by updating the predecessor's next pointer.
 * 6. Indicate that the node is no longer in a hash table.
*/ hnode_t *hash_delete(hash_t *hash, hnode_t *node) { hash_val_t chain; hnode_t *hptr; assert (hash_lookup(hash, node->key) == node); /* 1 */ assert (hash_val_t_bit != 0); if (hash->dynamic && hash->nodecount <= hash->lowmark && hash->nodecount > INIT_SIZE) shrink_table(hash); /* 2 */ chain = node->hkey & hash->mask; /* 3 */ hptr = hash->table[chain]; if (hptr == node) { /* 4 */ hash->table[chain] = node->next; } else { while (hptr->next != node) { /* 5 */ assert (hptr != 0); hptr = hptr->next; } assert (hptr->next == node); hptr->next = node->next; } hash->nodecount--; assert (hash_verify(hash)); node->next = NULL; /* 6 */ return node; } int hash_alloc_insert(hash_t *hash, const void *key, void *data) { hnode_t *node = hash->allocnode(hash->context); if (node) { hnode_init(node, data); hash_insert(hash, node, key); return 1; } return 0; } void hash_delete_free(hash_t *hash, hnode_t *node) { hash_delete(hash, node); hash->freenode(node, hash->context); } /* * Exactly like hash_delete, except does not trigger table shrinkage. This is to be * used from within a hash table scan operation. See notes for hash_delete. */ hnode_t *hash_scan_delete(hash_t *hash, hnode_t *node) { hash_val_t chain; hnode_t *hptr; assert (hash_lookup(hash, node->key) == node); assert (hash_val_t_bit != 0); chain = node->hkey & hash->mask; hptr = hash->table[chain]; if (hptr == node) { hash->table[chain] = node->next; } else { while (hptr->next != node) hptr = hptr->next; hptr->next = node->next; } hash->nodecount--; assert (hash_verify(hash)); node->next = NULL; return node; } /* * Like hash_delete_free but based on hash_scan_delete. */ void hash_scan_delfree(hash_t *hash, hnode_t *node) { hash_scan_delete(hash, node); hash->freenode(node, hash->context); } /* * Verify whether the given object is a valid hash table. This means * Notes: * 1. If the hash table is dynamic, verify whether the high and * low expansion/shrinkage thresholds are powers of two. * 2. 
Count all nodes in the table, and test each hash value * to see whether it is correct for the node's chain. */ int hash_verify(hash_t *hash) { hashcount_t count = 0; hash_val_t chain; hnode_t *hptr; if (hash->dynamic) { /* 1 */ if (hash->lowmark >= hash->highmark) return 0; if (!is_power_of_two(hash->highmark)) return 0; if (!is_power_of_two(hash->lowmark)) return 0; } for (chain = 0; chain < hash->nchains; chain++) { /* 2 */ for (hptr = hash->table[chain]; hptr != 0; hptr = hptr->next) { if ((hptr->hkey & hash->mask) != chain) return 0; count++; } } if (count != hash->nodecount) return 0; return 1; } /* * Test whether the hash table is full and return 1 if this is true, * 0 if it is false. */ #undef hash_isfull int hash_isfull(hash_t *hash) { return hash->nodecount == hash->maxcount; } /* * Test whether the hash table is empty and return 1 if this is true, * 0 if it is false. */ #undef hash_isempty int hash_isempty(hash_t *hash) { return hash->nodecount == 0; } static hnode_t *hnode_alloc(void *context) { return malloc(sizeof *hnode_alloc(NULL)); } static void hnode_free(hnode_t *node, void *context) { free(node); } /* * Create a hash table node dynamically and assign it the given data. */ hnode_t *hnode_create(void *data) { hnode_t *node = malloc(sizeof *node); if (node) { node->data = data; node->next = NULL; } return node; } /* * Initialize a client-supplied node */ hnode_t *hnode_init(hnode_t *hnode, void *data) { hnode->data = data; hnode->next = NULL; return hnode; } /* * Destroy a dynamically allocated node. 
*/

void hnode_destroy(hnode_t *hnode)
{
    free(hnode);
}

/* Replace the data pointer stored in the node. */
#undef hnode_put
void hnode_put(hnode_t *node, void *data)
{
    node->data = data;
}

/* Return the data pointer stored in the node. */
#undef hnode_get
void *hnode_get(hnode_t *node)
{
    return node->data;
}

/* Return the key pointer stored in the node. */
#undef hnode_getkey
const void *hnode_getkey(hnode_t *node)
{
    return node->key;
}

/* Return the number of nodes currently in the table. */
#undef hash_count
hashcount_t hash_count(hash_t *hash)
{
    return hash->nodecount;
}

/* Return the number of chains in the table. */
#undef hash_size
hashcount_t hash_size(hash_t *hash)
{
    return hash->nchains;
}

/*
 * Default hash function, used when the caller supplies none.  The key is
 * read as a null-terminated string of unsigned chars.  Each character is
 * mixed in twice, once using the character itself and once using its
 * upper four bits, each time XOR-ing in a randbox[] entry and then rotating
 * the accumulator left (by 1, then by 2) within its low 32 bits.
 */
static hash_val_t hash_fun_default(const void *key)
{
    static unsigned long randbox[] = {
        0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U,
        0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU,
        0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU,
        0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU,
    };

    const unsigned char *str = key;
    hash_val_t acc = 0;

    while (*str) {
        acc ^= randbox[(*str + acc) & 0xf];
        acc = (acc << 1) | (acc >> 31);
        acc &= 0xffffffffU;	/* keep the result within 32 bits */
        acc ^= randbox[((*str++ >> 4) + acc) & 0xf];
        acc = (acc << 2) | (acc >> 30);
        acc &= 0xffffffffU;
    }
    return acc;
}

/*
 * Default key comparison, used when the caller supplies none: keys are
 * compared as C strings with strcmp().
 */
static int hash_comp_default(const void *key1, const void *key2)
{
    return strcmp(key1, key2);
}
kmer-code-2013-trunk/libutil/kazlib/list.c0000644000000000000000000003770510541426140017216 0ustar rootroot/*
 * List Abstract Data Type
 * Copyright (C) 1997 Kaz Kylheku
 *
 * Free Software License:
 *
 * All rights are reserved by the author, with the following exceptions:
 * Permission is granted to freely reproduce and distribute this software,
 * possibly in exchange for a fee, provided that this copyright notice appears
 * intact. Permission is also granted to adapt this software to produce
 * derivative works, as long as the modified versions carry this copyright
 * notice and additional notices stating that the work has been modified.
 * This source code may be translated into executable form and incorporated
 * into proprietary software; there is no requirement for such software to
 * contain a copyright notice related to this source.
* */ #include #include #include #define LIST_IMPLEMENTATION #include "list.h" #define next list_next #define prev list_prev #define data list_data #define pool list_pool #define fre list_free #define size list_size #define nilnode list_nilnode #define nodecount list_nodecount #define maxcount list_maxcount #define list_nil(L) (&(L)->nilnode) #define list_first_priv(L) ((L)->nilnode.next) #define list_last_priv(L) ((L)->nilnode.prev) #define lnode_next(N) ((N)->next) #define lnode_prev(N) ((N)->prev) /* * Initialize a list object supplied by the client such that it becomes a valid * empty list. If the list is to be ``unbounded'', the maxcount should be * specified as LISTCOUNT_T_MAX, or, alternately, as -1. The value zero * is not permitted. */ list_t *list_init(list_t *list, listcount_t maxcount) { assert (maxcount != 0); list->nilnode.next = &list->nilnode; list->nilnode.prev = &list->nilnode; list->nodecount = 0; list->maxcount = maxcount; return list; } /* * Dynamically allocate a list object using malloc(), and initialize it so that * it is a valid empty list. If the list is to be ``unbounded'', the maxcount * should be specified as LISTCOUNT_T_MAX, or, alternately, as -1. */ list_t *list_create(listcount_t maxcount) { list_t *new = malloc(sizeof *new); if (new) { assert (maxcount != 0); new->nilnode.next = &new->nilnode; new->nilnode.prev = &new->nilnode; new->nodecount = 0; new->maxcount = maxcount; } return new; } /* * Destroy a dynamically allocated list object. * The client must remove the nodes first. */ void list_destroy(list_t *list) { assert (list_isempty(list)); free(list); } /* * Free all of the nodes of a list. The list must contain only * dynamically allocated nodes. After this call, the list * is empty. 
*/ void list_destroy_nodes(list_t *list) { lnode_t *lnode = list_first_priv(list), *nil = list_nil(list), *tmp; while (lnode != nil) { tmp = lnode->next; lnode->next = NULL; lnode->prev = NULL; lnode_destroy(lnode); lnode = tmp; } list_init(list, list->maxcount); } /* * Return all of the nodes of a list to a node pool. The nodes in * the list must all have come from the same pool. */ void list_return_nodes(list_t *list, lnodepool_t *pool) { lnode_t *lnode = list_first_priv(list), *tmp, *nil = list_nil(list); while (lnode != nil) { tmp = lnode->next; lnode->next = NULL; lnode->prev = NULL; lnode_return(pool, lnode); lnode = tmp; } list_init(list, list->maxcount); } /* * Insert the node ``new'' into the list immediately after ``this'' node. */ void list_ins_after(list_t *list, lnode_t *new, lnode_t *this) { lnode_t *that = this->next; assert (new != NULL); assert (!list_contains(list, new)); assert (!lnode_is_in_a_list(new)); assert (this == list_nil(list) || list_contains(list, this)); assert (list->nodecount + 1 > list->nodecount); new->prev = this; new->next = that; that->prev = new; this->next = new; list->nodecount++; assert (list->nodecount <= list->maxcount); } /* * Insert the node ``new'' into the list immediately before ``this'' node. */ void list_ins_before(list_t *list, lnode_t *new, lnode_t *this) { lnode_t *that = this->prev; assert (new != NULL); assert (!list_contains(list, new)); assert (!lnode_is_in_a_list(new)); assert (this == list_nil(list) || list_contains(list, this)); assert (list->nodecount + 1 > list->nodecount); new->next = this; new->prev = that; that->next = new; this->prev = new; list->nodecount++; assert (list->nodecount <= list->maxcount); } /* * Delete the given node from the list. 
*/ lnode_t *list_delete(list_t *list, lnode_t *del) { lnode_t *next = del->next; lnode_t *prev = del->prev; assert (list_contains(list, del)); prev->next = next; next->prev = prev; list->nodecount--; del->next = del->prev = NULL; return del; } /* * For each node in the list, execute the given function. The list, * current node and the given context pointer are passed on each * call to the function. */ void list_process(list_t *list, void *context, void (* function)(list_t *list, lnode_t *lnode, void *context)) { lnode_t *node = list_first_priv(list), *next, *nil = list_nil(list); while (node != nil) { /* check for callback function deleting */ /* the next node from under us */ assert (list_contains(list, node)); next = node->next; function(list, node, context); node = next; } } /* * Dynamically allocate a list node and assign it the given piece of data. */ lnode_t *lnode_create(void *data) { lnode_t *new = malloc(sizeof *new); if (new) { new->data = data; new->next = NULL; new->prev = NULL; } return new; } /* * Initialize a user-supplied lnode. */ lnode_t *lnode_init(lnode_t *lnode, void *data) { lnode->data = data; lnode->next = NULL; lnode->prev = NULL; return lnode; } /* * Destroy a dynamically allocated node. */ void lnode_destroy(lnode_t *lnode) { assert (!lnode_is_in_a_list(lnode)); free(lnode); } /* * Initialize a node pool object to use a user-supplied set of nodes. * The ``nodes'' pointer refers to an array of lnode_t objects, containing * ``n'' elements. */ lnodepool_t *lnode_pool_init(lnodepool_t *pool, lnode_t *nodes, listcount_t n) { listcount_t i; assert (n != 0); pool->pool = nodes; pool->fre = nodes; pool->size = n; for (i = 0; i < n - 1; i++) { nodes[i].next = nodes + i + 1; } nodes[i].next = NULL; nodes[i].prev = nodes; /* to make sure node is marked ``on list'' */ return pool; } /* * Create a dynamically allocated pool of n nodes. 
*/ lnodepool_t *lnode_pool_create(listcount_t n) { lnodepool_t *pool; lnode_t *nodes; assert (n != 0); pool = malloc(sizeof *pool); if (!pool) return NULL; nodes = malloc(n * sizeof *nodes); if (!nodes) { free(pool); return NULL; } lnode_pool_init(pool, nodes, n); return pool; } /* * Determine whether the given pool is from this pool. */ int lnode_pool_isfrom(lnodepool_t *pool, lnode_t *node) { listcount_t i; /* this is carefully coded this way because ANSI C forbids pointers to different objects from being subtracted or compared other than for exact equality */ for (i = 0; i < pool->size; i++) { if (pool->pool + i == node) return 1; } return 0; } /* * Destroy a dynamically allocated pool of nodes. */ void lnode_pool_destroy(lnodepool_t *p) { free(p->pool); free(p); } /* * Borrow a node from a node pool. Returns a null pointer if the pool * is exhausted. */ lnode_t *lnode_borrow(lnodepool_t *pool, void *data) { lnode_t *new = pool->fre; if (new) { pool->fre = new->next; new->data = data; new->next = NULL; new->prev = NULL; } return new; } /* * Return a node to a node pool. A node must be returned to the pool * from which it came. */ void lnode_return(lnodepool_t *pool, lnode_t *node) { assert (lnode_pool_isfrom(pool, node)); assert (!lnode_is_in_a_list(node)); node->next = pool->fre; node->prev = node; pool->fre = node; } /* * Determine whether the given list contains the given node. * According to this function, a list does not contain its nilnode. */ int list_contains(list_t *list, lnode_t *node) { lnode_t *n, *nil = list_nil(list); for (n = list_first_priv(list); n != nil; n = lnode_next(n)) { if (node == n) return 1; } return 0; } /* * A more generalized variant of list_transfer. This one removes a * ``slice'' from the source list and appends it to the destination * list. 
*/

/*
 * The slice runs from ``first'' to ``last'' inclusive. If either pointer
 * is NULL the call does nothing. The number of nodes moved is found by
 * walking the slice after it has been grafted onto ``dest''.
 */

void list_extract(list_t *dest, list_t *source, lnode_t *first, lnode_t *last)
{
    listcount_t moved = 1;  /* the slice holds at least ``first'' itself */

    assert (first == NULL || list_contains(source, first));
    assert (last == NULL || list_contains(source, last));

    if (first == NULL || last == NULL)
        return;

    /* adjust the source list so that the slice is spliced out */

    first->prev->next = last->next;
    last->next->prev = first->prev;

    /* graft the splice at the end of the dest list */

    last->next = &dest->nilnode;
    first->prev = dest->nilnode.prev;
    dest->nilnode.prev->next = first;
    dest->nilnode.prev = last;

    /* count the nodes in the slice */
    while (first != last) {
        first = first->next;
        assert (first != list_nil(source));     /* oops, last before first! */
        moved++;
    }

    /* assert no overflows */
    assert (source->nodecount - moved <= source->nodecount);
    assert (dest->nodecount + moved >= dest->nodecount);

    /* assert no weirdness */
    assert (moved <= source->nodecount);

    source->nodecount -= moved;
    dest->nodecount += moved;

    /* assert list sanity */
    assert (list_verify(source));
    assert (list_verify(dest));
}

/*
 * Split off a trailing sequence of nodes from the source list and relocate
 * them to the tail of the destination list. The trailing sequence begins
 * with node ``first'' and terminates with the last node of the source
 * list. The nodes are added to the end of the new list in their original
 * order.
*/

void list_transfer(list_t *dest, list_t *source, lnode_t *first)
{
    listcount_t moved;
    lnode_t *tail, *walk;

    assert (first == NULL || list_contains(source, first));

    if (first == NULL)
        return;

    tail = source->nilnode.prev;

    /* cut first..tail out of the source list */
    source->nilnode.prev = first->prev;
    first->prev->next = &source->nilnode;

    /* hang the run behind the current last node of dest */
    tail->next = &dest->nilnode;
    first->prev = dest->nilnode.prev;
    dest->nilnode.prev->next = first;
    dest->nilnode.prev = tail;

    /* count the nodes that changed lists */
    for (moved = 1, walk = first; walk != tail; walk = walk->next)
        moved++;

    /* assert no overflows */
    assert (source->nodecount - moved <= source->nodecount);
    assert (dest->nodecount + moved >= dest->nodecount);

    /* assert no weirdness */
    assert (moved <= source->nodecount);

    source->nodecount -= moved;
    dest->nodecount += moved;

    /* assert list sanity */
    assert (list_verify(source));
    assert (list_verify(dest));
}

/*
 * Merge the sorted list ``sour'' into the sorted list ``dest'' using
 * ``compare'' on the node data. Unless both arguments are the same list,
 * every node of ``sour'' ends up in ``dest''.
 */

void list_merge(list_t *dest, list_t *sour,
        int compare (const void *, const void *))
{
    lnode_t *const dest_end = list_nil(dest);
    lnode_t *const sour_end = list_nil(sour);
    lnode_t *dpos, *spos, *snext;

    /* Nothing to do if source and destination list are the same. */
    if (dest == sour)
        return;

    /* overflow check */
    assert (list_count(sour) + list_count(dest) >= list_count(sour));

    /* lists must be sorted */
    assert (list_is_sorted(sour, compare));
    assert (list_is_sorted(dest, compare));

    dpos = list_first_priv(dest);
    spos = list_first_priv(sour);

    while (dpos != dest_end && spos != sour_end) {
        if (compare(lnode_get(dpos), lnode_get(spos)) < 0) {
            /* dest's node sorts first: it is already in place */
            dpos = lnode_next(dpos);
            continue;
        }

        /* move sour's node in front of the current dest node */
        snext = lnode_next(spos);
        list_delete(sour, spos);
        list_ins_before(dest, spos, dpos);
        spos = snext;
    }

    /* dest ran out first: whatever remains of sour goes on the end */
    if (dpos == dest_end && spos != sour_end)
        list_transfer(dest, sour, spos);
}

/*
 * Sort the list by splitting it in two, sorting each half recursively
 * and merging the halves.
 */

void list_sort(list_t *list, int compare(const void *, const void *))
{
    list_t extra;
    listcount_t total = list_count(list);

    if (total > 1) {
        listcount_t half = total / 2;
        listcount_t step;
        lnode_t *split = list_first_priv(list);

        list_init(&extra, total - half);

        /* walk to the first node of the second half */
        for (step = 0; step < half; step++)
            split = lnode_next(split);

        list_transfer(&extra, list, split);
        list_sort(list, compare);
        list_sort(&extra, compare);
        list_merge(list, &extra, compare);
    }

    assert (list_is_sorted(list, compare));
}

/*
 * Return the first node whose data compares equal to ``key'' (compare()
 * returns 0), or a null pointer if there is none.
 */

lnode_t *list_find(list_t *list, const void *key,
        int compare(const void *, const void *))
{
    lnode_t *node = list_first_priv(list);

    while (node != list_nil(list)) {
        if (compare(lnode_get(node), key) == 0)
            return node;
        node = node->next;
    }

    return NULL;
}

/*
 * Return 1 if the list is in sorted order, 0 otherwise
 */

int list_is_sorted(list_t *list, int compare(const void *, const void *))
{
    lnode_t *const nil = list_nil(list);
    lnode_t *prev = list_first_priv(list);
    lnode_t *cur;

    if (prev == nil)
        return 1;

    /* every adjacent pair must be in non-descending order */
    for (cur = lnode_next(prev); cur != nil; prev = cur, cur = lnode_next(cur)) {
        if (compare(lnode_get(prev), lnode_get(cur)) > 0)
            return 0;
    }

    return 1;
}

/*
 * Get rid of macro functions definitions so they don't interfere
 * with the actual definitions
 */

#undef list_isempty
#undef list_isfull
#undef lnode_pool_isempty
#undef list_append
#undef list_prepend
#undef list_first
#undef list_last
#undef list_next
#undef list_prev
#undef list_count
#undef list_del_first
#undef list_del_last #undef lnode_put #undef lnode_get /* * Return 1 if the list is empty, 0 otherwise */ int list_isempty(list_t *list) { return list->nodecount == 0; } /* * Return 1 if the list is full, 0 otherwise * Permitted only on bounded lists. */ int list_isfull(list_t *list) { return list->nodecount == list->maxcount; } /* * Check if the node pool is empty. */ int lnode_pool_isempty(lnodepool_t *pool) { return (pool->fre == NULL); } /* * Add the given node at the end of the list */ void list_append(list_t *list, lnode_t *node) { list_ins_before(list, node, &list->nilnode); } /* * Add the given node at the beginning of the list. */ void list_prepend(list_t *list, lnode_t *node) { list_ins_after(list, node, &list->nilnode); } /* * Retrieve the first node of the list */ lnode_t *list_first(list_t *list) { if (list->nilnode.next == &list->nilnode) return NULL; return list->nilnode.next; } /* * Retrieve the last node of the list */ lnode_t *list_last(list_t *list) { if (list->nilnode.prev == &list->nilnode) return NULL; return list->nilnode.prev; } /* * Retrieve the count of nodes in the list */ listcount_t list_count(list_t *list) { return list->nodecount; } /* * Remove the first node from the list and return it. */ lnode_t *list_del_first(list_t *list) { return list_delete(list, list->nilnode.next); } /* * Remove the last node from the list and return it. */ lnode_t *list_del_last(list_t *list) { return list_delete(list, list->nilnode.prev); } /* * Associate a data item with the given node. */ void lnode_put(lnode_t *lnode, void *data) { lnode->data = data; } /* * Retrieve the data item associated with the node. */ void *lnode_get(lnode_t *lnode) { return lnode->data; } /* * Retrieve the node's successor. If there is no successor, * NULL is returned. */ lnode_t *list_next(list_t *list, lnode_t *lnode) { assert (list_contains(list, lnode)); if (lnode->next == list_nil(list)) return NULL; return lnode->next; } /* * Retrieve the node's predecessor. 
See comment for lnode_next(). */ lnode_t *list_prev(list_t *list, lnode_t *lnode) { assert (list_contains(list, lnode)); if (lnode->prev == list_nil(list)) return NULL; return lnode->prev; } /* * Return 1 if the lnode is in some list, otherwise return 0. */ int lnode_is_in_a_list(lnode_t *lnode) { return (lnode->next != NULL || lnode->prev != NULL); } int list_verify(list_t *list) { lnode_t *node = list_first_priv(list), *nil = list_nil(list); listcount_t count = list_count(list); if (node->prev != nil) return 0; if (count > list->maxcount) return 0; while (node != nil && count--) { if (node->next->prev != node) return 0; node = node->next; } if (count != 0 || node != nil) return 0; return 1; } kmer-code-2013-trunk/libutil/kazlib/sfx.c0000644000000000000000000005244310541426140017037 0ustar rootroot/* * SFX---A utility which tries to determine whether a given C expression * is free of side effects. This can be used for verifying that macros which * expand their arguments more than once are not being accidentally misused. * * Copyright (C) 1999 Kaz Kylheku * * Free Software License: * * All rights are reserved by the author, with the following exceptions: * Permission is granted to freely reproduce and distribute this software, * possibly in exchange for a fee, provided that this copyright notice appears * intact. Permission is also granted to adapt this software to produce * derivative works, as long as the modified versions carry this copyright * notice and additional notices stating that the work has been modified. * This source code may be translated into executable form and incorporated * into proprietary software; there is no requirement for such software to * contain a copyright notice related to this source. 
* */ #include #include #include #include #include "except.h" #include "sfx.h" #include "hash.h" #ifdef KAZLIB_POSIX_THREADS #include #endif /* * Exceptions */ #define SFX_EX 0x34DB9C4A #define SFX_SYNERR 1 /* * Cache entry */ typedef struct { hnode_t node; const char *expr; sfx_rating_t eff; } sfx_entry_t; /* * Parsing context structure */ typedef struct { const unsigned char *start; const unsigned char *input; size_t size; sfx_rating_t eff; } context_t; /* * Declarator type: abstract, concrete or both */ typedef enum { decl_abstract, decl_concrete, decl_both } decl_t; static void init_context(context_t *ctx, const unsigned char *expr) { ctx->input = ctx->start = expr; ctx->size = strlen((const char *) expr) + 1; ctx->eff = sfx_none; } static void assign_context(context_t *copy, context_t *orig) { *copy = *orig; } static void set_effect(context_t *ctx, sfx_rating_t eff) { assert (eff == sfx_none || eff == sfx_potential || eff == sfx_certain); if (eff > ctx->eff) ctx->eff = eff; } static void reset_effect(context_t *ctx) { ctx->eff = sfx_none; } static sfx_rating_t get_effect(context_t *ctx) { return ctx->eff; } static int skip_ws(context_t *expr) { while (*expr->input != 0 && isspace(*expr->input)) expr->input++; return (*expr->input == 0); } static int get_next(context_t *expr) { int ret = *expr->input; if (ret) expr->input++; return ret; } static int get_next_skip_ws(context_t *expr) { if (!skip_ws(expr)) return *expr->input++; return 0; } static const unsigned char *get_ptr(context_t *expr) { return expr->input; } static void skip_n(context_t *ctx, size_t n) { assert ((size_t) (ctx->input - ctx->start) <= ctx->size - n); ctx->input += n; } static void put_back(context_t *expr, int ch) { if (ch) expr->input--; } static int peek_next(context_t *expr) { return *expr->input; } static void syntax_error(void) { except_throw(SFX_EX, SFX_SYNERR, "syntax_error"); } static void match_hard(context_t *expr, int match) { int ch = get_next(expr); if (ch != match) 
syntax_error();
}

static void chk_comma(context_t *);

/*
 * Skip an identifier: a letter or underscore followed by any number of
 * letters, digits and underscores. Syntax error if the input does not
 * start with a letter or underscore.
 */
static void skip_ident(context_t *expr)
{
    int ch = get_next(expr);

    if (!isalpha(ch) && ch != '_')
        syntax_error();

    do {
        ch = get_next(expr);
    } while (isalnum(ch) || ch == '_');

    put_back(expr, ch);
}

/*
 * Skip a numeric constant. The scan accepts any run of alphanumerics and
 * dots, which also covers suffixes and hexadecimal digits; a sign is
 * accepted directly after an 'e' or 'E' and must be followed by a digit.
 */
static void skip_constant(context_t *expr)
{
    int ch = get_next(expr);

    assert (isdigit(ch) || ch == '.');

    do {
        ch = get_next(expr);
        if (ch == 'e' || ch == 'E') {
            ch = get_next(expr);
            if (ch == '+' || ch == '-') {
                ch = get_next(expr);
                if (!isdigit(ch))
                    syntax_error();
            }
        }
    } while (ch != 0 && (isalnum(ch) || ch == '.'));

    put_back(expr, ch);
}

/*
 * Skip a string literal including both quotes. The character after a
 * backslash is consumed without being examined. Syntax error if the
 * input ends before the closing quote.
 */
static void skip_strlit(context_t *expr)
{
    int ch = get_next(expr);

    assert (ch == '"');

    do {
        ch = get_next(expr);
        if (ch == '\\') {
            get_next(expr);
            continue;
        }
    } while (ch != 0 && ch != '"');

    if (ch != '"')
        syntax_error();
}

/*
 * Skip a character constant including both quotes; same scanning rules
 * as skip_strlit().
 */
static void skip_charlit(context_t *expr)
{
    int ch = get_next(expr);

    assert (ch == '\'');

    do {
        ch = get_next(expr);
        if (ch == '\\') {
            get_next(expr);
            continue;
        }
    } while (ch != 0 && ch != '\'');

    if (ch != '\'')
        syntax_error();
}

/*
 * Skip a specifier-qualifier list, treated here simply as one or more
 * identifiers separated by white space.
 */
static void chk_spec_qual_list(context_t *expr)
{
    skip_ws(expr);
    skip_ident(expr);

    for (;;) {
        int ch;

        skip_ws(expr);
        ch = peek_next(expr);

        if (!isalpha(ch) && ch != '_')
            break;

        skip_ident(expr);
    }
}

/*
 * Try the parse function ``chk_func'' on a copy of ``expr'', leaving
 * ``expr'' itself untouched. If ``nextchar'' is nonzero, that character
 * must follow the parsed text (after optional white space). Returns 1 if
 * the parse went through without a syntax error, in which case ``copy''
 * holds the context after the parse; returns 0 otherwise.
 */
static int speculate(void (*chk_func)(context_t *), context_t *expr,
        context_t *copy, int nextchar)
{
    static const except_id_t catch[] = { { SFX_EX, XCEPT_CODE_ANY } };
    except_t *ex;
    volatile int result = 0;
    assign_context(copy, expr);

    except_try_push(catch, 1, &ex);

    if (ex == 0) {
        /* normal path; a syntax error re-enters here with ex set */
        chk_func(copy);
        if (nextchar) {
            skip_ws(copy);
            match_hard(copy, nextchar);
        }
        result = 1;
    }

    except_try_pop();

    return result;
}

/*
 * Skip an optional pointer part of a declarator: any number of '*', each
 * optionally followed by identifiers (qualifiers).
 */
static void chk_pointer_opt(context_t *expr)
{
    for (;;) {
        int ch = get_next_skip_ws(expr);

        if (ch != '*') {
            put_back(expr, ch);
            break;
        }

        skip_ws(expr);

        ch = peek_next(expr);

        if (ch == '*')
            continue;
        if (!isalpha(ch) && ch != '_')
            break;

        skip_ident(expr);
    }
}

static void chk_decl(context_t *, decl_t);

/* Parse one parameter declaration: specifiers followed by a declarator. */
static void chk_parm_decl(context_t
*expr)
{
    chk_spec_qual_list(expr);
    chk_decl(expr, decl_both);
}

/*
 * Parse a comma-separated list of parameter declarations, optionally
 * ending in ", ...".
 */
static void chk_parm_type_list(context_t *expr)
{
    for (;;) {
        int ch;

        chk_parm_decl(expr);

        ch = get_next_skip_ws(expr);

        if (ch != ',') {
            put_back(expr, ch);
            break;
        }

        ch = get_next_skip_ws(expr);

        if (ch == '.') {
            /* first dot already consumed: two more complete the ellipsis */
            match_hard(expr, '.');
            match_hard(expr, '.');
            break;
        }

        put_back(expr, ch);
    }
}

static void chk_conditional(context_t *);

/*
 * Parse the direct part of a declarator: parenthesized declarators or
 * parameter lists, array brackets and, for concrete declarators, the
 * declared identifier.
 */
static void chk_direct_decl(context_t *expr, decl_t type)
{
    for (;;) {
        int ch = get_next_skip_ws(expr);

        if (ch == '(') {
            skip_ws(expr);
            ch = peek_next(expr);
            if (ch == '*' || ch == '(' || ch == '[')
                chk_decl(expr, type);
            else if (isalpha(ch) || ch == '_')
                chk_parm_type_list(expr);
            match_hard(expr, ')');
        } else if (ch == '[') {
            skip_ws(expr);
            ch = peek_next(expr);
            if (ch != ']')
                chk_conditional(expr);
            match_hard(expr, ']');
        } else if ((type == decl_concrete || type == decl_both)
                && (isalpha(ch) || ch == '_')) {
            put_back(expr, ch);
            skip_ident(expr);
            break;
        } else {
            put_back(expr, ch);
            break;
        }
    }
}

/*
 * Parse a declarator: an optional pointer part followed by an optional
 * direct declarator. ``type'' selects whether an identifier is accepted.
 */
static void chk_decl(context_t *expr, decl_t type)
{
    int ch;
    chk_pointer_opt(expr);
    skip_ws(expr);
    ch = peek_next(expr);
    if (ch == '[' || ch == '(' || ((type == decl_concrete || type == decl_both)
            && (isalpha(ch) || ch == '_'))) {
        chk_direct_decl(expr, type);
    }
}

/* Parse a type name: specifiers followed by an abstract declarator. */
static void chk_typename(context_t *expr)
{
    chk_spec_qual_list(expr);
    chk_decl(expr, decl_abstract);
}

/*
 * Parse a primary expression: identifier, constant, character constant or
 * string literal (each optionally with an L prefix), or a parenthesized
 * comma expression. Anything else is a syntax error.
 */
static void chk_primary(context_t *expr)
{
    int ch = peek_next(expr);

    if (ch == 'L') {
        /* possibly a wide character constant or wide string literal */
        get_next(expr);
        ch = peek_next(expr);

        if (ch == '\'') {
            skip_charlit(expr);
            return;
        }

        if (ch == '"') {
            skip_strlit(expr);
            return;
        }

        /* neither: the L starts an ordinary identifier */
        put_back(expr, 'L');
        ch = 'L';
    }

    if (isalpha(ch) || ch == '_') {
        skip_ident(expr);
        return;
    }

    if (isdigit(ch) || ch == '.') {
        skip_constant(expr);
        return;
    }

    if (ch == '(') {
        get_next(expr);
        chk_comma(expr);
        match_hard(expr, ')');
        return;
    }

    if (ch == '\'') {
        skip_charlit(expr);
        return;
    }

    if (ch == '"') {
        skip_strlit(expr);
        return;
    }

    syntax_error();
}

/*
 * Parse a postfix expression: a primary expression followed by any number
 * of subscripts, calls, member accesses and postfix ++ or --. A call is
 * rated sfx_potential; postfix ++ and -- are rated sfx_certain.
 */
static void chk_postfix(context_t *expr)
{
    chk_primary(expr);
for (;;) {
        int ch = get_next_skip_ws(expr);

        switch (ch) {
        case '[':
            /* subscript */
            chk_comma(expr);
            skip_ws(expr);
            match_hard(expr, ']');
            continue;
        case '(':
            /* function call */
            set_effect(expr, sfx_potential);
            ch = get_next_skip_ws(expr);
            if (ch != ')') {
                put_back(expr, ch);
                /* clever hack: parse non-empty argument list as comma expression */
                chk_comma(expr);
                ch = get_next_skip_ws(expr);
            }
            if (ch != ')')
                syntax_error();
            continue;
        case '.':
            /* member access */
            skip_ws(expr);
            skip_ident(expr);
            continue;
        case '-':
            ch = get_next(expr);
            if (ch != '-' && ch != '>') {
                /* a binary minus: not part of the postfix expression */
                put_back(expr, ch);
                put_back(expr, '-');
                break;
            }
            if (ch == '>') {
                /* member access through a pointer */
                skip_ws(expr);
                skip_ident(expr);
                continue;
            }
            /* postfix decrement */
            set_effect(expr, sfx_certain);
            continue;
        case '+':
            ch = get_next(expr);
            if (ch != '+') {
                /* a binary plus: not part of the postfix expression */
                put_back(expr, ch);
                put_back(expr, '+');
                break;
            }
            /* postfix increment */
            set_effect(expr, sfx_certain);
            continue;
        default:
            put_back(expr, ch);
            break;
        }
        break;
    }
}

static void chk_cast(context_t *);

/*
 * Parse a unary expression: prefix ++ and --, the unary operators
 * + - & * ~ !, sizeof, or a postfix expression. Prefix ++ and -- are
 * rated sfx_certain. The operand of sizeof leaves the rating as it was
 * before the sizeof.
 */
static void chk_unary(context_t *expr)
{
    for (;;) {
        int nscan, ch = get_next_skip_ws(expr);

        switch (ch) {
        case '+':
            ch = get_next(expr);
            if (ch == '+')
                set_effect(expr, sfx_certain);
            else
                put_back(expr, ch);
            chk_cast(expr);
            break;
        case '-':
            ch = get_next(expr);
            if (ch == '-')
                set_effect(expr, sfx_certain);
            else
                put_back(expr, ch);
            chk_cast(expr);
            break;
        case '&': case '*': case '~': case '!':
            chk_cast(expr);
            break;
        case 's':
            put_back(expr, ch);
            nscan = 0;
            /*
             * nscan becomes 7 when "sizeof" is followed by one character
             * outside a-z, 0-9 and underscore; the strcmp() covers
             * "sizeof" being the very end of the input.
             */
            sscanf((const char *) get_ptr(expr), "sizeof%*1[^a-z0-9_]%n", &nscan);
            if (nscan == 7 || strcmp((const char *) get_ptr(expr), "sizeof") == 0) {
                /* remember the rating so the operand cannot change it */
                sfx_rating_t eff = get_effect(expr);
                skip_n(expr, 6);

                ch = get_next_skip_ws(expr);

                if (ch == '(') {
                    /* sizeof (expression) or sizeof (type name)? */
                    context_t comma, type;
                    int iscomma = speculate(chk_comma, expr, &comma, ')');
                    int istype = speculate(chk_typename, expr, &type, ')');

                    if (!iscomma && !istype)
                        syntax_error();

                    if (iscomma) {
                        context_t unary;
                        put_back(expr, ch);
                        if (speculate(chk_unary, expr, &unary, 0)) {
                            assign_context(expr, &unary);
                            istype = 0;
                        }
                    }

                    if (istype)
                        assign_context(expr, &type);
                } else {
                    put_back(expr, ch);
                    chk_unary(expr);
}

                /* restore the rating recorded before the sizeof */
                reset_effect(expr);
                set_effect(expr, eff);
                break;
            }
            /* an identifier that merely starts with 's' */
            chk_postfix(expr);
            break;
        default:
            put_back(expr, ch);
            chk_postfix(expr);
            break;
        }

        break;
    }
}

/*
 * Parse a cast expression: any number of parenthesized type names in
 * front of a unary expression. Parenthesized elements that could be
 * either a type name or an expression are resolved by speculative
 * parsing and backtracking, as described in the comments below.
 */
static void chk_cast(context_t *expr)
{
    enum {
        parexpr,        /* parenthesized expression */
        partype,        /* parenthesized type name */
        parambig,       /* ambiguity between paren expr and paren type name */
        unary,          /* unary expression */
        plunary,        /* unary expression with leading plus or minus */
        other           /* none of the above, or even end of input */
    } curr = partype, old = partype, peek = partype;

    /* history for backtracking: two cast expression elements back */
    context_t old_expr = { 0 }, cur_expr = { 0 };

    for (;;) {
        context_t type, comma, unr;
        int ch = get_next_skip_ws(expr);

        /*
         * Determine what the next bit of input is: parenthesized type name,
         * expression, unary expression or what? Speculative parsing is used
         * to test several hypotheses. For example, something like
         * (X)(Y) ^ 1 is seen, it will be turned, by subsequent iterations of
         * this loop, into the codes: parambig, parambig, other.
         */

        if (ch == '(') {
            int istype = speculate(chk_typename, expr, &type, ')');
            int iscomma = speculate(chk_comma, expr, &comma, ')');

            switch (istype << 1 | iscomma) {
            case 0:
                ch = get_next_skip_ws(expr);
                if (ch == ')')
                    peek = other;       /* empty parentheses */
                else
                    syntax_error();
                break;
            case 1:
                peek = parexpr;
                break;
            case 2:
                peek = partype;
                break;
            case 3:
                peek = parambig;
                break;
            }
            put_back(expr, ch);
        } else if (ch == 0) {
            peek = other;
        } else {
            put_back(expr, ch);
            if (speculate(chk_unary, expr, &unr, 0)) {
                peek = (ch == '+' || ch == '-' || ch == '*' || ch == '&') ? plunary : unary;
            } else {
                peek = other;
            }
        }

        /*
         * Okay, now we have an idea what is coming in the input. We make some
         * sensible decision based on this and the thing we parsed previously.
         * Either the parsing continues to grab more parenthesized things, or
         * some decision is made to parse out the suffix material sensibly and
         * terminate. Backtracking is used up to two elements back.
* For example in the case of (X)(Y) ^ 1 (parambig, parambig, other) it's
         * necessary, upon seeing ^ 1 (other) to go back to second to last
         * ambiguous parenthesized element (X) and terminate by parsing the
         * (X)(Y) as a postfix expression. It cannot be a cast, because ^1
         * isn't an expression. Unary expressions that start with + or -
         * create an interesting ambiguity. Is (X)(Y) + 1 the addition of 1 to
         * the result of the call to function X with parameter Y? Or is it the
         * unary expression + 1 cast to type Y and X? The safer assumption is
         * to go with the function call hypothesis, since that's the
         * interpretation that may have side effects.
         */

        switch (curr) {
        case parexpr:           /* impossible cases */
        case other:
        case unary:
        case plunary:
            assert (0);
            syntax_error();
            /* notreached */
        case partype:
            /* the previous element was definitely a type name */
            switch (peek) {
            case parexpr:       /* cast in front of parenthesized expression */
                chk_postfix(expr);
                return;
            case partype:       /* compounding cast: keep looping */
                break;
            case parambig:      /* type or expr: keep looping */
                break;
            case unary:
            case plunary:
                chk_unary(expr);
                return;
            case other:         /* cast in front of non-expression!
*/
                syntax_error();
                /* notreached */
            }
            break;
        case parambig:
            /* the previous element was either a type name or an expression */
            switch (peek) {
            case parexpr:       /* function call */
                assign_context(expr, &cur_expr);
                chk_postfix(expr);
                return;
            case partype:       /* compounding cast: keep looping */
                break;
            case parambig:      /* type or expr: keep looping */
                break;
            case unary:
                chk_unary(expr);
                return;
            case plunary:       /* treat unary expr with + or - as additive */
            case other:
                if (old == parambig) {
                    /* reparse two expression-like things in a row as call */
                    assign_context(expr, &old_expr);
                    chk_postfix(expr);
                    return;
                }
                /* reparse expression followed by non-parenthesized
                   stuff as postfix expression */
                assign_context(expr, &cur_expr);
                chk_postfix(expr);
                return;
                /* need more context */
            }
            break;
        }

        /* shift the history and continue behind the parenthesized element */
        old = curr;
        curr = peek;
        assign_context(&old_expr, &cur_expr);
        assign_context(&cur_expr, expr);
        assign_context(expr, &type);
    }
}

/*
 * Parse cast expressions joined by * / %. An operator directly followed
 * by '=' is left in the input for chk_assignment().
 */
static void chk_multiplicative(context_t *expr)
{
    for (;;) {
        int ch;

        chk_cast(expr);
        ch = get_next_skip_ws(expr);

        if ((ch != '*' && ch != '/' && ch != '%') || peek_next(expr) == '=') {
            put_back(expr, ch);
            break;
        }
    }
}

/*
 * Parse multiplicative expressions joined by + and -. An operator
 * directly followed by '=' is left in the input.
 */
static void chk_additive(context_t *expr)
{
    for (;;) {
        int ch;

        chk_multiplicative(expr);
        ch = get_next_skip_ws(expr);

        if ((ch != '+' && ch != '-') || peek_next(expr) == '=') {
            put_back(expr, ch);
            break;
        }
    }
}

/*
 * Parse additive expressions joined by << and >>. A single '<' or '>'
 * and the compound forms <<= and >>= are left in the input.
 */
static void chk_shift(context_t *expr)
{
    for (;;) {
        int ch;

        chk_additive(expr);
        ch = get_next_skip_ws(expr);

        if (ch != '<' && ch != '>') {
            put_back(expr, ch);
            break;
        }

        if (ch == '<' && peek_next(expr) != '<') {
            put_back(expr, ch);
            break;
        }

        if (ch == '>' && peek_next(expr) != '>') {
            put_back(expr, ch);
            break;
        }

        /* consume the second character of the operator */
        get_next(expr);

        if (peek_next(expr) == '=') {
            /* <<= or >>=: give back both operator characters */
            put_back(expr, ch);
            put_back(expr, ch);
            break;
        }
    }
}

/*
 * Parse shift expressions joined by < > <= >=. The shift operators are
 * left in the input.
 */
static void chk_relational(context_t *expr)
{
    for (;;) {
        int ch;

        chk_shift(expr);
        ch = get_next_skip_ws(expr);

        if (ch != '<' && ch != '>') {
            put_back(expr, ch);
            break;
        }

        if (ch == '<' && peek_next(expr) == '<') {
            put_back(expr, ch);
            break;
        }

        if (ch == '>' && peek_next(expr) == '>') {
            put_back(expr, ch);
            break;
        }

        if
(peek_next(expr) == '=')
            get_next(expr);     /* the '=' of <= or >= */
    }
}

/* Parse relational expressions joined by == and !=. */
static void chk_equality(context_t *expr)
{
    for (;;) {
        int ch;

        chk_relational(expr);
        ch = get_next_skip_ws(expr);

        if ((ch != '!' && ch != '=') || peek_next(expr) != '=') {
            put_back(expr, ch);
            break;
        }

        match_hard(expr, '=');
    }
}

/* Parse equality expressions joined by a single '&' (not && and not &=). */
static void chk_and(context_t *expr)
{
    for (;;) {
        int ch;

        chk_equality(expr);
        ch = get_next_skip_ws(expr);

        if (ch != '&' || peek_next(expr) == '&' || peek_next(expr) == '=') {
            put_back(expr, ch);
            break;
        }
    }
}

/* Parse AND expressions joined by '^' (not ^=). */
static void chk_exclusive_or(context_t *expr)
{
    for (;;) {
        int ch;

        chk_and(expr);
        ch = get_next_skip_ws(expr);

        if (ch != '^' || peek_next(expr) == '=') {
            put_back(expr, ch);
            break;
        }
    }
}

/* Parse exclusive OR expressions joined by a single '|' (not || and not |=). */
static void chk_inclusive_or(context_t *expr)
{
    for (;;) {
        int ch;

        chk_exclusive_or(expr);
        ch = get_next_skip_ws(expr);

        if (ch != '|' || peek_next(expr) == '|' || peek_next(expr) == '=') {
            put_back(expr, ch);
            break;
        }
    }
}

/* Parse inclusive OR expressions joined by &&. */
static void chk_logical_and(context_t *expr)
{
    for (;;) {
        int ch;

        chk_inclusive_or(expr);
        ch = get_next_skip_ws(expr);

        if (ch != '&' || peek_next(expr) != '&') {
            put_back(expr, ch);
            break;
        }

        match_hard(expr, '&');
    }
}

/* Parse logical AND expressions joined by ||. */
static void chk_logical_or(context_t *expr)
{
    for (;;) {
        int ch;

        chk_logical_and(expr);
        ch = get_next_skip_ws(expr);

        if (ch != '|' || peek_next(expr) != '|') {
            put_back(expr, ch);
            break;
        }

        match_hard(expr, '|');
    }
}

/*
 * Parse a conditional expression: a logical OR expression, optionally
 * followed by '?', a comma expression, ':' and another conditional
 * expression.
 */
static void chk_conditional(context_t *expr)
{
    for (;;) {
        int ch;

        chk_logical_or(expr);
        ch = get_next_skip_ws(expr);

        if (ch != '?') {
            put_back(expr, ch);
            break;
        }

        chk_comma(expr);
        skip_ws(expr);
        match_hard(expr, ':');
    }
}

/*
 * Parse an assignment expression. Every assignment operator found
 * (= *= /= %= += -= &= ^= |= <<= >>=) rates the expression sfx_certain.
 */
static void chk_assignment(context_t *expr)
{
    for (;;) {
        int ch;

        chk_conditional(expr);
        ch = get_next_skip_ws(expr);

        switch (ch) {
        case '=':
            break;
        case '*': case '/': case '%':
        case '+': case '-': case '&':
        case '^': case '|':
            match_hard(expr, '=');
            break;
        case '<':
            match_hard(expr, '<');
            match_hard(expr, '=');
            break;
        case '>':
            match_hard(expr, '>');
            match_hard(expr, '=');
            break;
        case 0:
        default:
            put_back(expr, ch);
            return;
        }
        set_effect(expr,
sfx_certain); } } static void chk_comma(context_t *expr) { for (;;) { int ch; chk_assignment(expr); ch = get_next_skip_ws(expr); if (ch != ',') { put_back(expr, ch); break; } } } /* * This function returns 1 if the expression is successfully parsed, * or 0 if there is a syntax error. * * The object pointed to by eff is set to indicate the side effect ranking of * the parsed expression: sfx_none, sfx_potential and sfx_certain. These * rankins mean, respectively, that there are no side effects, that there are * potential side effects, or that there certainly are side effects. */ int sfx_determine(const char *expr, sfx_rating_t *eff) { static const except_id_t catch[] = { { SFX_EX, XCEPT_CODE_ANY } }; except_t *ex; context_t ctx; volatile int retval = 1; if (!except_init()) return 0; init_context(&ctx, (const unsigned char *) expr); except_try_push(catch, 1, &ex); if (ex == 0) { chk_comma(&ctx); skip_ws(&ctx); if (peek_next(&ctx) != 0) syntax_error(); } else { /* exception caught */ retval = 0; } except_try_pop(); *eff = ctx.eff; except_deinit(); return retval; } #ifdef KAZLIB_POSIX_THREADS static pthread_once_t cache_init; static pthread_mutex_t cache_mutex = PTHREAD_MUTEX_INITIALIZER; #define init_once(X, Y) pthread_once(X, Y) #define lock_cache() pthread_mutex_lock(&cache_mutex) #define unlock_cache() pthread_mutex_unlock(&cache_mutex) #else static int cache_init; static void init_once(int *once, void (*func)(void)) { if (*once == 0) { func(); *once = 1; } } #define lock_cache() #define unlock_cache() #endif static hash_t *cache; extern hash_t *hash_create(hashcount_t, hash_comp_t, hash_fun_t); static void init_cache(void) { cache = hash_create(HASHCOUNT_T_MAX, 0, 0); } static int lookup_cache(const char *expr, sfx_rating_t *rating) { hnode_t *cache_node; init_once(&cache_init, init_cache); lock_cache(); cache_node = hash_lookup(cache, expr); unlock_cache(); if (cache_node != 0) { sfx_entry_t *cache_entry = hnode_get(cache_node); *rating = cache_entry->eff; return 
1; } return 0; } static int cache_result(const char *expr, sfx_rating_t rating) { int result = 0; hnode_t *cache_node; init_once(&cache_init, init_cache); if (cache == 0) goto bail; lock_cache(); cache_node = hash_lookup(cache, expr); if (!cache_node) { sfx_entry_t *cache_entry = malloc(sizeof *cache_entry); if (cache_entry == 0) goto bail_unlock; hnode_init(&cache_entry->node, cache_entry); cache_entry->expr = expr; cache_entry->eff = rating; hash_insert(cache, &cache_entry->node, expr); } else { sfx_entry_t *cache_entry = hnode_get(cache_node); cache_entry->eff = rating; result = 1; } result = 1; bail_unlock: unlock_cache(); bail: return result; } void sfx_check(const char *expr, const char *file, unsigned long line) { sfx_rating_t eff; int success = lookup_cache(expr, &eff); if (!success) { success = sfx_determine(expr, &eff); cache_result(expr, eff); } if (!success) { fprintf(stderr, "%s:%ld: syntax error in expression \"%s\"\n", file, line, expr); } else if (eff == sfx_potential) { fprintf(stderr, "%s:%ld: expression \"%s\" may have side effects\n", file, line, expr); } else if (eff == sfx_certain) { fprintf(stderr, "%s:%ld: expression \"%s\" has side effects\n", file, line, expr); } else { return; } } int sfx_declare(const char *expr, sfx_rating_t eff) { return cache_result(expr, eff); } kmer-code-2013-trunk/libutil/kazlib/Make.include0000644000000000000000000000102410213514156020303 0ustar rootroot# -*- makefile -*- src := $/dict.c \ $/dict.h \ $/except.c \ $/except.h \ $/hash.c \ $/hash.h \ $/list.c \ $/list.h \ $/sfx.c \ $/sfx.h tst := $/dict-main.c \ $/except-main.c \ $/hash-main.c \ $/list-main.c \ $/sfx-main.c $/.C_SRCS :=$(filter %.c,${src}) $/.CXX_SRCS :=$(filter %.C,${src}) $/.CXX_LIBS :=$/libkaz.a $/.CLEAN := $/*.o $/libkaz.a: ${$/.C_SRCS:.c=.o} ${$/.CXX_SRCS:.C=.o} kmer-code-2013-trunk/libutil/kazlib/except.c0000644000000000000000000002047210541426140017524 0ustar rootroot/* * Portable Exception Handling for ANSI C. 
* Copyright (C) 1999 Kaz Kylheku * * Free Software License: * * All rights are reserved by the author, with the following exceptions: * Permission is granted to freely reproduce and distribute this software, * possibly in exchange for a fee, provided that this copyright notice appears * intact. Permission is also granted to adapt this software to produce * derivative works, as long as the modified versions carry this copyright * notice and additional notices stating that the work has been modified. * This source code may be translated into executable form and incorporated * into proprietary software; there is no requirement for such software to * contain a copyright notice related to this source. * */ #include #include #include #include #include #include "except.h" #define XCEPT_BUFFER_SIZE 1024 #define group except_group #define code except_code #define id except_id #define message except_message #define dyndata except_dyndata #define func except_func #define context except_context #define id except_id #define size except_size #define obj except_obj #define jmp except_jmp #define down except_down #define type except_type #define catcher except_catcher #define cleanup except_cleanup #define info except_info #ifdef KAZLIB_POSIX_THREADS #include static pthread_mutex_t init_mtx = PTHREAD_MUTEX_INITIALIZER; static int init_counter; static pthread_key_t top_key; static pthread_key_t uh_key; static pthread_key_t alloc_key; static pthread_key_t dealloc_key; static void unhandled_catcher(except_t *); #define get_top() ((struct except_stacknode *) pthread_getspecific(top_key)) #define set_top(T) (pthread_setspecific(top_key, (T)), (void)((T) == (struct except_stacknode *) 0)) #define set_catcher(C) (pthread_setspecific(uh_key, (void *) (C)), (void)((C) == (void (*)(except_t *)) 0)) #define set_alloc(A) (pthread_setspecific(alloc_key, (void *) (A)), (void)((A) == (void *(*)(size_t)) 0)) #define set_dealloc(D) (pthread_setspecific(dealloc_key, (void *) (D)), (void)((D) == 
(void (*)(void *)) 0)) static void (*get_catcher(void))(except_t *) { void (*catcher)(except_t *) = (void (*)(except_t *)) pthread_getspecific(uh_key); return (catcher == 0) ? unhandled_catcher : catcher; } static void *(*get_alloc(void))(size_t) { void *(*alloc)(size_t) = (void *(*)(size_t)) pthread_getspecific(alloc_key); return (alloc == 0) ? malloc : alloc; } static void (*get_dealloc(void))(void *) { void (*dealloc)(void *) = (void (*)(void *)) pthread_getspecific(dealloc_key); return (dealloc == 0) ? free : dealloc; } int except_init(void) { int retval = 1; pthread_mutex_lock(&init_mtx); assert (init_counter < INT_MAX); if (init_counter++ == 0) { int top_ok = (pthread_key_create(&top_key, 0) == 0); int uh_ok = (pthread_key_create(&uh_key, 0) == 0); int alloc_ok = (pthread_key_create(&alloc_key, 0) == 0); int dealloc_ok = (pthread_key_create(&dealloc_key, 0) == 0); if (!top_ok || !uh_ok || !alloc_ok || !dealloc_ok) { retval = 0; init_counter = 0; if (top_ok) pthread_key_delete(top_key); if (uh_ok) pthread_key_delete(uh_key); if (alloc_ok) pthread_key_delete(alloc_key); if (dealloc_ok) pthread_key_delete(dealloc_key); } } pthread_mutex_unlock(&init_mtx); return retval; } void except_deinit(void) { pthread_mutex_lock(&init_mtx); assert (init_counter > 0); if (--init_counter == 0) { pthread_key_delete(top_key); pthread_key_delete(uh_key); pthread_key_delete(alloc_key); pthread_key_delete(dealloc_key); } pthread_mutex_unlock(&init_mtx); } #else /* no thread support */ static int init_counter; static void unhandled_catcher(except_t *); static void (*uh_catcher_ptr)(except_t *) = unhandled_catcher; static void *(*allocator)(size_t) = malloc; static void (*deallocator)(void *) = free; static struct except_stacknode *stack_top; #define get_top() (stack_top) #define set_top(T) (stack_top = (T)) #define get_catcher() (uh_catcher_ptr) #define set_catcher(C) (uh_catcher_ptr = (C)) #define get_alloc() (allocator) #define set_alloc(A) (allocator = (A)) #define get_dealloc() 
(deallocator) #define set_dealloc(D) (deallocator = (D)) int except_init(void) { assert (init_counter < INT_MAX); init_counter++; return 1; } void except_deinit(void) { assert (init_counter > 0); init_counter--; } #endif static int match(const volatile except_id_t *thrown, const except_id_t *caught) { int group_match = (caught->group == XCEPT_GROUP_ANY || caught->group == thrown->group); int code_match = (caught->code == XCEPT_CODE_ANY || caught->code == thrown->code); return group_match && code_match; } static void do_throw(except_t *except) { struct except_stacknode *top; assert (except->id.group != 0 && except->id.code != 0); for (top = get_top(); top != 0; top = top->down) { if (top->type == XCEPT_CLEANUP) { top->info.cleanup->func(top->info.cleanup->context); } else { struct except_catch *catcher = top->info.catcher; const except_id_t *pi = catcher->id; size_t i; assert (top->type == XCEPT_CATCHER); except_free(catcher->obj.dyndata); for (i = 0; i < catcher->size; pi++, i++) { if (match(&except->id, pi)) { catcher->obj = *except; set_top(top); longjmp(catcher->jmp, 1); } } } } set_top(top); get_catcher()(except); /* unhandled exception */ abort(); } static void unhandled_catcher(except_t *except) { fprintf(stderr, "Unhandled exception (\"%s\", group=%ld, code=%ld)\n", except->message, except->id.group, except->id.code); abort(); } static void stack_push(struct except_stacknode *node) { node->down = get_top(); set_top(node); } void except_setup_clean(struct except_stacknode *esn, struct except_cleanup *ecl, void (*cleanf)(void *), void *context) { esn->type = XCEPT_CLEANUP; ecl->func = cleanf; ecl->context = context; esn->info.cleanup = ecl; stack_push(esn); } void except_setup_try(struct except_stacknode *esn, struct except_catch *ech, const except_id_t id[], size_t size) { ech->id = id; ech->size = size; ech->obj.dyndata = 0; esn->type = XCEPT_CATCHER; esn->info.catcher = ech; stack_push(esn); } struct except_stacknode *except_pop(void) { struct 
except_stacknode *top = get_top(); set_top(top->down); return top; } void except_rethrow(except_t *except) { struct except_stacknode *top = get_top(); assert (top != 0); assert (top->type == XCEPT_CATCHER); assert (&top->info.catcher->obj == except); set_top(top->down); do_throw(except); } void except_throw(long group, long code, const char *msg) { except_t except; except.id.group = group; except.id.code = code; except.message = msg; except.dyndata = 0; do_throw(&except); } void except_throwd(long group, long code, const char *msg, void *data) { except_t except; except.id.group = group; except.id.code = code; except.message = msg; except.dyndata = data; do_throw(&except); } void except_throwf(long group, long code, const char *fmt, ...) { char *buf = except_alloc(XCEPT_BUFFER_SIZE); va_list vl; va_start (vl, fmt); vsprintf(buf, fmt, vl); va_end (vl); except_throwd(group, code, buf, buf); } void (*except_unhandled_catcher(void (*new_catcher)(except_t *)))(except_t *) { void (*old_catcher)(except_t *) = get_catcher(); set_catcher(new_catcher); return old_catcher; } #undef except_code #undef except_group #undef except_message #undef except_data unsigned long except_code(except_t *ex) { return ex->id.code; } unsigned long except_group(except_t *ex) { return ex->id.group; } const char *except_message(except_t *ex) { return ex->message; } void *except_data(except_t *ex) { return ex->dyndata; } void *except_take_data(except_t *ex) { void *data = ex->dyndata; ex->dyndata = 0; return data; } void except_set_allocator(void *(*alloc)(size_t), void (*dealloc)(void *)) { set_alloc(alloc); set_dealloc(dealloc); } void *except_alloc(size_t size) { void *ptr = get_alloc()(size); if (ptr == 0) except_throw(XCEPT_BAD_ALLOC, 0, "out of memory"); return ptr; } void except_free(void *ptr) { get_dealloc()(ptr); } kmer-code-2013-trunk/libutil/kazlib/hash.h0000644000000000000000000002163010541426140017161 0ustar rootroot/* * Hash Table Data Type * Copyright (C) 1997 Kaz Kylheku * * Free 
Software License: * * All rights are reserved by the author, with the following exceptions: * Permission is granted to freely reproduce and distribute this software, * possibly in exchange for a fee, provided that this copyright notice appears * intact. Permission is also granted to adapt this software to produce * derivative works, as long as the modified versions carry this copyright * notice and additional notices stating that the work has been modified. * This source code may be translated into executable form and incorporated * into proprietary software; there is no requirement for such software to * contain a copyright notice related to this source. * */ #ifndef HASH_H #define HASH_H #include #ifdef KAZLIB_SIDEEFFECT_DEBUG #include "sfx.h" #endif /* * Blurb for inclusion into C++ translation units */ #ifdef __cplusplus extern "C" { #endif typedef unsigned long hashcount_t; #define HASHCOUNT_T_MAX ULONG_MAX typedef unsigned long hash_val_t; #define HASH_VAL_T_MAX ULONG_MAX extern int hash_val_t_bit; #ifndef HASH_VAL_T_BIT #define HASH_VAL_T_BIT ((int) hash_val_t_bit) #endif /* * Hash chain node structure. * Notes: * 1. This preprocessing directive is for debugging purposes. The effect is * that if the preprocessor symbol KAZLIB_OPAQUE_DEBUG is defined prior to the * inclusion of this header, then the structure shall be declared as having * the single member int __OPAQUE__. This way, any attempts by the * client code to violate the principles of information hiding (by accessing * the structure directly) can be diagnosed at translation time. However, * note the resulting compiled unit is not suitable for linking. * 2. This is a pointer to the next node in the chain. In the last node of a * chain, this pointer is null. * 3. The key is a pointer to some user supplied data that contains a unique * identifier for each hash node in a given table. The interpretation of * the data is up to the user. 
When creating or initializing a hash table, * the user must supply a pointer to a function for comparing two keys, * and a pointer to a function for hashing a key into a numeric value. * 4. The value is a user-supplied pointer to void which may refer to * any data object. It is not interpreted in any way by the hashing * module. * 5. The hashed key is stored in each node so that we don't have to rehash * each key when the table must grow or shrink. */ typedef struct hnode_t { #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) /* 1 */ struct hnode_t *hash_next; /* 2 */ const void *hash_key; /* 3 */ void *hash_data; /* 4 */ hash_val_t hash_hkey; /* 5 */ #else int hash_dummy; #endif } hnode_t; /* * The comparison function pointer type. A comparison function takes two keys * and produces a value of -1 if the left key is less than the right key, a * value of 0 if the keys are equal, and a value of 1 if the left key is * greater than the right key. */ typedef int (*hash_comp_t)(const void *, const void *); /* * The hashing function performs some computation on a key and produces an * integral value of type hash_val_t based on that key. For best results, the * function should have good randomness properties in *all* significant bits * over the set of keys that are being inserted into a given hash table. In * particular, the most significant bits of hash_val_t are most significant to * the hash module. Only as the hash table expands are less significant bits * examined. Thus a function that has good distribution in its upper bits but * not lower is preferable to one that has poor distribution in the upper bits * but not the lower ones. */ typedef hash_val_t (*hash_fun_t)(const void *); /* * allocator functions */ typedef hnode_t *(*hnode_alloc_t)(void *); typedef void (*hnode_free_t)(hnode_t *, void *); /* * This is the hash table control structure. It keeps track of information * about a hash table, as well as the hash table itself. * Notes: * 1. 
Pointer to the hash table proper. The table is an array of pointers to * hash nodes (of type hnode_t). If the table is empty, every element of * this table is a null pointer. A non-null entry points to the first * element of a chain of nodes. * 2. This member keeps track of the size of the hash table---that is, the * number of chain pointers. * 3. The count member maintains the number of elements that are presently * in the hash table. * 4. The maximum count is the greatest number of nodes that can populate this * table. If the table contains this many nodes, no more can be inserted, * and the hash_isfull() function returns true. * 5. The high mark is a population threshold, measured as a number of nodes, * which, if exceeded, will trigger a table expansion. Only dynamic hash * tables are subject to this expansion. * 6. The low mark is a minimum population threshold, measured as a number of * nodes. If the table population drops below this value, a table shrinkage * will occur. Only dynamic tables are subject to this reduction. No table * will shrink beneath a certain absolute minimum number of nodes. * 7. This is a pointer to the hash table's comparison function. The * function is set once at initialization or creation time. * 8. Pointer to the table's hashing function, set once at creation or * initialization time. * 9. The current hash table mask. If the size of the hash table is 2^N, * this value has its low N bits set to 1, and the others clear. It is used * to select bits from the result of the hashing function to compute an * index into the table. * 10. A flag which indicates whether the table is to be dynamically resized. It * is set to 1 in dynamically allocated tables, 0 in tables that are * statically allocated. 
*/ typedef struct hash_t { #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) struct hnode_t **hash_table; /* 1 */ hashcount_t hash_nchains; /* 2 */ hashcount_t hash_nodecount; /* 3 */ hashcount_t hash_maxcount; /* 4 */ hashcount_t hash_highmark; /* 5 */ hashcount_t hash_lowmark; /* 6 */ hash_comp_t hash_compare; /* 7 */ hash_fun_t hash_function; /* 8 */ hnode_alloc_t hash_allocnode; hnode_free_t hash_freenode; void *hash_context; hash_val_t hash_mask; /* 9 */ int hash_dynamic; /* 10 */ #else int hash_dummy; #endif } hash_t; /* * Hash scanner structure, used for traversals of the data structure. * Notes: * 1. Pointer to the hash table that is being traversed. * 2. Reference to the current chain in the table being traversed (the chain * that contains the next node that shall be retrieved). * 3. Pointer to the node that will be retrieved by the subsequent call to * hash_scan_next(). */ typedef struct hscan_t { #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) hash_t *hash_table; /* 1 */ hash_val_t hash_chain; /* 2 */ hnode_t *hash_next; /* 3 */ #else int hash_dummy; #endif } hscan_t; extern hash_t *hash_create(hashcount_t, hash_comp_t, hash_fun_t); extern void hash_set_allocator(hash_t *, hnode_alloc_t, hnode_free_t, void *); extern void hash_destroy(hash_t *); extern void hash_free_nodes(hash_t *); extern void hash_free(hash_t *); extern hash_t *hash_init(hash_t *, hashcount_t, hash_comp_t, hash_fun_t, hnode_t **, hashcount_t); extern void hash_insert(hash_t *, hnode_t *, const void *); extern hnode_t *hash_lookup(hash_t *, const void *); extern hnode_t *hash_delete(hash_t *, hnode_t *); extern int hash_alloc_insert(hash_t *, const void *, void *); extern void hash_delete_free(hash_t *, hnode_t *); extern void hnode_put(hnode_t *, void *); extern void *hnode_get(hnode_t *); extern const void *hnode_getkey(hnode_t *); extern hashcount_t hash_count(hash_t *); extern hashcount_t hash_size(hash_t *); extern int hash_isfull(hash_t *); 
extern int hash_isempty(hash_t *); extern void hash_scan_begin(hscan_t *, hash_t *); extern hnode_t *hash_scan_next(hscan_t *); extern hnode_t *hash_scan_delete(hash_t *, hnode_t *); extern void hash_scan_delfree(hash_t *, hnode_t *); extern int hash_verify(hash_t *); extern hnode_t *hnode_create(void *); extern hnode_t *hnode_init(hnode_t *, void *); extern void hnode_destroy(hnode_t *); #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) #ifdef KAZLIB_SIDEEFFECT_DEBUG #define hash_isfull(H) (SFX_CHECK(H)->hash_nodecount == (H)->hash_maxcount) #else #define hash_isfull(H) ((H)->hash_nodecount == (H)->hash_maxcount) #endif #define hash_isempty(H) ((H)->hash_nodecount == 0) #define hash_count(H) ((H)->hash_nodecount) #define hash_size(H) ((H)->hash_nchains) #define hnode_get(N) ((N)->hash_data) #define hnode_getkey(N) ((N)->hash_key) #define hnode_put(N, V) ((N)->hash_data = (V)) #endif #ifdef __cplusplus } #endif #endif kmer-code-2013-trunk/libutil/kazlib/blast.pl0000755000000000000000000000202010541426140017522 0ustar rootroot#!/usr/bin/perl # # This is a program whose output can be piped to the test drivers for # hash.c and dict.c. It inserts a bunch of data and then deletes it all. # # The $modulus should be a prime number. This ensures that the $modulus - 1 # generated keys are all distinct. The $factor_i and $factor_d values need not # be prime, but it should not be a multiple of $modulus (including zero), # otherwise a sequence of duplicate keys will be generated: choose numbers # in the range [1, $modulus - 1]. Choosing 1 means that # insertions (or deletions) will take place in order. # The purpose of using the prime modulus number is to generate a repeatable # sequence of unique keys that is (possibly) not in sorted order. 
# # $modulus = 200003; # $factor_i = 100; # $factor_d = 301; $modulus = 6113; $factor_i = 1669; $factor_d = 2036; for ($i = 1; $i < $modulus; $i++) { printf("a %d %d\n", ($i * $factor_i) % $modulus, $i); } for ($i = 1; $i < $modulus; $i++) { printf("d %d\n", ($i * $factor_d) % $modulus); } print "t\nq\n" kmer-code-2013-trunk/libutil/generalizedUnaryEncoding.h0000644000000000000000000000624212322046702021744 0ustar rootroot#ifndef GENERALIZED_UNARY_ENCODING_H #define GENERALIZED_UNARY_ENCODING_H #include "bitPacking.h" // Lots and lots of semi-useless debugging information //#define DEBUG_GENERALIZEDUNARYENCODING // Generalized unary encodings. Defined by (start, step, stop). // This implementation uses stop=infinity to encode all possible // numbers. If you know the highest number possible, you'll get a // slight decrease in space used ... // The method: // // The mth code word consists of 'm' unary encoded, followed by w = // start + m * step binary encoded bits. If a == stop, then the // terminator in the unary code is dropped. // // Encoding is tricky. Take the 3,2,9 example: // m w template # vals #'s // 0 3 1xxx 8 0- 7 // 1 5 01xxxxx 32 8- 39 // 2 7 001xxxxxxx 128 40-167 // 3 9 000xxxxxxxxx 512 168-679 // // I don't see a nice way of mapping our number n to the prefix m, // short of some sort of search. The implementation below is // probably very slow. // // On the bright side, decoding is trivial. Read back the unary // encoded number, then read that many bits to get the value. // static const uint64 _genunary_start = 3; static const uint64 _genunary_step = 2; //static const uint64 _genunary_stop = ~uint64ZERO; inline void setGeneralizedUnaryEncodedNumber(uint64 *ptr, uint64 pos, uint64 *siz, uint64 val) { uint64 m = uint64ZERO; uint64 w = _genunary_start; uint64 n = uint64ONE << w; // Search for the prefix m, given our number 'val'. // While doing this, we get rid of all the implicitly stored values from 'val'. 
// #ifdef DEBUG_GENERALIZEDUNARYENCODING fprintf(stderr, " val="uint64FMT" try n="uint64FMT" for m="uint64FMT"\n", val, n, m); #endif while (n <= val) { val -= n; w += _genunary_step; n = uint64ONE << w; m++; #ifdef DEBUG_GENERALIZEDUNARYENCODING fprintf(stderr, " val="uint64FMT" try n="uint64FMT" for m="uint64FMT"\n", val, n, m); #endif } #ifdef DEBUG_GENERALIZEDUNARYENCODING fprintf(stderr, "val="uint64FMT" found m="uint64FMT"\n", val, m); #endif // Now just encode the number // m - the unary encoded prefix // w - the size of the binary encoded number setUnaryEncodedNumber(ptr, pos, siz, m); setDecodedValue(ptr, pos+*siz, w, val); *siz = m + 1 + w; } inline uint64 getGeneralizedUnaryEncodedNumber(uint64 *ptr, uint64 pos, uint64 *siz) { uint64 val = uint64ZERO; uint64 m = uint64ZERO; uint64 w = uint64ZERO; // Comments in the encoder apply here too. m = getUnaryEncodedNumber(ptr, pos, siz); w = _genunary_start + m * _genunary_step; val = getDecodedValue(ptr, pos + *siz, w); *siz = m + 1 + w; #ifdef DEBUG_GENERALIZEDUNARYENCODING fprintf(stderr, "m="uint64FMT" w="uint64FMT" val="uint64FMT"\n", m, w, val); #endif // Add in the implcitly stored pieces of the number // while (m--) { w -= _genunary_step; val += uint64ONE << w; } return(val); } #endif // GENERALIZED_UNARY_ENCODING_H kmer-code-2013-trunk/libutil/Make.include0000644000000000000000000000277612415074037017053 0ustar rootroot# -*- makefile -*- $(eval $(call Include,$/mt19937ar/)) $(eval $(call Include,$/kazlib/)) src := $/bigQueue.C \ $/bigQueue.H \ $/bitOperations.h \ $/bitPackedArray.C \ $/bitPackedArray.H \ $/bitPackedFile.C \ $/bitPackedFile.H \ $/bitPacking.h \ $/eliasDeltaEncoding.h \ $/eliasGammaEncoding.h \ $/endianess.H \ $/fibonacciEncoding.h \ $/fibonacciNumbers.C \ $/file.c \ $/generalizedUnaryEncoding.h \ $/intervalList.H \ $/logMsg.H \ $/md5.c \ $/palloc.c \ $/qsort_mt.c \ $/readBuffer.C \ $/readBuffer.H \ $/recordFile.C \ $/recordFile.H \ $/speedCounter.C \ $/speedCounter.H \ $/splitToWords.H \ 
$/sweatShop.C \ $/sweatShop.H \ $/uint32List.H \ $/unaryEncoding.h \ $/util++.H \ $/util.c \ $/util.h # Broken # $/bzipBuffer.C # $/bzipBuffer.H # Executables # $/unaryEncodingTester.C $/.C_SRCS :=$(filter %.c,${src}) $/.CXX_SRCS :=$(filter %.C,${src}) $/.CXX_INCS :=$(filter %.H,${src}) $(filter %.h,${src}) $/.CXX_LIBS :=$/libutil.a $/.CLEAN := $/*.o $/libutil.a: ${$/.C_SRCS:.c=.o} ${$/.CXX_SRCS:.C=.o} \ $/mt19937ar/mt19937ar.o \ $/kazlib/dict.o \ $/kazlib/except.o \ $/kazlib/hash.o \ $/kazlib/list.o \ $/kazlib/sfx.o kmer-code-2013-trunk/libutil/bitPacking.h0000644000000000000000000003132712322046702017042 0ustar rootroot#ifndef BRI_BITPACKING_H #define BRI_BITPACKING_H #include #include // Routines used for stuffing bits into a word array. // Define this to enable testing that the width of the data element // is greater than zero. The uint64MASK() macro (bri.h) does not // generate a mask for 0. Compiler warnings are issued, because you // shouldn't use this in production code. // //#define CHECK_WIDTH // As CHECK_WIDTH is kind of expensive, we'll warn. #ifdef CHECK_WIDTH #warning libutil/bitPacking.h defined CHECK_WIDTH #endif // Returns 'siz' bits from the stream based at 'ptr' and currently at // location 'pos'. The position of the stream is not changed. // // Retrieves a collection of values; the number of bits advanced in // the stream is returned. // // Copies the lowest 'siz' bits in 'val' to the stream based at 'ptr' // and currently at 'pos'. The position of the stream is not // changed. // // Sets a collection of values; the number of bits advanced in the // stream is returned. 
// uint64 getDecodedValue (uint64 *ptr, uint64 pos, uint64 siz); uint64 getDecodedValues(uint64 *ptr, uint64 pos, uint64 num, uint64 *sizs, uint64 *vals); void setDecodedValue (uint64 *ptr, uint64 pos, uint64 siz, uint64 val); uint64 setDecodedValues(uint64 *ptr, uint64 pos, uint64 num, uint64 *sizs, uint64 *vals); // Like getDecodedValue() but will pre/post increment/decrement the // value stored in the stream before in addition to returning the // value. // // preIncrementDecodedValue(ptr, pos, siz) === x = getDecodedValue(ptr, pos, siz) + 1; // setDecodedValue(ptr, pos, siz, x); // // preDecrementDecodedValue(ptr, pos, siz) === x = getDecodedValue(ptr, pos, siz) - 1; // setDecodedValue(ptr, pos, siz, x); // // postIncrementDecodedValue(ptr, pos, siz) === x = getDecodedValue(ptr, pos, siz); // setDecodedValue(ptr, pos, siz, x + 1); // // postDecrementDecodedValue(ptr, pos, siz) === x = getDecodedValue(ptr, pos, siz); // setDecodedValue(ptr, pos, siz, x - 1); // uint64 preIncrementDecodedValue(uint64 *ptr, uint64 pos, uint64 siz); uint64 preDecrementDecodedValue(uint64 *ptr, uint64 pos, uint64 siz); uint64 postIncrementDecodedValue(uint64 *ptr, uint64 pos, uint64 siz); uint64 postDecrementDecodedValue(uint64 *ptr, uint64 pos, uint64 siz); // N.B. - I assume the bits in words are big-endian, which is // backwards from the way we shift things around. // // I define the "addresses" of bits in two consectuve words as // [0123][0123]. When adding words to the bit array, they're added // from left to right: // // setDecodedValue(bitstream, %0abc, 3) // setDecodedValue(bitstream, %0def, 3) // // results in [abcd][ef00] // // But when shifting things around, we typically do it from the right // side, since that is where the machine places numbers. // // A picture or two might help. 
// // // |----b1-----| // |-bit-||-sz-| // XXXXXX // [0---------------63] // ^ // pos // // // If the bits span two words, it'll look like this; b1 is smaller // than siz, and we update bit to be the "uncovered" piece of XXX // (all the stuff in word2). The first word is masked, then those // bits are shifted onto the result in the correct place. The second // word has the correct bits shifted to the right, then those are // appended to the result. // // |b1-| // |-----bit-----||---sz---| // XXXXXXXXXX // [0------------word1][0-------------word2] // ^ // pos // inline uint64 getDecodedValue(uint64 *ptr, uint64 pos, uint64 siz) { uint64 wrd = (pos >> 6) & 0x0000cfffffffffffllu; //PREFETCH(ptr + wrd); makes it worse uint64 bit = (pos ) & 0x000000000000003fllu; uint64 b1 = 64 - bit; uint64 ret = 0; #ifdef CHECK_WIDTH if (siz == 0) { fprintf(stderr, "ERROR: getDecodedValue() called with zero size!\n"); abort(); } if (siz > 64) { fprintf(stderr, "ERROR: getDecodedValue() called with huge size ("uint64FMT")!\n", siz); abort(); } #endif if (b1 >= siz) { ret = ptr[wrd] >> (b1 - siz); } else { bit = siz - b1; ret = (ptr[wrd] & uint64MASK(b1)) << bit; wrd++; ret |= (ptr[wrd] >> (64 - bit)) & uint64MASK(bit); } ret &= uint64MASK(siz); return(ret); } inline void setDecodedValue(uint64 *ptr, uint64 pos, uint64 siz, uint64 val) { uint64 wrd = (pos >> 6) & 0x0000cfffffffffffllu; uint64 bit = (pos ) & 0x000000000000003fllu; uint64 b1 = 64 - bit; #ifdef CHECK_WIDTH if (siz == 0) { fprintf(stderr, "ERROR: setDecodedValue() called with zero size!\n"); abort(); } if (siz > 64) { fprintf(stderr, "ERROR: getDecodedValue() called with huge size ("uint64FMT")!\n", siz); abort(); } #endif val &= uint64MASK(siz); if (b1 >= siz) { ptr[wrd] &= ~( uint64MASK(siz) << (b1 - siz) ); ptr[wrd] |= val << (b1 - siz); } else { bit = siz - b1; ptr[wrd] &= ~uint64MASK(b1); ptr[wrd] |= (val & (uint64MASK(b1) << (bit))) >> (bit); wrd++; ptr[wrd] &= ~(uint64MASK(bit) << (64 - bit)); ptr[wrd] |= (val & 
(uint64MASK(bit))) << (64 - bit); } } inline uint64 getDecodedValues(uint64 *ptr, uint64 pos, uint64 num, uint64 *sizs, uint64 *vals) { // compute the location of the start of the encoded words, then // just walk through to get the remaining words. uint64 wrd = (pos >> 6) & 0x0000cfffffffffffllu; //PREFETCH(ptr + wrd); makes it worse uint64 bit = (pos ) & 0x000000000000003fllu; uint64 b1 = 0; for (uint64 i=0; i 64) { fprintf(stderr, "ERROR: getDecodedValue() called with huge size ("uint64FMT")!\n", siz); abort(); } #endif if (b1 >= sizs[i]) { //fprintf(stderr, "get-single pos=%d b1=%d bit=%d wrd=%d\n", pos, b1, bit, wrd); vals[i] = ptr[wrd] >> (b1 - sizs[i]); bit += sizs[i]; } else { //fprintf(stderr, "get-double pos=%d b1=%d bit=%d wrd=%d bitafter=%d\n", pos, b1, bit, wrd, sizs[i]-b1); bit = sizs[i] - b1; vals[i] = (ptr[wrd] & uint64MASK(b1)) << bit; wrd++; vals[i] |= (ptr[wrd] >> (64 - bit)) & uint64MASK(bit); } if (bit == 64) { wrd++; bit = 0; } assert(bit < 64); vals[i] &= uint64MASK(sizs[i]); pos += sizs[i]; } return(pos); } inline uint64 setDecodedValues(uint64 *ptr, uint64 pos, uint64 num, uint64 *sizs, uint64 *vals) { uint64 wrd = (pos >> 6) & 0x0000cfffffffffffllu; uint64 bit = (pos ) & 0x000000000000003fllu; uint64 b1 = 0; for (uint64 i=0; i 64) { fprintf(stderr, "ERROR: getDecodedValue() called with huge size ("uint64FMT")!\n", siz); abort(); } #endif if (b1 >= sizs[i]) { //fprintf(stderr, "set-single pos=%d b1=%d bit=%d wrd=%d\n", pos, b1, bit, wrd); ptr[wrd] &= ~( uint64MASK(sizs[i]) << (b1 - sizs[i]) ); ptr[wrd] |= vals[i] << (b1 - sizs[i]); bit += sizs[i]; } else { //fprintf(stderr, "set-double pos=%d b1=%d bit=%d wrd=%d bitafter=%d\n", pos, b1, bit, wrd, sizs[i]-b1); bit = sizs[i] - b1; ptr[wrd] &= ~uint64MASK(b1); ptr[wrd] |= (vals[i] & (uint64MASK(b1) << (bit))) >> (bit); wrd++; ptr[wrd] &= ~(uint64MASK(bit) << (64 - bit)); ptr[wrd] |= (vals[i] & (uint64MASK(bit))) << (64 - bit); } if (bit == 64) { wrd++; bit = 0; } assert(bit < 64); pos += 
sizs[i]; } return(pos); } inline uint64 preIncrementDecodedValue(uint64 *ptr, uint64 pos, uint64 siz) { uint64 wrd = (pos >> 6) & 0x0000cfffffffffffllu; uint64 bit = (pos ) & 0x000000000000003fllu; uint64 b1 = 64 - bit; uint64 ret = 0; #ifdef CHECK_WIDTH if (siz == 0) { fprintf(stderr, "ERROR: preIncrementDecodedValue() called with zero size!\n"); abort(); } if (siz > 64) { fprintf(stderr, "ERROR: getDecodedValue() called with huge size ("uint64FMT")!\n", siz); abort(); } #endif if (b1 >= siz) { ret = ptr[wrd] >> (b1 - siz); ret++; ret &= uint64MASK(siz); ptr[wrd] &= ~( uint64MASK(siz) << (b1 - siz) ); ptr[wrd] |= ret << (b1 - siz); } else { bit = siz - b1; ret = (ptr[wrd] & uint64MASK(b1)) << bit; ret |= (ptr[wrd+1] >> (64 - bit)) & uint64MASK(bit); ret++; ret &= uint64MASK(siz); ptr[wrd] &= ~uint64MASK(b1); ptr[wrd] |= (ret & (uint64MASK(b1) << (bit))) >> (bit); wrd++; ptr[wrd] &= ~(uint64MASK(bit) << (64 - bit)); ptr[wrd] |= (ret & (uint64MASK(bit))) << (64 - bit); } return(ret); } inline uint64 preDecrementDecodedValue(uint64 *ptr, uint64 pos, uint64 siz) { uint64 wrd = (pos >> 6) & 0x0000cfffffffffffllu; uint64 bit = (pos ) & 0x000000000000003fllu; uint64 b1 = 64 - bit; uint64 ret = 0; #ifdef CHECK_WIDTH if (siz == 0) { fprintf(stderr, "ERROR: preDecrementDecodedValue() called with zero size!\n"); abort(); } if (siz > 64) { fprintf(stderr, "ERROR: getDecodedValue() called with huge size ("uint64FMT")!\n", siz); abort(); } #endif if (b1 >= siz) { ret = ptr[wrd] >> (b1 - siz); ret--; ret &= uint64MASK(siz); ptr[wrd] &= ~( uint64MASK(siz) << (b1 - siz) ); ptr[wrd] |= ret << (b1 - siz); } else { bit = siz - b1; ret = (ptr[wrd] & uint64MASK(b1)) << bit; ret |= (ptr[wrd+1] >> (64 - bit)) & uint64MASK(bit); ret--; ret &= uint64MASK(siz); ptr[wrd] &= ~uint64MASK(b1); ptr[wrd] |= (ret & (uint64MASK(b1) << (bit))) >> (bit); wrd++; ptr[wrd] &= ~(uint64MASK(bit) << (64 - bit)); ptr[wrd] |= (ret & (uint64MASK(bit))) << (64 - bit); } return(ret); } inline uint64 
postIncrementDecodedValue(uint64 *ptr, uint64 pos, uint64 siz) { uint64 wrd = (pos >> 6) & 0x0000cfffffffffffllu; uint64 bit = (pos ) & 0x000000000000003fllu; uint64 b1 = 64 - bit; uint64 ret = 0; #ifdef CHECK_WIDTH if (siz == 0) { fprintf(stderr, "ERROR: postIncrementDecodedValue() called with zero size!\n"); abort(); } if (siz > 64) { fprintf(stderr, "ERROR: getDecodedValue() called with huge size ("uint64FMT")!\n", siz); abort(); } #endif if (b1 >= siz) { ret = ptr[wrd] >> (b1 - siz); ret++; ret &= uint64MASK(siz); ptr[wrd] &= ~( uint64MASK(siz) << (b1 - siz) ); ptr[wrd] |= ret << (b1 - siz); } else { bit = siz - b1; ret = (ptr[wrd] & uint64MASK(b1)) << bit; ret |= (ptr[wrd+1] >> (64 - bit)) & uint64MASK(bit); ret++; ret &= uint64MASK(siz); ptr[wrd] &= ~uint64MASK(b1); ptr[wrd] |= (ret & (uint64MASK(b1) << (bit))) >> (bit); wrd++; ptr[wrd] &= ~(uint64MASK(bit) << (64 - bit)); ptr[wrd] |= (ret & (uint64MASK(bit))) << (64 - bit); } ret--; ret &= uint64MASK(siz); return(ret); } inline uint64 postDecrementDecodedValue(uint64 *ptr, uint64 pos, uint64 siz) { uint64 wrd = (pos >> 6) & 0x0000cfffffffffffllu; uint64 bit = (pos ) & 0x000000000000003fllu; uint64 b1 = 64 - bit; uint64 ret = 0; #ifdef CHECK_WIDTH if (siz == 0) { fprintf(stderr, "ERROR: postDecrementDecodedValue() called with zero size!\n"); abort(); } if (siz > 64) { fprintf(stderr, "ERROR: getDecodedValue() called with huge size ("uint64FMT")!\n", siz); abort(); } #endif if (b1 >= siz) { ret = ptr[wrd] >> (b1 - siz); ret--; ret &= uint64MASK(siz); ptr[wrd] &= ~( uint64MASK(siz) << (b1 - siz) ); ptr[wrd] |= ret << (b1 - siz); } else { bit = siz - b1; ret = (ptr[wrd] & uint64MASK(b1)) << bit; ret |= (ptr[wrd+1] >> (64 - bit)) & uint64MASK(bit); ret--; ret &= uint64MASK(siz); ptr[wrd] &= ~uint64MASK(b1); ptr[wrd] |= (ret & (uint64MASK(b1) << (bit))) >> (bit); wrd++; ptr[wrd] &= ~(uint64MASK(bit) << (64 - bit)); ptr[wrd] |= (ret & (uint64MASK(bit))) << (64 - bit); } ret++; ret &= uint64MASK(siz); return(ret); } 
#endif // BRI_BITPACKING_H kmer-code-2013-trunk/libutil/NOTES0000644000000000000000000000055510066400133015423 0ustar rootrootVarious notes that should be turned into real documentation merStream (need to check this) posInSeq() is relative to the start of the current sequence. posInStream() is relative to the start of the source file. Whitespace in the sequence / source files mess up position calculations. The FastAstream positions are NOT sequence positions. kmer-code-2013-trunk/libutil/eliasDeltaEncoding.h0000644000000000000000000000134112322046702020476 0ustar rootroot#ifndef ELIAS_DELTA_ENCODING_H #define ELIAS_DELTA_ENCODING_H #include "bitPacking.h" inline void setEliasDeltaEncodedNumber(uint64 *ptr, uint64 pos, uint64 *siz, uint64 val) { uint64 b = logBaseTwo64(val); setEliasGammaEncodedNumber(ptr, pos, siz, b); pos += *siz; setDecodedValue(ptr, pos, b-1, val); *siz += b-1; } inline uint64 getEliasDeltaEncodedNumber(uint64 *ptr, uint64 pos, uint64 *siz) { uint64 b = getEliasGammaEncodedNumber(ptr, pos, siz) - 1; pos += *siz; *siz += b; return(uint64ONE << b | getDecodedValue(ptr, pos, b)); } #endif // ELIAS_DELTA_ENCODING_H kmer-code-2013-trunk/libutil/endianess.H0000644000000000000000000000252512322046702016676 0ustar rootroot#ifndef ENDIANESS_H #define ENDIANESS_H #include #include #include "util.h" // We need to test how to swap off_t and size_t // See also test/endianess.c // If we wanted to convert to network order for everything, rather // than convert only when needed, this would be useful. 
// #if 0 bool checkEndianessSwapNeeded(void) { union u64 { uint64 u; unsigned char c[8]; }; union u32 { uint32 u; unsigned char c[4]; }; union u16 { uint16 u; unsigned char c[2]; }; u64 u64t.u = uint64NUMBER(0x0123456789abcdef); return(u64t.c[0] != 0x0f) } #endif inline uint64 uint64Swap(uint64 x) { x = ((x >> 8) & uint64NUMBER(0x00ff00ff00ff00ff)) | ((x << 8) & uint64NUMBER(0xff00ff00ff00ff00)); x = ((x >> 16) & uint64NUMBER(0x0000ffff0000ffff)) | ((x << 16) & uint64NUMBER(0xffff0000ffff0000)); x = ((x >> 32) & uint64NUMBER(0x00000000ffffffff)) | ((x << 32) & uint64NUMBER(0xffffffff00000000)); return(x); } inline uint32 uint32Swap(uint32 x) { x = ((x >> 8) & uint32NUMBER(0x00ff00ff)) | ((x << 8) & uint32NUMBER(0xff00ff00)); x = ((x >> 16) & uint32NUMBER(0x0000ffff)) | ((x << 16) & uint32NUMBER(0xffff0000)); return(x); } inline uint16 uint16Swap(uint16 x) { x = ((x >> 8) & 0x00ff) | ((x << 8) & 0xff00); return(x); } #endif // ENDIANESS_H kmer-code-2013-trunk/libutil/bitPackedFile.H0000644000000000000000000000525612322046702017417 0ustar rootroot#ifndef BITPACKEDFILE_H #define BITPACKEDFILE_H #include #include #include #include "util.h" //#define WITH_BZIP2 #ifdef WITH_BZIP2 #include #endif class bitPackedFile { public: bitPackedFile(char const *name, uint64 offset=0, bool forceTruncate=false); ~bitPackedFile(); uint64 getBits(uint32 size); uint64 getNumber(void); void putBits(uint64 bits, uint32 size); void putNumber(uint64 val); uint64 tell(void) { return((_pos << 6) + _bit); }; void seek(uint64 pos); uint64 loadInCore(void); void showStats(FILE *f) { fprintf(f, "inside: "uint64FMT" outside: "uint64FMT"\n", stat_seekInside, stat_seekOutside); fflush(f); }; private: // Ensure that the buffer has enough space for any future // operation. This constant, currently 31 bytes, must be strictly // less than the constant used in deciding if seek() is moving // forward or backwards. 
// void sync(void) { if (((_bit >> 6) + 31) >= _bfrmax) seek((_pos << 6) + _bit); }; void flushDirty(void); void seekBzip2(uint64 bitpos); void seekNormal(uint64 bitpos); int _file; char *_name; #ifdef WITH_BZIP2 FILE *_bzFILE; int _bzerr; BZFILE *_bzfile; #endif uint64 _bfrmax; // Number of words in the buffer uint64 *_bfr; // A chunk of the bitPackedFile in core uint64 _pos; // The location this chunk is from (in words) uint64 _bit; // The bit position we are modifying relative to _pos bool _inCore; bool _bfrDirty; bool _forceFirstLoad; bool _isReadOnly; bool _isBzip2; // For collecting statistics on our usage // uint64 stat_seekInside; uint64 stat_seekOutside; uint64 stat_dirtyFlushes; // For converting between hardware of different endianess. // uint64 file_offset; uint64 endianess_offset; bool endianess_flipped; }; inline uint64 bitPackedFile::getBits(uint32 siz) { sync(); uint64 ret = getDecodedValue(_bfr, _bit, siz); _bit += siz; return(ret); } inline uint64 bitPackedFile::getNumber(void) { sync(); uint64 siz = 0; uint64 ret = getFibonacciEncodedNumber(_bfr, _bit, &siz); _bit += siz; return(ret); } inline void bitPackedFile::putBits(uint64 bits, uint32 siz) { assert(_isReadOnly == false); sync(); setDecodedValue(_bfr, _bit, siz, bits); _bit += siz; _bfrDirty = true; } inline void bitPackedFile::putNumber(uint64 val) { assert(_isReadOnly == false); sync(); uint64 siz = 0; setFibonacciEncodedNumber(_bfr, _bit, &siz, val); _bit += siz; _bfrDirty = true; } #endif // BITPACKEDFILE_H kmer-code-2013-trunk/libutil/util.h0000644000000000000000000002211012322046702015732 0ustar rootroot#ifndef UTIL_H #define UTIL_H // ISO C99 says that to get INT32_MAX et al, these must be defined. (7.18.2, 7.18.4, 7.8.1) #ifndef __STDC_CONSTANT_MACROS #define __STDC_CONSTANT_MACROS #endif #ifndef __STDC_LIMIT_MACROS #define __STDC_LIMIT_MACROS #endif #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS #endif #include #include #include #include #include // Useful types. 
//
//  *MASK(x) is only defined for unsigned types, with x != 0 and less
//  than the datawidth.

typedef uint64_t  uint64;
typedef uint32_t  uint32;
typedef uint16_t  uint16;
typedef uint8_t   uint8;

typedef int64_t   int64;
typedef int32_t   int32;
typedef int16_t   int16;
typedef int8_t    int8;

//  Pick the integer-literal suffixes that give a 64-bit and a 32-bit
//  unsigned constant on this platform.
//
#if defined(__alpha) || defined(_AIX) || defined(__LP64__) || defined(_LP64)
#define TRUE64BIT
#define uint64NUMBER(X) X ## LU
#define uint32NUMBER(X) X ## U
#else
#define uint64NUMBER(X) X ## LLU
#define uint32NUMBER(X) X ## LU
#endif

//  size_t is unsigned, so it is printed with %zu (not %zd).
//
#define sizetFMT       "%zu"

//  The printf format macros below keep a space between each string
//  literal and the PRI* macro; C++11 otherwise parses the macro name
//  as a user-defined-literal suffix.

#define uint64ZERO     uint64NUMBER(0x0000000000000000)
#define uint64ONE      uint64NUMBER(0x0000000000000001)
#define uint64MAX      uint64NUMBER(0xffffffffffffffff)
#define uint64MASK(X)  ((~uint64ZERO) >> (64 - (X)))
#define uint64FMTW(X)  "%" #X PRIu64
#define uint64FMT      "%" PRIu64
#define uint64HEX      "0x%016" PRIx64
#define int64FMTW(X)   "%" #X PRId64
#define int64FMT       "%" PRId64

#define uint32ZERO     uint32NUMBER(0x00000000)
#define uint32ONE      uint32NUMBER(0x00000001)
#define uint32MAX      uint32NUMBER(0xffffffff)
#define uint32MASK(X)  ((~uint32ZERO) >> (32 - (X)))
#define uint32FMTW(X)  "%" #X PRIu32
#define uint32FMT      "%" PRIu32
#define uint32HEX      "0x%08" PRIx32
#define int32FMTW(X)   "%" #X PRId32
#define int32FMT       "%" PRId32

//  The 16- and 8-bit constants are plain (signed) int, so ~ZERO would
//  be -1 and a right shift of it stays all ones.  Build the masks from
//  the positive MAX value instead.
//
#define uint16ZERO     (0x0000)
#define uint16ONE      (0x0001)
#define uint16MAX      (0xffff)
#define uint16MASK(X)  (uint16MAX >> (16 - (X)))
#define uint16FMTW(X)  "%" #X PRIu16
#define uint16FMT      "%" PRIu16

#define uint8ZERO      (0x00)
#define uint8ONE       (0x01)
#define uint8MAX       (0xff)
#define uint8MASK(X)   (uint8MAX >> (8 - (X)))

//  Decimal string to integer.  N is the string, O is the optional
//  end-pointer (as for strtoul).  The 64-bit form uses strtoull so the
//  value is not truncated where unsigned long is only 32 bits wide.
//
#define strtouint32(N,O)  ((uint32)strtoul((N), (O), 10))
#define strtouint64(N,O)  ((uint64)strtoull((N), (O), 10))

#ifdef __cplusplus
extern "C" {
#endif

////////////////////////////////////////
//
//  time
//
double  getTime(void);

////////////////////////////////////////
//
//  file
//

//  Create the O_LARGEFILE type for open(), if it doesn't already
//  exist (FreeBSD, Tru64).
We assume that by including the stuff // needed for open(2) we'll get any definition of O_LARGEFILE. // #ifndef O_LARGEFILE #define O_LARGEFILE 0 #endif uint64 getProcessSizeCurrent(void); uint64 getProcessSizeLimit(void); // Useful routines for dealing with the existence of files int isHuman(FILE *F); // Handles mmap() of files. Write is not tested -- in particular, // the test main() in mmap.c fails. // void* mapFile(const char *filename, uint64 *length, char mode); void unmapFile(void *addr, uint64 length); // Creates a hidden temporary file. If path is given, the temporary // file is created in that directory. The temporary file is unlinked // after it is created, so once you close the file, it's gone. // FILE *makeTempFile(char *path); // Copies all of srcFile to dstFile, returns the number of bytes written // off_t copyFile(char *srcName, FILE *dstFile); // Takes a path to a file (that possibly doesn't exist) and returns // the number of MB (1048576 bytes) free in the directory of that // file. // uint32 freeDiskSpace(char *path); // Safer read(2) and write(2). // void safeWrite(int filedes, const void *buffer, const char *desc, size_t nbytes); int safeRead(int filedes, const void *buffer, const char *desc, size_t nbytes); //////////////////////////////////////// // int fileExists(const char *path); off_t sizeOfFile(const char *path); uint64 timeOfFile(const char *path); // Open a file, read/write, using compression based on the file name // FILE *openFile(const char *path, const char *mode); void closeFile(FILE *F, const char *path); //////////////////////////////////////// // void *memdup(const void *orig, size_t size); //////////////////////////////////////// // // Pac-Man's memory allocator. // // Grabs big chunks of memory, then gives out little pieces. You can // only free ALL memory, not single blocks. // // This is useful when one needs to malloc() tens of millions of // things, at which point the overhead of finding a free block is // large. 
// void *palloc(size_t size); void pfree(void); // A thread-safe(r) implementation just forces the user to use a // handle. This also lets us use palloc() for collections of things // -- e.g., twice in a program. If you don't give a handle, the // default one is used. // void *palloc2(size_t size, void *handle); void pfree2(void *handle); // Get a new handle, release a used one. The size is the same // as for psetblocksize(). // void *pallochandle(size_t size); void pfreehandle(void *handle); // The block size can only be changed before the first call to // palloc(). Calling psetblocksize() after that has no effect. // void psetblocksize(size_t size); size_t pgetblocksize(void); // Not generally useful - just dumps the allocated blocks to stdout. // Uses internal structures, and used in the test routine. // // psetdebug() enables reporting of allocations. // void pdumppalloc(void *handle); void psetdebug(int on); //////////////////////////////////////// // // md5 // typedef struct { uint64 a; uint64 b; uint32 i; // the iid, used in leaff uint32 pad; // keep us size compatible between 32- and 64-bit machines. } md5_s; #define MD5_BUFFER_SIZE 32*1024 typedef struct { uint64 a; uint64 b; void *context; int bufferPos; unsigned char buffer[MD5_BUFFER_SIZE]; } md5_increment_s; // Returns -1, 0, 1 depending on if a <, ==, > b. Suitable for // qsort(). // int md5_compare(void const *a, void const *b); // Converts an md5_s into a character string. s must be at least // 33 bytes long. // char *md5_toascii(md5_s *m, char *s); // Computes the md5 checksum on the string s. // md5_s *md5_string(md5_s *m, char *s, uint32 l); // Computes an md5 checksum piece by piece. // // If m is NULL, a new md5_increment_s is allocated and returned. 
// md5_increment_s *md5_increment_char(md5_increment_s *m, char s); md5_increment_s *md5_increment_block(md5_increment_s *m, char *s, uint32 l); void md5_increment_finalize(md5_increment_s *m); void md5_increment_destroy(md5_increment_s *m); //////////////////////////////////////// // // Matsumoto and Nishimura's Mersenne Twister pseudo random number // generator. The struct and functions are defined in external/mt19937ar.[ch] // typedef struct mtctx mt_s; mt_s *mtInit(uint32 s); mt_s *mtInitArray(uint32 *init_key, uint32 key_length); uint32 mtRandom32(mt_s *mt); // A uint64 random number // NOTE(review): the two mtRandom32() calls sit in one expression, so their evaluation order is unspecified and the 64-bit value may differ between compilers. // #define mtRandom64(MT) ( (((uint64)mtRandom32(MT)) << 32) | (uint64)mtRandom32(MT) ) // Real valued randomness // mtRandomRealOpen() -- on [0,1) real interval // mtRandomRealClosed() -- on [0,1] real interval // mtRandomRealOpen53() -- on [0,1) real interval, using 53 bits // // "These real versions are due to Isaku Wada, 2002/01/09 added" and were taken from // the mt19937ar.c distribution (but they had actual functions, not macros) // // They also had // random number in (0,1) as (mtRandom32() + 0.5) * (1.0 / 4294967296.0) // // NOTE(review): mtRandomRealOpen53() also calls mtRandom32() twice in one expression; the same evaluation-order caveat applies. // #define mtRandomRealOpen(MT) ( (double)mtRandom32(MT) * (1.0 / 4294967296.0) ) #define mtRandomRealClosed(MT) ( (double)mtRandom32(MT) * (1.0 / 4294967295.0) ) #define mtRandomRealOpen53(MT) ( ((mtRandom32(MT) >> 5) * 67108864.0 + (mtRandom32(MT) >> 6)) * (1.0 / 9007199254740992.0) ) // returns a random number with gaussian distribution, mean of zero and std.dev. of 1 // double mtRandomGaussian(mt_s *mt); //////////////////////////////////////// // // FreeBSD's multithreaded qsort. 
// void qsort_mt(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *), int maxthreads, int forkelem); //#define qsort(A, N, ES, CMP) qsort_mt((A), (N), (ES), (CMP), 4, 64 * 1024) //////////////////////////////////////// // // perl's chomp is pretty nice // /* chomp() and chompL() strip trailing whitespace from S in place; chompL() also decrements L once per removed character. The scan is bounded by the start of the string, so an empty or all-whitespace S never reads or writes before S[0]. */ #ifndef chomp #define chomp(S) { char *chompBgn=(S), *t=chompBgn; while (*t) t++; while ((t > chompBgn) && isspace((unsigned char)t[-1])) { *--t=0; } } #define chompL(S,L) { char *chompBgn=(S), *t=chompBgn; while (*t) t++; while ((t > chompBgn) && isspace((unsigned char)t[-1])) { *--t=0; L--; } } #endif #ifndef munch #define munch(S) { while (*(S) && isspace(*(S))) (S)++; } #endif #ifndef crunch #define crunch(S) { while (*(S) && !isspace(*(S))) (S)++; } #endif #ifndef MIN #define MIN(x,y) (((x) > (y)) ? (y) : (x)) #endif #ifndef MAX #define MAX(x,y) (((x) < (y)) ? (y) : (x)) #endif #ifdef __cplusplus } #endif #endif // UTIL_H kmer-code-2013-trunk/libutil/unaryEncoding.h0000644000000000000000000000272112322046702017570 0ustar rootroot#ifndef UNARY_ENCODING_H #define UNARY_ENCODING_H #include "bitPacking.h" // Routines to store and retrieve a unary encoded number to/from a // bit packed word array based at 'ptr' and currently at location // 'pos'. Both routines return the size of the encoded number in // 'siz'. // The usual unary encoding. Store the number n as n 0 bits followed // by a single 1 bit. // // 0 -> 1 // 1 -> 01 // 2 -> 001 // 3 -> 0001 // 4 -> 00001 // // See the decoder as to why we use 0 instead of 1 for the count. inline void setUnaryEncodedNumber(uint64 *ptr, uint64 pos, uint64 *siz, uint64 val) { /* The encoded size (val zero bits plus the terminating one bit) is stored once, up front; the output pointer siz itself must not be advanced while the zero words are written. */ *siz = val + 1; while (val >= 64) { setDecodedValue(ptr, pos, 64, uint64ZERO); pos += 64; val -= 64; } setDecodedValue(ptr, pos, val + 1, uint64ONE); pos += val + 1; } inline uint64 getUnaryEncodedNumber(uint64 *ptr, uint64 pos, uint64 *siz) { uint64 val = uint64ZERO; uint64 enc = uint64ZERO; // How many whole words are zero? // enc = getDecodedValue(ptr, pos, 64); while (enc == uint64ZERO) { val += 64; pos += 64; enc = getDecodedValue(ptr, pos, 64); } // This word isn't zero. 
Count how many bits are zero (see, the // choice of 0 or 1 for the encoding wasn't arbitrary!) // val += 64 - logBaseTwo64(enc); *siz = val + 1; return(val); } #endif // UNARY_ENCODING_H kmer-code-2013-trunk/libutil/bigQueue.H0000644000000000000000000001116012322046702016466 0ustar rootroot#ifndef BIGQUEUE_H #define BIGQUEUE_H #include "util++.H" // A disk-backed list of user-defined objects. // // At creation time, you can opt to have it sorted, using a // user-defined function. // A list based on a variable length object (let alone a sort!) // must use some form of dereferencing scheme. So, if you want to // use variable length records, you have to use pointers, and supply // functions to do everything (compare, read, write). // // On the other hand, it would be quite more convenient (to use) if we // used objects (would need copy, compare, read, write). // // 1) Restrict to void*, fixed block size, functions for compare, // destroy. read, write and copy done with fread(), fwrite() and // memcpy(). // // 2) Restrict to void*, functions for compare, read, write and // destroy. I allocate an array of pointers. Assume shallow copies // are ok (qsort will be used). On construct, we need to know the // size of the data so we know how many objects to buffer before // sorting and writing. It's also possible to use fread() and // fwrite(). // // 3) Restrict to objects, operators for copy, compare, read, write, // default construct, destroy. I allocate an array of objects. // // 1 is the easiest to write, 2 and 3 are conceptually the same. 1 // cannot write out deep data (pointer to string). 2 is a trivial // extension to 1, and fixes that. 3 is the correct version, but I // don't want to deal with streams io. So, 2 it is. // class bigQueue { public: // Initialize the bigQueue for anonymous storage, with an // option to later save the array. 
// bigQueue(bool (*readfcn)(FILE *, void *), bool (*writfcn)(FILE *, void *), void (*killfcn)(void *), uint32 objectSize, char *tmpPath) { _initialize(0L, readfcn, writfcn, killfcn, objectSize, 0, tmpPath, 0L); }; // Initialize the bigQueue with a file of objects, presumably from // a previous invocation of bigQueue. // bigQueue(bool (*readfcn)(FILE *, void *), bool (*writfcn)(FILE *, void *), void (*killfcn)(void *), uint32 objectSize, char *tmpPath, char *filename) { _initialize(0L, readfcn, writfcn, killfcn, objectSize, 0, tmpPath, filename); }; // Initialize the bigQueue for sorting. // bigQueue(int (*sortfcn)(const void *a, const void *b), bool (*readfcn)(FILE *, void *), bool (*writfcn)(FILE *, void *), void (*killfcn)(void *), uint32 objectSize, uint32 memoryToUse, char *tmpPath) { _initialize(sortfcn, readfcn, writfcn, killfcn, objectSize, memoryToUse, tmpPath, 0L); }; private: void _initialize(int (*sortfcn)(const void *a, const void *b), bool (*readfcn)(FILE *f, void *a), bool (*writfcn)(FILE *f, void *a), void (*killfcn)(void *), uint32 objectSize, uint32 memoryToUse, char *tmppath, char *filename); public: ~bigQueue(); // Add elements to the end of the array. void add(void *); // We are designed for streaming access. bool next(void); void *get(void); // Rewind to the start. Sortable must be sorted. void rewind(void); // Save the anonymous array into a real file. void save(char *filepath); // Sort the sortable. Flush the flushable. void sort(void); void flush(void); private: void sortAndWriteBuffer(void); void clearBuffer(void); void mergeTemporaryFiles(void); char *_saveFile; char *_tmpPath; int (*_sortFunction)(const void *a, const void *b); bool (*_writFunction)(FILE *f, void *a); bool (*_readFunction)(FILE *f, void *a); void (*_killFunction)(void *a); uint32 _objectSize; uint32 _memoryToUse; uint32 _maxOpenFiles; uint32 _numTemporaryFiles; uint32 _numMergeFiles; // _temporaryFiles is all the opened output files. 
If we aren't // sorting, then only the first one is opened. // // _inputFile is a dup of the first temporary file. If we are // sorting, and you start reading before you sort, then you'll get // a very short read. // FILE **_temporaryFiles; FILE *_inputFile; // Stores things read back from disk, for return to the user. // Currently just one, but should be extended to many. // void *_thingBuffer; uint32 _bufferMax; uint32 _bufferLen; void **_buffer; }; #endif // BIGQUEUE_H kmer-code-2013-trunk/libutil/mt19937ar/0000755000000000000000000000000012641613357016202 5ustar rootrootkmer-code-2013-trunk/libutil/mt19937ar/mt19937ar.h0000644000000000000000000000200112322046702017712 0ustar rootroot#ifndef MT19937AR_H #define MT19937AR_H // Refactoring of // // A C-program for MT19937, with initialization improved 2002/1/26. // Coded by Takuji Nishimura and Makoto Matsumoto. // // to make it thread safe and (hopefully) more portable. // // 20040421, bpw // bri.h contains the function prototypes, but we hide the structure and // implementation here. 
// #include "../util.h" /* Period parameters */ #define MT_N 624 #define MT_M 397 #define MT_MATRIX_A 0x9908b0dfUL /* constant vector a */ #define MT_UPPER_MASK 0x80000000UL /* most significant w-r bits */ #define MT_LOWER_MASK 0x7fffffffUL /* least significant r bits */ struct mtctx { // The array for the state vector // uint32 mt[MT_N]; // The ordinal of the first uninitialized element -- // mti = N+1 -> element N is uninitialized // uint32 mti; // Something // mag01[x] = x * MT_MATRIX_A for x=0,1 // uint32 mag01[2]; }; // This is declared in util.h // //typedef struct mt mt_s; #endif // MT19937AR_H kmer-code-2013-trunk/libutil/mt19937ar/mt19937ar.out0000644000000000000000000012146310041470733020311 0ustar rootroot1000 outputs of genrand_int32() 1067595299 955945823 477289528 4107218783 4228976476 3344332714 3355579695 227628506 810200273 2591290167 2560260675 3242736208 646746669 1479517882 4245472273 1143372638 3863670494 3221021970 1773610557 1138697238 1421897700 1269916527 2859934041 1764463362 3874892047 3965319921 72549643 2383988930 2600218693 3237492380 2792901476 725331109 605841842 271258942 715137098 3297999536 1322965544 4229579109 1395091102 3735697720 2101727825 3730287744 2950434330 1661921839 2895579582 2370511479 1004092106 2247096681 2111242379 3237345263 4082424759 219785033 2454039889 3709582971 835606218 2411949883 2735205030 756421180 2175209704 1873865952 2762534237 4161807854 3351099340 181129879 3269891896 776029799 2218161979 3001745796 1866825872 2133627728 34862734 1191934573 3102311354 2916517763 1012402762 2184831317 4257399449 2899497138 3818095062 3030756734 1282161629 420003642 2326421477 2741455717 1278020671 3744179621 271777016 2626330018 2560563991 3055977700 4233527566 1228397661 3595579322 1077915006 2395931898 1851927286 3013683506 1999971931 3006888962 1049781534 1488758959 3491776230 104418065 2448267297 3075614115 3872332600 891912190 3936547759 2269180963 2633455084 1047636807 2604612377 2709305729 1952216715 207593580 
2849898034 670771757 2210471108 467711165 263046873 3569667915 1042291111 3863517079 1464270005 2758321352 3790799816 2301278724 3106281430 7974801 2792461636 555991332 621766759 1322453093 853629228 686962251 1455120532 957753161 1802033300 1021534190 3486047311 1902128914 3701138056 4176424663 1795608698 560858864 3737752754 3141170998 1553553385 3367807274 711546358 2475125503 262969859 251416325 2980076994 1806565895 969527843 3529327173 2736343040 2987196734 1649016367 2206175811 3048174801 3662503553 3138851612 2660143804 1663017612 1816683231 411916003 3887461314 2347044079 1015311755 1203592432 2170947766 2569420716 813872093 1105387678 1431142475 220570551 4243632715 4179591855 2607469131 3090613241 282341803 1734241730 1391822177 1001254810 827927915 1886687171 3935097347 2631788714 3905163266 110554195 2447955646 3717202975 3304793075 3739614479 3059127468 953919171 2590123714 1132511021 3795593679 2788030429 982155079 3472349556 859942552 2681007391 2299624053 647443547 233600422 608168955 3689327453 1849778220 1608438222 3968158357 2692977776 2851872572 246750393 3582818628 3329652309 4036366910 1012970930 950780808 3959768744 2538550045 191422718 2658142375 3276369011 2927737484 1234200027 1920815603 3536074689 1535612501 2184142071 3276955054 428488088 2378411984 4059769550 3913744741 2732139246 64369859 3755670074 842839565 2819894466 2414718973 1010060670 1839715346 2410311136 152774329 3485009480 4102101512 2852724304 879944024 1785007662 2748284463 1354768064 3267784736 2269127717 3001240761 3179796763 895723219 865924942 4291570937 89355264 1471026971 4114180745 3201939751 2867476999 2460866060 3603874571 2238880432 3308416168 2072246611 2755653839 3773737248 1709066580 4282731467 2746170170 2832568330 433439009 3175778732 26248366 2551382801 183214346 3893339516 1928168445 1337157619 3429096554 3275170900 1782047316 4264403756 1876594403 4289659572 3223834894 1728705513 4068244734 2867840287 1147798696 302879820 1730407747 1923824407 1180597908 
1569786639 198796327 560793173 2107345620 2705990316 3448772106 3678374155 758635715 884524671 486356516 1774865603 3881226226 2635213607 1181121587 1508809820 3178988241 1594193633 1235154121 326117244 2304031425 937054774 2687415945 3192389340 2003740439 1823766188 2759543402 10067710 1533252662 4132494984 82378136 420615890 3467563163 541562091 3535949864 2277319197 3330822853 3215654174 4113831979 4204996991 2162248333 3255093522 2219088909 2978279037 255818579 2859348628 3097280311 2569721123 1861951120 2907080079 2719467166 998319094 2521935127 2404125338 259456032 2086860995 1839848496 1893547357 2527997525 1489393124 2860855349 76448234 2264934035 744914583 2586791259 1385380501 66529922 1819103258 1899300332 2098173828 1793831094 276463159 360132945 4178212058 595015228 177071838 2800080290 1573557746 1548998935 378454223 1460534296 1116274283 3112385063 3709761796 827999348 3580042847 1913901014 614021289 4278528023 1905177404 45407939 3298183234 1184848810 3644926330 3923635459 1627046213 3677876759 969772772 1160524753 1522441192 452369933 1527502551 832490847 1003299676 1071381111 2891255476 973747308 4086897108 1847554542 3895651598 2227820339 1621250941 2881344691 3583565821 3510404498 849362119 862871471 797858058 2867774932 2821282612 3272403146 3997979905 209178708 1805135652 6783381 2823361423 792580494 4263749770 776439581 3798193823 2853444094 2729507474 1071873341 1329010206 1289336450 3327680758 2011491779 80157208 922428856 1158943220 1667230961 2461022820 2608845159 387516115 3345351910 1495629111 4098154157 3156649613 3525698599 4134908037 446713264 2137537399 3617403512 813966752 1157943946 3734692965 1680301658 3180398473 3509854711 2228114612 1008102291 486805123 863791847 3189125290 1050308116 3777341526 4291726501 844061465 1347461791 2826481581 745465012 2055805750 4260209475 2386693097 2980646741 447229436 2077782664 1232942813 4023002732 1399011509 3140569849 2579909222 3794857471 900758066 2887199683 1720257997 3367494931 
2668921229 955539029 3818726432 1105704962 3889207255 2277369307 2746484505 1761846513 2413916784 2685127085 4240257943 1166726899 4215215715 3082092067 3960461946 1663304043 2087473241 4162589986 2507310778 1579665506 767234210 970676017 492207530 1441679602 1314785090 3262202570 3417091742 1561989210 3011406780 1146609202 3262321040 1374872171 1634688712 1280458888 2230023982 419323804 3262899800 39783310 1641619040 1700368658 2207946628 2571300939 2424079766 780290914 2715195096 3390957695 163151474 2309534542 1860018424 555755123 280320104 1604831083 2713022383 1728987441 3639955502 623065489 3828630947 4275479050 3516347383 2343951195 2430677756 635534992 3868699749 808442435 3070644069 4282166003 2093181383 2023555632 1568662086 3422372620 4134522350 3016979543 3259320234 2888030729 3185253876 4258779643 1267304371 1022517473 815943045 929020012 2995251018 3371283296 3608029049 2018485115 122123397 2810669150 1411365618 1238391329 1186786476 3155969091 2242941310 1765554882 279121160 4279838515 1641578514 3796324015 13351065 103516986 1609694427 551411743 2493771609 1316337047 3932650856 4189700203 463397996 2937735066 1855616529 2626847990 55091862 3823351211 753448970 4045045500 1274127772 1124182256 92039808 2126345552 425973257 386287896 2589870191 1987762798 4084826973 2172456685 3366583455 3602966653 2378803535 2901764433 3716929006 3710159000 2653449155 3469742630 3096444476 3932564653 2595257433 318974657 3146202484 853571438 144400272 3768408841 782634401 2161109003 570039522 1886241521 14249488 2230804228 1604941699 3928713335 3921942509 2155806892 134366254 430507376 1924011722 276713377 196481886 3614810992 1610021185 1785757066 851346168 3761148643 2918835642 3364422385 3012284466 3735958851 2643153892 3778608231 1164289832 205853021 2876112231 3503398282 3078397001 3472037921 1748894853 2740861475 316056182 1660426908 168885906 956005527 3984354789 566521563 1001109523 1216710575 2952284757 3834433081 3842608301 2467352408 3974441264 3256601745 
1409353924 1329904859 2307560293 3125217879 3622920184 3832785684 3882365951 2308537115 2659155028 1450441945 3532257603 3186324194 1225603425 1124246549 175808705 3009142319 2796710159 3651990107 160762750 1902254979 1698648476 1134980669 497144426 3302689335 4057485630 3603530763 4087252587 427812652 286876201 823134128 1627554964 3745564327 2589226092 4202024494 62878473 3275585894 3987124064 2791777159 1916869511 2585861905 1375038919 1403421920 60249114 3811870450 3021498009 2612993202 528933105 2757361321 3341402964 2621861700 273128190 4015252178 3094781002 1621621288 2337611177 1796718448 1258965619 4241913140 2138560392 3022190223 4174180924 450094611 3274724580 617150026 2704660665 1469700689 1341616587 356715071 1188789960 2278869135 1766569160 2795896635 57824704 2893496380 1235723989 1630694347 3927960522 428891364 1814070806 2287999787 4125941184 3968103889 3548724050 1025597707 1404281500 2002212197 92429143 2313943944 2403086080 3006180634 3561981764 1671860914 1768520622 1803542985 844848113 3006139921 1410888995 1157749833 2125704913 1789979528 1799263423 741157179 2405862309 767040434 2655241390 3663420179 2172009096 2511931187 1680542666 231857466 1154981000 157168255 1454112128 3505872099 1929775046 2309422350 2143329496 2960716902 407610648 2938108129 2581749599 538837155 2342628867 430543915 740188568 1937713272 3315215132 2085587024 4030765687 766054429 3517641839 689721775 1294158986 1753287754 4202601348 1974852792 33459103 3568087535 3144677435 1686130825 4134943013 3005738435 3599293386 426570142 754104406 3660892564 1964545167 829466833 821587464 1746693036 1006492428 1595312919 1256599985 1024482560 1897312280 2902903201 691790057 1037515867 3176831208 1968401055 2173506824 1089055278 1748401123 2941380082 968412354 1818753861 2973200866 3875951774 1119354008 3988604139 1647155589 2232450826 3486058011 3655784043 3759258462 847163678 1082052057 989516446 2871541755 3196311070 3929963078 658187585 3664944641 2175149170 2203709147 
2756014689 2456473919 3890267390 1293787864 2830347984 3059280931 4158802520 1561677400 2586570938 783570352 1355506163 31495586 3789437343 3340549429 2092501630 896419368 671715824 3530450081 3603554138 1055991716 3442308219 1499434728 3130288473 3639507000 17769680 2259741420 487032199 4227143402 3693771256 1880482820 3924810796 381462353 4017855991 2452034943 2736680833 2209866385 2128986379 437874044 595759426 641721026 1636065708 3899136933 629879088 3591174506 351984326 2638783544 2348444281 2341604660 2123933692 143443325 1525942256 364660499 599149312 939093251 1523003209 106601097 376589484 1346282236 1297387043 764598052 3741218111 933457002 1886424424 3219631016 525405256 3014235619 323149677 2038881721 4100129043 2851715101 2984028078 1888574695 2014194741 3515193880 4180573530 3461824363 2641995497 3179230245 2902294983 2217320456 4040852155 1784656905 3311906931 87498458 2752971818 2635474297 2831215366 3682231106 2920043893 3772929704 2816374944 309949752 2383758854 154870719 385111597 1191604312 1840700563 872191186 2925548701 1310412747 2102066999 1504727249 3574298750 1191230036 3330575266 3180292097 3539347721 681369118 3305125752 3648233597 950049240 4173257693 1760124957 512151405 681175196 580563018 1169662867 4015033554 2687781101 699691603 2673494188 1137221356 123599888 472658308 1053598179 1012713758 3481064843 3759461013 3981457956 3830587662 1877191791 3650996736 988064871 3515461600 4089077232 2225147448 1249609188 2643151863 3896204135 2416995901 1397735321 3460025646 1000 outputs of genrand_real2() 0.76275443 0.99000644 0.98670464 0.10143112 0.27933125 0.69867227 0.94218740 0.03427201 0.78842173 0.28180608 0.92179002 0.20785655 0.54534773 0.69644020 0.38107718 0.23978165 0.65286910 0.07514568 0.22765211 0.94872929 0.74557914 0.62664415 0.54708246 0.90959343 0.42043116 0.86334511 0.19189126 0.14718544 0.70259889 0.63426346 0.77408121 0.04531601 0.04605807 0.88595519 0.69398270 0.05377184 0.61711170 0.05565708 0.10133577 0.41500776 
0.91810699 0.22320679 0.23353705 0.92871862 0.98897234 0.19786706 0.80558809 0.06961067 0.55840445 0.90479405 0.63288060 0.95009721 0.54948447 0.20645042 0.45000959 0.87050869 0.70806991 0.19406895 0.79286390 0.49332866 0.78483914 0.75145146 0.12341941 0.42030252 0.16728160 0.59906494 0.37575460 0.97815160 0.39815952 0.43595080 0.04952478 0.33917805 0.76509902 0.61034321 0.90654701 0.92915732 0.85365931 0.18812377 0.65913428 0.28814566 0.59476081 0.27835931 0.60722542 0.68310435 0.69387186 0.03699800 0.65897714 0.17527003 0.02889304 0.86777366 0.12352068 0.91439461 0.32022990 0.44445731 0.34903686 0.74639273 0.65918367 0.92492794 0.31872642 0.77749724 0.85413832 0.76385624 0.32744211 0.91326300 0.27458185 0.22190155 0.19865383 0.31227402 0.85321225 0.84243342 0.78544200 0.71854080 0.92503892 0.82703064 0.88306297 0.47284073 0.70059042 0.48003761 0.38671694 0.60465770 0.41747204 0.47163243 0.72750808 0.65830223 0.10955369 0.64215401 0.23456345 0.95944940 0.72822249 0.40888451 0.69980355 0.26677428 0.57333635 0.39791582 0.85377858 0.76962816 0.72004885 0.90903087 0.51376506 0.37732665 0.12691640 0.71249738 0.81217908 0.37037313 0.32772374 0.14238259 0.05614811 0.74363008 0.39773267 0.94859135 0.31452454 0.11730313 0.62962618 0.33334237 0.45547255 0.10089665 0.56550662 0.60539371 0.16027624 0.13245301 0.60959939 0.04671662 0.99356286 0.57660859 0.40269560 0.45274629 0.06699735 0.85064246 0.87742744 0.54508392 0.87242982 0.29321385 0.67660627 0.68230715 0.79052073 0.48592054 0.25186266 0.93769755 0.28565487 0.47219067 0.99054882 0.13155240 0.47110470 0.98556600 0.84397623 0.12875246 0.90953202 0.49129015 0.23792727 0.79481194 0.44337770 0.96564297 0.67749118 0.55684872 0.27286897 0.79538393 0.61965356 0.22487929 0.02226018 0.49248200 0.42247006 0.91797788 0.99250134 0.23449967 0.52531508 0.10246337 0.78685622 0.34310922 0.89892996 0.40454552 0.68608407 0.30752487 0.83601319 0.54956031 0.63777550 0.82199797 0.24890696 0.48801123 0.48661910 0.51223987 0.32969635 
0.31075073 0.21393155 0.73453207 0.15565705 0.58584522 0.28976728 0.97621478 0.61498701 0.23891470 0.28518540 0.46809591 0.18371914 0.37597910 0.13492176 0.66849449 0.82811466 0.56240330 0.37548956 0.27562998 0.27521910 0.74096121 0.77176757 0.13748143 0.99747138 0.92504502 0.09175241 0.21389176 0.21766512 0.31183245 0.23271221 0.21207367 0.57903312 0.77523344 0.13242613 0.31037988 0.01204835 0.71652949 0.84487594 0.14982178 0.57423142 0.45677888 0.48420169 0.53465428 0.52667473 0.46880526 0.49849733 0.05670710 0.79022476 0.03872047 0.21697212 0.20443086 0.28949326 0.81678186 0.87629474 0.92297064 0.27373097 0.84625273 0.51505586 0.00582792 0.33295971 0.91848412 0.92537226 0.91760033 0.07541125 0.71745848 0.61158698 0.00941650 0.03135554 0.71527471 0.24821915 0.63636652 0.86159918 0.26450229 0.60160194 0.35557725 0.24477500 0.07186456 0.51757096 0.62120362 0.97981062 0.69954667 0.21065616 0.13382753 0.27693186 0.59644095 0.71500764 0.04110751 0.95730081 0.91600724 0.47704678 0.26183479 0.34706971 0.07545431 0.29398385 0.93236070 0.60486023 0.48015011 0.08870451 0.45548581 0.91872718 0.38142712 0.10668643 0.01397541 0.04520355 0.93822273 0.18011940 0.57577277 0.91427606 0.30911399 0.95853475 0.23611214 0.69619891 0.69601980 0.76765372 0.58515930 0.49479057 0.11288752 0.97187699 0.32095365 0.57563608 0.40760618 0.78703383 0.43261152 0.90877651 0.84686346 0.10599030 0.72872803 0.19315490 0.66152912 0.10210518 0.06257876 0.47950688 0.47062066 0.72701157 0.48915116 0.66110261 0.60170685 0.24516994 0.12726050 0.03451185 0.90864994 0.83494878 0.94800035 0.91035206 0.14480751 0.88458997 0.53498312 0.15963215 0.55378627 0.35171349 0.28719791 0.09097957 0.00667896 0.32309622 0.87561479 0.42534520 0.91748977 0.73908457 0.41793223 0.99279792 0.87908370 0.28458072 0.59132853 0.98672190 0.28547393 0.09452165 0.89910674 0.53681109 0.37931425 0.62683489 0.56609740 0.24801549 0.52948179 0.98328855 0.66403523 0.55523786 0.75886666 0.84784685 0.86829981 0.71448906 0.84670080 
0.43922919 0.20771016 0.64157936 0.25664246 0.73055695 0.86395782 0.65852932 0.99061803 0.40280575 0.39146298 0.07291005 0.97200603 0.20555729 0.59616495 0.08138254 0.45796388 0.33681125 0.33989127 0.18717090 0.53545811 0.60550838 0.86520709 0.34290701 0.72743276 0.73023855 0.34195926 0.65019733 0.02765254 0.72575740 0.32709576 0.03420866 0.26061893 0.56997511 0.28439072 0.84422744 0.77637570 0.55982168 0.06720327 0.58449067 0.71657369 0.15819609 0.58042821 0.07947911 0.40193792 0.11376012 0.88762938 0.67532159 0.71223735 0.27829114 0.04806073 0.21144026 0.58830274 0.04140071 0.43215628 0.12952729 0.94668759 0.87391019 0.98382450 0.27750768 0.90849647 0.90962737 0.59269720 0.96102026 0.49544979 0.32007095 0.62585546 0.03119821 0.85953001 0.22017528 0.05834068 0.80731217 0.53799961 0.74166948 0.77426600 0.43938444 0.54862081 0.58575513 0.15886492 0.73214332 0.11649057 0.77463977 0.85788827 0.17061997 0.66838056 0.96076133 0.07949296 0.68521946 0.89986254 0.05667410 0.12741385 0.83470977 0.63969104 0.46612929 0.10200126 0.01194925 0.10476340 0.90285217 0.31221221 0.32980614 0.46041971 0.52024973 0.05425470 0.28330912 0.60426543 0.00598243 0.97244013 0.21135841 0.78561597 0.78428734 0.63422849 0.32909934 0.44771136 0.27380750 0.14966697 0.18156268 0.65686758 0.28726350 0.97074787 0.63676171 0.96649494 0.24526295 0.08297372 0.54257548 0.03166785 0.33735355 0.15946671 0.02102971 0.46228045 0.11892296 0.33408336 0.29875681 0.29847692 0.73767569 0.02080745 0.62980060 0.08082293 0.22993106 0.25031439 0.87787525 0.45150053 0.13673441 0.63407612 0.97907688 0.52241942 0.50580158 0.06273902 0.05270283 0.77031811 0.05113352 0.24393329 0.75036441 0.37436336 0.22877652 0.59975358 0.85707591 0.88691457 0.85547165 0.36641027 0.58720133 0.45462835 0.09243817 0.32981586 0.07820411 0.25421519 0.36004706 0.60092307 0.46192412 0.36758683 0.98424170 0.08019934 0.68594024 0.45826386 0.29962317 0.79365413 0.89231296 0.49478547 0.87645944 0.23590734 0.28106737 0.75026285 0.08136314 
0.79582424 0.76010628 0.82792971 0.27947652 0.72482861 0.82191216 0.46171689 0.79189752 0.96043686 0.51609668 0.88995725 0.28998963 0.55191845 0.03934737 0.83033700 0.49553013 0.98009549 0.19017594 0.98347750 0.33452066 0.87144372 0.72106301 0.71272114 0.71465963 0.88361677 0.85571283 0.73782329 0.20920458 0.34855153 0.46766817 0.02780062 0.74898344 0.03680650 0.44866557 0.77426312 0.91025891 0.25195236 0.87319953 0.63265037 0.25552148 0.27422476 0.95217406 0.39281839 0.66441573 0.09158900 0.94515992 0.07800798 0.02507888 0.39901462 0.17382573 0.12141278 0.85502334 0.19902911 0.02160210 0.44460522 0.14688742 0.68020336 0.71323733 0.60922473 0.95400380 0.99611159 0.90897777 0.41073520 0.66206647 0.32064685 0.62805003 0.50677209 0.52690101 0.87473387 0.73918362 0.39826974 0.43683919 0.80459118 0.32422684 0.01958019 0.95319576 0.98326137 0.83931735 0.69060863 0.33671416 0.68062550 0.65152380 0.33392969 0.03451730 0.95227244 0.68200635 0.85074171 0.64721009 0.51234433 0.73402047 0.00969637 0.93835057 0.80803854 0.31485260 0.20089527 0.01323282 0.59933780 0.31584602 0.20209563 0.33754800 0.68604181 0.24443049 0.19952227 0.78162632 0.10336988 0.11360736 0.23536740 0.23262256 0.67803776 0.48749791 0.74658435 0.92156640 0.56706407 0.36683221 0.99157136 0.23421374 0.45183767 0.91609720 0.85573315 0.37706276 0.77042618 0.30891908 0.40709595 0.06944866 0.61342849 0.88817388 0.58734506 0.98711323 0.14744128 0.63242656 0.87704136 0.68347125 0.84446569 0.43265239 0.25146321 0.04130111 0.34259839 0.92697368 0.40878778 0.56990338 0.76204273 0.19820348 0.66314909 0.02482844 0.06669207 0.50205581 0.26084093 0.65139159 0.41650223 0.09733904 0.56344203 0.62651696 0.67332139 0.58037374 0.47258086 0.21010758 0.05713135 0.89390629 0.10781246 0.32037450 0.07628388 0.34227964 0.42190597 0.58201860 0.77363549 0.49595133 0.86031236 0.83906769 0.81098161 0.26694195 0.14215941 0.88210306 0.53634237 0.12090720 0.82480459 0.75930318 0.31847147 0.92768077 0.01037616 0.56201727 0.88107122 
0.35925856 0.85860762 0.61109408 0.70408301 0.58434977 0.92192494 0.62667915 0.75988365 0.06858761 0.36156496 0.58057195 0.13636150 0.57719713 0.59340255 0.63530602 0.22976282 0.71915530 0.41162531 0.63979565 0.09931342 0.79344045 0.10893790 0.84450224 0.23122236 0.99485593 0.73637397 0.17276368 0.13357764 0.74965804 0.64991737 0.61990341 0.41523170 0.05878239 0.05687301 0.05497131 0.42868366 0.42571090 0.25810502 0.89642955 0.30439758 0.39310223 0.11357431 0.04288255 0.23397550 0.11200634 0.85621396 0.89733974 0.37508865 0.42077265 0.68597384 0.72781399 0.19296476 0.61699087 0.31667128 0.67756410 0.00177323 0.05725176 0.79474693 0.18885238 0.06724856 0.68193156 0.42202167 0.22082041 0.28554673 0.64995708 0.87851940 0.29124547 0.61009521 0.87374537 0.05743712 0.69902994 0.81925115 0.45653873 0.37236821 0.31118709 0.52734307 0.39672836 0.38185294 0.30163915 0.17374510 0.04913278 0.90404879 0.25742801 0.58266467 0.97663209 0.79823377 0.36437958 0.15206043 0.26529938 0.22690047 0.05839021 0.84721160 0.18622435 0.37809403 0.55706977 0.49828704 0.47659049 0.24289680 0.88477595 0.07807463 0.56245739 0.73490635 0.21099431 0.13164942 0.75840044 0.66877037 0.28988183 0.44046090 0.24967434 0.80048356 0.26029740 0.30416821 0.64151867 0.52067892 0.12880774 0.85465381 0.02690525 0.19149288 0.49630295 0.79682619 0.43566145 0.00288078 0.81484193 0.03763639 0.68529083 0.01339574 0.38405386 0.30537067 0.22994703 0.44000045 0.27217985 0.53831243 0.02870435 0.86282045 0.61831306 0.09164956 0.25609707 0.07445781 0.72185784 0.90058883 0.30070608 0.94476583 0.56822213 0.21933909 0.96772793 0.80063440 0.26307906 0.31183306 0.16501252 0.55436179 0.68562285 0.23829083 0.86511559 0.57868991 0.81888344 0.20126869 0.93172350 0.66028129 0.21786948 0.78515828 0.10262106 0.35390326 0.79303876 0.63427924 0.90479631 0.31024934 0.60635447 0.56198079 0.63573813 0.91854197 0.99701497 0.83085849 0.31692291 0.01925964 0.97446405 0.98751283 0.60944293 0.13751018 0.69519957 0.68956636 0.56969015 
0.46440193 0.88341765 0.36754434 0.89223647 0.39786427 0.85055280 0.12749961 0.79452122 0.89449784 0.14567830 0.45716830 0.74822309 0.28200437 0.42546044 0.17464886 0.68308746 0.65496587 0.52935411 0.12736159 0.61523955 0.81590528 0.63107864 0.39786553 0.20102294 0.53292914 0.75485590 0.59847044 0.32861691 0.12125866 0.58917183 0.07638293 0.86845380 0.29192617 0.03989733 0.52180460 0.32503407 0.64071852 0.69516575 0.74254998 0.54587026 0.48713246 0.32920155 0.08719954 0.63497059 0.54328459 0.64178757 0.45583809 0.70694291 0.85212760 0.86074305 0.33163422 0.85739792 0.59908488 0.74566046 0.72157152 0x67405e6c328fecdf 0x3c8b2c35482ec8c9 0x3250533bca1940c7 0xf2d983e5b3262520 0xe5b759c591be1fda 0x8242a4458c0654ef 0xb04d83e5cb5b6017 0xb6ad8ae702c9d964 0xbcf18ae96331a2da 0x1cc1d152497d4674 0xc89cf1de59189442 0x398b33c171e4c16c 0xe1ef8b20e9581f1f 0xbcf3922d01c3c4c3 0x3fb925af371e20d8 0x3788696c8a091e68 0x98a8edcd8a199268 0x2b2bf18b86a1d357 0x474017009e18d034 0x0f5914833849cde7 0x5f04574352379c0d 0x8f5ca9b0d749b8d0 0x75b973eb6c039cde 0x69d4a24d0386aca4 0xfe82fe22f8c3715d 0x007e2b70611c98e7 0xf45e29c72b9f1786 0x7694fd07e82e529d 0x0d374894c5b55c9c 0xc8e6005052a38ac7 0xdafb054cec6083d7 0x625a22c66cd3bf85 0xc0af2ff40f2b0074 0xd6489630d188c4c3 0x7f034dbaf566f42b 0xfa47383d871e8dae 0x1e9bc6524bbc99df 0xc97e66d2eef0793f 0x45760d27aade8dc5 0xd9a5a1454582602e 0xb340cc9b522bb2b1 0xc449fec5c8359c3b 0xdba9d20c2b809802 0xe414bdd2089cd4a9 0x714def09cabd0d3d 0xf9755125bdca9539 0xa5d9bfb951aed29c 0x5c7e2d33c83ddf92 0x8a59ea07850aa835 0x401f067db97b5427 0xefe4b17bb713b9c7 0xee193cc8a0f16596 0x0c6b1f2a9ea778e5 0xbcacdc1567f07bef 0x713ddb4b58672888 0xee8075374182b161 0x03ec2941a3da86d6 0x675aebbee50e893c 0xd225931684eb5291 0xe477c127c0a105e7 0xefcf3d05ef772a45 0xc4ef9941734f83b1 0x87e4942af85e11e8 0xbf5fcadc377de765 0x079d9de562268b5f 0xea04faeba98f3e5c 0xb930c6da6ddba5f7 0x478236b93821b9ba 0xa9cfc9be294b0ff7 0x6079f977b05fb70e 0xe9e2dbe839ffb6e8 0xb3170798eddd9379 0x0ef96e0ec793a92f 
0x0524e13897de842d 0x465b56ac3b31cdda 0x890bd07a90444c77 0xe234eff01af4dfd2 0xe84848232dbbfdd3 0x3c4efcfca0456b81 0x670edacde7eeec34 0x2b36fde828c7b1ec 0x9b6da65be00fdbd5 0xa3c761de25e1f4ba 0x7377fd171c85c139 0x1b128eb2dad95dd2 0x3537b8e218bd4ec7 0xa0101eb1b3e29a86 0x528f6b866eb20175 0x5473da9172d03fbd 0x9c77153ec299763c 0x47ba00f901873acb 0x137df82e07c009b0 0x3c61915b065d0aa6 0xe49e7299af8e6c5c 0x257f9436560d2208 0x78b7d974999c611a 0x0d8adaad822c4965 0xa20c4d15c9573034 0x6f95460c6e9d822d 0x2f853e287ea74d01 0xab53596fa5dc9c89 0x527171fc57868fc3 0xab0a91e3a225f47f 0x5fe417b6624ce303 0x8ae42059c66c39dd 0xb60de6ddff69c310 0xc83f24d03dbfe608 0xc5165efd988fd170 0x12c3eb7ad4274929 0x1f8cee33476428d2 0x1ddea9364463c3ff 0x1667501fcad4dedb 0x62c7365a9bb30ad9 0x8899b942b7573303 0xc887f6d9377ee390 0x402af03474f37acc 0x9da72866ac0d1cc5 0xa986abc55d6af4f4 0xe8a71cb1151dbf43 0x0ec056bba0bc3215 0xda14a58e848fcf79 0x491f5bd20198aa9b 0xf42da0476fcd8832 0x627c1873be8fb51e 0xa454d9b0317b861a 0x40fc96f80cd3bbc0 0x9e2ff393daf483cb 0xfc758a65c59b083e 0xe859fd51d3a08556 0x5efef07d19820724 0xe279ece74937813f 0x110eb92574a2084e 0x3df42d82245bff2b 0x9ca473630575feb8 0x15c70084fb66d585 0xbee7c870394194f7 0x9cc0a4f51f369867 0x6afbf2cb6ec861cf 0xa5b91a04835cae3a 0x7ddc222f0834f7eb 0xa9dfb4a5b06e5b6b 0xb5810c3cc04cd424 0xc868b4afb4e9dacc 0x42c02254bac2d4d8 0xf0a9eece4466ef85 0x4871a4148a47ea38 0x12576ffdb6fdad9b 0x0e9505d10fc16302 0x5631bf2c70323f0e 0x28b179d03e01feab 0x9dc0dd12b00dac9b 0x8cf7134a0d2f5d0e 0xeb08c68420fa5985 0x70c0a74d1d12661a 0xc66142602dd41863 0xb20965edb4a3adb9 0x319530ddf3890df6 0x5b78a9a93cbc723c 0x4cb9b467b0b55068 0x8542da469019ce95 0xc091429ae362663a 0x9ef9dc529dccab6e 0x96da56d02c5e2eda 0xbd065629a65356b8 0x86ab31cf8072eca6 0xc95dff79f3ee122c 0x9044db614b167618 0x2b53b3ada5a7095d 0x039b2a4c565ecffa 0xebe357d7d10d5f2d 0x64b694ddcdffd60a 0x2f1a1613ac6c9f3d 0xc79da221329d4ef9 0x2bdb226b6cd38813 0xca9767b9d3a2b084 0x66f6512c1f7dc7fd 0x91a73f962a2f07e0 0x885bbddc2ff3debf 
0xbd67554077fd4c70 0x99013399c758bd15 0x5cf5b0f828128900 0x210a832780dbd783 0x9e3a38661b53eda9 0xaae6b882c58412c3 0x2082626df4ca895b 0x5cf5f76bb7b48682 0x6c0cae2d715e434e 0xef16816391c305dc 0x87b979af394695e4 0xcd2a272a1a805492 0x60f7e95280c303c4 0x321c98d44c5f4e05 0xadf6dce5480a0aa2 0x72c9e5996c3339ad 0xbf1d29e6f0b41a0d 0x274f3d2dd5f0c37e 0x914b57f6ef5243e9 0xed1817876768b412 0x05b4432aa26c8866 0x87ca3f2c01af6814 0x03116c1030ac3ab8 0xf5cacecb81781b7c 0x131f0c45877769e0 0x3eabeb0bd69e2872 0x7b276bc3d7ecbd7f 0xccb9143a415b647d 0xe84d77e693e184c0 0x780d77c885a12891 0xcbfefe34f9a928ce 0xd5c2487c43c47678 0x2eb49b0cff9b2b0c 0xd31248757f950b80 0xac0f5e6b333b69f3 0x3bc0db5d4cd64c13 0x6e7b83ef32960445 0xa503015d8f6deb6c 0x5b8daa355e155964 0x2f734ee67c567191 0xedbf27328a640c45 0xa7d67ea4920db6eb 0x581d6a00e15dd86f 0xc50a59f87c672f01 0x6bbb37d607050b9c 0xbe1ed4bf962c3f9c 0xb9e18300cc6a0292 0xbaf3a963ebf66bb1 0xcc4a11a37f3f042c 0x78c18be41d2173a5 0xd624936215d43e75 0x123cf1fcbd9a720f 0xebd61ce176c98627 0x78f872dffb1a12cc 0x58e45fdbe9434f31 0x1fb28683306e8c21 0x994bdbccb69dbb36 0xca605fc87b2b873a 0xfdef6f060b8f4c19 0xbf7b96a4d8039aa5 0xcd017cc08e0ef146 0x109f34ce10506ff9 0x79a06caf3d6c91c8 0x2ed9f6c0e43a7a0a 0xda890a93647404d9 0x8f495cdc9571b6db 0xbceda30aec45acf5 0xe2f8e38ec63c6eb3 0x86637eb61e70775c 0x656a79f5182dcd27 0x0d6ac797d29627af 0xcfa36f230d63e8f5 0x0fe19f20f9309ac4 0x069fb30d45ab4af4 0xb5ba378ef4a205b2 0xe51b1280dc04e530 0x6efd94972a8be0a7 0x334fc5b584a1bad7 0xbdd5a374520f4a9b 0x706b706b5bc348c8 0x836dc7d17c04caac 0x356ddab9d49ddfe6 0xd159ed5840ddd0ae 0xadac31b4fef8e091 0x8d89032b3845859f 0x3bd971e2a8cadd2c 0x65304014021337bf 0xb7d974ac484afeaf 0xdbc7da0068d2f636 0x16eb1156c1b04b4d 0x8616a5dea534ad0e 0x67f74216c5383c94 0x66b2b1aa56ad4087 0x29c2484d55c3489d 0x150215c70d49f395 0xf9d5c84babe4b95c 0x5190562945d45db4 0x422706f447c8cb26 0xd836deaaa1c2de48 0x422d6a90be18aada 0x47be7c3ce1d2d478 0xf67eca6f67bb3775 0x93b171e4234cfe76 0x284d0008372ef07c 0x3698d967c82e6cf2 
0xe2f6a86325a83871 0xa05b6bb8d5a8f19d 0x8c3c8c0932b47315 0xcb17cef7ae59b502 0x03cfa20ca46fa2b2 0xe6b973298c8e9cf9 0x3dc8b542ce244d7c 0x24e151f91a603c98 0xbabc1fe6edf224d0 0x03057b07c7da7488 0x9893f09f3a05d9d4 0x8d7c9ede4bbb4625 0x0a3e483d53be86ca 0xaeaa80fa118aa1be 0x5db21e05baa52d1a 0x7e6646013c3c76bd 0x6e02586aab7c0b75 0xc599811f3381e84e 0x8584e334f66cfd11 0x2ddab189180628de 0x798e7628ad4a602b 0xfb4dd579277e2008 0x6600c85ce8ec6a2e 0x0aa55f130589b854 0xe45ab335eac329a3 0xa5325407038e4e11 0xcf996346624b521c 0x4d0274a34c9589bd 0x8d0cad0b87392d57 0x2d9646a64bcd2d2f 0xad7a1ec0ae4a2151 0x3f376577014f8dca 0x19deb4a855f350e3 0x4998df888b984424 0xceebb951fbc934ca 0x506ef0f671ceb9c1 0xc5ef09726ea704bf 0x5189b998448a5e11 0x675f92fbc1a4c11e 0x530c4265087a7c4a 0xe18e8069e8b707e1 0x3c2779074e4a00a2 0x95370af830c9dc3a 0x416b062811f2175f 0x1fa8fe3935e37c73 0xde540bcb853484b0 0xcb601ed072199738 0x83c2ac01d2ca8f62 0x64f81e49fdcaf6a7 0xb7e5e765429cee4c 0xcec327d993653b95 0xd21eb799eac60217 0x3d1f583dcea5cb99 0x2f780a3c356d4f5d 0x0d25a447cc72a00c 0x333fcbb7b46e275c 0xd30674469bf820aa 0x434434c3882a96aa 0x3c8b8d389fa5647b 0xa1af5ecc309ff07c 0x39e85842caf91bcc 0x383eac8cbb8c7b53 0xddc66b6b38b1f273 0x7b0e539b24202231 0xf65bd3d8c61484a8 0x8ffa69be80f9b321 0x22b46595c09272e4 0x5393806eb788160e 0x891be7c411c10b15 0x9278dccf0fe4813b 0x86b245936eaa77d7 0x4a6b28b10796ab38 0x1ea2db227acc5ae6 0x7b54f85812e5900b 0xe5f912551ddf84a8 0x0f4fea8cd744faf7 0x35aa34911651bab2 0x3302457b0da9f478 0x7e8980bb045d7552 0xb17000de13ee8a17 0x15eef5ca5c5209d4 0x776f92bce4b4bf98 0xb8176052fe1587e7 0x9437869fefbbe7ea 0xeee20c003aea2c86 0xe2c48da4fdc5426a 0x084429586e8a01ac 0x51156e0189a6be85 0x48fa8721bf77762c 0x2b0b33a3c4fc2e60 0x57d068040d700132 0xdb1dbc6158eb9ea0 0xc097e39e74ba26af 0x4a1c702bd0ab338b 0xc64fb41bb2e7242a 0x397ce664cd1bd8b9 0x10998a8b432e89d5 0x6641eb4947f78de5 0x90b623890c63938b 0xb70c700b9de39c7d 0x28bdd272e47f9e88 0x9539798ec4e3ea34 0x3ef0763c6f6c3108 0x26ac5d6724e40a1b 0x6195a673243088f6 
0xdb5aba293e070ca4 0x98800aea4e5f0e4e 0x82c4134187537bc8 0x4ba979d8911b8e0e 0x943e41ad3cd01884 0x88e65d80db067bd7 0x7869b39765369333 0xaf50316f4c694b29 0xc451835f04a402e0 0x60dcb558ee7e6e2b 0x65234e499ff85e2a 0xe3dc8b2a11bd5edb 0x2a21e5d894243627 0x3ed8d69bd926c4cc 0x15bf0f6060684012 0x50ce7ee65b3c64a6 0x11cf02e2af1452d3 0xd8c84781d09f2c5c 0xd0bcaa5424f39d22 0xb251707659f54392 0x2bce134f4cc2db80 0xac8d00764ecd9b1e 0xd39e3d7e7c50dcd9 0x3bc9595caaa45365 0x4a8f4c6e9fb55bfe 0x3acabfc0bbaeab42 0xe8dc0d62f62fce39 0x87db2dfde390bb6b 0x21733320e5683477 0x382b5edf45b76468 0x0942a6f469259049 0x69d0104109910df2 0xcd97e90a0b92882f 0xccbc893007f0ce9e 0xc7f15af4b343b733 0x8452042dbf83c7b6 0x4cd332f6dcfcf88c 0x2af26987bfce6905 0xe906963392926ca5 0xe497fc1e943fa9f2 0x98a7949ac41b7f42 0x468f4c6bd9e09993 0xb7aa4263eacbfa1c 0x1046dafd21831081 0xdce0a46e7f40131a 0x02fb82dfbc2bc81f 0x24fb0ae41a50b4ce 0xb5f3c95c66a0eb55 0x4f6cb9ad702e23eb 0x53de1d08a184b063 0x6ec2daf2a3b654ad 0x0e1dce6a1f2683d8 0xfde0cc4dc717cb77 0x15ab25c3e644d3b3 0x4e4b23569f93d996 0xa081f0ade3ca6df5 0x49427fdef4f200ee 0xcf635cbfae34ebe2 0xd898e8639bb74469 0x4a86c97e8ee0654d 0x108f1bd0715a571f 0x47513cd35aecd66c 0xb9c0324084cdcc91 0x53b651c7d391dd3c 0x3752b5626ec39e99 0xb4b374a39bea99bb 0xa21b1a5d19d2a41c 0x8276063bed6548c5 0x71bc6f337c67476c 0x9c40e276168f4a94 0x4b445b3a5789d7c2 0xaf355b6b8f0c93cb 0xb4f15704ffb1c015 0x9aebd68bc4601ee9 0x7274be877312d407 0x1a8eda0149fb3c1e 0xac96a19dbddb1f43 0x289d606d06643c34 0x16dc1e37a3f591b5 0xe67f91eac997ba12 0x545f62407cfe3487 0x9ad438a940a78009 0x5cbc0cff8ea73089 0xe21a9f995af5be74 0x85802cfeb24a44d6 0xf31f35e62429f5cb 0x47b870132f0f2527 0x3bef8c28c62b7933 0xf9862f1954334aaa 0x6142043b9d7b8b81 0xc78a907965972287 0xe14b1d2a8d083d05 0xb1a871510b4f3f75 0x59a00d2aac09a4ef 0x7c97edc49af98314 0xdd4aab86f1152b2b 0x88bd0a6cd1b0307d 0xfdefc6d97ed5c95a 0xe28c7cca7215417d 0x7d2120fe52be9b7f 0xa02e5ee452ffad28 0x8dab828a23b62644 0x25b3328373eb47de 0xde0169b28c909cf4 0xb9b3c51169a7a836 
0x4541eb9d97eea0d6 0x1066c5b6d33156b5 0xaf4e1758f7c6645f 0xf436e47c3b2de674 0xd8debdd7895e6d80 0x615d5dded5a0383b 0x4834c108bc72e7b0 0x5ac8150a4d88bf44 0xff249baacfc83e7a 0x1adbe96930658b73 0x0aa6f579acb30710 0x30ec7277223022a2 0x39aa3d32743f07b4 0x8b822e40dfbcefa9 0x8119c3984a6d6fc4 0x228746c8f461f4ca 0xa7588248e15d4fb6 0xa900b1fe9351661d 0xfd6ee310a38cfc88 0x5cc2716dde3a6486 0x2a430535315c47c6 0xec8f741aa91d097a 0x325170f292f4d4b7 0xf71ad5c55a97aafa 0x3051eca81bdefbdc 0xc55f411439234e97 0x10fe11b12e91e37a 0xf081df2576233eec 0x9eb8940ca99473c9 0x5268bb96f025f00d 0x0281ee36f301d7ed 0xa0dfe16d6dafba1a 0x2c1414da9a9b5d5a 0x9c44677222d40889 0xae4747a3134dd86d 0x2d8892a98f291495 0xfb877e659c3ba71c 0xddf2cd76c64c3eae 0x86998597390f3524 0xed28981f85bb3386 0x55b153bfec5beeac 0x070f3b6614925bee 0x4eb1f4056f2ffa7f 0x78c6fd1608716443 0x9a7662241ec5ee22 0x271ec60c7e87c370 0xf06fd9db9054c3d2 0xa2723d1d26721f97 0xb078f617085673e5 0xdc78f1d0abc15908 0x943849291f25e178 0xbae22ce64af1206e 0x11c78544941df857 0x770ade0044d5f1fc 0xbc2878e716899306 0x94790cb9dea053b5 0x6bbde24988ce4f85 0xfeb8786c8d39aa85 0x1f97f351d463dcf9 0x7b4e291afc0e51d5 0x1faae3799dba6603 0x25815979ba54f03b 0xb02a9b623c9400f0 0xc4a07da834cbf427 0x8f5cc1076ea09031 0x2dea3ec81ab08515 0xe98683d1fb2b46e0 0xd03c1f8e1d3ab77d 0xf2c2c6d3e5f609ab 0x2a4de6d7e059318d 0x5ade1e2f78a73769 0x2662dcf712c0a5f9 0x14207cc1750e2a61 0xa02d796041e7f4c5 0x996adc6d965c2445 0x5dd4cb5847526843 0x0612d104e4e52c29 0x627c637c39b6587a 0xc04c4f4dc793d508 0xdd6cc43e981b9b46 0xec8c58354fba1cfb 0xbc57a1e2281000bf 0x856fe5ec82652f66 0x2350e203ab340e42 0xb74978ac55eb75fa 0x844067a42c59d22a 0x5bb67e9025bcb14b 0xc6882f42a876775d 0x537deec5107af383 0xa1b9e0408bb030ae 0x96f5422b40d40266 0xe6cd16792e1a8f4f 0xd0994934ced6fb04 0x48924c4026066397 0xe6554877dbefffdc 0x8acdae02c1b72b73 0x7002431784308714 0xec6113b8ebf9216d 0xccd7d92eb94a654d 0x55bf89d61ea45655 0x40dfe822e6fc1ea4 0xd871edcfa00e4eae 0x2b162c2c595b2ce0 0xfe78e58f4fa84c74 0x8761656ac9573dc6 
0xfb1c05d5c05ad042 0xcd6868216c342eae 0x438466743ae36016 0x090c7ae0a96ce094 0x84c585a68e7a3fd3 0x0f6b9eec0252f718 0x8144d103959c9cc8 0x9228d0f9530fe13e 0x8182289a02b0f3b8 0xc71701f3dc02224e 0xd4fea5e16b0d5abc 0x1bf89249443455f9 0x4d55abb33ea80876 0x30983ba4fc324bb0 0xbdbe2ca26b4c48f5 0x110c8c2a0ce2403f 0x28fb71fe1421a58e 0x897ca36224a0aeeb 0x873de78eae3414fa 0x5fc8368456d0a9a5 0x730c72bf20e283f8 0x7582ea92afd96933 0x109dc798de62815f 0x1334281c26f62ce5 0xc7351153d0631deb 0x0c7023b82da633bf 0xab1f57a9402eb30f 0xbb43ef1a4abfed2f 0xbb13ff409efeb1fc 0x16c77c66e400694e 0x5426e09c1deea6a8 0x79166753b5237b34 0x4dac916d35ab84db 0xb2ddcede1bc5dccf 0x2ca000aed2faada6 0x07b93902e0f10ca3 0xb181252e99021c2e 0x2a7cbcd596a23023 0x1d135b910a22e3cd 0x50cac25266319226 0xd58ef5433e09ed3d 0x2ba2a95166295246 0x104b0b90d7f54fcb 0x581e62e6e1effb88 0x3b45c52fb3a61216 0x9b49976a6f98f4eb 0x5ecfaa723c68195f 0x8bbec72e20caaf68 0xc6bcc3dd73e2ad16 0xeba0ca03b8cb6ce6 0x77ea36fa204a7dbc 0x032d39bedbe56a62 0xf602ba75320469bb 0x75ab379ae76f35d0 0xde954c6d2bc62abc 0xc67dc6587d5cb845 0x6dd3c792d70f1d9f 0xda648f505baf81a4 0x6db4fb04bf05696b 0x171a2898c06495a2 0x70f7573328116d96 0xa1b113a37b65e83b 0xb7dc61716efbe386 0x44ff43b2d0f72216 0xb7572d3ef3679377 0x01aa46678e35f96e 0x4793e9cefb9c00c5 0xb1b1cef3fa3e69da 0xd6d5ce0496318fda 0x69d94c713ea5f8ff 0xb472156b44138e70 0xa02166bb98773ebb 0xb91af940dff04058 0x9ceb3ab0aa02a266 0xe2100434f0fc2e43 0xb80cdd4bbf69e7d3 0xe7c922fe9b3e7dfd 0x3efad5419796a29b 0x87066640dc12ad77 0x395836ee2932bf23 0x8e31b88ecced3d2e 0x4961ec2d6c7a75bf 0x6cd4f08d12c54e2b 0x9f4dffcf1ddaa230 0xde123965919adba9 0x85e8c4d2146b9a09 0x9e08deb3621df068 0xe0fd112de4b1269a 0xce27bb4d2a13cc8f 0x32da8b6a6be75911 0x74e5dc1240d07647 0x99f0a90a8e1744c6 0x51f94d8711994a47 0x690ca685dce349e9 0xda0c25b35ec3c056 0x1ddea693f42c7d0f 0x23ab3c219c040475 0x3117b89a22075ecf 0x8b729c188838cc11 0xf70c2e398725c02c 0xb5bf35fe12a37678 0x1f0d26466bf81f25 0x20035f8d7e67489e 0x536e07502c93e26d 0x0289b3b250176742 
0xf0808a555109f37a 0x57ad69a0b71ab441 0x1c81fa7d03800bdf 0xdf4febd9d519ccce 0x3b197c4de921087e 0xda7c01969094f8f2 0x1bc17b9c04f6c5a5 0x89e4e1442e1d96d2 0x05ae8ea263a5551a 0x6a01bac34ab09948 0x0b3e4f6e4a88c0f0 0x4ecd511935dc55fe 0xa86534eb3311fa12 0x5b30a6c3a1e4141f 0xa7cb6e8360649ab6 0xac9f884e6cfdacf4 0xf91bedc58f459fbc 0x0fc06df57546cd8d 0xbe67f7c136080899 0x56abb03af17d6abf 0x724d1581df36e444 0x6d348658a786cf13 0x15657b71facd5394 0x870833f43ea4f44c 0x0c6b401a4ac5a9cf 0xac11311d9fdc1be0 0x476db893090dbf50 0x6413363a40a0cc4d 0x656747030824b882 0xc7763af198a3a915 0xc75b4c22db7f00e4 0xae0908746889482f 0x105715af45acd194 0x484b10f834c570c8 0x29ac2c4625e5ebe7 0x1ceda07f07739b02 0x25b8f899688c1e3e 0x9dffd257982a8139 0x6ab245d75fd640ad 0xfd4a57561629ffa3 0xc6d726b0652e522f 0xbcee4526af1030dd 0x43ea89479729db21 0xf0293df6e9672dec 0xd4bc658e845060e1 0xd3a76977a87cc56a 0x3e9d83e4f81d87ba 0x37f417d10048bc8d 0x14813d8b4553c5ae 0x574426b7bcdf149a 0x9f791183f3df9a17 0xb155609334da4198 0xb4c3a5805d905e94 0x12be857025a4a3eb 0x987445ae6fa8f849 0x428543effdb537ca 0x05184d6f0c8af720 0x5f5a26958fc4f110 0x327feea9b8040719 0x20f1368a9c267469 0x142bd97ddce3145f 0xb6dde259f3962e5b 0xa5b7dfcf9c74d178 0x4a18aeb68cb7ee39 0xb7cb562ea710998c 0x192c6a2440aa4799 0x262f549ff36ca087 0x00f1af86f7765419 0x4fcaee4d2099f234 0xcb4823b5e460d68f 0xb8739346ee15f7c7 0xe43f60dfcb722680 0xe588a07930826597 0xf11726b1fc2109e9 0x2424847c52585435 0x594ca4d5eeaee507 0x960a8b2c5b3f2315 0xf967389af1bc6066 0x9f5e1091a412689b 0x62c846564646bc24 0x700fc081c9ad8622 0x054e53d585fed025 0x8393936ba2f979b0 0xb7c891fc56a0e600 0x2055bd49358e42d3 0x10cc9952ec1e0244 0x5dec53aadd0f64cf 0x5e2b688880e60374 0x26e5823522a3d1f7 0x59263a46b0645a04 0xf8ea1b64930f2e4b 0x36e9fd75c4e84705 0x4d196437dc1e9d07 0xc5c13dbebb428a60 0x0dcfbb9e0dd9357a 0x7431271fe51ae20f 0x7578310058c9bc41 0xedab646a7a08e4e7 0x1f098e77ac40fb45 0xdeb6b54c5f31ee6a 0x7d4953f9398fdf64 0x9f28504dddbf678f 0xe3a56dee1a79e9f0 0x5cc098e3879b9e87 0xc0b3e036a7160df6 
0x345202b60f7d5fb9 0xde5f13f2f373b5d5 0x09be9e199ea0e9d9 0x750064ba7fef6ab6 0x8393d8eed970a861 0x90b992785db1e2ce 0x66a200d30926cb63 0x08d2a52a1be49bcf 0xb1d396ed4bd1e92d 0x0067b3f585de736c 0x3e203ee543d7bc8e 0x7f660ac01586461f 0x2ef0b714fe7da812 0xd02030c778ab097c 0xbe9b971cd60bc342 0xb1cf6f572000cd53 0x7090e8f533244d92 0x4b3d3eb42bc0616f 0x4abcb0dc9345cec3 0x3315735b2eaa1dc1 0x1968b53c2948c9fe 0x6f832e2ab85716fb 0xf680b4690f452fe7 0x35634e189261c27a 0xb1655320b6881a39 0x3aa4f712d8d74eb6 0xe7f8db6934680789 0x31a395a5ef322e71 0x9e8108f1e7bfbf9b 0x32dc1fc02c568ef1 0x1c7e39ba8e98e717 0x98bb5d9f1719de9c 0x64fad9fd4e9e04ac 0xc43c4ef84c1a749a 0xf172bfdcb082ae79 0x775944b64825cc94 0x6752a18cc5cbd881 0x0bcd3e25a4cd6344 0xdc00a7d88f1be5a6 0xfdf241d4b98b15c7 0x6ffccf1d3347e63e 0xb4985dd04e81f326 0x798f4cfb661bbd4b 0xa2013d7415eb3df7 0x879a90fdc3315936 0xdf037725e8829def 0x3eee6d747de55fd1 0x1950ae14c16199ba 0xf32d6b0bbb81943f 0x6b813e655734bbf8 0x63f5368932cfe7b8 0xf4ebf4ca577ec930 0xe7a5ec5bab21afba 0x804feff47c04c4b7 0x2e3da596c444bf41 0x21be1d62146c81da 0xfaa880de93886ac0 0x88987a63ea1750e6 0xce299bb9e0c40dd8 0x6d2d3a48162f4f0d 0xfb873ecc6261d540 0x665184a16cbfeca9 0x99f08162522947e1 0xb678afe1ae26f80e 0x81438967c30cec52 0x7b84c7088847f470 0xdf8e250fe4392e3d 0xde420f611e212a49 0x247e7a9bc296553f 0xcda4431f6214d257 0x0fe8ce1a5e7264ac 0x7e2e9d87db4e18fc 0x12e9b0a508d5e4e0 0xdef99c379a602fc6 0x772b91fb215e3f6c 0x29bb880dcd669c11 0x604e2a3e80d1a980 0x62db806e1dacccb7 0x5a9925e5d386b369 0x464807efe2c001cb 0x0681067ba9e69114 0xc4d7c8c2a7123d6a 0x4a3285f39878a215 0x7432ddf4653a9cb8 0xd007940d70c24b13 0x5608cb9f87571fc8 0x7a2c5b232b41ebdd 0x99245a3f8c434fd8 0x8acbdd231982f5a2 0xfa50e2c5460ba07f 0xb2b5383501d97388 0x91722d48b0a05a3e 0xe92bd4a4ad9bd471 0xf8b226909751d1ac 0x3a84feeb4efe53ea 0x1bf2c0769fe54fae 0x1f06a43bf2b2bb23 0x6d89b57008409736 0x68d2563f7fc1319a 0x4cca7c28306c60d5 0x45532d245acdef1b 0xda535f9dec96bbab 0x25451d82b9aa12b3 0xc2a354b5d8c63228 0x1c1f97d0851becc0 
0x9324fa1d5e1b4a44 0xdb312686295300fa 0x92e8ee2945f76afc 0x102b3df2dea6e8b2 0x309d7a9e07174ebf 0xa0d2445e0bebc266 0xdfb983a73c6afd26 0x52cfe364f3957e2d 0x72b6f2ad68342515 0x1383fb2184f4ab50 0xfc50740a6dc0d7a0 0x12c03f1b30b409a3 0x195f3359d8d3d697 0xb1a696de263a9206 0xbfa9d96984833e72 0xbc6b844babe41595 0xd72507fee5d57c9b 0x48ad095be6f55861 0x76942ad5903bb97c 0xd4b006bd9e4de5eb 0x0ae6d222ed88e74c 0x37aa525e18213b95 0x3e87311c62589252 0x9db5c12f91a6f728 0xe9dd60e4310fa419 0xe5ab03d0f0a1f978 0xc699639ec932afb5 0xc2e90bc8cddee0ab 0xafa68bbcb8d7ddd0 0x7a8c80def2c5c00f 0xb6caf5c351a3f9e2 0x149e219d648cff15 0x55e3b95cfb7941c9 0x445df7728165470e 0x246e3b9dde51bb60 0x7f47c1e9e9e105cc 0x2a78b27625fe4e3e 0xf7e647125a81184b 0x35615c5ac93e3ee2 0xab3d5ee5f30c6542 0x66adaa81be80a255 0xba3dee726484d20a 0x81fd51257a971385 0xb6a79592d5164e85 0xd3333835d170e9bd 0x5debdffeff4d63b0 0xe405e71c9c7fe247 0x64df8c009c73b84e 0x850fb06b2c70c144 0x1619917459f3cff2 0x50738716b1e7b305 0xa361ad73caf95820 0x6cbee1e292279728 0xd3a60974113b480f 0x1333cc2da08bd7d0 0x9e983992c88d0c05 0xf3885e54ee8d8864 0xbb72960373ad2100 0x7fce723754efe9c5 0x0d04185678b2aa93 0x2bd575cf4c798b50 0x03deee9bed8c238b 0x4dd3e83aef99b423 0x49575eddb23bcd0c 0xa86925782bd40519 0x61604398fc35ed8b 0x0030c4447d5843d7 0x97f557e742d44f66 0xe478030253f33ed2 0xb5b8b264f2499aab 0xa66da3eef7c6a8b7 0x48c5802dde4e9c48 0x892fe0c182a44518 0x40da78f7fc26965f 0xef69f38439b57f01 0x41ce76b97047c1f5 0x11d6df8a7e933468 0x0e68110350ac736e 0x4993e7a6fe133ef1 0x3ee9280e32bf67ad 0x9dd5df301cd57953 0x1e541a2e250db81e 0x193c71118501bda9 0xef943420618b4a08 0x88496f019fcee0bc 0xbea7b0911223aabe kmer-code-2013-trunk/libutil/mt19937ar/tt800.c0000644000000000000000000000375610704040232017220 0ustar rootroot/* http://random.mat.sbg.ac.at/ftp/pub/data/tt800.c */ /* A C-program for TT800 : July 8th 1996 Version */ /* by M. 
Matsumoto, email: matumoto@math.keio.ac.jp */ /* genrand() generate one pseudorandom number with double precision */ /* which is uniformly distributed on [0,1]-interval */ /* for each call. One may choose any initial 25 seeds */ /* except all zeros. */ /* See: ACM Transactions on Modelling and Computer Simulation, */ /* Vol. 4, No. 3, 1994, pages 254-266. */ #include #define N 25 #define M 7 double genrand() { unsigned long y; static int k = 0; static unsigned long x[N]={ /* initial 25 seeds, change as you wish */ 0x95f24dab, 0x0b685215, 0xe76ccae7, 0xaf3ec239, 0x715fad23, 0x24a590ad, 0x69e4b5ef, 0xbf456141, 0x96bc1b7b, 0xa7bdf825, 0xc1de75b7, 0x8858a9c9, 0x2da87693, 0xb657f9dd, 0xffdc8a9f, 0x8121da71, 0x8b823ecb, 0x885d05f5, 0x4e20cd47, 0x5a9ad5d9, 0x512c0c03, 0xea857ccd, 0x4cc1d30f, 0x8891a8a1, 0xa6b7aadb }; static unsigned long mag01[2]={ 0x0, 0x8ebfd028 /* this is magic vector `a', don't change */ }; if (k==N) { /* generate N words at one time */ int kk; for (kk=0;kk> 1) ^ mag01[x[kk] % 2]; } for (; kk> 1) ^ mag01[x[kk] % 2]; } k=0; } y = x[k]; y ^= (y << 7) & 0x2b5b2500; /* s and b, magic vectors */ y ^= (y << 15) & 0xdb8b0000; /* t and c, magic vectors */ y &= 0xffffffff; /* you may delete this line if word size = 32 */ /* the following line was added by Makoto Matsumoto in the 1996 version to improve lower bit's corellation. Delete this line to o use the code published in 1994. 
*/ y ^= (y >> 16); /* added to the 1994 version */ k++; return( (double) y / (unsigned long) 0xffffffff); } /* this main() output first 50 generated numbers */ main() { int j; for (j=0; j<100000; j++) { printf("%5f ", genrand()); if (j%8==7) printf("\n"); } printf("\n"); } kmer-code-2013-trunk/libutil/mt19937ar/Make.include0000644000000000000000000000132511512763666020432 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../../libutil/)/ MTDIR/ :=${LIBUTL/}mt19937ar/ $/.C_EXES := $/mt19937ar-test $/.C_SRCS := $/mt19937ar.c $/test.c $/.C_LIBS := $/libmt19937ar.a $/.CLEAN := $/*.o $/test.c $/diffs $/.REAL-CLEAN := $/*.o $/test.c $/diffs $/mt19937ar-test $/libmt19937ar.a: $/mt19937ar.o $/test.o $/mt19937ar-test: $/mt19937ar.o $/mt19937ar-test.o $/test.c: $/mt19937ar-test ${MTDIR/}mt19937ar-test | diff - ${MTDIR/}mt19937ar.out > ${MTDIR/}diffs 2>&1 if test -s ${MTDIR/}diffs ; then echo 'MT19937: TEST FAILED'; else echo 'MT19937: Test Passed'; fi touch ${MTDIR/}test.c ${MTDIR/}mt19937ar-test | diff - ${MTDIR/}mt19937ar.out #$(eval $/%.d $/%.o: CFLAGS+= -I..) kmer-code-2013-trunk/libutil/mt19937ar/mt19937ar.c0000644000000000000000000001257312322046702017724 0ustar rootroot/* A C-program for MT19937, with initialization improved 2002/1/26. Coded by Takuji Nishimura and Makoto Matsumoto. Before using, initialize the state by using init_genrand(seed) or init_by_array(init_key, key_length). Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
The names of its contributors may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Any feedback is very welcome. http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) */ #include "mt19937ar.h" #include #include // Buried in genrand_in32 was this: // if init_genrand() has not been called, // a default initial seed is used // // if (ctx->mti == N+1) // init_genrand(5489UL); // // But we don't need that anymore, as we require for // thread-safety that init_genrand be called. // initialize with a single seed mt_s* mtInit(uint32 s) { mt_s *ctx = (mt_s *)malloc(sizeof(mt_s)); if (ctx == NULL) return(NULL); ctx->mt[0] = s; // See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. // In the previous versions, MSBs of the seed affect // only MSBs of the array mt[]. 
// 2002/01/09 modified by Makoto Matsumoto for (ctx->mti=1; ctx->mtimti++) ctx->mt[ctx->mti] = (1812433253UL * (ctx->mt[ctx->mti-1] ^ (ctx->mt[ctx->mti-1] >> 30)) + ctx->mti); ctx->mag01[0] = uint32ZERO; ctx->mag01[1] = MT_MATRIX_A; return(ctx); } /* initialize by an array with array-length */ /* init_key is the array for initializing keys */ /* key_length is its length */ /* slight change for C++, 2004/2/26 */ mt_s* mtInitArray(uint32 *init_key, uint32 key_length) { mt_s *ctx = mtInit(19650218UL); int i = 1; int j = 0; int k = (MT_N > key_length ? MT_N : key_length); for (; k; k--) { ctx->mt[i] = (ctx->mt[i] ^ ((ctx->mt[i-1] ^ (ctx->mt[i-1] >> 30)) * 1664525UL)) + init_key[j] + j; /* non linear */ i++; j++; if (i >= MT_N) { ctx->mt[0] = ctx->mt[MT_N-1]; i=1; } if (j >= key_length) j=0; } for (k=MT_N-1; k; k--) { ctx->mt[i] = (ctx->mt[i] ^ ((ctx->mt[i-1] ^ (ctx->mt[i-1] >> 30)) * 1566083941UL)) - i; /* non linear */ i++; if (i>=MT_N) { ctx->mt[0] = ctx->mt[MT_N-1]; i=1; } } ctx->mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ return(ctx); } /* generates a random number on [0,0xffffffff]-interval */ uint32 mtRandom32(mt_s *ctx) { uint32 y; // generate MT_N words at one time // if (ctx->mti >= MT_N) { int kk; for (kk=0; kk < MT_N - MT_M; kk++) { y = (ctx->mt[kk] & MT_UPPER_MASK) | (ctx->mt[kk+1] & MT_LOWER_MASK); ctx->mt[kk] = ctx->mt[kk + MT_M] ^ (y >> 1) ^ ctx->mag01[y & uint32ONE]; } for (; kk < MT_N-1; kk++) { y = (ctx->mt[kk] & MT_UPPER_MASK) | (ctx->mt[kk + 1] & MT_LOWER_MASK); ctx->mt[kk] = ctx->mt[kk + (MT_M - MT_N)] ^ (y >> 1) ^ ctx->mag01[y & uint32ONE]; } y = (ctx->mt[MT_N-1] & MT_UPPER_MASK) | (ctx->mt[0] & MT_LOWER_MASK); ctx->mt[MT_N-1] = ctx->mt[MT_M-1] ^ (y >> 1) ^ ctx->mag01[y & uint32ONE]; ctx->mti = 0; } y = ctx->mt[ctx->mti++]; /* Tempering */ y ^= (y >> 11); y ^= (y << 7) & 0x9d2c5680UL; y ^= (y << 15) & 0xefc60000UL; y ^= (y >> 18); return y; } // generates a random number on gaussian distribution with 0 median and 1 
std.dev. double mtRandomGaussian(mt_s *mt) { double x1=0, x2=0, w=0, y1=0, y2=0; // from http://www.taygeta.com/random/gaussian.html // // supposedly equivalent to // // y1 = sqrt(-2*ln(x1)) cos(2*pi*x2) // y2 = sqrt(-2*ln(x1)) sin(2*pi*x2) // // but stable when x1 close to zero do { x1 = 2.0 * mtRandomRealClosed(mt) - 1.0; x2 = 2.0 * mtRandomRealClosed(mt) - 1.0; w = x1 * x1 + x2 * x2; } while (w >= 1.0); w = sqrt( (-2.0 * log(w)) / w); y1 = x1 * w; y2 = x2 * w; return(y1); } kmer-code-2013-trunk/libutil/mt19937ar/mt19937ar-test.c0000644000000000000000000000147212322046702020675 0ustar rootroot#include "mt19937ar.h" // The MD5 checksum of the correct output is // cb33e6acc162cbe20f7fcac26adddd02 // and it is 22465 bytes long. // // but we cannot use md5, as it's in libbri, and // so is this... int main(void) { int i; uint32 init[4] = {0x123, 0x234, 0x345, 0x456}; uint32 length = 4; mt_s *ctx = mtInitArray(init, length); printf("1000 outputs of genrand_int32()\n"); for (i=0; i<1000; i++) { printf(uint32FMTW(10)" ", mtRandom32(ctx)); if (i%5==4) printf("\n"); } printf("\n1000 outputs of genrand_real2()\n"); for (i=0; i<1000; i++) { printf("%10.8f ", mtRandomRealOpen(ctx)); if (i%5==4) printf("\n"); } for (i=0; i<999; i++) { printf(uint64HEX" ", mtRandom64(ctx)); if (i%3==2) printf("\n"); } return 0; } kmer-code-2013-trunk/libutil/mt19937ar/mt19937ar.readme0000755000000000000000000000532710041470733020742 0ustar rootrootThis is a Mersenne Twister pseudorandom number generator with period 2^19937-1 with improved initialization scheme, modified on 2002/1/26 by Takuji Nishimura and Makoto Matsumoto. Contents of this tar ball: readme-mt.txt this file mt19937ar.c the C source (ar: initialize by ARray) mt19937ar.out Test outputs of six types generators. 1000 for each 1. Initialization The initialization scheme for the previous versions of MT (e.g. 
1999/10/28 version or earlier) has a tiny problem, that the most significant bits of the seed are not well reflected in the state vector of MT. This version (2002/1/26) has two initialization schemes: init_genrand(seed) and init_by_array(init_key, key_length). init_genrand(seed) initializes the state vector by using one unsigned 32-bit integer "seed", which may be zero. init_by_array(init_key, key_length) initializes the state vector by using an array init_key[] of unsigned 32-bit integers of length key_length. If key_length is smaller than 624, then each array of 32-bit integers gives a distinct initial state vector. This is useful if you want a larger seed space than 32-bit word. 2. Generation After initialization, the following types of pseudorandom numbers are available. genrand_int32() generates unsigned 32-bit integers. genrand_int31() generates unsigned 31-bit integers. genrand_real1() generates uniform real in [0,1] (32-bit resolution). genrand_real2() generates uniform real in [0,1) (32-bit resolution). genrand_real3() generates uniform real in (0,1) (32-bit resolution). genrand_res53() generates uniform real in [0,1) with 53-bit resolution. Note: the last five functions call the first one. If you need more speed for these five functions, you may suppress the function call by copying genrand_int32() and replacing the last return(), following to these five functions. 3. main() main() is an example to initialize with an array of length 4, then 1000 outputs of unsigned 32-bit integers, then 1000 outputs of real [0,1) numbers. 4. The outputs The output of the mt19937ar.c is in the file mt19937ar.out. If you revise or translate the code, check the output by using this file. 5. Cryptography This generator is not cryptographically secure. You need to use a one-way (or hash) function to obtain a secure random sequence. 6. Correspondence See: URL http://www.math.keio.ac.jp/matumoto/emt.html email matumoto@math.keio.ac.jp, nisimura@sci.kj.yamagata-u.ac.jp 7. Reference M.
Matsumoto and T. Nishimura, "Mersenne Twister: A 623-Dimensionally Equidistributed Uniform Pseudo-Random Number Generator", ACM Transactions on Modeling and Computer Simulation, Vol. 8, No. 1, January 1998, pp 3--30. ------- Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, All rights reserved. kmer-code-2013-trunk/libutil/util.c0000644000000000000000000000420612322046702015733 0ustar rootroot#include #include #include #include #include #include #include #include #include #include #include #include "util.h" double getTime(void) { struct timeval tp; gettimeofday(&tp, NULL); return(tp.tv_sec + (double)tp.tv_usec / 1000000.0); } uint64 getProcessSizeCurrent(void) { struct rusage ru; uint64 sz = 0; errno = 0; if (getrusage(RUSAGE_SELF, &ru) == -1) { fprintf(stderr, "getProcessSizeCurrent()-- getrusage(RUSAGE_SELF, ...) failed: %s\n", strerror(errno)); } else { sz = ru.ru_maxrss; sz *= 1024; } return(sz); } uint64 getProcessSizeLimit(void) { struct rlimit rlp; uint64 sz = ~uint64ZERO; errno = 0; if (getrlimit(RLIMIT_DATA, &rlp) == -1) { fprintf(stderr, "getProcessSizeLimit()-- getrlimit(RLIMIT_DATA, ...) failed: %s\n", strerror(errno)); } else { sz = rlp.rlim_cur; } return(sz); } void * memdup(const void *orig, size_t size) { void *rslt = NULL; if ((orig != NULL) && (size > 0)) { errno = 0; rslt = malloc(size); if (errno) { // Some ugliness to print out a size_t. This might be useless, // as it might be determined by TRUEINT64. 
// if (sizeof(size_t) == 8) fprintf(stderr, "memdup()-- can't allocate "int64FMT" bytes.\n%s\n", (int64)size, strerror(errno)); else fprintf(stderr, "memdup()-- can't allocate "uint32FMT" bytes.\n%s\n", (uint32)size, strerror(errno)); exit(1); } memcpy(rslt, orig, size); } return(rslt); } int fileExists(const char *path) { struct stat s; return(stat(path, &s) == 0); } off_t sizeOfFile(const char *path) { struct stat s; errno = 0; if (stat(path, &s) != 0) fprintf(stderr, "Couldn't stat() '%s'\n%s\n", path, strerror(errno)), exit(1); return(s.st_size); } uint64 timeOfFile(const char *path) { struct stat s; errno = 0; if (stat(path, &s) != 0) fprintf(stderr, "Couldn't stat() '%s'\n%s\n", path, strerror(errno)), exit(1); return(s.st_mtime); } kmer-code-2013-trunk/libutil/intervalList.H0000644000000000000000000003771312516022565017422 0ustar rootroot#ifndef INTERVALLIST_H #define INTERVALLIST_H #include // iNum - lo, hi - coordinates of the interval // iVal - va - data stored at each interval // uint32 - ct - number of elements in this interval // - when merged, needs function that converts multiple iVal and a uint32 into a single iVal template class _intervalPair { public: iNum lo; iNum hi; uint32 ct; // Number of source intervals iVal va; // Value at this interval; default is 1 bool operator<(const _intervalPair &that) const { if (lo != that.lo) return(lo < that.lo); return(hi < that.hi); }; }; template class intervalDepthRegions { public: iNum pos; // Position of the change in depth iVal change; // The value associated with this object; added or subtracted from 'va'. 
bool open; // If true, the start of a new interval bool operator<(const intervalDepthRegions &that) const { if (pos != that.pos) return(pos < that.pos); return(open > that.open); }; }; template class intervalList { public: intervalList(uint32 initialSize=32) { _isSorted = true; _isMerged = true; _listLen = 0; _listMax = initialSize; _list = new _intervalPair [_listMax]; }; // Takes as input an unmerged intervalList, returns to a new set of intervals, one // for each 'depth'. Two intervals, (1,4) and (2,6) would return 'depths': // 1,2,1 bgn=1, end=2, depth=1 // 2,4,2 // 4,6,1 // intervalList(intervalList &IL) { _isSorted = false; _isMerged = false; _listLen = 0; _listMax = 0; _list = 0L; depth(IL); }; intervalList(intervalDepthRegions *id, uint32 idlen) { _isSorted = false; _isMerged = false; _listLen = 0; _listMax = 0; _list = 0L; #ifdef _GLIBCXX_PARALLEL // Don't use the parallel sort, not worth the expense of starting threads. __gnu_sequential::sort(id, id + idlen); #else std::sort(id, id + idlen); #endif computeDepth(id, idlen); }; ~intervalList() { delete [] _list; }; intervalList &operator=(intervalList &src); void clear(void) { _isSorted = true; _isMerged = true; _listLen = 0; } void add(iNum position, iNum length, iVal value=0); void sort(void); void merge(uint32 minOverlap=0); // Merge overlapping regions void merge(intervalList *IL); // Insert IL into this list void intersect(intervalList &A, intervalList &B); uint32 overlapping(iNum lo, iNum hi, uint32 *&intervals, uint32 &intervalsLen, uint32 &intervalsMax); // Populates this intervalList with regions in A that are completely // contained in a region in B. // // Both A and B call merge(). 
// void contained(intervalList &A, intervalList &B); void invert(iNum lo, iNum hi); void depth(intervalList &A); uint32 numberOfIntervals(void) { return(_listLen); }; iNum sumOfLengths(void) { iNum len = 0; uint32 i = numberOfIntervals(); if (i > 0) while (i--) len += _list[i].hi - _list[i].lo; return(len); }; iNum &lo(uint32 i) { return(_list[i].lo); }; iNum &hi(uint32 i) { return(_list[i].hi); }; uint32 &count(uint32 i) { return(_list[i].ct); }; // Number of source intervals. uint32 &depth(uint32 i) { return(_list[i].ct); }; // Depth, if converted. iVal &value(uint32 i) { return(_list[i].va); }; // Value or sum of values. private: void computeDepth(intervalDepthRegions *id, uint32 idlen); bool _isSorted; bool _isMerged; uint32 _listMax; uint32 _listLen; _intervalPair *_list; }; template intervalList & intervalList::operator=(intervalList &src) { _isSorted = src._isSorted; _isMerged = src._isMerged; if (_listMax < src._listMax) { delete [] _list; _listMax = src._listMax; _list = new _intervalPair [_listMax]; } _listLen = src._listLen; memcpy(_list, src._list, _listLen * sizeof(_intervalPair)); return(*this); } template void intervalList::add(iNum position, iNum length, iVal val) { if (_listLen >= _listMax) { _listMax *= 2; _intervalPair *l = new _intervalPair [_listMax]; memcpy(l, _list, sizeof(_intervalPair) * _listLen); delete [] _list; _list = l; } _list[_listLen].lo = position; _list[_listLen].hi = position + length; _list[_listLen].ct = 1; _list[_listLen].va = val; // Could optimize, and search the list to see if these are false, // but that's rather expensive. _isSorted = false; _isMerged = false; _listLen++; } template void intervalList::sort(void) { if (_isSorted) return; if (_listLen > 1) #ifdef _GLIBCXX_PARALLEL // Don't use the parallel sort, not with the expense of starting threads. 
__gnu_sequential::sort(_list, _list + _listLen); #else std::sort(_list, _list + _listLen); #endif _isSorted = true; } template void intervalList::merge(uint32 minOverlap) { uint32 thisInterval = 0; uint32 nextInterval = 1; if (_isMerged) return; sort(); while (nextInterval < _listLen) { if ((_list[thisInterval].lo == 0) && (_list[thisInterval].hi == 0)) { // Our interval is empty. Copy in the interval we are // examining and move to the next. // XXX This is probably useless, thisInterval should always be // valid. _list[thisInterval].lo = _list[nextInterval].lo; _list[thisInterval].hi = _list[nextInterval].hi; _list[thisInterval].ct = _list[nextInterval].ct; _list[thisInterval].ct = _list[nextInterval].va; _list[nextInterval].lo = 0; _list[nextInterval].hi = 0; nextInterval++; } else { // This interval is valid. See if it overlaps with the next // interval. bool intersects = false; if ((_list[thisInterval].lo <= _list[nextInterval].lo) && (_list[nextInterval].hi <= _list[thisInterval].hi)) // next is contained in this intersects = true; if (_list[thisInterval].hi - minOverlap >= _list[nextInterval].lo) // next has thick overlap to this intersects = true; if (intersects) { // Got an intersection. // Merge nextInterval into thisInterval -- the hi range // is extended if the nextInterval range is larger. // if (_list[thisInterval].hi < _list[nextInterval].hi) _list[thisInterval].hi = _list[nextInterval].hi; _list[thisInterval].ct += _list[nextInterval].ct; _list[thisInterval].va += _list[nextInterval].va; // Clear the just merged nextInterval and move to the next one. // _list[nextInterval].lo = 0; _list[nextInterval].hi = 0; _list[nextInterval].ct = 0; _list[nextInterval].va = 0; nextInterval++; } else { // No intersection. Move along. Nothing to see here. // If there is a gap between the target and the examine (we // must have merged sometime in the past), copy examine to // the next target. 
thisInterval++; if (thisInterval != nextInterval) { _list[thisInterval].lo = _list[nextInterval].lo; _list[thisInterval].hi = _list[nextInterval].hi; _list[thisInterval].ct = _list[nextInterval].ct; _list[thisInterval].va = _list[nextInterval].va; } nextInterval++; } } } if (thisInterval+1 < _listLen) _listLen = thisInterval + 1; _isMerged = true; } template void intervalList::merge(intervalList *IL) { for (uint32 i=0; i_listLen; i++) add(IL->_list[i].lo, IL->_list[i].hi - IL->_list[i].lo); } template void intervalList::invert(iNum invlo, iNum invhi) { merge(); // Create a new list to store the inversion // uint32 invLen = 0; uint32 invMax = _listLen + 2; _intervalPair *inv = new _intervalPair [invMax]; // Add the zeroth and only? if (_listLen == 0) { inv[invLen].lo = invlo; inv[invLen].hi = invhi; inv[invLen].ct = 1; inv[invLen].va = 0; invLen++; } // Add the first, then the pieces, then the last // else { if (invlo < _list[0].lo) { inv[invLen].lo = invlo; inv[invLen].hi = _list[0].lo; inv[invLen].ct = 1; inv[invLen].va = 0; invLen++; } for (uint32 i=1; i<_listLen; i++) { if (_list[i-1].hi < _list[i].lo) { inv[invLen].lo = _list[i-1].hi; inv[invLen].hi = _list[i].lo; inv[invLen].ct = 1; inv[invLen].va = 0; invLen++; } } if (_list[_listLen-1].hi < invhi) { inv[invLen].lo = _list[_listLen-1].hi; inv[invLen].hi = invhi; inv[invLen].ct = 1; inv[invLen].va = 0; invLen++; } } assert(invLen <= invMax); // Nuke the old list, swap in the new one delete [] _list; _list = inv; _listLen = invLen; _listMax = invMax; } template void intervalList::intersect(intervalList &A, intervalList &B) { A.merge(); B.merge(); uint32 ai = 0; uint32 bi = 0; while ((ai < A.numberOfIntervals()) && (bi < B.numberOfIntervals())) { uint32 al = A.lo(ai); uint32 ah = A.hi(ai); uint32 bl = B.lo(bi); uint32 bh = B.hi(bi); uint32 nl = 0; uint32 nh = 0; // If they intersect, make a new region // if ((al <= bl) && (bl < ah)) { nl = bl; nh = (ah < bh) ? 
ah : bh; } if ((bl <= al) && (al < bh)) { nl = al; nh = (ah < bh) ? ah : bh; } if (nl < nh) add(nl, nh - nl); // Advance the list with the earlier region. // if (ah < bh) { // A ends before B ai++; } else if (ah > bh) { // B ends before A bi++; } else { // Exactly the same ending! ai++; bi++; } } } // Populates an array with the intervals that are within the supplied interval. // // Naive implementation that is easy to verify (and that works on an unsorted list). // template uint32 intervalList::overlapping(iNum rangelo, iNum rangehi, uint32 *&intervals, uint32 &intervalsLen, uint32 &intervalsMax) { if (intervals == 0L) { intervalsMax = 256; intervals = new uint32 [intervalsMax]; } intervalsLen = 0; for (uint32 i=0; i<_listLen; i++) { if ((rangelo <= _list[i].hi) && (rangehi >= _list[i].lo)) { if (intervalsLen >= intervalsMax) { intervalsMax *= 2; uint32 *X = new uint32 [intervalsMax]; memcpy(X, intervals, sizeof(uint32) * intervalsLen); delete [] intervals; intervals = X; } intervals[intervalsLen++] = i; } } return(intervalsLen); } template void intervalList::contained(intervalList &A, intervalList &B) { A.merge(); B.merge(); uint32 ai = 0; uint32 bi = 0; while ((ai < A.numberOfIntervals()) && (bi < B.numberOfIntervals())) { uint32 al = A.lo(ai); uint32 ah = A.hi(ai); uint32 bl = B.lo(bi); uint32 bh = B.hi(bi); // If A is contained in B, make a new region. // if ((bl <= al) && (ah <= bh)) add(bl, bh - bl); #if 0 if ((al <= bl) && (bh <= ah)) add(al, ah - al); #endif // Advance the list with the earlier region. // if (ah < bh) { // A ends before B ai++; } else if (ah > bh) { // B ends before A bi++; } else { // Exactly the same ending! ai++; bi++; } } } template void intervalList::depth(intervalList &IL) { uint32 idlen = IL.numberOfIntervals() * 2; intervalDepthRegions *id = new intervalDepthRegions [idlen]; for (uint32 i=0; i void intervalList::computeDepth(intervalDepthRegions *id, uint32 idlen) { // No intervals input? No intervals output. 
_listLen = 0; if (idlen == 0) return; // Sort by coordinate. #ifdef _GLIBCXX_PARALLEL // Don't use the parallel sort, not with the expense of starting threads. __gnu_sequential::sort(id, id + idlen); #else std::sort(id, id + idlen); #endif // Scan the list, counting how many times we change depth. #if 0 uint32 lm = 1; for (uint32 i=1; i [_listMax]; } // Init first interval. assert(id[0].open == true); _list[_listLen].lo = id[0].pos; _list[_listLen].hi = id[0].pos; _list[_listLen].ct = 1; _list[_listLen].va = id[0].change; uint32 nct; iVal nva; for (uint32 i=1; i 1) && (_list[_listLen-1].hi == _list[_listLen].lo) && (_list[_listLen-1].ct == _list[_listLen].ct) && (_list[_listLen-1].va == _list[_listLen].va)) { _list[_listLen-1].hi = _list[_listLen].hi; _listLen--; } #if 0 fprintf(stderr, "id[%2d] - list[%u] = lo=%u hi=%u ct=%u va=%f\n", i, _listLen, _list[_listLen].lo, _list[_listLen].hi, _list[_listLen].ct, _list[_listLen].va); #endif } assert(_listLen > 0); assert(_listLen <= _listMax); } #endif // INTERVALLIST_H kmer-code-2013-trunk/README.sim4db0000644000000000000000000003147311515726327015230 0ustar rootrootsim4db - batch spliced alignment of cDNA (EST, mRNA) sequences to a target genome, of the same or a related species Described in the publication: B. Walenz, L. Florea (2010) Sim4db and leaff: Utilities for fast batch spliced alignment and sequence indexing, submitted. 
Copyright (C) 2002, and GNU GPL, PE Corporation (NY) through the Celera Genomics Group Copyright (C) 2003-2004, and GNU GPL, Applied Biosystems Copyright (C) 2004-2009, and GNU GPL, Brian Walenz Copyright (C) 2010, and GNU GPL, Brian Walenz, Liliana Florea Includes portions copyright from: kmer - Copyright (C) 2005-2010, and GNU GPL, by Brian Walenz sim4 - Copyright (C) 1998-2003, and GNU GPL, by Liliana Florea sim4cc - Copyright (C) 2009-2010, and GNU GPL, by Liliana Florea and Leming Zhou GeneSplicer- Copyright (C) 2001-2009, and GNU GPL, by Mihaela Pertea Glimmer - Copyright (C) 1998-2009, and GNU GPL, by Arthur Delcher ======================================================================= Content: I. What is sim4db? II. Command line usage III. Input/Output IV. Affiliated tools V. Terms of use VI. Support I. What is sim4db? Sim4db performs fast batch alignment of large cDNA (EST, mRNA) sequence sets to a set of eukaryotic genomic regions. It uses the sim4 and sim4cc algorithms to determine the alignments, but incorporates a fast sequence indexing and retrieval mechanism, implemented in the sister package 'leaff', to speedily process large volumes of sequences. While sim4db produces alignments in the same way as sim4 or sim4cc, it has additional features to make it more amenable for use with whole-genome annotation pipelines. A script file can be used to group pairings between cDNAs and their corresponding genomic regions, to be aligned as one run and using the same set of parameters. Sim4db also optionally reports more than one alignment for the same cDNA within a genomic region, as long as they meet user-defined criteria such as minimum length, percentage sequence identity or coverage. This feature is instrumental in finding all alignments of a gene family at one locus. Lastly, the output is presented either as custom sim4db alignments or as GFF3 gene features. II. 
Command line usage A simple command line invocation: sim4db -genomic g.fasta -cdna c.fasta -scr script -output o.sim4db where: - 'c.fasta' and 'g.fasta' are the multi-fasta cDNA and genome sequence files - 'script' is a script file indicating individual alignments to be computed - output in sim4db format will be sent to the file 'o.sim4db' ('-' for standard output) A more complex invocation: sim4db -genomic g.fasta -cdna c.fasta -output o.sim4db [options] Salient options: -cdna use these cDNA sequences (multi-fasta file) -genomic use these genomic sequences (multi-fasta file) -script use this script file -pairwise sequentially align pairs of sequences If none of the '-script' and '-pairwise' options is specified, sim4db performs all-against-all alignments between pairs of cDNA and genomic sequences. -output write output to this file -gff3 report output in GFF3 format -interspecies use sim4cc for inter-species alignments (default sim4) Filter options: -mincoverage iteratively find all exon models with the specified minimum PERCENT COVERAGE -minidentity iteratively find all exon models with the specified minimum PERCENT EXON IDENTITY -minlength iteratively find all exon models with the specified minimum ABSOLUTE COVERAGE (number of bp matched) (default 0) -alwaysreport always report exon models, even if they are below the quality thresholds If no mincoverage or minidentity or minlength is given, only the best exon model is returned. This is the DEFAULT operation. You will probably want to specify ALL THREE of mincoverage, minidentity and minlength! Don't assume the default values are what you want! You will DEFINITELY want to specify at least one of mincoverage, minidentity and minlength with alwaysreport! If you don't, mincoverage will be set to 90 and minidentity to 95 -- to reduce the number of spurious matches when a good match is found. 
Auxiliary options: -nodeflines don't include the defline in the sim4db output -alignments print alignments -polytails DON'T mask poly-A and poly-T tails -cut trim marginal exons if A/T % > x (poly-AT tails) -noncanonical don't force canonical splice sites -splicemodel use the following splice model: 0 - original sim4; 1 - GeneSplicer; 2 - Glimmer; options 1 and 2 are only available with '-interspecies'. Default for sim4 is 0, and for sim4cc is 1. -forcestrand Force the strand prediction to always be one of 'forward' or 'reverse' Execution options: -threads Use n threads. -touch create this file when the program finishes execution Debugging options: -v print status to stderr while running -V print script lines (stderr) as they are being processed Developer options: -Z set the spaced seed pattern -H set the relink weight factor (H=1000 recommended for mRNAs) -K set the first MSP threshold -C set the second MSP threshold -Ma set the limit of the number of MSPs allowed -Mp same, as percentage of bases in cDNA NOTE: If used, both -Ma and -Mp must be specified! III. Input/Output For a typical run, sim4db takes as input two multi-fasta files containing the cDNAs and the genome, respectively, and optionally a script describing a set of pairings among the sequences. Alignments are determined using the program sim4 (default) for same-species comparisons, or sim4cc when the '-interspecies' option is set. The output is reported in the compact sim4db format (default), or in GFF3 format with the '-gff3' option. Utilities for filtering, merging, sorting and processing polishes in these formats, and for converting between the two formats (lossy), are included with the package and described in section IV below. A. 
The input script file format [-f|-r] -e cDNAidx -D GENidx GENlo GENhi where: cDNAidx internal index of the cDNA in the input cDNA fasta file (0..#cDNAseqs-1) GENidx internal index of the genomic sequence in the input genome file (0..#GENseqs-1) -f use the cDNA sequence as is -r use the reverse complement of the cDNA sequence GENlo, GENhi begin and end coordinates of the target genomic region; coordinates are 0-based Example: -f -e 61728 -D 0 2370482 2375224 -r -e 61730 -D 0 6723331 6757701 -r -e 61734 -D 1 8428517 8432981 -f -e 61736 -D 3 4600260 4637694 etc. For best performance, the script should be sorted by the genomic sequence index. B. The sim4db output format sim4begin cDNAidx[cDNAlen-pA-pT] GENidx[GENoff-GENlen] <M-N-P-O-S> edef=cDNA defline ddef=genomic defline cDNAbgn1-cDNAend1 (GENbgn1-GENend1) <M-N-P> intronOri cDNAbgn2-cDNAend2 (GENbgn2-GENend2) <M-N-P> intronOri ... cDNAbgnn-cDNAendn (GENbgnn-GENendn) <M-N-P> intronOri cDNA alignment sequence for exon #1 genomic alignment sequence for exon #1 cDNA alignment sequence for exon #2 genomic alignment sequence for exon #2 ...
cDNA alignment sequence for exon #n genomic alignment sequence for exon #n sim4end where: cDNAidx internal index of the cDNA in the input cDNA fasta file cDNAlen length of the cDNA sequence pA(T)wi length of polyA(T) tail detected and masked GENidx internal index of the genomic sequence in the genome fasta file GENoff offset to the beginning of the genomic region containing the signal GENlen length of the genomic region containing the signal M number of nucleotide matches in the alignment N number of matching N's in the alignment P percent sequence identity of the alignment O match orientation: * forward - the cDNA sequence aligns to the genomic sequence directly * complement - the reverse complement of the cDNA sequence matches the genomic sequence; this is the equivalent of the sim4 '(complement)' output line S strand predicted based on the splice signals and alignment quality: * forward - predicted forward strand * reverse - predicted reverse strand * unknown - strand unknown (because of low alignment quality, single exon match, or weak splice signals) cDNAbgni start position of exon i in the cDNA sequence cDNAendi end position of exon i in the cDNA sequence GENbgni start position of exon i in the genomic sequence (interval GENlo-GENhi) GENendi end position of exon i in the genomic sequence (interval GENlo-GENhi) M number of nucleotide matches in the alignment N number of matching N's in the alignment P percent sequence identity of the alignment intronOri predicted orientation of the intron: * -> forward (i.e., GT-AG-like splice signals) * <- reverse (i.e., CT-AC-like splice signals) * -- ambiguous * == gap (unaligned portion) in the cDNA sequence Exon coordinates are nucleotide based, starting from 1. Genomic coordinates are always in the original sequence, while the cDNA coordinates will refer to positions in the reverse complement of the sequence if the match orientation is indicated as 'complement'. 
Lowercase letters in the alignment lines indicate positions with matching nucleotides, '-' indicate gaps in the corresponding sequence, and uppercase letters mark either substitutions, or gaps in the other sequence. Alignments are OPTIONAL. Example: sim4begin 61728[685-0-21] 0[2370482-4742] <651-0-97-forward-reverse> edef=gb|CA807305 D. melanogaster cDNA 3' similar to CT12127, mRNA sequence ddef=arm_2L 22-337 (2372455-2372770) <313-0-99> <- 338-584 (2372830-2373076) <238-0-95> <- 585-685 (2373134-2373234) <100-0-99> gtaaaaaTttctgtttatta...gggcgaccagaagtcaatcag gtaaaaaGttctgtttatta...gggcgaccagaagtcaatcag ggtaacttgtccttGggtgc...ccacaccgGctccca-ttcgcgtAtc ggtaacttgtccttTggtgc...ccacaccgCctcccaGttcgcgtTtc tgcaagcggtcgacatgagg...cttaaAgcgctggta tgcaagcggtcgacatgagg...cttaaCgcgctggta sim4end C. The GFF3 output format The same example as before: 0:arm_2L sim4db mRNA 2372455 2373234 97 - . ID=sim4db10;Name=61728:gb|CA807305;Target=61728:gb|CA807305 22 685 +;pA=0;pT=21;genRegion=2370482-2375224 0:arm_2L sim4db exon 2372455 2372770 99 - . Parent=sim4db10;Target=61728:gb|CA807305 22 337 +;Gap=M316;nMatches=313;intron=<- 0:arm_2L sim4db exon 2372830 2373076 95 - . Parent=sim4db10;Target=61728:gb|CA807305 338 584 +;Gap=M74 D1 M2 I1 M160 D1 M10;nMatches=238;intron=<- 0:arm_2L sim4db exon 2373134 2373234 99 - . Parent=sim4db10;Target=61728:gb|CA807305 585 685 +;Gap=M101;nMatches=100 (Columns are tab-separated.) IV. Affiliated tools The 'sim4dbutils' package contains a range of utilities to work with sim4db-generated alignment files, of particular note being: convertPolishes - convert between the two formats. With GFF3->sim4db conversion, alignments will be lost. filterPolishes - filter alignments based on minimum percentage sequence identity, coverage and length. mergePolishes - merge alignments from multiple files (also concatenates the cDNA fasta files) sortPolishes - sort alignments by cDNA or genomic sequence index, using a limited amount of memory if needed. V. 
Terms of use This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received (LICENSE.txt) a copy of the GNU General Public License along with this program; if not, you can obtain one from http://www.gnu.org/licenses/gpl.txt or by writing to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA VI. Support Brian Walenz (high-throughput; brianwalenz@users.sourceforge.net) and Liliana Florea (sim4/sim4cc; florea@users.sourceforge.net). Please check the parent project's Sourceforge page at http://kmer.sourceforge.net for details and updates. Last updated: Jan 19, 2011 kmer-code-2013-trunk/libseq/0000755000000000000000000000000012641613360014427 5ustar rootrootkmer-code-2013-trunk/libseq/sffFile.H0000644000000000000000000000503712322046702016117 0ustar rootroot#ifndef SFF_H #define SFF_H #include "util++.H" #include "bio++.H" #include "seqFile.H" #define SFF_KEY_SEQUENCE_MAX 64 #define SFF_NAME_LENGTH_MAX 256 #define SFF_NUMBER_OF_FLOWS_MAX 512 #define SFF_NUMBER_OF_BASES_MAX 2048 // The assembler itself cannot handle longer struct sffHeader { // The next block is read in one swoop from the sff file. DO NOT MODIFY! 
uint32 magic_number; char version[4]; uint64 index_offset; uint32 index_length; uint32 number_of_reads; uint16 header_length; uint16 key_length; uint16 number_of_flows_per_read; uint8 flowgram_format_code; char flow_chars[SFF_NUMBER_OF_FLOWS_MAX]; // h->number_of_flows_per_read char key_sequence[SFF_KEY_SEQUENCE_MAX]; // h->key_length uint32 swap_endianess; }; struct sffRead { // The next block is read in one swoop from the sff file. DO NOT MODIFY! uint16 read_header_length; uint16 name_length; uint32 number_of_bases; uint16 clip_quality_left; uint16 clip_quality_right; uint16 clip_adapter_left; uint16 clip_adapter_right; char name[SFF_NAME_LENGTH_MAX]; // r->name_length uint16 flowgram_values[SFF_NUMBER_OF_FLOWS_MAX]; // h->number_of_flows_per_read uint8 flow_index_per_base[SFF_NUMBER_OF_BASES_MAX]; // r->number_of_bases char bases[SFF_NUMBER_OF_BASES_MAX]; // r->number_of_bases uint8 quality_scores[SFF_NUMBER_OF_BASES_MAX]; // r->number_of_bases char quality[SFF_NUMBER_OF_BASES_MAX]; // quality_scores converted to CA-format qv }; struct sffIndex { uint64 _seqPos; uint32 _seqLen; uint32 _namLen; }; class sffFile : public seqFile { protected: sffFile(const char *filename); sffFile(); public: ~sffFile(); protected: seqFile *openFile(const char *name); public: uint32 find(const char *sequencename) { assert(0); return(0); }; uint32 getSequenceLength(uint32 iid) { return(_index[iid]._seqLen); }; bool getSequence(uint32 iid, char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax); bool getSequence(uint32 iid, uint32 bgn, uint32 end, char *s); private: void clear(void); readBuffer *_rb; sffHeader _header; sffRead _read; sffIndex *_index; uint64 _firstReadLocation; uint64 _readIID; friend class seqFactory; }; #endif // SFF_H kmer-code-2013-trunk/libseq/fastqFile.C0000644000000000000000000003600312516022532016447 0ustar rootroot#include "fastqFile.H" #include "alphabet.h" #undef DEBUG #undef DEBUGINDEX // Says 'kmerFastaFileIdx' #define 
FASTQ_MAGICNUMBER1 0x7473614672656d6bULL #define FASTQ_MAGICNUMBER2 0x786449656c694661ULL fastqFile::fastqFile(const char *filename) { clear(); #ifdef DEBUG fprintf(stderr, "fastqFile::fastqFile()-- '%s'\n", (filename) ? filename : "NULLPOINTER"); #endif strcpy(_filename, filename); constructIndex(); _rb = new readBuffer(_filename); _numberOfSequences = _header._numberOfSequences; } fastqFile::fastqFile() { clear(); } fastqFile::~fastqFile() { delete _rb; delete [] _index; delete [] _names; } seqFile * fastqFile::openFile(const char *filename) { struct stat st; #ifdef DEBUG fprintf(stderr, "fastqFile::openFile()-- '%s'\n", (filename) ? filename : "NULLPOINTER"); #endif if (((filename == 0L) && (isatty(fileno(stdin)) == 0)) || ((filename != 0L) && (filename[0] == '-') && (filename[1] == 0))) return(0L); errno = 0; stat(filename, &st); if (errno) return(0L); if ((st.st_mode & S_IFREG) == 0) return(0L); // Otherwise, open and see if we can get the first sequence. We // assume it's fastq if we find a '>' denoting a defline the first // thing in the file. // // Use of a readBuffer here is a bit heavyweight, but it's safe and // easy. Opening a fastqFile isn't, after all, lightweight anyway. // fastqFile *f = 0L; readBuffer *r = new readBuffer(filename); char x = r->read(); while ((r->eof() == false) && (whitespaceSymbol[x] == true)) x = r->read(); // If we get a fastq record separator assume it's a fastq file. If // it's eof, the file is empty, and we might as well return this // fastq file and let the client deal with the lack of sequence. // if ((x == '@') || (r->eof() == true)) f = new fastqFile(filename); delete r; return(f); } uint32 fastqFile::find(const char *sequencename) { char *ptr = _names; // If this proves far too slow, rewrite the _names string to // separate IDs with 0xff, then use strstr on the whole thing. To // find the ID, scan down the string counting the number of 0xff's. 
// // Similar code is used for seqStore::find() for (uint32 iid=0; iid < _header._numberOfSequences; iid++) { //fprintf(stderr, "fastqFile::find()-- '%s' vs '%s'\n", sequencename, ptr); if (strcmp(sequencename, ptr) == 0) return(iid); while (*ptr) ptr++; ptr++; } return(~uint32ZERO); } uint32 fastqFile::getSequenceLength(uint32 iid) { #ifdef DEBUG fprintf(stderr, "fastqFile::getSequenceLength()-- "uint32FMT"\n", iid); #endif return((iid < _numberOfSequences) ? _index[iid]._seqLength : 0); } bool fastqFile::getSequence(uint32 iid, char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax) { #ifdef DEBUG fprintf(stderr, "fastqFile::getSequence(full)-- "uint32FMT"\n", iid); #endif if (iid >= _header._numberOfSequences) { fprintf(stderr, "fastqFile::getSequence(full)-- iid "uint32FMT" more than number of sequences "uint32FMT"\n", iid, _header._numberOfSequences); return(false); } if (sMax == 0) { sMax = 2048; s = new char [sMax]; } if (hMax == 0) { hMax = 2048; h = new char [hMax]; } if ((_index) && (sMax < _index[iid]._seqLength)) { sMax = _index[iid]._seqLength; delete [] s; s = new char [sMax]; } hLen = 0; sLen = 0; #ifdef DEBUG fprintf(stderr, "fastqFile::getSequence(full)-- seek to iid="uint32FMT" at pos="uint32FMT"\n", iid, _index[iid]._seqPosition); #endif _rb->seek(_index[iid]._seqPosition); char x = _rb->read(); // Skip whitespace at the start of the sequence. while ((_rb->eof() == false) && (whitespaceSymbol[x] == true)) x = _rb->read(); // We should be at a '@' character now. Fail if not. if (_rb->eof()) return(false); if (x != '@') fprintf(stderr, "fastqFile::getSequence(full)-- ERROR1: In %s, expected '@' at beginning of defline, got '%c' instead.\n", _filename, x), exit(1); // Skip the '@' in the defline x = _rb->read(); // Skip whitespace between the '@' and the defline while ((_rb->eof() == false) && (whitespaceSymbol[x] == true) && (x != '\r') && (x != '\n')) x = _rb->read(); // Copy the defline, until the first newline. 
while ((_rb->eof() == false) && (x != '\r') && (x != '\n')) { h[hLen++] = x; if (hLen >= hMax) { hMax += 2048; char *H = new char [hMax]; memcpy(H, h, hLen); delete [] h; h = H; } x = _rb->read(); } h[hLen] = 0; // Skip whitespace between the defline and the sequence. while ((_rb->eof() == false) && (whitespaceSymbol[x] == true)) x = _rb->read(); // Copy the sequence, until EOF or the start of the QV bases. while ((_rb->eof() == false) && (x != '+')) { if (whitespaceSymbol[x] == false) { s[sLen++] = x; if (sLen >= sMax) { if (sMax == 4294967295) // 4G - 1 fprintf(stderr, "fastqFile::getSequence()-- ERROR: sequence is too long; must be less than 4 Gbp.\n"), exit(1); if (sMax >= 2147483648) // 2G sMax = 4294967295; else sMax *= 2; char *S = new char [sMax]; memcpy(S, s, sLen); delete [] s; s = S; } } x = _rb->read(); } s[sLen] = 0; // Skip the rest of the QV id line and then the entire QV line. //x = _rb->read(); assert((_rb->eof() == true) || (x == '+')); while ((_rb->eof() == false) && (x != '\r') && (x != '\n')) x = _rb->read(); x = _rb->read(); while ((_rb->eof() == false) && (x != '\r') && (x != '\n')) x = _rb->read(); _nextID++; return(true); } // slow bool fastqFile::getSequence(uint32 iid, uint32 bgn, uint32 end, char *s) { if (iid >= _header._numberOfSequences) { fprintf(stderr, "fastqFile::getSequence(part)-- iid "uint32FMT" more than number of sequences "uint32FMT"\n", iid, _header._numberOfSequences); return(false); } #ifdef DEBUG fprintf(stderr, "fastqFile::getSequence(part)-- "uint32FMT"\n", iid); #endif // Unlike the fasta version of this, we know that all the sequence is on one line. However, we // expect fastq sequences to be small, and we still do the same processing -- character by character. _rb->seek(_index[iid]._seqPosition); uint32 pos = 0; char x = _rb->read(); // Skip whitespace at the start of the sequence. while ((_rb->eof() == false) && (whitespaceSymbol[x] == true)) x = _rb->read(); // We should be at a '@' character now. Fail if not. 
if (_rb->eof()) return(false); if (x != '@') fprintf(stderr, "fastqFile::getSequence(part)-- ERROR2: In %s, expected '@' at beginning of defline, got '%c' instead.\n", _filename, x), exit(1); // Skip the defline. while ((_rb->eof() == false) && (x != '\r') && (x != '\n')) x = _rb->read(); // Skip whitespace between the defline and the sequence. while ((_rb->eof() == false) && (whitespaceSymbol[x] == true)) x = _rb->read(); // Skip sequence up until bgn. while ((_rb->eof() == false) && (pos < bgn)) { if (whitespaceSymbol[x] == false) pos++; x = _rb->read(); } // Copy sequence while ((_rb->eof() == false) && (pos < end)) { if (whitespaceSymbol[x] == false) s[pos++ - bgn] = x; x = _rb->read(); } s[pos - bgn] = 0; // Fail if we didn't copy enough stuff. return((pos == end) ? true : false); } void fastqFile::clear(void) { memset(_filename, 0, FILENAME_MAX); memset(_typename, 0, FILENAME_MAX); strcpy(_typename, "Fastq"); _randomAccessSupported = true; _numberOfSequences = 0; _rb = 0L; memset(&_header, 0, sizeof(fastqFileHeader)); _index = 0L; _names = 0L; _nextID = 0; } void fastqFile::loadIndex(char *indexname) { struct stat fastqstat; if (fileExists(indexname) == false) return; errno = 0; if (stat(_filename, &fastqstat)) { fprintf(stderr, "fastqFile::constructIndex()-- stat of file '%s' failed: %s\n", _filename, strerror(errno)); return; } FILE *I = fopen(indexname, "r"); if (errno) { fprintf(stderr, "fastqFile::constructIndex()-- open of file '%s' failed: %s\n", indexname, strerror(errno)); return; } fread(&_header, sizeof(fastqFileHeader), 1, I); if ((_header._magic[0] != FASTQ_MAGICNUMBER1) && (_header._magic[1] != FASTQ_MAGICNUMBER2)) { fprintf(stderr, "fastqFile::constructIndex()-- magic mismatch.\n"); fclose(I); return; } if ((_header._fastqFileSize != (uint64)fastqstat.st_size) || (_header._fastqModificationTime != (uint64)fastqstat.st_mtime) || (_header._fastqCreationTime != (uint64)fastqstat.st_ctime)) { fprintf(stderr, "fastqFile::constructIndex()-- stat 
mismatch.\n"); fclose(I); return; } _index = new fastqFileIndex [_header._numberOfSequences]; _names = new char [_header._namesLength]; fread(_index, sizeof(fastqFileIndex), _header._numberOfSequences, I); fread(_names, sizeof(char), _header._namesLength, I); #ifdef DEBUG fprintf(stderr, "fastqFile::constructIndex()-- '%s' LOADED\n", _filename); #endif fclose(I); return; } void fastqFile::constructIndex(void) { if (_index) return; // If the filename ends in '.fastq' then append a 'idx', // otherwise, append '.fastqidx'. char indexname[FILENAME_MAX]; strcpy(indexname, _filename); uint32 l = strlen(_filename); if ((l > 5) && (strcmp(_filename + l - 6, ".fastq") == 0)) strcat(indexname, "idx"); else strcat(indexname, ".fastqidx"); // If the index exists, suck it in and return. loadIndex(indexname); if (_index) return; #ifdef DEBUG fprintf(stderr, "fastqFile::constructIndex()-- '%s' BUILDING\n", _filename); #endif // Allocate some space for the index structures. uint32 indexMax = 64 * 1024 * 1024 / sizeof(fastqFileIndex); uint32 indexLen = 0; _index = new fastqFileIndex [indexMax]; uint32 namesMax = 32 * 1024 * 1024; uint32 namesLen = 0; _names = new char [namesMax]; // Some local storage uint64 seqStart; uint32 seqLen; uint32 seqLenMax = ~uint32ZERO; uint32 namePos; readBuffer ib(_filename); char x = ib.read(); #ifdef DEBUGINDEX fprintf(stderr, "readBuffer '%s' eof=%d x=%c %d\n", _filename, ib.eof(), x, x); #endif // Build it. // Skip whitespace at the start of the sequence. while ((ib.eof() == false) && (whitespaceSymbol[x] == true)) { #ifdef DEBUGINDEX fprintf(stderr, "skip '%c' %d\n", x, x); #endif x = ib.read(); } while (ib.eof() == false) { #ifdef DEBUGINDEX fprintf(stderr, "index\n"); #endif // We should be at a '@' character now. Fail if not. 
if (x != '@') fprintf(stderr, "fastqFile::constructIndex()-- ERROR3: In %s, expected '@' at beginning of defline, got '%c' instead.\n", _filename, x), exit(1); // Save info - ib's position is correctly at the first letter in // the defline (which might be whitespace), but the reader // expects our position to be at the '@' -- hence the -1. seqStart = ib.tell() - 1; seqLen = 0; namePos = namesLen; // Read that first letter x = ib.read(); // Copy the name to the names while ((ib.eof() == false) && (whitespaceSymbol[x] == false)) { if (namesLen + 1 >= namesMax) { namesMax += 32 * 1024 * 1024; char *nt = new char [namesMax]; memcpy(nt, _names, namesLen); delete [] _names; _names = nt; } _names[namesLen++] = x; #ifdef DEBUGINDEX fprintf(stderr, "name += %c\n", x); #endif x = ib.read(); } if (namesLen + 1 >= namesMax) { namesMax += 32 * 1024 * 1024; char *nt = new char [namesMax]; memcpy(nt, _names, namesLen); delete [] _names; _names = nt; } _names[namesLen++] = 0; // Skip the rest of the defline while ((ib.eof() == false) && (x != '\r') && (x != '\n')) { #ifdef DEBUGINDEX fprintf(stderr, "skip let %c\n", x); #endif x = ib.read(); } // Skip whitespace between the defline and the sequence. while ((ib.eof() == false) && (whitespaceSymbol[x] == true)) { #ifdef DEBUGINDEX fprintf(stderr, "skip num %d\n", x); #endif x = ib.read(); } #ifdef DEBUGINDEX fprintf(stderr, "x=%c peek=%c\n", x, ib.peek()); #endif // Count sequence length while ((ib.eof() == false) && (x != '+')) { #ifdef DEBUGINDEX fprintf(stderr, "seqlen %s %c\n", (whitespaceSymbol[x] == false) ? "save" : "skip", x); #endif if (whitespaceSymbol[x] == false) seqLen++; if (seqLen >= seqLenMax) fprintf(stderr, "fastqFile::constructIndex()-- ERROR: In %s, sequence '%s' is too long. Maximum length is %u bases.\n", _filename, _names + namePos, seqLenMax), exit(1); x = ib.read(); } // Save to the index. 
if (indexLen >= indexMax) { fprintf(stderr, "REALLOC len="uint32FMT" from "uint32FMT" to "uint32FMT"\n", indexLen, indexMax, indexMax * 2); indexMax *= 2; fastqFileIndex *et = new fastqFileIndex[indexMax]; memcpy(et, _index, sizeof(fastqFileIndex) * indexLen); delete [] _index; _index = et; } _index[indexLen]._seqPosition = seqStart; _index[indexLen]._seqLength = seqLen; #if 0 if ((indexLen * sizeof(fastqFileIndex) > 131000) && (indexLen * sizeof(fastqFileIndex) < 131200)) fprintf(stderr, "INDEX pos="uint64FMT" iid="uint32FMT" len="uint32FMT" pos="uint64FMT"\n", indexLen * sizeof(fastqFileIndex), indexLen, seqLen, seqStart); #endif indexLen++; // Skip the rest of the QV def line, then the entire QV line, then load the '@' for the next sequence. //x = ib.read(); assert((ib.eof() == true) || (x == '+')); while ((ib.eof() == false) && (x != '\r') && (x != '\n')) x = ib.read(); x = ib.read(); while ((ib.eof() == false) && (x != '\r') && (x != '\n')) x = ib.read(); while ((ib.eof() == false) && (x != '@')) x = ib.read(); } // Fill out the index meta data struct stat fastqstat; errno = 0; if (stat(_filename, &fastqstat)) fprintf(stderr, "fastqFile::constructIndex()-- stat() of file '%s' failed: %s\n", _filename, strerror(errno)), exit(1); _header._magic[0] = FASTQ_MAGICNUMBER1; _header._magic[1] = FASTQ_MAGICNUMBER2; _header._numberOfSequences = indexLen; _header._namesLength = namesLen; _header._fastqFileSize = fastqstat.st_size; _header._fastqModificationTime = fastqstat.st_mtime; _header._fastqCreationTime = fastqstat.st_ctime; // Dump the index, if possible. 
errno = 0; FILE *I = fopen(indexname, "w"); if (errno) return; fwrite(&_header, sizeof(fastqFileHeader), 1, I); fwrite( _index, sizeof(fastqFileIndex), _header._numberOfSequences, I); fwrite( _names, sizeof(char), _header._namesLength, I); fclose(I); } kmer-code-2013-trunk/libseq/test/0000755000000000000000000000000012641613360015406 5ustar rootrootkmer-code-2013-trunk/libseq/test/Makefile0000644000000000000000000000116711237152345017054 0ustar rootroot PROG = test-merstream-speed INCLUDE = -I.. -I../../libutil -I../../libbio -I../../libseq LIBS = -L.. -L../../libutil -L../../libbio -L../../libseq -lseq -lbio -lutil -lm OBJS = include ../../Make.compilers all: $(PROG) @echo Tests passed! test-merstream-speed: test-merstream-speed.C $(CXX) $(CXXFLAGS_COMPILE) -c -o test-merstream-speed.o test-merstream-speed.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o test-merstream-speed test-merstream-speed.o $(LIBS) ../../leaff/leaff -G 10000 1000 10000 > junk.fasta cat junk.fasta > /dev/null ./test-merstream-speed junk.fasta rm -f junk* clean: rm -f $(PROG) *.o *junk* kmer-code-2013-trunk/libseq/test/test-merstream-speed.C0000644000000000000000000000257411237152345021575 0ustar rootroot#include #include #include "bio++.H" #include "seqCache.H" #include "seqStream.H" #include "merStream.H" int main(int argc, char **argv) { speedCounter *C = 0L; FILE *F = 0L; seqStream *S = 0L; merStream *M = 0L; if (argc != 2) { fprintf(stderr, "usage: %s some.fasta\n", argv[0]); fprintf(stderr, "Reads some.fasta using fgetc(), the seqStream and the merStream,\n"); fprintf(stderr, "reporting the speed of each method.\n"); exit(1); } //////////////////////////////////////// F = fopen(argv[1], "r"); C = new speedCounter("fgetc(): %7.2f Mthings -- %5.2f Mthings/second\r", 1000000.0, 0x3fffff, true); while (!feof(F)) fgetc(F), C->tick(); delete C; fclose(F); //////////////////////////////////////// S = new seqStream(argv[1]); C = new speedCounter("seqStream: %7.2f Mthings -- %5.2f Mthings/second\r", 
1000000.0, 0x3fffff, true); while (S->get()) C->tick(); delete C; delete S; //////////////////////////////////////// M = new merStream(new kMerBuilder(20), new seqStream(argv[1]), true, true); C = new speedCounter("seqStream -> merStream: %7.2f Mthings -- %5.2f Mthings/second\r", 1000000.0, 0x3fffff, true); while (M->nextMer()) C->tick(); delete C; delete M; exit(0); } kmer-code-2013-trunk/libseq/seqStream.H0000644000000000000000000000717112322046702016506 0ustar rootroot#ifndef SEQSTREAM_H #define SEQSTREAM_H #include "util++.H" #include "bio++.H" #include "seqFile.H" struct seqStreamIndex { uint32 _iid; // seqFile IID uint32 _len; // length of the sequence uint64 _bgn; // begin position in the stream }; class seqStream { public: seqStream(const char *filename); seqStream(const char *sequence, uint32 length); ~seqStream(); // Separate sequences with this letter. Non-ACGT is always // returned as 'N'. Changing the length of the separator AFTER // setting the range will result in the wrong range being used. // void setSeparator(char sep, uint32 len); // get() returns one letter per input letter -- a gap of size n // will return n gap symbols. // unsigned char get(void); bool eof(void) { return(_eof); }; // Returns to the start of the range. // void rewind(void); // Set the range of ACGT sequence we will return. Coordinates are // space-based. Example: // // >one // AAA // >two // C // >three // GGG // // We separate these sequences with three '-' letters. // // strPos 012...3...456 // AAA---C---GGG // // range(0,0) -> nothing // range(0,1) -> A // range(0,3) -> AAA // range(0,4) -> AAAnnnC // range(0,5) -> AAAnnnCnnnG // void setRange(uint64 bgn, uint64 end); void setPosition(uint64 pos); // seqPos() is the position we are at in the current sequence; // seqIID() is the iid of that sequence; // strPos() is the position we are at in the chained sequence // // Values are not defined if the letter is a separator. 
// uint32 seqPos(void) { return(_currentPos); }; uint32 seqIID(void) { return(_idx[_currentIdx]._iid); }; uint64 strPos(void) { return(_streamPos); }; uint32 numberOfSequences(void) { return(_idxLen); }; // Return the length of, position of (in the chain) and IID of the // (s)th sequence in the chain. // uint32 lengthOf(uint32 s) { return((s >= _idxLen) ? ~uint32ZERO : _idx[s]._len); }; uint32 IIDOf(uint32 s) { return((s >= _idxLen) ? ~uint32ZERO : _idx[s]._iid); }; uint64 startOf(uint32 s) { return((s >= _idxLen) ? ~uint64ZERO : _idx[s]._bgn); }; // For a chain position p, returns the s (above) for that position. // uint32 sequenceNumberOfPosition(uint64 p); void tradeSpaceForTime(void); private: void fillBuffer(void); seqFile *_file; // Backed by a seqFile. char *_string; // Backed by a character string. uint64 _bgn; // Begin/End position in chained sequence uint64 _end; uint32 _currentIdx; // index into _idx of the current sequence uint32 _currentPos; // position in the current sequence uint64 _streamPos; // position in the chained sequence // Buffer for holding sequence from the seqFile. 
uint32 _bufferMax; // Max size of the buffer uint32 _bufferLen; // Actual size of the buffer uint32 _bufferPos; // Position we are at in the buffer uint32 _bufferSep; // How much of the buffer is separator char *_buffer; // Info about the raw sequences uint32 _idxLen; seqStreamIndex *_idx; uint32 *_seqNumOfPos; uint64 _lengthOfSequences; bool _eof; char _separator; uint32 _separatorLength; }; #endif // SEQSTREAM_H kmer-code-2013-trunk/libseq/fastqStdin.H0000644000000000000000000000261712375772744016706 0ustar rootroot#ifndef FASTQSTDIN_H #define FASTQSTDIN_H #include "util++.H" #include "bio++.H" #include "seqFile.H" class fastqStdin : public seqFile { protected: fastqStdin(const char *filename); fastqStdin(); public: ~fastqStdin(); protected: seqFile *openFile(const char *filename); public: uint32 getNumberOfSequences(void); public: uint32 find(const char *sequencename); uint32 getSequenceLength(uint32 iid); bool getSequence(uint32 iid, char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax); bool getSequence(uint32 iid, uint32 bgn, uint32 end, char *s); private: void clear(void); bool loadNextSequence(char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax); readBuffer *_rb; uint32 _nextIID; FILE *_pipe; char *_header; uint32 _headerLen; uint32 _headerMax; char *_sequence; uint32 _sequenceLen; uint32 _sequenceMax; char *_quality; uint32 _qualityLen; uint32 _qualityMax; friend class seqFactory; }; #endif // FASTQSTDIN_H kmer-code-2013-trunk/libseq/selftest.C0000644000000000000000000000267712322046702016374 0ustar rootroot { seqFile *SF = openSeqFile(argv[1]); fprintf(stdout, "source '%s' of type '%s' has "uint32FMT" sequences.\n", SF->getSourceName(), SF->getFileTypeName(), SF->getNumberOfSequences()); fprintf(stdout, "getSequenceLength() vs getSequence(full)\n"); { char *h = 0L; char *s = 0L; uint32 hLen=0, hMax=0; uint32 sLen=0, sMax=0; for (uint32 sid=0; sidgetNumberOfSequences(); sid++) { SF->getSequence(sid, h, hLen, 
hMax, s, sLen, sMax); if ((strlen(s) != SF->getSequenceLength(sid)) || (strlen(s) != sLen) || (SF->getSequenceLength(sid) != sLen)) { fprintf(stdout, "length differ for sid="uint32FMT" h='%s' strlen(s)=%d sLen="uint32FMT" getSequenceLength()="uint32FMT"\n", sid, h, strlen(s), sLen, SF->getSequenceLength(sid)); } } delete [] h; delete [] s; } fprintf(stdout, "getSequenceLength() vs getSequence(part)\n"); { char *p = new char [128 * 1024 * 1024]; for (uint32 sid=0; sidgetNumberOfSequences(); sid++) { SF->getSequence(sid, 0, SF->getSequenceLength(sid), p); if (strlen(p) != SF->getSequenceLength(sid)) { fprintf(stdout, "length differ for sid="uint32FMT" strlen(s)=%d getSequenceLength()="uint32FMT"\n", sid, strlen(p), SF->getSequenceLength(sid)); } } delete [] p; } return(0); } kmer-code-2013-trunk/libseq/merStream.C0000644000000000000000000000304712322046702016472 0ustar rootroot#include "merStream.H" merStream::merStream(kMerBuilder *kb, seqStream *ss, bool kbown, bool ssown) { _kb = kb; _ss = ss; _kbdelete = kbown; _ssdelete = ssown; _beg = uint64ZERO; _end = ~uint64ZERO; _kb->clear(); _invalid = true; } merStream::~merStream() { if (_kbdelete) delete _kb; if (_ssdelete) delete _ss; } void merStream::rewind(void) { _ss->rewind(); _kb->clear(); _invalid = true; } void merStream::rebuild(void) { _ss->setPosition(_ss->strPos() - _kb->theFMer().getMerSpan()); _kb->clear(); _invalid = true; } void merStream::setBaseRange(uint64 beg, uint64 end) { assert(beg < end); //fprintf(stderr, "merStream::setBaseRange()-- from "uint64FMT" to "uint64FMT".\n", beg, end); // We can't tell the seqStore when to stop; while we could compute the span of a spaced seed, we // cannot compute it for a compressed seed. We need to stop iterating when the beginning of the // mer reaches the requested end. 
_ss->setRange(beg, ~uint64ZERO); _beg = beg; _end = end; _kb->clear(); _invalid = true; } uint64 merStream::approximateNumberOfMers(void) { uint64 approx = _end - _beg; uint64 k = _kb->merSize(); // If we don't know the range, sum all the sequence lengths, otherwise, it's just the length from // begin to end. if (_end == ~uint64ZERO) { approx = uint64ZERO; for (uint32 s=0; s<_ss->numberOfSequences(); s++) { uint32 l = _ss->lengthOf(s); if (l > k) approx += l - k + 1; } } return(approx); } kmer-code-2013-trunk/libseq/test-merStream.C0000644000000000000000000001726412322046702017455 0ustar rootroot#include "util.h" #include "seqCache.H" #include "seqStream.H" #include "merStream.H" #include "test-correctSequence.H" #define FAIL() { err++; assert(0); } #warning HOW DO WE TEST IF WE GET ALL THE MERS? uint32 testMerStreamSimple(merStream *MS, uint32 merSize, char *seq, uint32 *SP) { uint32 err = 0; uint32 pos = 0; char testmer[32]; bool verbose = true; bool nm = false; if (verbose) fprintf(stdout, "testMSsimple() begins.\n"); // Until we have no more mers in the input while (seq[pos + merSize - 1] != 0) { nm = MS->nextMer(); MS->theFMer().merToString(testmer); if (verbose) { fprintf(stdout, "MS pos="uint32FMT" posInSeq="uint64FMT" posInStr="uint64FMT" seqNum="uint64FMT"\n", pos, MS->thePositionInSequence(), MS->thePositionInStream(), MS->theSequenceNumber()); if (strncmp(testmer, seq + pos, merSize)) fprintf(stdout, "MS pos="uint32FMT" failed '%s' != '%s'.\n", pos, testmer, seq + pos); } assert(nm == true); assert(MS->thePositionInSequence() == SP[pos]); assert(MS->thePositionInStream() == SP[pos]); assert(MS->theSequenceNumber() == 0); assert(strncmp(testmer, seq + pos, merSize) == 0); pos++; } // Should have no more mers nm = MS->nextMer(); assert(nm == false); return(err); } uint32 testMerStreamOperation(merStream *MS, uint32 beg, uint32 end, uint32 sepLen) { uint32 err = 0; char fmerstr[256]; char rmerstr[256]; char cmerstr[256]; char tmerstr[256]; while 
(MS->nextMer()) { MS->theFMer().merToString(fmerstr); MS->theRMer().merToString(rmerstr); MS->theCMer().merToString(cmerstr); if ((strcmp(fmerstr, cmerstr) != 0) && ((strcmp(rmerstr, cmerstr) != 0))) { fprintf(stderr, "mer strings disagree; F:%s R:%s C:%s\n", fmerstr, rmerstr, cmerstr); FAIL(); } reverseComplementSequence(rmerstr, strlen(rmerstr)); if (strcmp(fmerstr, rmerstr) != 0) { fprintf(stderr, "mer strings disagree after reverse; F:%s R:%s\n", fmerstr, rmerstr); FAIL(); } uint32 pseq = MS->thePositionInSequence(); uint32 pstr = MS->thePositionInStream(); uint32 piid = MS->theSequenceNumber(); uint32 mersize = MS->theFMer().getMerSize(); uint32 merspan = MS->theFMer().getMerSpan(); #if 0 if (beg > 10) { uint32 pp = pstr + piid * sepLen - 10; uint32 xx = 0; fprintf(stderr, "beg="uint32FMT" pstr="uint32FMT" '", beg, pstr); for (xx=0; xx<10; xx++, pp++) fprintf(stderr, "%c", chainSeq[pp]); fprintf(stderr, ":"); for (xx=0; xx 1) { ST = new seqStream("test-correctSequence.fasta"); ST->setSeparator(sep, sepLen); } else { ST = new seqStream(correctSequence[0].sequence, correctSequence[0].sequenceLength); } MS = new merStream(KB, ST, true, true); uint32 maxLen = ST->startOf(numSeq-1) + ST->lengthOf(numSeq-1); // Whole thing, rewind, whole thing fprintf(stderr, "whole thing.\n"); err += testMerStreamOperation(MS, 0, maxLen, sepLen); MS->rewind(); err += testMerStreamOperation(MS, 0, maxLen, sepLen); // Random subsets - we're not terribly interested in streaming, // just getting the start/end correct. fprintf(stderr, "subsets.\n"); for (uint32 iter=0; iter<500; iter++) { uint32 beg = mtRandom32(mtctx) % maxLen; uint32 end = (beg + 10000 < maxLen) ? 
(beg + 10000) : maxLen; //fprintf(stderr, "subsets - "uint32FMT"-"uint32FMT"\n", beg, end); MS->setBaseRange(beg, end); err += testMerStreamOperation(MS, beg, end, sepLen); MS->rewind(); err += testMerStreamOperation(MS, beg, end, sepLen); } delete MS; return(err); } int main(int argc, char **argv) { uint32 minLen = 1000; uint32 maxLen = 200000; uint32 numSeq = 1000; uint32 err = 0; // Very simple merStream test { fprintf(stdout, "merStream(kMerBuilder(20), ...)\n"); merStream *MS = new merStream(new kMerBuilder(20), new seqStream("GGGTCAACTCCGCCCGCACTCTAGC", 25), true, true); uint32 SP[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; testMerStreamSimple(MS, 20, "GGGTCAACTCCGCCCGCACTCTAGC", SP); MS->rewind(); testMerStreamSimple(MS, 20, "GGGTCAACTCCGCCCGCACTCTAGC", SP); MS->rewind(); MS->rewind(); testMerStreamSimple(MS, 20, "GGGTCAACTCCGCCCGCACTCTAGC", SP); delete MS; fprintf(stdout, "merStream(kMerBuilder(20), ...) - PASSED\n"); } { fprintf(stdout, "merStream(kMerBuilder(20, 1), ...)\n"); merStream *MS = new merStream(new kMerBuilder(20, 1), new seqStream("GGGAATTTTCAACTCCGCCCGCACTCTAGCCCAAA", 35), true, true); uint32 SP[10] = { 0, 3, 5, 9, 10, 12 }; testMerStreamSimple(MS, 20, "GATCACTCGCGCACTCTAGCA", SP); MS->rewind(); testMerStreamSimple(MS, 20, "GATCACTCGCGCACTCTAGCA", SP); MS->rewind(); MS->rewind(); testMerStreamSimple(MS, 20, "GATCACTCGCGCACTCTAGCA", SP); delete MS; fprintf(stdout, "merStream(kMerBuilder(20, 1), ...) 
- PASSED\n"); } // Move on to harder tests generateCorrectSequence(minLen, maxLen, numSeq); // Tests seqStream(string, strlen) construction method fprintf(stderr, "err += testMerStream(new kMerBuilder(20, 0, 0L), 1, '.', 1);\n"); err += testMerStream(new kMerBuilder(20, 0, 0L), 1, '.', 1); fprintf(stderr, "err += testMerStream(new kMerBuilder(22, 1, 0L), 1, '.', 1);\n"); err += testMerStream(new kMerBuilder(22, 1, 0L), 1, '.', 1); // Tests seqStream(filename) construction method fprintf(stderr, "err += testMerStream(new kMerBuilder(20, 0, 0L), numSeq, '.', 1);\n"); err += testMerStream(new kMerBuilder(20, 0, 0L), numSeq, '.', 1); fprintf(stderr, "err += testMerStream(new kMerBuilder(28, 0, 0L), numSeq, '.', 100);\n"); err += testMerStream(new kMerBuilder(28, 0, 0L), numSeq, '.', 100); fprintf(stderr, "err += testMerStream(new kMerBuilder(24, 4, 0L), numSeq, '.', 100);\n"); err += testMerStream(new kMerBuilder(24, 4, 0L), numSeq, '.', 100); removeCorrectSequence(numSeq); if (err == 0) fprintf(stderr, "Success!\n"); exit(err > 0); } kmer-code-2013-trunk/libseq/merStream.H0000644000000000000000000000607112322046702016477 0ustar rootroot#ifndef MERSTREAM_H #define MERSTREAM_H #include "util++.H" #include "bio++.H" #include "seqFile.H" #include "seqStream.H" // // merStream needs exclusive use of a kMerBuilder and a seqStream. // // The kMerBuilder can be used over and over. I think snapper is the // only one that does this though. // // The seqStream can be used elsewhere, but ONLY for looking up // positions. // // The merStream does NOT assume ownership of either of these, unless // the own flags are set. // // The stream is not valid until nextMer is called; allowing loops of // while (MS->nextMer()) { // process(MS->theFMer()); // } // // setRange() positions refer to ACGT letters in the input, NOT mers. // rewind() repositions the file to the start of the range. 
// class merStream { public: merStream(kMerBuilder *kb, seqStream *ss, bool kbown=false, bool ssown=false); ~merStream(); kMer const & theFMer(void) { assert(_invalid == false); return(_kb->theFMer()); }; kMer const & theRMer(void) { assert(_invalid == false); return(_kb->theRMer()); }; kMer const & theCMer(void) { assert(_invalid == false); return(_kb->theCMer()); }; bool nextMer(uint32 skip=0) { char ch; do { ch = _ss->get(); if (ch == 0) return(false); } while ((_kb->addBase(ch) == true) || (skip-- > 0)); _kb->mask(); _invalid = false; #if 0 char merstring[256]; fprintf(stderr, "merStream::nextMer()-- seqPos="uint64FMT" merPos="uint64FMT" span="uint32FMT" base0span="uint32FMT" end="uint64FMT" %s %s\n", _ss->strPos(), _ss->strPos() - theFMer().getMerSpan(), theFMer().getMerSpan(), _kb->baseSpan(0), _end, _kb->theFMer().merToString(merstring), (_ss->strPos() - theFMer().getMerSpan() < _end) ? "" : "STOP"); #endif // The mer is out of range if: // o it begins at or past the _end // o the span of the first base ends at or past the _end // // If the mer isn't spaced, the base span is always 1. If it is spaced, the span will be // between 1 and ... who knows. 
return(_ss->strPos() - theFMer().getMerSpan() + _kb->baseSpan(0) - 1 < _end); }; void rewind(void); void rebuild(void); void setBaseRange(uint64 beg, uint64 end); uint64 thePositionInSequence(void) { assert(_invalid == false); return(_ss->seqPos() - theFMer().getMerSpan()); }; uint64 thePositionInStream(void) { assert(_invalid == false); return(_ss->strPos() - theFMer().getMerSpan()); }; uint64 theSequenceNumber(void) { assert(_invalid == false); return(_ss->seqIID()); }; uint64 approximateNumberOfMers(void); private: kMerBuilder *_kb; seqStream *_ss; bool _kbdelete; bool _ssdelete; bool _invalid; uint64 _beg; uint64 _end; }; #endif // MERSTREAM_H kmer-code-2013-trunk/libseq/seqCache.C0000644000000000000000000000767612375772744016307 0ustar rootroot#include "seqCache.H" #include "seqFactory.H" #include "alphabet.h" #undef DEBUG seqCache::seqCache(const char *filename, uint32 cachesize, bool verbose) { _fb = openSeqFile(filename); _idToGetNext = 0; _allSequencesLoaded = false; _reportLoading = verbose; _cacheMap = 0L; _cacheSize = 0; _cacheNext = 0; _cache = 0L; setCacheSize(cachesize); } seqCache::~seqCache() { flushCache(); delete _fb; delete [] _cacheMap; delete [] _cache; } uint32 seqCache::getSequenceIID(char *name) { uint32 iid = ~uint32ZERO; // If the name is all integers, AND below the number of sequences // we have, return that, otherwise, look it up. 
// bool isInt = true; char *x = name; while (*x) { if ((*x < '0') || ('9' < *x)) isInt = false; x++; } if (isInt) iid = strtouint32(name, 0L); if (iid >= _fb->getNumberOfSequences()) iid = _fb->find(name); #ifdef DEBUG fprintf(stderr, "seqCache::getSequenceIID()-- '%s' -> "uint32FMT"\n", name, iid); #endif return(iid); } seqInCore * seqCache::getSequenceInCore(uint32 iid) { uint32 cacheID = ~uint32ZERO; seqInCore *retSeq = 0L; if ((_fb->randomAccessSupported() == true) && (iid >= _fb->getNumberOfSequences())) return(0L); if (_allSequencesLoaded == true) { cacheID = iid; } else if ((_cacheSize > 0) && (_cacheMap[iid] != ~uint32ZERO)) { cacheID = _cacheMap[iid]; } else { uint32 hLen=0, hMax=0, sLen=0, sMax=0; char *h=0L, *s=0L; if (_fb->getSequence(iid, h, hLen, hMax, s, sLen, sMax) == false) return(0L); retSeq = new seqInCore(iid, h, hLen, s, sLen, true); // Remove any old cached sequence, then store the one we just made if (_cache) { if (_cache[_cacheNext]) { _cacheMap[_cache[_cacheNext]->getIID()] = ~uint32ZERO; delete _cache[_cacheNext]; } _cache[_cacheNext] = retSeq; _cacheMap[iid] = _cacheNext; cacheID = _cacheNext; retSeq = 0L; _cacheNext = (_cacheNext + 1) % _cacheSize; } } // If no retSeq set, make a copy of the one we have in the cache. 
if ((retSeq == 0L) && (cacheID != ~uint32ZERO)) retSeq = new seqInCore(iid, _cache[cacheID]->header(), _cache[cacheID]->headerLength(), _cache[cacheID]->sequence(), _cache[cacheID]->sequenceLength(), false); return(retSeq); } void seqCache::setCacheSize(uint32 cachesize) { uint32 ns = _fb->getNumberOfSequences(); flushCache(); if (cachesize == 0) { _cacheMap = 0L; _cacheSize = 0; _cacheNext = 0; _cache = 0L; return; } _cacheMap = new uint32 [ns]; _cacheSize = cachesize; _cacheNext = 0; _cache = new seqInCore * [_cacheSize]; for (uint32 i=0; igetNumberOfSequences(); _cacheNext = 0; _cache = new seqInCore * [_cacheSize]; for (uint32 iid=0; iid<_cacheSize; iid++) { uint32 hLen=0, hMax=0, sLen=0, sMax=0; char *h=0L, *s=0L; if (_fb->getSequence(iid, h, hLen, hMax, s, sLen, sMax) == false) fprintf(stderr, "seqCache::loadAllSequences()-- Failed to load iid "uint32FMT".\n", iid), exit(1); _cache[iid] = new seqInCore(iid, h, hLen, s, sLen, true); } _allSequencesLoaded = true; } void seqCache::flushCache(void) { if (_fb == 0L) return; if (_cacheMap) { uint32 ns = _fb->getNumberOfSequences(); for (uint32 i=0; i= _filesMax) { fprintf(stderr, "seqFactory::registerFile()-- Wow! You registered lots of files! Now fix %s at line %d.\n", __FILE__, __LINE__); exit(1); } _files[_filesNum++] = f; } seqFile * seqFactory::openFile(const char *name) { seqFile *n = 0L; for (uint32 i=0; i<_filesNum; i++) { n = _files[i]->openFile(name); if (n) return(n); } fprintf(stderr, "seqFactory::registerFile()-- Cannot determine type of file '%s'. 
Tried:\n", name); for (uint32 i=0; i<_filesNum; i++) fprintf(stderr, "seqFactory::registerFile()-- '%s'\n", _files[i]->getFileTypeName()); exit(1); return(n); } kmer-code-2013-trunk/libseq/seqStore.C0000644000000000000000000004041712322046702016342 0ustar rootroot #include "seqStore.H" #include "seqCache.H" #include "alphabet.h" // Says 'kmerSeqStoreFile' #define SEQSTORE_MAGICNUMBER1 0x5371655372656d6bULL #define SEQSTORE_MAGICNUMBER2 0x656c694665726f74ULL seqStore::seqStore(const char *filename) { clear(); strcpy(_filename, filename); errno = 0; FILE *F = fopen(_filename, "r"); if (errno) fprintf(stderr, "seqStore::seqStore()-- Failed to open '%s': %s\n", _filename, strerror(errno)), exit(1); fread(&_header, sizeof(seqStoreHeader), 1, F); fclose(F); //_indexBPF = new bitPackedFile(_filename, _header._indexStart); //_blockBPF = new bitPackedFile(_filename, _header._blockStart); //_namesBPF = new bitPackedFile(_filename, _header._namesStart); _bpf = new bitPackedFile(_filename, sizeof(seqStoreHeader)); _numberOfSequences = _header._numberOfSequences; } seqStore::seqStore() { clear(); } seqStore::~seqStore() { //if ((_filename) && (_filename[0] != 0)) // fprintf(stderr, "Closing seqStore '%s'\n", _filename); delete _bpf; delete [] _index; delete [] _block; delete [] _names; delete _indexBPF; delete _blockBPF; delete _namesBPF; } seqFile * seqStore::openFile(const char *filename) { uint64 magic1, magic2; struct stat st; errno = 0; stat(filename, &st); if (errno) return(0L); if ((st.st_mode & S_IFREG) == 0) return(0L); // Check the magic. Fail if not correct. errno = 0; FILE *F = fopen(filename, "r"); if (errno) return(0L); fread(&magic1, sizeof(uint64), 1, F); fread(&magic2, sizeof(uint64), 1, F); fclose(F); if ((magic1 != SEQSTORE_MAGICNUMBER1) || (magic2 != SEQSTORE_MAGICNUMBER2)) return(0L); return(new seqStore(filename)); } // If this proves far too slow, rewrite the _names string to separate IDs with 0xff, then use // strstr on the whole thing. 
To find the ID, scan down the string counting the number of 0xff's. // // Similar code is used for fastaFile::find() // uint32 seqStore::find(const char *sequencename) { if (_names == NULL) loadIndex(); char *ptr = _names; for (uint32 iid=0; iid < _header._numberOfSequences; iid++) { if (strcmp(sequencename, ptr) == 0) return(iid); while (*ptr) ptr++; ptr++; } return(~uint32ZERO); } uint32 seqStore::getSequenceLength(uint32 iid) { if (_index == NULL) loadIndex(); return((iid < _header._numberOfSequences) ? _index[iid]._seqLength : 0); } bool seqStore::getSequence(uint32 iid, char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax) { if (_index == NULL) loadIndex(); if (iid >= _header._numberOfSequences) { fprintf(stderr, "seqStore::getSequence(full)-- iid "uint32FMT" more than number of sequences "uint32FMT"\n", iid, _header._numberOfSequences); return(false); } if (sMax == 0) s = 0L; // So the delete below doesn't bomb if (hMax == 0) h = 0L; if (sMax < _index[iid]._seqLength + 1) { sMax = _index[iid]._seqLength + 1024; delete [] s; s = new char [sMax]; } if (hMax < _index[iid]._hdrLength + 1) { hMax = _index[iid]._hdrLength + 1024; delete [] h; h = new char [hMax]; } hLen = 0; sLen = 0; // Copy the defline into h memcpy(h, _names + _index[iid]._hdrPosition, _index[iid]._hdrLength); h[_index[iid]._hdrLength] = 0; // Decode and copy the sequence into s uint32 seqLen = _index[iid]._seqLength; uint32 block = _index[iid]._block; uint64 seekpos = _index[iid]._seqPosition * 2; _bpf->seek(seekpos); while (sLen < seqLen) { assert(_bpf->tell() == _block[block]._bpf * 2); assert(sLen == _block[block]._pos); if (_block[block]._isACGT == 0) { memset(s + sLen, 'N', _block[block]._len); sLen += _block[block]._len; } else { for (uint32 xx=0; xx<_block[block]._len; xx++) { s[sLen++] = bitsToLetter[_bpf->getBits(2)]; } } block++; } s[sLen] = 0; return(true); } bool seqStore::getSequence(uint32 iid, uint32 bgn, uint32 end, char *s) { if (_index == NULL) 
loadIndex(); if (iid >= _header._numberOfSequences) { fprintf(stderr, "seqStore::getSequence(part)-- iid "uint32FMT" more than number of sequences "uint32FMT"\n", iid, _header._numberOfSequences); return(false); } if (bgn >= end) { fprintf(stderr, "seqStore::getSequence(part)-- for iid "uint32FMT"; invalid bgn="uint32FMT" end="uint32FMT"; seqLen="uint32FMT"\n", iid, bgn, end, _index[iid]._seqLength); return(false); } // Decode and copy the sequence into s uint32 block = _index[iid]._block; uint32 sLen = 0; // length of sequence we've copied uint32 sPos = 0; // position in the sequence // Skip blocks before we care. // while (sPos + _block[block]._len < bgn) { sPos += _block[block]._len; block++; } assert(sPos == _block[block]._pos); // Move into the block (we could just set sPos = bgn...). sPos += bgn - _block[block]._pos; // Handle the partial block. Copy what is left in the block, or // the requested size, whichever is smaller. uint32 partLen = MIN((_block[block]._pos + _block[block]._len - bgn), (end - bgn)); if (_block[block]._isACGT == 0) { memset(s, 'N', partLen); sLen += partLen; _bpf->seek(_block[block+1]._bpf * 2); } else { _bpf->seek((_block[block]._bpf + bgn - _block[block]._pos) * 2); for (uint32 xx=0; xxgetBits(2)]; } sPos += partLen; block++; while (sPos < end) { assert(_bpf->tell() == _block[block]._bpf * 2); assert(sPos == _block[block]._pos); // Like the partial block above, pick how much to copy as the // smaller of the block size and what is left to fill. 
partLen = MIN((_block[block]._len), (end - sPos)); if (_block[block]._isACGT == 0) { memset(s + sLen, 'N', partLen); sLen += partLen; } else { for (uint32 xx=0; xxgetBits(2)]; } sPos += partLen; block++; } s[sLen] = 0; return(true); } void seqStore::clear(void) { memset(_filename, 0, FILENAME_MAX); memset(_typename, 0, FILENAME_MAX); strcpy(_typename, "seqStore"); _numberOfSequences = 0; _bpf = 0L; memset(&_header, 0, sizeof(seqStoreHeader)); _index = 0L; _block = 0L; _names = 0L; _indexBPF = 0L; _blockBPF = 0L; _namesBPF = 0L; _lastIIDloaded = ~uint32ZERO; } void seqStore::loadIndex(void) { if (_index) return; delete _indexBPF; _indexBPF = 0L; delete _blockBPF; _blockBPF = 0L; delete _namesBPF; _namesBPF = 0L; errno = 0; FILE *F = fopen(_filename, "r"); if (errno) fprintf(stderr, "seqStore::seqStore()-- Failed to open '%s': %s\n", _filename, strerror(errno)), exit(1); fread(&_header, sizeof(seqStoreHeader), 1, F); //fprintf(stderr, "seqStore::seqStore()-- Allocating space for "uint32FMT" sequences ("uint64FMT"MB)\n", _header._numberOfSequences, _header._numberOfSequences * sizeof(seqStoreIndex) / 1024 / 1024); //fprintf(stderr, "seqStore::seqStore()-- Allocating space for "uint32FMT" blocks ("uint64FMT"MB)\n", _header._numberOfBlocks, _header._numberOfBlocks * sizeof(seqStoreBlock) / 1024 / 1024); //fprintf(stderr, "seqStore::seqStore()-- Allocating space for "uint32FMT" labels ("uint64FMT"MB)\n", _header._namesLength, _header._namesLength * sizeof(char) / 1024 / 1024); _index = new seqStoreIndex [_header._numberOfSequences]; _block = new seqStoreBlock [_header._numberOfBlocks]; _names = new char [_header._namesLength]; fseeko(F, _header._indexStart, SEEK_SET); fread( _index, sizeof(seqStoreIndex), _header._numberOfSequences, F); #if 0 for (uint32 i=0; i<_header._numberOfSequences; i++) fprintf(stderr, "IDX[%4u] hdrPos=%u hdrLen=%u seqPos=%llu seqLen=%u block=%u\n", i, _index[i]._hdrPosition, _index[i]._hdrLength, _index[i]._seqPosition, _index[i]._seqLength, 
_index[i]._block); #endif fseeko(F, _header._blockStart, SEEK_SET); fread( _block, sizeof(seqStoreBlock), _header._numberOfBlocks, F); fseeko(F, _header._namesStart, SEEK_SET); fread( _names, sizeof(char), _header._namesLength, F); if (errno) fprintf(stderr, "seqStore::seqStore()-- Failed to read index from '%s': %s\n", _filename, strerror(errno)), exit(1); fclose(F); } static void addSeqStoreBlock(uint32 &BLOKmax, uint32 &BLOKlen, seqStoreBlock* &BLOK, seqStoreBlock &b, uint32 &nBlockACGT, uint32 &nBlockGAP, uint64 &nACGT) { //fprintf(stderr, "addSeqStoreBlock()-- BLOK max=%u len=%u ACGT=%u GAP=%u nACGT=%lu\n", // BLOKmax, BLOKlen, nBlockACGT, nBlockGAP, nACGT); if (b._len == 0) return; if (b._isACGT == 1) { nBlockACGT++; nACGT += b._len; } else { nBlockGAP++; } BLOK[BLOKlen++] = b; if (BLOKlen >= BLOKmax) { BLOKmax *= 2; seqStoreBlock *nb = new seqStoreBlock [BLOKmax]; memcpy(nb, BLOK, BLOKlen * sizeof(seqStoreBlock)); delete [] BLOK; BLOK = nb; } } void constructSeqStore(char *filename, seqCache *inputseq) { fprintf(stderr, "constructSeqStore()-- constructing seqStore '%s' from seqCache '%s' of type '%s'.\n", filename, inputseq->getSourceName(), inputseq->getFileTypeName()); seqStoreHeader HEAD; memset(&HEAD, sizeof(seqStoreHeader), 0); bitPackedFile *DATA = new bitPackedFile(filename, sizeof(seqStoreHeader), true); uint32 INDXmax = 1048576; seqStoreIndex *INDX = new seqStoreIndex [INDXmax]; uint32 BLOKmax = 1048576; uint32 BLOKlen = 0; seqStoreBlock *BLOK = new seqStoreBlock [BLOKmax]; uint32 NAMEmax = 32 * 1024 * 1024; uint32 NAMElen = 0; char *NAME = new char [NAMEmax]; seqInCore *sic = inputseq->getSequenceInCore(); uint64 nACGT = 0; uint32 nBlockACGT = 0; uint32 nBlockGAP = 0; uint32 nSequences = 0; speedCounter C(" reading sequences %7.0f sequences -- %5.0f sequences/second\r", 1.0, 0x1ffff, true); while (sic != NULL) { if (sic->sequence()) { char *seq = sic->sequence(); seqStoreBlock b; if (nSequences >= INDXmax) { seqStoreIndex *I = new 
seqStoreIndex[INDXmax * 2]; memcpy(I, INDX, sizeof(seqStoreIndex) * nSequences); delete [] INDX; INDXmax *= 2; INDX = I; } INDX[nSequences]._hdrPosition = NAMElen; INDX[nSequences]._hdrLength = sic->headerLength(); INDX[nSequences]._seqPosition = DATA->tell() / 2; INDX[nSequences]._seqLength = sic->sequenceLength(); INDX[nSequences]._block = BLOKlen; #if 0 fprintf(stderr, "ADD SEQUENCE hdr pos=%u len=%u seq pos=%u len=%u blok=%u\n", INDX[nSequences]._hdrPosition, INDX[nSequences]._hdrLength, INDX[nSequences]._seqPosition, INDX[nSequences]._seqLength, INDX[nSequences]._block); #endif if (sic->sequenceLength() > SEQSTOREBLOCK_MAXPOS) fprintf(stderr, "constructSeqStore()-- sequence %s too long, must be shorter than "uint64FMT" Gbp.\n", sic->header(), SEQSTOREBLOCK_MAXPOS / 1024 / 1024 / 1024), exit(1); if (sic->getIID() > SEQSTOREBLOCK_MAXPOS) fprintf(stderr, "constructSeqStore()-- too many sequences, must be fewer than "uint64FMT".\n", SEQSTOREBLOCK_MAXIID), exit(1); if (NAMElen + sic->headerLength() + 1 > NAMEmax) { NAMEmax += 32 * 1024 * 1024; char *nm = new char [NAMEmax]; memcpy(nm, NAME, sizeof(char) * NAMElen); delete [] NAME; NAME = nm; } strcpy(NAME + NAMElen, sic->header()); NAMElen += sic->headerLength() + 1; b._isACGT = 0; b._iid = sic->getIID(); b._pos = 0; b._len = 0; b._bpf = DATA->tell() / 2; for (uint32 p=0; psequenceLength(); p++) { uint64 bits = letterToBits[seq[p]]; // If the length of the current block is too big (which would // soon overflow the bit field storing length) write out a // block and reset the length. // if (b._len == SEQSTOREBLOCK_MAXLEN) { addSeqStoreBlock(BLOKmax, BLOKlen, BLOK, b, nBlockACGT, nBlockGAP, nACGT); b._pos = p; b._len = 0; b._bpf = DATA->tell() / 2; } if (bits == 0xff) { // This letter is NOT ACGT. If the current block is an ACGT block, write it // and reset. 
// if (b._isACGT == 1) { addSeqStoreBlock(BLOKmax, BLOKlen, BLOK, b, nBlockACGT, nBlockGAP, nACGT); b._isACGT = 0; b._iid = sic->getIID(); b._pos = p; b._len = 0; b._bpf = DATA->tell() / 2; } } else { // This letter is ACGT. If the current block is NOT an ACGT block, write it // and reset. // if (b._isACGT == 0) { addSeqStoreBlock(BLOKmax, BLOKlen, BLOK, b, nBlockACGT, nBlockGAP, nACGT); b._isACGT = 1; b._iid = sic->getIID(); b._pos = p; b._len = 0; b._bpf = DATA->tell() / 2; } } // Always add one to the length of the current block, and // write out the base if the letter is ACGT. // b._len++; if (bits != 0xff) DATA->putBits(bits, 2); } // Emit the last block // addSeqStoreBlock(BLOKmax, BLOKlen, BLOK, b, nBlockACGT, nBlockGAP, nACGT); } // If there is no sequence, the index record for this sequence is left blank. // nSequences++; C.tick(); delete sic; sic = inputseq->getSequenceInCore(); } // And a sentinel EOF block -- gets the last position in the file, // useful for the binary search. We always have a space block at // the end of the list, but we don't care if we just used the last // block (and so we don't bother to reallocate the array if it is // full). BLOK[BLOKlen]._isACGT = 0; BLOK[BLOKlen]._iid = uint32MASK(32); BLOK[BLOKlen]._pos = uint32MASK(31); BLOK[BLOKlen]._len = 0; BLOK[BLOKlen]._bpf = DATA->tell() / 2; BLOKlen++; // Update the header, assemble the final file. 
delete DATA; HEAD._magic[0] = SEQSTORE_MAGICNUMBER1; HEAD._magic[1] = SEQSTORE_MAGICNUMBER2; HEAD._pad = uint32ZERO; HEAD._numberOfSequences = nSequences; HEAD._numberOfACGT = nACGT; HEAD._numberOfBlocksACGT = nBlockACGT; HEAD._numberOfBlocksGAP = nBlockGAP; HEAD._numberOfBlocks = BLOKlen; HEAD._namesLength = NAMElen; HEAD._indexStart = uint64ZERO; HEAD._blockStart = uint64ZERO; HEAD._namesStart = uint64ZERO; errno = 0; FILE *F = fopen(filename, "r+"); if (errno) fprintf(stderr, "constructSeqStore()-- Failed to reopen '%s' to write data: %s\n", filename, strerror(errno)), exit(1); fseeko(F, 0, SEEK_END); HEAD._indexStart = ftello(F); fwrite(INDX, sizeof(seqStoreIndex), HEAD._numberOfSequences, F); fseeko(F, 0, SEEK_END); HEAD._blockStart = ftello(F); fwrite(BLOK, sizeof(seqStoreBlock), HEAD._numberOfBlocks, F); fseeko(F, 0, SEEK_END); HEAD._namesStart = ftello(F); fwrite(NAME, sizeof(char), HEAD._namesLength, F); fseeko(F, 0, SEEK_SET); fwrite(&HEAD, sizeof(seqStoreHeader), 1, F); fclose(F); if (errno) fprintf(stderr, "constructSeqStore()-- Failed to write data to '%s': %s\n", filename, strerror(errno)), exit(1); delete [] INDX; delete [] BLOK; delete [] NAME; // ESTmapper depends on this output. fprintf(stderr, "constructSeqStore()-- seqStore '%s' constructed ("uint32FMT" sequences, "uint64FMT" ACGT letters, "uint32FMT" ACGT blocks, "uint32FMT" GAP blocks).\n", filename, HEAD._numberOfSequences, HEAD._numberOfACGT, HEAD._numberOfBlocksACGT, HEAD._numberOfBlocksGAP); } kmer-code-2013-trunk/libseq/fastqStdin.C0000644000000000000000000001404012375772744016672 0ustar rootroot#include "fastqStdin.H" #include "alphabet.h" fastqStdin::fastqStdin(const char *filename) { clear(); #ifdef DEBUG fprintf(stderr, "fastqStdin::fastqStdin()-- '%s'\n", (filename) ? 
filename : "NULLPOINTER"); #endif if (filename == 0L) { strcpy(_filename, "(stdin)"); _rb = new readBuffer("-"); } else { _pipe = popen(filename, "r"); _rb = new readBuffer(_pipe); } } fastqStdin::fastqStdin() { clear(); } fastqStdin::~fastqStdin() { delete _rb; delete [] _header; delete [] _sequence; } seqFile * fastqStdin::openFile(const char *filename) { #ifdef DEBUG fprintf(stderr, "fastqStdin::openFile()-- '%s'\n", (filename) ? filename : "NULLPOINTER"); #endif if (((filename == 0L) && (isatty(fileno(stdin)) == 0)) || ((filename != 0L) && (filename[0] == '-') && (filename[1] == 0))) return(new fastqStdin(0L)); if (filename == 0L) return(0L); uint32 fl = strlen(filename); char cmd[32 + fl]; if ((filename[fl-3] == '.') && (filename[fl-2] == 'g') && (filename[fl-1] == 'z')) sprintf(cmd, "gzip -dc %s", filename); else if ((filename[fl-4] == '.') && (filename[fl-3] == 'b') && (filename[fl-2] == 'z') && (filename[fl-1] == '2')) sprintf(cmd, "bzip2 -dc %s", filename); else if ((filename[fl-3] == '.') && (filename[fl-2] == 'x') && (filename[fl-1] == 'z')) sprintf(cmd, "xz -dc %s", filename); else return(0L); return(new fastqStdin(cmd)); } uint32 fastqStdin::getNumberOfSequences(void) { if (_rb->peek() == 0) return(_nextIID); else return(_nextIID + 1); } uint32 fastqStdin::find(const char *sequencename) { fprintf(stderr, "fastqStdin::find()-- ERROR! Used for random access.\n"); assert(0); return(~uint32ZERO); } uint32 fastqStdin::getSequenceLength(uint32 iid) { if (iid == _nextIID) if (loadNextSequence(_header, _headerLen, _headerMax, _sequence, _sequenceLen, _sequenceMax) == false) return(0); if (iid + 1 != _nextIID) { fprintf(stderr, "fastqStdin::getSequence()-- ERROR! Used for random access. 
Requested iid=%u, at iid=%u\n", iid, _nextIID); assert(0); } return(strlen(_sequence)); } bool fastqStdin::getSequence(uint32 iid, char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax) { bool ret = true; #ifdef DEBUG fprintf(stderr, "fastqStdin::getSequence(full)-- "uint32FMT"\n", iid); #endif if (iid == _nextIID) if (loadNextSequence(_header, _headerLen, _headerMax, _sequence, _sequenceLen, _sequenceMax) == false) return(false); if (iid + 1 != _nextIID) { fprintf(stderr, "fastqStdin::getSequence(full)-- ERROR! Used for random access. Requested iid=%u, at iid=%u\n", iid, _nextIID); assert(0); } if (hLen < _headerMax) { delete [] h; hMax = _headerMax; h = new char [hMax]; } if (sLen < _sequenceMax) { delete [] s; sMax = _sequenceMax; s = new char [sMax]; } memcpy(h, _header, _headerLen + 1); hLen = _headerLen; memcpy(s, _sequence, _sequenceLen + 1); sLen = _sequenceLen; return(true); } bool fastqStdin::getSequence(uint32 iid, uint32 bgn, uint32 end, char *s) { #ifdef DEBUG fprintf(stderr, "fastqStdin::getSequence(part)-- "uint32FMT"\n", iid); #endif fprintf(stderr, "fastqStdin::getSequence(part)-- ERROR! Used for random access.\n"); assert(0); return(false); } void fastqStdin::clear(void) { memset(_filename, 0, FILENAME_MAX); memset(_typename, 0, FILENAME_MAX); _randomAccessSupported = false; strcpy(_typename, "FastQstream"); _numberOfSequences = ~uint32ZERO; _rb = 0L; _nextIID = 0; _pipe = 0L; _header = 0L; _headerLen = 0; _headerMax = 0; _sequence = 0L; _sequenceLen = 0; _sequenceMax = 0; } bool fastqStdin::loadNextSequence(char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax) { if (hMax == 0) { hMax = 2048; h = new char [hMax]; } if (sMax == 0) { sMax = 2048; s = new char [sMax]; } hLen = 0; sLen = 0; char x = _rb->read(); // Skip whitespace at the start of the sequence. while ((_rb->eof() == false) && (whitespaceSymbol[x] == true)) x = _rb->read(); // We should be at a '@' character now. Fail if not. 
if (_rb->eof() == true) return(false); if (x != '@') fprintf(stderr, "fastqStdin::loadNextSequence(part)-- ERROR: In %s, expected '@' at beginning of defline, got '%c' instead.\n", _filename, x), exit(1); // Skip the '@' in the defline x = _rb->read(); // Skip whitespace between the '@' and the defline while ((_rb->eof() == false) && (whitespaceSymbol[x] == true) && (x != '\r') && (x != '\n')) x = _rb->read(); // Copy the defline, until the first newline. while ((_rb->eof() == false) && (x != '\r') && (x != '\n')) { h[hLen++] = x; if (hLen >= hMax) { //fprintf(stderr, "realloc header\n"); hMax += 2048; char *H = new char [hMax]; memcpy(H, h, hLen); delete [] h; h = H; } x = _rb->read(); } h[hLen] = 0; // Skip whitespace between the defline and the sequence. while ((_rb->eof() == false) && (whitespaceSymbol[x] == true)) x = _rb->read(); // Copy the sequence, until EOF or the start of the QV bases. while ((_rb->eof() == false) && (_rb->peek() != '+')) { if (whitespaceSymbol[x] == false) { s[sLen++] = x; if (sLen >= sMax) { //fprintf(stderr, "realloc sequence\n"); sMax *= 2; char *S = new char [sMax]; memcpy(S, s, sLen); delete [] s; s = S; } } x = _rb->read(); } s[sLen] = 0; // Skip the rest of the QV id line and then the entire QV line. 
//x = _rb->read(); assert((_rb->eof() == true) || (x == '+')); while ((_rb->eof() == false) && (x != '\r') && (x != '\n')) x = _rb->read(); x = _rb->read(); while ((_rb->eof() == false) && (x != '\r') && (x != '\n')) x = _rb->read(); _nextIID++; return(true); } kmer-code-2013-trunk/libseq/test-correctSequence.H0000644000000000000000000000720212322046702020644 0ustar rootroot#ifndef TEST_CORRECTSEQUENCE_H #define TEST_CORRECTSEQUENCE_H //#define WITH_WHITESPACE struct correctSequence_t { char header[256]; uint32 headerLength; char *sequence; uint32 sequenceLength; }; correctSequence_t *correctSequence = 0L; mt_s *mtctx = 0L; char *chainSeq; uint32 *chainSeqPos; uint32 *chainSeqIID; uint64 *chainStrPos; void generateCorrectSequence(uint32 minLen, uint32 maxLen, uint32 numSeq) { char bases[4] = {'A', 'C', 'G', 'T'}; uint32 n = numSeq; uint32 s = minLen; uint32 l = maxLen; uint32 seed = (uint32)(getTime() * 1000); fprintf(stderr, "generateCorrectSequence()-- Using seed "uint32FMT"\n", seed); fprintf(stderr, "generateCorrectSequence()-- Generating "uint32FMT" sequences of length "uint32FMT" to "uint32FMT"\n", numSeq, minLen, maxLen); correctSequence = new correctSequence_t [n]; mtctx = mtInit(seed); FILE *F = fopen("test-correctSequence.fasta", "w"); for (uint32 i=0; i%s\n", correctSequence[i].header); for (uint32 r=mtRandom32(mtctx) % 4; r--; ) fprintf(F, "\n"); for (uint32 p=0; p%s\n", correctSequence[i].header); fprintf(F, "%s\n", correctSequence[i].sequence); #endif } for (uint32 r=mtRandom32(mtctx) % 4; r--; ) fprintf(F, "\n"); fclose(F); } void generateChainedAnswer(uint32 numSeq, char sep, uint32 sepLen) { uint32 maxLen = 0; for (uint32 i=0; ipeek() == 0) return(_nextIID); else return(_nextIID + 1); } uint32 fastaStdin::find(const char *sequencename) { fprintf(stderr, "fastaStdin::find()-- ERROR! 
Used for random access.\n"); assert(0); return(~uint32ZERO); } uint32 fastaStdin::getSequenceLength(uint32 iid) { if (iid == _nextIID) if (loadNextSequence(_header, _headerLen, _headerMax, _sequence, _sequenceLen, _sequenceMax) == false) return(0); if (iid + 1 != _nextIID) { fprintf(stderr, "fastaStdin::getSequenceLength()-- ERROR! Used for random access. Requested iid=%u, at iid=%u\n", iid, _nextIID); assert(0); } return(strlen(_sequence)); } bool fastaStdin::getSequence(uint32 iid, char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax) { bool ret = true; #ifdef DEBUG fprintf(stderr, "fastaStdin::getSequence(full)-- "uint32FMT"\n", iid); #endif if (iid == _nextIID) if (loadNextSequence(_header, _headerLen, _headerMax, _sequence, _sequenceLen, _sequenceMax) == false) return(false); if (iid + 1 != _nextIID) { fprintf(stderr, "fastaStdin::getSequence(full)-- ERROR! Used for random access. Requested iid=%u, at iid=%u\n", iid, _nextIID); assert(0); } if (hLen < _headerMax) { delete [] h; hMax = _headerMax; h = new char [hMax]; } if (sLen < _sequenceMax) { delete [] s; sMax = _sequenceMax; s = new char [sMax]; } memcpy(h, _header, _headerLen + 1); hLen = _headerLen; memcpy(s, _sequence, _sequenceLen + 1); sLen = _sequenceLen; return(true); } bool fastaStdin::getSequence(uint32 iid, uint32 bgn, uint32 end, char *s) { #ifdef DEBUG fprintf(stderr, "fastaStdin::getSequence(part)-- "uint32FMT"\n", iid); #endif fprintf(stderr, "fastaStdin::getSequence(part)-- ERROR! 
//  Reset every member to the "nothing open, nothing loaded" state.  Nothing
//  is freed here; the pointers are simply overwritten.
//
void
fastaStdin::clear(void) {
  memset(_filename, 0, FILENAME_MAX);
  memset(_typename, 0, FILENAME_MAX);

  //  A stream cannot be searched or rewound.
  _randomAccessSupported = false;

  strcpy(_typename, "FastAstream");

  //  All bits set.
  _numberOfSequences = ~uint32ZERO;

  _rb          = 0L;
  _nextIID     = 0;
  _pipe        = 0L;

  _header      = 0L;
  _headerLen   = 0;
  _headerMax   = 0;

  _sequence    = 0L;
  _sequenceLen = 0;
  _sequenceMax = 0;
}


//  Read the next FASTA record from the read buffer into h (defline, without
//  the '>') and s (sequence, whitespace removed).  Both are NUL-terminated;
//  hLen and sLen exclude the NUL.  The buffers are allocated on first use and
//  grown as needed.
//
//  Returns false if only whitespace remains before end of input.  Exits the
//  process if the first non-whitespace character is not '>'.  On success
//  _nextIID is incremented.
//
bool
fastaStdin::loadNextSequence(char *&h, uint32 &hLen, uint32 &hMax,
                             char *&s, uint32 &sLen, uint32 &sMax) {

  if (hMax == 0) {
    hMax = 2048;
    h    = new char [hMax];
  }

  if (sMax == 0) {
    sMax = 2048;
    s    = new char [sMax];
  }

  hLen = 0;
  sLen = 0;

  char   x   = _rb->read();

  //  Skip whitespace at the start of the sequence.
  while ((_rb->eof() == false) && (whitespaceSymbol[x] == true))
    x = _rb->read();

  //  We should be at a '>' character now.  Fail if not.
  if (_rb->eof() == true)
    return(false);
  if (x != '>')
    fprintf(stderr, "fastaStdin::loadNextSequence(part)-- ERROR: In %s, expected '>' at beginning of defline, got '%c' instead.\n",
            _filename, x), exit(1);

  //  Skip the '>' in the defline
  x = _rb->read();

  //  Skip whitespace between the '>' and the defline
  while ((_rb->eof() == false) && (whitespaceSymbol[x] == true) && (x != '\r') && (x != '\n'))
    x = _rb->read();

  //  Copy the defline, until the first newline.  The buffer is grown right
  //  after a store fills it, so there is always room for the NUL below.
  while ((_rb->eof() == false) && (x != '\r') && (x != '\n')) {
    h[hLen++] = x;
    if (hLen >= hMax) {
      //fprintf(stderr, "realloc header\n");
      hMax += 2048;
      char *H = new char [hMax];
      memcpy(H, h, hLen);
      delete [] h;
      h = H;
    }
    x = _rb->read();
  }
  h[hLen] = 0;

  //  Skip whitespace between the defline and the sequence.
  while ((_rb->eof() == false) && (whitespaceSymbol[x] == true))
    x = _rb->read();

  //  Copy the sequence, until EOF or the next '>'.
  //
  //  x holds the character being examined while peek() looks at the one
  //  after it; the loop stops as soon as peek() reports '>'.
  //  NOTE(review): the character held in x when the loop exits is never
  //  stored.  That is harmless if it is the newline before the next '>' --
  //  confirm the readBuffer peek()/eof() semantics for input that lacks a
  //  final newline.
  while ((_rb->eof() == false) && (_rb->peek() != '>')) {
    if (whitespaceSymbol[x] == false) {
      s[sLen++] = x;
      if (sLen >= sMax) {
        //fprintf(stderr, "realloc sequence\n");
        sMax *= 2;
        char *S = new char [sMax];
        memcpy(S, s, sLen);
        delete [] s;
        s = S;
      }
    }
    x = _rb->read();
  }
  s[sLen] = 0;

  _nextIID++;

  return(true);
}
// // HEADER // magic number // number of sequences // optional - alphabet size // optional - alphabet map (0x00 -> 'a', etc) // position of index start // position of data start // DATA // INDEX // position of sequence start in DATA // header length // sequence length // MAP // name to IID mapping struct seqStoreHeader { uint64 _magic[2]; uint32 _pad; uint32 _numberOfSequences; uint64 _numberOfACGT; uint32 _numberOfBlocksACGT; uint32 _numberOfBlocksGAP; uint32 _numberOfBlocks; uint32 _namesLength; uint64 _indexStart; uint64 _blockStart; uint64 _namesStart; }; // This index allows us to return a complete sequence // struct seqStoreIndex { uint32 _hdrPosition; // Offset into _names for the defline uint32 _hdrLength; // Length of the defline uint64 _seqPosition; // Offset into _bpf for the sequence data uint32 _seqLength; // Length, in bases, of the sequence uint32 _block; // The seqStoreBlock that starts this sequence }; // This index allows us to seek to a specific base in the // file of sequences. Each block is either: // ACGT - and has data // N - no data // It will map a specific ACGT location to the sequence, and the ID // of that sequence (seq ID and location in that sequence). 
// struct seqStoreBlock { uint64 _isACGT:1; // block is acgt uint64 _pos:32; // position in sequence uint64 _iid:32; // iid of the sequence we are in uint64 _len:23; // length of block uint64 _bpf:40; // position in the bit file of sequence }; #define SEQSTOREBLOCK_MAXPOS uint64MASK(32) #define SEQSTOREBLOCK_MAXIID uint64MASK(32) #define SEQSTOREBLOCK_MAXLEN uint64MASK(23) class seqStore : public seqFile { protected: seqStore(const char *filename); seqStore(); public: ~seqStore(); protected: seqFile *openFile(const char *filename); public: uint32 find(const char *sequencename); uint32 getSequenceLength(uint32 iid); bool getSequence(uint32 iid, char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax); bool getSequence(uint32 iid, uint32 bgn, uint32 end, char *s); private: void clear(void); void loadIndex(void); bitPackedFile *_bpf; seqStoreHeader _header; seqStoreIndex *_index; seqStoreBlock *_block; char *_names; bitPackedFile *_indexBPF; bitPackedFile *_blockBPF; bitPackedFile *_namesBPF; uint32 _lastIIDloaded; friend class seqFactory; }; // Construct a new seqStore 'filename' from input file 'inputseq'. 
// void constructSeqStore(char *filename, seqCache *inputseq); #endif // SEQSTORE_H kmer-code-2013-trunk/libseq/Make.include0000644000000000000000000000244512375772744016676 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../libutil/)/ LIBBIO/ :=$(realpath $/../libbio/)/ LIBSEQ/ :=$(realpath $/../libseq/)/ src := $/seqFile.H \ $/fastaFile.H $/fastaFile.C \ $/fastaStdin.H $/fastaStdin.C \ $/fastqFile.H $/fastqFile.C \ $/fastqStdin.H $/fastqStdin.C \ $/seqStore.H $/seqStore.C \ $/sffFile.H $/sffFile.C \ $/seqFactory.H $/seqFactory.C \ $/seqCache.H $/seqCache.C \ $/seqStream.H $/seqStream.C \ $/merStream.H $/merStream.C $/.CXX_SRCS :=$(filter %.C,${src}) $/test-seqCache.C $/test-seqStream.C $/test-merStream.C $/.CXX_INCS :=$(filter %.H,${src}) $/.CXX_EXES :=$/test-seqCache $/test-seqStream $/test-merStream $/.CXX_LIBS :=$/libseq.a $/.CLEAN := $/*.o $/libseq.a : ${$/.C_SRCS:.c=.o} ${$/.CXX_SRCS:.C=.o} $/test-seqCache : $/test-seqCache.o ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/test-seqStream : $/test-seqStream.o ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/test-merStream : $/test-merStream.o ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $(eval $/%.d $/%.o: CFLAGS += -I${LIBUTL/} -I${LIBBIO/} -I${LIBSEQ/}) $(eval $/%.d $/%.o: CXXFLAGS += -I${LIBUTL/} -I${LIBBIO/} -I${LIBSEQ/}) kmer-code-2013-trunk/libseq/fastaFile.H0000644000000000000000000000340512322046702016434 0ustar rootroot#ifndef FASTAFILE_H #define FASTAFILE_H #include "util++.H" #include "bio++.H" #include "seqFile.H" struct fastaFileHeader { uint64 _magic[2]; uint32 _numberOfSequences; // Number of sequences in the file uint32 _namesLength; // Bytes in the names uint64 _fastaFileSize; // st_size - size of file in bytes uint64 _fastaModificationTime; // st_mtime - time of last data modification uint64 _fastaCreationTime; // st_ctime - time of last file status change }; struct fastaFileIndex { uint64 _seqPosition; // Position of the sequence in the file 
uint32 _seqLength; // Length of the sequence (no whitespace counted) }; class fastaFile : public seqFile { protected: fastaFile(const char *filename); fastaFile(); public: ~fastaFile(); protected: seqFile *openFile(const char *filename); public: uint32 find(const char *sequencename); uint32 getSequenceLength(uint32 iid); bool getSequence(uint32 iid, char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax); bool getSequence(uint32 iid, uint32 bgn, uint32 end, char *s); private: void clear(void); void loadIndex(char *indexname); void constructIndex(void); readBuffer *_rb; fastaFileHeader _header; fastaFileIndex *_index; char *_names; uint32 _nextID; // Next sequence in the read buffer uint32 _gs_iid; uint32 _gs_pos; friend class seqFactory; }; #endif // FASTAFILE_H kmer-code-2013-trunk/libseq/sffFile.C0000644000000000000000000001223312322046702016106 0ustar rootroot#include "sffFile.H" // Lots of ths came from AS_GKP_sff.c sffFile::sffFile() { clear(); } sffFile::sffFile(const char *name) { clear(); strcpy(_filename, name); _rb = new readBuffer(name); _rb->read(&_header, 31); if (_header.magic_number != 0x2e736666) { _header.swap_endianess = 1; _header.magic_number = uint32Swap(_header.magic_number); _header.index_offset = uint64Swap(_header.index_offset); _header.index_length = uint32Swap(_header.index_length); _header.number_of_reads = uint32Swap(_header.number_of_reads); _header.header_length = uint16Swap(_header.header_length); _header.key_length = uint16Swap(_header.key_length); _header.number_of_flows_per_read = uint16Swap(_header.number_of_flows_per_read); } assert(_header.magic_number == 0x2e736666); assert(_header.number_of_flows_per_read < SFF_NUMBER_OF_FLOWS_MAX); assert(_header.key_length < SFF_KEY_SEQUENCE_MAX); _rb->read(_header.flow_chars, sizeof(char) * _header.number_of_flows_per_read); _rb->read(_header.key_sequence, sizeof(char) * _header.key_length); _firstReadLocation = _header.header_length; // The spec says the index might be 
here, however, all files I've // seen have the index at the end of the file. // if ((_header.index_length > 0) && (_header.index_offset == _header.header_length)) _firstReadLocation += _header.index_length; // Index // _index = new sffIndex [_header.number_of_reads]; for (uint64 i=0; i<_header.number_of_reads; i++) { uint64 pos = _rb->tell(); _rb->read(&_read, 16); if (_header.swap_endianess) { _read.read_header_length = uint16Swap(_read.read_header_length); _read.name_length = uint16Swap(_read.name_length); _read.number_of_bases = uint32Swap(_read.number_of_bases); } _index[i]._seqPos = pos; _index[i]._seqLen = _read.number_of_bases; _index[i]._namLen = _read.name_length; pos += _read.read_header_length; pos += sizeof(uint16) * _header.number_of_flows_per_read; pos += sizeof(uint8) * _read.number_of_bases; pos += sizeof(char) * _read.number_of_bases; pos += sizeof(uint8) * _read.number_of_bases; pos += (_header.number_of_flows_per_read * sizeof(uint16) + _read.number_of_bases * sizeof(uint8) + _read.number_of_bases * sizeof(char) + _read.number_of_bases * sizeof(uint8)) % 8; _rb->seek(pos); } // // Index _rb->seek(_firstReadLocation); _numberOfSequences = _header.number_of_reads; } sffFile::~sffFile() { delete _rb; delete [] _index; } //////////////////////////////////////// seqFile * sffFile::openFile(const char *name) { struct stat st; // Open the file, return if it matches the SFF magic_number. 
errno = 0; stat(name, &st); if (errno) return(0L); if ((st.st_mode & S_IFREG) == 0) return(0L); FILE *F = fopen(name, "r"); if (errno) { fprintf(stderr, "sffFile::openFile()- failed to open '%s': %s\n", name, strerror(errno)); return(0L); } uint32 magic_number = 0; safeRead(fileno(F), &magic_number, "sff magic_number", sizeof(uint32)); fclose(F); if ((magic_number == 0x2e736666) || (uint32Swap(magic_number) == 0x2e736666)) return(new sffFile(name)); return(0L); } bool sffFile::getSequence(uint32 iid, char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax) { if (iid > _header.number_of_reads) return(false); memset(&_read, 0, sizeof(sffRead)); _rb->seek(_index[iid]._seqPos); _rb->read(&_read, 16); if (_header.swap_endianess) { _read.read_header_length = uint16Swap(_read.read_header_length); _read.name_length = uint16Swap(_read.name_length); _read.number_of_bases = uint32Swap(_read.number_of_bases); _read.clip_quality_left = uint16Swap(_read.clip_quality_left); _read.clip_quality_right = uint16Swap(_read.clip_quality_right); _read.clip_adapter_left = uint16Swap(_read.clip_adapter_left); _read.clip_adapter_right = uint16Swap(_read.clip_adapter_right); } assert(_read.read_header_length < SFF_NAME_LENGTH_MAX); assert(_read.number_of_bases < SFF_NUMBER_OF_BASES_MAX); _rb->read(_read.name, sizeof(char) * _read.name_length); _read.name[_read.name_length] = 0; uint64 pos = _rb->tell(); pos += _read.read_header_length; pos += sizeof(uint16) * _header.number_of_flows_per_read; pos += sizeof(uint8) * _read.number_of_bases; _rb->seek(pos); _rb->read(_read.bases, sizeof(char) * _read.number_of_bases); _read.bases[_read.number_of_bases] = 0; return(true); } bool sffFile::getSequence(uint32 iid, uint32 bgn, uint32 end, char *s) { if (iid > _header.number_of_reads) return(false); // Same as above, mostly. 
return(false); } void sffFile::clear(void) { memset(_filename, 0, FILENAME_MAX); memset(_typename, 0, FILENAME_MAX); strcpy(_typename, "SFF"); _numberOfSequences = 0; _rb = 0L; memset(&_header, 0, sizeof(sffHeader)); memset(&_read, 0, sizeof(sffRead)); _index = 0L; _firstReadLocation = 0; _readIID = 0; } kmer-code-2013-trunk/libseq/fastaFile.C0000644000000000000000000003517012516022532016433 0ustar rootroot#include "fastaFile.H" #include "alphabet.h" #undef DEBUG #undef DEBUGINDEX // Says 'kmerFastaFileIdx' #define FASTA_MAGICNUMBER1 0x7473614672656d6bULL #define FASTA_MAGICNUMBER2 0x786449656c694661ULL fastaFile::fastaFile(const char *filename) { clear(); #ifdef DEBUG fprintf(stderr, "fastaFile::fastaFile()-- '%s'\n", (filename) ? filename : "NULLPOINTER"); #endif strcpy(_filename, filename); constructIndex(); _rb = new readBuffer(_filename); _numberOfSequences = _header._numberOfSequences; } fastaFile::fastaFile() { clear(); } fastaFile::~fastaFile() { delete _rb; delete [] _index; delete [] _names; } seqFile * fastaFile::openFile(const char *filename) { struct stat st; #ifdef DEBUG fprintf(stderr, "fastaFile::openFile()-- '%s'\n", (filename) ? filename : "NULLPOINTER"); #endif if (((filename == 0L) && (isatty(fileno(stdin)) == 0)) || ((filename != 0L) && (filename[0] == '-') && (filename[1] == 0))) return(0L); errno = 0; stat(filename, &st); if (errno) return(0L); if ((st.st_mode & S_IFREG) == 0) return(0L); // Otherwise, open and see if we can get the first sequence. We // assume it's fasta if we find a '>' denoting a defline the first // thing in the file. // // Use of a readBuffer here is a bit heavyweight, but it's safe and // easy. Opening a fastaFile isn't, after all, lightweight anyway. // fastaFile *f = 0L; readBuffer *r = new readBuffer(filename); char x = r->read(); while ((r->eof() == false) && (whitespaceSymbol[x] == true)) x = r->read(); // If we get a fasta record separator assume it's a fasta file. 
If // it's eof, the file is empty, and we might as well return this // fasta file and let the client deal with the lack of sequence. // if ((x == '>') || (r->eof() == true)) f = new fastaFile(filename); delete r; return(f); } uint32 fastaFile::find(const char *sequencename) { char *ptr = _names; // If this proves far too slow, rewrite the _names string to // separate IDs with 0xff, then use strstr on the whole thing. To // find the ID, scan down the string counting the number of 0xff's. // // Similar code is used for seqStore::find() for (uint32 iid=0; iid < _header._numberOfSequences; iid++) { //fprintf(stderr, "fastaFile::find()-- '%s' vs '%s'\n", sequencename, ptr); if (strcmp(sequencename, ptr) == 0) return(iid); while (*ptr) ptr++; ptr++; } return(~uint32ZERO); } uint32 fastaFile::getSequenceLength(uint32 iid) { #ifdef DEBUG fprintf(stderr, "fastaFile::getSequenceLength()-- "uint32FMT"\n", iid); #endif return((iid < _numberOfSequences) ? _index[iid]._seqLength : 0); } bool fastaFile::getSequence(uint32 iid, char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax) { #ifdef DEBUG fprintf(stderr, "fastaFile::getSequence(full)-- "uint32FMT"\n", iid); #endif // Assume there is no index. Without being horribly complicated // (as in the previous versions of this codebase) all we'd get from // having an index around is the length of the sequence. // // Previous versions used to use the index to tell if the sequence // was squeezed (and so a direct copy to the output), if it was // fixed width (mostly direct copies) or unknown. Now we just // assume it's unknown and go byte by byte. If speed is a concern, // use the seqFile instead. 
if (iid >= _header._numberOfSequences) { fprintf(stderr, "fastaFile::getSequence(full)-- iid "uint32FMT" more than number of sequences "uint32FMT"\n", iid, _header._numberOfSequences); return(false); } if (sMax == 0) { sMax = 2048; s = new char [sMax]; } if (hMax == 0) { hMax = 2048; h = new char [hMax]; } if ((_index) && (sMax < _index[iid]._seqLength)) { sMax = _index[iid]._seqLength; delete [] s; s = new char [sMax]; } hLen = 0; sLen = 0; #ifdef DEBUG fprintf(stderr, "fastaFile::getSequence(full)-- seek to iid="uint32FMT" at pos="uint32FMT"\n", iid, _index[iid]._seqPosition); #endif _rb->seek(_index[iid]._seqPosition); char x = _rb->read(); // Skip whitespace at the start of the sequence. while ((_rb->eof() == false) && (whitespaceSymbol[x] == true)) x = _rb->read(); // We should be at a '>' character now. Fail if not. if (_rb->eof()) return(false); if (x != '>') fprintf(stderr, "fastaFile::getSequence(full)-- ERROR1: In %s, expected '>' at beginning of defline, got '%c' instead.\n", _filename, x), exit(1); // Skip the '>' in the defline x = _rb->read(); // Skip whitespace between the '>' and the defline while ((_rb->eof() == false) && (whitespaceSymbol[x] == true) && (x != '\r') && (x != '\n')) x = _rb->read(); // Copy the defline, until the first newline. while ((_rb->eof() == false) && (x != '\r') && (x != '\n')) { h[hLen++] = x; if (hLen >= hMax) { hMax += 2048; char *H = new char [hMax]; memcpy(H, h, hLen); delete [] h; h = H; } x = _rb->read(); } h[hLen] = 0; // Skip whitespace between the defline and the sequence. while ((_rb->eof() == false) && (whitespaceSymbol[x] == true)) x = _rb->read(); // Copy the sequence, until EOF or the next '>'. 
while ((_rb->eof() == false) && (_rb->peek() != '>')) { if (whitespaceSymbol[x] == false) { s[sLen++] = x; if (sLen >= sMax) { if (sMax == 4294967295) // 4G - 1 fprintf(stderr, "fastaFile::getSequence()-- ERROR: sequence is too long; must be less than 4 Gbp.\n"), exit(1); if (sMax >= 2147483648) // 2G sMax = 4294967295; else sMax *= 2; char *S = new char [sMax]; memcpy(S, s, sLen); delete [] s; s = S; } } x = _rb->read(); } s[sLen] = 0; _nextID++; return(true); } // slow bool fastaFile::getSequence(uint32 iid, uint32 bgn, uint32 end, char *s) { if (iid >= _header._numberOfSequences) { fprintf(stderr, "fastaFile::getSequence(part)-- iid "uint32FMT" more than number of sequences "uint32FMT"\n", iid, _header._numberOfSequences); return(false); } #ifdef DEBUG fprintf(stderr, "fastaFile::getSequence(part)-- "uint32FMT"\n", iid); #endif // It is impossible to be efficient here; see the big comment in // the other getSequence() above. // // We can't even guess where to start scanning the sequence; we // just don't have any information about how much whitespace is in // the sequence. _rb->seek(_index[iid]._seqPosition); uint32 pos = 0; char x = _rb->read(); // Skip whitespace at the start of the sequence. while ((_rb->eof() == false) && (whitespaceSymbol[x] == true)) x = _rb->read(); // We should be at a '>' character now. Fail if not. if (_rb->eof()) return(false); if (x != '>') fprintf(stderr, "fastaFile::getSequence(part)-- ERROR2: In %s, expected '>' at beginning of defline, got '%c' instead.\n", _filename, x), exit(1); // Skip the defline. while ((_rb->eof() == false) && (x != '\r') && (x != '\n')) x = _rb->read(); // Skip whitespace between the defline and the sequence. while ((_rb->eof() == false) && (whitespaceSymbol[x] == true)) x = _rb->read(); // Skip sequence up until bgn. 
while ((_rb->eof() == false) && (pos < bgn)) { if (whitespaceSymbol[x] == false) pos++; x = _rb->read(); } // Copy sequence while ((_rb->eof() == false) && (pos < end)) { if (whitespaceSymbol[x] == false) s[pos++ - bgn] = x; x = _rb->read(); } s[pos - bgn] = 0; // Fail if we didn't copy enough stuff. return((pos == end) ? true : false); } void fastaFile::clear(void) { memset(_filename, 0, FILENAME_MAX); memset(_typename, 0, FILENAME_MAX); strcpy(_typename, "FastA"); _randomAccessSupported = true; _numberOfSequences = 0; _rb = 0L; memset(&_header, 0, sizeof(fastaFileHeader)); _index = 0L; _names = 0L; _nextID = 0; } void fastaFile::loadIndex(char *indexname) { struct stat fastastat; if (fileExists(indexname) == false) return; errno = 0; if (stat(_filename, &fastastat)) { fprintf(stderr, "fastaFile::constructIndex()-- stat of file '%s' failed: %s\n", _filename, strerror(errno)); return; } FILE *I = fopen(indexname, "r"); if (errno) { fprintf(stderr, "fastaFile::constructIndex()-- open of file '%s' failed: %s\n", indexname, strerror(errno)); return; } fread(&_header, sizeof(fastaFileHeader), 1, I); if ((_header._magic[0] != FASTA_MAGICNUMBER1) && (_header._magic[1] != FASTA_MAGICNUMBER2)) { fprintf(stderr, "fastaFile::constructIndex()-- magic mismatch.\n"); fclose(I); return; } #if 0 (_header._fastaModificationTime != (uint64)fastastat.st_mtime) (_header._fastaCreationTime != (uint64)fastastat.st_ctime) #endif if (_header._fastaFileSize != (uint64)fastastat.st_size) { fprintf(stderr, "fastaFile::constructIndex()-- stat mismatch.\n"); fclose(I); return; } _index = new fastaFileIndex [_header._numberOfSequences]; _names = new char [_header._namesLength]; fread(_index, sizeof(fastaFileIndex), _header._numberOfSequences, I); fread(_names, sizeof(char), _header._namesLength, I); #ifdef DEBUG fprintf(stderr, "fastaFile::constructIndex()-- '%s' LOADED\n", _filename); #endif fclose(I); return; } void fastaFile::constructIndex(void) { if (_index) return; // If the filename 
ends in '.fasta' then append a 'idx', // otherwise, append '.fastaidx'. char indexname[FILENAME_MAX]; strcpy(indexname, _filename); uint32 l = strlen(_filename); if ((l > 5) && (strcmp(_filename + l - 6, ".fasta") == 0)) strcat(indexname, "idx"); else strcat(indexname, ".fastaidx"); // If the index exists, suck it in and return. loadIndex(indexname); if (_index) return; #ifdef DEBUG fprintf(stderr, "fastaFile::constructIndex()-- '%s' BUILDING\n", _filename); #endif // Allocate some space for the index structures. uint32 indexMax = 64 * 1024 * 1024 / sizeof(fastaFileIndex); uint32 indexLen = 0; _index = new fastaFileIndex [indexMax]; uint32 namesMax = 32 * 1024 * 1024; uint32 namesLen = 0; _names = new char [namesMax]; // Some local storage uint64 seqStart; uint32 seqLen; uint32 seqLenMax = ~uint32ZERO; uint32 namePos; readBuffer ib(_filename); char x = ib.read(); #ifdef DEBUGINDEX fprintf(stderr, "readBuffer '%s' eof=%d x=%c %d\n", _filename, ib.eof(), x, x); #endif // Build it. // Skip whitespace at the start of the sequence. while ((ib.eof() == false) && (whitespaceSymbol[x] == true)) { #ifdef DEBUGINDEX fprintf(stderr, "skip '%c' %d\n", x, x); #endif x = ib.read(); } while (ib.eof() == false) { #ifdef DEBUGINDEX fprintf(stderr, "index\n"); #endif // We should be at a '>' character now. Fail if not. if (x != '>') fprintf(stderr, "fastaFile::constructIndex()-- ERROR3: In %s, expected '>' at beginning of defline, got '%c' instead.\n", _filename, x), exit(1); // Save info - ib's position is correctly at the first letter in // the defline (which might be whitespace), but the reader // expects our position to be at the '>' -- hence the -1. 
seqStart = ib.tell() - 1; seqLen = 0; namePos = namesLen; // Read that first letter x = ib.read(); // Copy the name to the names while ((ib.eof() == false) && (whitespaceSymbol[x] == false)) { if (namesLen + 1 >= namesMax) { namesMax += 32 * 1024 * 1024; char *nt = new char [namesMax]; memcpy(nt, _names, namesLen); delete [] _names; _names = nt; } _names[namesLen++] = x; #ifdef DEBUGINDEX fprintf(stderr, "name += %c\n", x); #endif x = ib.read(); } if (namesLen + 1 >= namesMax) { namesMax += 32 * 1024 * 1024; char *nt = new char [namesMax]; memcpy(nt, _names, namesLen); delete [] _names; _names = nt; } _names[namesLen++] = 0; // Skip the rest of the defline while ((ib.eof() == false) && (x != '\r') && (x != '\n')) { #ifdef DEBUGINDEX fprintf(stderr, "skip let %c\n", x); #endif x = ib.read(); } // Skip whitespace between the defline and the sequence. while ((ib.eof() == false) && (whitespaceSymbol[x] == true)) { #ifdef DEBUGINDEX fprintf(stderr, "skip num %d\n", x); #endif x = ib.read(); } #ifdef DEBUGINDEX fprintf(stderr, "x=%c peek=%c\n", x, ib.peek()); #endif // Count sequence length while ((ib.eof() == false) && (ib.peek() != '>')) { #ifdef DEBUGINDEX fprintf(stderr, "seqlen %s %c\n", (whitespaceSymbol[x] == false) ? "save" : "skip", x); #endif if (whitespaceSymbol[x] == false) seqLen++; if (seqLen >= seqLenMax) fprintf(stderr, "fastaFile::constructIndex()-- ERROR: In %s, sequence '%s' is too long. Maximum length is %u bases.\n", _filename, _names + namePos, seqLenMax), exit(1); x = ib.read(); } // Save to the index. if (indexLen >= indexMax) { indexMax *= 2; fastaFileIndex *et = new fastaFileIndex[indexMax]; memcpy(et, _index, sizeof(fastaFileIndex) * indexLen); delete [] _index; _index = et; } _index[indexLen]._seqPosition = seqStart; _index[indexLen]._seqLength = seqLen; #ifdef DEBUG fprintf(stderr, "INDEX iid="uint32FMT" len="uint32FMT" pos="uint64FMT"\n", indexLen, seqLen, seqStart); #endif indexLen++; // Load the '>' for the next iteration. 
x = ib.read(); } // Fill out the index meta data struct stat fastastat; errno = 0; if (stat(_filename, &fastastat)) fprintf(stderr, "fastaFile::constructIndex()-- stat() of file '%s' failed: %s\n", _filename, strerror(errno)), exit(1); _header._magic[0] = FASTA_MAGICNUMBER1; _header._magic[1] = FASTA_MAGICNUMBER2; _header._numberOfSequences = indexLen; _header._namesLength = namesLen; _header._fastaFileSize = fastastat.st_size; _header._fastaModificationTime = fastastat.st_mtime; _header._fastaCreationTime = fastastat.st_ctime; // Dump the index, if possible. errno = 0; FILE *I = fopen(indexname, "w"); if (errno) return; fwrite(&_header, sizeof(fastaFileHeader), 1, I); fwrite( _index, sizeof(fastaFileIndex), _header._numberOfSequences, I); fwrite( _names, sizeof(char), _header._namesLength, I); fclose(I); } kmer-code-2013-trunk/libseq/seqStream.C0000644000000000000000000002246012322046702016477 0ustar rootroot#include "seqFactory.H" #include "seqStream.H" seqStream::seqStream(const char *filename) { _file = openSeqFile(filename); _string = 0L; _currentIdx = 0; _currentPos = 0; _streamPos = 0; _bufferMax = 1048576; _bufferLen = 0; _bufferPos = 0; _bufferSep = 0; _buffer = new char [_bufferMax + 1]; _idxLen = _file->getNumberOfSequences(); _idx = new seqStreamIndex [_idxLen + 1]; //fprintf(stderr, "seqStream::seqStream()-- Allocating "uint64FMT"MB for seqStreamIndex on "uint64FMT" sequences.\n", // _idxLen * sizeof(seqStreamIndex) / 1024 / 1024, _idxLen); _seqNumOfPos = 0L; _lengthOfSequences = 0; _eof = false; _separator = '.'; _separatorLength = 2; setSeparator('.', 2); _bgn = 0; _end = _lengthOfSequences; } seqStream::seqStream(const char *sequence, uint32 length) { _file = 0L; _string = (char *)sequence; _currentIdx = 0; _currentPos = 0; _streamPos = 0; _bufferMax = length; _bufferLen = length; _bufferPos = 0; _bufferSep = 0; _buffer = _string; _idxLen = 1; _idx = new seqStreamIndex [_idxLen + 1]; _seqNumOfPos = 0L; _idx[0]._iid = 0; _idx[0]._len = length; 
_idx[0]._bgn = 0; _idx[1]._iid = ~uint32ZERO; _idx[1]._len = 0; _idx[1]._bgn = length; _lengthOfSequences = length; _eof = false; _separator = '.'; _separatorLength = 20; _bgn = 0; _end = length; } seqStream::~seqStream() { if (_file) { delete _file; delete [] _buffer; } delete [] _idx; delete [] _seqNumOfPos; } void seqStream::setSeparator(char sep, uint32 len) { // Special case; no separator needed for string backed sequences. if (_string) return; // Bizarre signedness issue with sep=255 // ST->get() == sep FAILS // x=ST->get(); x == sep SUCCEEDS // // Not suggested to use non-printable ascii. if ((isprint(sep) == 0) || (tolower(sep) == 'a') || (tolower(sep) == 'c') || (tolower(sep) == 'g') || (tolower(sep) == 't')) { fprintf(stderr, "seqStream::setSeparator()-- ERROR! Separator letter must be printable ASCII and not [ACGTacgt].\n"); exit(1); } if (len == 0) { fprintf(stderr, "seqStream::setSeparator()-- ERROR! Separator length cannot be zero.\n"); exit(1); } _lengthOfSequences = 0; _separator = sep; _separatorLength = len;; for (uint32 s=0; s<_idxLen; s++) { _idx[s]._iid = s; _idx[s]._len = _file->getSequenceLength(s); _idx[s]._bgn = _lengthOfSequences; _lengthOfSequences += _idx[s]._len; } _idx[_idxLen]._iid = ~uint32ZERO; _idx[_idxLen]._len = 0; _idx[_idxLen]._bgn = _lengthOfSequences; // Rebuild our sequence number of position map, if it exists. // if (_seqNumOfPos) { delete [] _seqNumOfPos; tradeSpaceForTime(); } } void seqStream::tradeSpaceForTime(void) { uint32 i = 0; uint32 s = 0; //fprintf(stderr, "Allocating "uint32FMT" uint32s for seqNumOfPos.\n", _lengthOfSequences); _seqNumOfPos = new uint32 [_lengthOfSequences]; for (i=0; i<_lengthOfSequences; i++) { // Increment the sequence number until we enter into the next // sequence. Zero length sequences require the use of a 'while' // here. 
// while (i >= _idx[s+1]._bgn) s++; _seqNumOfPos[i] = s; } } unsigned char seqStream::get(void) { if (_streamPos >= _end) _eof = true; if ((_eof == false) && (_bufferPos >= _bufferLen)) fillBuffer(); if (_eof) return(0); if (_bufferSep == 0) { _currentPos++; _streamPos++; } else { _bufferSep--; } return(_buffer[_bufferPos++]); } void seqStream::rewind(void){ // Search for the correct spot. Uncommon operation, be inefficient // but simple. The range was checked to be good by setRange(). uint32 s = 0; uint64 l = 0; while ((s < _idxLen) && (l + _idx[s]._len < _bgn)) l += _idx[s++]._len; _eof = false; // (_bgn - l) is a 32-bit quanitity because of the second half of // the while above. Although _bgn is a 64-bit value, the value // used to set _bufferPos will be for that of a string constructor, // and so _bgn will be 32-bits. fillBuffer() resets _bufferPos if // we're backed by a file. _currentIdx = s; _currentPos = _bgn - l; _streamPos = _bgn; _bufferPos = _bgn; //fprintf(stderr, "seqStream::rewind()-- 1 currentIdx="uint32FMT" currentPos="uint32FMT" streamPos="uint32FMT" bufferPos="uint32FMT"\n", // _currentIdx, _currentPos, _streamPos, _bufferPos); fillBuffer(); //fprintf(stderr, "seqStream::rewind()-- 2 currentIdx="uint32FMT" currentPos="uint32FMT" streamPos="uint32FMT" bufferPos="uint32FMT"\n", // _currentIdx, _currentPos, _streamPos, _bufferPos); } void seqStream::setRange(uint64 bgn, uint64 end) { assert(bgn < end); uint32 s = 0; uint64 l = 0; while (s < _idxLen) l += _idx[s++]._len; if (end == ~uint64ZERO) end = l; if ((bgn > l) || (end > l)) fprintf(stderr, "seqStream::setRange()-- ERROR: range ("uint64FMT","uint64FMT") too big; only "uint64FMT" positions.\n", bgn, end, l), exit(1); _bgn = bgn; _end = end; rewind(); } void seqStream::setPosition(uint64 pos) { assert(_bgn <= pos); assert( pos < _end); uint64 old = _bgn; _bgn = pos; rewind(); _bgn = old; } uint32 seqStream::sequenceNumberOfPosition(uint64 p) { uint32 s = ~uint32ZERO; // binary search on our list 
of start positions, to find the // sequence that p is in. if (_lengthOfSequences <= p) { fprintf(stderr, "seqStream::sequenceNumberOfPosition()-- WARNING: position p="uint64FMT" too big; only "uint64FMT" positions.\n", p, _lengthOfSequences); return(s); } if (_seqNumOfPos) return(_seqNumOfPos[p]); if (_idxLen < 16) { for (s=0; s<_idxLen; s++) if ((_idx[s]._bgn <= p) && (p < _idx[s+1]._bgn)) break; } else { uint32 lo = 0; uint32 hi = _idxLen; uint32 md = 0; while (lo <= hi) { md = (lo + hi) / 2; if (p < _idx[md]._bgn) { // This block starts after the one we're looking for. hi = md; } else if ((_idx[md]._bgn <= p) && (p < _idx[md+1]._bgn)) { // Got it! lo = md + 1; hi = md; s = md; } else { // By default, then, the block is too low. lo = md; } } } return(s); } void seqStream::fillBuffer(void) { // Special case for when we're backed by a character string; there // is no need to fill the buffer. // if (_file == 0L) { if (_currentPos >= _end) _eof = true; return; } // Read bytes from the _file, stuff them into the buffer. Assumes // there is nothing in the buffer to save. _bufferLen = 0; _bufferPos = 0; // Still more stuff in the sequence? Get it. if (_currentPos < _idx[_currentIdx]._len) { #ifdef DEBUG fprintf(stderr, "seqStream::fillBuffer()-- More Seq currentPos="uint32FMT" len="uint32FMT"\n", _currentPos, _idx[_currentIdx]._len); #endif _bufferLen = MIN(_idx[_currentIdx]._len - _currentPos, _bufferMax); if (_file->getSequence(_idx[_currentIdx]._iid, _currentPos, _currentPos + _bufferLen, _buffer) == false) fprintf(stderr, "seqStream::fillBuffer()-- Failed to getSequence(part) #1 iid="uint32FMT" bgn="uint32FMT" end="uint32FMT"\n", _idx[_currentIdx]._iid, _currentPos, _currentPos + _bufferLen), exit(1); return; } // We've finished a sequence. Load the next. 
_currentPos = 0; _currentIdx++; while ((_currentIdx < _idxLen) && (_idx[_currentIdx]._len == 0)) _currentIdx++; #ifdef DEBUG fprintf(stderr, "seqStream::fillBuffer()-- New Seq currentPos="uint32FMT" len="uint32FMT"\n", _currentPos, _idx[_currentIdx]._len); #endif // All done if there is no more sequence. if (_currentIdx >= _idxLen) { _eof = true; return; } // Insert a separator. for (_bufferLen = 0; _bufferLen < _separatorLength; _bufferLen++) _buffer[_bufferLen] = _separator; // Keep track of the separator - this is used to make sure we don't // advance the sequence/stream position while the separator is // being returned. // _bufferSep = _bufferLen; // How much to get; minimum of what is left in the sequence, and // the buffer size. Don't forget about the separator we already // inserted! // uint32 bl = MIN(_idx[_currentIdx]._len - _currentPos, _bufferMax - _bufferLen); if (_file->getSequence(_idx[_currentIdx]._iid, _currentPos, _currentPos + bl, _buffer + _bufferLen) == false) fprintf(stderr, "seqStream::fillBuffer()-- Failed to getSequence(part) #2 iid="uint32FMT" bgn="uint32FMT" end="uint32FMT"\n", _idx[_currentIdx]._iid, _currentPos, _currentPos + bl), exit(1); _bufferLen += bl; // Load more, until buffer is full. Not really needed, and won't // improve performance much. AND it adds a lot of complexity to // track which sequence is current (_currentIdx). 
return; } kmer-code-2013-trunk/libseq/seqFile.H0000644000000000000000000000275412375772744016160 0ustar rootroot#ifndef SEQFILE_H #define SEQFILE_H #include "util.h" // General flow of the constructors is: // Clear all data // Open the file // Set _filename, _typename // Read/build the index structure // Position the file to the first read // Set _numberOfSequences (IMPORTANT, and subtle) class seqFile { protected: seqFile(const char *filename) {}; seqFile() {}; public: virtual ~seqFile() {}; protected: virtual seqFile *openFile(const char *filename) = 0; public: virtual const char *getSourceName(void) { return(_filename); }; virtual const char *getFileTypeName(void) { return(_typename); }; virtual bool randomAccessSupported(void) { return(_randomAccessSupported); }; virtual uint32 getNumberOfSequences(void) { return(_numberOfSequences); }; public: virtual uint32 find(const char *sequencename) = 0; virtual uint32 getSequenceLength(uint32 id) = 0; virtual bool getSequence(uint32 id, char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax) = 0; virtual bool getSequence(uint32 iid, uint32 bgn, uint32 end, char *s) = 0; protected: char _filename[FILENAME_MAX]; char _typename[FILENAME_MAX]; bool _randomAccessSupported; uint32 _numberOfSequences; friend class seqFactory; }; #endif // SEQFILE_H kmer-code-2013-trunk/libseq/test-seqStream.C0000644000000000000000000001715412322046702017460 0ustar rootroot#include "util.h" #include "seqCache.H" #include "seqStream.H" #include "merStream.H" #include "test-correctSequence.H" #define FAIL() { err++; assert(0); } uint32 testIndexing(uint32 numSeq, char sep, uint32 sepLen) { uint32 err = 0; seqStream *ST = 0L; fprintf(stderr, "testIndexing()-- numSeq="uint32FMT" sep=%c sepLen="uint32FMT"\n", numSeq, sep, sepLen); generateChainedAnswer(numSeq, sep, sepLen); if (numSeq > 1) { ST = new seqStream("test-correctSequence.fasta"); ST->setSeparator(sep, sepLen); } else { ST = new seqStream(correctSequence[0].sequence, 
correctSequence[0].sequenceLength); } uint32 maxLen = ST->startOf(numSeq-1) + ST->lengthOf(numSeq-1); // Basic checks on the reverse lookup - this is state independent; // it changes only based on the separator length. In other words, // there is no need to check this while iterating through the // seqStream. fprintf(stderr, "IGNORE THIS WARNING: "); if (ST->sequenceNumberOfPosition(maxLen) != ~uint32ZERO) { fprintf(stderr, "maxLen too small.\n"); FAIL(); } if (ST->sequenceNumberOfPosition(maxLen - 1) == ~uint32ZERO) { fprintf(stderr, "maxLen too big.\n"); FAIL(); } // Check all lookups - lengthOf() and IIDOf() are implicitly // checked by the operation of seqStream (get() mostly). startOf() // isn't, but inserting errors in setRange() led to // infinite-looking loops. uint64 pos = 0; uint64 sta = 0; for (uint32 sid=0; sidlengthOf(sid) != correctSequence[sid].sequenceLength) { fprintf(stderr, "lengthOf "uint32FMT" returned "uint32FMT", not correct "uint32FMT"\n", sid, ST->lengthOf(sid), correctSequence[sid].sequenceLength); FAIL(); } if (ST->startOf(sid) != sta) { fprintf(stderr, "startOf "uint32FMT" returned "uint64FMT", not correct "uint64FMT"\n", sid, ST->startOf(sid), sta); FAIL(); } if (ST->IIDOf(sid) != sid) { fprintf(stderr, "IIDOf "uint32FMT" returned "uint32FMT", not correct "uint32FMT"\n", sid, ST->IIDOf(sid), sid); FAIL(); } sta += correctSequence[sid].sequenceLength; for (uint32 ppp=0; pppsequenceNumberOfPosition(pos) != sid) { fprintf(stderr, "sequenceNumberOfPosition "uint64FMT" returned "uint32FMT", not correct "uint32FMT".\n", pos, ST->sequenceNumberOfPosition(pos), sid); FAIL(); } } } if (pos != maxLen) { fprintf(stderr, "maxLen wrong.\n"); FAIL(); } // Check the separator. Seek to a spot right before one, and count // that we have the correct length. More rigorously tested in // testChaining(). 
for (uint32 sid=0; sidsetRange(ST->startOf(sid) + ST->lengthOf(sid)-1, ~uint64ZERO); ST->get(); for (uint32 x=0; xget(); if (s != sep) { fprintf(stderr, "wrong separator at sep "uint32FMT" got %d expected %d\n", x, s, sep); FAIL(); } } if (ST->get() == sep) { fprintf(stderr, "too many separators!\n"); FAIL(); } } delete ST; return(err); } uint32 testSeqStream(seqStream *ST, uint32 sib, uint32 sie, char sep) { uint32 err = 0; while (ST->eof() == false) { uint32 sp = ST->seqPos(); uint32 si = ST->seqIID(); uint64 st = ST->strPos(); char ch = ST->get(); if (ch != 0) { if (ch != chainSeq[sib]) { fprintf(stderr, "sp="uint32FMT" si="uint32FMT" st="uint64FMT" ch=%c -- letter wrong got'%c'\n", sp, si, st, ch, chainSeq[sib]); FAIL(); } if ((ch != sep) && (sp != chainSeqPos[sib])) { fprintf(stderr, "sp="uint32FMT" si="uint32FMT" st="uint64FMT" ch=%c -- seqPos wrong got "uint32FMT"\n", sp, si, st, ch, chainSeqPos[sib]); FAIL(); } if ((ch != sep) && (si != chainSeqIID[sib])) { fprintf(stderr, "sp="uint32FMT" si="uint32FMT" st="uint64FMT" ch=%c -- seqIID wrong got"uint32FMT"\n", sp, si, st, ch, chainSeqIID[sib]); FAIL(); } if ((ch != sep) && (st != chainStrPos[sib])) { fprintf(stderr, "sp="uint32FMT" si="uint32FMT" st="uint64FMT" ch=%c -- strPos wrong got "uint64FMT"\n", sp, si, st, ch, chainStrPos[sib]); FAIL(); } sib++; } } if (sib != sie) { fprintf(stderr, "iterated length wrong; sib="uint32FMT" sie="uint32FMT"\n", sib, sie); FAIL(); } return(err); } uint32 testChaining(uint32 numSeq, char sep, uint32 sepLen) { uint32 err = 0; seqStream *ST = 0L; fprintf(stderr, "testChaining()-- numSeq="uint32FMT" sep=%c sepLen="uint32FMT"\n", numSeq, sep, sepLen); generateChainedAnswer(numSeq, sep, sepLen); if (numSeq > 1) { ST = new seqStream("test-correctSequence.fasta"); ST->setSeparator(sep, sepLen); } else { ST = new seqStream(correctSequence[0].sequence, correctSequence[0].sequenceLength); } // Do a test on the whole thing. 
{ uint32 sib = 0; uint32 sie = strlen(chainSeq); fprintf(stderr, "initial test with full range\n"); testSeqStream(ST, sib, sie, sep); fprintf(stderr, "initial test with full range (rewind)\n"); ST->rewind(); testSeqStream(ST, sib, sie, sep); } // Set the range to random values, and check all the results. // We've already verified the index works, so we're free to use // that (but we currently don't). uint32 maxLen = ST->startOf(numSeq-1) + ST->lengthOf(numSeq-1); fprintf(stderr, "test on subranges\n"); for (uint32 iter=0; iter<500; iter++) { uint32 beg = mtRandom32(mtctx) % maxLen; uint32 end = mtRandom32(mtctx) % maxLen; if (beg > end) { uint32 t = end; end = beg; beg = t; } ST->setRange(beg, end); // Compute the position in our stream for the ACGT based beg and // end. The quirk here is that our stream includes the // separator. uint32 sib = 0; // chainSeq position uint32 sie = 0; for (uint32 ppp=0, sid=0; sideof() == false) ST->get(); ST->rewind(); } else { //fprintf(stderr, "Random iter "uint32FMT"\n", iter); } testSeqStream(ST, sib, sie, sep); } return(err > 0); } int main(int argc, char **argv) { uint32 minLen = 100; uint32 maxLen = 20000; uint32 numSeq = 1000; uint32 err = 0; generateCorrectSequence(minLen, maxLen, numSeq); // Tests seqStream(string, strlen) construction method err += testIndexing(1, '.', 1); err += testChaining(1, '.', 1); // Tests seqStream(filename) construction method err += testIndexing(numSeq, '.', 1); err += testIndexing(numSeq, ':', 10); err += testIndexing(numSeq, 'z', 100); err += testIndexing(numSeq, '-', 1000); err += testChaining(numSeq, '.', 1); err += testChaining(numSeq, ':', 10); err += testChaining(numSeq, 'z', 100); err += testChaining(numSeq, '-', 1000); removeCorrectSequence(numSeq); if (err == 0) fprintf(stderr, "Success!\n"); exit(err > 0); } kmer-code-2013-trunk/libseq/test-seqCache.C0000644000000000000000000001211412322046702017217 0ustar rootroot#include "util.h" #include "seqCache.H" #include "seqStream.H" #include 
"merStream.H" #include "test-correctSequence.H" uint32 testSeqVsCorrect(seqInCore *S, uint32 testID) { uint32 err = 0; if (S == 0L) { fprintf(stderr, "testID:"uint32FMT" - empty sequence\n", testID); return(1); } uint32 sid = S->getIID(); if (strcmp(S->header(), correctSequence[sid].header) != 0) { fprintf(stderr, "testID:"uint32FMT" - header differs '%s' vs '%s'\n", testID, S->header(), correctSequence[sid].header); err++; } if (S->headerLength() != correctSequence[sid].headerLength) { fprintf(stderr, "testID:"uint32FMT" - header length differs "uint32FMT" vs "uint32FMT"\n", testID, S->headerLength(), correctSequence[sid].headerLength); err++; } if (strcmp(S->sequence(), correctSequence[sid].sequence) != 0) { fprintf(stderr, "testID:"uint32FMT" - sequence differs\n", testID); err++; } if (strlen(S->sequence()) != correctSequence[sid].sequenceLength) { fprintf(stderr, "testID:"uint32FMT" - sequence length differs strlen "uint32FMT" vs "uint32FMT"\n", testID, (uint32)strlen(S->sequence()), correctSequence[sid].sequenceLength); err++; } if (S->sequenceLength() != correctSequence[sid].sequenceLength) { fprintf(stderr, "testID:"uint32FMT" - sequence length differs "uint32FMT" vs "uint32FMT"\n", testID, S->sequenceLength(), correctSequence[sid].sequenceLength); err++; } return(err); } uint32 testSeqCacheIDLookups(seqCache *SC) { uint32 err = 0; uint32 numSeq = SC->getNumberOfSequences(); double start = getTime(); // 1 - getSequenceIID() fprintf(stderr, "1 - getSequenceIID()\n"); for (uint32 sid=0; sidgetSequenceIID(correctSequence[sid].header)) { fprintf(stderr, "2 - failed to find name '%s'\n", correctSequence[sid].header); err++; } } fprintf(stderr, "Test took %f seconds.\n", getTime() - start); return(err); } uint32 testSeqCache(seqCache *SC) { uint32 err = 0; uint32 numSeq = SC->getNumberOfSequences(); seqInCore *S = 0L; double start = getTime(); // 0 - getSequenceLength() fprintf(stderr, "0 - getSequenceLength()\n"); for (uint32 sid=0; sidgetSequenceLength(sid) != 
correctSequence[sid].sequenceLength) { fprintf(stderr, "1 - length differs.\n"); err++; } // 2 - stream with getSequenceInCore() fprintf(stderr, "2 - stream with getSequenceInCore()\n"); S = SC->getSequenceInCore(); while (S != 0L) { err += testSeqVsCorrect(S, 2); delete S; S = SC->getSequenceInCore(); } // 3 - iterate with getSequenceInCore(sid++) fprintf(stderr, "3 - iterate with getSequenceInCore(sid++)\n"); for (uint32 sid=0; sidgetSequenceInCore(sid); err += testSeqVsCorrect(S, 3); delete S; } // 4 - random with getSequenceInCore(sid) fprintf(stderr, "4 - random with getSequenceInCore(sid)\n"); for (uint32 cnt=0; cnt<4*numSeq; cnt++) { uint32 sid = mtRandom32(mtctx) % numSeq; S = SC->getSequenceInCore(sid); err += testSeqVsCorrect(S, 4); delete S; } fprintf(stderr, "Test took %f seconds.\n", getTime() - start); return(err); } int main(int argc, char **argv) { uint32 minLen = 100; uint32 maxLen = 2000; uint32 numSeq = 100000; seqCache *SC = 0L; uint32 err = 0; generateCorrectSequence(minLen, maxLen, numSeq); fprintf(stderr, "seqCache(file, 0, true) (ID lookups)\n"); SC = new seqCache("test-correctSequence.fasta", 0, true); //err += testSeqCacheIDLookups(SC); delete SC; fprintf(stderr, "seqCache(file, 0, true)\n"); SC = new seqCache("test-correctSequence.fasta", 0, true); err += testSeqCache(SC); delete SC; fprintf(stderr, "seqCache(file, 1, true)\n"); SC = new seqCache("test-correctSequence.fasta", 1, true); err += testSeqCache(SC); delete SC; fprintf(stderr, "seqCache(file, 2, true)\n"); SC = new seqCache("test-correctSequence.fasta", 2, true); err += testSeqCache(SC); delete SC; fprintf(stderr, "seqCache(file, 4, true)\n"); SC = new seqCache("test-correctSequence.fasta", 4, true); err += testSeqCache(SC); delete SC; fprintf(stderr, "seqCache(file, 8, true)\n"); SC = new seqCache("test-correctSequence.fasta", 8, true); err += testSeqCache(SC); delete SC; fprintf(stderr, "seqCache(file, 32, true)\n"); SC = new seqCache("test-correctSequence.fasta", 32, true); 
err += testSeqCache(SC); delete SC; fprintf(stderr, "seqCache(file, 200, true)\n"); SC = new seqCache("test-correctSequence.fasta", 200, true); err += testSeqCache(SC); delete SC; fprintf(stderr, "seqCache(file, 1000000, true)\n"); SC = new seqCache("test-correctSequence.fasta", 1000000, true); err += testSeqCache(SC); delete SC; fprintf(stderr, "seqCache(file, 0, true) -- loadAllSequence\n"); SC = new seqCache("test-correctSequence.fasta", 0, true); SC->loadAllSequences(); err += testSeqCache(SC); delete SC; removeCorrectSequence(numSeq); if (err == 0) fprintf(stderr, "Success!\n"); exit(err > 0); } kmer-code-2013-trunk/libseq/seqCache.H0000644000000000000000000000557512375772744016310 0ustar rootroot#ifndef SEQCACHE_H #define SEQCACHE_H #include "util++.H" #include "seqFile.H" class seqInCore { private: seqInCore(uint32 iid, char *hdr, uint32 hdrlen, char *seq, uint32 seqlen, bool deletable) { _idx = iid; _deletable = deletable; _headerLen = hdrlen; _header = hdr; _seqLen = seqlen; _seq = seq; }; friend class seqCache; public: ~seqInCore() { if (_deletable) { delete [] _header; _header = 0L; delete [] _seq; _seq = 0L; } }; char *header(void) const { return(_header); }; uint32 headerLength(void) const { return(_headerLen); }; char *sequence(void) const { return(_seq); }; uint32 sequenceLength(void) const { return(_seqLen); }; uint32 getIID(void) const { return(_idx); }; // Used only by searchGENOME (as far as I know) seqInCore *copy(void) { char *h = new char [_headerLen + 1]; char *s = new char [_seqLen + 1]; memcpy(h, _header, _headerLen + 1); memcpy(s, _seq, _seqLen + 1); return(new seqInCore(_idx, h, _headerLen, s, _seqLen, true)); }; private: uint32 _idx; bool _deletable; uint32 _headerLen; char *_header; uint32 _seqLen; char *_seq; }; class seqCache { public: seqCache(const char *filename, uint32 cachesize=0, bool verbose=false); ~seqCache(); // Returns IID for a name, either the first word on the defline, or // the ascii IID. 
uint32 getSequenceIID(char *name); seqInCore *getSequenceInCore(uint32 iid); seqInCore *getSequenceInCore(char *name) { return(getSequenceInCore(getSequenceIID(name))); }; seqInCore *getSequenceInCore(void) { return(getSequenceInCore(_idToGetNext++)); }; const char *getSourceName(void) { return(_fb->getSourceName()); }; const char *getFileTypeName(void) { return(_fb->getFileTypeName()); }; bool randomAccessSupported(void) { return(_fb->randomAccessSupported()); }; uint32 getNumberOfSequences(void) { return(_fb->getNumberOfSequences()); }; uint32 getSequenceLength(uint32 iid) { return(_fb->getSequenceLength(iid)); }; void setCacheSize(uint32 cachesize); void loadAllSequences(void); void flushCache(void); private: seqFile *_fb; uint32 _idToGetNext; bool _allSequencesLoaded; bool _reportLoading; uint32 *_cacheMap; // Maps ID to cache entry uint32 _cacheSize; // Size of cache uint32 _cacheNext; // Next cache spot to use seqInCore **_cache; // Cache of sequences }; #endif // SEQCACHE_H kmer-code-2013-trunk/libseq/seqFactory.H0000644000000000000000000000106312322046702016654 0ustar rootroot#ifndef SEQFACTORY_H #define SEQFACTORY_H #include "util.h" #include "seqFile.H" class seqFactory { protected: seqFactory(); ~seqFactory(); public: static seqFactory *instance(void) { if (me == 0L) me = new seqFactory; return(me); }; void registerFile(seqFile *f); seqFile *openFile(const char *name); private: static seqFactory *me; uint32 _filesNum; uint32 _filesMax; seqFile **_files; }; #define openSeqFile(S) seqFactory::instance()->openFile((S)) #endif // SEQFACTORY_H kmer-code-2013-trunk/libseq/fastaStdin.H0000644000000000000000000000245312375772744016664 0ustar rootroot#ifndef FASTASTDIN_H #define FASTASTDIN_H #include "util++.H" #include "bio++.H" #include "seqFile.H" class fastaStdin : public seqFile { protected: fastaStdin(const char *filename); fastaStdin(); public: ~fastaStdin(); protected: seqFile *openFile(const char *filename); public: uint32 getNumberOfSequences(void); 
public: uint32 find(const char *sequencename); uint32 getSequenceLength(uint32 iid); bool getSequence(uint32 iid, char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax); bool getSequence(uint32 iid, uint32 bgn, uint32 end, char *s); private: void clear(void); bool loadNextSequence(char *&h, uint32 &hLen, uint32 &hMax, char *&s, uint32 &sLen, uint32 &sMax); readBuffer *_rb; uint32 _nextIID; FILE *_pipe; char *_header; uint32 _headerLen; uint32 _headerMax; char *_sequence; uint32 _sequenceLen; uint32 _sequenceMax; friend class seqFactory; }; #endif // FASTASTDIN_H kmer-code-2013-trunk/Makefile.wiki0000644000000000000000000014112311512770706015560 0ustar rootroot == Overview of the proposed new IR build system == The proposed build system for IR projects is defined by the set of files under cds/IR/build. These files consist of a Makefile and several ancillary files which provide platform specific and file type specific definitions and rules. The build itself is always directed through the one Makefile in the build directory. What I describe hereafter as '''the build''' is really gmake invoked with this Makefile, either from within the cds/IR/build directory or referenced explicitly with a -f option to gmake. When the build is started, the ancillary files are examined to establish definitions, rules, and to provide a naming utility. The build then looks for a file named Make.include in the directory of invocation. This Make.include file should contain build information for files pertinent to its directory and possibly the inclusions of other Make.include files in lower directories. It is only after the tree (and it had better be a tree or something very bad will happen) of Make.include's is read that any building starts, so that full dependency information is available before any action is taken.
Automatic include files such as the *.d files which typically hold C and C++ dependencies are included, possibly after being rebuilt (if a build rule exists for them and if they do not exist or are out of date). Thus, the first action of the build is typically the creation of automatically generated dependency information, with subsequent modifications of sources to minimally rebuild these files. The build proper then proceeds with the creation of all targets which have been defined. Targets are declared by placing them on the lists of variables which are defined to be targets by the build rules. It is possible to subgoal the building at a file or directory level of granularity. == Invoking the build system == There are two different directories which are of importance in the build system. The first is the fixed build directory, cds/IR/build/, which holds the Makefile, ancillary files, and the installed files. The second directory of importance is the directory of invocation, the place where gmake is executed. These two directories coincide when one executes gmake from the build directory. Keep these things distinct because all inclusions start from and all actions take place in the directory of invocation, not the build directory. The first Make.include is read in the directory of invocation. If the convention has been followed of having Make.include files in every node of the source tree which refer only to subdirectories, then only those Make.include files at the directory of invocation and lower will be read into the build system. This means that automatic rebuilding of dependencies external to the directory of invocation will not be handled fully (files will not be updated, but they will be found if they exist). Thus, if one wishes to focus one's attention on a single subdirectory to build in, one should make the subdirectory the directory of invocation.
$ cd that/subdir/
$ gmake -f ${cvs}/cds/IR/build/Makefile all
This will result in much less work for the build to do in order to figure out what actions to take, because it will examine fewer constraints. On the other hand, if one invokes from a higher directory, then any sibling dependencies of the target will be properly examined and necessary actions at that level will be taken.
$ cd ${cvs}/cds/IR/build
$ gmake that/subdir/.all
This will result in all the subgoals in a subdirectory (and lower) being built, with any necessary updates of other directories being handled automatically. All rules which can be found are examined although only those actions necessary for the given subtarget(s) are taken. By default, the build system builds all things under the ${cvs}/cds/IR tree which are currently checked out and which support the build system by providing Make.include files.
$ cd ${cvs}/cds/IR/build
$ gmake
The targets currently supported by the build system are {| class="wikitable" border="1" |- | all || build all subgoals |- | clean || remove subgoals |- | depends-clean || remove any automatically generated dependency (*.d) files |- | real-clean || do both depends-clean and clean |- | install || do all and copy subgoals and other files to the install subdirectory of the build directory |- | ls || do 'ls -l' of subgoals |- |} These targets are also supported in a subtarget specific form, such as that/subdir/.install. The compile options can be modified for debugging, profiling, compiling with GNU compilers, or modifying the installation directory. {| class="wikitable" border="1" |- | WITH_OPT=debug || compile with no optimization and maximum debugging |- | WITH_OPT=profile || compile for profiling with optimization and minimal debugging |- | WITH_GNU=1 || compile with gnu compilers |- | WITH_THREADS=1 || compile with threading enabled |- | INSTALL_TAG=''tag'' || append -''tag'' to the name of the install directory |- | MAKE_COMPILERS=''file'' || use the file ''file'' in place of Make.compilers (this option is of dubious value). |- | WITHOUT='dir1/ dir2/' || cancel the inclusion of any Make.include files in the given directories (another dubious option). |- |} These options go on the gmake command line.
$ gmake WITH_OPT=debug WITH_GNU=1 INSTALL_TAG=release all
== Anatomy of a Make.include == Make.include files define variables which hold names of files, and they are not necessarily sitting in the same place where the build is invoked. With these two considerations, there is defined, for each Make.include file a variable named by the single character '/' which holds the relative path of the current directory. Any file in the current directory can be referenced by prepending $/ to it. The contents of / ought to be unique in the namespace of the build, so any variable defined as $/.MYVAR cannot conflict with any other variable in the build namespace (re-using the syntax of hidden files for variables may be a bad thing, but it has not caused problems yet). So, a simple Make.include might look like:
$/.C_SRCS := $/hello.c
$/.C_EXES := $/hello
$/hello: $/hello.o
$/.CLEAN  := $/*.o
Note the use of the ':=' assignment instead of the '=' assignment. The reason for this is that gmake has two '''flavors''' of variable, the traditional one, which is lazily evaluated, defined by '=', and one which is immediately evaluated, defined by ':='. If the RHS of the $/.C_SRCS assignment were to be lazily evaluated, then the $/ component of the name would expand to whatever value / holds at the end of the whole traversal (which is an empty string if everything goes right). This would produce errors. I recommend the use of ':=' in just about every possible case, unless you are trying to be tricky on purpose. There are a number of special variables, used on a per-/ basis by the Make.rules ancillary file. These variables can be assigned to in the Make.include files to specify the various types of files and the actions required to build them. The current list of variables is as follows: {| class="wikitable" border="1" |- | $/.C_SRCS || C sources which need to have their dependencies analyzed |- | $/.C_INCS || C header files |- | $/.C_LIBS || C library subgoals |- | $/.C_SHLIBS || C shared library subgoals |- | $/.C_EXES || C program subgoals |- | $/.CXX_* || same as the $/.C_* variables above, but for C++ |- | $/.TEX_PS || Postscript subgoals to be built from LaTeX files |- | $/.TEX_PDF || PDF subgoals to be built from LaTeX files |- | $/.SHARES || a catchall category for things which are just to be copied |- | $/.SH_LIBS || sh script libraries |- | $/.SH_EXES || sh script executables |- | $/.PERL_LIBS || Perl script libraries |- | $/.PERL_EXES || Perl script executables |- | $/.PY_LIBS || Python script libraries |- | $/.PY_EXES || Python script executables |- | $/.LIB/ || subdirectory of lib/ where $/.C_LIBS and $/.CXX_LIBS are installed |- | $/.INCLUDE/ || subdirectory of include/ where $/.C_INCS and $/.CXX_INCS are installed |- | $/.DOC/ || subdirectory of doc/ where $/.TEX_PS and $/.TEX_PDF are installed |- | $/.SHARE/ || subdirectory of share/ where $/.SHARES are installed 
|- | $/.SH_LIB/ || subdirectory of scripts/ where $/.SH_LIBS are installed |- | $/.PERL_LIB/ || subdirectory of scripts/ where $/.PERL_LIBS are installed |- | $/.PY_LIB/ || subdirectory of scripts/ where $/.PY_LIBS are installed |- | $/.CLEAN || files and patterns to be removed during a clean |- | $/.REAL-CLEAN || files and patterns to be removed during a real-clean |- |} Note: while the current build system removes all subgoals, intermediate files are not removed automatically. If C/C++ programs are being built, then patterns like $/*.o should be put in the $/.CLEAN variable or they will not get removed. It is debatable whether this should be left up to each Make.include file to take care of on its own. There are a couple of routine tasks which are done in a fashion a little unusual to those accustomed to more traditional uses of make. One of these tasks is the specification of additional flags used when building the C/C++ programs. For C programs, this is done by adding to the CFLAGS variable in a target specific manner.
$/myprogram.o $/myprogram.c.d: CFLAGS += -DTEST -I/usr/local/lib
If you wish to have this take effect for all files defined in the current Make.include you can use a pattern rule.
$/%.o $/%.d: CFLAGS +=-DTEST -I/usr/local/lib
One '''gotcha''' is in the use of locally defined variables (like anything involving $/). It seems that any variable expansion on the target specific '+=' is delayed until after all rules have been traversed, at which point $/ is very likely to have the wrong value. This does not happen with a target specific ':=' so it may be a bug in what is a fairly new gmake feature. The work-around invokes another fairly new gmake feature.
$(eval $/%.o $/%.d: CFLAGS +=-I$/include)
The second routine task is that of specifying external libraries to link to when building C/C++ executables. If the libraries are external to the whole build, then one would use the usual -L-l flags in a target specific variable modification.
$/myprogram: CLDFLAGS+=-L/usr/local/foodir
$/myprogram: CLIBS +=-lfoo
One must again wrap any variable expressions which are likely to be overwritten by other includes with $(eval ) to force immediate variable expansion. If the library is being built by the build system, using the -L-l flags would create a '''dependency leak''', as the build system would not know that the library must be updated before the link. To avoid creating this leak, one should declare the dependency explicitly.
$/myprogram: ${THELIBDIR/}libfoo.a
Here no variable expansion needs to be forced, since dependency lines expand variables immediately. Because it is listed as a dependency, libfoo.a will appear in the series of arguments to the linker for $/myprogram and the leak is avoided. === Two examples === Here is a walkthrough of the Make.include for AtacPipeline, which builds a variety of executables and libraries.
$/.CXX_EXES   :=$/heavyChains
$/.CXX_SHLIBS :=$/localAlignerInterfacemodule.so $/halignmodule.so $/hellomodule.so
One program and three shared libraries are subgoals to be created.
$/.CXX_SRCS:=$/GF_ALN_dpaligner.cc $/GF_ALN_local.cc \
  $/GF_ALN_overlap.cc $/GF_ALN_qvaligner.cc \
  $/GF_ALN_loverlapper.cc $/GF_ALN_pieceOlap.cc \
  $/halign.cc $/halignDriver.cc $/halignmodule.cc \
  $/heavyChains.cc \
  $/localAlignerInterface.cc $/localAlignerInterfacemodule.cc \
  $/hellomodule.cc $/byemodule.cc $/holignmodule.cc
The source files are declared.
$/.CLEAN  := $/*.o $/*.pyc
Since this directory will build both C++ programs and python programs, intermediates for both must be clean-ed.
$/.PY_EXES :=$/AtacDriver.py
$/.PY_LIBS :=$(filter-out ${$/.PY_EXES},$(wildcard $/*.py))
A python executable is declared and the python libraries are any file in this directory ending in '.py' which is not on the list of executables.
$/.PY_LIB/ :=AtacPipeline/
The python libraries are to be installed under scripts/AtacPipeline.
$/heavyChains   :  $/heavyChains.o

$/localAlignerInterfacemodule.so : \
   $/localAlignerInterfacemodule.o $/localAlignerInterface.o \
   $/GF_ALN_overlap.o $/GF_ALN_local.o \
   $/GF_ALN_loverlapper.o $/GF_ALN_pieceOlap.o \
   $/GF_ALN_dpaligner.o $/GF_ALN_qvaligner.o

$/hellomodule.so: $/hellomodule.o

$/halignmodule.so: $/halignmodule.o $/halign.o
The linking dependencies for each of the targets are specified.
$(eval $/%.d $/%.o: CXXFLAGS+=${PYINC})
The shared libraries being built are actually python extensions, so they will be including python header files. The ${PYINC} path is specified in the Make.compilers file and is not expected to change (so the $(eval ) wrapper is a bit paranoid, but harmless). There are some extra flags which are needed for building python extensions at the end of this file when on AIX, but they are very exceptional, and an explanation of them here is of little value.
$(eval $/localAlignerInterfacemodule.so: AIX_SHLIB_FLAGS+=-einitlocalAlignerInterface -Wl,-bI:$/AIX_python-module-exports)
$(eval $/halignmodule.so: AIX_SHLIB_FLAGS+=-einithalign -Wl,-bI:$/AIX_python-module-exports)
$(eval $/hellomodule.so: AIX_SHLIB_FLAGS+=-einithello -Wl,-bI:$/AIX_python-module-exports)
Our next example is the Make.include for MatchExtender which builds a series of C++ programs which depend on external libraries.
FRAMEWORK/  :=$(call MakePath,$/../Framework/)
RASCAL/     :=$(call MakePath,$/../../../RASCAL/src/)
External paths are defined by the MakePath function. This function is explained later.
$/.CXX_EXES := $/testFastaReader $/MatchExtender $/MismatchCounter
Three C++ programs are to be built.
ind_src  := $/IndexedFastaReader.cc
test_src := $/testFastaReader.cc
mch_src  := $/MEMatch.cc
me_src   := $/MatchExtenderAtac.cc $/MatchExtender.cc
mc_src   := $/MismatchCounterAtac.cc $/MismatchCounter.cc

$/.CXX_SRCS := ${ind_src} ${test_src} ${mch_src} ${me_src}
The sources are partitioned into four groups.
$/.CLEAN :=$/*.o $/*~ $/core
On a clean we remove object files, emacs backups, and any cores.
$/testFastaReader: ${ind_src:.cc=.o} ${test_src:.cc=.o}
$/MatchExtender:   ${ind_src:.cc=.o} ${mch_src:.cc=.o} ${me_src:.cc=.o} 
$/MismatchCounter:   ${ind_src:.cc=.o} ${mch_src:.cc=.o} ${mc_src:.cc=.o} 
Program dependencies are defined as combinations of the various groups defined above, with their '.cc' extensions turned to '.o'.
${$/.CXX_EXES}: \
  ${RASCAL/}seq/libRASCAL_seq.a ${RASCAL/}base/libRASCAL_base.a \
  ${FRAMEWORK/}libATAC.a

$(eval $/%.d $/%.o:   CXXFLAGS+=-I${RASCAL/}. -I${FRAMEWORK/}.)
All programs must link to several external libraries and use their header files. === The Include function === The build system has wrapped the usual include syntax of gmake with a function called Include which can be invoked from within a Make.include file.
$(eval $(call Include,$/subdir1/ $/subdir2/))
Its effect is to check if there exists a Make.include file in each of its directory arguments, and if so, to traverse that file. The contents of those Make.include files are evaluated and added to the current build definitions. The variable / is pushed and popped appropriately. === The MakePath function === The build system supplies a function MakePath which is meant to be called in Make.include files to canonicalize pathnames. The problem it addresses is the one of gmake's inability to recognize the sameness of expressions like src/../src/foo and src/foo. Suppose we had a set of files and directories as follows:
X/
  Make.include Y/ Z/
X/Y/
  Make.include y.c
X/Z/
  Make.include z.c
Where we build a library liby.a in Y/ which is needed to compile the program z in Z/. The contents of X/Make.include is
$(eval $(call Include, $/Y/ $/Z/))
and the contents of Y/Make.include is
$/.C_SRCS :=$/y.c
$/.C_LIBS :=$/liby.a
$/liby.a: $/y.o
Then a natural choice for Z/Make.include would be
${Y/}  :=$/../Y/
$/.C_SRCS :=$/z.c
$/.C_EXES :=$/z
$/z: $/z.o ${Y/}liby.a
If liby.a is already built by the time z is built, then there is no problem. If not, and if the build is invoked in Z/, one will get some error about not knowing how to build ../Y/liby.a, which is to be expected. However, if the build is invoked in X/ then one gets a similar error about not knowing how to build Z/../Y/liby.a. The build, invoked from X/, does know how to build Y/liby.a, but does not understand that Z/../Y/liby.a is the same thing. The function
$(call MakePath,P)
takes a path P to an existing directory and returns the shortest (redundant dots and double dots collapsed) path to P relative to the directory of invocation, in a fashion consistent with the pathname conventions used elsewhere in the build system (trailing '/' and '.' referred to by an empty string). Thus, the right version of the Z/Make.include file is,
${Y/}  :=$(call MakePath,$/../Y/)
$/.C_SRCS :=$/z.c
$/.C_EXES :=$/z
$/z: $/z.o ${Y/}liby.a
MakePath will issue a warning if the directory sought is not found, and return an empty string. The current implementation of the MakePath function is kind of kludgey, involving a shell-call to either a C program or a PERL program. I have not found a better implementation yet for this functionality. === Legacy builds === It is inevitable, because some parts of the code tree came from external sources or are complicated legacy codes, that one wants to still be able to integrate the usual '''recursive make''' procedure for some directory which circumvents the build system and its dependency checking. Here is an example of a simple Make.include which does this.
$(eval $(call MakeRecursive))

$/md5lib/md5c.o: $/.all

$/.all:
        cd `dirname $@` && ${MAKE} all

$/.real-clean $/.clean:
        cd `dirname $@` && ${MAKE} clean

$/.install:
The first line calls a special build system function, MakeRecursive which declares that this Make.include file is opting out of the usual build system and will define its own subtargets. The next line announces a target being supplied, the $/md5lib/md5c.o object file. This is optional, but gives the build system some idea of how to order multiple recursive makes based on possible mutual dependencies. The next lines specify rules for subdirectory specific subtargets (all, clean, real-clean, install) all of which are mandatory for recursive Make.include's. Each of these rules is just a recursive build invocation after changing into the appropriate directory, or an empty rule, signifying no action. == Anatomy of the Makefile == Here we go line by line through the Makefile (CVS revision 1.29) and discuss the function of every part.
default:   all
First a default target is created. The first goal listed is always the default target. Typically people use all for this. Since we do not know what all will mean until much later in the file, we can not define all yet.
ifndef MAKEFILE/
  MAKEFILE/ :=$(dir $(firstword $(MAKEFILE_LIST)))
endif
The auxiliary files are looked for in the directory where the Makefile was found. We extract this information from the built-in variable MAKEFILE_LIST. The MAKEFILE/ variable points to the build directory.
ifdef MAKE_COMPILERS
 include ${MAKE_COMPILERS}
else
 include ${MAKEFILE/}Make.compilers
endif
We load the Make.compilers file, which is more of a configuration file, since it contains definitions not just of the compilers but also of basic utilities and of locations of important libraries such as X11 and LAPACK. One design goal was to have all platform specifics captured by a single file so that porting to a new platform would require only the adjustment of this file. This file can be overridden by a user supplied MAKE_COMPILERS argument, though it is probably a mistake to use this feature as anything but a temporary device.
include ${MAKEFILE/}Make.path
The Make.path file supplies a crucial utility in canonicalizing directory names. We now begin the directory traversal part, where subdirectories are explored and build information is collected.
# //            accumulates every traversed directory in dotted form (".", "subdir/.").
# /             is the relative path of the directory whose Make.include is being read.
# //-RECURSIVE  accumulates the traversed directories that opted out via MakeRecursive.
//           :=
/            :=
//-RECURSIVE    :=
# MakeRecursive: records the current directory ($/.) as having opted out.
# It appends instead of assigning: with a plain ':=' every use would overwrite
# the list, so only the last opted-out directory would be remembered and the
# earlier ones would wrongly stay in // after the filter-out step.
# NOTE(review): the '$$' assumes the text is expanded once (e.g. by call)
# before being evaluated -- confirm against the Make.include call sites.
define MakeRecursive
//-RECURSIVE +=$$/.
endef
Three important variables are being initialized here. The variable // holds the list of all directories which have been traversed which have not opted out of the build system. The directories are kept in '''dotted''' form (i.e. ., subdir/.). The //-RECURSIVE variable holds those directories (in dotted form) which have been traversed and have opted out of the build system. The variable / is the current relative path variable, which is meant to be used by traversed Make.include files.
# Include: expands Include_File once for every directory named in ${1}.
define Include
 $(foreach x,$(strip ${1}),$(call Include_File,$x))
endef

# Include_File: traverses the single directory ${1}.
# The directory is skipped when "${1}." appears in WITHOUT_ or when it has no
# Make.include.  Otherwise it is appended to the .SUBS list of the directory
# that / named when this text was expanded and to //, its own .SUBS list is
# reset, and / is set to ${1} while its Make.include is read.  The last
# assignment puts / back to the value it had when this text was expanded.
define Include_File
  ifeq ($(filter ${1}.,${WITHOUT_}),)
    ifeq ($(wildcard ${1}Make.include),${1}Make.include)
      $/.SUBS +=${1}.
      // +=${1}.
      ${1}.SUBS :=
      /  :=${1}
      include ${1}Make.include
      /  :=$/
    endif
  endif

endef

# WITHOUT defaults to empty; WITHOUT_ is the same list with a '.' appended to
# every word, which is the form Include_File filters against.
ifndef WITHOUT
  WITHOUT:=
endif
WITHOUT_:=$(patsubst %,%.,$(strip ${WITHOUT}))
The normal include syntax is wrapped in a function which will maintain / properly while adding newly traversed directories to // and keeping track of which directories are whose children (kept in $/.SUBS). Each directory is traversed if its Make.include file exists and it is not in the set of suppressed directories (listed in ${WITHOUT}). Traversed directories have their Make.include files included. Within those Make.include files, / will hold the relative path to the directory. The Include function is meant for external use, while Include_File is a technicality and should not be employed except within this file.
# Start the traversal at $/ , which was initialised to the empty string above.
$(eval $(call Include_File,$/))
We include the Make.include file which sits in the directory of invocation (as opposed to the one in the build directory). Since traversal starts in this directory, the only build information which will be considered is that from this directory and its descendants, allowing a user to build within a limited source directory, if they do not wish to check lateral dependencies for some reason (e.g. efficiency).
# Remove every directory listed in //-RECURSIVE from // .
//            :=$(filter-out ${//-RECURSIVE},${//})
After traversal, // holds all directories which have been traversed. We now remove from it all those paths which have opted out. At this point, // holds those traversed directories which are considered to be properly participating in the build and //-RECURSIVE holds those which will be built in a more or less '''legacy''' fashion. At this point, / should be an empty string (even though it does appear below). A second design goal was the separation of the specification of build rules from the primary Makefile so that new file types and build commands could be added to the build system by appending them to Make.rules. Actions are dictated by file types.
# Both accumulators are declared with '=' (recursively expanded), so text
# appended to them later is only expanded where the variables are referenced.
__SUBGOALS__=
__DEPGOALS__=
The __SUBGOALS__ variable is intended to hold all those targets which must be made for the all target. The __DEPGOALS__ holds patterns for automatic dependency files which are to be included. These variables will be dynamically scoped (the one exception we make to the usual static scoping). This allows for a variable capture which we exploit later. The __SUBGOALS__ and __DEPGOALS__ variables are appended to in the Make.rules file.
# The leading '-' makes a missing Make.rules a non-error.
-include ${MAKEFILE/}Make.rules
If the Make.rules file exists in the directory of the Makefile then it is included. If it does not exist, the system will use the default rules built-in to make, which have a chance of working right (a snowball's chance in hell).
# Expand the __DEPGOALS__ pattern for every directory in // to obtain DEPENDS.
# The files are included only when the list is non-empty and no goal on the
# command line matches %-clean.
$(eval DEPENDS:=$(foreach x,${//},$(call __DEPGOALS__,$x)))
ifneq ($(strip ${DEPENDS}),)
  ifeq ($(filter %-clean,${MAKECMDGOALS}),)
    include ${DEPENDS}
  endif
endif
The __DEPGOALS__ pattern is evaluated on every directory and expanded into a set of files in the variable DEPENDS. Unless one of the command goals of the build contains the suffix -clean (real-clean or depends-clean, but not clean), these files will be included. The '''clean''' conditional exists to prevent certain kinds of wedged conditions the build system could get into, as well as to allow the clean targets to be processed without building any automatically created DEPENDS files. We now define the standard make targets, which are applied to all subdirectory targets. The basic target, TARG, is also defined on a per-subdirectory basis with targets of the form $/.TARG, with TARG being nearly an alias for .TARG (aside from //-RECURSIVE directories). This allows the user to selectively build only those subgoals which are in a single directory. Target TARG for //-RECURSIVE builds is done before the $/.TARG target. Building all legacy targets first seems like a good idea.
# clean depends on the .clean target of every opted-out directory, then on $/.clean.
clean:         ${//-RECURSIVE:.=.clean}      $/.clean
# .RULE-clean: for directory ${1}, the .clean target depends on the .clean
# targets of the directories in its .SUBS list.  Its recipe removes the files
# named in the directory's .CLEAN variable and its subgoals, then removes the
# C_TMP_COMPILE / CXX_TMP_COMPILE entries from inside the directory.
define .RULE-clean
${1:.=.clean}: $${${1:.=.SUBS}:.=.clean}
	${RM} $${${1:.=.CLEAN}} ${__SUBGOALS__}
	(cd $1 && ${RM} -r ${C_TMP_COMPILE} ${CXX_TMP_COMPILE})

endef
# Instantiate the rule once per directory in // .
$(eval $(foreach x,${//},$(call .RULE-clean,$x)))
The clean target executes for recursive directories first and then for .clean. The $/.clean target for each subdirectory depends on the $/.clean target of its children and executes by removing those files or patterns which were listed in the $/.CLEAN variable of that directory, any subgoals of that directory, and any temporary compiler files which may have been created in that directory (e.g. so_locations/).
# depends-clean only depends on $/.depends-clean; the opted-out directories get
# a .depends-clean target with no prerequisites and no recipe.
depends-clean:                            $/.depends-clean
${//-RECURSIVE:.=.depends-clean}:
# .RULE-depends-clean: for directory ${1}, depend on the children's
# .depends-clean targets, then remove the directory's Make.depends and the
# files produced by the __DEPGOALS__ pattern.
define .RULE-depends-clean
${1:.=.depends-clean}: $${${1:.=.SUBS}:.=.depends-clean}
	${RM} ${1:.=Make.depends} ${__DEPGOALS__}

endef
# Instantiate the rule once per directory in // .
$(eval $(foreach x,${//},$(call .RULE-depends-clean,$x)))
Similar to clean, except that we remove only the dependency files which may have been built to satisfy the earlier include ${DEPENDS} line.
# real-clean depends on the .real-clean target of every opted-out directory,
# then on $/.real-clean.
real-clean:    ${//-RECURSIVE:.=.real-clean} $/.real-clean
# .RULE-real-clean: the recipe lines of .RULE-clean and .RULE-depends-clean,
# followed by removal of the files named in the directory's .REAL-CLEAN variable.
define .RULE-real-clean
${1:.=.real-clean}: $${${1:.=.SUBS}:.=.real-clean}
	${RM} $${${1:.=.CLEAN}} ${__SUBGOALS__}
	(cd $1 && ${RM} -r ${C_TMP_COMPILE} ${CXX_TMP_COMPILE})
	${RM} ${1:.=Make.depends} ${__DEPGOALS__}
	${RM} $${${1:.=.REAL-CLEAN}}

endef
# Instantiate the rule once per directory in // .
$(eval $(foreach x,${//},$(call .RULE-real-clean,$x)))
A combination of the previous two clean targets.
# all depends on the .all target of every opted-out directory, then on $/.all.
all:           ${//-RECURSIVE:.=.all}        $/.all
# .RULE-all: a directory's .all target depends on its children's .all targets
# and on the directory's own subgoals; it has no recipe of its own.
define .RULE-all
${1:.=.all}: $${${1:.=.SUBS}:.=.all} ${__SUBGOALS__}

endef
# Instantiate the rule once per directory in // .
$(eval $(foreach x,${//},$(call .RULE-all,$x)))
The all target depends on all subdirectory all's and all subgoals for this directory. The last major section of the Makefile is the installer. Installation currently proceeds by depending on the subgoals and upon a copy of those built subgoals to a special directory, INSTALL/ which is determined in the Make.compilers file. Because different directories may wish to do different kinds of pre and post installation actions, the .install targets have been written to provide a number of hooks. It is up to the Make.rules file to make use of those hooks.
# Opted-out directories get an .install-copy target with no prerequisites and no recipe.
${//-RECURSIVE:.=.install-copy}:
install-copy:       ${//-RECURSIVE:.=.install-copy}    $/.install-copy
# .RULE-install-copy: a directory's .install-copy target depends on the
# .install-copy targets of its children.  No recipe is attached here.
define .RULE-install-copy
${1:.=.install-copy}: $${${1:.=.SUBS}:.=.install-copy}

endef
# Instantiate the rule once per directory in // .
$(eval $(foreach x,${//},$(call .RULE-install-copy,$x)))
All .install targets have a .install-copy target defined which depends on the .install-copy's of the children. A dummy target is defined for legacy builds to prevent certain kinds of build problems, but it is never normally invoked.
# install depends on the .install target of every opted-out directory, then on $/.install.
install:       ${//-RECURSIVE:.=.install}    $/.install
# .RULE-install: a directory's .install target depends on its .all and its
# .install-copy targets; the two prerequisites do not depend on each other.
define .RULE-install
${1:.=.install}: ${1:.=.all} ${1:.=.install-copy}

endef
# Instantiate the rule once per directory in // .
$(eval $(foreach x,${//},$(call .RULE-install,$x)))
An install in a directory is equivalent to doing an install in the legacy directories, a build in the current directory (and its children) and an install copy in the current directory (and its children). The benefit of this separation of tasks for install is that the install-copy phase of the build can be invoked as a separate target to selectively copy targets into the install directory, which might be needed in some special cases. One major disadvantage of this separation is that because install-copy does not depend on all, a multithreaded invocation of gmake (e.g. gmake -j4) is not guaranteed to perform install-copy after all. == Anatomy of the Make.rules == The execution of commands other than cleaning commands is determined by the file Make.rules (cvs revision 1.25). This file defines file types and actions to be taken to rebuild files.
# .FUN-install-copy(files, dirs): recipe text that copies each file of $1 that
# exists as a regular file to ${INSTALL/}<dir><basename> for every dir in $2.
# The destination directory is created first and an existing destination file
# is removed before the copy.  Nothing happens when either list is empty.
# The doubled/quadrupled '$' leave shell variables intact across the expansions
# this text goes through before the shell sees it.
define .FUN-install-copy
	@ files='$$(strip $1)'; dirs='$$(strip $2)'; \
	if [ -n "$$$${files}" -a -n "$$$${dirs}" ] ; then \
	  for F in $$$${files} ; do \
	    if [ -f $$$${F} ] ; then \
	      for D in $$$${dirs} ; do \
	        Fout=$${INSTALL/}$$$${D}`basename $$$${F}` ; \
	        echo ":Copying $$$${F} to $$$${Fout}:" ; \
	        mkdir -p `dirname $$$${Fout}` && \
	        rm -f $$$${Fout} && cp -fp $$$${F} $$$${Fout} ; \
	      done ; \
	    fi ; \
	  done ; \
        fi
endef
# .FUN-install-copy-exe(files, dirs): same as .FUN-install-copy, except that the
# value of ${.EXE} is appended to both the source and the destination file name,
# and the existence test is made on the suffixed source name.
define .FUN-install-copy-exe
	@ files='$$(strip $1)'; dirs='$$(strip $2)'; \
	if [ -n "$$$${files}" -a -n "$$$${dirs}" ] ; then \
	  for F in $$$${files} ; do \
	    if [ -f $$$${F}${.EXE} ] ; then \
	      for D in $$$${dirs} ; do \
	        Fout=$${INSTALL/}$$$${D}`basename $$$${F}` ; \
	        echo ":Copying $$$${F}${.EXE} to $$$${Fout}${.EXE}:" ; \
	        mkdir -p `dirname $$$${Fout}` && \
	        rm -f $$$${Fout}${.EXE} && cp -fp $$$${F}${.EXE} $$$${Fout}${.EXE} ; \
	      done ; \
	    fi ; \
	  done ; \
        fi
endef
# .FUN-install-copy-script(files, dirs, sheb): same copy as .FUN-install-copy,
# after which the installed copy is made executable for user, group and others
# and edited in place with ${PERL}: on the first input line only, a leading
# "#!..." is replaced by "#! " followed by the text of $3.
# Note that the chmod/perl step follows the copy after a ';', so it is
# attempted even when the copy itself failed.
define .FUN-install-copy-script
	@ files='$$(strip $1)'; dirs='$$(strip $2)'; sheb='$$(strip $3)'; \
	if [ -n "$$$${files}" -a -n "$$$${dirs}" ] ; then \
	  for F in $$$${files} ; do \
	    if [ -f $$$${F} ] ; then \
	      for D in $$$${dirs} ; do \
	        Fout=$${INSTALL/}$$$${D}`basename $$$${F}` ; \
	        echo ":Mangling $$$${F} to $$$${Fout}:" ; \
	        mkdir -p `dirname $$$${Fout}` && \
	        rm -f $$$${Fout} && cp -fp $$$${F} $$$${Fout} ; \
	        chmod ugo+x $$$${Fout} && \
	        ${PERL} -npi \
	           -e"if(0==\$$$$i++){s|^#!.*|#! $$$${sheb}|}" $$$${Fout}; \
	      done ; \
	    fi ; \
	  done ; \
        fi
endef
These are three similar helper functions. The first of these copies its first argument, files, into all of the directories specified in the second argument, dirs (which are assumed to be subdirs of INSTALL/). It checks for existence and tries to create directories as it needs. The second function is similar to the first but it is for executable binaries, which require a special suffix (e.g. .exe) on some platforms. The third function is similar to the first, but it also takes a third argument sheb which is the '''shebang''' line for a script. It replaces the script's shebang line with the contents of the sheb variable. The rest of the file is the set of rule blocks, each block dealing with a certain file type. The first section, which is the largest, is the one for C and C++.
# Every entry of a directory's .C_SRCS list gets a ".d" dependency file.
# The pattern is appended to __DEPGOALS__ unexpanded; ALL_C_DEPS is the same
# list expanded now over every directory in // .
__DEPGOALS__     +=                 $$(patsubst %,%.d,$${${1:.=.C_SRCS}})
ALL_C_DEPS       :=$(foreach x,${//},$(patsubst %,%.d,${${x:.=.C_SRCS}}))
# Static pattern rule: foo.c.d is made from foo.c.  The first sed strips the
# file name from $< to leave its directory; the second prefixes that directory
# to every line of the ${CCDEP} output that contains a ':'.
${ALL_C_DEPS}:%.d:%
	@ echo "making $@"
	 dir=`echo $< | sed -e's~[^/]*$$~~'`; \
	  ${CCDEP} ${CDEPFLAGS} ${CFLAGS} $< | \
	  sed -e"/:/s!^!$${dir}!" > $@

# C++ counterpart of the C dependency block: one ".d" file per .CXX_SRCS entry,
# produced with ${CXXDEP} and the same directory-prefixing sed filter.
__DEPGOALS__     +=                 $$(patsubst %,%.d,$${${1:.=.CXX_SRCS}})
ALL_CXX_DEPS     :=$(foreach x,${//},$(patsubst %,%.d,${${x:.=.CXX_SRCS}}))
${ALL_CXX_DEPS}:%.d:%
	@ echo "making $@"
	 dir=`echo $< | sed -e's~[^/]*$$~~'`; \
	  ${CXXDEP} ${CXXDEPFLAGS} ${CXXFLAGS} $< | \
	  sed -e"/:/s!^!$${dir}!" > $@
This section specifies the compiler dependencies which must be detected. Dependency files are made for all source files (set to $/.C_SRCS and $/.CXX_SRCS presumably in the $/Make.include file). These names are added to the __DEPGOALS__ to be included later in the Makefile. We also have the rule for constructing dependency files from source files.
# Keep object files even when make only built them as intermediates.
.PRECIOUS: %.o

# C sources use ${CC}; the three C++ suffixes (.cc, .cpp, .C) all use ${CXX}.
# The ${-CC} / ${-CXX} prefixes come first on each recipe line.
%.o: %.c
	${-CC} ${CC} ${CFLAGS} ${CFLAGS_COMPILE} -o $@ -c $<

%.o: %.cc
	${-CXX} ${CXX} ${CXXFLAGS} ${CXXFLAGS_COMPILE} -o $@ -c $<

%.o: %.cpp
	${-CXX} ${CXX} ${CXXFLAGS} ${CXXFLAGS_COMPILE} -o $@ -c $<

%.o: %.C
	${-CXX} ${CXX} ${CXXFLAGS} ${CXXFLAGS_COMPILE} -o $@ -c $<

Pattern-driven rules are specified for several kinds of object code builds.
# All .C_EXES entries of every directory share one link recipe; $+ keeps
# prerequisites in order and keeps duplicates.  The prerequisites themselves
# are not given here.
ALL_C_EXES   :=$(strip $(foreach x,${//},${${x:.=.C_EXES}}))
${ALL_C_EXES}:
	${-CC} ${CC} ${CLDFLAGS} -o $@ $+ ${CLIBS}
__SUBGOALS__+=$${${1:.=.C_EXES}}

# Same for C++ executables, linked with ${CXX}.
ALL_CXX_EXES :=$(strip $(foreach x,${//},${${x:.=.CXX_EXES}}))
${ALL_CXX_EXES}:
	${-CXX} ${CXX} ${CXXLDFLAGS} -o $@ $+ ${CXXLIBS}
__SUBGOALS__+=$${${1:.=.CXX_EXES}}
We add to the subgoals the executable programs $/.C_EXES and $/.CXX_EXES. They are constructed by a link command.
# Hooks an extra prerequisite onto the directory's .install-copy target whose
# recipe copies the directory's C and C++ executables to bin/ under ${INSTALL/}.
define .RULE-install-copy-C-CXX-EXES
${1:.=.install-copy}: ${1:.=.install-copy-C-CXX-EXES}
${1:.=.install-copy-C-CXX-EXES}:
	$(call .FUN-install-copy-exe,$${${1:.=.C_EXES}} $${${1:.=.CXX_EXES}},bin/)

endef
# Instantiate the rule once per directory in // .
$(eval $(foreach x,${//},$(call .RULE-install-copy-C-CXX-EXES,$x)))
We add to each .install-copy rule the action that executable binaries be copied to the subdirectory bin/.
# Static libraries: the old archive is removed, then rebuilt from all
# prerequisites ($^) with ${AR} ${ARFLAGS}.
ALL_C_LIBS   :=$(strip $(foreach x,${//},${${x:.=.C_LIBS}}))
${ALL_C_LIBS}:
	${-CC} ${RM} $@ && ${AR} ${ARFLAGS} $@ $^
__SUBGOALS__+=$${${1:.=.C_LIBS}}

# Same recipe for C++ static libraries; only the ${-CXX} prefix differs.
ALL_CXX_LIBS     :=$(strip $(foreach x,${//},${${x:.=.CXX_LIBS}}))
${ALL_CXX_LIBS}:
	${-CXX} ${RM} $@ && ${AR} ${ARFLAGS} $@ $^
__SUBGOALS__+=$${${1:.=.CXX_LIBS}}

# NOTE(review): this assigns the empty string to <_OS_>_SHLIB_FLAGS at the point
# where Make.rules is read, replacing any earlier value -- confirm that the
# per-OS flags are meant to be set after this line.
${_OS_}_SHLIB_FLAGS:=
# Shared libraries: the old file is removed, then the compiler is run as the
# linker with the generic ${SHLIB_FLAGS} plus the per-OS flags.
ALL_C_SHLIBS     :=$(strip $(foreach x,${//},${${x:.=.C_SHLIBS}}))
${ALL_C_SHLIBS}:
	${-CC} ${RM} $@ && ${CC} ${CLDFLAGS} ${SHLIB_FLAGS} ${${_OS_}_SHLIB_FLAGS} -o $@ $^ ${CLIBS}

ALL_CXX_SHLIBS   :=$(strip $(foreach x,${//},${${x:.=.CXX_SHLIBS}}))
${ALL_CXX_SHLIBS}:
	${-CXX} ${RM} $@ && ${CXX} ${CXXLDFLAGS} ${SHLIB_FLAGS} ${${_OS_}_SHLIB_FLAGS} -o $@ $^ ${CXXLIBS}
# Both shared-library lists become subgoals of their directory.
__SUBGOALS__+=$${${1:.=.C_SHLIBS}} $${${1:.=.CXX_SHLIBS}}
Additional C,C++ subgoals include libraries and shared libraries. It is unfortunate that AIX has a fairly different means of producing shared libraries than other operating systems. This is the only place in the rules where the _OS_ variable (defined in Make.compilers) is a factor in determining the rule. If more situations like this arise, it may be necessary to redesign the interactions between Make.rules and Make.compilers.
# Hooks an extra prerequisite onto the directory's .install-copy target whose
# recipe copies the directory's C and C++ static libraries to
# lib/<value of the directory's .LIB/ variable> under ${INSTALL/}.
# The subdirectory reference must be the substitution ${1:.=.LIB/}; without the
# ':' make looks up a variable literally named "1.=.LIB/", which is empty, and
# every library lands directly in lib/.
define .RULE-install-copy-C-CXX-LIBS
${1:.=.install-copy}: ${1:.=.install-copy-C-CXX-LIBS}
${1:.=.install-copy-C-CXX-LIBS}:
	$(call .FUN-install-copy,$${${1:.=.C_LIBS}} $${${1:.=.CXX_LIBS}}, \
                                 lib/$${${1:.=.LIB/}})

endef
# The name passed to call must match the define above; calling an undefined
# variable expands to nothing and silently generates no rule at all.
$(eval $(foreach x,${//},$(call .RULE-install-copy-C-CXX-LIBS,$x)))

# Hooks an extra prerequisite onto the directory's .install-copy target whose
# recipe copies the directory's C and C++ shared libraries to
# lib/<value of the directory's .LIB/ variable> under ${INSTALL/}.
# The per-directory target is named after the define (C-CXX-SHLIBS), matching
# the naming of the other install-copy rules in this section, and the
# subdirectory reference uses the substitution form ${1:.=.LIB/}.
define .RULE-install-copy-C-CXX-SHLIBS
${1:.=.install-copy}: ${1:.=.install-copy-C-CXX-SHLIBS}
${1:.=.install-copy-C-CXX-SHLIBS}:
	$(call .FUN-install-copy,$${${1:.=.C_SHLIBS}} $${${1:.=.CXX_SHLIBS}}, \
                                 lib/$${${1:.=.LIB/}})

endef
# The name passed to call must match the define above; calling an undefined
# variable expands to nothing and silently generates no rule at all.
$(eval $(foreach x,${//},$(call .RULE-install-copy-C-CXX-SHLIBS,$x)))
Libraries and shared libraries are copied to the subdirectory lib/$/.LIB/, i.e. to lib/ or some subdirectory of lib/ specified by the variable $/.LIB/, which is presumably set in the $/Make.include.
# Hooks an extra prerequisite onto the directory's .install-copy target whose
# recipe copies the files in .C_INCS and .CXX_INCS to
# include/<value of the directory's .INCLUDE/ variable> under ${INSTALL/}.
define .RULE-install-copy-C-CXX-INCS
${1:.=.install-copy}: ${1:.=.install-copy-C-CXX-INCS}
${1:.=.install-copy-C-CXX-INCS}:
	$(call .FUN-install-copy,$${${1:.=.C_INCS}} $${${1:.=.CXX_INCS}}, \
                                 include/$${${1:.=.INCLUDE/}})

endef
# Instantiate the rule once per directory in // .
$(eval $(foreach x,${//},$(call .RULE-install-copy-C-CXX-INCS,$x)))
If include files are defined in $/.C_INCS or $/.CXX_INCS then these are copied directly to include/ or one of its subdirectories, specified by $/.INCLUDE/. There is a section which builds ps and pdf documents from LaTeX files.
# Each recipe changes into the source file's directory first.  ${LATEX} is run
# twice in a row for both the .dvi and the .aux target.
%.dvi: %.tex
	${-LATEX} cd `dirname $<` && ${LATEX} `basename $<` && ${LATEX} `basename $<`

%.aux: %.tex
	${-LATEX} cd `dirname $<` && ${LATEX} `basename $<` && ${LATEX} `basename $<`

# ${BIBTEX} is given the .aux file name with its suffix removed.
%.bbl: %.aux
	${-LATEX} cd `dirname $<` && ${BIBTEX} `basename ${<:.aux=}`
These are the commands to invoke LaTeX, based on file pattern.
# Collect the .TEX_PS and .TEX_PDF lists of every directory in // .
ALL_TEX_PS    :=$(strip $(foreach x,${//},${${x:.=.TEX_PS}}))
ALL_TEX_PDF   :=$(strip $(foreach x,${//},${${x:.=.TEX_PDF}}))

# A .ps is made from the .dvi; the rule also covers the .ps that each
# requested .pdf is converted from.
${ALL_TEX_PS} ${ALL_TEX_PDF:.pdf=.ps}: %.ps: %.dvi
	${-LATEX} cd `dirname $<` && ${DVIPS} -o `basename $@` `basename $<`

# A .pdf is made from the .ps with ${PS2PDF}.
${ALL_TEX_PDF}: %.pdf: %.ps
	${-LATEX} ${PS2PDF} $< $@
__SUBGOALS__+=$${${1:.=.TEX_PS}} $${${1:.=.TEX_PDF}}
The $/.TEX_PS and $/.TEX_PDF files are added to the subgoals. The commands to actually construct ps and pdf files have been defined.
# Hooks an extra prerequisite onto the directory's .install-copy target whose
# recipe copies the .TEX_PS files and then the .TEX_PDF files to
# doc/<value of the directory's .DOC/ variable> under ${INSTALL/}.
define .RULE-install-copy-TEX_PSPDF
${1:.=.install-copy}: ${1:.=.install-copy-TEX_PSPDF}
${1:.=.install-copy-TEX_PSPDF}:
	$(call .FUN-install-copy,$${${1:.=.TEX_PS}},doc/$${${1:.=.DOC/}})
	$(call .FUN-install-copy,$${${1:.=.TEX_PDF}},doc/$${${1:.=.DOC/}})

endef
# Instantiate the rule once per directory in // .
$(eval $(foreach x,${//},$(call .RULE-install-copy-TEX_PSPDF,$x)))
The ps and pdf files are copied to doc/ or the $/.DOC/ subdirectory of doc/.
# Hooks an extra prerequisite onto the directory's .install-copy target.
# .PY_EXES are copied to scripts/ with their shebang line rewritten to
# "${PYTHON} ${PYTHON_FLAGS}"; .PY_LIBS are copied unchanged to
# scripts/<value of the directory's .PY_LIB/ variable>.
define .RULE-install-copy-PYTHON
${1:.=.install-copy}: ${1:.=.install-copy-PYTHON}
${1:.=.install-copy-PYTHON}:
	$(call .FUN-install-copy-script,$${${1:.=.PY_EXES}},\
                                         scripts/,\
                                         ${PYTHON} ${PYTHON_FLAGS})
	$(call .FUN-install-copy,$${${1:.=.PY_LIBS}}, \
                                 scripts/$${${1:.=.PY_LIB/}})

endef
# Instantiate the rule once per directory in // .
$(eval $(foreach x,${//},$(call .RULE-install-copy-PYTHON,$x)))
Python scripts require only copying, but with the shebang mangling on the $/.PY_EXES files. The $/.PY_EXES files get copied (and shebang-ed) to scripts/, and the $/.PY_LIBS get copied to scripts/$/.PY_LIB/. Similar versions of this rule block exist for perl and sh libraries and executables. == Anatomy of Make.compilers == The Make.compilers file (cvs revision 1.50) sets many platform dependent variables as well as compiling modes such as debugging or profiling. Additionally, the paths for various libraries and utilities are set. The first part of the file ascertains the platform and build mode, and the rest of the file sets variables based on them. This is done in blocks broken down by application rather than platform or mode. This will not be a line by line walkthrough. Instead we will list important variables being set and what they mean for the other parts of the build system.
# The accepted values of OPERATING_SYSTEM.  An entry is either a bare name or
# "name|qualifier"; the part before the '|' is what later becomes _OS_.
VALID_OPERATING_SYSTEM:=$(strip \
    TRU64  \
    AIX    \
    cygwin \
    SunOS|foster-city   \
    SunOS|francisco's \
    FreeBSD \
    FreeBSD|Randy \
    Linux|RH7 \
    Linux|RH9 \
    default|I-will-take-my-chances \
)
Our current thinking is that a platform consists of both an operating system and possible additional specifications. In our current work situation, the admins have defined an environment variable called OPERATING_SYSTEM for us which we now use as a platform identifier, despite the obvious misuse of the word. This variable defines those values of that variable which the build system will respect.
# Validate OPERATING_SYSTEM against VALID_OPERATING_SYSTEM and split it.
# make stops with an error when the variable is unset or not in the list.
ifdef OPERATING_SYSTEM
  ifneq ($(filter ${OPERATING_SYSTEM},${VALID_OPERATING_SYSTEM}),)
    _OS|FULL_:=${OPERATING_SYSTEM}
    # Put a space before each '|' and drop the words that start with '|',
    # which leaves only the part before the first '|'.
    _OS_:=$(filter-out |%, $(subst |, |,${_OS|FULL_}))
  else
  $(error You are trying to use the build system on a platform where the \
environment variable OPERATING_SYSTEM is set to an unrecognized value.  \
You should either set \
OPERATING_SYSTEM to a recognized value, possibly after editing the \
Make.compilers file of the build system.  Currently, the recognized values \
for OPERATING_SYSTEM are: ${VALID_OPERATING_SYSTEM}  )
  endif
else
  $(error You are trying to use the build system on a platform where the \
environment variable OPERATING_SYSTEM is not set.  You should either set \
OPERATING_SYSTEM to a recognized value, possibly after editing the \
Make.compilers file of the build system.  Currently, the recognized values \
for OPERATING_SYSTEM are: ${VALID_OPERATING_SYSTEM}  )
endif
The OPERATING_SYSTEM variable is parsed into its major identifier, in _OS_ and its full identifier _OS|FULL_. This allows us to create conditions for the machine architecture based on _OS_ as well as for the specific installation and auxiliary packages on the platform based on _OS|FULL_. If the OPERATING_SYSTEM variable is not set correctly, then the build will abort. We chose this as opposed to some default behavior because we figured that if OPERATING_SYSTEM was left unset or there was some error in its value, that building with the default definitions, instead of being alerted to the problem harshly, would waste a lot of time. If the user really wants all default behavior, a value of OPERATING_SYSTEM exists for that.
# Build-mode tags.  Each starts empty and is set to a "-suffix" when the
# corresponding WITH_* variable selects that mode.

# _CC_ is "-gcc" whenever WITH_GNU is defined.
_CC_:=
ifdef WITH_GNU
  _CC_:=-gcc
endif

# _OPT_ is "-debug" or "-prof" for WITH_OPT=debug / WITH_OPT=profile;
# any other value of WITH_OPT leaves it empty.
_OPT_:=
ifeq (${WITH_OPT},debug)
  _OPT_ :=-debug
endif
ifeq (${WITH_OPT},profile)
  _OPT_ :=-prof
endif

# _THR_ is "-threaded" whenever WITH_THREADS is defined.
_THR_:=
ifdef WITH_THREADS
  _THR_:=-threaded
endif
If _CC_ is set to '-gcc' then GNU compilers will be used, and if it is empty, native compilers will be used. There are three _OPT_ modes: 'debug', 'profile', and '' (normal). There is also a _THR_ variable which determines if the applications are to be compiled with threading. A user wishing to build with profiling and threading enabled would do something like
$ gmake WITH_OPT=profile WITH_THREADS=1
to turn these options on. One could also set these variable in the environment.
# INSTALL/ is a directory next to the Makefile whose name is the concatenation
# of the platform name and the compiler/optimisation/threading tags.
# allow additional tag for install directories
ifdef INSTALL_TAG
  INSTALL/:=${MAKEFILE/}${_OS_}${_CC_}${_OPT_}${_THR_}-${INSTALL_TAG}/
else
  INSTALL/:=${MAKEFILE/}${_OS_}${_CC_}${_OPT_}${_THR_}/
endif
The INSTALL/ directory is set. It is based on the location of the Makefile and the given tags. If the user has defined the variable INSTALL_TAG then this will be added to the INSTALL/ directory.
# Programs and flags used only to generate the .d dependency files.
CCDEP		 :=gcc
CXXDEP		 :=g++
CDEPFLAGS        :=-MM -MG
CXXDEPFLAGS      :=-MM -MG
The GNU compilers have much more sophisticated dependency producers than the native compilers, so we will use them for all architectures. In theory this could cause bugs due to the mismatch between depends and build compilers. In practice, it does not.
# Default values for the C/C++ build variables used by the rules.
# -CC / -CXX are the recipe-line prefixes; empty means no prefix.
-CC:=
-CXX:=
CC:=gcc
CXX:=g++
CFLAGS:=-O2 -g
CXXFLAGS:=-O2 -g
CFLAGS_COMPILE:=
CXXFLAGS_COMPILE:=
CLDFLAGS:=
CXXLDFLAGS:=
CLIBS:=-lm
CXXLIBS:=-lm
SHLIB_FLAGS:=-shared
# Names removed from each directory by the clean and real-clean recipes.
C_TMP_COMPILE:=
CXX_TMP_COMPILE:=
Each block begins with the declaration of the variables to be defined in that block, set to their default values. The CFLAGS,CXXFLAGS variables are those compile flags which are needed by both the dependency check and by the actual compile such as include paths. The CFLAGS_COMPILE and CXXFLAGS_COMPILE flags are those which are only needed by the actual compile, not by the dependency checker, like debugging and profiling flags. The CLDFLAGS,CXXLDFLAGS are the flags for the compiler when functioning as a loader and are placed on the loader command line ahead of the object files. The CLIBS,CXXLIBS are placed on the loader command line after the object files. The role of each of these flags is made clear from the Make.rules file, where they are used. The -VAR variables prefix all action lines of a given category. They are designed to allow the build to ignore errors in those actions. This was motivated primarily by the fact that some systems did not have some key packages installed like LaTeX.
# On this one platform every LaTeX recipe line is prefixed with '-', so make
# ignores errors from those commands.
ifeq (${_OS|FULL_},SunOS|francisco's)
  -LATEX :=-
endif
This causes a - to appear before any one of the actions in the LaTeX section of the Make.rules file. That - will cause the build to ignore any errors in the execution of those actions. The Make.compilers file is not just about defining the compilers and interpreters on the system. It is also used to make available certain architecture dependent package locations.
# LAPACK compile/link settings: generic defaults first, then per-platform
# overrides keyed on _OS_ or on the full _OS|FULL_ identifier.
CFLAGS_LAPACK   :=-DFTN_UNDERSCORE -DFTN_LOWERCASE
CLDFLAGS_LAPACK :=-L/usr/local/lib
CLIBS_LAPACK    :=-llapack -lblas -lm
ifeq (${_OS_},TRU64)
# this seems to work for both gcc and non-gcc
  CLDFLAGS_LAPACK :=
  CLIBS_LAPACK    :=-ldxml
endif
ifeq (${_OS_},AIX)
  CFLAGS_LAPACK   :=-DFTN_LOWERCASE
  CLDFLAGS_LAPACK :=-L/usr/local/ir/lib
  CLIBS_LAPACK    :=-llapack -lessl -lxlf90
endif
# sometimes we deploy on Solaris with CDX
ifeq (${_OS|FULL_},SunOS|foster-city)
  CLDFLAGS_LAPACK :=-L/home/ross/local/lib
  CLIBS_LAPACK    :=-llapack -lblas -lF77
endif
# sometimes we deploy on Solaris on Francisco's machines
ifeq (${_OS|FULL_},SunOS|francisco's)
  CLIBS_LAPACK    :=-llapack -lblas -lF77
endif
# The C++ variants are copies of the final C values.
CXXFLAGS_LAPACK   :=${CFLAGS_LAPACK}
CXXLDFLAGS_LAPACK :=${CLDFLAGS_LAPACK}
CXXLIBS_LAPACK    :=${CLIBS_LAPACK}
Some modules use LAPACK. Although these variables do not get used in any of the rules of the build system, we define them in Make.compilers so that they can be used in the various Make.include files which need LAPACK. This is one case where the _OS|FULL_ is useful, since different platforms install LAPACK in all sorts of ways.
# Python interpreter and include path.  By default the interpreter is whatever
# `which python` finds; specific platforms override one or both values.
PYTHON    :=$(shell which python)
PYTHON_FLAGS :=
CFLAGS_PY    :=-I/usr/local/include/python
# The TRU64 and AIX overrides compare against ${_OS_}${_CC_}, so they only
# apply when _CC_ is empty.
ifeq (${_OS_}${_CC_},TRU64)
  CFLAGS_PY :=-I/usr/local/ir/Python-2.2.2 -I/usr/local/ir/Python-2.2.2/Include
  PYTHON    :=/usr/local/ir/bin/python
endif
ifeq (${_OS_}${_CC_},AIX)
  CFLAGS_PY :=-I/usr/local/include/python2.2
  PYTHON    :=/usr/local/bin/python
endif
ifeq (${_OS_},cygwin)
  CFLAGS_PY :=-I/usr/include/python2.3
endif
CXXFLAGS_PY :=${CFLAGS_PY}
Python and its paths for the known architectures are determined here. On unknown architectures we guess where python is based on the user's path. == Acknowledgements == The guilty parties who gave me advice are Nathan Edwards, Dan Fasulo, Bjarni Halldorsson, and Clark Mobarry. == Author == Ross Lippert, ripper@..., 17 Oct 2003. kmer-code-2013-trunk/Make.rules0000644000000000000000000002502611676744271015123 0ustar rootroot# -*- makefile -*- # this might be useful for some future work if we want to make # actions more variable. define .Make-rule $1: $2 endef ############################################################ # useful functions for the install methods mentioned below ############################################################ # $(call .FUN-install-copy,file1 file2 file3,dir1/ dir2/) # copies whichever file exists into each of ${INSTALL/}dir # mkdir-ing as necessary. define .FUN-install-copy @ files='$$(strip $1)'; dirs='$$(strip $2)'; \ if [ -n "$$$${files}" -a -n "$$$${dirs}" ] ; then \ for F in $$$${files} ; do \ if [ -f $$$${F} ] ; then \ for D in $$$${dirs} ; do \ Fout=$${INSTALL/}$$$${D}`basename $$$${F}` ; \ mkdir -p `dirname $$$${Fout}` && \ rm -f $$$${Fout} && cp -fp $$$${F} $$$${Fout} ; \ done ; \ fi ; \ done ; \ fi endef # because SOME PLATFORMS (like cygwin) use a special .exe extension # in executables, we have to do a little hack here. 
# We assume that
# Make.compilers has set a variable called .EXE
define .FUN-install-copy-exe
	@ files='$$(strip $1)'; dirs='$$(strip $2)'; \
	if [ -n "$$$${files}" -a -n "$$$${dirs}" ] ; then \
	  for F in $$$${files} ; do \
	    if [ "${.EXE}" != "" -a -f $$$${F}${.EXE} ] ; then \
	      for D in $$$${dirs} ; do \
	        Fout=$${INSTALL/}$$$${D}`basename $$$${F}` ; \
	        mkdir -p `dirname $$$${Fout}` && \
	        rm -f $$$${Fout}${.EXE} && cp -fp $$$${F}${.EXE} $$$${Fout}${.EXE} ; \
	      done ; \
	    fi ; \
	    if [ -f $$$${F} ] ; then \
	      for D in $$$${dirs} ; do \
	        Fout=$${INSTALL/}$$$${D}`basename $$$${F}` ; \
	        mkdir -p `dirname $$$${Fout}` && \
	        rm -f $$$${Fout} && cp -fp $$$${F} $$$${Fout} ; \
	      done ; \
	    fi ; \
	  done ; \
	fi
endef

# we do another cygwin inspired hack to deal with that fact that
# .so shlibs need to be turned into .dll files.
define .FUN-install-copy-shlib
	@ files='$$(strip $1)'; dirs='$$(strip $2)'; \
	if [ -n "$$$${files}" -a -n "$$$${dirs}" ] ; then \
	  for F in $$$${files} ; do \
	    if [ -f $$$${F} ] ; then \
	      for D in $$$${dirs} ; do \
	        Fout=$${INSTALL/}$$$${D}`basename $$$${F} .so`${.SO} ; \
	        mkdir -p `dirname $$$${Fout}` && \
	        rm -f $$$${Fout} && cp -fp $$$${F} $$$${Fout} ; \
	      done ; \
	    fi ; \
	  done ; \
	fi
endef

# use this one for executable scripts with #!
# substitution
# echo ":Mangling $$$${F} to $$$${Fout}:" ;
define .FUN-install-copy-script
	@ files='$$(strip $1)'; dirs='$$(strip $2)'; sheb='$$(strip $3)'; \
	if [ -n "$$$${files}" -a -n "$$$${dirs}" ] ; then \
	  for F in $$$${files} ; do \
	    if [ -f $$$${F} ] ; then \
	      for D in $$$${dirs} ; do \
	        Fout=$${INSTALL/}$$$${D}`basename $$$${F}` ; \
	        mkdir -p `dirname $$$${Fout}` && \
	        rm -f $$$${Fout} && cp -fp $$$${F} $$$${Fout} ; \
	        chmod ugo+x $$$${Fout} && \
	        ${PERL} -npi \
	           -e"if(0==\$$$$i++){s|^#!.*|#!$$$${sheb}|}" $$$${Fout}; \
	      done ; \
	    fi ; \
	  done ; \
	fi
endef

############################################################
# C and C++ stuff
############################################################
# Building depends goals for C/CXX things
# C_SRCS and CXX_SRCS are collected together and turned into
# associated *.d dependency files.
# WISHLIST: does not propagate failure to the parent make
# for some reason.  It really should.  I think the pipe to
# sed masks the exit code.
__DEPGOALS__ += $$(patsubst %,%.d,$${${1:.=.C_SRCS}})
ALL_C_DEPS :=$(foreach x,${//},$(patsubst %,%.d,${${x:.=.C_SRCS}}))
${ALL_C_DEPS}:%.d:%
	@ echo "making $@"
	@ dir=`echo $< | sed -e's~[^/]*$$~~'`; \
	  ${CCDEP} ${CDEPFLAGS} ${CFLAGS} $< | \
	  sed -e"/:/s!^!$${dir}!" > $@

__DEPGOALS__ += $$(patsubst %,%.d,$${${1:.=.CXX_SRCS}})
ALL_CXX_DEPS :=$(foreach x,${//},$(patsubst %,%.d,${${x:.=.CXX_SRCS}}))
${ALL_CXX_DEPS}:%.d:%
	@ echo "making $@"
	@ dir=`echo $< | sed -e's~[^/]*$$~~'`; \
	  ${CXXDEP} ${CXXDEPFLAGS} ${CXXFLAGS} $< | \
	  sed -e"/:/s!^!$${dir}!" > $@

###### generic pattern rules for subgoals
# don't want .o's getting deleted as intermediates
.PRECIOUS: %${.O}
.SUFFIXES: ${.O}

%${.O}: %.c
	${-CC} ${CC} ${CFLAGS} ${CFLAGS_COMPILE} -o $@ -c $<

%${.O}: %.cc
	${-CXX} ${CXX} ${CXXFLAGS} ${CXXFLAGS_COMPILE} -o $@ -c $<

%${.O}: %.cpp
	${-CXX} ${CXX} ${CXXFLAGS} ${CXXFLAGS_COMPILE} -o $@ -c $<

%${.O}: %.C
	${-CXX} ${CXX} ${CXXFLAGS} ${CXXFLAGS_COMPILE} -o $@ -c $<

# linking commands use the $+ to get duplicated prereqs for linking
## EXE targets
ALL_C_EXES :=$(strip $(foreach x,${//},${${x:.=.C_EXES}}))
${ALL_C_EXES}:
	${-CC} ${CLD} ${CLDFLAGS} -o $@ $+ ${CLIBS}
__SUBGOALS__+=$${${1:.=.C_EXES}}

ALL_CXX_EXES :=$(strip $(foreach x,${//},${${x:.=.CXX_EXES}}))
${ALL_CXX_EXES}:
	${-CXX} ${CXXLD} ${CXXLDFLAGS} -o $@ $+ ${CXXLIBS}
__SUBGOALS__+=$${${1:.=.CXX_EXES}}

define .RULE-install-copy-C-CXX-EXES
${1:.=.install-copy}: ${1:.=.install-copy-C-CXX-EXES}
${1:.=.install-copy-C-CXX-EXES}:
	$(call .FUN-install-copy-exe,$${${1:.=.C_EXES}} $${${1:.=.CXX_EXES}},bin/)

endef
$(eval $(foreach x,${//},$(call .RULE-install-copy-C-CXX-EXES,$x)))

## LIB targets
ALL_C_LIBS :=$(strip $(foreach x,${//},${${x:.=.C_LIBS}}))
${ALL_C_LIBS}:
	${-CC} ${RM} $@ && ${AR} ${ARFLAGS} $@ $^
__SUBGOALS__+=$${${1:.=.C_LIBS}}

ALL_CXX_LIBS :=$(strip $(foreach x,${//},${${x:.=.CXX_LIBS}}))
${ALL_CXX_LIBS}:
	${-CXX} ${RM} $@ && ${AR} ${ARFLAGS} $@ $^
__SUBGOALS__+=$${${1:.=.CXX_LIBS}}

## Shared targets
# AIX has really weird shared lib building flags.  Unfortunately, I could
# not think of a way out of this hack.
${_OS_}_SHLIB_FLAGS:=
ALL_C_SHLIBS :=$(strip $(foreach x,${//},${${x:.=.C_SHLIBS}}))
${ALL_C_SHLIBS}:
	${-CC} ${RM} $@ && ${CC} ${CLDFLAGS} ${SHLIB_FLAGS} ${${_OS_}_SHLIB_FLAGS} -o $@ $^ ${CLIBS}

ALL_CXX_SHLIBS :=$(strip $(foreach x,${//},${${x:.=.CXX_SHLIBS}}))
${ALL_CXX_SHLIBS}:
	${-CXX} ${RM} $@ && ${CXX} ${CXXLDFLAGS} ${SHLIB_FLAGS} ${${_OS_}_SHLIB_FLAGS} -o $@ $^ ${CXXLIBS}
__SUBGOALS__+=$${${1:.=.C_SHLIBS}} $${${1:.=.CXX_SHLIBS}}

# Static libraries are installed to lib/<the directory's .LIB/ value>.
# The subdirectory is looked up with the substitution ${1:.=.LIB/}; without the
# ':' the reference names a non-existent variable and is always empty.
define .RULE-install-copy-C-CXX-LIBS
${1:.=.install-copy}: ${1:.=.install-copy-C-CXX-LIBS}
${1:.=.install-copy-C-CXX-LIBS}:
	$(call .FUN-install-copy,$${${1:.=.C_LIBS}} $${${1:.=.CXX_LIBS}}, lib/$${${1:.=.LIB/}})

endef
$(eval $(foreach x,${//},$(call .RULE-install-copy-C-CXX-LIBS,$x)))

# Shared libraries go to the same place, through the shlib copy helper.
define .RULE-install-copy-C-CXX-SHLIBS
${1:.=.install-copy}: ${1:.=.install-copy-C-CXX-SHLIBS}
${1:.=.install-copy-C-CXX-SHLIBS}:
	$(call .FUN-install-copy-shlib, $${${1:.=.C_SHLIBS}} $${${1:.=.CXX_SHLIBS}}, lib/$${${1:.=.LIB/}})

endef
$(eval $(foreach x,${//},$(call .RULE-install-copy-C-CXX-SHLIBS,$x)))

define .RULE-install-copy-C-CXX-INCS
${1:.=.install-copy}: ${1:.=.install-copy-C-CXX-INCS}
${1:.=.install-copy-C-CXX-INCS}:
	$(call .FUN-install-copy,$${${1:.=.C_INCS}} $${${1:.=.CXX_INCS}}, include/$${${1:.=.INCLUDE/}})

endef
$(eval $(foreach x,${//},$(call .RULE-install-copy-C-CXX-INCS,$x)))

############################################################
# latex and some ps/pdf stuff
############################################################
# I'm not sure if I should mark these as precious or not
#.PRECIOUS: %.pdf %.ps %.dvi %.aux %.bbl

%.dvi: %.tex
	${-LATEX} cd `dirname $<` && ${LATEX} `basename $<` && ${LATEX} `basename $<`

%.aux: %.tex
	${-LATEX} cd `dirname $<` && ${LATEX} `basename $<` && ${LATEX} `basename $<`

%.bbl: %.aux
	${-LATEX} cd `dirname $<` && ${BIBTEX} `basename ${<:.aux=}`

ALL_TEX_PS :=$(strip $(foreach x,${//},${${x:.=.TEX_PS}}))
ALL_TEX_PDF :=$(strip $(foreach x,${//},${${x:.=.TEX_PDF}}))

${ALL_TEX_PS}: %.ps: %.dvi
	${-LATEX} cd `dirname $<` && ${DVIPS} -o `basename $@` `basename $<`

${ALL_TEX_PDF}: %.pdf: %.tex %.aux
	${-LATEX} cd `dirname $<` && ${PDFLATEX} `basename $<` && ${PDFLATEX} `basename $<`
__SUBGOALS__+=$${${1:.=.TEX_PS}} $${${1:.=.TEX_PDF}}

# install rules
define .RULE-install-copy-TEX_PSPDF
${1:.=.install-copy}: ${1:.=.install-copy-TEX_PSPDF}
${1:.=.install-copy-TEX_PSPDF}:
# TEX_PS go to doc/
	$(call .FUN-install-copy,$${${1:.=.TEX_PS}},doc/$${${1:.=.DOC/}})
# TEX_PDF go to doc/
	$(call .FUN-install-copy,$${${1:.=.TEX_PDF}},doc/$${${1:.=.DOC/}})

endef
$(eval $(foreach x,${//},$(call .RULE-install-copy-TEX_PSPDF,$x)))

############################################################
# Python
############################################################
# python exes and libs are not subgoals.
# Otherwise, they'd be deleted when we cleaned.
# if we ever introduce some notion of 'file prep', beyond
# adding the she-bang line, then we might want to do something
# different here.
#__SUBGOALS__+=$${${1:.=.PY_EXES}} $${${1:.=.PY_LIBS}}
define .RULE-install-copy-PYTHON
${1:.=.install-copy}: ${1:.=.install-copy-PYTHON}
${1:.=.install-copy-PYTHON}:
	$(call .FUN-install-copy-script, $${${1:.=.PY_EXES}}, bin/, ${PYTHON} ${PYTHON_FLAGS})
	$(call .FUN-install-copy, $${${1:.=.PY_LIBS}}, lib/$${${1:.=.PY_LIB/}})

endef
$(eval $(foreach x,${//},$(call .RULE-install-copy-PYTHON,$x)))

############################################################
# Perl
############################################################
# Same sort of spiel as python
#__SUBGOALS__+=$${${1:.=.PERL_EXES}} $${${1:.=.PERL_LIBS}}
define .RULE-install-copy-PERL
${1:.=.install-copy}: ${1:.=.install-copy-PERL}
${1:.=.install-copy-PERL}:
	$(call .FUN-install-copy-script, $${${1:.=.PERL_EXES}}, bin/, ${PERL} ${PERL_FLAGS})
	$(call .FUN-install-copy, $${${1:.=.PERL_LIBS}}, lib/$${${1:.=.PERL_LIB/}})

endef
$(eval $(foreach x,${//},$(call .RULE-install-copy-PERL,$x)))

############################################################
# sh
############################################################ # Same sort of spiel as python #__SUBGOALS__+=$${${1:.=.SH_EXES}} $${${1:.=.SH_LIBS}} define .RULE-install-copy-SH ${1:.=.install-copy}: ${1:.=.install-copy-SH} ${1:.=.install-copy-SH}: $(call .FUN-install-copy-script, $${${1:.=.SH_EXES}}, bin/, ${SH} ${SH_FLAGS}) $(call .FUN-install-copy, $${${1:.=.SH_LIBS}}, lib/$${${1:.=.SH_LIB/}}) endef $(eval $(foreach x,${//},$(call .RULE-install-copy-SH,$x))) ############################################################ # share -- a random catchall for scripts and whatnot that # we should have real rules for but we don't right now ############################################################ #__SUBGOALS__+=$${${1:.=.SHARES}} define .RULE-install-copy-SHARE ${1:.=.install-copy}: ${1:.=.install-copy-SHARE} ${1:.=.install-copy-SHARE}: $(call .FUN-install-copy,$${${1:.=.SHARES}}, share/$${${1:.=.SHARE/}}) endef $(eval $(foreach x,${//},$(call .RULE-install-copy-SHARE,$x))) kmer-code-2013-trunk/seagen/0000755000000000000000000000000012641613357014420 5ustar rootrootkmer-code-2013-trunk/seagen/thr-loader.C0000644000000000000000000000076511463747051016575 0ustar rootroot#include #include #include #include "searchGENOME.H" void* loaderThread(void *) { encodedQuery *Q = 0L; seqInCore *B = 0L; try { B = config._qsFASTA->getSequenceInCore(); } catch (std::bad_alloc) { fprintf(stderr, "loaderThread()-- Failed to load next query sequence\ncaught bad_alloc in %s at line %d\n", __FILE__, __LINE__); exit(1); } if (B) { Q = new encodedQuery(B, config._merSize); delete B; } return(Q); } kmer-code-2013-trunk/seagen/thr-output.C0000644000000000000000000000330712322046702016650 0ustar rootroot#include #include #include #include "searchGENOME.H" #if 0 void statusThread(void *) { double finish = 0.0; if (config._outputPos > 0) finish = (config._numberOfQueries - config._outputPos) / (config._outputPos / (getTime() - config._zeroTime)); fprintf(stderr, "O:"uint32FMTW(7)" S:"uint32FMTW(7)" 
I:"uint32FMTW(7)" T:"uint32FMTW(7)" (%5.1f%%; %8.3f/sec) Finish in %5.2f seconds.\r", outputPos, inputTail, inputHead, numberOfQueries, 100.0 * outputPos / numberOfQueries, outputPos / (getTime() - zeroTime), finish); fflush(stderr); double perSec = outputPos / (getTime() - zeroTime + 0.0000001); if (perSec < 32.0) outputMask = 0xf; else if (perSec < 256.0) outputMask = 0x7f; else if (perSec < 1024.0) outputMask = 0x1ff; else outputMask = 0x3ff; } #endif void* writerThread(void *U, void *Q) { encodedQuery *query = (encodedQuery *)Q; // Write the hits // if (query->theOutputLength() > 0) { errno = 0; write(config._outputFile, query->theOutput(), query->theOutputLength()); if (errno) fprintf(stderr, "Couldn't write to the output file '%s'.\n%s\n", config._outputFileName, strerror(errno)), exit(1); } // Write the query match counts, too! // if (config._matchCountsFile) { char str[256]; sprintf(str, uint32FMT"\n", query->numberOfResults()); errno = 0; write(config._matchCountsFile, str, strlen(str)); if (errno) fprintf(stderr, "Couldn't write to the match counts file '%s'.\n%s\n", config._queryMatchFileName, strerror(errno)), exit(1); } delete query; return(0L); } kmer-code-2013-trunk/seagen/filterEST.C0000644000000000000000000001564212322046702016363 0ustar rootroot#include #include #include #include #include #include "aHit.H" #include "hitReader.H" // XXX: Thread the filter! Really cool! Pretty neat hack! Usual // thing, a thread to read hits, n threads to do filtering, and a // thread to write filtered hits. Not trivial, but maybe a win. 
// Global statistics // uint32 hitsSaved = 0; uint32 hitsFiltered = 0; uint32 hitsUnknown = 0; uint32 hitsTotal = 0; uint32 seqsMapped = 0; // Sequences that we mapped uint32 seqsPartial = 0; // Sequences that we mapped, but missed a few good matches uint32 seqsMissed = 0; // Sequences that we failed to map, but should have uint32 filterTP = 0; uint32 filterFP = 0; uint32 filterFNfilt = 0; // false negatives from filtering uint32 filterFNunk = 0; // false negatives from our failure to classify uint32 filterTN = 0; uint32 goodPercentID = 94; uint32 goodCoverage = 50; // Command line options // uint32 uniqThresh = 200; // Used to be 100 uint32 reptThresh = 200; // Used to be 100 FILE *logFile = 0L; // Filter results -- thread unsafe! // // bool decided -- true if the filter could decide on how to filter the hits // char label -- if decided, the name of the decider // uint32 hitsToSave -- the number of hits to save // double qualToSave -- the quality threshold to filter at // bool decided; const char *label; uint32 hitsToSave; double qualToSave; void report(uint32 iid #ifdef WITH_ANSWERS , uint32 filterTP, uint32 filterFP, uint32 filterFNfilt, uint32 filterFNunk, uint32 filterTN, uint32 seqsMapped, uint32 seqsPartial, uint32 seqsMissed #endif ) { fprintf(stderr, uint32FMTW(9)"]" #ifdef WITH_ANSWERS " tp="uint32FMTW(7)" fp="uint32FMTW(7)" fnfilt="uint32FMTW(7)" fnunkn="uint32FMTW(7)" tn="uint32FMTW(7) " yea:"uint32FMTW(7)" may:"uint32FMTW(7)" nay:"uint32FMTW(7) #endif " hits saved:"uint32FMTW(8)"/"uint32FMTW(8)" = %6.3f%%\r", iid, #ifdef WITH_ANSWERS filterTP, filterFP, filterFNfilt, filterFNunk, filterTN, seqsMapped, seqsPartial, seqsMissed, #endif hitsSaved, hitsTotal, 100.0 * hitsSaved / hitsTotal); } void complicatedFilter(hitReader &HR); // The simple filter just returns the top uniqThresh hits // void simpleFilter(hitReader &HR) { decided = true; label = "simple"; qualToSave = 0.0; hitsToSave = HR.numHits(); if (HR.numHits() > uniqThresh) hitsToSave = uniqThresh; 
} int main(int argc, char **argv) { if (argc == 1) { fprintf(stderr, "ESTmapper utility function -- not for human use.\n"); exit(1); } hitReader HR(argc); int arg = 1; while (arg < argc) { if (strncmp(argv[arg], "-uniquethreshold", 2) == 0) { uniqThresh = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-repeatthreshold", 2) == 0) { reptThresh = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-log", 2) == 0) { ++arg; errno = 0; logFile = fopen(argv[arg], "w"); if (errno) { fprintf(stderr, "filterEST: ERROR: couldn't open logFile '%s' for writing.\n%s\n", argv[arg], strerror(errno)); exit(1); } } else { HR.addInputFile(argv[arg]); } arg++; } while (HR.loadHits()) { // Not every filter we can think of needs the hits sorted, but // it's nice to guarantee they are sorted. // HR.sortByCoverage(); //simpleFilter(HR); complicatedFilter(HR); #ifdef WITH_ANSWERS int tp=0, tn=0, fn=0, fp=0; #endif // If we still haven't figured out what to do, then the EST is // labeled a repeat. Otherwise, write the (filtered) hits to the // file. // if (!decided) { hitsUnknown += HR.numHits(); #ifdef WITH_ANSWERS // We've failed to classify all these hits, so anythig that looks good is a false negative // for (uint32 i=0; i < HR.numHits(); i++) if ((HR[i].mappedCoverage >= goodCoverage) && (HR[i].mappedIdentity >= goodPercentID)) { if (logFile) { fprintf(logFile, "FAILUNKN hit="uint32FMTW(3)" id=%2d cv=%2d COV=%5.3f MUL=%5.3f: ", i, HR[i].mappedIdentity, HR[i].mappedCoverage, HR[i].coverage, HR[i].multiplicity); ahit_printASCII(&HR[i].a, logFile); } filterFNunk++; fn++; } else { tn++; } #endif } else { for (uint32 i=0; i < HR.numHits(); i++) { if ((i < hitsToSave) && (qualToSave <= HR[i].coverage)) { hitsSaved++; ahit_printASCII(&HR[i].a, stdout); #ifdef WITH_ANSWERS if ((HR[i].mappedCoverage >= goodCoverage) && (HR[i].mappedIdentity >= goodPercentID)) { tp++; } else { fp++; } #endif } else if (HR[i].a._merged) { // We merged this hit, so scores are incorrect. 
Give it // the benefit of the doubt and report it. // hitsSaved++; ahit_printASCII(&HR[i].a, stdout); } else { hitsFiltered++; #ifdef WITH_ANSWERS // Report hits that are false negatives // if ((HR[i].mappedCoverage >= goodCoverage) && (HR[i].mappedIdentity >= goodPercentID)) { if (logFile) { fprintf(logFile, "FAILFILT hit="uint32FMTW(3)" id=%2d cv=%2d COV=%5.3f MUL=%5.3f: ", i, HR[i].mappedIdentity, HR[i].mappedCoverage, HR[i].coverage, HR[i].multiplicity); ahit_printASCII(&HR[i].a, logFile); } filterFNfilt++; fn++; } else { tn++; } #endif } } } #ifdef WITH_ANSWERS filterTP += tp; filterTN += tn; filterFP += fp; if (tp > 0) seqsMapped++; if (fn > 0) seqsPartial++; if ((tp == 0) && (fn > 0)) seqsMissed++; #endif hitsTotal += HR.numHits(); #ifdef WITH_ANSWERS // Report if we saw falsenegatives (we should have printed FAIL into the log, too) // if (fn > 0) if (logFile) fprintf(logFile, uint32FMT"] %sFALSENEGATIVE %10.10s tp="uint32FMTW(7)" fp="uint32FMTW(7)" fn="uint32FMTW(7)" tn="uint32FMTW(7)"\n", HR.iid(), (tp > 0) ? 
"partial" : "fatal", label, tp, fp, fn, tn); #endif if ((HR.iid() % 500) == 0) { report(HR.iid() #ifdef WITH_ANSWERS , filterTP, filterFP, filterFNfilt, filterFNunk, filterTN, seqsMapped, seqsPartial, seqsMissed #endif ); fflush(stderr); } } if (logFile) fclose(logFile); report(HR.iid() #ifdef WITH_ANSWERS , filterTP, filterFP, filterFNfilt, filterFNunk, filterTN, seqsMapped, seqsPartial, seqsMissed #endif ); fprintf(stderr, "\n"); return(0); } kmer-code-2013-trunk/seagen/aHit.C0000644000000000000000000000546612322046702015412 0ustar rootroot#include "aHit.H" #include #include void ahit_writeBinary(aHit *a, FILE *F) { fwrite(a, sizeof(aHit), 1, F); } void ahit_readBinary(aHit *a, FILE *F) { fread(a, sizeof(aHit), 1, F); } void ahit_readBinary(aHit *a, readBuffer *F) { F->read((char *)a, sizeof(aHit)); } void ahit_printASCII(aHit *a, FILE *F) { fprintf(F, "-%c -e "uint32FMT" -D "uint32FMT" "uint32FMT" "uint32FMT" -M "uint32FMT" "uint32FMT" "uint32FMT"\n", a->_forward ? 'f' : 'r', a->_qsIdx, a->_dsIdx, a->_dsLo, a->_dsHi, a->_covered, a->_matched, a->_numMers); } // We don't read the string here so that we can use a static buffer // in whatever loop we read with. // // e.g., // // char b[1025]; // while (!feof(I)) { // fgets(b, 1024, I); // if (!feof(I)) // ahit_parseString(a, b); // } // // Note that using sscanf, while easy to implement, and safe // (looking, anyways), is terribly slow, and not really that safe. 
// // char c; // sscanf(b, "-%c -e %d -D %d %d %d -M %d %d %d", // &c, // &a->_qsIdx, // &a->_dsIdx, // &a->_dsLo, // &a->_dsHi, // &a->_covered, // &a->_matched, // &a->_numMers); // a->_direction = (c == 'f'); // // fast: 138.440u 40.500s 4:04.61 73.1% 0+2k 327822+0io 4pf+0w // slow: 737.587u 38.652s 13:12.42 97.9% 0+2k 328006+0io 11pf+0w // void ahit_parseString(aHit *a, char *b) { char *c = b+1; a->_forward = (*c == 'f'); c += 1; if (c[2] != 'e') fprintf(stderr, "'%s' didn't get -e\n", b); c += 4; a->_qsIdx = (uint32)strtoul(c, &c, 10); // If we get a "-D" next then we are reading search output, // otherwise, we are (hopefully) reading seatac output. if (c[2] == 'D') { // searchGENOME format here! c += 4; a->_dsIdx = (uint32)strtoul(c, &c, 10); a->_dsLo = (uint32)strtoul(c, &c, 10); a->_dsHi = (uint32)strtoul(c, &c, 10); if (c[2] == 'M') { c += 4; a->_covered = (uint32)strtoul(c, &c, 10); a->_matched = (uint32)strtoul(c, &c, 10); a->_numMers = (uint32)strtoul(c, &c, 10); } else { a->_covered = 0; a->_matched = 0; a->_numMers = 0; } } else { // seatac format here! #if 0 fprintf(stderr, "seatac?\n"); // We make horrible use of variable names here -- covered and // matched are the regions on the first sequence, and numMers // is the "F" value. 
a->_covered = (uint32)strtoul(c, &c, 10); a->_matched = (uint32)strtoul(c, &c, 10); c += 4; a->_dsIdx = (uint32)strtoul(c, &c, 10); a->_dsLo = (uint32)strtoul(c, &c, 10); a->_dsHi = (uint32)strtoul(c, &c, 10); c += 4; a->_numMers = (uint32)strtoul(c, &c, 10); #endif } } kmer-code-2013-trunk/seagen/test/0000755000000000000000000000000012641613357015377 5ustar rootrootkmer-code-2013-trunk/seagen/test/encodedQueryTest.C0000644000000000000000000000177212322046702020767 0ustar rootroot#include "bio++.H" #include "encodedQuery.H" int main(int argc, char **argv) { if (argc == 1) { mt_s *mt = mtInit(time(0L)); fprintf(stderr, "Building random sequences for testing.\n"); for (uint32 i=0; i<100000; i++) { char *seq = new char [10000]; char *hdr = new char [128]; for (uint32 j=0; j<10000; j++) { seq[j] = decompressSymbol[mtRandom32(mt) % 4]; if (mtRandomRealOpen(mt) < 0.01) seq[j] = 'n'; } seq[9999] = 0; sprintf(hdr, ">"uint32FMT, i); seqInCore *S = new seqInCore(i, hdr, strlen(hdr), seq, 9999); encodedQuery *Q = new encodedQuery(S, 22); Q->test(S); delete Q; delete S; } } else { seqCache *F = new seqCache(argv[1]); while (F->eof() == false) { seqInCore *S = F->getSequenceInCore(); encodedQuery *Q = new encodedQuery(S, 22); Q->test(S); delete Q; delete S; } delete F; } exit(0); } kmer-code-2013-trunk/seagen/test/intervalList-test.C0000644000000000000000000001064512322046702021134 0ustar rootroot#include #include #include #include #include #define TEST_INTERVAL_LIST #define TEST_SIZE 2000 #define TEST_ITERS 1000 #include "libbri.H" #include "intervalList.H" void fixedTest(void) { intervalList G(10); G.addInterval(110); fprintf(stderr, "Adding %3d -> %3d:\t", 110, 110+10); G.dump(); G.addInterval(130); fprintf(stderr, "Adding %3d -> %3d:\t", 130, 130+10); G.dump(); G.addInterval(105); fprintf(stderr, "Adding %3d -> %3d:\t", 105, 105+10); G.dump(); G.addInterval(115); fprintf(stderr, "Adding %3d -> %3d:\t", 115, 115+10); G.dump(); G.addInterval(124); fprintf(stderr, "Adding %3d 
-> %3d:\t", 124, 124+10); G.dump(); G.addInterval( 50); fprintf(stderr, "Adding %3d -> %3d:\t", 50, 50+10); G.dump(); G.addInterval(200); fprintf(stderr, "Adding %3d -> %3d:\t", 200, 200+10); G.dump(); G.addInterval(150); fprintf(stderr, "Adding %3d -> %3d:\t", 150, 150+10); G.dump(); G.addInterval(205); fprintf(stderr, "Adding %3d -> %3d:\t", 205, 205+10); G.dump(); G.addInterval(195); fprintf(stderr, "Adding %3d -> %3d:\t", 195, 195+10); G.dump(); G.addInterval( 61); fprintf(stderr, "Adding %3d -> %3d:\t", 61, 61+10); G.dump(); G.addInterval( 72); fprintf(stderr, "Adding %3d -> %3d:\t", 72, 72+10); G.dump(); G.addInterval( 65); fprintf(stderr, "Adding %3d -> %3d:\t", 65, 65+10); G.dump(); G.addInterval( 83); fprintf(stderr, "Adding %3d -> %3d:\t", 83, 83+10); G.dump(); G.addInterval( 94); fprintf(stderr, "Adding %3d -> %3d:\t", 94, 94+10); G.dump(); G.addInterval( 75); fprintf(stderr, "Adding %3d -> %3d:\t", 75, 75+10); G.dump(); G.addInterval( 84); fprintf(stderr, "Adding %3d -> %3d:\t", 84, 84+10); G.dump(); G.addInterval(104); fprintf(stderr, "Adding %3d -> %3d:\t", 104, 104+10); G.dump(); G.addInterval(114); fprintf(stderr, "Adding %3d -> %3d:\t", 114, 114+10); G.dump(); G.addInterval(124); fprintf(stderr, "Adding %3d -> %3d:\t", 124, 124+10); G.dump(); G.addInterval(134); fprintf(stderr, "Adding %3d -> %3d:\t", 134, 134+10); G.dump(); G.addInterval(144); fprintf(stderr, "Adding %3d -> %3d:\t", 144, 144+10); G.dump(); G.addInterval( 51); fprintf(stderr, "Adding %3d -> %3d:\t", 51, 51+10); G.dump(); G.addInterval(161); fprintf(stderr, "Adding %3d -> %3d:\t", 161, 161+10); G.dump(); G.addInterval(172); fprintf(stderr, "Adding %3d -> %3d:\t", 172, 172+10); G.dump(); G.addInterval(183); fprintf(stderr, "Adding %3d -> %3d:\t", 183, 183+10); G.dump(); G.addInterval(156); fprintf(stderr, "Adding %3d -> %3d:\t", 156, 156+10); G.dump(); G.addInterval(166); fprintf(stderr, "Adding %3d -> %3d:\t", 166, 166+10); G.dump(); G.addInterval(176); fprintf(stderr, "Adding %3d 
-> %3d:\t", 176, 176+10); G.dump(); G.addInterval(186); fprintf(stderr, "Adding %3d -> %3d:\t", 186, 186+10); G.dump(); G.addInterval( 0); fprintf(stderr, "Adding %3d -> %3d:\t", 0, 0+10); G.dump(); G.addInterval( 0); fprintf(stderr, "Adding %3d -> %3d:\t", 0, 0+10); G.dump(); G.addInterval( 1); fprintf(stderr, "Adding %3d -> %3d:\t", 1, 1+10); G.dump(); G.addInterval( 2); fprintf(stderr, "Adding %3d -> %3d:\t", 2, 2+10); G.dump(); G.addInterval(300); fprintf(stderr, "Adding %3d -> %3d:\t", 300, 300+10); G.dump(); G.addInterval(320); fprintf(stderr, "Adding %3d -> %3d:\t", 320, 320+10); G.dump(); G.addInterval(280); fprintf(stderr, "Adding %3d -> %3d:\t", 280, 280+10); G.dump(); G.addInterval( 20); fprintf(stderr, "Adding %3d -> %3d:\t", 20, 20+10); G.dump(); } void main(int argc, char **argv) { fixedTest(); srand48(237831); loop: #if 0 intervalList *G = new intervalList(10); for (uint32 i=0; iaddInterval(floor(drand48() * (TEST_SIZE - 10))); G->test(); } G->dump(); delete G; #endif intervalList *A = new intervalList(10); intervalList *B = new intervalList(10); intervalList *C = new intervalList(10); for (uint32 i=0; iaddInterval(j); if (drand48() < 0.5) A->addInterval(j); else B->addInterval(j); } fprintf(stderr, "A & B ----------------------------------------\n"); A->dump(); B->dump(); A->merge(B); fprintf(stderr, "A & C ----------------------------------------\n"); A->dump(); C->dump(); A->compare(C); delete A; delete B; delete C; goto loop; } kmer-code-2013-trunk/seagen/misc/0000755000000000000000000000000012641613357015353 5ustar rootrootkmer-code-2013-trunk/seagen/misc/f.C0000644000000000000000000001014612322046702015674 0ustar rootroot#include "posix.H" #include #include #include #include #include #include "aHit.H" // Filters a hit file based on the length of the genomic region typedef struct { uint32 dir; uint32 estID; uint32 scfID; uint32 scfLo; uint32 scfHi; } hit_s; int main(int argc, char **argv) { if (argc < 2) { fprintf(stderr, "usage: %s ....\n", 
argv[0]); exit(1); } int arg = 1; // Things for reading hits // FILE *file; char b[1024]; aHit a; bool isBINARY; int histogram[12] = {0}; FILE *outf[12]; outf[0] = fopen("filteredHits.0", "w"); outf[1] = fopen("filteredHits.1", "w"); outf[2] = fopen("filteredHits.2", "w"); outf[3] = fopen("filteredHits.3", "w"); outf[4] = fopen("filteredHits.4", "w"); outf[5] = fopen("filteredHits.5", "w"); outf[6] = fopen("filteredHits.6", "w"); outf[7] = fopen("filteredHits.7", "w"); outf[8] = fopen("filteredHits.8", "w"); outf[9] = fopen("filteredHits.9", "w"); outf[10] = fopen("filteredHits.a", "w"); outf[11] = fopen("filteredHits.b", "w"); while (arg < argc) { // Open the file, fatally failing if we cannot do it. // errno = 0; file = fopen(argv[arg], "r"); if (file == 0L) { fprintf(stderr, "ESTmapper/filterEST-- ERROR opening '%s'\n%s\n", argv[arg], strerror(errno)); exit(1); } // Binary or ASCII input? // char x = (char)fgetc(file); ungetc(x, file); isBINARY = (x != '-'); if (isBINARY) fprintf(stderr, "reading BINARY hits from '%s'\n", argv[arg]); else fprintf(stderr, "reading ASCII hits from '%s'\n", argv[arg]); // Read hits until we run out of space // while (!feof(file)) { if (isBINARY) { ahit_readBinary(&a, file); } else { fgets(b, 1024, file); ahit_parseString(&a, b); } // Fill the histogram // int len = a._dsHi - a._dsLo; if (len < 25000) { fprintf(outf[0], "-%c -e %u -D %u %u %u\n", a._direction ? 'f' : 'r', a._qsIdx, a._dsIdx, a._dsLo, a._dsHi); histogram[0]++; } else if (len < 50000) { fprintf(outf[1], "-%c -e %u -D %u %u %u\n", a._direction ? 'f' : 'r', a._qsIdx, a._dsIdx, a._dsLo, a._dsHi); histogram[1]++; } else if (len < 100000) { fprintf(outf[2], "-%c -e %u -D %u %u %u\n", a._direction ? 'f' : 'r', a._qsIdx, a._dsIdx, a._dsLo, a._dsHi); histogram[2]++; } else if (len < 200000) { fprintf(outf[3], "-%c -e %u -D %u %u %u\n", a._direction ? 
'f' : 'r', a._qsIdx, a._dsIdx, a._dsLo, a._dsHi); histogram[3]++; } else if (len < 400000) { fprintf(outf[4], "-%c -e %u -D %u %u %u\n", a._direction ? 'f' : 'r', a._qsIdx, a._dsIdx, a._dsLo, a._dsHi); histogram[4]++; } else if (len < 800000) { fprintf(outf[5], "-%c -e %u -D %u %u %u\n", a._direction ? 'f' : 'r', a._qsIdx, a._dsIdx, a._dsLo, a._dsHi); histogram[5]++; } else if (len < 1600000) { fprintf(outf[6], "-%c -e %u -D %u %u %u\n", a._direction ? 'f' : 'r', a._qsIdx, a._dsIdx, a._dsLo, a._dsHi); histogram[6]++; } else if (len < 3200000) { fprintf(outf[7], "-%c -e %u -D %u %u %u\n", a._direction ? 'f' : 'r', a._qsIdx, a._dsIdx, a._dsLo, a._dsHi); histogram[7]++; } else if (len < 6400000) { fprintf(outf[8], "-%c -e %u -D %u %u %u\n", a._direction ? 'f' : 'r', a._qsIdx, a._dsIdx, a._dsLo, a._dsHi); histogram[8]++; } else if (len < 12800000) { fprintf(outf[9], "-%c -e %u -D %u %u %u\n", a._direction ? 'f' : 'r', a._qsIdx, a._dsIdx, a._dsLo, a._dsHi); histogram[9]++; } else if (len < 25600000) { fprintf(outf[10], "-%c -e %u -D %u %u %u\n", a._direction ? 'f' : 'r', a._qsIdx, a._dsIdx, a._dsLo, a._dsHi); histogram[10]++; } else { fprintf(outf[11], "-%c -e %u -D %u %u %u\n", a._direction ? 
'f' : 'r', a._qsIdx, a._dsIdx, a._dsLo, a._dsHi); histogram[11]++; } } fclose(file); arg++; } for (int i=0; i<12; i++) fprintf(stderr, "%2d] %d\n", i, histogram[i]); return(0); } kmer-code-2013-trunk/seagen/misc/dumpCrapSeqs.C0000644000000000000000000000136112322046702020055 0ustar rootroot#include "posix.H" #include "searchGENOME.H" int main(int argc, char **argv) { if (argc == 0) { } uint32 zero = 0; uint32 totl = 0; FastABuffer B; FastA *F = new FastA(argv[1]); encodedQuery *Q = 0L; for (F->first(B); !F->eof(); F->next(B)) { if ((totl & 0xfff) == 0xfff) { fprintf(stderr, "%9lu / %9lu\r", totl, zero); fflush(stderr); } Q = new encodedQuery(B.sequence(), B.sequenceLength(), 20, false); totl++; if (Q->numberOfMers() == 0) { zero++; } delete Q; } fprintf(stdout, "\n"); fprintf(stdout, "Total: %9lu\n", totl); fprintf(stdout, "Zero: %9lu\n", zero); return(0); } kmer-code-2013-trunk/seagen/misc/h.C0000644000000000000000000000403012322046702015671 0ustar rootroot#include "posix.H" #include #include #include #include #include #include "aHit.H" // Generates a histogram of a hit file typedef struct { uint32 dir; uint32 estID; uint32 scfID; uint32 scfLo; uint32 scfHi; } hit_s; int main(int argc, char **argv) { if (argc < 2) { fprintf(stderr, "usage: %s ....\n", argv[0]); exit(1); } int arg = 1; // Things for reading hits // FILE *file; char b[1024]; aHit a; bool isBINARY; int histogram[10] = {0}; while (arg < argc) { // Open the file, fatally failing if we cannot do it. // errno = 0; file = fopen(argv[arg], "r"); if (file == 0L) { fprintf(stderr, "ESTmapper/filterEST-- ERROR opening '%s'\n%s\n", argv[arg], strerror(errno)); exit(1); } // Binary or ASCII input? 
// char x = (char)fgetc(file); ungetc(x, file); isBINARY = (x != '-'); if (isBINARY) fprintf(stderr, "reading BINARY hits from '%s'\n", argv[arg]); else fprintf(stderr, "reading ASCII hits from '%s'\n", argv[arg]); // Read hits until we run out of space // while (!feof(file)) { if (isBINARY) { ahit_readBinary(&a, file); } else { fgets(b, 1024, file); ahit_parseString(&a, b); } // Fill the histogram // int len = a._dsHi - a._dsLo; if (len < 25000) histogram[0]++; else if (len < 50000) histogram[1]++; else if (len < 100000) histogram[2]++; else if (len < 200000) histogram[3]++; else if (len < 400000) histogram[4]++; else if (len < 800000) histogram[5]++; else if (len < 1600000) histogram[6]++; else if (len < 3200000) histogram[7]++; else if (len < 6400000) histogram[8]++; else histogram[9]++; } fclose(file); arg++; } for (int i=0; i<10; i++) fprintf(stderr, "%2d] %d\n", i, histogram[i]); return(0); } kmer-code-2013-trunk/seagen/searcherState.H0000644000000000000000000000077512322046702017325 0ustar rootroot class searcherState { public: uint64 posnMax; uint64 posnLen; uint64 *posn; #ifdef __APPLE__ uint32 pad; #endif double encodeTime; double maskTime; double searchTime; double filterTime; searcherState() { posnMax = 16384; posnLen = 0; posn = new uint64 [ posnMax ]; encodeTime = 0.0; maskTime = 0.0; searchTime = 0.0; filterTime = 0.0; }; ~searcherState() { delete [] posn; }; }; kmer-code-2013-trunk/seagen/configuration.C0000644000000000000000000002522012322046702017362 0ustar rootroot#include "searchGENOME.H" #include #include #include #include configuration::configuration(void) { _beVerbose = false; _merSize = 20; _merSkip = 0; _numSearchThreads = 4; _doReverse = true; _doForward = true; _maxDiagonal = 25; _maxGap = 0; _qsOverlap = 15; _dsOverlap = 15; // Alternate match extension scheme _extendWeight = 0; _extendMinimum = 0; _extendAlternate = false; _maxIntronLength = 1000000000; _smallSequenceCutoff = 0; _minLengthSingle = 0; _minCoverageSingle = 0.0; 
_minLengthMultiple = 0; _minCoverageMultiple = 0.0; _dbFileName = 0L; _qsFileName = 0L; _maskFileName = 0L; _onlyFileName = 0L; _outputFileName = 0L; _queryMatchFileName = 0L; _outputFile = STDOUT_FILENO; _matchCountsFile = -1; _tableTemporaryFileName = 0L; _tableFileName = 0L; _tableBuildOnly = false; _binaryOutput = false; _qsFASTA = 0L; _maskDB = 0L; _onlyDB = 0L; _positions = 0L; _numberOfQueries = 0; _startTime = getTime(); _initTime = _startTime; _buildTime = _startTime; _searchTime = _startTime; _loaderQueue = 16 * 1024; _loaderSleep.tv_sec = 1; _loaderSleep.tv_nsec = 0; _loaderWarnings = false; _searchSleep.tv_sec = 0; _searchSleep.tv_nsec = 10000000; _writerQueue = 32 * 1024; _writerSleep.tv_sec = 1; _writerSleep.tv_nsec = 0; _writerWarnings = false; } configuration::~configuration() { if (_beVerbose) { uint32 nq = _qsFASTA->getNumberOfSequences(); double tm = _searchTime - _buildTime; fprintf(stderr, "\n"uint32FMTW(7)" sequences in %5.2f seconds, %8.3f per second.\n", nq, tm, nq/tm); } errno = 0; close(_outputFile); close(_matchCountsFile); if (errno) fprintf(stderr, "Couldn't close to the output file '%s': %s\n", config._outputFileName, strerror(errno)); delete _qsFASTA; delete _maskDB; delete _onlyDB; delete _positions; } static char const *usageString = "usage: %s [options]\n" "\n" "Algorithm Options:\n" " -mersize k Use k-mers\n" " -merskip j Skip j mers between each mer inserted into table\n" " -forward Search only the normal query sequences\n" " -reverse Search only the reverse-complemented query sequences\n" " -maxdiagonal d\n" " -maxgap g\n" " -qoverlap q\n" " -doverlap d\n" " -maxintron m\n" " -smallsequence\n" " -singlelength l\n" " -singlecoverage c\n" " -multiplelength l\n" " -multiplecoverage c\n" " -extendweight w\n" " -extendminimum m\n" "\n" "Process Options\n" " -numthreads n Use n search threads\n" "\n" " -loaderqueue h Size of the loader queue\n" " -loadersleep t Time the loader will sleep when its output queue is full\n" " 
-loaderwarnings Enable warning messages for the loader\n" "\n" " -searchsleep t Time the searcher will sleep when it has no input\n" "\n" " -writerqueue h Size of the output queue\n" " -writersleep t Time the writer will sleep when it has nothing to write\n" " -writerwarnings Enable warning messages for the writer\n" "\n" "\n" " -buildtables datfile If 'datfile' doesn't exist, build the tables, write\n" " them to 'datfile' and exit.\n" " -usetables datfile Load the tables from 'datfile' file and do the compute.\n" " If 'datfile' doesn't exist, an implicit -buildtables is\n" " performed.\n" "Input Options:\n" " -mask f Ignore all mers listed in file f\n" " -only f Use only the mers listed in file f\n" " -cdna c.fasta Query sequences (the cDNA, the stream)\n" " -stream An alias for -cdna\n" " -genomic g.fasta Database sequences (the genome, the table)\n" " -table An alias for -genomic)\n" " -use #,#,#,# using only those sequences specified\n" " -use file using only those sequences listed in the file\n" "\n" "Output Options\n" " -verbose Entertain the user\n" " -binary Write the hits in a binary format\n" " -output f Write output to file f\n" " -count f Write counts of hits to file f\n"; void configuration::usage(char *name) { fprintf(stderr, usageString, name); } void configuration::read(int argc, char **argv) { int arg = 1; if (argc < 2) { usage(argv[0]); exit(1); } while (arg < argc) { if (strcmp(argv[arg], "-mersize") == 0) { arg++; _merSize = atoi(argv[arg]); } else if (strcmp(argv[arg], "-merskip") == 0) { arg++; _merSkip = atoi(argv[arg]); } else if (strcmp(argv[arg], "-numthreads") == 0) { arg++; _numSearchThreads = atoi(argv[arg]); } else if (strcmp(argv[arg], "-mask") == 0) { arg++; _maskFileName = argv[arg]; } else if (strcmp(argv[arg], "-only") == 0) { arg++; _onlyFileName = argv[arg]; } else if (strcmp(argv[arg], "-cdna") == 0) { arg++; _qsFileName = argv[arg]; } else if (strcmp(argv[arg], "-stream") == 0) { arg++; _qsFileName = argv[arg]; } else if 
(strcmp(argv[arg], "-genomic") == 0) { arg++; _dbFileName = argv[arg]; } else if (strcmp(argv[arg], "-table") == 0) { arg++; _dbFileName = argv[arg]; } else if (strcmp(argv[arg], "-buildtemporary") == 0) { arg++; _tableTemporaryFileName = argv[arg]; } else if (strcmp(argv[arg], "-buildtables") == 0) { arg++; _tableFileName = argv[arg]; _tableBuildOnly = true; } else if (strcmp(argv[arg], "-usetables") == 0) { arg++; _tableFileName = argv[arg]; _tableBuildOnly = false; } else if (strcmp(argv[arg], "-positions") == 0) { arg++; _tableFileName = argv[arg]; _tableBuildOnly = false; } else if (strcmp(argv[arg], "-forward") == 0) { _doForward = true; _doReverse = false; } else if (strcmp(argv[arg], "-reverse") == 0) { _doReverse = true; _doForward = false; } else if (strcmp(argv[arg], "-verbose") == 0) { _beVerbose = true; } else if (strcmp(argv[arg], "-binary") == 0) { _binaryOutput = true; } else if (strcmp(argv[arg], "-output") == 0) { arg++; _outputFileName = argv[arg]; } else if (strcmp(argv[arg], "-count") == 0) { arg++; _queryMatchFileName = argv[arg]; } else if (strcmp(argv[arg], "-maxdiagonal") == 0) { arg++; _maxDiagonal = atoi(argv[arg]); } else if (strcmp(argv[arg], "-maxgap") == 0) { arg++; _maxGap = atoi(argv[arg]); } else if (strcmp(argv[arg], "-qoverlap") == 0) { arg++; _qsOverlap = atoi(argv[arg]); } else if (strcmp(argv[arg], "-doverlap") == 0) { arg++; _dsOverlap = atoi(argv[arg]); } else if (strcmp(argv[arg], "-maxintron") == 0) { arg++; _maxIntronLength = atoi(argv[arg]); } else if (strcmp(argv[arg], "-smallsequence") == 0) { arg++; _smallSequenceCutoff = atoi(argv[arg]); } else if (strcmp(argv[arg], "-singlelength") == 0) { arg++; _minLengthSingle = atoi(argv[arg]); } else if (strcmp(argv[arg], "-multiplelength") == 0) { arg++; _minLengthMultiple = atoi(argv[arg]); } else if (strcmp(argv[arg], "-singlecoverage") == 0) { arg++; _minCoverageSingle = atof(argv[arg]); } else if (strcmp(argv[arg], "-multiplecoverage") == 0) { arg++; _minCoverageMultiple = 
atof(argv[arg]); } else if (strncmp(argv[arg], "-extendweight", 7) == 0) { arg++; _extendWeight = atoi(argv[arg]); _extendAlternate = true; } else if (strncmp(argv[arg], "-extendminimum", 7) == 0) { arg++; _extendMinimum = atoi(argv[arg]); _extendAlternate = true; } else if (strncmp(argv[arg], "-loaderqueue", 8) == 0) { _loaderQueue = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-loadersleep", 8) == 0) { setTime(&_loaderSleep, atof(argv[++arg])); } else if (strncmp(argv[arg], "-loaderwarnings", 8) == 0) { _loaderWarnings = true; } else if (strncmp(argv[arg], "-searchsleep", 8) == 0) { setTime(&_searchSleep, atof(argv[++arg])); } else if (strncmp(argv[arg], "-writerqueue", 8) == 0) { _writerQueue = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-writersleep", 8) == 0) { setTime(&_writerSleep, atof(argv[++arg])); } else if (strncmp(argv[arg], "-writerwarnings", 8) == 0) { _writerWarnings = true; } else { fprintf(stderr, "Unknown option '%s'\n", argv[arg]); } arg++; } // // Make sure some constraints are met // if (_maskFileName && _onlyFileName) { fprintf(stderr, "ERROR: At most one of -mask and -only may be used.\n"); exit(1); } // // Check that the mers are at least adjacent // if (_merSkip >= _merSize) { fprintf(stderr, "ERROR: Mers are not adjacent; make sure merskip <= mersize.\n"); exit(1); } // Fail if we don't get reasonable signal criteria // if (((_minLengthSingle == 0) && (_minCoverageSingle == 0.0)) || ((_minLengthMultiple == 0) && (_minCoverageMultiple == 0.0))) fprintf(stderr, "WARNING: Minimum match lengths not specified. 
All matches will be reported.\n"); // Open output file // if (_outputFileName) { errno = 0; _outputFile = open(_outputFileName, O_WRONLY | O_LARGEFILE | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (errno) { fprintf(stderr, "Couldn't open the output file '%s'?\n%s\n", _outputFileName, strerror(errno)); exit(1); } } if (_queryMatchFileName) { errno = 0; _matchCountsFile = open(_queryMatchFileName, O_WRONLY | O_LARGEFILE | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (errno) { fprintf(stderr, "Couldn't open the match counts file '%s'?\n%s\n", _queryMatchFileName, strerror(errno)); exit(1); } } // Gotta go somewhere! // _startTime = getTime(); } kmer-code-2013-trunk/seagen/hitMatrix.C0000644000000000000000000004267712322046702016503 0ustar rootroot#include "searchGENOME.H" #include "aHit.H" #define TRACE 0 hitMatrix::hitMatrix(uint32 qsLen, uint32 qsMers, uint32 qsIdx) { _qsLen = qsLen; _qsMers = qsMers; _qsIdx = qsIdx; _hitsLen = 0; _hitsMax = 128; _hits = new diagonalLine [_hitsMax]; _matches = 0L; } hitMatrix::~hitMatrix() { delete [] _hits; } void hitMatrix::addMatch(uint32 qsLo, uint32 qsHi, uint32 dsLo, uint32 dsHi, merCovering *IL) { uint32 offset = 0; // Extend the match // // Two methods: the first uses hardcoded parameters, has two // plateau's, and is the one to use for ESTs and mRNA in ESTmapper. // The second is paramterized, and has a single plateau. // if (config._extendAlternate) { offset = config._extendWeight * qsLo; if (offset < config._extendMinimum) offset = config._extendMinimum; if (dsLo < offset) dsLo = 0; else dsLo -= offset; offset = config._extendWeight * (_qsLen - qsHi); if (offset < config._extendMinimum) offset = config._extendMinimum; dsHi += offset; } else { // If the start of the match is near the start of the EST, we do // not need to search very far in the genome. 
// offset = 0; if (qsLo < 50) offset = 2000; else if (qsLo < 100) offset = 5000; else offset = 50 * qsLo; if (dsLo < offset) dsLo = 0; else dsLo -= offset; // Likewise, if the match is near the end of the EST, extend. We // don't know the length of the genomic sequence, so we can't check // for "overflow". // offset = _qsLen - qsHi; if (offset < 50) dsHi += 2000; else if (offset < 100) dsHi += 5000; else dsHi += 50 * offset; } // Create a new match // // n = new match // m = current match // l = last match // trapMatch *n = new trapMatch(qsLo, qsHi, dsLo, dsHi, IL); // And find a home for it in the list. No merging of matches is done here. It's // too hard. // if ((_matches == 0L) || (n->_dsHi > _matches->_dsHi)) { n->_next = _matches; _matches = n; } else { trapMatch *l = _matches; trapMatch *m = _matches->_next; while ((m) && (n->_dsHi < m->_dsHi)) { l = m; m = m->_next; } n->_next = m; l->_next = n; } } // Utility for sorting the diagonal lines in the hitMatrix // // The two comparison functions return true if the first line // is less than the second line. #ifdef WITHOUT_DIAGONALID inline int compareLines(diagonalLine *A, diagonalLine *B, uint32 qsLen) { uint32 a = qsLen - A->_qsPos - 1 + A->_dsPos; uint32 b = qsLen - B->_qsPos - 1 + B->_dsPos; return(((a < b)) || ((a == b) && (A->_qsPos < B->_qsPos))); } inline int compareLines(uint32 l, uint32 q, diagonalLine *B, uint32 qsLen) { uint32 b = qsLen - B->_qsPos - 1 + B->_dsPos; return(((l < b)) || ((l == b) && (q < B->_qsPos))); } inline void adjustHeap(diagonalLine *L, int32 p, int32 n, uint32 qsLen) { uint32 q = L[p]._qsPos; uint32 d = L[p]._dsPos; uint32 l = qsLen - q - 1 + d; int32 c = (p << 1) + 1; // let c be the left child of p while (c < n) { // Find the larger of the two children // if ((c+1 < n) && compareLines(L+c, L+c+1, qsLen)) c++; // Does the node in question fit here? 
// if (compareLines(l, q, L+c, qsLen) == false) break; // Else, swap the parent and the child // L[p]._qsPos = L[c]._qsPos; L[p]._dsPos = L[c]._dsPos; // Move down the tree // p = c; c = (p << 1) + 1; } L[p]._qsPos = q; L[p]._dsPos = d; } #else // WITH_DIAGONALID inline int compareLines(diagonalLine *A, diagonalLine *B) { return(((A->_diagonalID < B->_diagonalID)) || ((A->_diagonalID == B->_diagonalID) && (A->_qsPos < B->_qsPos))); } inline int compareLines(uint32 l, uint32 q, diagonalLine *B) { return(((l < B->_diagonalID)) || ((l == B->_diagonalID) && (q < B->_qsPos))); } inline void adjustHeap(diagonalLine *L, int32 p, int32 n) { uint32 q = L[p]._qsPos; uint32 d = L[p]._dsPos; uint32 l = L[p]._diagonalID; int32 c = (p << 1) + 1; // let c be the left child of p while (c < n) { // Find the larger of the two children // if ((c+1 < n) && compareLines(L+c, L+c+1)) c++; // Does the node in question fit here? // if (compareLines(l, q, L+c) == false) break; // Else, swap the parent and the child // L[p]._qsPos = L[c]._qsPos; L[p]._dsPos = L[c]._dsPos; L[p]._diagonalID = L[c]._diagonalID; // Move down the tree // p = c; c = (p << 1) + 1; } L[p]._qsPos = q; L[p]._dsPos = d; L[p]._diagonalID = l; } #endif void hitMatrix::filter(encodedQuery *query, bool isReverse) { if (_hitsLen == 0) return; // Decide on the minimum quality values; we pick the larger of // the fixed lengths, and the sequence length * coverage. // uint32 minLengthSingle = (uint32)(config._minCoverageSingle * _qsLen); uint32 minLengthMultiple = (uint32)(config._minCoverageMultiple * _qsLen); if (minLengthSingle < config._minLengthSingle) minLengthSingle = config._minLengthSingle; if (minLengthMultiple < config._minLengthMultiple) minLengthMultiple = config._minLengthMultiple; // First, sort by the dsPos. This is done so that we can find all the hits for // a specific scaffold. // sort_dsPos(); // Now, while there are hits left.... 
// uint32 firstHit = 0; uint32 lastHit = 0; uint32 currentSeq = 0; while (firstHit < _hitsLen) { // Move the currentSeq until the firstHit is below it. // while ((currentSeq < config._dbSTREAM->numberOfSequences()) && (config._dbSTREAM->startOf(currentSeq) <= _hits[firstHit]._dsPos)) currentSeq++; // // currentSeq is now the sequence AFTER the one that we want hits in. // // Find the first hit that is in currentSeq. If this is the last sequence, // then, of course, all remaining hits are in it. // if (currentSeq < config._dbSTREAM->numberOfSequences()) { lastHit = firstHit + 1; while ((lastHit < _hitsLen) && (_hits[lastHit]._dsPos < config._dbSTREAM->startOf(currentSeq))) lastHit++; } else { lastHit = _hitsLen; } // Drop back one sequence; this is the sequence the hits are in. // currentSeq--; #if TRACE fprintf(stdout, "Hits are in sequence %d\n", config._dbSTREAM->IIDOf(currentSeq)); fprintf(stdout, "filtering %u hits -- first = %u last = %u.\n", _hitsLen, firstHit, lastHit); #if 0 fprintf(stdout, "UNSORTED\n"); for (uint32 i=firstHit; istartOf(currentSeq); // Sort them, if needed. // if (lastHit - firstHit > 1) { // We cheat; heapsort isn't too friendly to sorting the middle of // an array, so we make a new array in the middle! // diagonalLine *hitsToSort = _hits + firstHit; // Build the heap. I initially thought this could be done at the // same time as the scan for the last hit, but it can't (easily) // for (int32 i=(lastHit - firstHit)/2 - 1; i>=0; i--) #ifdef WITHOUT_DIAGONALID adjustHeap(hitsToSort, i, lastHit - firstHit, _qsLen); #else adjustHeap(hitsToSort, i, lastHit - firstHit); #endif // Sort the hits be diagonal. 
This is the second part of // heap sort -- Interchange the new maximum with the element // at the end of the tree // for (uint32 i=lastHit - firstHit - 1; i>0; i--) { uint32 q = hitsToSort[i]._qsPos; uint32 d = hitsToSort[i]._dsPos; #ifndef WITHOUT_DIAGONALID uint32 l = hitsToSort[i]._diagonalID; #endif hitsToSort[i]._qsPos = hitsToSort[0]._qsPos; hitsToSort[i]._dsPos = hitsToSort[0]._dsPos; #ifndef WITHOUT_DIAGONALID hitsToSort[i]._diagonalID = hitsToSort[0]._diagonalID; #endif hitsToSort[0]._qsPos = q; hitsToSort[0]._dsPos = d; #ifndef WITHOUT_DIAGONALID hitsToSort[0]._diagonalID = l; #endif #ifdef WITHOUT_DIAGONALID adjustHeap(hitsToSort, 0, i, _qsLen); #else adjustHeap(hitsToSort, 0, i); #endif } } // Check the sorting // #if 0 #if 0 fprintf(stderr, "sort by diagonal:\n"); for (uint32 i=firstHit; i _hits[i+1]._diagonalID) { fprintf(stderr, "sort by diagonal failed.\n"); exit(1); } } #endif #if TRACE #if 0 fprintf(stdout, "SORTED\n"); for (uint32 i=firstHit; i= thisDiagonalID) { lastDiagonal = thisDiagonalID; if (qsLow > _hits[i]._qsPos) qsLow = _hits[i]._qsPos; if (qsHigh < _hits[i]._qsPos) qsHigh = _hits[i]._qsPos; if (dsLow > _hits[i]._dsPos) dsLow = _hits[i]._dsPos; if (dsHigh < _hits[i]._dsPos) dsHigh = _hits[i]._dsPos; IL->addMer(_hits[i]._qsPos); #if TRACE fprintf(stdout, "extend qs=%9u-%9u ds=%9u-%9u diag=%9u-%9u (diagonal)\n", qsLow, qsHigh, dsLow, dsHigh, frstDiagonal, lastDiagonal); #endif continue; } // XXX: Prototype for extending only if the next hit is near // the last hit. // if (((dsHigh <= _hits[i]._dsPos) && (_hits[i]._dsPos - dsHigh <= config._maxIntronLength)) || ((dsHigh >= _hits[i]._dsPos) && (dsHigh - _hits[i]._dsPos <= config._maxIntronLength))) { // Extend into multiple-exon like things only if the input // sequence is long. 
// if (_qsLen > config._smallSequenceCutoff) { // Extend if the qsOverlap is small (or nonexistant) // if ((qsHigh + config._merSize) < (_hits[i]._qsPos + config._qsOverlap)) { lastDiagonal = thisDiagonalID; if (qsLow > _hits[i]._qsPos) qsLow = _hits[i]._qsPos; if (qsHigh < _hits[i]._qsPos) qsHigh = _hits[i]._qsPos; if (dsLow > _hits[i]._dsPos) dsLow = _hits[i]._dsPos; if (dsHigh < _hits[i]._dsPos) dsHigh = _hits[i]._dsPos; IL->addMer(_hits[i]._qsPos); #if TRACE fprintf(stdout, "extend qs=%9u-%9u ds=%9u-%9u diag=%9u-%9u (qsOverlap)\n", qsLow, qsHigh, dsLow, dsHigh, frstDiagonal, lastDiagonal); #endif continue; } // Extend if the dsOverlap is small (or nonexistant) // if (_hits[i]._dsPos < (dsLow + config._dsOverlap)) { lastDiagonal = thisDiagonalID; if (qsLow > _hits[i]._qsPos) qsLow = _hits[i]._qsPos; if (qsHigh < _hits[i]._qsPos) qsHigh = _hits[i]._qsPos; if (dsLow > _hits[i]._dsPos) dsLow = _hits[i]._dsPos; if (dsHigh < _hits[i]._dsPos) dsHigh = _hits[i]._dsPos; IL->addMer(_hits[i]._qsPos); #if TRACE fprintf(stdout, "extend qs=%9u-%9u ds=%9u-%9u diag=%9u-%9u (dsOverlap)\n", qsLow, qsHigh, dsLow, dsHigh, frstDiagonal, lastDiagonal); #endif continue; } } } // XXX: End prototype #if TRACE fprintf(stdout, "close current cluster.\nGOOD? qsCov=%u; >= %u or %u? diag: %u < 25?\n", qsHigh - qsLow, minLengthSingle, minLengthMultiple, lastDiagonal - frstDiagonal); #endif // Save the current cluster and start a new one? 
// uint32 qCov = IL->sumOfLengths(); if ((qCov >= minLengthMultiple) || ((lastDiagonal - frstDiagonal < 25) && (qCov >= minLengthSingle))) { #if TRACE fprintf(stdout, "add match!\n"); #endif addMatch(qsLow, qsHigh + config._merSize, dsLow, dsHigh + config._merSize, IL); IL = new merCovering(config._merSize); } if (IL) IL->clear(); #if TRACE fprintf(stdout, "reset!\n"); #endif frstDiagonal = thisDiagonalID; lastDiagonal = thisDiagonalID; qsLow = _hits[i]._qsPos; qsHigh = _hits[i]._qsPos; dsLow = _hits[i]._dsPos; dsHigh = _hits[i]._dsPos; #if TRACE fprintf(stdout, "hit[qs=%6u ds=%7u d=%7u] box[qs=%6u-%6u ds=%7u-%7u d=%7u-%7u] (initial hit)\n", _hits[i]._qsPos, _hits[i]._dsPos, _qsLen - _hits[i]._qsPos - 1 + _hits[i]._dsPos, qsLow, qsHigh, dsLow, dsHigh, frstDiagonal, lastDiagonal); #endif IL->addMer(_hits[i]._qsPos); } // Save the final cluster? // uint32 qCov = IL->sumOfLengths(); if ((qCov >= minLengthMultiple) || ((lastDiagonal - frstDiagonal < 21) && (qCov >= minLengthSingle))) { addMatch(qsLow, qsHigh + config._merSize, dsLow, dsHigh + config._merSize, IL); IL = 0; } // Delete any remaining IL // delete IL; // Merge and print the matches // trapMatch *n = 0L; uint32 ML = 0; while (_matches) { // Save the current match, then delete it. // dsLow = _matches->_dsLo; dsHigh = _matches->_dsHi; IL = _matches->_IL; ML = IL->sumOfLengths(); n = _matches; _matches = _matches->_next; delete n; #if TRACE fprintf(stdout, "Merge: %8u %8u\n", dsLow, dsHigh); #endif // Assimilate as many of the remaining matches as possible. // // Think of this as first reversing the list, then merging as // long as (dsHigh + 1000 > _matches->_dsLo). But since we // don't reverse the list, we can map: // dsHigh --> _matches->dsHi // _matches->_dsLo --> dsLow // where dsHigh and dsLow are the values for the extended match. 
// while (_matches && (dsLow < _matches->_dsHi + 5000)) { // Combine the two merCoverings // IL->merge(_matches->_IL); ML += _matches->_IL->sumOfLengths(); // The start of the new match might be after the start of the // merged region. (Only rarely is it before) // if (dsLow > _matches->_dsLo) dsLow = _matches->_dsLo; // The end of current match is always greater than the end of the // new match! // //dsHigh = _matches->_dsHi; #if TRACE fprintf(stdout, "Merge: %8u %8u -> %8u %8u\n", _matches->_dsLo, _matches->_dsHi, dsLow, dsHigh); #endif n = _matches; _matches = _matches->_next; delete n->_IL; delete n; } if (config._binaryOutput) { aHit a; a._forward = !isReverse; a._merged = false; a._qsIdx = _qsIdx; a._dsIdx = config._dbSTREAM->IIDOf(currentSeq); a._dsLo = dsLow; a._dsHi = dsHigh; a._covered = IL->sumOfLengths(); a._matched = ML; a._numMers = _qsMers; query->addOutput(&a, sizeof(aHit)); } else { char line[128]; sprintf(line, "-%c -e "uint32FMT" -D "uint32FMT" "uint32FMT" "uint32FMT" -M "uint32FMT" "uint32FMT" "uint32FMT"\n", isReverse ? 'r' : 'f', _qsIdx, config._dbSTREAM->IIDOf(currentSeq), dsLow, dsHigh, IL->sumOfLengths(), ML, _qsMers); query->addOutput(line, 0); } delete IL; } // All done with these hits. Move to the next set. 
// firstHit = lastHit; } } kmer-code-2013-trunk/seagen/searchGENOME.H0000644000000000000000000000106011463747051016666 0ustar rootroot#include #include #include #include #include #include #include #include #include #include #include #include // At one time, this was needed for pthread.h or semaphore.h //typedef unsigned short ushort; #include #include #include "util++.H" #include "bio++.H" #include "positionDB.H" #include "existDB.H" #include "hitMatrix.H" #include "searcherState.H" #include "configuration.H" kmer-code-2013-trunk/seagen/searchGENOME.C0000644000000000000000000000656512322046702016666 0ustar rootroot#include #include #include #include "searchGENOME.H" configuration config; void *loaderThread(void *U); void searchThread(void *U, void *T, void *Q); void writerThread(void *U, void *Q); int main(int argc, char **argv) { // Read the configuration from the command line // config.read(argc, argv); // Open and init the query sequence // if (config._beVerbose) fprintf(stderr, "Opening the cDNA sequences.\n"); config._qsFASTA = new seqCache(config._qsFileName); config._dbSTREAM = new seqStream(config._dbFileName); // Complete the configuration // config._initTime = getTime(); // // Build the positions // // Read in the positionDB if it's already built, or build a new one. // if ((config._tableFileName) && (fileExists(config._tableFileName))) { if (config._tableBuildOnly) { fprintf(stderr, "All done. 
Table '%s' already built.\n", config._tableFileName); exit(1); } else { fprintf(stderr, "Loading positionDB state from '%s'\n", config._tableFileName); config._positions = new positionDB(config._tableFileName, config._merSize, config._merSkip, 0); } } else { merStream *MS = new merStream(new kMerBuilder(config._merSize), config._dbSTREAM, true, false); config._positions = new positionDB(MS, config._merSize, config._merSkip, 0L, 0L, 0L, 0, 0, 0, 0, config._beVerbose); delete MS; if (config._tableFileName) { if (config._beVerbose) fprintf(stderr, "Dumping positions table to '%s'\n", config._tableFileName); config._positions->saveState(config._tableFileName); if (config._tableBuildOnly) exit(0); } } // Build the masking database. // // Previous versions build the existDB takeing the posDB as a // parameter. The existDB would then be exclude mers not in the // posDB. A neat and nice feature, but with only 45,000 to 70,000 // mers in the masks, hardly worth the effort. // if (config._maskFileName) { if (config._beVerbose) fprintf(stderr, "Building maskDB from '%s'\n", config._maskFileName); config._maskDB = new existDB(config._maskFileName, config._merSize, existDBnoFlags, 0, ~uint32ZERO); } if (config._onlyFileName) { if (config._beVerbose) fprintf(stderr, "Building onlyDB from '%s'\n", config._onlyFileName); config._onlyDB = new existDB(config._onlyFileName, config._merSize, existDBnoFlags, 0, ~uint32ZERO); } config._buildTime = getTime(); #if 0 // Maybe we don't need this anymore! 
#ifdef __alpha // Start the deadlock detection threads // fprintf(stderr, "Deadlock detection enabled!\n"); pthread_create(&threadID, &threadAttr, deadlockDetector, 0L); pthread_create(&threadID, &threadAttr, deadlockChecker, 0L); #endif #endif sweatShop *ss = new sweatShop(loaderThread, searchThread, writerThread); ss->setNumberOfWorkers(config._numSearchThreads); for (uint32 i=0; isetThreadData(i, new searcherState); ss->setLoaderQueueSize(config._loaderQueue); ss->setWriterQueueSize(config._writerQueue); ss->run(0L, config._beVerbose); config._searchTime = getTime(); // the configuration does most cleanup, and it's on the stack. return(0); } kmer-code-2013-trunk/seagen/encodedQuery.H0000644000000000000000000000520312322046702017146 0ustar rootroot#ifndef ENCODEDQUERY_H #define ENCODEDQUERY_H #include #include #include "bio++.H" #include "seqCache.H" class encodedQuery { public: encodedQuery(seqInCore *S, uint32 k); ~encodedQuery(); void test(seqInCore *S); uint32 IID(void) { return(_iid); }; uint32 numberOfMers(void) { return(_mersTotal); }; uint32 numberOfValidMers(void) { return(_mersAvail); }; // Return either an approximation or the exact number of bp covered by valid mers. 
// uint32 bpCovered(bool doCompute); uint32 bpTotal(void); uint64 getMer(uint32 i, bool isReverse); bool getSkip(uint32 i, bool isReverse); void setSkip(uint32 i, bool isReverse); void addOutput(void *output, uint32 size); char *theOutput(void) { return(_output); }; uint32 theOutputLength(void) { return(_outputLen); }; uint32 numberOfResults(void) { return(_numberOfResults); }; private: uint32 _iid; uint32 _sequenceLength; uint32 _merSize; uint32 _mersTotal; // Number of mers possible in the query uint32 _mersAvail; // Number of mers not masked out uint64 *_mers; // List of mers uint8 *_skip; uint32 _numberOfResults; char *_output; uint32 _outputLen; uint32 _outputMax; }; inline uint32 encodedQuery::bpCovered(bool doCompute) { uint32 bp = numberOfValidMers(); if (doCompute) { merCovering *IL = new merCovering(_merSize); for (uint32 qi=0; qiaddMer(qi); } bp = IL->sumOfLengths(); delete IL; } return(bp); } inline uint32 encodedQuery::bpTotal(void) { return(_sequenceLength); } // XXX: We need to extend get*() to also take a isReverse flag, // and to then return the reverse-complement mer, skip, etc. inline uint64 encodedQuery::getMer(uint32 i, bool isReverse) { if (isReverse) { i = _mersTotal - i - 1; return(reverseComplementMer(_merSize, _mers[i])); } else { return(_mers[i]); } } inline bool encodedQuery::getSkip(uint32 i, bool isReverse) { if (isReverse) i = _mersTotal - i - 1; return(_skip[i]); } inline void encodedQuery::setSkip(uint32 i, bool isReverse) { if (isReverse) i = _mersTotal - i - 1; // If skip[i] is already set, don't adjust; otherwise, subtract // one from the available. 
_mersAvail -= 1 - _skip[i]; _skip[i] = 1; } #endif // ENCODEDQUERY_H kmer-code-2013-trunk/seagen/hitReader.C0000644000000000000000000001551212322046702016425 0ustar rootroot#include "hitReader.H" #include #include #include #include static int hitCompareCoverage(const void *a, const void *b) { const hit_s *A = (const hit_s *)a; const hit_s *B = (const hit_s *)b; if (A->coverage > B->coverage) return(-1); return(A->coverage < B->coverage); } static int hitCompareGenPos(const void *a, const void *b) { const hit_s *A = (const hit_s *)a; const hit_s *B = (const hit_s *)b; if (A->a._dsIdx < B->a._dsIdx) return(-1); if (A->a._dsIdx > B->a._dsIdx) return(1); if (A->a._forward < B->a._forward) return(-1); if (A->a._forward > B->a._forward) return(1); if (A->a._dsLo < B->a._dsLo) return(-1); if (A->a._dsLo > B->a._dsLo) return(1); if (A->a._dsHi < B->a._dsHi) return(-1); if (A->a._dsHi > B->a._dsHi) return(1); return(0); } hitReader::hitReader(int m) { _filesMax = m; _filesLen = 0; _files = new hitFile_s [_filesMax]; _listLen = 0; _listMax = 1024 * 1024; _list = new hit_s [_listMax]; _iid = uint32ZERO; _bestScore = 0.0; _worstScore = 1.0; } hitReader::~hitReader() { for (uint32 i=0; i<_filesLen; i++) //fclose(_files[i].file); delete _files[i].buff; delete [] _files; delete [] _list; } void hitReader::addInputFile(char *filename) { errno = 0; _files[_filesLen].stillMore = true; _files[_filesLen].buff = new readBuffer(filename); // Binary or ASCII input? 
// _files[_filesLen].isBINARY = (_files[_filesLen].buff->peek() != '-'); // Load the first hit loadHit(_files+_filesLen); _filesLen++; } void hitReader::loadHit(hitFile_s *HF) { if (HF->isBINARY) { ahit_readBinary(&HF->a, HF->buff); } else { fprintf(stderr, "ERROR: hitReader::loadHit() ascii not supported right now.\n"); exit(1); //fgets(HF->b, 1024, HF->file); //ahit_parseString(&HF->a, HF->b); } if (HF->buff->eof()) HF->stillMore = false; }; bool hitReader::loadHits(void) { _listLen = 0; _iid = uint32ZERO; _bestScore = 0.0; _worstScore = 1.0; // See if there are more hits to process. // bool keepGoing = false; for (uint32 i=0; i<_filesLen; i++) keepGoing |= _files[i].stillMore; if (keepGoing == false) return(false); // Find the lowest ESTid // _iid = 1 << 30; for (uint32 i=0; i<_filesLen; i++) if ((_files[i].stillMore) && (_iid > _files[i].a._qsIdx)) _iid = _files[i].a._qsIdx; // For each file, load the next hit if it's the est // we're looking at // for (uint32 i=0; i<_filesLen; i++) { while ((_files[i].stillMore) && (_files[i].a._qsIdx == _iid)) { if (_listLen >= _listMax) { _listMax *= 2; hit_s *new_list = new hit_s [_listMax]; memcpy(new_list, _list, _listLen * sizeof(hit_s)); delete [] _list; _list = new_list; } memcpy(&_list[_listLen].a, &_files[i].a, sizeof(aHit)); _list[_listLen].coverage = (double)_files[i].a._covered / (double)_files[i].a._numMers; _list[_listLen].multiplicity = (double)_files[i].a._matched / (double)_files[i].a._covered; // aHit->_covered is in bases, but aHit->_numMers is the // number of mers. Possible for coverage to be > 1.0. // if (_list[_listLen].coverage > 1.0) _list[_listLen].coverage = 1.0; if (_list[_listLen].coverage > _bestScore) _bestScore = _list[_listLen].coverage; if (_list[_listLen].coverage < _worstScore) _worstScore = _list[_listLen].coverage; #ifdef WITH_ANSWERS // Look for the answer string. If not found, set to zero. 
// _list[_listLen].mappedIdentity = 0; _list[_listLen].mappedCoverage = 0; for (int p=0; _files[i].b[p]; p++) { if ((_files[i].b[p] == 'Y') || (_files[i].b[p] == 'N')) { char *c = _files[i].b+p+1; _list[_listLen].mappedIdentity = (uint32)strtoul(c, &c, 10); _list[_listLen].mappedCoverage = (uint32)strtoul(c, &c, 10); } } #endif _listLen++; loadHit(_files+i); } } mergeOverlappingHits(); return(true); } void hitReader::sortByCoverage(void) { qsort(_list, _listLen, sizeof(hit_s), hitCompareCoverage); }; // scan the list of hits (for a single EST, remember) and merge // any that are overlapping // void hitReader::mergeOverlappingHits(void) { // Sort by the genomic position // qsort(_list, _listLen, sizeof(hit_s), hitCompareGenPos); // Scan through the list, merging. // uint32 cur = 0; // Currently active entry uint32 exa = 1; // Entry we examine for merging while (exa < _listLen) { // Do they overlap? if ((_list[cur].a._dsIdx == _list[exa].a._dsIdx) && (_list[cur].a._forward == _list[exa].a._forward) && (_list[cur].a._dsHi >= _list[exa].a._dsLo)) { // Yup, merge. Extend the current hit if it is smaller. if ((_list[cur].a._dsLo == _list[exa].a._dsLo) && (_list[cur].a._dsHi == _list[exa].a._dsHi)) { // Nop, they're the same. } else if (_list[cur].a._dsHi >= _list[exa].a._dsHi) { // Nop, exa is contained in cur. } else { // exa extends cur! // If cur is contained in exa, just get rid of cur. // Otherwise, we need to fudge up new scores -- but we // instead just mark them as merged, and don't filter them. // if (_list[cur].a._dsLo == _list[exa].a._dsLo) { memcpy(_list+cur, _list+exa, sizeof(hit_s)); } else { #ifdef DEBUG_HITREADER fprintf(stderr, "MERGE: ("uint32FMT","uint32FMT") -e "uint32FMT" " uint32FMT":"uint32FMT"-"uint32FMT"%c("uint32FMT"-"uint32FMT"-"uint32FMT") " uint32FMT":"uint32FMT"-"uint32FMT"%c("uint32FMT"-"uint32FMT"-"uint32FMT")\n", cur, exa, _list[cur].a._qsIdx, _list[cur].a._dsIdx, _list[cur].a._dsLo, _list[cur].a._dsHi, _list[cur].a._forward ? 
'f' : 'r', _list[cur].a._covered, _list[cur].a._matched, _list[cur].a._numMers, _list[exa].a._dsIdx, _list[exa].a._dsLo, _list[exa].a._dsHi, _list[exa].a._forward ? 'f' : 'r', _list[exa].a._covered, _list[exa].a._matched, _list[exa].a._numMers); #endif _list[cur].a._merged = true; _list[cur].a._covered = 0; _list[cur].a._matched = 0; _list[cur].a._dsHi = _list[exa].a._dsHi; } } // By now, we've updated cur to include all that exa did. exa is junk. } else { // Nope, copy exa to the next spot (unless they're the same) // and move there. // cur++; if (cur != exa) memcpy(_list+cur, _list+exa, sizeof(hit_s)); } // Move to the next examination! exa++; } _listLen = cur + 1; } kmer-code-2013-trunk/seagen/hitMatrix.H0000644000000000000000000001020612322046702016467 0ustar rootroot#ifndef HITMATRIX_H #define HITMATRIX_H #include #include #include #include "bio++.H" #include "positionDB.H" #include "encodedQuery.H" // Define this to cut the space required for storing hits by 1/3 -- // from 12 byyes to 8 bytes -- at a slight computational expense -- // negligible on real hardware, I hope. // // The original definition of diagonalID was // qsLen - qsPos - 1 + dsPos // but qsLen is fixed for everyone, so we could reduce it to // dsPos - qsPos // but that's not unsigned. // // Results: on a human mapping, using chromosomes as the stream and // the whole human as the table (so we need to actually store a large // number of hits), we see a savings of 2GB and a small drop in // runtime. Process size went from 20.7GB to 18.7GB, CPU time from // 20578 to 20193 seconds (833MHz EV6.8AL (21264B)). 
// #define WITHOUT_DIAGONALID struct diagonalLine { uint32 _qsPos; uint32 _dsPos; #ifndef WITHOUT_DIAGONALID uint32 _diagonalID; #endif }; class trapMatch { public: uint32 _qsLo, _qsHi; uint32 _dsLo, _dsHi; merCovering *_IL; trapMatch *_next; trapMatch(uint32 qsLo, uint32 qsHi, uint32 dsLo, uint32 dsHi, merCovering *IL) { _qsLo = qsLo; _qsHi = qsHi; _dsLo = dsLo; _dsHi = dsHi; _IL = IL; _next = 0L; }; }; class hitMatrix { public: hitMatrix(uint32 qsLen, uint32 qsMers, uint32 qsIdx); ~hitMatrix(); void addHits(uint32 qi, uint64 *ps, uint64 cn); void sort_diagonal(void); void sort_dsPos(void); void filter(encodedQuery *query, bool isReverse); private: uint32 _qsLen; // Seq Len of Q uint32 _qsMers; // Valid mers in Q uint32 _qsIdx; // Index of Q in the FastA // Instead of building the lines during add(), we store // the information used to build lines, and then build them // in chain(). This was done to reduce simultaneous memory // usage, as the lineArrayMap and etc take up considerable space. // uint32 _hitsLen; uint32 _hitsMax; diagonalLine *_hits; // Making sense of the raw output from the search is not a trivial // task for perl. SMALL searches (dbEST vs 0.5MB sequence) used more // than 4GB of memory in perl. // // So, we bite the bullet and do it here. // // _matches is a sorted linked list of the regions we have found. // The list is kept in REVERSE order, as we usually add regions // in the correct order (correct reverse order), occasionally // we need to swap the last two. 
// // The list is deleted in filter() // trapMatch *_matches; void addMatch(uint32 qsLo, uint32 qsHi, uint32 dsLo, uint32 dsHi, merCovering *IL); }; inline void hitMatrix::addHits(uint32 qi, uint64 *ps, uint64 cn) { if ((_hitsLen + cn) >= _hitsMax) { _hitsMax = _hitsMax + _hitsMax + (uint32)cn; diagonalLine *h; try { h = new diagonalLine [_hitsMax]; } catch (std::bad_alloc) { fprintf(stderr, "hitMatrix::addHits()-- caught std::bad_alloc in %s at line %d.\n", __FILE__, __LINE__); fprintf(stderr, "hitMatrix::addHits()-- have "uint32FMT" hits, tried to add "uint64FMT" more\n", _hitsLen, cn); exit(1); } for (uint32 z=_hitsLen; z--; ) { h[z]._qsPos = _hits[z]._qsPos; h[z]._dsPos = _hits[z]._dsPos; #ifndef WITHOUT_DIAGONALID h[z]._diagonalID = _hits[z]._diagonalID; #endif } delete [] _hits; _hits = h; } for (uint64 i=0; i #include #include #include #include #include "aHit.H" #include "hitReader.H" extern uint32 uniqThresh; extern uint32 reptThresh; extern FILE *logFile; extern bool decided; extern const char *label; extern uint32 hitsToSave; extern double qualToSave; double difference = 0.1; void complicatedFilter_1_unique(hitReader &HR) { if (HR.numHits() <= uniqThresh) { decided = true; label = "unique"; hitsToSave = HR.numHits(); qualToSave = 0.0; // Try being a little more aggressive. Search for the last // point where the score difference across 10 hits is more than // difference and use that for a limit. // On the 100k test set #1 (ESTmapper paper, 1 Oct 2004) this modification // results in: // tp=106564 fp=1255487 fn=56705 tn=52816595 // // compared to saving all hits: // tp=106579 fp=1914659 fn=56690 tn=52157423 // // That is, we lost 15 true matches and didn't polish 660,000 // matches -- 1.21% of the total, but 50% of what we actually // need to polish. // uint32 i = HR.numHits() - 1; while ((i >= 10) && ((HR[i-10].coverage - HR[i].coverage) < difference)) i--; hitsToSave = HR.numHits(); qualToSave = HR[i].coverage; #if 0 // Take the middle hit, not the end. 
This doesn't hurt too much // (20 matches out of 100,000 ESTs, and we missed one EST // completely) but only gains us 0.08% additional filtering. if (i >= 15) qualToSave = HR[i-5].coverage; #endif // Save all hits with this coverage score! This isn't really needed, but it // makes the log message correct. // while ((i < HR.numHits()) && (qualToSave == HR[i].coverage)) i++; if (logFile) fprintf(logFile, uint32FMT"] unique: aggressively filtered to "uint32FMT" hits out of "uint32FMT" hits.\n", HR.iid(), i, HR.numHits()); } } void complicatedFilter_2_knee(hitReader &HR) { decided = false; hitsToSave = 0; qualToSave = 0.0; #if 0 decided = true; hitsToSave = 0; qualToSave = 1.1; return; #endif // Apply the same filter as used in #1 (the aggressive part), and accept // it if the number of hits saved is below some threshold. uint32 i = HR.numHits() - 1; while ((i >= 10) && ((HR[i-10].coverage - HR[i].coverage) < difference)) i--; // If i==9, then we failed to find a knee, and we fail this filter // if (i < 10) return; hitsToSave = HR.numHits(); qualToSave = HR[i].coverage; // Save all hits with this coverage score! // while ((i < HR.numHits()) && (qualToSave == HR[i].coverage)) i++; if (i <= uniqThresh) { decided = true; label = "knee"; if (logFile) fprintf(logFile, uint32FMT"] knee: filtered "uint32FMT" hits down to "uint32FMT" hits using threshold %f\n", HR.iid(), HR.numHits(), i, qualToSave); } } // If all scores are about the same, it's either a repeat or a // lot of spurious matches, depending on the level of signal. 
// void complicatedFilter_3_uniform(hitReader &HR) { decided = false; hitsToSave = 0; qualToSave = 0.0; if ((HR.bestScore() - HR.worstScore()) < difference) { decided = true; label = "uniform"; hitsToSave = reptThresh; qualToSave = 0.0; if (logFile) { fprintf(logFile, uint32FMT"] uniform: uniform signal strength, saving the first "uint32FMT" hits out of "uint32FMT" hits, best=%f, worst=%f\n", HR.iid(), hitsToSave, HR.numHits(), HR.bestScore(), HR.worstScore()); } } } // If we're not decided here, the EST had too many "good" hits to // be filtered by the threshold method. Try a more sophisticated // (confusing) method. // void complicatedFilter_4_largestdifference(hitReader &HR) { decided = false; hitsToSave = 0; qualToSave = 0.0; double largestDifference = 0.0; for (uint32 i=1; i < HR.numHits(); i++) if (largestDifference < (HR[i-1].coverage - HR[i].coverage)) largestDifference = HR[i-1].coverage - HR[i].coverage; // If the largest difference is below 10% coverage, then it's not // clear how to pick a threshold and we just save a bunch of hits. // if (largestDifference < difference) { decided = true; label = "diff"; hitsToSave = reptThresh; qualToSave = 0.0; if (logFile) fprintf(logFile, uint32FMT"] diff: has no clear signal knee, saving the first "uint32FMT" hits out of "uint32FMT" hits, best=%f, worst=%f, largestdiff=%f\n", HR.iid(), hitsToSave, HR.numHits(), HR.bestScore(), HR.worstScore(), largestDifference); } } // Identify any spike near the start. If we see a spike, // save the first uniqThresh hits. // // If the largest difference (which we guarantee to be >= 10% // coverage here) is in the first uniqThresh hits, then we // have a spike and we output uniqThresh hits. // // To narrow the range more, we find the last spot where the // difference in scores over 10 hits is > difference. This is a // generous heuristic. 
// void complicatedFilter_5_spikes(hitReader &HR) { decided = false; hitsToSave = 0; qualToSave = 0.0; uint32 spikeFound = 0; for (uint32 i=1; i < uniqThresh; i++) if ((HR[i-1].coverage - HR[i].coverage) > difference) spikeFound = i; // If we have found a spike, start at hit[uniqThresh], search // backwards for the first point where the difference in // scores across 10 hits is larger than difference // // Seems like a NOP, but it loosens things up a bit. Consider a // spike between hits 3 and 4, but 1=2=3 and 4=5=6=7=8=9=10=11. We // find a spike, then find a nice place to cut it. If we never // find a nice place, we save the top uniqThresh hits. if (spikeFound) { decided = true; label = "spike"; hitsToSave = uniqThresh; qualToSave = 0.0; for (uint32 i=uniqThresh-1; i > 9; i--) if ((HR[i-10].coverage - HR[i].coverage) > difference) { hitsToSave = i + 1; break; } qualToSave = HR[hitsToSave].coverage; if (logFile) fprintf(logFile, uint32FMT"] spike: at "uint32FMT", "uint32FMT" hits saved: thresh=%f, "uint32FMT" hits, best=%f, worst=%f\n", HR.iid(), spikeFound, hitsToSave, qualToSave, HR.numHits(), HR.bestScore(), HR.worstScore()); } } void complicatedFilter(hitReader &HR) { decided = false; label = "NOLABELERROR"; qualToSave = 1.0; hitsToSave = 0; complicatedFilter_1_unique(HR); if (decided) return; complicatedFilter_2_knee(HR); if (decided) return; complicatedFilter_3_uniform(HR); if (decided) return; complicatedFilter_4_largestdifference(HR); if (decided) return; complicatedFilter_5_spikes(HR); if (decided) return; decided = true; label = "unknown"; hitsToSave = reptThresh; qualToSave = 0.0; if (hitsToSave > HR.numHits()) hitsToSave = HR.numHits(); if (logFile) fprintf(logFile, uint32FMT"] is an unclassified signal, "uint32FMT" hits saved out of "uint32FMT" hits, best=%f, worst=%f\n", HR.iid(), hitsToSave, HR.numHits(), HR.bestScore(), HR.worstScore()); } kmer-code-2013-trunk/seagen/sortHits.C0000644000000000000000000001523312322046702016335 0ustar 
rootroot#include #include #include #include #include #include "aHit.H" #include "bio++.H" // Command line options. Only tmpPath needs to be global, and it can // be easily localized. // bool beVerbose = false; uint64 memoryLimit = 128 * 1024 * 1024; char *tmpPath = 0L; class aHitReader { public: // Open the file for reading, testing if it's binary or ascii input // aHitReader(char *filename) { errno = 0; theFile = fopen(filename, "r"); if (theFile == 0L) { fprintf(stderr, "sortHits-- ERROR opening '%s': %s\n", filename, strerror(errno)); exit(1); } char x = (char)fgetc(theFile); ungetc(x, theFile); isBinary = (x != '-'); if (!isBinary) buffer = new char [1024]; }; ~aHitReader() { fclose(theFile); delete [] buffer; }; bool readHit(aHit &hit) { if (isBinary) { ahit_readBinary(&hit, theFile); } else { fgets(buffer, 1024, theFile); ahit_parseString(&hit, buffer); } return(feof(theFile) == false); }; private: FILE *theFile; char *buffer; bool isBinary; }; // Write a bunch of hits to a temporary file (unlink the file after // it's opened) then allow those hits to be read back in. Doesn't // need the aHitReader, as we use just the binary format. // class aHitTemporary { public: aHitTemporary(aHit *hits, uint32 hitsLen) { theFile = makeTempFile(tmpPath); // XXX: Known bug on Tru64: fwrite() of data blocks > 2GB is broken uint32 outputPos = 0; uint32 outputLen = 1024 * 1024 / sizeof(aHit); while (outputPos < hitsLen) { errno = 0; outputPos += fwrite(hits, sizeof(aHit), hitsLen, theFile); if (errno) { fprintf(stderr, "ERROR: sortHits()-- Failed to write temporary file: %s\n", strerror(errno)); exit(1); } // XXX: do we write one too many? 
if (outputPos + outputLen > hitsLen) outputLen = hitsLen - outputPos; } rewind(theFile); hit._forward = false; hit._merged = false; hit._qsIdx = uint32ZERO; hit._dsIdx = uint32ZERO; hit._dsLo = uint32ZERO; hit._dsHi = uint32ZERO; hit._covered = uint32ZERO; hit._matched = uint32ZERO; hit._numMers = uint32ZERO; nextHit(); }; ~aHitTemporary() { fclose(theFile); }; aHit *theHit(void) { return(&hit); }; void nextHit(void) { if (hit._qsIdx != ~uint32ZERO) { errno = 0; fread(&hit, sizeof(aHit), 1, theFile); if (errno) { fprintf(stderr, "ERROR: sortHits()-- Failed to read a hit: %s\n", strerror(errno)); exit(1); } // If we hit eof, this hit is invalid, and so are all future ones. Set // hit to be junk. // if (feof(theFile)) { hit._forward = false; hit._merged = false; hit._qsIdx = ~uint32ZERO; hit._dsIdx = ~uint32ZERO; hit._dsLo = ~uint32ZERO; hit._dsHi = ~uint32ZERO; hit._covered = ~uint32ZERO; hit._matched = ~uint32ZERO; hit._numMers = ~uint32ZERO; } } }; private: FILE *theFile; aHit hit; }; int hitcmp(const void *a, const void *b) { aHit *A = (aHit *)a; aHit *B = (aHit *)b; if (A->_dsIdx < B->_dsIdx) return(-1); if (A->_dsIdx > B->_dsIdx) return(1); if (A->_qsIdx < B->_qsIdx) return(-1); if (A->_qsIdx > B->_qsIdx) return(1); if (A->_dsLo < B->_dsLo) return(-1); if (A->_dsLo > B->_dsLo) return(1); return(0); } int main(int argc, char **argv) { if (argc < 4) { fprintf(stderr, "usage: %s [-v] [-m memorylimit] [-t temppath] hitfile1 hitfile2 ... > sorted-hits\n", argv[0]); fprintf(stderr, " memory limit is MB\n"); exit(1); } int arg = 1; while (arg < argc) { if (strncmp(argv[arg], "-v", 2) == 0) { beVerbose = true; } else if (strncmp(argv[arg], "-m", 2) == 0) { arg++; memoryLimit = atoi(argv[arg]); memoryLimit <<= 20; } else if (strncmp(argv[arg], "-t", 2) == 0) { arg++; tmpPath = argv[arg]; } else { // Must be at the first file name. Break. break; } arg++; } // Allocate a bunch of spaces to store hits. 
// uint64 hitsMax = memoryLimit / sizeof(aHit); uint32 hitsPos = 0; aHit *hits = new aHit [hitsMax]; uint32 tmpFlen = 0; uint32 tmpFmax = 1024; aHitTemporary **tmpF = new aHitTemporary * [tmpFmax]; while (arg < argc) { aHitReader *R = new aHitReader(argv[arg]); arg++; // Read hits until we exhaust out space, then sort and dump to disk. // while (R->readHit(hits[hitsPos])) { hitsPos++; if (hitsPos == hitsMax) { qsort(hits, hitsPos, sizeof(aHit), hitcmp); if (tmpFlen >= tmpFmax) { tmpFmax *= 2; aHitTemporary **tmp = new aHitTemporary * [tmpFmax]; memcpy(tmp, tmpF, sizeof(aHitTemporary) * tmpFlen); delete [] tmpF; tmpF = tmp; } tmpF[tmpFlen] = new aHitTemporary(hits, hitsPos); tmpFlen++; hitsPos = 0; } } delete R; } // All done reading. If we have stuff to sort, sort it. // if (hitsPos > 0) qsort(hits, hitsPos, sizeof(aHit), hitcmp); // No temporary files? Just write the hits and exit. We're done. // if (tmpFlen == 0) { for (uint32 i=0; i= tmpFmax) { tmpFmax *= 2; aHitTemporary **tmp = new aHitTemporary * [tmpFmax]; memcpy(tmp, tmpF, sizeof(aHitTemporary) * tmpFlen); delete [] tmpF; tmpF = tmp; } tmpF[tmpFlen] = new aHitTemporary(hits, hitsPos); tmpFlen++; // While there is still input, merge to the output // bool moreInput = true; while (moreInput) { // Pick the smallest hit -- if file [i] is finished, then hit[i] // is bogus and all the values are set to maximal values. // uint32 smallestHit = 0; for (uint32 nh = smallestHit+1; nh < tmpFlen; nh++) { if (hitcmp(tmpF[smallestHit]->theHit(), tmpF[nh]->theHit()) > 0) smallestHit = nh; } // If the smallest hit is invalid, we're done. Otherwise, write // the hit, and read a new one. // if (tmpF[smallestHit]->theHit()->_qsIdx == ~uint32ZERO) { moreInput = false; } else { ahit_printASCII(tmpF[smallestHit]->theHit(), stdout); tmpF[smallestHit]->nextHit(); } } // Should clean up, I know. 
return(0); } kmer-code-2013-trunk/seagen/filtertest.C0000644000000000000000000001765612322046702016716 0ustar rootroot#include #include #include #include #include #include "bio.h" #define MAX_ESTS (16 * 1024 * 1024) #define MAX_HITS (18474961) // for 20-03-5000-0.4 //#define SHOW_ONE //////////////////////////////////////// struct aHit { uint32 _forward; uint32 _qsIdx; uint32 _dsIdx; uint32 _dsLo; uint32 _dsHi; uint32 _covered; uint32 _matched; uint32 _numMers; uint32 _yesno; uint32 _identity; uint32 _coverage; float scoreCov; float scoreMult; }; void ahit_writeBinary(aHit *a, FILE *F) { fwrite(a, sizeof(aHit), 1, F); } void ahit_readBinary(aHit *a, FILE *F) { fread(a, sizeof(aHit), 1, F); } void ahit_printASCII(aHit *a, FILE *F) { #ifdef TRUE64BIT fprintf(F, "-%c -e %u -D %u %u %u -M %u %u %u %s %u %u\n", a->_forward ? 'f' : 'r', a->_qsIdx, a->_dsIdx, a->_dsLo, a->_dsHi, a->_covered, a->_matched, a->_numMers, a->_yesno ? "-Y" : "-N", a->_identity, a->_coverage); #else fprintf(F, "-%c -e %lu -D %lu %lu %lu -M %lu %lu %lu %s %lu %lu\n", a->_forward ? 'f' : 'r', a->_qsIdx, a->_dsIdx, a->_dsLo, a->_dsHi, a->_covered, a->_matched, a->_numMers, a->_yesno ? 
"-Y" : "-N", a->_identity, a->_coverage); #endif } void ahit_parseString(aHit *a, char *b) { char *c = b+1; a->_forward = (*c == 'f'); c += 1; if (c[2] != 'e') fprintf(stderr, "'%s' didn't get -e\n", b); c += 4; a->_qsIdx = (uint32)strtoul(c, &c, 10); if (c[2] != 'D') fprintf(stderr, "'%s' didn't get -D\n", b); c += 4; a->_dsIdx = (uint32)strtoul(c, &c, 10); a->_dsLo = (uint32)strtoul(c, &c, 10); a->_dsHi = (uint32)strtoul(c, &c, 10); if (c[2] == 'M') { c += 4; a->_covered = (uint32)strtoul(c, &c, 10); a->_matched = (uint32)strtoul(c, &c, 10); a->_numMers = (uint32)strtoul(c, &c, 10); } else { //fprintf(stderr, "'%s' didn't get -M\n", b); a->_covered = 0; a->_matched = 0; a->_numMers = 0; } a->_yesno = 0; a->_identity = 0; a->_coverage = 0; if (c[2] == 'Y') { c += 4; a->_yesno = 1; a->_identity = (uint32)strtoul(c, &c, 10); a->_coverage = (uint32)strtoul(c, &c, 10); } #if 0 if (c[2] == 'N') { c += 4; a->_yesno = 0; a->_identity = (uint32)strtoul(c, &c, 10); a->_coverage = (uint32)strtoul(c, &c, 10); } #endif } //////////////////////////////////////// int hitCompare(const void *a, const void *b) { const aHit *A = (const aHit *)a; const aHit *B = (const aHit *)b; if (A->scoreCov > B->scoreCov) return(-1); else return(A->scoreCov < B->scoreCov); } int hitCompareID(const void *a, const void *b) { const aHit *A = (const aHit *)a; const aHit *B = (const aHit *)b; if (A->_qsIdx < B->_qsIdx) return(-1); if (A->_qsIdx > B->_qsIdx) return(1); return(0); } int main(int argc, char **argv) { aHit *hits = new aHit [MAX_HITS]; uint32 hitsLen = 0; // read all the hits from stdin -- assumes ascii format // char hitLine[1025]; while (!feof(stdin)) { fgets(hitLine, 1024, stdin); if (!feof(stdin)) { ahit_parseString(hits + hitsLen, hitLine); // These are the scores used by the filter // hits[hitsLen].scoreCov = (float)hits[hitsLen]._covered / (float)hits[hitsLen]._numMers; hits[hitsLen].scoreMult = (float)hits[hitsLen]._matched / (float)hits[hitsLen]._covered; // aHit->_covered is in 
bases, but aHit->_numMers is the // number of mers. Possible for coverage to be > 1.0. // if (hits[hitsLen].scoreCov > 1.0) hits[hitsLen].scoreCov = 1.0; hitsLen++; if ((hitsLen & 0xff) == 0) { fprintf(stderr, "reading hits "uint32FMT"\r", hitsLen); fflush(stderr); } } } fprintf(stderr, "reading hits "uint32FMT"\n", hitsLen); // Sort the hits by estid // fprintf(stderr, "sorting hits by cDNA\n"); qsort(hits, hitsLen, sizeof(aHit), hitCompareID); // Sort the hits by score (scoreCov), in decreasing order. // fprintf(stderr, "sorting hits by score\n"); for (uint32 currentHit = 0; currentHit < hitsLen; ) { uint32 estOfInterest = hits[currentHit]._qsIdx; uint32 numHits = 0; for (uint32 t=currentHit; (t < hitsLen) && (hits[t]._qsIdx == estOfInterest); t++) numHits++; qsort(hits + currentHit, numHits, sizeof(aHit), hitCompare); currentHit += numHits; } fprintf(stderr, "filtering hits\n"); double L = 0.0; double H = 0.0; double V = 0.1; double M = 1.0; double MC = 0.0; uint32 ML = 0; double minIdentity = 98.0; double minCoverage = 96.0; for (uint32 Hcnt = 10; Hcnt <= 100; Hcnt += 10) { for (uint32 Lcnt = 10; Lcnt < Hcnt && Lcnt < 60; Lcnt += 10) { for (uint32 Vcnt = 10; Vcnt < 100; Vcnt += 10) { #ifdef SHOW_ONE Lcnt = 30; Hcnt = 40; Vcnt = 100; #endif L = Lcnt / 100.0; H = Hcnt / 100.0; V = Vcnt / 100.0; uint32 truepositive = 0; uint32 falsepositive = 0; uint32 truenegative = 0; uint32 falsenegative = 0; for (uint32 currentHit = 0; currentHit < hitsLen; ) { // Find the number of hits for this ESTid // uint32 estOfInterest = hits[currentHit]._qsIdx; uint32 numHits = 0; for (uint32 t=currentHit; (t < hitsLen) && (hits[t]._qsIdx == estOfInterest); t++) numHits++; double h = hits[currentHit].scoreCov - hits[currentHit + numHits - 1].scoreCov; double p = 0.0; if (h <= L) p = 1.0; if (h >= H) p = V; if (p == 0.0) p = 1.0 - (1.0 - V) * (h - L) / (H - L); // check p; it should be between V and 1.0 if (p > 1.0) { fprintf(stderr, "error in p; p=%f h=%f (%f %f %f)\n", p, h, L, H, V); 
p = 1.0; } if (p < V) { fprintf(stderr, "error in p; p=%f h=%f (%f %f %f)\n", p, h, L, H, V); p = V; } // Output the top p% hits, by score. // double cutL = hits[currentHit].scoreCov - p * h; if (cutL > M) cutL = M; #ifdef SHOW_ONE fprintf(stdout, "LHV = %f %f %f p=%f h=%f cutL=%f\n", L, H, V, p, h, cutL); #endif for (uint32 i=currentHit; i < currentHit + numHits; i++) { if ((cutL <= hits[i].scoreCov) && ((MC <= hits[i].scoreCov) || (ML <= hits[i]._covered))) { #ifdef SHOW_ONE fprintf(stdout, "POS: (%f)", hits[i].scoreCov); ahit_printASCII(hits+i, stdout); #endif if ((hits[i]._yesno == 1) && (hits[i]._identity >= minIdentity) && (hits[i]._coverage >= minCoverage)) truepositive++; else falsepositive++; } else { #ifdef SHOW_ONE fprintf(stdout, "NEG: (%f)", hits[i].scoreCov); ahit_printASCII(hits+i, stdout); #endif if ((hits[i]._yesno == 1) && (hits[i]._identity >= minIdentity) && (hits[i]._coverage >= minCoverage)) falsenegative++; else truenegative++; } } #ifdef SHOW_ONE fprintf(stdout, "----\n"); #endif currentHit += numHits; } // Print L, H, V, sensitivity, specificity // fprintf(stdout, "%f %f %f %6.4f %6.4f "uint32FMT" "uint32FMT" "uint32FMT" "uint32FMT"\n", L, H, V, (double)truepositive / (truepositive + falsenegative), (double)truenegative / (truenegative + falsepositive), truepositive, falsepositive, truenegative, falsenegative); fflush(stdout); #ifdef SHOW_ONE exit(0); #endif } } } return(0); } kmer-code-2013-trunk/seagen/analysis/0000755000000000000000000000000012641613357016243 5ustar rootrootkmer-code-2013-trunk/seagen/analysis/dumpScores.pl0000644000000000000000000000162410310526111020705 0ustar rootroot#!/usr/local/bin/perl # # # my $currentestid = -1; my $outprefix; my @outline; my $num = 0; my $maxscore = 0; while ($currentestid < 222439) { $_ = ; my ($dir, $junk, $estid, $junk, $chr, $beg, $end, $junk, $s1, $s2, $sm, $junk, $i, $c) = split '\s+', $_; if ($currentestid == $estid) { push @outline, "$s1,$s2,$i,$c"; $maxscore = $sm; $num++; } else { 
@outline = sort { $b <=> $a } @outline; if (defined($outprefix)) { print "$outprefix\t$maxscore\t$num\t"; my $a = $,; my $b = $\; $, = " "; $\ = "\n"; print @outline; $, = $a; $\ = $b; } $currentestid = $estid; $outprefix = "$estid"; undef @outline; push @outline, "$s1,$s2,$i,$c"; $maxscore = $sm; $num = 1; } } kmer-code-2013-trunk/seagen/analysis/plotScoresSingly.pl0000644000000000000000000000365010310526111022105 0ustar rootroot#!/usr/local/bin/perl # # bzip2 -dc /raid/WORK/EMpaper/run1-nofiltering/all-scored-hits.sorted.bz2 | perl dumpScores.pl | perl plotScoresSingly.pl # use strict; $| = 1; my $tmppath = "/tmp"; # First line was blank. Don't know why. #my $junk = ; while (!eof(STDIN)) { $_ = ; my ($estid, $maxscore, $numhits, @vals) = split '\s+', $_; open(A, "> $tmppath/hits-$estid.dat"); open(B, "> $tmppath/iden-$estid.dat"); open(C, "> $tmppath/covr-$estid.dat"); foreach my $h (@vals) { my ($a, $b, $i, $c) = split ',', $h; $a /= $maxscore; print A "$a\n"; $i /= 100.0; print B "$i\n"; $c /= 100.0; print C "$c\n"; } close(C); close(B); close(A); my $output = substr("0000000000$estid", -6, 6); my $direct = substr("0000000000$estid", -6, 3); print "$output\r"; system("mkdir $direct") if (! 
-d "$direct"); open(O, "> $tmppath/plot-$estid.gpl"); print O "set terminal pbm color\n"; print O "set output\n"; print O "set pointsize 0.5\n"; print O "set xtics 10\n"; #print O "set size 1.5,1.5\n"; print O "plot [-5:200][0.0:1.2] "; print O " 0.95 notitle lt 0, "; print O " 0.80 notitle lt 0, "; print O " 0.50 notitle lt 0, "; print O " \"$tmppath/hits-$estid.dat\" using 1 notitle with linespoints 1, "; print O " \"$tmppath/covr-$estid.dat\" using 1 notitle with points 3, "; print O " \"$tmppath/iden-$estid.dat\" using 1 notitle with points 2\n"; close(O); my $cmd = ""; $cmd = "gnuplot $tmppath/plot-$estid.gpl | ppmtogif -quiet > $direct/$output.gif"; $cmd .= " && rm -f"; $cmd .= " $tmppath/plot-$estid.gpl"; $cmd .= " $tmppath/hits-$estid.dat"; $cmd .= " $tmppath/iden-$estid.dat"; $cmd .= " $tmppath/covr-$estid.dat"; system("$cmd"); } close(STDIN); kmer-code-2013-trunk/seagen/aHit.H0000644000000000000000000000101512322046702015401 0ustar rootroot#ifndef AHIT_H #define AHIT_H #include "bio.h" #include "util++.H" #include struct aHit { uint32 _forward : 1; uint32 _merged : 1; uint32 _qsIdx; uint32 _dsIdx; uint32 _dsLo; uint32 _dsHi; uint32 _covered; uint32 _matched; uint32 _numMers; }; void ahit_writeBinary(aHit *a, FILE *F); void ahit_readBinary(aHit *a, FILE *F); void ahit_readBinary(aHit *a, readBuffer *F); void ahit_printASCII(aHit *a, FILE *F); void ahit_parseString(aHit *a, char *b); #endif // AHIT_H kmer-code-2013-trunk/seagen/filterESTsimple.C0000644000000000000000000000252312322046702017567 0ustar rootroot#include #include #include #include #include #include "aHit.H" // A very simple filter. // // Output the top 50 hits or all hits above 0.2, whichever is _smaller_. 
#include "hitReader.H" #define UNIQ_THRESH 50 #define QUAL_THRESH 0.2 int main(int argc, char **argv) { if (argc == 1) { fprintf(stderr, "ESTmapper utility function -- not for human use.\n"); exit(1); } hitReader HR(argc); // takes no args int arg = 1; while (arg < argc) { HR.addInputFile(argv[arg]); arg++; } while (HR.loadHits()) { HR.sortByCoverage(); // Output top 'UNIQ_THRESH' hits uint32 max = UNIQ_THRESH; if (max >= HR.numHits()) max = HR.numHits(); for (uint32 i=0; i 0) && (count < UNIQ_THRESH)) { // Output all hits above QUAL_THRESH for (uint32 i=0; i < HR.numHits(); i++) if (QUAL_THRESH <= HR[i].coverage) ahit_printASCII(&HR[i].a, stdout); } else { // Output top 'UNIQ_THRESH' hits for (uint32 i=0; i < UNIQ_THRESH; i++) ahit_printASCII(&HR[i].a, stdout); } #endif } return(0); } kmer-code-2013-trunk/seagen/posix.H0000644000000000000000000000000011463747051015661 0ustar rootrootkmer-code-2013-trunk/seagen/configuration.H0000644000000000000000000000454312322046702017374 0ustar rootroot#ifndef CONFIGURATION_H #define CONFIGURATION_H // // A singleton for working with the command line parameters. 
// #include "bio++.H" #include "util++.H" class configuration { public: bool _beVerbose; uint32 _merSize; uint32 _numSearchThreads; uint32 _merSkip; bool _doReverse; bool _doForward; uint32 _maxDiagonal; uint32 _maxGap; uint32 _qsOverlap; uint32 _dsOverlap; uint32 _extendWeight; uint32 _extendMinimum; bool _extendAlternate; uint32 _maxIntronLength; uint32 _smallSequenceCutoff; uint32 _minLengthSingle; double _minCoverageSingle; double _minCoverageMultiple; uint32 _minLengthMultiple; char *_dbFileName; char *_qsFileName; char *_maskFileName; char *_onlyFileName; char *_outputFileName; char *_queryMatchFileName; int _outputFile; int _matchCountsFile; char *_tableTemporaryFileName; char *_tableFileName; bool _tableBuildOnly; bool _binaryOutput; seqStream *_dbSTREAM; // Running state // seqCache *_qsFASTA; existDB *_maskDB; existDB *_onlyDB; positionDB *_positions; uint32 _numberOfQueries; // Wall clock times // double _startTime; double _initTime; double _buildTime; double _searchTime; // sweatShop queue sizes; maximum number of sequences pre-loaded, // maximum number of results waiting for output. 
// uint32 _loaderQueue; struct timespec _loaderSleep; bool _loaderWarnings; struct timespec _searchSleep; uint32 _writerQueue; struct timespec _writerSleep; bool _writerWarnings; configuration(); ~configuration(); void usage(char *name); void read(int argc, char **argv); void setTime(struct timespec *ts, double t) { ts->tv_sec = (time_t)floor(t); ts->tv_nsec = (long)((t - ts->tv_sec) * 1e9); }; private: void addToUse(uint32 v); void parseUseLine(char *line); }; extern configuration config; #endif // CONFIGURATION_H kmer-code-2013-trunk/seagen/hitMatrix-sort.C0000644000000000000000000000326612322046702017457 0ustar rootroot#include "hitMatrix.H" // Sort by dsPos inline void adjustHeap_dsPos(diagonalLine *L, uint32 p, uint32 n) { uint32 q = L[p]._qsPos; uint32 d = L[p]._dsPos; #ifndef WITHOUT_DIAGONALID uint32 l = L[p]._diagonalID; #endif uint32 c = (p << 1) + 1; // let c be the left child of p while (c < n) { // Find the larger of the two children // if ((c+1 < n) && (L[c]._dsPos < L[c+1]._dsPos)) c++; // Does the node in question fit here? // if (d >= L[c]._dsPos) break; // Else, swap the parent and the child // L[p]._qsPos = L[c]._qsPos; L[p]._dsPos = L[c]._dsPos; #ifndef WITHOUT_DIAGONALID L[p]._diagonalID = L[c]._diagonalID; #endif // Move down the tree // p = c; c = (p << 1) + 1; } L[p]._qsPos = q; L[p]._dsPos = d; #ifndef WITHOUT_DIAGONALID L[p]._diagonalID = l; #endif } void hitMatrix::sort_dsPos(void) { if (_hitsLen > 1) { // Create the heap of lines. 
// for (uint32 i=_hitsLen/2; i--; ) adjustHeap_dsPos(_hits, i, _hitsLen); // Interchange the new maximum with the element at the end of the tree // for (uint32 i=_hitsLen-1; i>0; i--) { uint32 q = _hits[i]._qsPos; uint32 d = _hits[i]._dsPos; #ifndef WITHOUT_DIAGONALID uint32 l = _hits[i]._diagonalID; #endif _hits[i]._qsPos = _hits[0]._qsPos; _hits[i]._dsPos = _hits[0]._dsPos; #ifndef WITHOUT_DIAGONALID _hits[i]._diagonalID = _hits[0]._diagonalID; #endif _hits[0]._qsPos = q; _hits[0]._dsPos = d; #ifndef WITHOUT_DIAGONALID _hits[0]._diagonalID = l; #endif adjustHeap_dsPos(_hits, 0, i); } } } kmer-code-2013-trunk/seagen/thr-deadlock.C0000644000000000000000000007205012322046702017057 0ustar rootroot#include "searchGENOME.H" // OSF/1 on Compaq Alpha has, in the past, gotten stuck in a deadlock // situation allocating memory. There's lots of debugging stuff at // the end if this file. #ifdef __alpha // Define this to kill the process with a vengance instead of // gracefully exiting. exit() tries to free memory, and is thus gets // caught in the deadlock -- but is useful for debugging. // #define KILL_INSTEAD_OF_EXIT #ifdef KILL_INSTEAD_OF_EXIT #include #endif uint32 deadlockTested = 0; uint32 deadlockPassed = 0; void* deadlockDetector(void *) { fprintf(stderr, "Hello! I'm a deadlockDetector!\n"); detectAgain: // Wait for the deadlock checker to reset things // while ((deadlockTested == 1) || (deadlockPassed == 1)) sleep(4); deadlockTested = 1; char *x = new char [16]; delete [] x; deadlockPassed = 1; goto detectAgain; return(0L); // Ignore the warning! } void* deadlockChecker(void *) { fprintf(stderr, "Hello! I'm a deadlockChecker!\n"); checkAgain: // Wait for the tester to test // while (deadlockTested == 0) sleep(5); // Give it another ten seconds to return // sleep(5); if (deadlockPassed == 0) { fprintf(stderr, "\n\n\nESTmapper/search-- Deadlock detected! 
Aborting the process!\n\n"); fflush(stderr); #ifdef KILL_INSTEAD_OF_EXIT kill(getpid(), SIGKILL); #endif exit(1); } //fprintf(stderr, "Deadlock OK\n"); // Reset the testing/checking flags // deadlockPassed = 0; deadlockTested = 0; goto checkAgain; return(0L); // Ignore the warning! } #endif // _alpha #ifdef DONT_EVER_ENABLE_THIS // // Here are some notes on what was tried, and the stack trace from a lock. // This test failed to find the cause. // #define SIZE (16 * 1024 * 1024) void* mallocStressor(void *) { struct timespec sleepAmt = { 0, 10000 }; unsigned long v = 0; fprintf(stderr, "Hello! I'm a mallocStressor!\n"); mallocAgain: //nanosleep(&sleepAmt, 0L); char *x = new char [SIZE]; for (unsigned int i=SIZE; i--; ) x[i] = i >> 5; for (unsigned int i=SIZE; i--; ) x[i] |= x[SIZE-i]; for (unsigned int i=SIZE; i--; ) v += x[i]; delete [] x; goto mallocAgain; return((void*)v); // Ignore the warning! } void main(int argc, char **argv) { pthread_attr_t threadAttr; pthread_t threadID; pthread_attr_init(&threadAttr); pthread_attr_setscope(&threadAttr, PTHREAD_SCOPE_SYSTEM); pthread_attr_setdetachstate(&threadAttr, PTHREAD_CREATE_DETACHED); pthread_attr_setschedpolicy(&threadAttr, SCHED_OTHER); pthread_create(&threadID, &threadAttr, deadlockDetector, 0L); pthread_create(&threadID, &threadAttr, deadlockChecker, 0L); for (unsigned int i=0; i<16; i++) pthread_create(&threadID, &threadAttr, mallocStressor, (void *)i); sleep(100); } // // // Stack trace #1 // // (ladebug) show thread Thread Name State Substate Policy Pri ------ ------------------------- --------------- ----------- ------------ --- * 1 default thread blocked kern usleep SCHED_OTHER 19 -1 manager thread blk SCS SCHED_RR 19 -2 null thread for VP 2 running VP 2 null thread -1 > 2 blocked kern usleep SCHED_OTHER 19 -3 null thread for VP 3 running VP 3 null thread -1 3 blocked mut 9 SCHED_OTHER 19 -4 null thread for VP 4 running VP 4 null thread -1 4 blocked mut 9 SCHED_OTHER 19 -5 null thread for VP 5 running VP 5 
null thread -1 5 blocked mut 9 SCHED_OTHER 19 6 blocked mut 9 SCHED_OTHER 19 (ladebug) thread 6 Thread Name State Substate Policy Pri ------ ------------------------- --------------- ----------- ------------ --- > 6 blocked mut 9 SCHED_OTHER 19 (ladebug) where >0 0x3ff805caf3c in __hstTransferRegisters(0x3ffc01b8400, 0x3ff805ae6d0, 0x3ff805a33a4, 0x0, 0x0, 0x20001e2fc40) in /usr/shlib/libpthread.so #1 0x3ff805af74c in __osTransferContext(0x3ffc01b8400, 0x3ff805ae6d0, 0x3ff805a33a4, 0x0, 0x0, 0x20001e2fc40) in /usr/shlib/libpthread.so #2 0x3ff805a3c50 in __dspTransferContext(0x3ffc01b8400, 0x3ff805ae6d0, 0x3ff805a33a4, 0x0, 0x0, 0x20001e2fc40) in /usr/shlib/libpthread.so #3 0x3ff805a12f4 in __dspDispatch(0x3ffc01b8400, 0x3ff805ae6d0, 0x3ff805a33a4, 0x0, 0x0, 0x20001e2fc40) in /usr/shlib/libpthread.so #4 0x3ff805ab90c in UnknownProcedure32FromFile8(0x3ffc01b8400, 0x3ff805ae6d0, 0x3ff805a33a4, 0x0, 0x0, 0x20001e2fc40) in /usr/shlib/libpthread.so #5 0x3ff805ab2f4 in UnknownProcedure31FromFile8(0x3ffc01b8400, 0x3ff805ae6d0, 0x3ff805a33a4, 0x0, 0x0, 0x20001e2fc40) in /usr/shlib/libpthread.so #6 0x3ff805abe3c in UnknownProcedure34FromFile8(0x3ffc01b8400, 0x3ff805ae6d0, 0x3ff805a33a4, 0x0, 0x0, 0x20001e2fc40) in /usr/shlib/libpthread.so #7 0x3ff801bf6a0 in UnknownProcedure2FromFile22(0x3ffc01b8400, 0x3ff805ae6d0, 0xfffffffffffffffc, 0x40100000, 0x222384c, 0x20001e2fc40) in /usr/shlib/lib c.so #8 0x3ff800cdad4 in malloc(0x3ffc01b8400, 0x3ff805ae6d0, 0xfffffffffffffffc, 0x40100000, 0x222384c, 0x20001e2fc40) in /usr/shlib/libc.so #9 0x3ff81f300e8 in operator new(0x3ffc01b8400, 0x3ff805ae6d0, 0xfffffffffffffffc, 0x40100000, 0x222384c, 0x20001e2fc40) in /usr/lib/cmplrs/cxx/libcxx.so #10 0x12000a53c in filter(0x3ffc01b8400, 0x3ff805ae6d0, 0xfffffffffffffffc, 0x40100000, 0x222384c, 0x20001e2fc40) in searchGENOME #11 0x120008a7c in doSearch(0x3ffc01b8400, 0x3ff805ae6d0, 0xfffffffffffffffc, 0x40100000, 0x222384c, 0x20001e2fc40) in searchGENOME #12 0x120008e2c in 
searchThread(0x3ffc01b8400, 0x3ff805ae6d0, 0xfffffffffffffffc, 0x40100000, 0x222384c, 0x20001e2fc40) in searchGENOME #13 0x3ff805bd2c8 in __thdBase(0x3ffc01b8400, 0x3ff805ae6d0, 0xfffffffffffffffc, 0x40100000, 0x222384c, 0x20001e2fc40) in /usr/shlib/libpthread.so (ladebug) thread 5 Thread Name State Substate Policy Pri ------ ------------------------- --------------- ----------- ------------ --- > 5 blocked mut 9 SCHED_OTHER 19 (ladebug) where >0 0x3ff805caf3c in __hstTransferRegisters(0x3ffc01b8400, 0x3ff805a1ce4, 0x3ff805a3608, 0x20000a0f600, 0x140028000, 0x20000a0f600) in /usr/shlib/libpthre ad.so #1 0x3ff805af74c in __osTransferContext(0x3ffc01b8400, 0x3ff805a1ce4, 0x3ff805a3608, 0x20000a0f600, 0x140028000, 0x20000a0f600) in /usr/shlib/libpthread. so #2 0x3ff805a3c50 in __dspTransferContext(0x3ffc01b8400, 0x3ff805a1ce4, 0x3ff805a3608, 0x20000a0f600, 0x140028000, 0x20000a0f600) in /usr/shlib/libpthread .so #3 0x3ff805a12f4 in __dspDispatch(0x3ffc01b8400, 0x3ff805a1ce4, 0x3ff805a3608, 0x20000a0f600, 0x140028000, 0x20000a0f600) in /usr/shlib/libpthread.so #4 0x3ff805ab90c in UnknownProcedure32FromFile8(0x3ffc01b8400, 0x3ff805a1ce4, 0x3ff805a3608, 0x20000a0f600, 0x140028000, 0x20000a0f600) in /usr/shlib/lib pthread.so #5 0x3ff805ab2f4 in UnknownProcedure31FromFile8(0x3ffc01b8400, 0x3ff805a1ce4, 0x3ff805a3608, 0x20000a0f600, 0x140028000, 0x20000a0f600) in /usr/shlib/lib pthread.so #6 0x3ff805abe3c in UnknownProcedure34FromFile8(0x3ffc01b8400, 0x3ff805a1ce4, 0x3ff805a3608, 0x20000a0f600, 0x140028000, 0x20000a0f600) in /usr/shlib/lib pthread.so #7 0x3ff801be4f0 in UnknownProcedure12FromFile22(0x3ffc01b8400, 0x3ff805a1ce4, 0x140a275c0, 0x100000, 0x3700498c55, 0x20000a0f600) in /usr/shlib/libc.so #8 0x3ff800cf2b0 in free(0x3ffc01b8400, 0x3ff805a1ce4, 0x140a275c0, 0x100000, 0x3700498c55, 0x20000a0f600) in /usr/shlib/libc.so #9 0x3ff81f15a7c in operator delete(0x3ffc01b8400, 0x3ff805a1ce4, 0x140a275c0, 0x100000, 0x3700498c55, 0x20000a0f600) in 
/usr/lib/cmplrs/cxx/libcxx.so #10 0x12000b090 in filter(0x3ffc01b8400, 0x3ff805a1ce4, 0x140a275c0, 0x100000, 0x3700498c55, 0x20000a0f600) in searchGENOME #11 0x120008a7c in doSearch(0x3ffc01b8400, 0x3ff805a1ce4, 0x140a275c0, 0x100000, 0x3700498c55, 0x20000a0f600) in searchGENOME #12 0x120008dec in searchThread(0x3ffc01b8400, 0x3ff805a1ce4, 0x140a275c0, 0x100000, 0x3700498c55, 0x20000a0f600) in searchGENOME #13 0x3ff805bd2c8 in __thdBase(0x3ffc01b8400, 0x3ff805a1ce4, 0x140a275c0, 0x100000, 0x3700498c55, 0x20000a0f600) in /usr/shlib/libpthread.so (ladebug) thread 4 Thread Name State Substate Policy Pri ------ ------------------------- --------------- ----------- ------------ --- > 4 blocked mut 9 SCHED_OTHER 19 (ladebug) where >0 0x3ff805caf3c in __hstTransferRegisters(0x3ffc01b8400, 0x3ff805ae6d0, 0x3ff805a33a4, 0x0, 0x0, 0x2000141fc40) in /usr/shlib/libpthread.so #1 0x3ff805af74c in __osTransferContext(0x3ffc01b8400, 0x3ff805ae6d0, 0x3ff805a33a4, 0x0, 0x0, 0x2000141fc40) in /usr/shlib/libpthread.so #2 0x3ff805a3c50 in __dspTransferContext(0x3ffc01b8400, 0x3ff805ae6d0, 0x3ff805a33a4, 0x0, 0x0, 0x2000141fc40) in /usr/shlib/libpthread.so #3 0x3ff805a12f4 in __dspDispatch(0x3ffc01b8400, 0x3ff805ae6d0, 0x3ff805a33a4, 0x0, 0x0, 0x2000141fc40) in /usr/shlib/libpthread.so #4 0x3ff805ab90c in UnknownProcedure32FromFile8(0x3ffc01b8400, 0x3ff805ae6d0, 0x3ff805a33a4, 0x0, 0x0, 0x2000141fc40) in /usr/shlib/libpthread.so #5 0x3ff805ab2f4 in UnknownProcedure31FromFile8(0x3ffc01b8400, 0x3ff805ae6d0, 0x3ff805a33a4, 0x0, 0x0, 0x2000141fc40) in /usr/shlib/libpthread.so #6 0x3ff805abe3c in UnknownProcedure34FromFile8(0x3ffc01b8400, 0x3ff805ae6d0, 0x3ff805a33a4, 0x0, 0x0, 0x2000141fc40) in /usr/shlib/libpthread.so #7 0x3ff801bf6a0 in UnknownProcedure2FromFile22(0x3ffc01b8400, 0x3ff805ae6d0, 0x1, 0x40100000, 0x31c, 0x2000141fc40) in /usr/shlib/libc.so #8 0x3ff800cdad4 in malloc(0x3ffc01b8400, 0x3ff805ae6d0, 0x1, 0x40100000, 0x31c, 0x2000141fc40) in /usr/shlib/libc.so #9 0x3ff81f300e8 in 
operator new(0x3ffc01b8400, 0x3ff805ae6d0, 0x1, 0x40100000, 0x31c, 0x2000141fc40) in /usr/lib/cmplrs/cxx/libcxx.so #10 0x12000a53c in filter(0x3ffc01b8400, 0x3ff805ae6d0, 0x1, 0x40100000, 0x31c, 0x2000141fc40) in searchGENOME #11 0x120008a7c in doSearch(0x3ffc01b8400, 0x3ff805ae6d0, 0x1, 0x40100000, 0x31c, 0x2000141fc40) in searchGENOME #12 0x120008dec in searchThread(0x3ffc01b8400, 0x3ff805ae6d0, 0x1, 0x40100000, 0x31c, 0x2000141fc40) in searchGENOME #13 0x3ff805bd2c8 in __thdBase(0x3ffc01b8400, 0x3ff805ae6d0, 0x1, 0x40100000, 0x31c, 0x2000141fc40) in /usr/shlib/libpthread.so (ladebug) thread 3 Thread Name State Substate Policy Pri ------ ------------------------- --------------- ----------- ------------ --- > 3 blocked mut 9 SCHED_OTHER 19 (ladebug) where >0 0x3ff805caf3c in __hstTransferRegisters(0x3ffc01b8400, 0x13, 0x3ff805a33a4, 0x0, 0x0, 0x2000283fc40) in /usr/shlib/libpthread.so #1 0x3ff805af74c in __osTransferContext(0x3ffc01b8400, 0x13, 0x3ff805a33a4, 0x0, 0x0, 0x2000283fc40) in /usr/shlib/libpthread.so #2 0x3ff805a3c50 in __dspTransferContext(0x3ffc01b8400, 0x13, 0x3ff805a33a4, 0x0, 0x0, 0x2000283fc40) in /usr/shlib/libpthread.so #3 0x3ff805a12f4 in __dspDispatch(0x3ffc01b8400, 0x13, 0x3ff805a33a4, 0x0, 0x0, 0x2000283fc40) in /usr/shlib/libpthread.so #4 0x3ff805ab90c in UnknownProcedure32FromFile8(0x3ffc01b8400, 0x13, 0x3ff805a33a4, 0x0, 0x0, 0x2000283fc40) in /usr/shlib/libpthread.so #5 0x3ff805ab2f4 in UnknownProcedure31FromFile8(0x3ffc01b8400, 0x13, 0x3ff805a33a4, 0x0, 0x0, 0x2000283fc40) in /usr/shlib/libpthread.so #6 0x3ff805abe3c in UnknownProcedure34FromFile8(0x3ffc01b8400, 0x13, 0x3ff805a33a4, 0x0, 0x0, 0x2000283fc40) in /usr/shlib/libpthread.so #7 0x3ff801bf6a0 in UnknownProcedure2FromFile22(0x3ffc01b8400, 0x13, 0x20001927600, 0x40100000, 0x3ff81f34150, 0x2000283fc40) in /usr/shlib/libc.so #8 0x3ff800cdad4 in malloc(0x3ffc01b8400, 0x13, 0x20001927600, 0x40100000, 0x3ff81f34150, 0x2000283fc40) in /usr/shlib/libc.so #9 0x3ff81f32050 in 
UnknownProcedure3FromFile46(0x3ffc01b8400, 0x13, 0x20001927600, 0x40100000, 0x3ff81f34150, 0x2000283fc40) in /usr/lib/cmplrs/cxx/libc xx.so #10 0x3ff81f34190 in __cxx_v60_dispatch__X4need3new8libcxxso(0x3ffc01b8400, 0x13, 0x20001927600, 0x40100000, 0x3ff81f34150, 0x2000283fc40) in /usr/lib/cmp lrs/cxx/libcxx.so #11 0x3ff807f29d4 in UnknownProcedure11FromFile0(0x3ffc01b8400, 0x13, 0x20001927600, 0x40100000, 0x3ff81f34150, 0x2000283fc40) in /usr/shlib/libexc.so #12 0x3ff807f2cd8 in exc_dispatch_exception(0x3ffc01b8400, 0x13, 0x20001927600, 0x40100000, 0x3ff81f34150, 0x2000283fc40) in /usr/shlib/libexc.so #13 0x3ff807f39e0 in exc_raise_signal_exception(0x3ffc01b8400, 0x13, 0x20001927600, 0x40100000, 0x3ff81f34150, 0x2000283fc40) in /usr/shlib/libexc.so #14 0x3ff805b9470 in UnknownProcedure8FromFile16(0x3ffc01b8400, 0x13, 0x20001927600, 0x40100000, 0x3ff81f34150, 0x2000283fc40) in /usr/shlib/libpthread.so #15 0x3ff800d0b9c in __sigtramp(0x3ffc01b8400, 0x13, 0x20001927600, 0x40100000, 0x3ff81f34150, 0x2000283fc40) in /usr/shlib/libc.so #16 0x3ff801be2c0 in UnknownProcedure12FromFile22(0x3ffc0086f90, 0x0, 0x100000, 0x0, 0x3f003d2689, 0xa8003d2689) in /usr/shlib/libc.so #17 0x3ff800cf2b0 in free(0x3ffc0086f90, 0x0, 0x100000, 0x0, 0x3f003d2689, 0xa8003d2689) in /usr/shlib/libc.so #18 0x3ff81f15a7c in operator delete(0x3ffc0086f90, 0x0, 0x100000, 0x0, 0x3f003d2689, 0xa8003d2689) in /usr/lib/cmplrs/cxx/libcxx.so #19 0x12000b090 in filter(0x3ffc0086f90, 0x0, 0x100000, 0x0, 0x3f003d2689, 0xa8003d2689) in searchGENOME #20 0x120008a7c in doSearch(0x3ffc0086f90, 0x0, 0x100000, 0x0, 0x3f003d2689, 0xa8003d2689) in searchGENOME #21 0x120008dec in searchThread(0x3ffc0086f90, 0x0, 0x100000, 0x0, 0x3f003d2689, 0xa8003d2689) in searchGENOME #22 0x3ff805bd2c8 in __thdBase(0x3ffc0086f90, 0x0, 0x100000, 0x0, 0x3f003d2689, 0xa8003d2689) in /usr/shlib/libpthread.so (ladebug) thread 2 Thread Name State Substate Policy Pri ------ ------------------------- --------------- ----------- 
------------ --- > 2 blocked kern usleep SCHED_OTHER 19 (ladebug) where >0 0x3ff800e5c38 in __usleep_thread(0x20000f15a40, 0x0, 0x0, 0x0, 0x0, 0x356) in /usr/shlib/libc.so #1 0x3ff801b3314 in __usleep(0x20000f15a40, 0x0, 0x0, 0x0, 0x0, 0x356) in /usr/shlib/libc.so #2 0x1200091ac in loaderThread(0x20000f15a40, 0x0, 0x0, 0x0, 0x0, 0x356) in searchGENOME #3 0x3ff805bd2c8 in __thdBase(0x20000f15a40, 0x0, 0x0, 0x0, 0x0, 0x356) in /usr/shlib/libpthread.so (ladebug) thread 1 Thread Name State Substate Policy Pri ------ ------------------------- --------------- ----------- ------------ --- >* 1 default thread blocked kern usleep SCHED_OTHER 19 (ladebug) where >0 0x3ff800e5c38 in __usleep_thread(0x11fffbe88, 0x0, 0x5254, 0x0, 0x0, 0x140002408) in /usr/shlib/libc.so #1 0x3ff801b3314 in __usleep(0x11fffbe88, 0x0, 0x5254, 0x0, 0x0, 0x140002408) in /usr/shlib/libc.so #2 0x12000661c in main(0x11fffbe88, 0x0, 0x5254, 0x0, 0x0, 0x140002408) in searchGENOME #3 0x1200055c8 in __start(0x11fffbe88, 0x0, 0x5254, 0x0, 0x0, 0x140002408) in searchGENOME // // // Stack trace #2 // // (ladebug) show thread Thread Name State Substate Policy Pri ------ ------------------------- --------------- ----------- ------------ --- >* 4 blocked kern usleep SCHED_OTHER 19 1 default thread blocked mut 15 SCHED_OTHER 19 -1 manager thread blk SCS SCHED_RR 19 -2 null thread for VP 2 running VP 2 null thread -1 2 blocked mut 15 SCHED_OTHER 19 -3 null thread for VP 3 running VP 3 null thread -1 3 blocked mut 15 SCHED_OTHER 19 -4 null thread for VP 4 running VP 4 null thread -1 5 blocked mut 15 SCHED_OTHER 19 -5 null thread for VP 5 running VP 5 null thread -1 6 blocked mut 15 SCHED_OTHER 19 7 blocked mut 15 SCHED_OTHER 19 8 blocked mut 15 SCHED_OTHER 19 (ladebug) show mutex Mutex Name State Owner Pri Type Waiters (+Count) ------ ------------------------- ----- ------ --- -------- -------------------- 1 Once Normal 2 debugger client registry Normal 3 VM stats Normal 4 key creation Normal 5 malloc heap Normal 6 
malloc hash Normal 7 malloc cache[0] Normal 8 malloc cache[1] Normal 9 malloc cache[2] Normal 10 malloc cache[3] Normal 11 malloc cache[4] Normal 12 malloc cache[5] Normal 13 malloc cache[6] Normal 14 malloc cache[7] Normal 15 malloc cache[8] Lock Normal 6, 7, 8, 1, 5, 2, 3 16 malloc cache[9] Normal 17 malloc cache[10] Normal 18 malloc cache[11] Normal 19 malloc cache[12] Normal 20 malloc cache[13] Normal 21 malloc cache[14] Normal 22 malloc cache[15] Normal 23 malloc cache[16] Normal 24 malloc cache[17] Normal 25 malloc cache[18] Normal 26 malloc cache[19] Normal 27 malloc cache[20] Normal 28 malloc cache[21] Normal 29 malloc cache[22] Normal 30 malloc cache[23] Normal 31 malloc cache[24] Normal 32 malloc cache[25] Normal 33 malloc cache[26] Normal 34 malloc cache[27] Normal 35 malloc cache[28] Normal 36 brk Normal 37 exc cr Recurs 38 exc read rwl Normal 39 VM 0 lookaside Normal 40 VM 1 lookaside Normal 41 VM 2 lookaside Normal 42 VM 3 lookaside Normal 43 VM 4 lookaside Normal 44 VM 5 lookaside Normal 45 VM 6 lookaside Normal 46 VM 0 cache Normal 47 VM 1 cache Normal 48 VM 2 cache Normal 49 Global lock Recurs 50 ldr Recurs 51 Recurs 52 stderr Recurs 53 stdout Recurs 54 Recurs 55 Recurs 56 inputTailMutex(0x14000105 Normal 57 queryMatchMutex(0x1400010 Normal (ladebug) thread 1 Thread Name State Substate Policy Pri ------ ------------------------- --------------- ----------- ------------ --- > 1 default thread blocked mut 15 SCHED_OTHER 19 (ladebug) where >0 0x3ff805ba8ac in __hstTransferRegisters(0x20002d47600, 0x0, 0x0, 0x100000000, 0x20002d47c40, 0x3ff805ac400) in /usr/shlib/libpthread.so #1 0x3ff805acf74 in __osTransferContext(0x20002d47600, 0x0, 0x0, 0x100000000, 0x20002d47c40, 0x3ff805ac400) in /usr/shlib/libpthread.so #2 0x3ff805a004c in __dspDispatch(0x20002d47600, 0x0, 0x0, 0x100000000, 0x20002d47c40, 0x3ff805ac400) in /usr/shlib/libpthread.so #3 0x3ff805a94e4 in UnknownProcedure146FromFile0(0x20002d47600, 0x0, 0x0, 0x100000000, 0x20002d47c40, 0x3ff805ac400) 
in /usr/shlib/libpthread.so #4 0x3ff805a9bf8 in UnknownProcedure148FromFile0(0x20002d47600, 0x0, 0x0, 0x100000000, 0x20002d47c40, 0x3ff805ac400) in /usr/shlib/libpthread.so #5 0x3ff801bed30 in UnknownProcedure12FromFile22(0x20002d47600, 0x0, 0x0, 0x0, 0x0, 0x3ff805ac400) in /usr/shlib/libc.so #6 0x3ff800cf2c0 in free(0x20002d47600, 0x0, 0x0, 0x0, 0x0, 0x3ff805ac400) in /usr/shlib/libc.so #7 0x3ff81f15a7c in operator delete(0x20002d47600, 0x0, 0x0, 0x0, 0x0, 0x3ff805ac400) in /usr/lib/cmplrs/cxx/libcxx.so #8 0x3ff81f2f53c in operator delete[](0x20002d47600, 0x0, 0x0, 0x0, 0x0, 0x3ff805ac400) in /usr/lib/cmplrs/cxx/libcxx.so #9 0x1200073fc in main(0x20002d47600, 0x0, 0x0, 0x0, 0x0, 0x3ff805ac400) in searchGENOME #10 0x120006088 in __start(0x20002d47600, 0x0, 0x0, 0x0, 0x0, 0x3ff805ac400) in searchGENOME (ladebug) thread 2 Thread Name State Substate Policy Pri ------ ------------------------- --------------- ----------- ------------ --- > 2 blocked mut 15 SCHED_OTHER 19 (ladebug) where >0 0x3ff805ba8ac in __hstTransferRegisters(0x2000141f600, 0x0, 0x0, 0x100000000, 0x3ff805a0194, 0x3ff805ac400) in /usr/shlib/libpthread.so #1 0x3ff805acf74 in __osTransferContext(0x2000141f600, 0x0, 0x0, 0x100000000, 0x3ff805a0194, 0x3ff805ac400) in /usr/shlib/libpthread.so #2 0x3ff805a004c in __dspDispatch(0x2000141f600, 0x0, 0x0, 0x100000000, 0x3ff805a0194, 0x3ff805ac400) in /usr/shlib/libpthread.so #3 0x3ff805a94e4 in UnknownProcedure146FromFile0(0x2000141f600, 0x0, 0x0, 0x100000000, 0x3ff805a0194, 0x3ff805ac400) in /usr/shlib/libpthread.so #4 0x3ff805a9bf8 in UnknownProcedure148FromFile0(0x2000141f600, 0x0, 0x0, 0x100000000, 0x3ff805a0194, 0x3ff805ac400) in /usr/shlib/libpthread.so #5 0x3ff801bfee0 in UnknownProcedure2FromFile22(0x2000141f600, 0x0, 0x4, 0x0, 0x0, 0x3ff805ac400) in /usr/shlib/libc.so #6 0x3ff800cdae4 in malloc(0x2000141f600, 0x0, 0x4, 0x0, 0x0, 0x3ff805ac400) in /usr/shlib/libc.so #7 0x3ff81f300e8 in operator new(0x2000141f600, 0x0, 0x4, 0x0, 0x0, 0x3ff805ac400) in 
/usr/lib/cmplrs/cxx/libcxx.so #8 0x3ff81f2f5dc in operator new[](0x2000141f600, 0x0, 0x4, 0x0, 0x0, 0x3ff805ac400) in /usr/lib/cmplrs/cxx/libcxx.so #9 0x12000a9bc in deadlockDetector(0x2000141f600, 0x0, 0x4, 0x0, 0x0, 0x3ff805ac400) in searchGENOME #10 0x3ff805c67e0 in __thdBase(0x2000141f600, 0x0, 0x4, 0x0, 0x0, 0x3ff805ac400) in /usr/shlib/libpthread.so (ladebug) thread 3 Thread Name State Substate Policy Pri ------ ------------------------- --------------- ----------- ------------ --- > 3 blocked mut 15 SCHED_OTHER 19 (ladebug) where >0 0x3ff805ba8ac in __hstTransferRegisters(0x20001e2f600, 0x0, 0x0, 0x100000000, 0x0, 0x3ff805ac400) in /usr/shlib/libpthread.so #1 0x3ff805acf74 in __osTransferContext(0x20001e2f600, 0x0, 0x0, 0x100000000, 0x0, 0x3ff805ac400) in /usr/shlib/libpthread.so #2 0x3ff805a004c in __dspDispatch(0x20001e2f600, 0x0, 0x0, 0x100000000, 0x0, 0x3ff805ac400) in /usr/shlib/libpthread.so #3 0x3ff805a94e4 in UnknownProcedure146FromFile0(0x20001e2f600, 0x0, 0x0, 0x100000000, 0x0, 0x3ff805ac400) in /usr/shlib/libpthread.so #4 0x3ff805a9bf8 in UnknownProcedure148FromFile0(0x20001e2f600, 0x0, 0x0, 0x100000000, 0x0, 0x3ff805ac400) in /usr/shlib/libpthread.so #5 0x3ff801bfee0 in UnknownProcedure2FromFile22(0x20001e2f600, 0x0, 0x0, 0x0, 0x3ff80119094, 0x3ff805ac400) in /usr/shlib/libc.so #6 0x3ff800cdae4 in malloc(0x20001e2f600, 0x0, 0x0, 0x0, 0x3ff80119094, 0x3ff805ac400) in /usr/shlib/libc.so #7 0x3ff805be20c in UnknownProcedure0FromFile99(0x20001e2f600, 0x0, 0x0, 0x0, 0x3ff80119094, 0x3ff805ac400) in /usr/shlib/libpthread.so #8 0x3ff805be508 in UnknownProcedure1FromFile99(0x20001e2f600, 0x0, 0x0, 0x0, 0x3ff80119094, 0x3ff805ac400) in /usr/shlib/libpthread.so #9 0x3ff805be5d0 in UnknownProcedure3FromFile99(0x20001e2f600, 0x0, 0x0, 0x0, 0x3ff80119094, 0x3ff805ac400) in /usr/shlib/libpthread.so #10 0x3ff807f369c in UnknownProcedure15FromFile0(0x20001e2f600, 0x0, 0x0, 0x0, 0x3ff80119094, 0x3ff805ac400) in /usr/shlib/libexc.so #11 0x3ff807f3a08 in 
exc_raise_signal_exception(0x20001e2f600, 0x0, 0x0, 0x0, 0x3ff80119094, 0x3ff805ac400) in /usr/shlib/libexc.so #12 0x3ff805b5a9c in UnknownProcedure283FromFile0(0x20001e2f600, 0x0, 0x0, 0x0, 0x3ff80119094, 0x3ff805ac400) in /usr/shlib/libpthread.so #13 0x3ff800d0bbc in __sigtramp(0x20001e2f600, 0x0, 0x0, 0x0, 0x3ff80119094, 0x3ff805ac400) in /usr/shlib/libc.so #14 0x3ff800e2158 in __kill(0x27aaf6, 0x6, 0x1000000, 0x0, 0x0, 0x1) in /usr/shlib/libc.so #15 0x12000aad0 in deadlockChecker(0x27aaf6, 0x6, 0x1000000, 0x0, 0x0, 0x1) in searchGENOME #16 0x3ff805c67e0 in __thdBase(0x27aaf6, 0x6, 0x1000000, 0x0, 0x0, 0x1) in /usr/shlib/libpthread.so (ladebug) thread 4 Thread Name State Substate Policy Pri ------ ------------------------- --------------- ----------- ------------ --- >* 4 blocked kern usleep SCHED_OTHER 19 (ladebug) where >0 0x3ff800e5e68 in __usleep_thread(0x20002335a30, 0x20002335a28, 0x0, 0x0, 0x12000a690, 0x12000a6c0) in /usr/shlib/libc.so #1 0x3ff80ba527c in nanosleep(0x20002335a30, 0x20002335a28, 0x0, 0x0, 0x12000a690, 0x12000a6c0) in /usr/shlib/librt.so #2 0x12000a6ec in loaderThread(0x20002335a30, 0x20002335a28, 0x0, 0x0, 0x12000a690, 0x12000a6c0) in searchGENOME #3 0x3ff805c67e0 in __thdBase(0x20002335a30, 0x20002335a28, 0x0, 0x0, 0x12000a690, 0x12000a6c0) in /usr/shlib/libpthread.so (ladebug) thread 5 Thread Name State Substate Policy Pri ------ ------------------------- --------------- ----------- ------------ --- > 5 blocked mut 15 SCHED_OTHER 19 (ladebug) where >0 0x3ff805ba8ac in __hstTransferRegisters(0x20000a0f600, 0x0, 0x0, 0x100000000, 0x20000a0fc40, 0x3ff805ac400) in /usr/shlib/libpthread.so #1 0x3ff805acf74 in __osTransferContext(0x20000a0f600, 0x0, 0x0, 0x100000000, 0x20000a0fc40, 0x3ff805ac400) in /usr/shlib/libpthread.so #2 0x3ff805a004c in __dspDispatch(0x20000a0f600, 0x0, 0x0, 0x100000000, 0x20000a0fc40, 0x3ff805ac400) in /usr/shlib/libpthread.so #3 0x3ff805a94e4 in UnknownProcedure146FromFile0(0x20000a0f600, 0x0, 0x0, 0x100000000, 
0x20000a0fc40, 0x3ff805ac400) in /usr/shlib/libpthread.so #4 0x3ff805a9bf8 in UnknownProcedure148FromFile0(0x20000a0f600, 0x0, 0x0, 0x100000000, 0x20000a0fc40, 0x3ff805ac400) in /usr/shlib/libpthread.so #5 0x3ff801bfee0 in UnknownProcedure2FromFile22(0x20000a0f600, 0x0, 0x2000283f600, 0x2000283d288, 0x3ff81f34150, 0x3ff805ac400) in /usr/shlib/libc.so #6 0x3ff800cdae4 in malloc(0x20000a0f600, 0x0, 0x2000283f600, 0x2000283d288, 0x3ff81f34150, 0x3ff805ac400) in /usr/shlib/libc.so #7 0x3ff81f32050 in UnknownProcedure3FromFile46(0x20000a0f600, 0x0, 0x2000283f600, 0x2000283d288, 0x3ff81f34150, 0x3ff805ac400) in /usr/lib/cmplrs/cxx/libcxx.so #8 0x3ff81f34190 in __cxx_v60_dispatch__X4need3new8libcxxso(0x20000a0f600, 0x0, 0x2000283f600, 0x2000283d288, 0x3ff81f34150, 0x3ff805ac400) in /usr/lib/cmplrs/cxx/libcxx.so #9 0x3ff807f29d4 in UnknownProcedure11FromFile0(0x20000a0f600, 0x0, 0x2000283f600, 0x2000283d288, 0x3ff81f34150, 0x3ff805ac400) in /usr/shlib/libexc.so #10 0x3ff807f2cd8 in exc_dispatch_exception(0x20000a0f600, 0x0, 0x2000283f600, 0x2000283d288, 0x3ff81f34150, 0x3ff805ac400) in /usr/shlib/libexc.so #11 0x3ff807f39e0 in exc_raise_signal_exception(0x20000a0f600, 0x0, 0x2000283f600, 0x2000283d288, 0x3ff81f34150, 0x3ff805ac400) in /usr/shlib/libexc.so #12 0x3ff805b5a9c in UnknownProcedure283FromFile0(0x20000a0f600, 0x0, 0x2000283f600, 0x2000283d288, 0x3ff81f34150, 0x3ff805ac400) in /usr/shlib/libpthread.so #13 0x3ff800d0bbc in __sigtramp(0x20000a0f600, 0x0, 0x2000283f600, 0x2000283d288, 0x3ff81f34150, 0x3ff805ac400) in /usr/shlib/libc.so #14 0x3ff801beb00 in UnknownProcedure12FromFile22(0x3ffc0087290, 0x0, 0x100000, 0x0, 0x3ff801bead0, 0x3ff801beba4) in /usr/shlib/libc.so #15 0x3ff800cf2c0 in free(0x3ffc0087290, 0x0, 0x100000, 0x0, 0x3ff801bead0, 0x3ff801beba4) in /usr/shlib/libc.so #16 0x3ff81f15a7c in operator delete(0x3ffc0087290, 0x0, 0x100000, 0x0, 0x3ff801bead0, 0x3ff801beba4) in /usr/lib/cmplrs/cxx/libcxx.so #17 0x120009680 in ~encodedQuery(0x3ffc0087290, 0x0, 
0x100000, 0x0, 0x3ff801bead0, 0x3ff801beba4) in searchGENOME #18 0x120009f9c in doSearch(0x3ffc0087290, 0x0, 0x100000, 0x0, 0x3ff801bead0, 0x3ff801beba4) in searchGENOME #19 0x12000a39c in searchThread(0x3ffc0087290, 0x0, 0x100000, 0x0, 0x3ff801bead0, 0x3ff801beba4) in searchGENOME #20 0x3ff805c67e0 in __thdBase(0x3ffc0087290, 0x0, 0x100000, 0x0, 0x3ff801bead0, 0x3ff801beba4) in /usr/shlib/libpthread.so #endif // DONT_EVER_ENABLE_THIS kmer-code-2013-trunk/seagen/Make.include0000644000000000000000000000364211512763666016654 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../libutil/)/ LIBBIO/ :=$(realpath $/../libbio/)/ LIBSEQ/ :=$(realpath $/../libseq/)/ LIBMERYL/ :=$(realpath $/../libmeryl/)/ LIBKMER/ :=$(realpath $/../libkmer/)/ LIBSIM4/ :=$(realpath $/../libsim4/)/ src := $/searchGENOME.C \ $/configuration.C \ $/encodedQuery.C \ $/thr-deadlock.C \ $/thr-loader.C \ $/thr-search.C \ $/thr-output.C \ $/hitMatrix-sort.C \ $/aHit.H \ $/aHit.C \ $/hitMatrix.H \ $/posix.H \ $/searchGENOME.H src_C := $(filter %.C,${src}) other_C := $/hitConverter.C \ $/filterEST.C \ $/filterEST-complicated.C \ $/filterMRNA.C \ $/filterNULL.C \ $/sortHits.C \ $/filtertest.C \ $/hitReader.C $/.CXX_SRCS := ${src_C} ${other_C} $/hitMatrix.C $/.CXX_EXES := $/seagen \ $/hitConverter \ $/filterEST $/filterMRNA $/filterNULL $/filtertest \ $/sortHits $/filterESTsimple $/.CLEAN :=$/*.o $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBKMER/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) $/seagen: $/hitMatrix.o $/seagen: ${src_C:.C=.o} \ ${LIBKMER/}libkmer.a ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/hitConverter: $/hitConverter.o $/aHit.o ${LIBUTL/}libutil.a $/filterEST: $/filterEST.o $/filterEST-complicated.o $/hitReader.o $/aHit.o ${LIBUTL/}libutil.a $/filterESTsimple: $/filterESTsimple.o $/hitReader.o $/aHit.o ${LIBUTL/}libutil.a $/filterMRNA: $/filterMRNA.o $/hitReader.o $/aHit.o ${LIBUTL/}libutil.a $/filterNULL: $/filterNULL.o $/hitReader.o $/aHit.o 
${LIBUTL/}libutil.a $/sortHits: $/sortHits.o $/aHit.o ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/filtertest: $/filtertest.o kmer-code-2013-trunk/seagen/hitConverter.C0000644000000000000000000000247212322046702017173 0ustar rootroot#include #include #include "aHit.H" void bin2asc(FILE *I, FILE *O) { uint32 i = 0; aHit a; fprintf(stderr, "Converting BINARY to ASCII.\n"); while (!feof(I)) { ahit_readBinary(&a, I); if (!feof(I)) { ahit_printASCII(&a, O); if ((++i & 0xffff) == 0) { fprintf(stderr, uint32FMT" hits.\r", i); fflush(stderr); } } } fprintf(stderr, uint32FMT" hits.\r", i); fprintf(stderr, "\n"); } void asc2bin(FILE *I, FILE *O) { uint32 i = 0; aHit a; char b[1025]; fprintf(stderr, "Converting ASCII to BINARY.\n"); while (!feof(I)) { fgets(b, 1024, I); if (!feof(I)) { ahit_parseString(&a, b); ahit_writeBinary(&a, O); if ((++i & 0xffff) == 0) { fprintf(stderr, uint32FMT" hits.\r", i); fflush(stderr); } } } fprintf(stderr, uint32FMT" hits.\r", i); fprintf(stderr, "\n"); } int main(int argc, char **argv) { if (argc != 1) { fprintf(stderr, "%s: I only read stdin and write stdout.\n", argv[0]); exit(1); } // If the first character in the stream is ascii, do ASCII -> BINARY. // Else, do BINARY -> ASCII. // char x = (char)fgetc(stdin); ungetc(x, stdin); if (x == '-') asc2bin(stdin, stdout); else bin2asc(stdin, stdout); return(0); } kmer-code-2013-trunk/seagen/hitReader.H0000644000000000000000000000320112322046702016422 0ustar rootroot#ifndef HITREADER_H #define HITREADER_H #include #include #include #include "bio.h" #include "aHit.H" // Define this if your hits have answers and you're curious about how // well the filter is performing. // // This is used in filterEST.C also. 
// //#define WITH_ANSWERS // XXX: Lots of this stuff can be moved into hitReader as temporary variables // typedef struct { bool stillMore; readBuffer *buff; char b[1024]; aHit a; bool isBINARY; } hitFile_s; typedef struct { aHit a; double coverage; double multiplicity; uint32 estid; #ifdef WITH_ANSWERS uint32 mappedIdentity; uint32 mappedCoverage; #endif } hit_s; class hitReader { public: hitReader(int m); ~hitReader(); void addInputFile(char *filename); void loadHit(hitFile_s *HF); bool loadHits(void); double bestScore(void) { return(_bestScore); }; double worstScore(void) { return(_worstScore); }; uint32 iid(void) { return(_iid); }; uint32 numHits(void) { return(_listLen); }; void sortByCoverage(void); void mergeOverlappingHits(void); hit_s &operator[](uint32 x) { if (x >= _listLen) { fprintf(stderr, "hitReader::operator[]()-- ERROR: asked for hit "uint32FMT" out of "uint32FMT".\n", x, _listLen); exit(1); } return(_list[x]); }; private: uint32 _filesMax; uint32 _filesLen; hitFile_s *_files; uint32 _listLen; uint32 _listMax; hit_s *_list; double _bestScore; double _worstScore; uint32 _iid; }; #endif // HITREADER_H kmer-code-2013-trunk/seagen/filterNULL.C0000644000000000000000000000105512322046702016473 0ustar rootroot#include "aHit.H" #include "hitReader.H" // A NULL filter. What comes in, comes out. Seems useless, but the // hitReader merges overlapping hits which would otherwise screw up // your mapping. 
int main(int argc, char **argv) { hitReader HR(argc); if (argc < 2) fprintf(stderr, "ESTmapper utility function -- not for human use.\n"), exit(1); int arg = 1; while (arg < argc) HR.addInputFile(argv[arg++]); while (HR.loadHits()) for (uint32 i=0; i < HR.numHits(); i++) ahit_printASCII(&HR[i].a, stdout); return(0); } kmer-code-2013-trunk/seagen/encodedQuery.C0000644000000000000000000001272012322046702017143 0ustar rootroot#include #include "encodedQuery.H" encodedQuery::encodedQuery(seqInCore *S, uint32 k) { _iid = S->getIID(); _sequenceLength = S->sequenceLength(); _merSize = k; _mersTotal = 0; _mersAvail = 0; _mers = 0L; _skip = 0L; _numberOfResults = 0; _output = 0L; _outputLen = 0; _outputMax = 0; if (k > _sequenceLength) return; char const *seq = S->sequence(); uint32 seqLen = S->sequenceLength(); _mersTotal = seqLen - k + 1; _mersAvail = 0; _mers = new uint64 [_mersTotal]; _skip = new uint8 [_mersTotal]; uint64 substring = uint64ZERO; uint64 mermask = uint64MASK(2 * k); int32 timeUntilValid = k; for (uint32 i=0; i= k) { _mers[i-k] = substring; _skip[i-k] = timeUntilValid > 0; _mersAvail += 1 - _skip[i-k]; } } } encodedQuery::~encodedQuery() { delete [] _mers; delete [] _skip; delete [] _output; } void encodedQuery::test(seqInCore *S) { // We assume we've been initialized with the forward version! uint32 k = _merSize; char const *seq = S->sequence(); uint32 seqLen = S->sequenceLength(); uint64 substring = uint64ZERO; uint64 mermask = uint64MASK(2 * k); int32 timeUntilValid = k; // Compute the complement version; we'll iterate through all data // in us, comparing against what the original method would say. 
uint32 _r_mersAvail = 0; uint64 *_r_mers = new uint64 [_mersTotal]; uint8 *_r_skip = new uint8 [_mersTotal]; substring = uint64ZERO; mermask = uint64MASK(2 * k); timeUntilValid = k; for (uint32 i=0; i= k) { _r_mers[i-k] = substring; _r_skip[i-k] = timeUntilValid > 0; _r_mersAvail += 1 - _r_skip[i-k]; } } #if 0 // For comparison, this is the original code used to compute the // reverse complement mers. for (uint32 i=0; i= k) { _mers[i-k] = substring; _skip[i-k] = timeUntilValid > 0; _mersAvail += 1 - _skip[i-k]; } } #endif // CHECK! // if (_r_mersAvail != _mersAvail) { fprintf(stderr, "encodedQuery::test()-- mersAvail incorrect: Recomputed:"uint32FMT" Real:"uint32FMT"\n", _mersAvail, _r_mersAvail); } char mer1[65]; char mer2[65]; bool fail = false; for (uint32 i=0; i<_mersTotal; i++) { if (getSkip(i, true) != _r_skip[i]) { fprintf(stderr, "encodedQuery::test()-- skip["uint32FMTW(4)"] incorrect: Acc:%d Real:%d\n", i, getSkip(i, true), _r_skip[i]); fail = true; } if (getSkip(i, true) == false) { if (getMer(i, true) != _r_mers[i]) { uint64ToMerString(_merSize, getMer(i, true), mer1); uint64ToMerString(_merSize, _r_mers[i], mer2); fprintf(stderr, "encodedQuery::test()-- mers["uint32FMTW(4)"] incorrect: Acc:"uint64HEX" %s Real:"uint64HEX" %s\n", i, getMer(i, true), mer1, _r_mers[i], mer2); fail = true; } } if (fail) { char rev[2048]; strcpy(rev, seq); fprintf(stderr, "seq='%s'\n", seq); fprintf(stderr, "rev='%s'\n", reverseComplementSequence(rev, seqLen)); exit(1); } } //fprintf(stderr, "encodedQuery::test()-- %s\n", seq); //fprintf(stderr, "encodedQuery::test()-- tested avail:"uint32FMT" total:"uint32FMT"\n", _mersAvail, _mersTotal); delete [] _r_mers; delete [] _r_skip; } void encodedQuery::addOutput(void *newout, uint32 size) { // Allocate space for the output -- 1MB should be enough for about // 29000 signals. Make it 32K -> 900 signals. 
// // You probably do not want to move this into the query // constructor, as that will just waste a lot of memory with // thousands of these in the input queue. // if (_output == 0L) { _outputLen = 0; _outputMax = 32 * 1024; _output = new char [_outputMax]; } if (_outputLen + 128 >= _outputMax) { _outputMax <<= 1; char *o = 0L; try { o = new char [_outputMax]; } catch (std::bad_alloc) { fprintf(stderr, "encodedQuery::addOutput()-- out of memory, tried to extend output string\n"); fprintf(stderr, "encodedQuery::addOutput()-- from "uint32FMT" to "uint32FMT" bytes.\n", _outputLen, _outputMax); exit(1); } memcpy(o, _output, _outputLen); delete [] _output; _output = o; } if (size > 0) { memcpy(_output + _outputLen, newout, size); _outputLen += size; } else { char *n = (char *)newout; while (*n) _output[_outputLen++] = *n++; _output[_outputLen] = 0; } _numberOfResults++; }; kmer-code-2013-trunk/seagen/thr-search.C0000644000000000000000000000410612322046702016553 0ustar rootroot#include "searchGENOME.H" #include "encodedQuery.H" // If you really, really, really want to know the exact number // of bases left in the query, use the interval list. Otherwise, // it's faster to guess. 
// //#define USEEXACTSIZE void doSearch(searcherState *state, encodedQuery *query, bool isReverse) { // Get the hits double startTime = getTime(); uint64 count = 0; hitMatrix *matrix = new hitMatrix(query->bpTotal(), query->bpCovered(false), query->IID()); for (uint32 qi=0; qinumberOfMers(); qi++) if ((query->getSkip(qi, isReverse) == false) && (config._positions->getExact(query->getMer(qi, isReverse), state->posn, state->posnMax, state->posnLen, count))) matrix->addHits(qi, state->posn, state->posnLen); state->searchTime += getTime() - startTime; // Filter, storing the resutls into theOutput startTime = getTime(); matrix->filter(query, isReverse); delete matrix; state->filterTime += getTime() - startTime; } void searchThread(void *U, void *T, void *Q) { searcherState *state = (searcherState *)T; encodedQuery *query = (encodedQuery *)Q; // Finish building the query -- mask out repetitive junk // double startTime = getTime(); if (config._maskDB) for (uint32 qi=0; qinumberOfMers(); qi++) if ((query->getSkip(qi, false) == false) && (config._maskDB->exists(query->getMer(qi, false)))) query->setSkip(qi, false); if (config._onlyDB) for (uint32 qi=0; qinumberOfMers(); qi++) if ((query->getSkip(qi, false) == false) && (!config._onlyDB->exists(query->getMer(qi, false)))) query->setSkip(qi, false); state->maskTime += getTime() - startTime; // Do searches. 
// if (config._doForward) doSearch(state, query, false); if (config._doReverse) doSearch(state, query, true); } kmer-code-2013-trunk/seagen/filterMRNA.C0000644000000000000000000000517112322046702016461 0ustar rootroot#include #include #include #include #include #include "aHit.H" #include "hitReader.H" int main(int argc, char **argv) { if (argc < 2) fprintf(stderr, "ESTmapper utility function -- not for human use.\n"), exit(1); hitReader HR(argc); double L = 0.2; double H = 0.6; double V = 0.7; double M = 0.3; double MC = 0.2; uint32 ML = 150; bool beVerbose = false; int arg = 1; while (arg < argc) { if (strncmp(argv[arg], "-v", 2) == 0) { beVerbose = true; } else if (strcmp(argv[arg], "-l") == 0) { L = atof(argv[++arg]); } else if (strcmp(argv[arg], "-h") == 0) { H = atof(argv[++arg]); } else if (strcmp(argv[arg], "-v") == 0) { V = atof(argv[++arg]); } else if (strcmp(argv[arg], "-m") == 0) { M = atof(argv[++arg]); } else if (strcmp(argv[arg], "-mc") == 0) { MC = atof(argv[++arg]); } else if (strcmp(argv[arg], "-ml") == 0) { ML = atoi(argv[++arg]); } else { HR.addInputFile(argv[arg]); } arg++; } if (beVerbose) { fprintf(stderr, "Filtering with:\n"); fprintf(stderr, " score difference of %4.2f or less -> 100.0%% of best score\n", L); fprintf(stderr, " score difference of %4.2f or more -> %5.1f%% of best score\n", H, 100*V); fprintf(stderr, " scores at least %4.2f are always output\n", M); fprintf(stderr, " scores at least %4.2f AND at least "uint32FMT" bases covered are always output\n", MC, ML); } while (HR.loadHits()) { HR.sortByCoverage(); double hiCov = HR[0].coverage; double loCov = HR[0].coverage; for (uint32 i=0; i < HR.numHits(); i++) if ((HR[i].a._merged == false) && (loCov > HR[i].coverage)) loCov = HR[i].coverage; double h = hiCov - loCov; double p = 0.0; if (h <= L) p = 1.0; if (h >= H) p = V; if (p == 0.0) p = 1.0 - (1.0 - V) * (h - L) / (H - L); // check p; it should be between V and 1.0 if ((p > 1.0) || (p < V)) fprintf(stderr, "error in p; p=%f\n", 
p); // Output the top p% hits, by score. double cutL = HR[0].coverage - p * h; if (cutL > M) cutL = M; // Save the hit if it has good coverage and it's either above // the minimum coverage or long. Also blindly save merged // hits. // for (uint32 i=0; i < HR.numHits(); i++) if (((cutL <= HR[i].coverage) && ((MC <= HR[i].coverage) || (ML <= HR[i].a._covered))) || (HR[i].a._merged)) ahit_printASCII(&HR[i].a, stdout); } return(0); } kmer-code-2013-trunk/trie/0000755000000000000000000000000012641613357014121 5ustar rootrootkmer-code-2013-trunk/trie/trie.C0000644000000000000000000002127612322046702015166 0ustar rootroot#include "util++.H" #include "bio++.H" //#define ALPHALEN 4 #define ALPHALEN 20 // NOTE that our list of letters is not alphabetic. The DNA letters // are first, then the rest of the protein letters. // const char *trieAlpha = "acgtdefhiklmnpqrsvwy"; uint32 trieAlphaMap[256] = {0}; class trieNode { public: trieNode(void) { for (uint32 i=0; inodeiid < B->nodeiid) return(-1); if (A->nodeiid > B->nodeiid) return(1); return(0); } uint32 addSequence(trieNode *nodes, uint32 &nodesLen, trieSeqPtr *seqptr, uint32 &seqptrLen, seqInCore *S, bool isReverse) { char *s = 0L; uint32 n = 0; if (S->sequenceLength() < 12) return(0); for (s = S->sequence(); *s; s++) if (trieAlphaMap[*s] == 0) return(0); for (s = S->sequence(); *s; s++) { uint32 v = trieAlphaMap[*s] - 1; // add a new pointer if needed if (nodes[n].next[v] == ~uint32ZERO) nodes[n].next[v] = nodesLen++; // Go there n = nodes[n].next[v]; } // add this sequence to node i -- after all sequences have been // added, we'll sort this list and build pointers. 
seqptr[seqptrLen].seqiid = S->getIID(); seqptr[seqptrLen].nodeiid = n; seqptr[seqptrLen].defline = strdup(S->header()); seqptr[seqptrLen].reversed = isReverse; seqptrLen++; return(1); } int main(int argc, char **argv) { char *queries = 0L; char *genome = 0L; FILE *logfile = 0L; int arg=1; int err=0; while (arg < argc) { if (strcmp(argv[arg], "-q") == 0) { queries = argv[++arg]; } else if (strcmp(argv[arg], "-g") == 0) { genome = argv[++arg]; } else if (strcmp(argv[arg], "-l") == 0) { errno = 0; logfile = fopen(argv[++arg], "w"); if (errno) fprintf(stderr, "Failed to open logfile '%s': %s\n", argv[arg], strerror(errno)), exit(1); } else { err++; } arg++; } if (queries == 0L) err = 1; if (genome == 0L) err = 1; if (err) { fprintf(stderr, "usage: %s -q queries.fasta -g genome.fasta\n", argv[0]); fprintf(stderr, " -q queries.fasta -- the input with the short stuff\n"); fprintf(stderr, " -q queries.fasta -- the input with the short stuff\n"); exit(1); } for (uint32 i=0; igetSequenceInCore()) != 0L) { uint32 success = 0; success += addSequence(nodes, nodesLen, seqptr, seqptrLen, S, false); #if ALPHALEN == 4 reverseComplementSequence(S->sequence(), S->sequenceLength()); success += addSequence(nodes, nodesLen, seqptr, seqptrLen, S, true); #else success++; #endif if (success != 2) if (logfile) fprintf(logfile, "Failed to add sequence '%s' ('%s').\n", S->header(), S->sequence()); if (nodesLen >= nodesMax) fprintf(stderr, "ERROR: out of node space.\n"), exit(1); if (seqptrLen >= seqptrMax) fprintf(stderr, "ERROR: out of seqptr space.\n"), exit(1); delete S; } delete F; fprintf(stderr, "Used "uint32FMT" trie nodes. \n", nodesLen); // Fix up sequence pointers - we could probably do this inplace // with some trickery, but why? qsort(seqptr, seqptrLen, sizeof(trieSeqPtr), trieSeqPtrCompare); // Now sorted by node iid, so run through both arrays and set // pointers. We point to the first thing found, and remember // the number of things found. 
for (uint32 i=0; igetSequenceInCore()) != 0L) { char *s = S->sequence(); uint32 siid = S->getIID(); uint32 spos = 0; uint32 n[256] = {0}; // Pointer into the trie uint32 d[256] = {0}; // Depth this pointer is at (== sequence length) uint32 nLen = 0; //fprintf(stderr, "WORKING ON '%s'\n", S->header()); while (*s) { if (trieAlphaMap[*s] == 0) { // Not a valid symbol, all node pointers are killed, no exact matches // possible! nLen = 0; } else { // Valid symbol. Advance all pointers, print out any // matches, kill any pointers, and then finally add a new // one. uint32 v = trieAlphaMap[*s] - 1; uint32 ni; uint32 nj; // Advance pointers. // for (ni=0; ni 0) { for (nj=0; nj\n", seqptr[p].seqiid, d[ni] + 1, siid, d[ni] + 1, seqptr[p].reversed ? "complement" : "forward"); fprintf(stdout, "edef=%s\n", seqptr[p].defline); fprintf(stdout, "ddef=%s\n", S->header()); fprintf(stdout, "1-"uint32FMT" ("uint32FMT"-"uint32FMT") <"uint32FMT"-0-100>\n", d[ni] + 1, spos - d[ni] + 1, spos + 1, d[ni] + 1); fprintf(stdout, "sim4end\n"); } } } } // Add a new pointer for the just seen letter // if (nodes[0].next[v] != ~uint32ZERO) { d[nLen] = 0; n[nLen++] = nodes[0].next[v]; } } s++; spos++; } delete S; } // We should print out the total number of matches for each // sequence.... Report those with matches first. // if (logfile) { for (uint32 i=0; i 0)) fprintf(logfile, "sequence "uint32FMT" '%s' has "uint32FMT" matches.\n", seqptr[i].seqiid, seqptr[i].defline, nummatches[seqptr[i].seqiid]); for (uint32 i=0; i /dev/null), ${MAKEFILE/}/Make.compilers) MAKECOMPILERSNOTHING := $(shell ${MAKEFILE/}configure.sh) endif include ${MAKEFILE/}Make.compilers ##### non-recursive make magic # all directories which have been included // := # current path (empty) / := # recursive directories to be filtered out of // # and handled differently //-RECURSIVE := define MakeRecursive //-RECURSIVE :=$$/. 
endef ## Include -hack # Include is wrapped in something which will push and pop / # properly while adding newly discovered directories to // # and keeping track of who is who's children (.SUBS). # # Each directory so included has its Make.include file included. # Those Make.include files can use $/ prepended to local names # to prevent name pollution, and define their own subtargets. # define Include $(foreach x,$(strip ${1}),$(call Include_File,$x)) endef define Include_File ifeq ($(filter ${1}.,${WITHOUT_}),) ifeq ($(wildcard ${1}Make.include),${1}Make.include) $/.SUBS +=${1}. // +=${1}. ${1}.SUBS := / :=${1} include ${1}Make.include / :=$/ endif endif endef ##### System specific includes ## WITHOUT # If the user specifies a WITHOUT, then those paths are not # followed. ifndef WITHOUT WITHOUT:= endif WITHOUT_:=$(patsubst %,%.,$(strip ${WITHOUT})) ## First Make.include inclusion # invoke the toplevel include file. # We use 'Include_File' instead of 'Include' since $/ is empty $(eval $(call Include_File,$/)) #### Targets which have been declared RECURSIVE are removed # from // and processed separately. // :=$(filter-out ${//-RECURSIVE},${//}) # //-RECURSIVE now holds the paths which are being done legacy style # and // holds the paths which are going to be part of the system. ### Building subgoals in Make.rules # subgoals and depends are done with the deferred '=' not the ':='. # This is because we want dynamic scoping. __SUBGOALS__= __DEPGOALS__= -include ${MAKEFILE/}Make.rules # now we bring in the depends files as defined by the Includes # and the patterns in Make.rules $(eval DEPENDS:=$(foreach x,${//},$(call __DEPGOALS__,$x))) ifneq ($(strip ${DEPENDS}),) # this conditional gets us a way out if things go way wrong ifeq ($(filter %-clean,${MAKECMDGOALS}),) -include ${DEPENDS} endif endif ### the standard make targets, applied to all subdirectory targets # We define the basic form of the all, clean, ... rules on a # per-path basis (the $/.all, $..clean, ... 
targets). This allows # selective targeting. ## rules for each subtarget # current subtargets are : # all, ls, clean, real-clean, depends-clean, install # with all, clean, real-clean, install being required targets # of recursive makes. clean: ${//-RECURSIVE:.=.clean} $/.clean define .RULE-clean ${1:.=.clean}: $${${1:.=.SUBS}:.=.clean} ${RM} $${${1:.=.CLEAN}} ${__SUBGOALS__} ifneq ($(strip ${C_TMP_COMPILE} ${CXX_TMP_COMPILE}),) (cd $1 && ${RM} -r ${C_TMP_COMPILE} ${CXX_TMP_COMPILE}) endif endef $(eval $(foreach x,${//},$(call .RULE-clean,$x))) depends-clean: $/.depends-clean ${//-RECURSIVE:.=.depends-clean}: define .RULE-depends-clean ${1:.=.depends-clean}: $${${1:.=.SUBS}:.=.depends-clean} ${RM} ${1:.=Make.depends} ${__DEPGOALS__} endef $(eval $(foreach x,${//},$(call .RULE-depends-clean,$x))) real-clean: ${//-RECURSIVE:.=.real-clean} $/.real-clean define .RULE-real-clean ${1:.=.real-clean}: $${${1:.=.SUBS}:.=.real-clean} ${RM} $${${1:.=.CLEAN}} ${__SUBGOALS__} ${1:.=Make.depends} ${__DEPGOALS__} $${${1:.=.REAL-CLEAN}} #ifneq ($(strip ${INSTALL/}),) # ${RM} -r ${INSTALL/} #endif ifneq ($(strip ${C_TMP_COMPILE} ${CXX_TMP_COMPILE}),) (cd $1 && ${RM} -r ${C_TMP_COMPILE} ${CXX_TMP_COMPILE}) endif endef $(eval $(foreach x,${//},$(call .RULE-real-clean,$x))) all: ${//-RECURSIVE:.=.all} $/.all define .RULE-all ${1:.=.all}: $${${1:.=.SUBS}:.=.all} ${__SUBGOALS__} endef $(eval $(foreach x,${//},$(call .RULE-all,$x))) ## INSTALL: # Define targets which give a basic recursive traversal to hang # things from for anything install related that I haven't thought of # this does the copy part of the install ${//-RECURSIVE:.=.install-copy}: install-copy: ${//-RECURSIVE:.=.install-copy} $/.install-copy define .RULE-install-copy ${1:.=.install-copy}: $${${1:.=.SUBS}:.=.install-copy} endef $(eval $(foreach x,${//},$(call .RULE-install-copy,$x))) # define the actual install target as being a combination of the # all target plus the pre-/install-copy targets install: 
${//-RECURSIVE:.=.install} $/.install define .RULE-install ${1:.=.install}: ${1:.=.all} ${1:.=.install-copy} endef $(eval $(foreach x,${//},$(call .RULE-install,$x))) # a sort of debugging thing. Let's me check on which targets # actually didn't get made, or got partially made LSOPTS:=-l ls: $/.ls ${//-RECURSIVE:.=.ls}: define .RULE-ls ${1:.=.ls}: $${${1:.=.SUBS}:.=.ls} @ files='$$(strip ${__SUBGOALS__})'; \ if [ -n "$$$${files}" ] ; then \ ls ${LSOPTS} $$$${files} ; \ fi ; exit 0; endef $(eval $(foreach x,${//},$(call .RULE-ls,$x))) kmer-code-2013-trunk/atac-driver/0000755000000000000000000000000012641613361015352 5ustar rootrootkmer-code-2013-trunk/atac-driver/mismatchCounter/0000755000000000000000000000000012641613360020516 5ustar rootrootkmer-code-2013-trunk/atac-driver/mismatchCounter/Make.include0000644000000000000000000000101511512763666022750 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../../libutil/)/ LIBBIO/ :=$(realpath $/../../libbio/)/ LIBSEQ/ :=$(realpath $/../../libseq/)/ LIBATAC/ :=$(realpath $/../libatac/)/ $/.CXX_EXES := $/mismatchCounter $/.CXX_SRCS := $/mismatchCounter.C $/.CLEAN :=$/*.o $/*~ $/core $/mismatchCounter: $/mismatchCounter.o \ ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBATAC/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) kmer-code-2013-trunk/atac-driver/mismatchCounter/mismatchCounter.C0000644000000000000000000001355312322046702023772 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. 
// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "bio++.H" #include "seqCache.H" #include "atac.H" #define ANNOTATE #define EXTRAMATCHES // Generates a histogram of the exact match block sizes // Counts to global number of mismatches // Annotates each match with the number of mismatches // Checks for identities outside matches void updateExactBlockHistogram(uint32 *blockHistogram, uint32 blockMatches) { if (blockMatches > 8 * 1024 * 1024) blockHistogram[0]++; else blockHistogram[blockMatches]++; } int main(int argc, char *argv[]) { int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-h") == 0) { // Generate a histogram of exact-match lengths } else if (strcmp(argv[arg], "-a") == 0) { // Annotate each match with the percent error, compute // the global percent error. 
} else if (strcmp(argv[arg], "-e") == 0) { // Generate a histogram of the percent error in each match } else if (strcmp(argv[arg], "-c") == 0) { // Check the edges of each match to ensure there isn't a match } else { fprintf(stderr, "usage: %s [-h exact-match-histogram] [-a] [-e error-histogram] [-c]\n", argv[0]); fprintf(stderr, " -h: histogram of the length of the exact match blocks\n"); fprintf(stderr, " -a: annotate each match with the percent error, write to stdout\n"); fprintf(stderr, " -e: histogram of the error rate of each match\n"); fprintf(stderr, " -c: check that the next base on each side is a mismatch\n"); exit(1); } arg++; } uint32 globalSequence = 0; uint32 globalMismatches = 0; uint32 blockMatches = 0; uint32 *blockHistogram = new uint32 [8 * 1024 * 1024]; for (uint32 x=0; x<8*1024*1024; x++) blockHistogram[x] = 0; atacFile AF("-"); atacMatchList &ML = *AF.matches(); seqCache *C1 = new seqCache(AF.assemblyFileA(), 1, false); seqCache *C2 = new seqCache(AF.assemblyFileA(), 1, false); for (uint32 mi=0; migetSequenceInCore(m->iid1); seqInCore *S2 = C2->getSequenceInCore(m->iid2); FastAAccessor A1(S1, false); FastAAccessor A2(S2, (m->fwd1 != m->fwd2)); A1.setRange(m->pos1, m->len1); A2.setRange(m->pos2, m->len2); uint32 localMismatches = 0; #ifdef EXTRAMATCHES uint32 extraMatchesL = 0; uint32 extraMatchesR = 0; // Check for matches on either side of the region. 
A1.setPosition(m->pos1); A2.setPosition(m->pos2); --A1; --A2; while (A1.isValid() && A2.isValid() && (letterToBits[(int)*A1] != 0xff)&& (letterToBits[(int)*A2] != 0xff) && IUPACidentity[(int)*A1][(int)*A2]) { extraMatchesL++; --A1; --A2; } A1.setPosition(m->pos1 + m->len1 - 1); A2.setPosition(m->pos2 + m->len2 - 1); ++A1; ++A2; while (A1.isValid() && A2.isValid() && (letterToBits[(int)*A1] != 0xff)&& (letterToBits[(int)*A2] != 0xff) && IUPACidentity[(int)*A1][(int)*A2]) { extraMatchesR++; ++A1; ++A2; } // WARN if we found extra identities #if 0 if (extraMatchesL + extraMatchesR > 0) { A1.setPosition(m->pos1); A2.setPosition(m->pos2); chomp(inLine); fprintf(stderr, "WARNING: found "uint32FMT" extra matches to the left and "uint32FMT" extra matches to the right in %s\n", extraMatchesL, extraMatchesR, inLine); #if 0 for (uint32 ii=0; iilen1; ii++, ++A1) fprintf(stdout, "%c", *A1); fprintf(stdout, "\n"); for (uint32 ii=0; iilen1; ii++, ++A2) fprintf(stdout, "%c", *A2); fprintf(stdout, "\n"); #endif } #endif #endif // EXTRAMATCHES A1.setPosition(m->pos1); A2.setPosition(m->pos2); for (uint32 ii=0; iilen1; ii++, ++A1, ++A2) { // Count global matches / mismatches // globalSequence++; if (!((letterToBits[(int)*A1] != 0xff) && (letterToBits[(int)*A2] != 0xff) && IUPACidentity[(int)*A1][(int)*A2])) { globalMismatches++; localMismatches++; } // Histogram of exact match block lengths // if ((letterToBits[(int)*A1] != 0xff) && (letterToBits[(int)*A2] != 0xff) && IUPACidentity[(int)*A1][(int)*A2]) { blockMatches++; } else { updateExactBlockHistogram(blockHistogram, blockMatches); blockMatches = 0; } } // Finish off stuff // updateExactBlockHistogram(blockHistogram, blockMatches); blockMatches = 0; // If annotate, emit a new record. 
} // Report stuff // fprintf(stderr, "globalSequence = "uint32FMT"\n", globalSequence); fprintf(stderr, "globalMismatches = "uint32FMT"\n", globalMismatches); #if 0 FILE *O = fopen("MismatchCounter.block.histogram.out", "w"); for (uint32 i=0; i<8 * 1024 * 1024; i++) fprintf(O, uint32FMT" "uint32FMT"\n", i, blockHistogram[i]); fclose(O); #endif return(0); } kmer-code-2013-trunk/atac-driver/interscaffold-gaps.pl0000644000000000000000000000617210323337014021461 0ustar rootroot#!/usr/bin/perl use strict; my $atacFile = undef; my $minLen = 10000; my $maxChr = 24; my $reference = 0; while (scalar(@ARGV)) { my $arg = shift @ARGV; if ($arg eq "-m") { $minLen = shift @ARGV; } elsif ($arg eq "-c") { $maxChr = shift @ARGV; } elsif ($arg eq "-a") { $atacFile = shift @ARGV; } elsif ($arg eq "-A") { $reference = 0; } elsif ($arg eq "-B") { $reference = 1; } else { die "Invalid option '$arg'\n"; } } if (!defined($atacFile)) { print STDERR "usage: $0 [-m minlen] [-c maxchr] [-A | -B] -a \n"; print STDERR " -m m Include matches larger than 'm'. Default: 10000\n"; print STDERR " -c c Include chromosomes in the reference below 'c'. Default: 24 (1-22+X+Y)\n"; print STDERR " -A | -B The reference genome is sequence A (B).\n"; print STDERR " -a x Process matches from atac file 'x'\n"; exit(1); } #if (! 
-e "$atacFile.gaps.sorted") { open(F, "< $atacFile"); open(G, "| sort -k1n -k2n > $atacFile.gaps.sorted"); while () { if (m/^M\sr\s/) { my @vals = split '\s+', $_; (undef, $vals[4]) = split ':', $vals[4]; (undef, $vals[8]) = split ':', $vals[8]; if (($reference == 0) && ($vals[4] < $maxChr) && ($vals[6] > $minLen)) { print G "$vals[4] $vals[5] $vals[6] - $vals[8] $vals[9] $vals[10] - $vals[11]\n"; } if (($reference == 1) && ($vals[8] < $maxChr) && ($vals[10] > $minLen)) { print G "$vals[8] $vals[9] $vals[10] - $vals[4] $vals[5] $vals[6] - $vals[11]\n"; } } } close(F); close(G); #} my $lastscf = -1; my @chr; my @pos; my @len; my @scf; open(F, "< $atacFile.gaps.sorted"); while () { my @vals = split '\s+', $_; push @chr, $vals[0]; push @pos, $vals[1]; push @len, $vals[2]; push @scf, $vals[4]; } close(F); push @scf, -1; push @scf, -1; my $num = scalar(@chr) - 1; # We compute stats on the distance between elements i and j # my $i = 0; my $j = 1; print "GAPS\n"; while ($i < $num) { $j = $i+1; if ($chr[$i] != $chr[$j]) { $i++; next; } again: # Move j ahead if it's an interleaved scaffold # # If our current scaffold is interleaved by someone else, # skip that someone else. Yes, the end result of this # is to have i and j point to the same scaffold. # if (($chr[$i] == $chr[$j]) && ($scf[$i] == $scf[$j+1]) && ($len[$j] < 5000)) { $j++; } # Move j ahead if it's the same scaffold # if (($chr[$i] == $chr[$j]) && ($scf[$i] == $scf[$j])) { $i = $j; $j++; goto again; } # Report, if begin and end are on the same chromosome. 
if ($chr[$i] == $chr[$j]) { my $aend = $pos[$i] + $len[$i]; my $bsta = $pos[$j]; my $gapl = $bsta - $aend; if ($gapl > 0) { print "GAP: $gapl -- $i ($chr[$i] $pos[$i] $len[$i] $scf[$i]) -- $j ($chr[$j] $pos[$j] $len[$j] $scf[$j])\n"; } } $i = $j; } kmer-code-2013-trunk/atac-driver/lengthFilter/0000755000000000000000000000000012641613360020000 5ustar rootrootkmer-code-2013-trunk/atac-driver/lengthFilter/lengthFilter.C0000644000000000000000000000737312322046702022541 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include #include "util++.H" // Filters out matches that are too short. // // Original implementation in Python by Clark Mobarry. void readHeader(char *inLine, FILE *in, uint32 &minLength, FILE *out) { bool printedLength = false; fgets(inLine, 1024, in); while (!feof(in) && (inLine[0] != 'M')) { if (strncmp(inLine, "/globalMatchMinSize", 18) == 0) { if (minLength > 0) { // Skip any whitespace, the =, and more whitespace. Copy. 
char *tmp = inLine + 14; while (isspace(*tmp)) tmp++; while (*tmp == '=') tmp++; while (isspace(*tmp)) tmp++; minLength = strtouint32(tmp, 0L); } sprintf(inLine, "/globalMatchMinSize="uint32FMT"\n", minLength); printedLength = true; } if (out) fputs(inLine, out); fgets(inLine, 1024, in); } if (printedLength == false) fprintf(stdout, "/globalMatchMinSize="uint32FMT"\n", minLength); if (minLength == 0) { fprintf(stderr, "I didn't find /globalMatchMinSize, please set it with -l\n"); exit(1); } } int main(int argc, char **argv) { char inLine[1024] = {0}; uint32 minLength = 0; uint32 totalDumped = 0; uint32 totalDumpedLength = 0; uint32 totalSaved = 0; uint32 totalSavedLength = 0; int arg = 1; while (arg < argc) { if (strcmp(argv[arg], "-l") == 0) { minLength = strtouint32(argv[++arg], 0L); } else { fprintf(stderr, "usage: %s [-h] [-l length] < matches.atac > matches.atac\n", argv[0]); fprintf(stderr, " filters out all matches less than 'length' long.\n"); exit(1); } arg++; } readHeader(inLine, stdin, minLength, stdout); // we need to parse the header to get globalMatchMinSize, // and then let the command line override it. just make // a custom readHeader() for here, do it there. nothing // difficult. 
while (!feof(stdin)) { if (inLine[0] == 'M') { splitToWords S(inLine); if ((strtouint32(S[ 6], 0L) >= minLength) && (strtouint32(S[10], 0L) >= minLength)) { totalSaved++; totalSavedLength += strtouint32(S[ 6], 0L); fputs(inLine, stdout); } else { totalDumped++; totalDumpedLength += strtouint32(S[ 6], 0L); } } else { fputs(inLine, stdout); } fgets(inLine, 1024, stdin); } fprintf(stderr, "lengthFilter: Discarded "uint32FMTW(8)" matches with total length "uint32FMTW(10)", %7.3f%% of the sequence in matches.\n", totalDumped, totalDumpedLength, (double)totalDumpedLength / (totalDumpedLength + totalSavedLength) * 100.0); fprintf(stderr, "lengthFilter: Saved "uint32FMTW(8)" matches with total length "uint32FMTW(10)", %7.3f%% of the sequence in matches.\n", totalSaved, totalSavedLength, (double)totalSavedLength / (totalDumpedLength + totalSavedLength) * 100.0); } kmer-code-2013-trunk/atac-driver/lengthFilter/Make.include0000644000000000000000000000100411512763666022230 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../../libutil/)/ LIBBIO/ :=$(realpath $/../../libbio/)/ LIBSEQ/ :=$(realpath $/../../libseq/)/ LIBATAC/ :=$(realpath $/../libatac/)/ $/.CXX_SRCS := $/lengthFilter.C $/.CXX_EXES := $/lengthFilter $/.CLEAN :=$/*.o $/*~ $/core $/lengthFilter: $/lengthFilter.o \ ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBATAC/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) kmer-code-2013-trunk/atac-driver/test/0000755000000000000000000000000012641613360016330 5ustar rootrootkmer-code-2013-trunk/atac-driver/test/uf-test-3.atac0000644000000000000000000000133310215770307020711 0ustar rootroot! format atac 1.0 # # edge effects # # anchored on the A sequence, second match on the inside, third match on the outside. # # anchored on A, i, o # anchored on A, o, i # anchored on B, i, o # anchored on B, o, i M u 011 . A:0 400 200 1 B:0 400 200 1 M u 012 . A:0 400 100 1 B:0 200 100 1 M u 013 . 
A:0 600 100 1 B:0 800 100 1 # M u 014 . A:1 400 200 1 B:1 400 200 1 M u 015 . A:1 300 100 1 B:1 200 100 1 M u 016 . A:1 500 100 1 B:1 800 100 1 # M u 017 . A:2 400 200 1 B:2 400 200 1 M u 018 . A:2 200 100 1 B:2 400 100 1 M u 019 . A:2 800 100 1 B:2 600 100 1 # M u 020 . A:3 400 200 1 B:3 400 200 1 M u 021 . A:3 200 100 1 B:3 300 100 1 M u 022 . A:3 800 100 1 B:3 500 100 1 kmer-code-2013-trunk/atac-driver/test/uf-test-2.atac0000644000000000000000000000065510215770307020716 0ustar rootroot! format atac 1.0 # Left, right edges, forward # M u 002f . A:2 10 30 1 B:2 30 30 1 M u 003f . A:2 60 60 1 B:2 50 60 1 M u 004f . A:2 100 50 1 B:2 120 50 1 M u 005f . A:2 160 40 1 B:2 160 40 1 M u 006f . A:2 210 50 1 B:2 180 50 1 # Left, right edges, reverse # M u 010r . A:3 10 70 1 B:3 30 70 -1 M u 011r . A:3 60 60 1 B:3 140 60 -1 M u 012r . A:3 140 90 1 B:3 210 90 -1 M u 013r . A:3 240 120 1 B:3 240 120 -1 kmer-code-2013-trunk/atac-driver/test/uf-test-1f.atac0000644000000000000000000000034610215770307021060 0ustar rootroot! format atac 1.0 # # contained and kill # M u 001f . A:0 30 90 1 B:0 10 90 1 M u 666a . A:0 40 10 1 B:0 900 10 1 M u 666b . A:0 900 20 1 B:0 40 20 1 M u 666c . A:0 80 10 1 B:0 920 10 1 M u 666d . A:0 920 10 1 B:0 90 10 1 kmer-code-2013-trunk/atac-driver/test/uf-test-1r.atac0000644000000000000000000000036010215770307021070 0ustar rootroot! format atac 1.0 # # contained and kill, reverse # M u 001f . A:0 30 90 1 B:0 10 90 -1 M u 666a . A:0 40 10 1 B:0 900 10 1 M u 666b . A:0 900 20 1 B:0 10 20 1 M u 666c . A:0 80 10 1 B:0 920 10 1 M u 666d . A:0 920 10 1 B:0 70 10 1 kmer-code-2013-trunk/atac-driver/relabel.pl0000644000000000000000000000561610727710072017325 0ustar rootroot#!/usr/bin/perl # Reads an atac file, relabels the sequence names (e.g., WGSA:4) with # the defline ID's. 
use strict; sub readDeflines ($) { my $file = $_[0]; if ($file =~ m/^(.*).fasta/) { $file = $1; } if (-e "$file.deflines") { $file = "$file.deflines"; } elsif (-e "$file.fasta.deflines") { $file = "$file.fasta.deflines"; } else { print STDERR "Dang, gotta grep the deflines!\n"; system("grep '>' $file.fasta > $file.deflines"); $file = "$file.deflines"; } my @nameA; #print STDERR "$file\n"; open(Z, "< $file") or die "Failed to open '$file'\n"; while (!eof(Z)) { my $n = ; if ($n =~ m/^\>\s*(\S+)\s*/) { push @nameA, $1; } else { chomp $n; print STDERR "Failed to match defline '$n'\n"; } } close(Z); return(@nameA); } my $file = shift @ARGV; my @nameA; my @nameB; if ($file eq "-A") { @nameA = readDeflines(shift @ARGV); $file = shift @ARGV; } if ($file eq "-B") { @nameB = readDeflines(shift @ARGV); $file = shift @ARGV; } open(F, "< $file") or die "Failed to open '$file' for input\n"; open(G, "> $file.uids") or die "Failed to open '$file.uids' for output\n"; while () { if (m/assemblyFile1=(.*)$/) { @nameA = readDeflines($1); print STDERR "num nameA = ", scalar(@nameA), "\n"; } if (m/assemblyFile2=(.*)$/) { @nameB = readDeflines($1); print STDERR "num nameB = ", scalar(@nameB), "\n"; } if (m/^M\s/) { my @v = split '\s+', $_; if ($v[4] =~ m/^\w+:(\d+)$/) { if (defined($nameA[$1])) { $v[4] = $nameA[$1]; } else { die "Didn't find nameA for $1\n"; } } else { die "Didn't match v[4] = $v[4]\n"; } if ($v[8] =~ m/^\w+:(\d+)$/) { if (defined($nameB[$1])) { $v[8] = $nameB[$1]; } else { die "Didn't find nameA for $1\n"; } } else { die "Didn't match v[8] = $v[8]\n"; } # Special case stuff.... 
# if ($v[4] =~ m/^Chr(\d+)$/) { $v[4] = "mchr$1"; } elsif ($v[4] =~ m/^Chr(\d+)_random$/) { $v[4] = "mchr${1}r"; } elsif ($v[4] =~ m/^SCAFFOLD(\d+)$/) { $v[4] = "bscf$1"; } elsif ($v[4] =~ m/^Contig(\d+)$/) { $v[4] = "wscf$1"; } elsif ($v[4] =~ m/^chr(\d+)$/) { $v[4] = "hchr$1"; } if ($v[8] =~ m/^SCAFFOLD(\d+)$/) { $v[8] = "bscf$1"; } elsif ($v[8] =~ m/^Contig(\d+)$/) { $v[8] = "wscf$1"; } elsif ($v[8] =~ m/^chr(\d+)$/) { $v[8] = "hchr$1"; } my $line = join " ", @v; print G "$line\n"; } else { print G $_; } } close(G); close(F); kmer-code-2013-trunk/atac-driver/gapShifter/0000755000000000000000000000000012641613360017445 5ustar rootrootkmer-code-2013-trunk/atac-driver/gapShifter/testAtac.C0000644000000000000000000000612012322046702021314 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2006 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "bio++.H" #include "atac.H" // Reads a set of atac matches, computes the percent identity of the // regions, and warns if any identites are low. 
void usage(char *name) { fprintf(stderr, "usage: %s [-d identity] [-i identity] -m matches\n", name); fprintf(stderr, " -i print a warning if a match is below this percent identity\n"); } int main(int argc, char *argv[]) { char *matchesFile = 0L; double identityLimit = 0.9; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-m") == 0) { matchesFile = argv[++arg]; } else if (strcmp(argv[arg], "-i") == 0) { identityLimit = atof(argv[++arg]); if (identityLimit > 1.0) identityLimit /= 100; } else { usage(argv[0]); exit(1); } arg++; } if (matchesFile == 0L) usage(argv[0]), exit(1); atacFile AF(matchesFile); atacMatchList &ML = *AF.matches(); seqCache Acache(AF.assemblyFileA(), 0, false); seqCache Bcache(AF.assemblyFileB(), 0, false); Acache.loadAllSequences(); Bcache.loadAllSequences(); for (uint32 i=0; iiid1)->sequence() + m->pos1; char *b = Bcache.getSequenceInCore(m->iid2)->sequence() + m->pos2; if (m->fwd2) { for (uint32 p=0; plen1; p++) { if (toUpper[(int)a[p]] == toUpper[(int)b[p]]) identities++; } } else { for (uint32 p=0, q=m->len2-1; plen1; p++, q--) { if (toUpper[(int)a[p]] == toUpper[complementSymbol[(int)b[q]]]) identities++; } } double myIdentity = (double)identities / m->len1; if (myIdentity < identityLimit) { fprintf(stderr, "match "uint32FMT" is only %6.2f%% identity: ", i, 100.0 * identities / m->len1); m->print(stderr, AF.labelA(), AF.labelB()); if (m->len1 < 200) { char tmp[1000]; strncpy(tmp, a, m->len1); tmp[m->len1] = 0; fprintf(stderr, " %s\n", tmp); strncpy(tmp, b, m->len1); tmp[m->len1] = 0; fprintf(stderr, " %s\n", tmp); } } } return(0); } kmer-code-2013-trunk/atac-driver/gapShifter/projectFeatures.C0000644000000000000000000001307212322046702022715 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2006 J. 
Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "bio++.H" #include "atac.H" void usage(char *name) { fprintf(stderr, "usage: %s [] -m matches -l log\n", name); fprintf(stderr, " When it works, fill this in...\n"); } // Reads an atac mapping, and a list of features. Features on one // axis are projected to the other axis using the atac map. int main(int argc, char **argv) { char *matchesFile = 0L; char *featureFile = 0L; FILE *logFile = 0L; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-m") == 0) { matchesFile = argv[++arg]; } else if (strcmp(argv[arg], "-f") == 0) { featureFile = argv[++arg]; } else if (strcmp(argv[arg], "-l") == 0) { errno = 0; logFile = fopen(argv[++arg], "w"); if (errno) fprintf(stderr, "Failed to open logfile '%s': %s\n", strerror(errno), argv[arg]), exit(1); } else { usage(argv[0]); exit(1); } arg++; } if (matchesFile == 0L) usage(argv[0]), exit(1); if (featureFile == 0L) usage(argv[0]), exit(1); if (logFile == 0L) usage(argv[0]), exit(1); atacFile AF(matchesFile); atacMatchList &ML = *AF.matches(); atacMatchOrder MO(ML); // XXXX extrabroken! #warning BROKEN atacFeatureList FL; // Project features from A to B. 
MO.sortA(); FL.sort(); uint32 mid = 0; uint32 fid = 0; uint32 pid = 0; while ((mid < MO.numberOfMatches()) && (fid < FL.numberOfFeatures())) { atacMatch *m = MO[mid]; atacFeature *f = FL[fid]; if (m->iid1 < f->iid) { mid++; continue; } if (f->iid < m->iid1) { fid++; continue; } // Same sequences now! if (m->pos1 + m->len1 < f->pos) { // match ends before the feature mid++; continue; } if (f->pos + f->len < m->pos1) { // Feature begins before match fid++; continue; } // Feature and match now overlap! // // This does A -> B -- ONLY. // // If feature is completely in match, this is easy. // if ((m->pos1 <= f->pos) && ((f->pos + f->len) <= (m->pos1 + m->len1))) { uint32 beg; if (m->fwd2 == true) { beg = m->pos2 + f->pos - m->pos1; } else { beg = m->pos2 + m->len2 - (f->pos - m->pos1) - f->len; } if (f->len > 0) fprintf(stdout, "M u Aprojected"uint32FMT" %s.%s %s:"uint32FMT" "uint32FMT" "uint32FMT" 1 %s:"uint32FMT" "uint32FMT" "uint32FMT" %d\n", pid, f->featureuid, m->matchuid, AF.labelA(), f->iid, f->pos, f->len, AF.labelB(), m->iid2, beg, f->len, (m->fwd2) ? 1 : -1); pid++; fid++; continue; } // If match is completely within feature, super easy! // if ((f->pos < m->pos1) && (m->pos1 + m->len1) < (f->pos + f->len)) { if (m->len1 > 0) fprintf(stdout, "M u Bprojected"uint32FMT" %s.%s %s:"uint32FMT" "uint32FMT" "uint32FMT" 1 %s:"uint32FMT" "uint32FMT" "uint32FMT" %d\n", pid, f->featureuid, m->matchuid, AF.labelA(), m->iid1, m->pos1, m->len1, AF.labelB(), m->iid2, m->pos2, m->len2, (m->fwd2) ? 1 : -1); pid++; fid++; continue; } // Dang, feature isn't completely in match. Guess where feature // could be ending? Or just project as much as possible? 
if (f->pos < m->pos1) { uint32 len = f->len - (m->pos1 - f->pos); uint32 beg; if (m->fwd2 == true) { beg = m->pos2; } else { beg = m->pos2 + m->len2 - len; } if (len > 0) fprintf(stdout, "M u Cprojected"uint32FMT" %s.%s %s:"uint32FMT" "uint32FMT" "uint32FMT" 1 %s:"uint32FMT" "uint32FMT" "uint32FMT" %d\n", pid, f->featureuid, m->matchuid, AF.labelA(), f->iid, m->pos1, len, AF.labelB(), m->iid2, beg, len, (m->fwd2) ? 1 : -1); pid++; fid++; continue; } if (m->pos1 + m->len1 < f->pos + f->len) { uint32 len = m->pos1 + m->len1 - f->pos; uint32 beg; if (m->fwd2 == true) { beg = m->pos2 + m->len2 - len; } else { beg = m->pos2; } if (len > 0) fprintf(stdout, "M u Dprojected"uint32FMT" %s.%s %s:"uint32FMT" "uint32FMT" "uint32FMT" 1 %s:"uint32FMT" "uint32FMT" "uint32FMT" %d\n", pid, f->featureuid, m->matchuid, AF.labelA(), f->iid, f->pos, len, AF.labelB(), m->iid2, beg, len, (m->fwd2) ? 1 : -1); pid++; fid++; continue; } fprintf(stderr, "projectFeatures: Unhandled case?\n"); m->print(stdout, "A", "B"); f->print(stdout, "A"); assert(0); } } kmer-code-2013-trunk/atac-driver/gapShifter/coalesceMatches.C0000644000000000000000000000377012322046702022637 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2006 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "bio++.H" #include "atac.H" // Reads a set of matches, coalesces those on the same diagonal. // Does not preserve runs. // // No args, reads stdin, writes stdout. int main(int argc, char *argv[]) { atacFile AF("-"); atacMatchOrder MO(*AF.matches()); atacMatch *l = 0L; atacMatch *r = 0L; MO.sortDiagonal(); for (uint32 i=1; iiid1 == r->iid1) && (l->iid2 == r->iid2) && (l->fwd1 == r->fwd1) && (l->fwd2 == r->fwd2) && (l->pos1 + l->len1 == r->pos1) && (l->pos2 + l->len2 == r->pos2) && (strcmp(l->type, r->type) == 0) && (strcmp(l->parentuid, r->parentuid) == 0)) { fprintf(stderr, "MERGE:\n"); l->print(stderr, AF.labelA(), AF.labelB()); r->print(stderr, AF.labelA(), AF.labelB()); l->len1 += r->len1; l->len2 += r->len2; l->print(stderr, AF.labelA(), AF.labelB()); } else { l->print(stdout, AF.labelA(), AF.labelB()); l = 0L; } } if (l) l->print(stdout, AF.labelA(), AF.labelB()); return(0); } kmer-code-2013-trunk/atac-driver/gapShifter/extractUnmapped.C0000644000000000000000000003763012322046702022722 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2006 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "bio++.H" #include "atac.H" // // Tested to work on ../../atac/B36LCvsHUREF6A/B36LCvsHUREF6A.gapsFixed.atac // // Reads a set of matches and outputs two sequence files containing sequence that // is not matched. void writeGaplessSequence(FILE *output, seqInCore *S, uint32 beg, uint32 end, uint32 extend, atacMatch *l, atacMatch *r) { char *s = S->sequence(); // Skip any N's starting where we are currently // while ((beg < end) && (toUpper[(int)s[beg]] == 'N')) beg++; while ((beg < end) && (toUpper[(int)s[end-1]] == 'N')) end--; if (beg >= end) return; // Extend the ends up to 'extend' positions, as long as we don't // hit a gap. // for (uint32 x=0; ((x < extend) && (beg > 0) && (toUpper[(int)s[beg-1]] != 'N')); x++) beg--; for (uint32 x=0; ((x < extend) && (end < S->sequenceLength()) && (toUpper[(int)s[end]] != 'N')); x++) end++; // Just make sure we're still in bounds! if (end > S->sequenceLength()) end = S->sequenceLength(); // Over the whole sequence // while (beg < end) { // Skip any N's starting where we are currently // while ((beg < end) && (toUpper[(int)s[beg]] == 'N')) beg++; // Move our current up to here uint32 cur = beg; // If we're at the end of the sequence, this block doesn't // exist; it's solid N. // if (beg < end) { // Move cur up to the next N // while ((cur < end) && (toUpper[(int)s[cur]] != 'N')) cur++; // And output whatever this block is // fprintf(output, "%s extracted from iid "uint32FMT" pos "uint32FMT" "uint32FMT" between match %s(%s) and %s(%s)\n", S->header(), S->getIID(), beg, cur, (l) ? l->matchuid : "none", (l) ? l->parentuid : "none", (r) ? r->matchuid : "none", (r) ? 
r->parentuid : "none"); fwrite(S->sequence() + beg, sizeof(char), cur-beg, output); fprintf(output, "\n"); } // Move to the next block. beg = cur; } } // COPIED from libatac/matchList.C, but need to dereference the atacMatch again. // static int sort1_(const void *a, const void *b) { const atacMatch *A = *(const atacMatch * const *)a; const atacMatch *B = *(const atacMatch * const *)b; if (A->iid1 < B->iid1) return(-1); if (A->iid1 > B->iid1) return(1); if (A->pos1 < B->pos1) return(-1); if (A->pos1 > B->pos1) return(1); if (A->len1 > B->len1) return(-1); if (A->len1 < B->len1) return(1); if (A->iid2 < B->iid2) return(-1); if (A->iid2 > B->iid2) return(1); if (A->pos2 < B->pos2) return(-1); if (A->pos2 > B->pos2) return(1); if (A->len2 > B->len2) return(-1); if (A->len2 < B->len2) return(1); return(0); } static int sort2_(const void *a, const void *b) { const atacMatch *A = *(const atacMatch * const *)a; const atacMatch *B = *(const atacMatch * const *)b; if (A->iid2 < B->iid2) return(-1); if (A->iid2 > B->iid2) return(1); if (A->pos2 < B->pos2) return(-1); if (A->pos2 > B->pos2) return(1); if (A->len2 > B->len2) return(-1); if (A->len2 < B->len2) return(1); if (A->iid1 < B->iid1) return(-1); if (A->iid1 > B->iid1) return(1); if (A->pos1 < B->pos1) return(-1); if (A->pos1 > B->pos1) return(1); if (A->len1 > B->len1) return(-1); if (A->len1 < B->len1) return(1); return(0); } // New method, uses an intervalList to find the unmapped regions for // each sequence. 
// class extractMatchList { public: extractMatchList() { matchesLen = 0; matchesMax = 16; matches = new atacMatch * [matchesMax]; }; ~extractMatchList() { delete [] matches; }; atacMatch *operator[](uint32 idx) { return(matches[idx]); }; uint32 len(void) { return(matchesLen); }; void add(atacMatch *m) { if (matchesLen >= matchesMax) { matchesMax *= 2; atacMatch **M = new atacMatch * [matchesMax]; memcpy(M, matches, sizeof(atacMatch *) * matchesLen); delete [] matches; matches = M; } matches[matchesLen++] = m; }; void sort1(void) { qsort(matches, matchesLen, sizeof(atacMatch*), sort1_); }; void sort2(void) { qsort(matches, matchesLen, sizeof(atacMatch*), sort2_); }; private: atacMatch **matches; uint32 matchesLen; uint32 matchesMax; }; void extractUnmapped(seqCache *A, seqCache *B, FILE *Aoutput, FILE *Boutput, uint32 extend, atacFile &AF, atacMatchList &ML) { uint32 numSeqsA = AF.fastaA()->getNumberOfSequences(); uint32 numSeqsB = AF.fastaB()->getNumberOfSequences(); extractMatchList *coveredA = new extractMatchList [numSeqsA]; extractMatchList *coveredB = new extractMatchList [numSeqsB]; // Populate the intervals with the mapping // for (uint32 x=0; xiid1].add(m); coveredB[m->iid2].add(m); } // Sort the intervals, manually invert the interval -- remembering // what matches are where. // for (uint32 seq=0; seqfind(seq); //seqInCore *S = ML.fastaA()->getSequenceInCore(); seqInCore *S = A->getSequenceInCore(seq); if (coveredA[seq].len() == 0) { // Hey! This sequence has NO matches at all! 
// writeGaplessSequence(Aoutput, S, 0, AF.fastaA()->getSequenceLength(seq), extend, 0L, 0L); } else { if (0 < coveredA[seq][0]->pos1) { writeGaplessSequence(Aoutput, S, 0, coveredA[seq][0]->pos1, extend, 0L, coveredA[seq][0]); } for (uint32 i=1; ipos1 + coveredA[seq][i-1]->len1 < coveredA[seq][i]->pos1) { writeGaplessSequence(Aoutput, S, coveredA[seq][i-1]->pos1 + coveredA[seq][i-1]->len1, coveredA[seq][i]->pos1, extend, coveredA[seq][i-1], coveredA[seq][i]); } } uint32 last = coveredA[seq].len()-1; if (coveredA[seq][last]->pos1) { writeGaplessSequence(Aoutput, S, coveredA[seq][last]->pos1 + coveredA[seq][last]->len1, AF.fastaA()->getSequenceLength(seq), extend, coveredA[seq][0], 0L); } } } // DUPLICATION OF THE ABOVE! (Replace 1 with 2, A with B) // Sort the intervals, manually invert the interval -- remembering // what matches are where. // for (uint32 seq=0; seqgetSequenceInCore(seq); if (coveredB[seq].len() == 0) { // Hey! This sequence has NO matches at all! // writeGaplessSequence(Boutput, S, 0, AF.fastaB()->getSequenceLength(seq), extend, 0L, 0L); } else { if (0 < coveredB[seq][0]->pos2) { writeGaplessSequence(Boutput, S, 0, coveredB[seq][0]->pos2, extend, 0L, coveredB[seq][0]); } for (uint32 i=1; ipos2 + coveredB[seq][i-1]->len2 < coveredB[seq][i]->pos2) { writeGaplessSequence(Boutput, S, coveredB[seq][i-1]->pos2 + coveredB[seq][i-1]->len2, coveredB[seq][i]->pos2, extend, coveredB[seq][i-1], coveredB[seq][i]); } } uint32 last = coveredB[seq].len()-1; if (coveredB[seq][last]->pos2) { writeGaplessSequence(Boutput, S, coveredB[seq][last]->pos2 + coveredB[seq][last]->len2, AF.fastaB()->getSequenceLength(seq), extend, coveredB[seq][0], 0L); } } } } void extractUnmappedRuns(seqCache *A, seqCache *B, FILE *ARoutput, FILE *BRoutput, uint32 extend, atacMatchList &ML) { seqInCore *S1 = 0L; seqInCore *S2 = 0L; atacMatchOrder MO(ML); MO.sortA(); for (uint32 i=1; iiid1 != r->iid1) continue; if (l->iid2 != r->iid2) continue; // Extract from (l->pos1 + l->len1) to 
(r->pos1), if it's longer than 20bp bool lengthOK = true; if (l->pos1 + l->len1 + 20 >= r->pos1) lengthOK = false; if ((l->fwd2 == true) && (l->pos2 + l->len2 + 20 >= r->pos2)) lengthOK = false; if ((l->fwd2 == false) && (r->pos2 + r->len2 + 20 >= l->pos2)) lengthOK = false; // Extract if our two matches are in the same run. // if ((lengthOK) && (strcmp(l->parentuid, r->parentuid) == 0)) { #if 0 if (l->iid1 != S1->getIID()) { delete S1; W1->find(l->iid1); S1 = W1->getSequenceInCore(); } if (l->iid2 != S2->getIID()) { delete S2; W2->find(l->iid2); S2 = W2->getSequenceInCore(); } #else S1 = A->getSequenceInCore(l->iid1); S2 = B->getSequenceInCore(l->iid2); #endif writeGaplessSequence(ARoutput, S1, l->pos1 + l->len1, r->pos1, extend, l, r); // Need to deal with reverse matches here! In run matches // should be the same orientation, but we'll still check. // if (l->fwd2 != r->fwd2) { fprintf(stderr, "WOAH! Matches of different orientation in a run?!?\n"); exit(1); } if (l->fwd2) { writeGaplessSequence(BRoutput, S2, l->pos2 + l->len2, r->pos2, extend, l, r); } else { writeGaplessSequence(BRoutput, S2, r->pos2 + r->len2, l->pos2, extend, l, r); } } } } void usage(char *name) { fprintf(stderr, "usage: %s [-OP output.fasta] [-t trfile] -m matches\n", name); fprintf(stderr, " OP\n"); fprintf(stderr, " -a extract all unmapped sequence in A\n"); fprintf(stderr, " -b extract all unmapped sequence in B\n"); fprintf(stderr, " -ar extract within run unmapped sequence in A\n"); fprintf(stderr, " -br extract within run unmapped sequence in B\n"); fprintf(stderr, " BOTH -ar and -br need to be specified!\n"); fprintf(stderr, "\n"); fprintf(stderr, " -t mask out tandem repeats listed in trfile\n"); } FILE * openOutputFile(char *name) { errno = 0; FILE *R = fopen(name, "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", name, strerror(errno)), exit(1); return(R); } int main(int argc, char *argv[]) { char *matchesFile = 0L; FILE *Aoutput = 0L; FILE *Boutput = 0L; FILE 
*ARoutput = 0L; FILE *BRoutput = 0L; uint32 extend = 0; char *trFile = 0L; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-m") == 0) { matchesFile = argv[++arg]; } else if (strcmp(argv[arg], "-a") == 0) { Aoutput = openOutputFile(argv[++arg]); } else if (strcmp(argv[arg], "-b") == 0) { Boutput = openOutputFile(argv[++arg]); } else if (strcmp(argv[arg], "-ar") == 0) { ARoutput = openOutputFile(argv[++arg]); } else if (strcmp(argv[arg], "-br") == 0) { BRoutput = openOutputFile(argv[++arg]); } else if (strcmp(argv[arg], "-e") == 0) { extend = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-t") == 0) { trFile = argv[++arg]; } else { usage(argv[0]); exit(1); } arg++; } if (matchesFile == 0L) usage(argv[0]), exit(1); atacFile AF(matchesFile); atacMatchList &ML = *AF.matches(); // Build caches for both sequences, then modify that sequence to // mask out tandem repeats. // seqCache *A = new seqCache(AF.assemblyFileA(), 0, true); seqCache *B = new seqCache(AF.assemblyFileB(), 0, true); A->loadAllSequences(); B->loadAllSequences(); if (trFile) { errno =0; FILE *F = fopen(trFile, "r"); if (errno) fprintf(stderr, "Error opening '%s': %s\n", trFile, strerror(errno)); char L[1024] = { 0 }; splitToWords W(L); fprintf(stderr, "Masking repeats in '%s'\n", trFile); uint32 statidx = 0; uint32 stats[2] = { 0 }; while (!feof(F)) { fgets(L, 1024, F); W.split(L); char source = W[0][0]; uint32 iid = strtouint32(W[1], 0L); uint32 pos = strtouint32(W[2], 0L); uint32 len = strtouint32(W[3], 0L); bool fwd = (W[4][0] != '-'); seqInCore *S = 0L; char *s = 0L; if (source == 'B') { S = A->getSequenceInCore(iid); s = A->getSequenceInCore(iid)->sequence(); statidx = 0; } else if (source == 'H') { S = B->getSequenceInCore(iid); s = B->getSequenceInCore(iid)->sequence(); statidx = 1; } else { fprintf(stderr, "Unknown source '%c'\n", source); exit(1); } //fprintf(stderr, "Masking %c "uint32FMTW(8)" from "uint32FMTW(9)" to "uint32FMTW(9)" on strand %c\r", // source, iid, pos, 
pos+len, (fwd) ? 'f' : 'r'); if (fwd) { s += pos; } else { s += S->sequenceLength() - pos - len; } for (uint32 i=0; i #include #include #include "bio++.H" #include "atac.H" // Attempts to align unmapped regions. // // For each unmapped region, we extract the corresponding sequences, // sim4db them together, parse the output to make atac-format // matches, but of a lower confidence. // // IDX1 ------------------------------------------ // |||||||| |||||||||| // IDX2 -A------------\ /--------------B- // \ / // // The nasty case is that IDX1 could be doubly mapped, once by A and // once by B. So we also need to label those regions that are mapped // multiple times as an even lower confidence. // // We probably should bias the alignment towards the anchored edge, // implying I should use something other than sim4db here. // // We end up with three confidence classes: // 1) mapped by atac itself, 1-to-1 matches // 2) mapped by sim4db above, with no conflict, between anchors // 3) same as 2, but conflicting // // Why sim4db? It's splicing model might introduce some noise on the // ends (which we'll clean up), but more importantly, the splicing // allows us to skip over large blocks of whatever (rearrangement, // tandem repeat, etc). And it's also in my source tree and I know // how to use it. 
this is unfinished crap // The below is the main from writing unmatched regions int main(int argc, char *argv[]) { FILE *Aoutput = 0L; FILE *Boutput = 0L; char *matchesFile = 0L; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-a") == 0) { errno = 0; Aoutput = fopen(argv[++arg], "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", argv[arg], strerror(errno)), exit(1); } else if (strcmp(argv[arg], "-b") == 0) { errno = 0; Boutput = fopen(argv[++arg], "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", argv[arg], strerror(errno)), exit(1); } else if (strcmp(argv[arg], "-m") == 0) { matchesFile = argv[++arg]; } else { fprintf(stderr, "usage: %s -a Aunmatched.fasta -b B.unmatched.fasta < matches\n", argv[0]); exit(1); } arg++; } if ((Aoutput == 0L) || (Boutput == 0L) || (matchesFile == 0L)) { fprintf(stderr, "usage: %s -a Aunmatched.fasta -b B.unmatched.fasta < matches\n", argv[0]); exit(1); } atacMatchList ML1(matchesFile, 'm', false); atacMatchList ML2(matchesFile, 'm', false); ML1.sort1(); // Sorted by first index ML2.sort2(); // Sorted by second index seqCache *W1 = ML1._seq1; seqCache *W2 = ML1._seq2; // For every match, // find the match before and the match after, on both axes // // Extract unmapped in sequence 1 ML.sort1(); W = ML._seq1; W->find(ML[0]->iid1); S = W->getSequenceInCore(); for (uint32 i=1; iiid1 != r->iid1) continue; if (l->iid1 != S->getIID()) { delete S; W->find(l->iid1); S = W->getSequenceInCore(); } // Extract from (l->pos1 + l->len1) to (r->pos1), if it's longer than 20bp // if (l->pos1 + l->len1 + 20 < r->pos1) writeGaplessSequence(Aoutput, S, l->pos1 + l->len1, r->pos1); } // Extract unmapped in sequence 2 ML.sort2(); W = ML._seq2; W->find(ML[0]->iid2); S = W->getSequenceInCore(); for (uint32 i=1; iiid2 != r->iid2) continue; if (l->iid2 != S->getIID()) { delete S; W->find(l->iid2); S = W->getSequenceInCore(); } // Extract from (l->pos2 + l->len2) to (r->pos2), if it's longer than 20bp // if (l->pos2 + l->len2 + 20 
< r->pos2) writeGaplessSequence(Boutput, S, l->pos2 + l->len2, r->pos2); } fclose(Aoutput); fclose(Boutput); return(0); } kmer-code-2013-trunk/atac-driver/gapShifter/projectFeatures-test-cases/0000755000000000000000000000000012641613360024663 5ustar rootrootkmer-code-2013-trunk/atac-driver/gapShifter/projectFeatures-test-cases/test.atac.log0000644000000000000000000000071410546427006027260 0ustar rootrootAt gapSize=1000 closed 0 f-gaps and 0 r-gaps. At gapSize=1000 closed 0 f-gaps and 0 r-gaps. At gapSize=1000 closed 0 f-gaps and 0 r-gaps. At gapSize=1000 closed 0 f-gaps and 0 r-gaps. At gapSize=1000 closed 0 f-gaps and 0 r-gaps. At gapSize=1000 closed 0 f-gaps and 0 r-gaps. At gapSize=1000 closed 0 f-gaps and 0 r-gaps. At gapSize=1000 closed 0 f-gaps and 0 r-gaps. At gapSize=1000 closed 0 f-gaps and 0 r-gaps. At gapSize=1000 closed 0 f-gaps and 0 r-gaps. kmer-code-2013-trunk/atac-driver/gapShifter/projectFeatures-test-cases/test.ataf0000644000000000000000000000070410546427006026502 0ustar rootroot! format atac 1.0 F tr test01 . A:0 0 50 # Test containment F tr test02 . A:0 100 200 F tr test03 . A:0 100 150 F tr test04 . A:0 150 100 F tr test05 . A:0 250 150 # Test before F tr test06 . A:0 50 40 F tr test07 . A:0 50 50 F tr test08 . A:0 50 150 F tr test09 . A:0 50 250 # Test after F tr test10 . A:0 100 250 F tr test11 . A:0 290 50 F tr test12 . A:0 300 50 F tr test13 . A:0 310 50 # Match contained in feature F tr test14 . 
A:0 50 400 kmer-code-2013-trunk/atac-driver/gapShifter/projectFeatures-test-cases/test.log0000644000000000000000000000000010546427006026335 0ustar rootrootkmer-code-2013-trunk/atac-driver/gapShifter/projectFeatures-test-cases/test.atac0000644000000000000000000000006710546427006026501 0ustar rootroot!format atac 1.0 M u m1 r1 A:0 100 200 1 B:0 500 200 1 kmer-code-2013-trunk/atac-driver/gapShifter/projectFeatures-test-cases/test-rev.atac0000644000000000000000000000007010546427006027265 0ustar rootroot!format atac 1.0 M u m2 r2 A:0 100 200 1 B:0 500 200 -1 kmer-code-2013-trunk/atac-driver/gapShifter/cleanAtac.C0000644000000000000000000001201512322046702021417 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2006 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "bio++.H" #include "atac.H" // Reads a set of atac matches, trims off ends that are mismatch. // Computes the percent identity of the resulting match. // Outputs the trimmed match if it is above some percent identity. 
void usage(char *name) { fprintf(stderr, "usage: %s [-d identity] [-i identity] -m matches\n", name); fprintf(stderr, " -d discard the match if it is below this percent identity\n"); } int main(int argc, char *argv[]) { char *matchesFile = 0L; double discardThreshold = 0.0; uint32 discardLength = 0; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-m") == 0) { matchesFile = argv[++arg]; } else if (strcmp(argv[arg], "-d") == 0) { discardThreshold = atof(argv[++arg]); if (discardThreshold > 1.0) discardThreshold /= 100; } else if (strcmp(argv[arg], "-l") == 0) { discardLength = atoi(argv[++arg]); } else { usage(argv[0]); exit(1); } arg++; } if (matchesFile == 0L) usage(argv[0]), exit(1); atacFile AF(matchesFile); atacMatchList &ML = *AF.matches(); seqCache Acache(AF.assemblyFileA(), 32, false); seqCache Bcache(AF.assemblyFileB(), 32, false); for (uint32 i=0; iiid1)->sequence() + m->pos1; //char *b = Bcache.getSequenceInCore(m->iid2)->sequence() + m->pos2; //uint32 p, q; // Trim the match // if (m->fwd2) { char *a = Acache.getSequenceInCore(m->iid1)->sequence() + m->pos1; char *b = Bcache.getSequenceInCore(m->iid2)->sequence() + m->pos2; uint32 p = 0; while ((m->len1 > 0) && (toUpper[(int)a[p]] != toUpper[(int)b[p]])) { m->pos1++; m->pos2++; m->len1--; m->len2--; p++; } a = Acache.getSequenceInCore(m->iid1)->sequence() + m->pos1; b = Bcache.getSequenceInCore(m->iid2)->sequence() + m->pos2; p = m->len1-1; while ((m->len1 > 0) && (toUpper[(int)a[p]] != toUpper[(int)b[p]])) { m->len1--; m->len2--; p--; } } else { char *a = Acache.getSequenceInCore(m->iid1)->sequence() + m->pos1; char *b = Bcache.getSequenceInCore(m->iid2)->sequence() + m->pos2; uint32 p = 0; uint32 q = m->len2 - 1; while ((m->len1 > 0) && (toUpper[(int)a[p]] != complementSymbol[toUpper[(int)b[q]]])) { m->pos1++; m->len1--; m->len2--; p++; q--; } a = Acache.getSequenceInCore(m->iid1)->sequence() + m->pos1; b = Bcache.getSequenceInCore(m->iid2)->sequence() + m->pos2; p = m->len1 - 1; q = 0; while 
((m->len1 > 0) && (toUpper[(int)a[p]] != complementSymbol[toUpper[(int)b[q]]])) { m->len1--; m->pos2++; m->len2--; p--; q++; } } if (m->len1 > 0) { char *a = Acache.getSequenceInCore(m->iid1)->sequence() + m->pos1; char *b = Bcache.getSequenceInCore(m->iid2)->sequence() + m->pos2; if (m->fwd2) { for (uint32 p=0; plen1; p++) { if (toUpper[(int)a[p]] == toUpper[(int)b[p]]) identities++; } } else { for (uint32 p=0, q=m->len2-1; plen1; p++, q--) { if (toUpper[(int)a[p]] == toUpper[complementSymbol[(int)b[q]]]) identities++; } } double myIdentity = (double)identities / m->len1; if ((myIdentity > discardThreshold) && (m->len1 > discardLength)) { m->print(stdout, AF.labelA(), AF.labelB()); } else { fprintf(stderr, "match "uint32FMT" is only %6.2f%% identity and "uint32FMT" long: ", i, 100.0 * identities / m->len1, m->len1); m->print(stderr, AF.labelA(), AF.labelB()); if (m->len1 < 200) { char tmp[1000]; strncpy(tmp, a, m->len1); tmp[m->len1] = 0; fprintf(stderr, " %s\n", tmp); strncpy(tmp, b, m->len1); tmp[m->len1] = 0; fprintf(stderr, " %s\n", tmp); } } } } return(0); } kmer-code-2013-trunk/atac-driver/gapShifter/Make.include0000644000000000000000000000335011512763666021703 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../../libutil/)/ LIBBIO/ :=$(realpath $/../../libbio/)/ LIBSEQ/ :=$(realpath $/../../libseq/)/ LIBATAC/ :=$(realpath $/../libatac/)/ $/.CXX_SRCS := $/gapShifter.C $/extractSequence.C $/extractUnmapped.C $/coalesceMatches.C $/correctGaps.C $/testAtac.C $/cleanAtac.C $/projectFeatures.C $/.CXX_EXES := $/gapShifter $/extractSequence $/extractUnmapped $/coalesceMatches $/correctGaps $/testAtac $/cleanAtac $/projectFeatures $/.CLEAN :=$/*.o $/*~ $/core $/gapShifter: $/gapShifter.o \ ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/extractUnmapped: $/extractUnmapped.o \ ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/extractSequence: $/extractSequence.o \ ${LIBATAC/}libatac.a 
${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/coalesceMatches: $/coalesceMatches.o \ ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/correctGaps: $/correctGaps.o \ ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/testAtac: $/testAtac.o \ ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/cleanAtac: $/cleanAtac.o \ ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/projectFeatures: $/projectFeatures.o \ ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBATAC/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) kmer-code-2013-trunk/atac-driver/gapShifter/extractUnmapped-sim4dbFixer.pl0000644000000000000000000000317610415631065025332 0ustar rootroot#!/usr/bin/perl # Fixes up the IID's and coords of the sim4db mapped regions from extractUnmapped. # Reads polishes from stdin, writes fixed polishes to stdout. 
use strict; use lib "/bioinfo/assembly/walenz/src/genomics/scripts"; use sim4polish; $| = 1; while (!eof(STDIN)) { my %p = &sim4polish::readPolish(*STDIN); if ($p{'raw'}) { my $estIID; my $estBeg = 0; my $estEnd = 0; my $dbIID; my $dbBeg = 0; my $dbEnd = 0; if ($p{'estDefLine'} =~ m/extracted\s+from\s+iid\s+(\d+)\s+pos\s+(\d+)\s+(\d+)\s+/) { $estIID = $1; $estBeg = $2; $estEnd = $3; } if ($p{'dbDefLine'} =~ m/extracted\s+from\s+iid\s+(\d+)\s+pos\s+(\d+)\s+(\d+)\s+/) { $dbIID = $1; $dbBeg = $2; $dbEnd = $3; } if (defined($estIID)) { $p{'estID'} = $estIID; foreach my $exon (@{@p{'exons'}}) { $exon->{'cDNAstart'} += $estBeg; $exon->{'cDNAend'} += $estBeg; } } if (defined($dbIID)) { $p{'dbID'} = $dbIID; foreach my $exon (@{@p{'exons'}}) { $exon->{'GENOMICstart'} += $dbBeg; $exon->{'GENOMICend'} += $dbBeg; } } # normalize foreach my $exon (@{@p{'exons'}}) { $exon->{'GENOMICstart'} += $p{'dbLo'}; $exon->{'GENOMICend'} += $p{'dbLo'}; } $p{'dbLo'} = 0; $p{'dbHi'} = 0; $p{'estLen'} = 0; $p{'raw'} = &sim4polish::updatePolish(%p); print $p{'raw'}; } } kmer-code-2013-trunk/atac-driver/gapShifter/extractSequence.C0000644000000000000000000001015112322046702022706 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2006 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "atac.H" #include "bio++.H" #include "seqCache.H" // Reads a set of matches and outputs sequence that was mapped. Filters matches, etc. void extractA(seqCache *A, seqCache *B, FILE *Aoutput, FILE *Boutput, uint32 Aiid, uint32 Biid, atacMatchList &ML) { } void usage(char *name) { fprintf(stderr, "usage: %s [-OP output.fasta] [-t trfile] -m matches\n", name); fprintf(stderr, " OP\n"); fprintf(stderr, " -a extract all unmapped sequence in A\n"); fprintf(stderr, " -b extract all unmapped sequence in B\n"); fprintf(stderr, " -ar extract within run unmapped sequence in A\n"); fprintf(stderr, " -br extract within run unmapped sequence in B\n"); fprintf(stderr, " BOTH -ar and -br need to be specified!\n"); fprintf(stderr, "\n"); fprintf(stderr, " -t mask out tandem repeats listed in trfile\n"); } FILE * openOutputFile(char *name) { errno = 0; FILE *R = fopen(name, "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", name, strerror(errno)), exit(1); return(R); } int main(int argc, char *argv[]) { char *matchesFile = 0L; FILE *Aoutput = 0L; FILE *Boutput = 0L; uint32 Aiid = ~uint32ZERO; uint32 Biid = ~uint32ZERO; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-m") == 0) { matchesFile = argv[++arg]; } else if (strcmp(argv[arg], "-a") == 0) { Aoutput = openOutputFile(argv[++arg]); } else if (strcmp(argv[arg], "-b") == 0) { Boutput = openOutputFile(argv[++arg]); } else if (strcmp(argv[arg], "-1") == 0) { Aiid = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-2") == 0) { Biid = strtouint32(argv[++arg], 0L); } else { usage(argv[0]); exit(1); } arg++; } if (matchesFile == 0L) usage(argv[0]), exit(1); atacFile AF(matchesFile); atacMatchList &ML = *AF.matches(); seqCache *A = new 
seqCache(AF.assemblyFileA(), 0, true); seqCache *B = new seqCache(AF.assemblyFileB(), 0, true); A->loadAllSequences(); B->loadAllSequences(); for (uint32 x=0; xiid1)) && ((Biid == ~uint32ZERO) || (Biid == m->iid2))) { if (Aoutput) { seqInCore *S = A->getSequenceInCore(m->iid1); fprintf(Aoutput, "%s extracted from iid "uint32FMT" pos "uint32FMT" "uint32FMT" match %s(%s)\n", S->header(), S->getIID(), m->pos1, m->pos1 + m->len1, m->matchuid, m->parentuid); fwrite(S->sequence() + m->pos1, sizeof(char), m->len1, Aoutput); fprintf(Aoutput, "\n"); } if (Boutput) { seqInCore *S = B->getSequenceInCore(m->iid2); fprintf(Boutput, "%s extracted from iid "uint32FMT" pos "uint32FMT" "uint32FMT" match %s(%s)\n", S->header(), S->getIID(), m->pos2, m->pos2 + m->len2, m->matchuid, m->parentuid); fwrite(S->sequence() + m->pos2, sizeof(char), m->len2, Boutput); fprintf(Boutput, "\n"); } } } if (Aoutput) fclose(Aoutput); if (Boutput) fclose(Boutput); return(0); } kmer-code-2013-trunk/atac-driver/gapShifter/correctGaps.C0000644000000000000000000002500312322046702022021 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2006 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "bio++.H" #include "atac.H" void usage(char *name) { fprintf(stderr, "usage: %s [] -m matches -l log\n", name); fprintf(stderr, " When it works, fill this in...\n"); } int main(int argc, char *argv[]) { char *matchesFile = 0L; FILE *logFile = 0L; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-m") == 0) { matchesFile = argv[++arg]; } else if (strcmp(argv[arg], "-l") == 0) { errno = 0; logFile = fopen(argv[++arg], "w"); if (errno) fprintf(stderr, "Failed to open logfile '%s': %s\n", strerror(errno), argv[arg]), exit(1); } else { usage(argv[0]); exit(1); } arg++; } if (matchesFile == 0L) usage(argv[0]), exit(1); if (logFile == 0L) usage(argv[0]), exit(1); atacFile AF(matchesFile); atacMatchList &ML = *AF.matches(); atacMatchOrder MO(ML); // Sort by either axis. MO.sortA(); // We need to compute the identity of the gap; our metric (thanks to Nelson) is // if ("long" and "not low identity") or ("short"), close the gap // We could use the seqCache, but with only a handful of gaps, we // just let the OS cache stuff. 
seqCache *C1 = new seqCache(AF.assemblyFileA(), 2, false); seqCache *C2 = new seqCache(AF.assemblyFileB(), 1024, false); seqInCore *S1 = 0L; seqInCore *S2 = 0L; for (uint32 iter=0; iter<10; iter++) { uint32 gapsize = 1000; uint32 fgaps = 0; uint32 rgaps = 0; int mergeuid = 1; for (uint32 i=1; iiid1 == r->iid1) && // Matches are between the same sequences (l->iid2 == r->iid2) && (l->fwd2 == r->fwd2)) { // Matches are the same orientation if (l->fwd2 == true) { if ((l->pos1 + l->len1 <= r->pos1) && // Matches are ordered correctly (should be, from the sort) (l->pos2 + l->len2 <= r->pos2)) { gap1 = r->pos1 - (l->pos1 + l->len1); gap2 = r->pos2 - (l->pos2 + l->len2); if ((gap1 == gap2) && (gap1 <= gapsize)) { S1 = C1->getSequenceInCore(l->iid1); S2 = C2->getSequenceInCore(l->iid2); char *s1 = S1->sequence() + l->pos1 + l->len1; char *s2 = S2->sequence() + l->pos2 + l->len1; uint32 identities = 0; uint32 n1 = 0; uint32 n2 = 0; for (uint32 p=0; plen1) && (2*gap1 <= r->len1)) || // (gap is short, and the flanks are big ((gap1 < 11) && (100*identities < 90*gap1)))) { // (gap is short and high quality // ALSO need to check that the gap is not actually // mapped in sequence 2. Not really, just make sure // these two matches are in the same run. // if (strcmp(l->parentuid, r->parentuid) != 0) { fprintf(logFile, "HEY! 
F gap of size "uint32FMT" not in a run?\n", gap1); l->print(logFile, AF.labelA(), AF.labelB()); r->print(logFile, AF.labelA(), AF.labelB()); } else { fgaps++; joinMatches = true; //fprintf(logFile, "potential f gap of size L "uint32FMTW(4)" (n1="uint32FMTW(4)" n2="uint32FMTW(4)" ident="uint32FMTW(4)"/"uint32FMTW(4)")!\n", // gap1, n1, n2, identities, gap1); //l->print(logFile, AF.labelA(), AF.labelB()); //r->print(logFile, AF.labelA(), AF.labelB()); } } } } } // was a forward match if (l->fwd2 == false) { if ((l->pos1 + l->len1 <= r->pos1) && // Matches are ordered correctly (should be, from the sort) (r->pos2 + r->len2 <= l->pos2)) { gap1 = r->pos1 - (l->pos1 + l->len1); gap2 = l->pos2 - (r->pos2 + r->len2); if ((gap1 == gap2) && (gap1 <= gapsize)) { S1 = C1->getSequenceInCore(l->iid1); S2 = C2->getSequenceInCore(l->iid2); char *s1 = S1->sequence() + l->pos1 + l->len1; char *s2 = S2->sequence() + r->pos2 + r->len2; uint32 identities = 0; uint32 n1 = 0; uint32 n2 = 0; for (uint32 p=0, q=gap1-1; plen1) && (2*gap1 <= r->len1)) || // (gap is short, and the flanks are big ((gap1 < 11) && (100*identities < 90*gap1)))) { // (gap is short and high quality // ALSO need to check that the gap is not actually // mapped in sequence 2. Not really, just make sure // these two matches are in the same run. // if (strcmp(l->parentuid, r->parentuid) != 0) { fprintf(logFile, "HEY! 
R gap of size "uint32FMT" not in a run?\n", gap1); l->print(logFile, AF.labelA(), AF.labelB()); r->print(logFile, AF.labelA(), AF.labelB()); } else { rgaps++; joinMatches = true; //fprintf(logFile, "potential r gap of size L "uint32FMTW(4)" (n1="uint32FMTW(4)" n2="uint32FMTW(4)" ident="uint32FMTW(4)"/"uint32FMTW(4)")!\n", // gap1, n1, n2, identities, gap1); //l->print(logFile, AF.labelA(), AF.labelB()); //r->print(logFile, AF.labelA(), AF.labelB()); } } } } } } if (joinMatches) { fprintf(logFile, "CLOSE "uint32FMT"----------------------------------------\n", gap1); l->print(logFile, AF.labelA(), AF.labelB()); r->print(logFile, AF.labelA(), AF.labelB()); MO.mergeMatches(l, r, mergeuid); l->print(logFile, AF.labelA(), AF.labelB()); mergeuid++; i--; } } fprintf(logFile, "At gapSize="uint32FMT" closed "uint32FMT" f-gaps and "uint32FMT" r-gaps.\n", gapsize, fgaps, rgaps); if (fgaps + rgaps == 0) iter = 10; } #if 0 // This analyzes an atac mapping, looking for a signature that indicates a bad // alignment. If we have an alignment of: // XXXXXXaC-YYYYYY // XXXXXX-CtYYYYYY // this will generate three matches, instead of one match with mismatches in it. // We scan the FORWARD matches for this pattern, and report any we find. // // We only found 3 on huref4 vs b35. Further development here was stopped. 
for (uint32 i=2; ilen1 < 3) { // The match in the middle is small if ((l->iid1 == r->iid1) && // Matches are between the same sequences (l->iid2 == r->iid2) && (l->fwd2 == r->fwd2)) { // Matches are the same orientation if (l->fwd2 == true) { if ((l->pos1 + l->len1 <= r->pos1) && // Matches are ordered correctly (should be, from the sort) (l->pos2 + l->len2 <= r->pos2)) { uint32 gapl1 = m->pos1 - (l->pos1 + l->len1); uint32 gapl2 = m->pos2 - (l->pos2 + l->len2); uint32 gapr1 = r->pos1 - (m->pos1 + m->len1); uint32 gapr2 = r->pos2 - (m->pos2 + m->len2); if ((gapl1 + gapr1 == gapl2 + gapr2) && (gapl1 + gapr1 < 5)) { fprintf(logFile, "potential f fix of size L "uint32FMT" "uint32FMT" and R "uint32FMT" "uint32FMT"!\n", gapl1, gapl2, gapr1, gapr2); l->print(logFile, "A", "B"); m->print(logFile, "A", "B"); r->print(logFile, "A", "B"); } } else { fprintf(logFile, "sort is forward broken.\n"); } } // was a forward match } } } #endif // Write the new output to stdout -- we preserve runs here, but // discard everything else. // AF.writeHeader(stdout); for (uint32 i=0; iprint(stdout, AF.labelA(), AF.labelB()); for (uint32 i=0; inumberOfMatches(); i++) AF.runs()->getMatch(i)->print(stdout, AF.labelA(), AF.labelB()); return(0); } kmer-code-2013-trunk/atac-driver/gapShifter/gapShifter.C0000644000000000000000000005671512322046702021657 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "atac.H" #include "bio++.H" #include "seqCache.H" #define MAXPRINT 90 #if 0 #define REPORT_RESULTS #define REPORT_UNSHIFTABLE #define REPORT_SHIFTING #endif // Global statistics, reset on each iteration // uint32 numShifted; // valid to be shifted, and were shifted uint32 numNotShifted; // but valid to be shifted uint32 numDiffSeq; // all the rest are not valid to be shifted uint32 numDiffOri; uint32 numZeroLen; uint32 numOutOfOrder; uint32 numNotAdjacent; uint32 numNoGap; uint32 numGapTooBig; uint32 numOverlapping; uint32 amountShifted[1024]; FILE *logFile = 0L; // XXXXXX outer loop needs to skip empty matches!! // Reads a set of matches and shifts the location of all the gaps to // one side or the other. // // Example: // // A middle gap: // GGGGGGGGGGATATATATATATATATATATATATGGGGGGGGG // GGGGGGGGGGATAT--ATATATATATATATATATGGGGGGGGG // // A left-most gap: // GGGGGGGGGGATATATATATATATATATATATATGGGGGGGGG // GGGGGGGGGG--ATATATATATATATATATATATGGGGGGGGG // // A right-most gap: // GGGGGGGGGGATATATATATATATATATATATATGGGGGGGGG // GGGGGGGGGGATATATATATATATATATATAT--GGGGGGGGG // // Shifting is done for both assembly-axes. // Returns true if these two matches have a potentially shiftable gap // between them. Potentially shiftable means that the matches are // contiguous on one axis, and consecutive (no matches between) on // the other axis. // // Assume the matches are sorted by the first sequence. 
// bool isPotentiallyShiftable(atacMatch *ma, atacMatch *mb, atacMatchOrder &MOB, uint32 gapLimit) { #ifdef REPORT_UNSHIFTABLE fprintf(stderr, "isPotentiallyShiftable()\n"); ma->print(stderr, "A", "B"); mb->print(stderr, "A", "B"); #endif // Not shiftable if on different sequences // if ((ma->iid1 != mb->iid1) || (ma->iid2 != mb->iid2)) { #ifdef REPORT_UNSHIFTABLE fprintf(stderr, "UNSHIFTABLE different sequences\n"); #endif numDiffSeq++; return(false); } // Not shiftable if the orientation of the two matches is // different. This is probably not a gap we want to muck with. // if ((ma->fwd1 != mb->fwd1) || (ma->fwd2 != mb->fwd2)) { #ifdef REPORT_UNSHIFTABLE fprintf(stderr, "UNSHIFTABLE different orientation\n"); #endif numDiffOri++; return(false); } // Not shiftable if any length is zero. This isn't a gap, it's a // dead match. // if ((ma->len1 == 0) || (ma->len2 == 0) || (mb->len1 == 0) || (mb->len2 == 0)) { #ifdef REPORT_UNSHIFTABLE fprintf(stderr, "UNSHIFTABLE zero length\n"); #endif numZeroLen++; return(false); } atacMatch *bl = (ma->fwd2) ? ma : mb; // the left match on B, relative to forward orientation atacMatch *br = (ma->fwd2) ? 
mb : ma; // the right // Not shiftable if the B matches are out of order // if (bl->pos2 > br->pos2) { #ifdef REPORT_UNSHIFTABLE fprintf(stderr, "UNSHIFTABLE misordered on B\n"); #endif numOutOfOrder++; return(false); } uint32 magap = mb->pos1 - (ma->pos1 + ma->len1); uint32 mbgap = br->pos2 - (bl->pos2 + bl->len2); // Not shiftable if there is no zero size gap // if ((magap > 0) && (mbgap > 0)) { #ifdef REPORT_UNSHIFTABLE fprintf(stderr, "UNSHIFTABLE no zero size gap ("uint32FMT", "uint32FMT")\n", magap, mbgap); #endif numNotAdjacent++; return(false); } // Not shiftabe if there is no gap // if ((magap == 0) && (mbgap == 0)) { #ifdef REPORT_UNSHIFTABLE fprintf(stderr, "UNSHIFTABLE no gap on both sequences ("uint32FMT", "uint32FMT")\n", magap, mbgap); #endif numNoGap++; return(false); } // Not shiftable if the gap is big // if ((magap > gapLimit) || (mbgap > gapLimit)) { #ifdef REPORT_UNSHIFTABLE fprintf(stderr, "UNSHIFTABLE gap too big ("uint32FMT", "uint32FMT")\n", magap, mbgap); #endif numGapTooBig++; return(false); } // Not shiftable if they overlap // if (ma->pos1 + ma->len1 > mb->pos1) { #ifdef REPORT_UNSHIFTABLE fprintf(stderr, "UNSHIFTABLE overlap on sequence A\n"); #endif numOverlapping++; return(false); } if (bl->pos2 + bl->len2 > br->pos2) { #ifdef REPORT_UNSHIFTABLE fprintf(stderr, "UNSHIFTABLE overlap on sequence B\n"); #endif numOverlapping++; return(false); } uint32 iid1 = ma->matchiid; uint32 iid2 = mb->matchiid; // Check that there isn't another match stuck in the middle on // the B axis. // if (ma->fwd2 == true) { if ((MOB.index(iid1)+1) != MOB.index(iid2)) { fprintf(stderr, "WARNING: Match inbetween! 
(forward)\n"); fprintf(stderr, "iid1 "uint32FMT", iid2 "uint32FMT"\n", iid1, iid2); ma->print(stderr, "A", "B"); mb->print(stderr, "A", "B"); fprintf(stderr, "before, iid1, after\n"); MOB[MOB.index(iid1)-1]->print(stderr, "A", "B"); MOB[MOB.index(iid1) ]->print(stderr, "A", "B"); MOB[MOB.index(iid1)+1]->print(stderr, "A", "B"); fprintf(stderr, "before, iid2, after\n"); MOB[MOB.index(iid2)-1]->print(stderr, "A", "B"); MOB[MOB.index(iid2) ]->print(stderr, "A", "B"); MOB[MOB.index(iid2)+1]->print(stderr, "A", "B"); return(false); } } else { if ((MOB.index(iid1)-1) != MOB.index(iid2)) { fprintf(stderr, "WARNING: Match inbetween! (reverse-complement)\n"); fprintf(stderr, "iid1 "uint32FMT", iid2 "uint32FMT"\n", iid1, iid2); ma->print(stderr, "A", "B"); mb->print(stderr, "A", "B"); fprintf(stderr, "before, iid1, after\n"); MOB[MOB.index(iid1)-1]->print(stderr, "A", "B"); MOB[MOB.index(iid1) ]->print(stderr, "A", "B"); MOB[MOB.index(iid1)+1]->print(stderr, "A", "B"); fprintf(stderr, "before, iid2, after\n"); MOB[MOB.index(iid2)-1]->print(stderr, "A", "B"); MOB[MOB.index(iid2) ]->print(stderr, "A", "B"); MOB[MOB.index(iid2)+1]->print(stderr, "A", "B"); return(false); } } return(true); } void dumpAgap(atacMatch *ma, atacMatch *mb, atacMatchOrder &MOB, seqCache *C1, seqCache *C2, uint32 gapLimit, bool shiftRight) { } void dumpBgap(atacMatch *ma, atacMatch *mb, atacMatchOrder &MOB, seqCache *C1, seqCache *C2, uint32 gapLimit, bool shiftRight) { } // Returns the beginning of the sequence from pos to pos+len char * getSequenceBeg(char *str, uint32 pos, uint32 len, FastAAccessor &it) { uint32 i = 0; it.setRange(pos, len); if (len > MAXPRINT) len = MAXPRINT; if (len > 0) { it.setPosition(pos); for (i=0; i 0) { it.setPosition(pos); for (i=0; i MAXPRINT) len = MAXPRINT; if (len > 0) { it.setPosition(pos - len); for (i=0; igetSequenceInCore(ma->iid1); seqInCore *s2 = C2->getSequenceInCore(ma->iid2); FastAAccessor mas1(s1, ma->fwd1 == false); FastAAccessor mas2(s2, ma->fwd2 == false); 
FastAAccessor mbs1(s1, mb->fwd1 == false); FastAAccessor mbs2(s2, mb->fwd2 == false); mas1.setRange(ma->pos1, ma->len1); mas2.setRange(ma->pos2, ma->len2); mbs1.setRange(mb->pos1, mb->len1); mbs2.setRange(mb->pos2, mb->len2); uint32 shifted = 0; // We want to extend ma to the right, this will shift the gap to // the right-most position (relative to the forward genomic). // // While there is a match after ma, extend ma to the right, and // decrease mb from the left. // if (shiftRight == false) { // Similar to above. The accessor hides most of the pain caused // by reverse complement. mas1.setPosition(ma->pos1 + ma->len1 - 1); mas2.setPosition(ma->pos2 + ma->len2 - 1); mbs1.setPosition(mb->pos1); --mbs1; mbs2.setPosition(mb->pos2); --mbs2; #ifdef REPORT_DEBUG // Dump out some sequence to see where we really are // fprintf(stderr, "A: "); for (uint32 i=0; i<50; i++) { fprintf(stderr, "%c", *mas1); --mas1; } fprintf(stderr, "\n"); fprintf(stderr, "B: "); for (uint32 i=0; i<50; i++) { fprintf(stderr, "%c", *mbs1); --mbs1; } fprintf(stderr, "\n"); // Reset the iterators // mas1.setPosition(ma->pos1 + ma->len1 - 1); mas2.setPosition(ma->pos2 + ma->len2 - 1); mbs1.setPosition(mb->pos1); --mbs1; mbs2.setPosition(mb->pos2); --mbs2; #endif while (mas1.isValid() && mas2.isValid() && mbs1.isValid() && mbs2.isValid() && (ma->len1 > 0) && (ma->len2 > 0) && (letterToBits[(int)*mbs1] != 0xff) && (letterToBits[(int)*mbs2] != 0xff) && IUPACidentity[(int)*mbs1][(int)*mbs2]) { #ifdef REPORT_SHIFTING fprintf(stderr, "EXTENDrev: MA %c/%c ----- %c/%c MB\n", *mas1, *mas2, *mbs1, *mbs2); #endif mas1.extendRight(-1); ma->len1--; --mas1; mas2.extendRight(-1); ma->len2--; --mas2; mbs1.extendLeft(1); mb->len1++; --mbs1; mbs2.extendLeft(1); mb->len2++; --mbs2; shifted++; } } else { // A wants to be the first thing after ma -- the first base in // the gap. Set the position to the last thing in the range, // then use the increment operator to extend past that. 
The spec // on FastAAccessor says we can't directly go somewhere outside // the range. // mas1.setPosition(ma->pos1 + ma->len1 - 1); ++mas1; mas2.setPosition(ma->pos2 + ma->len2 - 1); ++mas2; // B can be set to the first thing in the match with no problem. // mbs1.setPosition(mb->pos1); mbs2.setPosition(mb->pos2); // While we're still in sequence (isValid()) and we haven't // obliterated the match we're shifting the gap into, and we can // extend the other match (being both validSymbols and identity), // shift the gap to the right. // while (mas1.isValid() && mas2.isValid() && mbs1.isValid() && mbs2.isValid() && (mb->len1 > 0) && (mb->len2 > 0) && (letterToBits[(int)*mas1]) && (letterToBits[(int)*mas2]) && IUPACidentity[(int)*mas1][(int)*mas2]) { #ifdef REPORT_SHIFTING fprintf(stderr, "EXTENDfwd: MA %c/%c ----- %c/%c MB\n", *mas1, *mas2, *mbs1, *mbs2); #endif mas1.extendRight(1); ma->len1++; ++mas1; mas2.extendRight(1); ma->len2++; ++mas2; mbs1.extendLeft(-1); mb->len1--; ++mbs1; mbs2.extendLeft(-1); mb->len2--; ++mbs2; shifted++; } } // Finally, update the two matches with the shifted results. ma->pos1 = mas1.getRangeBegin(); ma->len1 = mas1.getRangeLength(); ma->pos2 = mas2.getRangeBegin(); ma->len2 = mas2.getRangeLength(); mb->pos1 = mbs1.getRangeBegin(); mb->len1 = mbs1.getRangeLength(); mb->pos2 = mbs2.getRangeBegin(); mb->len2 = mbs2.getRangeLength(); // // The rest is just error checking. // if (shifted) numShifted++; else numNotShifted++; if (shifted < 1024) amountShifted[shifted]++; // leftmatch origend newend rightmatch origbegin newbegin if (shifted && logFile) { if (ma->fwd2) { // Forward matches are easy. 
// fprintf(logFile, "%s\t%s\t%s:"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t->\t"uint32FMT"\t"uint32FMT"\t", ma->matchuid, mb->matchuid, AF.labelA(), ma->iid1, macopy.pos1 + macopy.len1, ma->pos1 + ma->len1, mbcopy.pos1, mb->pos1); fprintf(logFile, "%s:"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t->\t"uint32FMT"\t"uint32FMT"\n", AF.labelB(), ma->iid2, macopy.pos2 + macopy.len2, ma->pos2 + ma->len2, mbcopy.pos2, mb->pos2); } else { // Reverse matches are painful. The gap on B is between the // right edge of mb, and the the left edge of ma. // fprintf(logFile, "%s\t%s\t%s:"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t->\t"uint32FMT"\t"uint32FMT"\t", ma->matchuid, mb->matchuid, AF.labelA(), ma->iid1, macopy.pos1 + macopy.len1, ma->pos1 + ma->len1, mbcopy.pos1, mb->pos1); fprintf(logFile, "%s:"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t->\t"uint32FMT"\t"uint32FMT"\n", AF.labelB(), ma->iid2, mbcopy.pos2 + mbcopy.len2, mb->pos2 + mb->len2, macopy.pos2, ma->pos2); } } #ifdef REPORT_RESULTS if (shifted) fprintf(stderr, "SHIFTED "uint32FMT" bases.\n", shifted); else fprintf(stderr, "NOT SHIFTED.\n"); fprintf(stderr, uint32FMTW(9)"-"uint32FMTW(9)" -- "uint32FMTW(9)"-"uint32FMTW(9)"\n", macopy.pos1, macopy.pos1 + macopy.len1, mbcopy.pos1, mbcopy.pos1 + mbcopy.len1); fprintf(stderr, uint32FMTW(9)"-"uint32FMTW(9)" -- "uint32FMTW(9)"-"uint32FMTW(9)"\n", macopy.pos2, macopy.pos2 + macopy.len2, mbcopy.pos2, mbcopy.pos2 + mbcopy.len2); fprintf(stderr, "shifted "uint32FMT" bases (fwd1=%d fwd2=%d fwd1=%d fwd2=%d)\n", shifted, ma->fwd1, ma->fwd2, mb->fwd1, mb->fwd2); fprintf(stderr, uint32FMTW(9)"-"uint32FMTW(9)" -- "uint32FMTW(9)"-"uint32FMTW(9)"\n", ma->pos1, ma->pos1 + ma->len1, mb->pos1, mb->pos1 + mb->len1); fprintf(stderr, uint32FMTW(9)"-"uint32FMTW(9)" -- "uint32FMTW(9)"-"uint32FMTW(9)"\n", ma->pos2, ma->pos2 + ma->len2, mb->pos2, mb->pos2 + mb->len2); #endif uint32 errors = 0; if (macopy.pos1 != ma->pos1) fprintf(stderr, "WARNING: begin of assembly 1 moved!\n"), errors++; if (mbcopy.pos1 + 
mbcopy.len1 != mb->pos1 + mb->len1) fprintf(stderr, "WARNING: end of assembly 1 moved!\n"), errors++; if ((ma->fwd2 == true) && (macopy.pos2 != ma->pos2)) fprintf(stderr, "WARNING: begin of assembly 2 moved!\n"), errors++; if ((ma->fwd2 == false) && (mbcopy.pos2 != mb->pos2)) fprintf(stderr, "WARNING: begin of assembly 2 moved (rc)!\n"), errors++; if ((ma->fwd2 == true) && (mbcopy.pos2 + mbcopy.len2 != mb->pos2 + mb->len2)) fprintf(stderr, "WARNING: end of assembly 1 moved!\n"), errors++; if ((ma->fwd2 == false) && (macopy.pos2 + macopy.len2 != ma->pos2 + ma->len2)) fprintf(stderr, "WARNING: end of assembly 2 moved (rc)!\n"), errors++; // For debugging, claim there were errors if we shifted something. #ifdef REPORT_RESULTS errors++; #endif if (errors > 0) { atacMatch *l = 0L; atacMatch *r = 0L; char str1[1024]; char str2[1024]; char str3[1024]; // Print the sequence. We could print each piece separately // (and, indeed, we tried that initially) but that's difficult // because we need to remember which match is first on B, macopy.print(stderr, "A", "B"); mbcopy.print(stderr, "A", "B"); l = &macopy; r = &mbcopy; getSequenceEnd(str1, l->pos1, l->len1, mas1); getSequenceAll(str2, l->pos1 + l->len1, r->pos1 - l->pos1 - l->len1, mas1); getSequenceBeg(str3, r->pos1, r->len1, mas1); fprintf(stderr, "SEQA: %s -- %s -- %s\n", str1, str2, str3); if (macopy.fwd2) { // We're forward, so l is really first on B. getSequenceEnd(str1, l->pos2, l->len2, mas2); getSequenceAll(str2, l->pos2 + l->len2, r->pos2 - l->pos2 - l->len2, mas2); getSequenceBeg(str3, r->pos2, r->len2, mas2); } else { // Nope, reverse complement, so r is really first on B. This // only changes how we get the gap. // getSequenceEnd(str1, l->pos2, l->len2, mas2); getSequenceAll(str2, r->pos2 + r->len2, l->pos2 - r->pos2 - r->len2, mas2); getSequenceBeg(str3, r->pos2, r->len2, mas2); } fprintf(stderr, "SEQB: %s -- %s -- %s\n", str1, str2, str3); // Do the same thing (same getSequence calls) for the after picture. 
ma->print(stderr, "A", "B"); mb->print(stderr, "A", "B"); l = ma; r = mb; getSequenceEnd(str1, l->pos1, l->len1, mas1); getSequenceAll(str2, l->pos1 + l->len1, r->pos1 - l->pos1 - l->len1, mas1); getSequenceBeg(str3, r->pos1, r->len1, mas1); fprintf(stderr, "SEQA: %s -- %s -- %s\n", str1, str2, str3); if (ma->fwd2) { getSequenceEnd(str1, l->pos2, l->len2, mas2); getSequenceAll(str2, l->pos2 + l->len2, r->pos2 - l->pos2 - l->len2, mas2); getSequenceBeg(str3, r->pos2, r->len2, mas2); } else { getSequenceEnd(str1, l->pos2, l->len2, mas2); getSequenceAll(str2, r->pos2 + r->len2, l->pos2 - r->pos2 - r->len2, mas2); getSequenceBeg(str3, r->pos2, r->len2, mas2); } fprintf(stderr, "SEQB: %s -- %s -- %s\n", str1, str2, str3); } //if (errors) // exit(1); return(shifted); } int main(int argc, char *argv[]) { if (argc == 1) { fprintf(stderr, "usage: %s [options] < matches > matches\n", argv[0]); fprintf(stderr, " Instead of the usual switch based options to enable behavior\n"); fprintf(stderr, " gapShifter iterates of a list of shift directions and sizes.\n"); fprintf(stderr, " l -- shift gaps to the left\n"); fprintf(stderr, " r -- shift gaps to the right\n"); fprintf(stderr, " # -- set the maximum size of a gap to shift\n"); fprintf(stderr, " log x -- open a logfile 'x' for results of the next shift\n"); fprintf(stderr, "\n"); fprintf(stderr, " for example\n"); fprintf(stderr, " gapShifter 1 l r l r 10 l r l log X r < some.atac > shifted.atac\n"); fprintf(stderr, " would shift 1bp gaps to the left, then to the right, then left,\n"); fprintf(stderr, " then set the gap size to 10bp and repeat. 
The last shift is logged\n"); fprintf(stderr, " into fle 'X'.\n"); fprintf(stderr, " \n"); fprintf(stderr, " This is useful since shifting gaps can obliterate matches, but possibly.\n"); fprintf(stderr, " when both left and right shifts are used.\n"); fprintf(stderr, " GCTAATTAGACG\n"); fprintf(stderr, " GCT-AT-AGACG\n"); fprintf(stderr, " The second gap can be shifted to the left, and the first gap can be\n"); fprintf(stderr, " shifted right, resulting in\n"); fprintf(stderr, " GCTAATTAGACG\n"); fprintf(stderr, " GCTA--TAGACG\n"); fprintf(stderr, " Thus, two one base gaps were merged into a two base gap, which might\n"); fprintf(stderr, " then be able to be shifted. e.g.:\n"); fprintf(stderr, " atgatcatcttatc\n"); fprintf(stderr, " at---c-t--tatc\n"); exit(1); } atacFile AF("-"); atacMatchList &ML = *AF.matches(); atacMatchOrder MOA(ML); atacMatchOrder MOB(ML); MOA.sortA(); MOB.sortB(); // second to last == loadAll // last == report loading // seqCache *C1 = new seqCache(AF.assemblyFileA(), 2, false); seqCache *C2 = new seqCache(AF.assemblyFileB(), 1024, false); bool shiftRight = true; uint32 gapLimit = 5; char *logFileName = 0L; int arg=1; while (arg < argc) { bool doShift = false; if (strcmp(argv[arg], "log") == 0) { logFileName = argv[++arg]; errno = 0; logFile = fopen(logFileName, "w"); if (errno) fprintf(stderr, "gapShifter: can't open log file '%s': %s\n", logFileName, strerror(errno)), exit(1); } else if (strcmp(argv[arg], "l") == 0) { shiftRight = false; doShift = true; } else if (strcmp(argv[arg], "r") == 0) { shiftRight = true; doShift = true; } else { gapLimit = strtouint32(argv[arg], 0L); } if (doShift) { for (uint32 x=0; x<1024; x++) amountShifted[x] = 0; numShifted = 0; numNotShifted = 0; numDiffSeq = 0; numDiffOri = 0; numZeroLen = 0; numOutOfOrder = 0; numNotAdjacent = 0; numNoGap = 0; numGapTooBig = 0; numOverlapping = 0; fprintf(stderr, "Shifting gaps of length at most "uint32FMT" bases, to the %s.\n", gapLimit, (shiftRight) ? 
"right" : "left"); uint32 gapsShifted = 0; for (uint32 i=1; ilen1 > 0) && (ma->len2 > 0)) fprintf(stdout, "M u %s %s %s:"uint32FMT" "uint32FMT" "uint32FMT" 1 %s:"uint32FMT" "uint32FMT" "uint32FMT" %d\n", ma->matchuid, ma->parentuid, AF.labelA(), ma->iid1, ma->pos1, ma->len1, AF.labelB(), ma->iid2, ma->pos2, ma->len2, ma->fwd2 ? 1 : -1); } return(0); } kmer-code-2013-trunk/atac-driver/libatac/0000755000000000000000000000000012641613360016750 5ustar rootrootkmer-code-2013-trunk/atac-driver/libatac/atacMatch.H0000644000000000000000000000441312322046702020744 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #ifndef ATAC_MATCH_H #define ATAC_MATCH_H #include #include using namespace std; #include "bio++.H" #include "seqCache.H" class atacMatch { public: atacMatch() { matchuid[0] = 0; parentuid[0] = 0; matchiid = 0; type[0] = 0; iid1 = pos1 = len1 = fwd1 = 0; iid2 = pos2 = len2 = fwd2 = 0; }; atacMatch(char *line); atacMatch(char *muid, char *puid, uint32 miid, char *t, uint32 i1, uint32 p1, uint32 l1, uint32 f1, uint32 i2, uint32 p2, uint32 l2, uint32 f2); void decode(char *line); // Sanity check the match record -- make sure it's within the // sequence itself. 
// bool sanity(seqCache *A, seqCache *B, char *inLine); char matchuid[16]; // external id char parentuid[16]; // external parent id uint32 matchiid; // internal id, usually pointing to an entry in atacMatchList char type[4]; // right now, only need one byte, but we keep things aligned uint32 iid1, pos1, len1, fwd1; uint32 iid2, pos2, len2, fwd2; void print(FILE *f, char const *label1, char const *label2) const { fprintf(f, "M %s %s %s %s:"uint32FMT" "uint32FMT" "uint32FMT" %d %s:"uint32FMT" "uint32FMT" "uint32FMT" %d\n", type, matchuid, parentuid, label1, iid1, pos1, len1, fwd1 ? 1 : -1, label2, iid2, pos2, len2, fwd2 ? 1 : -1); }; }; #endif // ATAC_MATCH_H kmer-code-2013-trunk/atac-driver/libatac/atacFile.C0000644000000000000000000001702511061606335020570 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005, 2006 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "bio++.H" #include "atac.H" static bool isHeader(char *inLine) { return((inLine[0] == '!') || (inLine[0] == '#') || (inLine[0] == '/')); } atacFileStream::atacFileStream(char const *filename) { if (filename == 0L) return; _inFile = stdin; if ((filename != 0L) && (strcmp(filename, "-") != 0)) { errno = 0; _inFile = fopen(filename, "r"); if (errno) fprintf(stderr, "atacFileStream::atacFileStream()-- failed to open %s: %s\n", filename, strerror(errno)), exit(1); } _theMatchIID = 0; _theFeatureIID = 0; readHeader(_inLine, _inFile); } atacFileStream::~atacFileStream() { }; atacMatch* atacFileStream::nextMatch(char type) { atacMatch *ret = 0L; while ((ret == 0L) && (feof(_inFile) == false)) { if (_inLine[0] == 'M') { _theMatch.decode(_inLine); if (_theMatch.type[0] == type) { _theMatch.matchiid = _theMatchIID++; ret = &_theMatch; } } fgets(_inLine, 1024, _inFile); } return(ret); } atacFeature* atacFileStream::nextFeature(char type[4]) { atacFeature *ret = 0L; while ((ret == 0L) && (feof(_inFile) == false)) { if (_inLine[0] == 'F') { _theFeature.decode(_inLine); // Return the feature if it is the correct type. 0 is the // wildcard. 
// if (((_theFeature.type[0] == 0) || (type[0] == 0) || (_theFeature.type[0] == type[0])) && ((_theFeature.type[1] == 0) || (type[1] == 0) || (_theFeature.type[1] == type[1])) && ((_theFeature.type[2] == 0) || (type[2] == 0) || (_theFeature.type[2] == type[2])) && ((_theFeature.type[3] == 0) || (type[3] == 0) || (_theFeature.type[3] == type[3]))) { _theFeature.featureiid = _theFeatureIID++; ret = &_theFeature; } } fgets(_inLine, 1024, _inFile); } return(ret); } atacFile::atacFile(char const *filename) { if (filename == 0L) return; FILE *inFile = stdin; char inLine[1024]; if ((filename != 0L) && (strcmp(filename, "-") != 0)) { errno = 0; inFile = fopen(filename, "r"); if (errno) fprintf(stderr, "atacFile::atacFile()-- failed to load %s: %s\n", filename, strerror(errno)), exit(1); } // Read the preamble, look for our data sources. This leaves us with // the first match in the inLine, and fills in fileA and fileB. // readHeader(inLine, inFile); while (!feof(inFile)) { switch(inLine[0]) { case 'M': { atacMatch m(inLine); if (m.sanity(fastaA(), fastaB(), inLine)) { if ((m.type[0] == 'u') || (m.type[0] == 'x')) { _matches.add(m); } else if (m.type[0] == 'r') { _runs.add(m); } else if (m.type[0] == 'c') { _clumps.add(m); } else { chomp(inLine); fprintf(stderr, "atacFile::atacFile()-- Unknown match record type '%c' -- '%s'.\n", m.type[0], inLine); } } } break; case 'F': { chomp(inLine); fprintf(stderr, "atacFile::atacFile()-- Unknown feature record -- '%s'.\n", inLine); } break; default: { chomp(inLine); fprintf(stderr, "atacFile::atacFile()-- Unknown record -- '%s'.\n", inLine); } break; } fgets(inLine, 1024, inFile); } } atacFile::~atacFile() { } atacFileBase::atacFileBase() { _fileA[0] = 0; _fileB[0] = 0; _labelA[0] = 0; _labelB[0] = 0; _seqA = 0L; _seqB = 0L; } atacFileBase::~atacFileBase() { delete _seqA; delete _seqB; } void atacFileBase::readHeader(char *inLine, FILE *in) { fgets(inLine, 1024, in); while (!feof(in) && isHeader(inLine)) { chomp(inLine); if (inLine[0] 
== '/') { char *key = inLine + 1; char *val = inLine + 1; while (isspace(*key)) key++; // Skip whitespace between "/" and the key while (*val != '=') val++; // Move to the "=" *val++ = 0; // Terminate the key while (isspace(*val)) val++; // Skip whitespace between "=" and the val chomp(key); chomp(val); //fprintf(stderr, "key='%s' val='%s'\n", key, val); string K = key; string V = val; _params[K] = V; // Save ones we use if (strncmp(key, "assemblyFile1", 14) == 0) strcpy(_fileA, val); if (strncmp(key, "assemblyFile2", 14) == 0) strcpy(_fileB, val); if (strncmp(key, "assemblyId1", 12) == 0) strcpy(_labelA, val); if (strncmp(key, "assemblyId2", 12) == 0) strcpy(_labelB, val); } // Otherwise, it's a comment or the header fgets(inLine, 1024, in); } //fprintf(stderr, "assemblyFile1 = '%s'\n", _fileA); //fprintf(stderr, "assemblyFile2 = '%s'\n", _fileB); //fprintf(stderr, "assemblyId1 = '%s'\n", _labelA); //fprintf(stderr, "assemblyId2 = '%s'\n", _labelB); // Open some seqCache for each of the files // if (_fileA && _fileA[0]) { if (fileExists(_fileA)) { _seqA = new seqCache(_fileA); } else { fprintf(stderr, "atacFile::readHeader()-- can't find '%s', no sequence read.\n", _fileA); } } if (_fileB && _fileB[0]) { if (fileExists(_fileA)) { _seqB = new seqCache(_fileB); } else { fprintf(stderr, "atacFile::readHeader()-- can't find '%s', no sequence read.\n", _fileB); } } } void atacFileBase::writeHeader(FILE *out) { if (out == 0L) out = stdout; fprintf(out, "!format atac 1.0\n"); fprintf(out, "#\n"); fprintf(out, "# Legend:\n"); fprintf(out, "#\n"); fprintf(out, "# Field 0: the row class\n"); fprintf(out, "# Field 1: the match type u=ungapped, x=exact, ....\n"); fprintf(out, "# Field 2: the match instance index\n"); fprintf(out, "# Field 3: the parent index\n"); fprintf(out, "# Field 4: the FASTA sequence id in the first assembly\n"); fprintf(out, "# Field 5: the offset from the start of the sequence for the match\n"); fprintf(out, "# Field 6: the length of the match in the 
first assembly\n"); fprintf(out, "# Field 7: the orientation of the match sequence in the first assembly.\n"); fprintf(out, "# Field 8: the FASTA sequence id for the second assembly\n"); fprintf(out, "# Field 9: the offset from the start of the sequence for the match\n"); fprintf(out, "# Field 10: the length of the match in the second assembly\n"); fprintf(out, "# Field 11: the orientation of the match sequence in the second assembly.\n"); fprintf(out, "#\n"); map::iterator it; for (it=_params.begin(); it != _params.end(); it++) fprintf(out, "/%s=%s\n", it->first.c_str(), it->second.c_str()); } kmer-code-2013-trunk/atac-driver/libatac/atacFeatureList.H0000644000000000000000000000322412322046702022136 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #ifndef ATAC_FEATURELIST_H #define ATAC_FEATURELIST_H #include #include using namespace std; #include "bio++.H" class atacFeatureList { public: atacFeatureList(); ~atacFeatureList(); uint32 numberOfFeatures(void) { return(_featuresLen); }; atacFeature *getFeature(uint32 i) { return(_features + i); }; atacFeature *operator[](uint32 i) { return(_features + i); }; void add(atacFeature &m); //void delete(); // Sort by the A assembly or B assembly location void sort(uint32 first=0, uint32 len=0); void sortFeatureUID(uint32 first=0, uint32 len=0); void sortParentUID(uint32 first=0, uint32 len=0); private: uint32 _featuresLen; uint32 _featuresMax; atacFeature *_features; }; #endif // ATAC_FEATURELIST_H kmer-code-2013-trunk/atac-driver/libatac/atacFileStreamMerge.C0000644000000000000000000000764512322046702022730 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2007 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "bio++.H" #include "atac.H" class afsm { public: atacFileStream *_theFile; atacMatch *_theMatch; atacFeature *_theFeature; bool _endOfFile; afsm() { _theFile = 0L; _theMatch = 0L; _theFeature = 0L; _endOfFile = false; }; ~afsm() { }; }; atacFileStreamMerge::atacFileStreamMerge(void) { _filesLen = 0; _filesMax = 4; _files = new afsm [_filesMax]; _theMatchIID = 0; _theFeatureIID = 0; } atacFileStreamMerge::~atacFileStreamMerge(void) { for (uint32 i=0; i<_filesLen; i++) delete _files[i]._theFile; delete [] _files; // But wait! Unless we munge our copies of data, we'll free things // again when atacFileBase destructs! // _seqA = 0L; _seqB = 0L; } void atacFileStreamMerge::writeHeader(FILE *out) { if (_files[0]._theFile != 0L) _files[0]._theFile->writeHeader(out); } void atacFileStreamMerge::addFile(char const *filename) { if (filename == 0L) return; if (_filesLen >= _filesMax) { _filesMax *= 2; afsm *F = new afsm [_filesMax]; memcpy(F, _files, sizeof(afsm) * _filesLen); delete [] _files; _files = F; } _files[_filesLen]._theFile = new atacFileStream(filename); // Duplicate a bunch of stuff to our file. 
// if (_filesLen == 0) { strcpy(_fileA, _files[_filesLen]._theFile->assemblyFileA()); strcpy(_fileB, _files[_filesLen]._theFile->assemblyFileB()); strcpy(_labelA, _files[_filesLen]._theFile->labelA()); strcpy(_labelB, _files[_filesLen]._theFile->labelB()); //_params = _files[_filesLen]._theFile->_params; _seqA = _files[_filesLen]._theFile->fastaA(); _seqB = _files[_filesLen]._theFile->fastaB(); } _filesLen++; } atacMatch* atacFileStreamMerge::nextMatch(char type) { atacMatch *theMatch; uint32 theMatchIdx; // Make sure everyone has a match // for (uint32 i=0; i<_filesLen; i++) { if (_files[i]._endOfFile == false) { if (_files[i]._theMatch == 0L) _files[i]._theMatch = _files[i]._theFile->nextMatch(type); if (_files[i]._theMatch == 0L) _files[i]._endOfFile = true; } } // Pick the smallest. // theMatch = _files[0]._theMatch; theMatchIdx = 0; // need to set matchIID // should probably also make a new match UID, or better, fix seatac to make UIDs for (uint32 i=1; i<_filesLen; i++) { if (_files[i]._theMatch) { if (theMatch == 0L) { theMatch = _files[i]._theMatch; theMatchIdx = i; } if (theMatch) { if (_files[i]._theMatch->iid1 < theMatch->iid1) { theMatch = _files[i]._theMatch; theMatchIdx = i; } if ((_files[i]._theMatch->iid1 <= theMatch->iid1) && (_files[i]._theMatch->iid2 <= theMatch->iid2)) { theMatch = _files[i]._theMatch; theMatchIdx = i; } } } } // Mark it as used // _files[theMatchIdx]._theMatch = 0L; return(theMatch); } atacFeature* atacFileStreamMerge::nextFeature(char type[4]) { return(0L); } kmer-code-2013-trunk/atac-driver/libatac/atacFeature.H0000644000000000000000000000463412322046702021310 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. 
Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #ifndef ATAC_FEATURE_H #define ATAC_FEATURE_H #include #include using namespace std; #include "bio++.H" #include "seqCache.H" // A barebones feature. // // F type featureuid parentuid LABEL:IID POS LEN // // The idea is to mark some region as containing some feature. You // can use the featureiid to map to a chunk of non-atac data, e.g., a // structure containing information about the feature. // // An EST feature could be represented as: // F est est4 . B35:3 423551 10421 // F exon exon7 est4 B35:3 423551 346 // F exon exon8 est4 B35:3 425931 146 // F exon exon9 est4 B35:3 433426 546 // // There is nothing there that immediately links these atac features // to their respective est/exon data structures. This is a Good // Thing (tm) because usually we don't have any form of ID with those // data structures, so we'd be using an offset or something, which // would change if the features are filtered.
// class atacFeature { public: atacFeature() {}; atacFeature(char *line); atacFeature(char *fuid, char *puid, uint32 fiid, char *t, uint32 i, uint32 p, uint32 l); void decode(char *line); bool sanity(seqCache *A, char *inLine); char featureuid[16]; char parentuid[16]; uint32 featureiid; char type[4]; uint32 iid, pos, len; void print(FILE *f, char const *label) const { fprintf(f, "F %s %s %s %s:"uint32FMT" "uint32FMT" "uint32FMT"\n", type, featureuid, parentuid, label, iid, pos, len); }; }; #endif // ATAC_FEATURE_H kmer-code-2013-trunk/atac-driver/libatac/atacFeature.C0000644000000000000000000000532012322046702021274 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005, 2006 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "bio++.H" #include "atac.H" static uint32 decodeAtacName(char *atac, char *label) { if (label) { while (*atac && (*atac != ':')) *label++ = *atac++; *label = 0; } else { while (*atac && (*atac != ':')) atac++; } if (*atac) return(strtouint32(atac+1, 0L)); return(~uint32ZERO); } atacFeature::atacFeature(char *line) { decode(line); } atacFeature::atacFeature(char *fuid, char *puid, uint32 fiid, char *t, uint32 i, uint32 p, uint32 l) { strcpy(featureuid, fuid); strcpy(parentuid, puid); featureuid[15] = 0; parentuid[15] = 0; featureiid = fiid; type[0] = 0; type[1] = 0; type[2] = 0; type[3] = 0; strcpy(type, t); iid = i; pos = p; len = l; } void atacFeature::decode(char *line) { splitToWords W(line); strcpy(featureuid, W[2]); strcpy(parentuid, W[3]); featureuid[15] = 0; parentuid[15] = 0; featureiid = 0; type[0] = 0; type[1] = 0; type[2] = 0; type[3] = 0; strcpy(type, W[1]); iid = decodeAtacName(W[4], 0L); pos = strtouint32(W[5], 0L); len = strtouint32(W[6], 0L); } bool atacFeature::sanity(seqCache *A, char *inLine) { bool featureOK = true; if (A) { if ((pos) > A->getSequenceLength(iid) || (pos + len) > A->getSequenceLength(iid)) { chomp(inLine); fprintf(stderr, "Feature longer than sequence (by "uint32FMT"bp): seqLen="uint32FMTW(8)" %s\n", pos + len - A->getSequenceLength(iid), A->getSequenceLength(iid), inLine); featureOK = false; } if (iid >= A->getNumberOfSequences()) { chomp(inLine); fprintf(stderr, "Feature references invalid sequence iid: %s\n", inLine); featureOK = false; } } return(featureOK); } kmer-code-2013-trunk/atac-driver/libatac/atacMatchList.H0000644000000000000000000000273112322046702021601 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. 
Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #ifndef ATAC_MATCHLIST_H #define ATAC_MATCHLIST_H #include #include using namespace std; #include "bio++.H" class atacMatchList { public: atacMatchList(); ~atacMatchList(); void add(atacMatch &m); //void delete(); uint32 numMatches(void) { return(_matchesLen); }; uint32 numberOfMatches(void) { return(_matchesLen); }; atacMatch *getMatch(uint32 i) { return(_matches + i); }; atacMatch *operator[](uint32 i) { return(_matches + i); }; private: uint32 _matchesLen; uint32 _matchesMax; atacMatch *_matches; }; #endif // ATAC_MATCHLIST_H kmer-code-2013-trunk/atac-driver/libatac/fasta-accessor.H0000644000000000000000000001257212322046702021762 0ustar rootroot#ifndef FASTA_ACCESSOR_H #define FASTA_ACCESSOR_H #include "bio++.H" // Define this to do bounds checking // #if 1 #define SANITY(NAM, POS) \ if ((POS) > _len) { \ fprintf(stderr, "%s-- position "uint32FMT" larger than length "uint32FMT"\n", \ NAM, (POS), _len); \ assert((POS) <= _len); \ } #else #define SANITY(NAM, POS) #endif // Externally, we show the coordinate in the forward strand. // Internally, we represent the coordinate on the physical string. // The user is required to maintain the range that the // reverse-complement string is valid over. 
It is not possible to // randomly access sequence outside the range, but it is possible to // iterate over it (but then you cannot get the coordinate of where // you are at!) class FastAAccessor { private: public: char *_seq; uint32 _pos; uint32 _len; uint32 _rcBase; uint32 _rcLen; bool _doRevComp; private: void FastAAccessorInit(char *S, uint32 length, bool revcomp) { _seq = S; _pos = 0; _len = length; if (length == 0) _len = (uint32)strlen(S); _rcBase = 0; _rcLen = _len; _doRevComp = revcomp; if (_doRevComp) _pos = _len-1; }; public: FastAAccessor(seqInCore *S, bool revcomp=false) { FastAAccessorInit(S->sequence(), S->sequenceLength(), revcomp); }; FastAAccessor(char *S, uint32 length=0, bool revcomp=false) { FastAAccessorInit(S, length, revcomp); }; private: // Given a range in the forward string, we can reverse-complement // just that range. This amounts to translating the forward string // to make the beginning of the range be the origin, then // reverseing the range, then translating the sequence back to the // original origin. // uint32 rc(uint32 p) const { return(_rcBase + _rcLen - (p - _rcBase) - 1); }; public: // For iterating over reverse complement regions of a forward // sequence. // // e.g., (500, 250) would be: // // |-----|-----|XXXXX|-----| // 0 250 500 750 1000 // // Set both to zero (also the default) to unset the range // // The physical location (_pos) doesn't change, but this will // change the value of the corresponding forward coordinate, but // not the meaning. // bool setRange(uint32 base=0, uint32 length=0) { if ((base < _len) && (base + length <= _len)) { if ((base == 0) && (length == 0)) { _rcBase = 0; _rcLen = _len; } else { _rcBase = base; _rcLen = length; } return(true); } else { fprintf(stderr, "FastAAccessor::setRange()-- base="uint32FMT" and length="uint32FMT" exceed sequence length of "uint32FMT"\n", base, length, _len); assert(0); return(false); } }; // True if this physical location is valid. 
// bool isValid(void) { return(_pos < _len); }; char operator[](uint32 p) const { if ((_rcBase <= p) && (p < _rcBase + _rcLen)) { if (_doRevComp) return(complementSymbol[_seq[rc(p)]]); else return(_seq[p]); } else { fprintf(stderr, "operator[]-- Tried to access to "uint32FMT", but range is "uint32FMT"-"uint32FMT"\n", p, _rcBase, _rcBase + _rcLen); assert(0); return(0); } }; // Set the accessor to some position. // bool setPosition(uint32 p) { if ((_rcBase <= p) && (p < _rcBase + _rcLen)) { if (_doRevComp) _pos = rc(p); else _pos = p; return(true); } else { fprintf(stderr, "setPosition()-- Tried to set to "uint32FMT", but range is "uint32FMT"-"uint32FMT".\n", p, _rcBase, _rcBase + _rcLen); assert(0); return(false); } }; uint32 getPosition(void) { if (_doRevComp) return(rc(_pos)); else return(_pos); }; uint32 getRangeBegin(void) { return(_rcBase); }; uint32 getRangeEnd(void) { return(_rcBase + _rcLen); }; uint32 getRangeLength(void) { return(_rcLen); }; bool extendLeft(int32 x) { _rcLen += x; if (_doRevComp == false) _rcBase -= x; if ((_rcBase > _len) || (_rcBase + _rcLen > _len)) { fprintf(stderr, "FastAAccessor::extendLeft()-- extend by "int32FMT" makes invalid: length is "uint32FMT", new range is "uint32FMT"-"uint32FMT"\n", x, _len, _rcBase, _rcBase + _rcLen); assert(0); return(false); } return(true); }; bool extendRight(int32 x) { _rcLen += x; if (_doRevComp == true) _rcBase -= x; if ((_rcBase > _len) || (_rcBase + _rcLen > _len)) { fprintf(stderr, "FastAAccessor::extendRight()-- extend by "int32FMT" makes invalid: length is "uint32FMT", new range is "uint32FMT"-"uint32FMT"\n", x, _len, _rcBase, _rcBase + _rcLen); assert(0); return(false); } return(true); }; char operator*(void) const { SANITY("FastAAccessor::operator*()", _pos); if (_doRevComp) return(complementSymbol[_seq[_pos]]); else return(_seq[_pos]); }; char get(void) const { SANITY("FastAAccessor::get()", _pos); if (_doRevComp) return(complementSymbol[_seq[_pos]]); else return(_seq[_pos]); }; 
FastAAccessor &operator--(void) { if (_doRevComp) _pos++; else _pos--; return(*this); }; FastAAccessor &operator++(void) { if (_doRevComp) _pos--; else _pos++; return(*this); }; }; #endif // FASTA_ACCESSOR_H kmer-code-2013-trunk/atac-driver/libatac/atacMatch.C0000644000000000000000000000716012322046702020741 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "atac.H" static uint32 decodeAtacName(char *atac, char *label) { if (label) { while (*atac && (*atac != ':')) *label++ = *atac++; *label = 0; } else { while (*atac && (*atac != ':')) atac++; } if (*atac) return(strtouint32(atac+1, 0L)); return(~uint32ZERO); } atacMatch::atacMatch(char *line) { decode(line); } atacMatch::atacMatch(char *muid, char *puid, uint32 miid, char *t, uint32 i1, uint32 p1, uint32 l1, uint32 f1, uint32 i2, uint32 p2, uint32 l2, uint32 f2) { strncpy(matchuid, muid, 16); strncpy(parentuid, puid, 16); matchuid[15] = 0; parentuid[15] = 0; matchiid = miid; type[0] = 0; type[1] = 0; type[2] = 0; type[3] = 0; type[0] = t[0]; type[1] = t[1]; if (t[1]) type[2] = t[2]; iid1 = i1; pos1 = p1; len1 = l1; fwd1 = f1; iid2 = i2; pos2 = p2; len2 = l2; fwd2 = f2; } void atacMatch::decode(char *line) { iid1 = 0; 
pos1 = 0; len1 = 0; fwd1 = 0; iid2 = 0; pos2 = 0; len2 = 0; fwd2 = 0; splitToWords S(line); iid1 = decodeAtacName(S[4], 0L); pos1 = strtouint32(S[5], 0L); len1 = strtouint32(S[6], 0L); fwd1 = (S[7][0] == '-') ? 0 : 1; iid2 = decodeAtacName(S[8], 0L); pos2 = strtouint32(S[9], 0L); len2 = strtouint32(S[10], 0L); fwd2 = (S[11][0] == '-') ? 0 : 1; strncpy(matchuid, S[2], 16); strncpy(parentuid, S[3], 16); matchuid[15] = 0; parentuid[15] = 0; matchiid = 0; type[0] = 0; type[1] = 0; type[2] = 0; type[3] = 0; type[0] = S[1][0]; type[1] = S[1][1]; if (S[1][1]) type[2] = S[1][2]; } // Sanity check the match record -- make sure it's within the // sequence itself. // bool atacMatch::sanity(seqCache *A, seqCache *B, char *inLine) { bool matchOK = true; if (A && B) { if ((pos1) > A->getSequenceLength(iid1) || (pos1 + len1) > A->getSequenceLength(iid1)) { chomp(inLine); fprintf(stderr, "Match longer than sequence (by "uint32FMT"bp) in 1: seqLen="uint32FMTW(8)" %s\n", pos1 + len1 - A->getSequenceLength(iid1), A->getSequenceLength(iid1), inLine); matchOK = false; } if ((pos2) > B->getSequenceLength(iid2) || (pos2 + len2) > B->getSequenceLength(iid2)) { chomp(inLine); fprintf(stderr, "Match longer than sequence (by "uint32FMT"bp) in 2: seqLen="uint32FMTW(8)" %s\n", pos2 + len2 - B->getSequenceLength(iid2), B->getSequenceLength(iid2), inLine); matchOK = false; } if ((iid1 >= A->getNumberOfSequences()) || (iid2 >= B->getNumberOfSequences())) { chomp(inLine); fprintf(stderr, "Match references invalid sequence iid: %s\n", inLine); matchOK = false; } } return(matchOK); } kmer-code-2013-trunk/atac-driver/libatac/atac.H0000644000000000000000000000613412322046702017771 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. 
Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #ifndef ATAC_COMMON_H #define ATAC_COMMON_H #include #include using namespace std; #include "bio++.H" #include "seqCache.H" #include "atacMatch.H" #include "atacMatchList.H" #include "atacMatchOrder.H" #include "atacFeature.H" #include "atacFeatureList.H" #include "fasta-accessor.H" class atacFileBase { public: atacFileBase(); virtual ~atacFileBase(); char *labelA(void) { return(_labelA); }; char *labelB(void) { return(_labelB); }; char *assemblyFileA(void) { return(_fileA); }; char *assemblyFileB(void) { return(_fileB); }; seqCache *fastaA(void) { return(_seqA); }; seqCache *fastaB(void) { return(_seqB); }; protected: void readHeader(char *inLine, FILE *in); public: void writeHeader(FILE *out); protected: char _fileA[1024]; // The name of our genome files char _fileB[1024]; char _labelA[256]; // The label of each of the sequences char _labelB[256]; map _params; seqCache *_seqA; seqCache *_seqB; }; class atacFileStream : public atacFileBase { public: atacFileStream(char const *filename); ~atacFileStream(); atacMatch *nextMatch(char type); atacFeature *nextFeature(char type[4]); private: FILE *_inFile; char _inLine[1024]; uint32 _theMatchIID; uint32 _theFeatureIID; atacMatch _theMatch; atacFeature 
_theFeature; }; class afsm; class atacFileStreamMerge : public atacFileBase { public: atacFileStreamMerge(void); ~atacFileStreamMerge(); void addFile(char const *filename); atacMatch *nextMatch(char type); atacFeature *nextFeature(char type[4]); void writeHeader(FILE *out); private: uint32 _filesLen; uint32 _filesMax; afsm *_files; uint32 _theMatchIID; uint32 _theFeatureIID; }; class atacFile : public atacFileBase { public: atacFile(char const *filename); ~atacFile(); atacMatchList *matches(void) { return(&_matches); }; atacMatchList *runs(void) { return(&_runs); }; atacMatchList *clumps(void) { return(&_clumps); }; private: atacMatchList _matches; atacMatchList _runs; atacMatchList _clumps; }; #endif // ATAC_COMMON_H kmer-code-2013-trunk/atac-driver/libatac/atacFeatureList.C0000644000000000000000000000611312322046702022131 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2006 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "bio++.H" #include "atac.H" atacFeatureList::atacFeatureList() { _featuresLen = 0; _featuresMax = 256; _features = new atacFeature [_featuresMax]; } atacFeatureList::~atacFeatureList() { delete [] _features; } void atacFeatureList::add(atacFeature &m) { if (_featuresLen >= _featuresMax) { _featuresMax <<= 2; atacFeature *A = new atacFeature [_featuresMax]; memcpy(A, _features, sizeof(atacFeature) * _featuresLen); delete [] _features; _features = A; } memcpy(&_features[_featuresLen], &m, sizeof(atacFeature)); _features[_featuresLen].featureiid = _featuresLen++; } static int sort_(const void *a, const void *b) { const atacFeature *A = (const atacFeature *)a; const atacFeature *B = (const atacFeature *)b; if (A->iid < B->iid) return(-1); if (A->iid > B->iid) return(1); if (A->pos < B->pos) return(-1); if (A->pos > B->pos) return(1); if (A->len > B->len) return(-1); if (A->len < B->len) return(1); return(0); } static int sortfeatureuid_(const void *a, const void *b) { const atacFeature *A = (const atacFeature *)a; const atacFeature *B = (const atacFeature *)b; int r = strcmp(A->featureuid, B->featureuid); if (r < 0) return(-1); if (r > 0) return(1); r = strcmp(A->parentuid, B->parentuid); if (r < 0) return(-1); if (r > 0) return(1); return(0); } static int sortparentuid_(const void *a, const void *b) { const atacFeature *A = (const atacFeature *)a; const atacFeature *B = (const atacFeature *)b; int r = strcmp(A->parentuid, B->parentuid); if (r < 0) return(-1); if (r > 0) return(1); r = strcmp(A->featureuid, B->featureuid); if (r < 0) return(-1); if (r > 0) return(1); return(0); } void atacFeatureList::sort(uint32 first, uint32 len) { if (len == 0) len = _featuresLen; qsort(_features + first, len, 
sizeof(atacFeature), sort_); } void atacFeatureList::sortFeatureUID(uint32 first, uint32 len) { if (len == 0) len = _featuresLen; qsort(_features + first, len, sizeof(atacFeature), sortfeatureuid_); } void atacFeatureList::sortParentUID(uint32 first, uint32 len) { if (len == 0) len = _featuresLen; qsort(_features + first, len, sizeof(atacFeature), sortparentuid_); } kmer-code-2013-trunk/atac-driver/libatac/Make.include0000644000000000000000000000137311512763666021211 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../../libutil/)/ LIBBIO/ :=$(realpath $/../../libbio/)/ LIBSEQ/ :=$(realpath $/../../libseq/)/ $/.CXX_INCS := $/atac.H \ $/atacFeature.H \ $/atacFeatureList.H \ $/atacMatch.H \ $/atacMatchList.H \ $/atacMatchOrder.H $/.CXX_SRCS := $/atacFeature.C \ $/atacFeatureList.C \ $/atacFile.C \ $/atacFileStreamMerge.C \ $/atacMatch.C \ $/atacMatchList.C \ $/atacMatchOrder.C $/.CXX_LIBS := $/libatac.a $/.CLEAN :=$/*.o $/*~ $/libatac.a: ${$/.CXX_SRCS:.C=.o} $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBBIO/} -I${LIBSEQ/} -I${LIBSEQ/} -I${LIBUTL/}) kmer-code-2013-trunk/atac-driver/libatac/fasta-accessor-test.C0000644000000000000000000001676412322046702022741 0ustar rootroot#include #include #include "fasta-accessor.H" // rev is the reverse complement of fwd, so if we make an accessor // from it, and set the reverse-complement flag we should get back // exactly fwd. // void simpleTest(void) { // Yup, real sequence. I don't remember what it belongs to. 
char *fwd = "CGCTTTAATGGGCGAACAGCCCAACCCTTGGGAGAGACTCCACCCCCAGGATGCGACGAGCCGACATCGAGGTGCCAAAC" "CATGCCGTCGATATGGACTCTGAGCGACGCCGCTTCCACAAGCCAGCGCCGGGTCACTAGTTCCGACTTTCGTCCCTGCT" "CGACCTGCCGGTCTCGCAGTCAAGCTCCCTTGTGCACTTAGCCTCCGTTACCTTTTAGGAGGCAACCGCCCCAGTTAAAC" "TACCCACCAGGCAATGTCCCTGATCCGGATCACGGACCTAGGTTAGATATCCAGAACGACGCTTCACAGTCTCCCACCTA" "TCCTACACAAGCCGTACCGAACACCAATACCAAGCTATAGTAAAGGTCCCGGGGTCTTTCCGTCCTGCTGCGCGTAACGA" "CAGTGGAGAAGTCGTTACGCCATTCGTGCAGGTCGGAACTTACCCGACAAGGAATTTCGCTACCTTAGGATGGTTATAGT" "TACCACCGCCGTTTACTGGGTTAACCTTCCAGCACCGGGCAGGCGTCAGTCCGTATACATCGTCTTGCGACTTAGCACGG" "ACCTGTGTTTTTAGTAAACAGTCGCTTCTCCCTGGTCTCTCCCCTTCTCCCGAAGTTACGGGGGTATTTTGCCGAGTTCC" "TTAACCATGATTCACTCGATCGCCTTGGTATTCTCTACCTAACCACCTGAGTCGGTTTGGTAGGATCACCCTGCTTCCCG" "CATTCGCGGTCACTATCAGGTCTCAGGATATGTGTGAGACGGATTTGCCTATCTCACTCCCTACACCCTTGGACGTGGAC" "TTGACTACTACCAAATCGGGTCACGCGCTCCGCTCAACATTCCATCACCCGAAGGTGACAGAAAAAAGAGTTTTAGGCGT" "TTAGCATCAAAAGGTTCATCTCGACTACGCCTGTCGGCCTCGCCTTAGGTCCCGACTTACCCAGGGCAGATTAGCTTGAC" "CCTGGAACCCTTGGTTATTCGGCGGACGGGTTTCTCGCCC"; char *rev = "GGGCGAGAAACCCGTCCGCCGAATAACCAAGGGTTCCAGGGTCAAGCTAATCTGCCCTGGGTAAGTCGGGACCTAAGGCG" "AGGCCGACAGGCGTAGTCGAGATGAACCTTTTGATGCTAAACGCCTAAAACTCTTTTTTCTGTCACCTTCGGGTGATGGA" "ATGTTGAGCGGAGCGCGTGACCCGATTTGGTAGTAGTCAAGTCCACGTCCAAGGGTGTAGGGAGTGAGATAGGCAAATCC" "GTCTCACACATATCCTGAGACCTGATAGTGACCGCGAATGCGGGAAGCAGGGTGATCCTACCAAACCGACTCAGGTGGTT" "AGGTAGAGAATACCAAGGCGATCGAGTGAATCATGGTTAAGGAACTCGGCAAAATACCCCCGTAACTTCGGGAGAAGGGG" "AGAGACCAGGGAGAAGCGACTGTTTACTAAAAACACAGGTCCGTGCTAAGTCGCAAGACGATGTATACGGACTGACGCCT" "GCCCGGTGCTGGAAGGTTAACCCAGTAAACGGCGGTGGTAACTATAACCATCCTAAGGTAGCGAAATTCCTTGTCGGGTA" "AGTTCCGACCTGCACGAATGGCGTAACGACTTCTCCACTGTCGTTACGCGCAGCAGGACGGAAAGACCCCGGGACCTTTA" "CTATAGCTTGGTATTGGTGTTCGGTACGGCTTGTGTAGGATAGGTGGGAGACTGTGAAGCGTCGTTCTGGATATCTAACC" "TAGGTCCGTGATCCGGATCAGGGACATTGCCTGGTGGGTAGTTTAACTGGGGCGGTTGCCTCCTAAAAGGTAACGGAGGC" "TAAGTGCACAAGGGAGCTTGACTGCGAGACCGGCAGGTCGAGCAGGGACGAAAGTCGGAACTAGTGACCCGGCGCTGGCT" 
"TGTGGAAGCGGCGTCGCTCAGAGTCCATATCGACGGCATGGTTTGGCACCTCGATGTCGGCTCGTCGCATCCTGGGGGTG" "GAGTCTCTCCCAAGGGTTGGGCTGTTCGCCCATTAAAGCG"; FastAAccessor F(fwd, 1000, false); FastAAccessor R(rev, 1000, true); uint32 C; for (uint32 i=0; i<1000; i++) if (F[i] != R[i]) exit(1); F.setPosition(0); R.setPosition(0); for (C=0; F.isValid() && R.isValid(); ++C, ++F, ++R) if (*F != *R) exit(2); if (C != 1000) exit(3); F.setPosition(999); R.setPosition(999); for (C=0; F.isValid() && R.isValid(); ++C, --F, --R) if (*F != *R) exit(4); if (C != 1000) exit(5); } // Test pulling out a subsequence. We're given coordinates in the // forward direction, but want to pull out the reverse complement // sequence. // void easierTest(void) { char sub[1000]; int i; // 100A 200N 100T 400N 200G // for (i=0; i<1000; i++) sub[i] = 'N'; for (i=0; i<100; i++) sub[i] = 'A'; for (i=300; i<400; i++) sub[i] = 'T'; for (i=600; i<700; i++) sub[i] = 'R'; for (i=800; i<1000; i++) sub[i] = 'G'; // Pull out the reverse-complement sequence from 300-400 // // Asking for sequence from 300 to 400 should give up exactly to // 'A' (reverse-complelent of T) block. // // Without setting the range, we'd get back the sequence at // 700-600, the location when globally reverse-complemented. // FastAAccessor S(sub, 1000, true); S.setRange(300, 100); for (i=300; i<400; i++) if (S[i] != 'A') fprintf(stderr, "FAILED: got %c at pos %d\n", S[i], i), exit(5); S.setRange(0, 0); for (i=300; i<400; i++) if (S[i] != 'Y') fprintf(stderr, "FAILED: got %c at pos %d\n", S[i], i), exit(6); } // A harder test: build an accessor to access sequence from 100 to // 300, and grow/shrink the region. // void harderTest(void) { char sub[1000]; int e; // 100A 200C 300G 400N // for (int i=0; i<1000; i++) sub[i] = 'N'; for (int i=0; i<100; i++) sub[i] = 'A'; for (int i=100; i<300; i++) sub[i] = 'C'; for (int i=300; i<600; i++) sub[i] = 'G'; // Try forward. 
{ fprintf(stderr, "Forward setRange/setPosition\n"); FastAAccessor A(sub, 1000, false); A.setRange(100, 200); A.setPosition(100); fprintf(stderr, "Range: "uint32FMT"-"uint32FMT" len="uint32FMT"\n", A.getRangeBegin(), A.getRangeEnd(), A.getRangeLength()); if ((A.getRangeBegin() != 100) || (A.getRangeEnd() != 300) || (A.getRangeLength() != 200)) fprintf(stderr, "FAILED.\n"), exit(1); e = 0; for (int j=0; j<200; j++) { fprintf(stderr, "%c", *A); if (*A != 'C') e++; ++A; } fprintf(stderr, "\n"); if (e) fprintf(stderr, "FAILED forward setRange/setPosition test: %d errors\n", e), exit(1); // Decrease the size of our region, using the extend operators, then shift it // to the right/left. // for (int i=0; i<190; i++) A.extendLeft(-1); for (int i=0; i<10; i++) A.extendRight(1); for (int i=0; i<10; i++) A.extendLeft(-1); fprintf(stderr, "Range: "uint32FMT"-"uint32FMT" len="uint32FMT"\n", A.getRangeBegin(), A.getRangeEnd(), A.getRangeLength()); if ((A.getRangeBegin() != 300) || (A.getRangeEnd() != 310) || (A.getRangeLength() != 10)) fprintf(stderr, "FAILED.\n"), exit(1); e = 0; for (int j=0; j<20; j++) { fprintf(stderr, "%c", *A); if (*A != 'G') e++; ++A; } fprintf(stderr, "\n"); if (e) fprintf(stderr, "FAILED reverse extendRange test: %d errors\n", e), exit(1); } { fprintf(stderr, "Reverse setRange/setPosition\n"); FastAAccessor A(sub, 1000, true); A.setRange(100, 200); A.setPosition(100); fprintf(stderr, "Range: "uint32FMT"-"uint32FMT" len="uint32FMT"\n", A.getRangeBegin(), A.getRangeEnd(), A.getRangeLength()); if ((A.getRangeBegin() != 100) || (A.getRangeEnd() != 300) || (A.getRangeLength() != 200)) fprintf(stderr, "FAILED.\n"), exit(1); e = 0; for (int j=0; j<200; j++) { fprintf(stderr, "%c", *A); if (*A != 'G') e++; ++A; } fprintf(stderr, "\n"); if (e) fprintf(stderr, "FAILED reverse setRange/setPosition test: %d errors\n", e), exit(1); // Decrease the size of our region, using the extend operators, then shift it // to the right/left. 
// for (int i=0; i<190; i++) A.extendLeft(-1); for (int i=0; i<10; i++) A.extendRight(1); for (int i=0; i<10; i++) A.extendLeft(-1); fprintf(stderr, "Range: "uint32FMT"-"uint32FMT" len="uint32FMT"\n", A.getRangeBegin(), A.getRangeEnd(), A.getRangeLength()); if ((A.getRangeBegin() != 90) || (A.getRangeEnd() != 100) || (A.getRangeLength() != 10)) fprintf(stderr, "FAILED.\n"), exit(1); e = 0; for (int j=0; j<20; j++) { fprintf(stderr, "%c", *A); if (*A != 'T') e++; ++A; } fprintf(stderr, "\n"); if (e) fprintf(stderr, "FAILED reverse extendRange test: %d errors\n", e), exit(1); } } int main(int argc, char **argv) { simpleTest(); easierTest(); harderTest(); fprintf(stderr, "All tests OK!\n"); exit(0); } kmer-code-2013-trunk/atac-driver/libatac/atacMatchOrder.H0000644000000000000000000000576312322046702021751 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #ifndef ATAC_MATCHORDER_H #define ATAC_MATCHORDER_H #include #include using namespace std; #include "bio++.H" class atacMatchOrder { private: void initialize(atacMatchList *ML) { _matchesLen = ML->numberOfMatches(); _matchesMax = ML->numberOfMatches(); _matches = new atacMatch * [_matchesLen]; _matchIIDtoIdx = new uint32 [_matchesLen]; for (uint32 i=0; i<_matchesLen; i++) { _matches[i] = ML->getMatch(i); _matchIIDtoIdx[i] = ~uint32ZERO; } }; public: atacMatchOrder(atacMatchList *ML) { initialize(ML); }; atacMatchOrder(atacMatchList &ML) { initialize(&ML); }; ~atacMatchOrder() { delete [] _matches; }; uint32 numMatches(void) { return(_matchesLen); }; uint32 numberOfMatches(void) { return(_matchesLen); }; // Return match i in our list. Common usage would be // getMatch(index(iid) + 1) // return the match after the one we have // getMatch(index(iid) - 1) // atacMatch *getMatch(uint32 i) { return(_matches[i]); }; atacMatch *operator[](uint32 i) { return(_matches[i]); }; // Return the index, in our sorted list, of the matchiid supplied. // uint32 index(uint32 matchiid) { return(_matchIIDtoIdx[matchiid]); }; // Merge the r match into the l match. The l match gets the result. 
void mergeMatches(atacMatch *l, atacMatch *r, uint32 mergeuid); void sortA(uint32 first=0, uint32 len=0); void sortB(uint32 first=0, uint32 len=0); void sortDiagonal(uint32 first=0, uint32 len=0); void sortMatchUID(uint32 first=0, uint32 len=0); void sortParentUID(uint32 first=0, uint32 len=0); private: uint32 _matchesLen; uint32 _matchesMax; atacMatch **_matches; atacMatchList *_ML; void updateIndex(void) { for (uint32 i=0; i<_matchesLen; i++) _matchIIDtoIdx[i] = ~uint32ZERO; for (uint32 i=0; i<_matchesLen; i++) _matchIIDtoIdx[_matches[i]->matchiid] = i; }; uint32 *_matchIIDtoIdx; }; #endif // ATAC_MATCHORDER_H kmer-code-2013-trunk/atac-driver/libatac/atacMatchList.C0000644000000000000000000000272310537431532021602 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005, 2006 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "bio++.H" #include "atac.H" atacMatchList::atacMatchList() { _matchesLen = 0; _matchesMax = 256; _matches = new atacMatch [_matchesMax]; } atacMatchList::~atacMatchList() { delete [] _matches; } void atacMatchList::add(atacMatch &m) { if (_matchesLen >= _matchesMax) { _matchesMax <<= 2; atacMatch *A = new atacMatch [_matchesMax]; memcpy(A, _matches, sizeof(atacMatch) * _matchesLen); delete [] _matches; _matches = A; } memcpy(&_matches[_matchesLen], &m, sizeof(atacMatch)); _matches[_matchesLen].matchiid = _matchesLen++; } kmer-code-2013-trunk/atac-driver/libatac/atacMatchOrder.C0000644000000000000000000001411312322046702021731 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005, 2006 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "bio++.H" #include "atac.H" void atacMatchOrder::mergeMatches(atacMatch *l, atacMatch *r, uint32 mergeuid) { atacMatch n; // Create a new match record for the merged match. 
We could // probably do this inplace in l. // Copy all the defaults from L first. This copies most of the stuff. // memcpy(&n, l, sizeof(atacMatch)); sprintf(n.matchuid, "merge"uint32FMT, mergeuid); n.len1 = (r->pos1 + r->len1) - (l->pos1); n.len2 = n.len1; if (r->fwd2 == false) n.pos2 = r->pos2; n.fwd2 = r->fwd2; // Update l with the new contents. memcpy(l, &n, sizeof(atacMatch)); // Remove the r match from our set. The hardest part is figuring // out what index the r match is at. The easiest way to do that is // the most inefficient (start at zero, when we find the r match, // start updating). The quickest way (given we want an array) // makes us trust our index. // _matchesLen--; for (uint32 idx = index(r->matchiid); idx < _matchesLen; idx++) { _matches[idx] = _matches[idx+1]; _matchIIDtoIdx[_matches[idx]->matchiid] = idx; } } static int sortA_(const void *a, const void *b) { const atacMatch *A = *(const atacMatch * const *)a; const atacMatch *B = *(const atacMatch * const *)b; if (A->iid1 < B->iid1) return(-1); if (A->iid1 > B->iid1) return(1); if (A->pos1 < B->pos1) return(-1); if (A->pos1 > B->pos1) return(1); if (A->len1 > B->len1) return(-1); if (A->len1 < B->len1) return(1); if (A->iid2 < B->iid2) return(-1); if (A->iid2 > B->iid2) return(1); if (A->pos2 < B->pos2) return(-1); if (A->pos2 > B->pos2) return(1); if (A->len2 > B->len2) return(-1); if (A->len2 < B->len2) return(1); return(0); } static int sortB_(const void *a, const void *b) { const atacMatch *A = *(const atacMatch * const *)a; const atacMatch *B = *(const atacMatch * const *)b; if (A->iid2 < B->iid2) return(-1); if (A->iid2 > B->iid2) return(1); if (A->pos2 < B->pos2) return(-1); if (A->pos2 > B->pos2) return(1); if (A->len2 > B->len2) return(-1); if (A->len2 < B->len2) return(1); if (A->iid1 < B->iid1) return(-1); if (A->iid1 > B->iid1) return(1); if (A->pos1 < B->pos1) return(-1); if (A->pos1 > B->pos1) return(1); if (A->len1 > B->len1) return(-1); if (A->len1 < B->len1) return(1); 
return(0); } static int sortdiagonal_(const void *a, const void *b) { const atacMatch *A = *(const atacMatch * const *)a; const atacMatch *B = *(const atacMatch * const *)b; if (A->iid2 < B->iid2) return(-1); if (A->iid2 > B->iid2) return(1); if (A->iid1 < B->iid1) return(-1); if (A->iid1 > B->iid1) return(1); if (A->fwd2 < B->fwd2) return(-1); if (A->fwd2 > B->fwd2) return(1); // We're now in the same sequence pair with the same orientation. // So much easier if we use signed math. // This works for forward matches int32 dA = (int32)A->pos2 - (int32)A->pos1; int32 dB = (int32)B->pos2 - (int32)B->pos1; if (A->fwd2 == 0) { // OK, so not the greatest diagonal computation ever. We end up // with a gigantic discontinuity at the origin, but we don't // care, just as long as the diagonals are distinct. // dA = (int32)A->pos2 - (1000000000 - (int32)(A->pos2 + A->len2)); dB = (int32)B->pos2 - (1000000000 - (int32)(B->pos2 + B->len2)); } if (dA < dB) return(-1); if (dA > dB) return(1); // This is just candy; might make things easier later if (A->pos1 < B->pos1) return(-1); if (A->pos1 > B->pos1) return(1); if (A->len1 > B->len1) return(-1); if (A->len1 < B->len1) return(1); return(0); } static int sortmatchuid_(const void *a, const void *b) { const atacMatch *A = *(const atacMatch * const *)a; const atacMatch *B = *(const atacMatch * const *)b; int r = strcmp(A->matchuid, B->matchuid); if (r < 0) return(-1); if (r > 0) return(1); r = strcmp(A->parentuid, B->parentuid); if (r < 0) return(-1); if (r > 0) return(1); return(0); } static int sortparentuid_(const void *a, const void *b) { const atacMatch *A = *(const atacMatch * const *)a; const atacMatch *B = *(const atacMatch * const *)b; int r = strcmp(A->parentuid, B->parentuid); if (r < 0) return(-1); if (r > 0) return(1); r = strcmp(A->matchuid, B->matchuid); if (r < 0) return(-1); if (r > 0) return(1); return(0); } void atacMatchOrder::sortA(uint32 first, uint32 len) { if (len == 0) len = _matchesLen; qsort(_matches + 
first, len, sizeof(atacMatch*), sortA_); updateIndex(); } void atacMatchOrder::sortB(uint32 first, uint32 len) { if (len == 0) len = _matchesLen; qsort(_matches + first, len, sizeof(atacMatch*), sortB_); updateIndex(); } void atacMatchOrder::sortDiagonal(uint32 first, uint32 len) { if (len == 0) len = _matchesLen; qsort(_matches + first, len, sizeof(atacMatch*), sortdiagonal_); updateIndex(); } void atacMatchOrder::sortMatchUID(uint32 first, uint32 len) { if (len == 0) len = _matchesLen; qsort(_matches + first, len, sizeof(atacMatch*), sortmatchuid_); updateIndex(); } void atacMatchOrder::sortParentUID(uint32 first, uint32 len) { if (len == 0) len = _matchesLen; qsort(_matches + first, len, sizeof(atacMatch*), sortparentuid_); updateIndex(); } kmer-code-2013-trunk/atac-driver/run-comparison.pl0000644000000000000000000001651110236607024020664 0ustar rootroot#!/usr/bin/perl # Runs the full assembly-to-assembly mapping comparison. # # Takes two ATAC-format mapping files and: # shifts 1bp gaps to the 3' end # compute the assembly annotation # # Generate statistics: # sum and histogram of the different annotations # sum and histogram of the disagreement # # number of matches in each # number of scaffolds mapped to multiple chromosomes # # Nx (0 <= x <= 100) # # histogram of match length # histogram of run length # histogram of clump length # my $bin = "/bioinfo/assembly/walenz/hummap2/src/genomics/atac-driver"; my $overlap = "$bin/alignOverlap/overlap"; my $gapShifter = "$bin/gapShifter/gapShifter"; my $mismatchCounter = "$bin/mismatchCounter/mismatchCounter"; if (scalar(@ARGV) != 2) { print STDERR "usage: $0 \n"; exit(1); } my $amap = shift @ARGV; my $atac = shift @ARGV; my $dir = "COMPARE"; my $tmp = "COMPARE"; # /tmp on assembly-a is pathetic # If we're given relative paths, make them absolute # my $pwd = `pwd`; chomp $pwd; $amap = "$pwd/$amap" if ($amap !~ m!^/!); $atac = "$pwd/$atac" if ($atac !~ m!^/!); # We run stuff in the $dir directory, set that up # 
system("mkdir $dir") if (! -d "$dir");
system("ln -s $amap $dir/amap.atac") if (! -e "$dir/amap.atac");
system("ln -s $atac $dir/atac.atac") if (! -e "$dir/atac.atac");

# Shift gaps in both mappings.  Each step is skipped when its output
# already exists, so an interrupted run can be restarted.
#
if (! -e "$dir/amap.shift.atac") {
    print STDERR "GAP SHIFTER NEEDS assembly-a, so that the whole genome can be loaded!\n";
    system("$gapShifter -g 1 < $dir/amap.atac > $dir/amap.shift.atac") and die "Failed to shift amap.\n";
}

if (! -e "$dir/atac.shift.atac") {
    print STDERR "GAP SHIFTER NEEDS assembly-a, so that the whole genome can be loaded!\n";
    system("$gapShifter -g 1 < $dir/atac.atac > $dir/atac.shift.atac") and die "Failed to shift atac.\n";
}

# Run overlap
#
if (! -e "$dir/overlap.map1annotation") {
    system("$overlap $dir/amap.shift.atac $dir/atac.shift.atac $dir/overlap") and die "Failed to overlap.\n";
}

# Reads the annotation output of overlap.C and adds percent identity
# for each match.
#
# This is done by first converting the overlap.C output into atac
# format (one file for each mapping), running mismatchCounter on each
# of those files, then merging the results together.
#
# The existence test uses the same path the result is written to
# ($dir/...); the bare file name tested before never matched it, so
# the merge was redone on every run.
#
if (! -e "$dir/overlap.map1annotation.identity") {
    if ((-e "$tmp/c.otoi") && (-e "$tmp/d.otoi")) {
        print STDERR "Using $tmp/c.otoi and $tmp/d.otoi\n";
    } else {
        overlapToAtac("$dir/overlap.map1annotation", "$tmp/a.otoi", "$tmp/b.otoi");

        print STDERR "Counting mismatches.\n";
        system("$mismatchCounter < $tmp/a.otoi > $tmp/c.otoi") and die "Failed mismatchCounter on A.\n";
        system("$mismatchCounter < $tmp/b.otoi > $tmp/d.otoi") and die "Failed mismatchCounter on B.\n";
    }

    open(A, "< $tmp/c.otoi") or die "Failed to open $tmp/c.otoi\n";
    open(B, "< $tmp/d.otoi") or die "Failed to open $tmp/d.otoi\n";
    open(M, "< $dir/overlap.map1annotation") or die "Failed to open $dir/overlap.map1annotation\n";
    open(O, "> $dir/overlap.map1annotation.identity") or die "Failed to open $dir/overlap.map1annotation.identity\n";

    my $a;
    my $b;
    my $m;

    print STDERR "Merging results.\n";

    # The three files are read in lock step: one annotation line, one
    # match line from each mapping.
    #
    while (!eof(A) && !eof(B) && !eof(M)) {
        $m = <M>; chomp $m;

        # Skip any ATAC headers in A and B (lines starting with '/').
        # A header run that reaches end of file leaves an empty line.
        #
        do { $a = <A>; } while (defined($a) && ($a =~ m/^\//));
        do { $b = <B>; } while (defined($b) && ($b =~ m/^\//));

        $a = "" if (!defined($a));
        $b = "" if (!defined($b));

        chomp $a;
        chomp $b;

        my @av = split '\s+', $a;
        my @bv = split '\s+', $b;

        my ($aid, $abeg, $alen, $aori, $amis, $aident) = (undef, undef, undef, undef, undef, "0.0");
        my ($bid, $bbeg, $blen, $bori, $bmis, $bident) = (undef, undef, undef, undef, undef, "0.0");

        if ($a =~ m/HUREF:(\d+)\s+(\d+)\s+(\d+)\s+(-*\d+)\s+>\s+\/mismatches=(\d+)\s+\/identity=(\d+\.\d+)/) {
            $aid = $1;
            $abeg = $2;
            $alen = $3;
            $aori = $4;
            $amis = $5;
            $aident = $6;
        } elsif ($a !~ m/^M\sm\s/) {
            print "Anope $a\n";
        }

        if ($b =~ m/HUREF:(\d+)\s+(\d+)\s+(\d+)\s+(-*\d+)\s+>\s+\/mismatches=(\d+)\s+\/identity=(\d+\.\d+)/) {
            $bid = $1;
            $bbeg = $2;
            $blen = $3;
            $bori = $4;
            $bmis = $5;
            $bident = $6;
        } elsif ($b !~ m/^M\sm\s/) {
            print "Bnope $b\n";
        }

        $aident = substr(" $aident", -7);
        $bident = substr(" $bident", -7);

        if ($m =~ m/^(.*\]\s+\d+\s+\(.*\))\s+(\d+\s+\(.*\))/) {
            print O "$1 $aident $2 $bident\n";
        } else {
            print "Mnope $m\n";
            exit(1);
        }
    }

    close(A);
    close(B);
    close(M);
    close(O) or die "Failed to write $dir/overlap.map1annotation.identity\n";
}

# Reads the annotation output of overlap.C and writes two atac format
# files, one for map1, one for map2.
There is a 1-1 map between # lines, unmapped B35 (either because B35 was unmapped by both # mappings, or unmapped by the other mapping) are noted in 'm' # matches ("M m UID . B35LC:xxxxx beg len 1") # sub overlapToAtac { my $infile = shift @_; my $outfile1 = shift @_; my $outfile2 = shift @_; open(I, "< $infile") or die "Can't open '$infile' for reading.\n"; open(O1, "> $outfile1") or die "Can't open '$outfile1' for writing.\n"; open(O2, "> $outfile2") or die "Can't open '$outfile2' for writing.\n"; print STDERR "Converting $infile -> $outfile1 and $outfile2\n"; print O1 "/assemblyFile1=MERYL/B35LC.fasta\n"; print O1 "/assemblyFile2=MERYL/HUREF2.fasta\n"; print O1 "/assemblyId1=B35LC\n"; print O1 "/assemblyId2=HUREF2\n"; print O2 "/assemblyFile1=MERYL/B35LC.fasta\n"; print O2 "/assemblyFile2=MERYL/HUREF2.fasta\n"; print O2 "/assemblyId1=B35LC\n"; print O2 "/assemblyId2=HUREF2\n"; my $id = 0; while () { if (m/^.\s+(\d+):(\d+)-(\d+)\[\s*\d+\]\s(\d+)\s\(\s*(\d+):\s*(\d+)-\s*(\d+)\)\s(\d+)\s\(\s*(\d+):\s*(\d+)-\s*(\d+)\)\s*$/) { my $id1 = $1; my $b1 = $2; my $e1 = $3; my $l1 = $e1 - $b1; my $mid2a = $4; my $id2a = $5; my $b2a = $6; my $e2a = $7; my $l2a = $e2a - $b2a; my $oria = 1; my $mid2b = $8; my $id2b = $9; my $b2b = $10; my $e2b = $11; my $l2b = $e2b - $b2b; my $orib = 1; $b1 =~ s/^0+//; $e1 =~ s/^0+//; $b1 = 0 if ($b1 == 0); # fix for blowing away all the zeros if ($e2a < $b2a) { ($b2a, $e2a) = ($e2a, $b2a); $l2a = $e2a - $b2a; $oria = -1 } if ($e2b < $b2b) { ($b2b, $e2b) = ($e2b, $b2b); $l2b = $e2b - $b2b; $orib = -1 } $mid2a =~ s/^0+//; $mid2b =~ s/^0+//; if ($e2a > 0) { print O1 "M u $id . B35LC:$id1 $b1 $l1 1 HUREF:$id2a $b2a $l2a $oria\n"; } else { print O1 "M m $id . B35LC:$id1 $b1 $l1 1\n"; } if ($e2b > 0) { print O2 "M u $id . B35LC:$id1 $b1 $l1 1 HUREF:$id2b $b2b $l2b $orib\n"; } else { print O2 "M m $id . 
B35LC:$id1 $b1 $l1 1\n"; } $id++; } else { #print "Nope.\n"; #exit(1); } } close(O1); close(O2); close(I); } kmer-code-2013-trunk/atac-driver/matchExtender/0000755000000000000000000000000012641613360020144 5ustar rootrootkmer-code-2013-trunk/atac-driver/matchExtender/match.H0000644000000000000000000000547512322046702021360 0ustar rootroot#ifndef MATCH_H #define MATCH_H #include #include #include #include "atac.H" #include "bio++.H" #include "seqCache.H" class match_s { public: char _matchId[32]; seqInCore *_seq1; FastAAccessor *_acc1; uint32 _iid1; uint32 _pos1; uint32 _len1; uint32 _ori1; seqInCore *_seq2; FastAAccessor *_acc2; uint32 _iid2; uint32 _pos2; uint32 _len2; uint32 _ori2; bool _isDeleted; // Our diagonal never, ever changes uint32 _diagonal; public: match_s(char *matchId, seqInCore *s1, uint32 i1, uint32 p1, uint32 l1, uint32 o1, seqInCore *s2, uint32 i2, uint32 p2, uint32 l2, uint32 o2) { strncpy(_matchId, matchId, 32); _seq1 = s1; _acc1 = new FastAAccessor(_seq1, false); _iid1 = i1; _pos1 = p1; _len1 = l1; _ori1 = o1; _seq2 = s2; _acc2 = new FastAAccessor(_seq2, (o1 != o2)); _iid2 = i2; _pos2 = p2; _len2 = l2; _ori2 = o2; _isDeleted = false; _acc1->setRange(_pos1, _len1); _acc2->setRange(_pos2, _len2); _acc1->setPosition(_pos1); _acc2->setPosition(_pos2); // the diagonal is.... if (_ori1 == _ori2) _diagonal = _seq1->sequenceLength() - _pos1 + _pos2; else _diagonal = _seq1->sequenceLength() - _pos1 + _seq2->sequenceLength() - (_pos2 + _len2); }; ~match_s() { delete _acc1; delete _acc2; }; // Compare by diagonal, then by position in the first sequence. 
// bool operator<(const match_s& r) const { if (_diagonal < r._diagonal) return(true); if (_diagonal == r._diagonal) return(_pos1 < r._pos1); return(false); }; void dump(FILE *out, const char *descr, bool showSeq=false); // For compatibility void extendLeft(int32 num) { _acc1->extendLeft(num); _acc2->extendLeft(num); }; void extendRight(int32 num) { _acc1->extendRight(num); _acc2->extendRight(num); }; bool isDeleted(void) { return(_isDeleted); }; void setDeleted(void) { _isDeleted = true; }; // Since we're on the same diagonal, and ungapped, the choice of // testing sequence 1 or 2 is arbirary. // bool canMergeWith(match_s *m) { return((m != 0L) && (_diagonal == m->_diagonal) && (_acc1->getRangeEnd() >= m->_acc1->getRangeBegin())); }; // Extend us to end where m ends // void consume(match_s *m) { if (m->_acc1->getRangeEnd() > _acc1->getRangeEnd()) extendRight(m->_acc1->getRangeEnd() - _acc1->getRangeEnd()); }; uint32 len(void) { return(_acc1->getRangeEnd() - _acc1->getRangeBegin()); }; uint32 pos1(void) { return(_acc1->getRangeBegin()); }; uint32 pos2(void) { return(_acc2->getRangeBegin()); }; seqInCore *seq1(void) { return(_seq1); }; seqInCore *seq2(void) { return(_seq2); }; }; #endif // MATCH_H kmer-code-2013-trunk/atac-driver/matchExtender/Make.include0000644000000000000000000000114311512763666022400 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../../libutil/)/ LIBBIO/ :=$(realpath $/../../libbio/)/ LIBSEQ/ :=$(realpath $/../../libseq/)/ LIBATAC/ :=$(realpath $/../libatac/)/ $/.CXX_SRCS := $/matchExtender.C $/matchExtender-dump.C $/matchExtender-func.C $/.CXX_EXES := $/matchExtender $/.CLEAN :=$/*.o $/*~ $/core $/matchExtender: $/matchExtender.o $/matchExtender-dump.o $/matchExtender-func.o \ ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBATAC/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) 
kmer-code-2013-trunk/atac-driver/matchExtender/matchExtender-func.C0000644000000000000000000003520512322046702023775 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include #include #include "bio++.H" #include "atac.H" #include "match.H" using namespace std; extern uint32 minEndRunLen; extern uint32 maxMMBlock; extern uint32 minBlockSep; extern double minIdentity; extern uint32 maxNbrSep; extern uint32 maxNbrPathMM; //#define DEBUG_TRACE //#define DEBUG_TRIMTOPERCENT //#define DEBUG_EXTEND //#define DEBUG_EXTEND_CONSUME //#define DEBUG_EXTEND_BACK //#define DEBUG_EXTEND_FORWARD // Return true if c1 and c2 are identities, false otherwise. // bool isIdentity(char c1, char c2) { return((letterToBits[(int)c1] != 0xff) && (letterToBits[(int)c2] != 0xff) && IUPACidentity[(int)c1][(int)c2]); } // Finds the largest block >= 'pct' (95%) identity. 
//
// Shrinks matches[midx] to that block and returns true if the match
// was trimmed, false if it was left alone.
//
// NOTE(review): the template argument of 'vector' is missing here and
// in the functions below in this copy of the file (presumably
// vector<match_s *>, since elements are used as match_s pointers) -
// restore from the upstream source.
//
bool
trim_to_pct(vector& matches, uint32 midx, double pct) {

#ifdef DEBUG_TRACE
  fprintf(stderr, "trim_to_pct()\n");
#endif

  uint32 best_start = 0;
  uint32 best_len = 0;

  match_s *m = matches[midx];

  FastAAccessor &A = *m->_acc1;
  FastAAccessor &B = *m->_acc2;

  A.setPosition(m->pos1());
  B.setPosition(m->pos2());

#ifdef DEBUG_TRIMTOPERCENT
  //m->dump(stderr, "TrimToPercent", false);
#endif

  // For all starting positions:
  //
  // We could short-circuit here - once (m->len() - start) becomes
  // shorter than our best_len, we have no hope in finding a better
  // one.
  //
  for (uint32 start=0; (start< m->len()) && (m->len() - start > best_len); ++start) {
    uint32 best_run_len = 0;
    uint32 sum = 0;

    A.setPosition(m->pos1() + start);
    B.setPosition(m->pos2() + start);

    // And all ending positions:
    //
    // Compute the number of identities we've seen, and remember the
    // length of the highest identity.
    //
    for (uint32 len = 1; start + len <= m->len(); ++len) {
      char c1 = *A;
      char c2 = *B;

      // We just extend the last result by one, rather than recompute
      // the whole value for our new range (start, len).
      //
      if (isIdentity(c1, c2))
        sum++;

      // If the sum is more than 'pct' identities, we are by
      // construction the longest run at this starting point, so
      // remember it.
      //
      if (sum >= pct * len)
        best_run_len = len;

      ++A;
      ++B;
    }

    // Special case: if the whole string is okay, don't check any
    // subranges
    //
    if ((start == 0) && (best_run_len == m->len()))
      return(false);

    // If we've just found a longer subrange, remember it.
    //
    if (best_run_len > best_len) {
      best_start = start;
      best_len = best_run_len;
    }
  }

  if (best_len < m->len()) {
#ifdef DEBUG_TRIMTOPERCENT
    fprintf(stderr, "============================================================\n");
    fprintf(stderr, "Trimming to substring with start="uint32FMT" and len="uint32FMT" for percent identity\n", best_start, best_len);
    m->dump(stderr, "BEFORE", true);
#endif

    // Negative extensions shrink the match.  The left side goes first,
    // so m->len() in the second call already excludes best_start and
    // the difference to best_len is what has to come off the right.
    m->extendLeft(-(int32)best_start);
    m->extendRight(-(int32)(m->len() - best_len));

#ifdef DEBUG_TRIMTOPERCENT
    m->dump(stderr, "AFTER", true);
    fprintf(stderr, "============================================================\n");
#endif

    return(true);
  }

  return(false);
}


// Extends matches[midx] to the left, one base at a time, while the
// accessor position on sequence 1 stays above min_start_pos and both
// accessors are valid.  Bases become part of the match only once
// minEndRunLen consecutive identities have been seen; the walk stops
// after more than maxMMBlock mismatches that were not separated by a
// run of minBlockSep identities.
//
void
extend_match_backward(vector& matches, uint32 midx, uint32 min_start_pos) {

#ifdef DEBUG_TRACE
  fprintf(stderr, "extend_match_backward()-- min_start_pos="uint32FMT"\n", min_start_pos);
#endif

  // Assumes when traveling backwards that we will never run into
  // another match (otherwise, that match would have been forward
  // extended previously).

  uint32 num_recent_mismatches = 0;
  match_s *m = matches[midx];
  uint32 good_run_len = (int) m->len();
  uint32 num_pending = 0;

  FastAAccessor &A = *m->_acc1;
  FastAAccessor &B = *m->_acc2;

  A.setPosition(m->_acc1->getRangeBegin());
  B.setPosition(m->_acc2->getRangeBegin());

  // Decrement, instead of subtract one from the position above, to
  // avoid any issues with overflow (e.g., 0 - 1).
  //
  --A;
  --B;

  while ((A.getPosition() > min_start_pos) && A.isValid() && B.isValid()) {
    char c1 = *A;
    char c2 = *B;

    if (isIdentity(c1, c2)) {
      good_run_len++;

      // If we've gone long enough, erase our mismatch record
      //
      if (good_run_len == minBlockSep) // 20 by default
        num_recent_mismatches = 0;

      // If we're in the middle of a long good run, add the character
      // to the match (END_RUN_LEN=10)
      //
      // Otherwise, if we just made the minimum extension length, add
      // all of the pending characters.
      //
      // Otherwise, this character is pending. However, still do
      // output if we're run out of sequence.
      //
      if (good_run_len > minEndRunLen) {
        m->extendLeft(1);
      } else if (good_run_len == minEndRunLen) {
        m->extendLeft(num_pending + 1);
        num_pending = 0;
      } else {
        num_pending++;
      }
    } else {
      good_run_len = 0;
      num_pending++;
      num_recent_mismatches++;

      if (num_recent_mismatches > maxMMBlock) // 3 by default
        break;
    }

    --A;
    --B;
  }

  // If we hit the end of the sequence, and are good, do extension
  //
  // if ((A.getPosition() == min_start_pos) || (B.getPosition() == 0))
  //
  if (!A.isValid() || !B.isValid() || (A.getPosition() <= min_start_pos))
    m->extendLeft(num_pending);

  // NOTE(review): the debug output below uses m->_id1 / m->_id2, which
  // are not members of match_s as declared in match.H - it will not
  // compile when the macro is defined.
#ifdef DEBUG_EXTEND_BACK
  fprintf(stderr, "extend_back()-- M u %s . %s %d %d 1 %s %d %d 1\n", m->_matchId, m->_id1, m->_acc1->getRangeBegin(), m->_acc1->getRangeLength(), m->_id2, m->_acc2->getRangeBegin(), m->_acc2->getRangeLength());
#endif
}


// Returns true if dest starts within maxNbrSep bases of the end of src
// and the bases between them contain at most maxNbrPathMM mismatches.
//
bool
can_reach_nearby_match(match_s *src, match_s *dest) {

#ifdef DEBUG_TRACE
  fprintf(stderr, "can_reach_nearby_match()\n");
#endif

  // NOTE(review): unsigned arithmetic - if dest starts before src ends
  // the difference wraps to a large value and this returns false.
  if (dest->pos1() - (src->pos1() + src->len()) > (uint32) maxNbrSep) // 100
    return false;

#if 0
  src->dump(stderr, "src:");
  dest->dump(stderr, "dst:");
#endif

  FastAAccessor &A = *src->_acc1;
  FastAAccessor &B = *src->_acc2;

  // Start on the first base after the end of src.
  A.setPosition(A.getRangeEnd() - 1);
  B.setPosition(B.getRangeEnd() - 1);

  ++A;
  ++B;

  uint32 num_mismatch = 0;

  while ((num_mismatch <= maxNbrPathMM) && // 5
         (A.getPosition() < dest->pos1()) &&
         (A.isValid()) &&
         (B.isValid())) {

    if (!isIdentity(*A, *B))
      num_mismatch++;

    ++A;
    ++B;
  }

#if 0
  fprintf(stderr, "num_mismatch=%d pos: %d %d valid: A:%d B:%d\n", num_mismatch, A.getPosition(), dest->pos1(), A.isValid(), B.isValid());
#endif

  return(num_mismatch <= maxNbrPathMM); // 5
}


// Stops and returns true if we hit the next match
//
// Extends matches[midx] to the right with the same rules as
// extend_match_backward().  target may be null (canMergeWith() then
// never succeeds).  Returns false when the extension stops on
// mismatches or runs out of sequence.
//
bool
extend_match_forward(vector& matches, uint32 midx, match_s *target) {

#ifdef DEBUG_TRACE
  fprintf(stderr, "extend_match_forward()\n");
#endif

  match_s *m = matches[midx];
  uint32 num_recent_mismatches = 0;
  uint32 num_pending = 0;
  uint32 good_run_len = (int) m->len();

  FastAAccessor &A = *m->_acc1;
  FastAAccessor &B = *m->_acc2;

#ifdef DEBUG_EXTEND_FORWARD
  fprintf(stderr, "extend_match_forward()-- A:%4d-%4d B:%4d-%4d\n", A.getRangeBegin(), A.getRangeLength(), B.getRangeBegin(), B.getRangeLength());
#endif

  // Set our position to the last valid base in the range, then move
  // to the next one.
  //
  A.setPosition(A.getRangeEnd() - 1);
  B.setPosition(B.getRangeEnd() - 1);

  ++A;
  ++B;

  while (A.isValid() && B.isValid()) {
    char c1 = *A;
    char c2 = *B;

    if (isIdentity(c1, c2)) {
      good_run_len++;

      //fprintf(stderr, "extend-forward %c %c\n", c1, c2);

      // Pass Go and collect $200
      //
      if (good_run_len == minBlockSep)
        num_recent_mismatches = 0;

      // If not enough good characters yet, increase the length
      // pending. We used to check for the hitting the end of the
      // sequence here.
      //
      // Otherwise, if we have just made the minumum good run length,
      // do the extension.
      //
      // Otherwise, if we're above the minimum good length, extend by
      // another character.
      //
      if (good_run_len < minEndRunLen) {
        num_pending++;
      } else if (good_run_len == minEndRunLen) {
        m->extendRight(num_pending + 1);
        num_pending = 0;
      } else if (good_run_len > minEndRunLen) {
        m->extendRight(1);
      }

      // If we've run into (and possibly over) another seed match,
      // return so the main loop can consume and restart.
      //
      if (m->canMergeWith(target))
        return(true);
    } else {
      good_run_len = 0;
      num_pending++;
      num_recent_mismatches++;

      if (num_recent_mismatches > maxMMBlock)
        return(false);
    }

    ++A;
    ++B;
  }

  // If we've got a short good run but have hit the end of
  // a sequence, do extension.
  //
  if ((!A.isValid() || !B.isValid()) && (good_run_len < minEndRunLen))
    m->extendRight(num_pending);

#ifdef DEBUG_EXTEND_FORWARD
  fprintf(stderr, "extend_match_forward(finish)-- A:%4d-%4d B:%4d-%4d\n", A.getRangeBegin(), A.getRangeLength(), B.getRangeBegin(), B.getRangeLength());
#endif

  return(false);
}


// Processes every match that shares the diagonal of matches[diag_start]
// (the matches are expected to be grouped by diagonal): each is first
// extended backward, then extended forward and merged with its
// successors where possible, and finally trimmed to minIdentity.
// Merged-away matches are only marked deleted.  Returns the index of
// the first match that is not on this diagonal.
//
uint32
extend_matches_on_diagonal(vector& matches, uint32 diag_start) {

#ifdef DEBUG_TRACE
  fprintf(stderr, "extend_matches_on_diagonal()\n");
#endif

  uint32 diag_id = matches[diag_start]->_diagonal;
  uint32 idx;
  uint32 prev_end = 0;
  match_s *m;
  match_s *next_m = NULL;

  // Back extend each match as far as possible (but never over the
  // preceding match
  //
  for (idx = diag_start; (idx < matches.size()) && (matches[idx]->_diagonal == diag_id); ++idx) {
    m = matches[idx];

#ifdef DEBUG_EXTEND_BACK
    m->dump(stderr, "Before back extension:", true);
#endif

    extend_match_backward(matches, idx, prev_end);

#ifdef DEBUG_EXTEND_BACK
    m->dump(stderr, "After back extension:", true);
#endif

#ifdef DEBUG_EXTEND
    fprintf(stderr, "1M u %s . %s %d %d 1 %s %d %d 1\n", matches[idx]->_matchId, matches[idx]->_id1, matches[idx]->_acc1->getRangeBegin(), matches[idx]->_acc1->getRangeLength(), matches[idx]->_id2, matches[idx]->_acc2->getRangeBegin(), matches[idx]->_acc2->getRangeLength());
#endif

    prev_end = m->pos1() + m->len();

    // Positions are unsigned; a start beyond the sequence length means
    // an extension wrapped below zero.
    if ((m->pos1() > m->seq1()->sequenceLength()) || (m->pos2() > m->seq2()->sequenceLength()))
      m->dump(stderr, "NEGATIVE after back extend!\n", true), abort();
  }

  // Now forward extend each match

  idx = diag_start;

  while ((idx < matches.size()) && (matches[idx]->_diagonal == diag_id)) {

    if (matches[idx]->isDeleted()) {
      idx++;
      continue;
    }

#ifdef DEBUG_EXTEND
    fprintf(stderr, "2M u %s . %s %d %d 1 %s %d %d 1\n", matches[idx]->_matchId, matches[idx]->_id1, matches[idx]->_acc1->getRangeBegin(), matches[idx]->_acc1->getRangeLength(), matches[idx]->_id2, matches[idx]->_acc2->getRangeBegin(), matches[idx]->_acc2->getRangeLength());
#endif

    m = matches[idx];

    // Find the next match on this diagonal that has not been deleted.
    next_m = 0L;

    for (uint32 next_idx=idx+1; ((next_idx < matches.size()) && (matches[next_idx]->_diagonal == diag_id) && (next_m == 0L)); next_idx++)
      if (matches[next_idx]->isDeleted() == false)
        next_m = matches[next_idx];

    // First, try to reach the next match with the simple "maximum of
    // k mismatches" rule. If we made it, consume the next match and
    // start the loop again with the same match (now extended)
    //
    if (next_m && can_reach_nearby_match(m, next_m)) {

#ifdef DEBUG_EXTEND_CONSUME
      m->dump(stderr, "I can_reach_nearby_match and extend this", true);
      next_m->dump(stderr, "with this", true);
#endif

      m->consume(next_m);
      next_m->setDeleted();

#ifdef DEBUG_EXTEND_CONSUME
      m->dump(stderr, "Extended through next match via neighbor search:", true);
#endif

      continue;
    }

    // Otherwise, try to make it to the next match with the
    // character-at- a-time extension rules. If we make it, restart
    // the loop with the same match (now extended). Otherwise, trim
    // the extended match as necessary and move on to the next
    // match.
    //
    if (extend_match_forward(matches, idx, next_m)) {

#ifdef DEBUG_EXTEND_CONSUME
      m->dump(stderr, "I extend_match_forward and extend this", true);
      next_m->dump(stderr, "with this", true);
#endif

      m->consume(next_m);
      next_m->setDeleted();

#ifdef DEBUG_EXTEND_CONSUME
      m->dump(stderr, "Extended through next match via forward extension:", true);
#endif

      continue;
    }

#ifdef DEBUG_EXTEND
    //m->dump(stderr, "Failed to make next match. Final extended version:", true);
#endif

#ifdef DEBUG_EXTEND
    fprintf(stderr, "3M u %s . %s %d %d 1 %s %d %d 1\n", matches[idx]->_matchId, matches[idx]->_id1, matches[idx]->_acc1->getRangeBegin(), matches[idx]->_acc1->getRangeLength(), matches[idx]->_id2, matches[idx]->_acc2->getRangeBegin(), matches[idx]->_acc2->getRangeLength());
#endif

    // Didn't make it, so trim and move on
    //
    if (trim_to_pct(matches, idx, minIdentity)) {
#ifdef DEBUG_EXTEND_TRIMMING
      m->dump(stderr, "After trimming:", true);
#endif
    } else {
#ifdef DEBUG_EXTEND_TRIMMING
      fprintf(stderr, "No trimming done.\n");
#endif
    }

#ifdef DEBUG_EXTEND
    fprintf(stderr, "4M u %s . %s %d %d 1 %s %d %d 1\n", matches[idx]->_matchId, matches[idx]->_id1, matches[idx]->_acc1->getRangeBegin(), matches[idx]->_acc1->getRangeLength(), matches[idx]->_id2, matches[idx]->_acc2->getRangeBegin(), matches[idx]->_acc2->getRangeLength());
#endif

#ifdef DEBUG_EXTEND
    if ((m->pos1() > m->seq1()->sequenceLength()) || (m->pos2() > m->seq2()->sequenceLength()))
      m->dump(stderr, "NEGATIVE after forward extend!", true), abort();

    fprintf(stderr, "\n==============\n\n");
#endif

    ++idx;
  }

  return idx;
}
kmer-code-2013-trunk/atac-driver/matchExtender/matchExtender.C0000644000000000000000000001706712322046702023052 0ustar rootroot// This file is part of A2Amapper.
// Copyright (c) 2005 J. Craig Venter Institute
// Author: Brian Walenz
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include #include using namespace std; #include "atac.H" #include "match.H" #include "bio++.H" #include "seqCache.H" uint32 minEndRunLen = 10; // -E /matchExtenderMinEndRunLen uint32 maxMMBlock = 3; // -B /matchExtenderMaxMMBlock uint32 minBlockSep = 20; // -S /matchExtenderMinBlockSep double minIdentity = 0.95; // -I /matchExtenderMinIdentity uint32 maxNbrSep = 100; // -P /matchExtenderMaxNbrSep uint32 maxNbrPathMM = 5; // -D /matchExtenderMaxNbrPathMM bool trim_to_pct(vector& matches, uint32 midx, double pct); void extend_match_backward(vector& matches, uint32 midx, uint32 min_start_pos); bool can_reach_nearby_match(match_s *src, match_s *dest); bool extend_match_forward(vector& matches, uint32 midx, match_s *target); uint32 extend_matches_on_diagonal(vector& matches, uint32 diag_start); class MatchCompare { public: int operator()(const match_s *m1, const match_s *m2) { return(*m1 < *m2); } }; // Read matches until the iid differs. Leave the next match in inLine. // bool readMatches(atacFileStreamMerge &AF, atacMatch *&m, seqCache *C1, seqCache *C2, vector &fwdMatches, vector &revMatches) { fwdMatches.clear(); revMatches.clear(); // If M is null, we're here for the first time, so get the next // (first) match from the file. M is also null if we're at the end // of the file, so if after getting a match (that's done at the end // of this routine) we're still null, we're all done. 
// if (m == 0L) m = AF.nextMatch('x'); if (m == 0L) return(false); uint32 iid1 = m->iid1; uint32 iid2 = m->iid2; seqInCore *seq1 = C1->getSequenceInCore(iid1); seqInCore *seq2 = C2->getSequenceInCore(iid2); while (m) { if ((m->iid1 == iid1) && (m->iid2 == iid2)) { if (m->fwd1 == m->fwd2) fwdMatches.push_back(new match_s(m->matchuid, seq1, m->iid1, m->pos1, m->len1, m->fwd1, seq2, m->iid2, m->pos2, m->len2, m->fwd2)); else revMatches.push_back(new match_s(m->matchuid, seq1, m->iid1, m->pos1, m->len1, m->fwd1, seq2, m->iid2, m->pos2, m->len2, m->fwd2)); } else { break; } m = AF.nextMatch('x'); } if (fwdMatches.size() > 0) sort(fwdMatches.begin(), fwdMatches.end(), MatchCompare()); if (revMatches.size() > 0) sort(revMatches.begin(), revMatches.end(), MatchCompare()); return(true); } int main(int argc, char *argv[]) { bool fail = false; atacMatch *m = 0L; atacFileStreamMerge AF; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-e") == 0) { minEndRunLen = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-b") == 0) { maxMMBlock = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-s") == 0) { minBlockSep = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-i") == 0) { minIdentity = atof(argv[++arg]); } else if (strcmp(argv[arg], "-p") == 0) { maxNbrSep = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-d") == 0) { maxNbrPathMM = strtouint32(argv[++arg], 0L); } else { //fprintf(stderr, "unknown option %s\n", argv[arg]); //fail = true; AF.addFile(argv[arg]); } arg++; } if (fail) { fprintf(stderr, "usage: %s [options] header.atac matches.atac ... 
> matches.atac\n", argv[0]); fprintf(stderr, " -e matchExtenderMinEndRunLen, 10\n"); fprintf(stderr, " -b matchExtenderMaxMMBlock, 3\n"); fprintf(stderr, " -s matchExtenderMinBlockSep, 20\n"); fprintf(stderr, " -i matchExtenderMinIdentity, 0.95\n"); fprintf(stderr, " -p matchExtenderMaxNbrSep, 100\n"); fprintf(stderr, " -d matchExtenderMaxNbrPathMM, 5\n"); exit(1); } AF.writeHeader(stdout); seqCache *C1 = new seqCache(AF.assemblyFileA(), 1, false); seqCache *C2 = new seqCache(AF.assemblyFileB(), 1, false); C1->loadAllSequences(); vector fwdMatches; vector revMatches; while (readMatches(AF, m, C1, C2, fwdMatches, revMatches)) { uint32 diag_start = 0; while (diag_start < fwdMatches.size()) { //fprintf(stderr, "fwd: M u %s . %s %d %d 1 %s %d %d 1\n", // fwdMatches[diag_start]->_matchId, // fwdMatches[diag_start]->_id1, fwdMatches[diag_start]->_acc1->getRangeBegin(), fwdMatches[diag_start]->_acc1->getRangeLength(), // fwdMatches[diag_start]->_id2, fwdMatches[diag_start]->_acc2->getRangeBegin(), fwdMatches[diag_start]->_acc2->getRangeLength()); diag_start = extend_matches_on_diagonal(fwdMatches, diag_start); } diag_start = 0; while (diag_start < revMatches.size()) { //fprintf(stderr, "rev: M u %s . %s %d %d 1 %s %d %d 1\n", // revMatches[diag_start]->_matchId, // revMatches[diag_start]->_id1, revMatches[diag_start]->_acc1->getRangeBegin(), revMatches[diag_start]->_acc1->getRangeLength(), // revMatches[diag_start]->_id2, revMatches[diag_start]->_acc2->getRangeBegin(), revMatches[diag_start]->_acc2->getRangeLength()); diag_start = extend_matches_on_diagonal(revMatches, diag_start); } // Dump and destroy all the matches // for (uint32 i=0; iisDeleted()) fprintf(stdout, "M u %s . 
%s:"uint32FMT" "uint32FMT" "uint32FMT" 1 %s:"uint32FMT" "uint32FMT" "uint32FMT" 1\n", fwdMatches[i]->_matchId, AF.labelA(), fwdMatches[i]->_iid1, fwdMatches[i]->_acc1->getRangeBegin(), fwdMatches[i]->_acc1->getRangeLength(), AF.labelB(), fwdMatches[i]->_iid2, fwdMatches[i]->_acc2->getRangeBegin(), fwdMatches[i]->_acc2->getRangeLength()); delete fwdMatches[i]; } for (uint32 i=0; iisDeleted()) fprintf(stdout, "M u %s . %s:"uint32FMT" "uint32FMT" "uint32FMT" 1 %s:"uint32FMT" "uint32FMT" "uint32FMT" -1\n", revMatches[i]->_matchId, AF.labelA(), revMatches[i]->_iid1, revMatches[i]->_acc1->getRangeBegin(), revMatches[i]->_acc1->getRangeLength(), AF.labelB(), revMatches[i]->_iid2, revMatches[i]->_acc2->getRangeBegin(), revMatches[i]->_acc2->getRangeLength()); delete revMatches[i]; } fwdMatches.clear(); revMatches.clear(); } delete C1; delete C2; } kmer-code-2013-trunk/atac-driver/matchExtender/matchExtender-dump.C0000644000000000000000000000633612322046702024012 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2004 Applied Biosystems // Copyright (c) 2005 J. Craig Venter Institute // Author: Dan Fasulo // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "bio++.H" #include "match.H" void match_s::dump(FILE *out, const char *descr, bool showSeq) { fprintf(out, "%s: ID:%s range1:"uint32FMT","uint32FMT" _pos="uint32FMT" (seqlen="uint32FMT")\n", descr, _matchId, _acc1->getRangeBegin(), _acc1->getRangeLength(), _acc1->_pos, _seq1->sequenceLength()); fprintf(out, "%s ID:%s range2:"uint32FMT","uint32FMT" _pos="uint32FMT" (seqlen="uint32FMT") diag:"uint32FMT" %s\n", descr, _matchId, _acc2->getRangeBegin(), _acc2->getRangeLength(), _acc2->_pos, _seq2->sequenceLength(), _diagonal, (_ori1 != _ori2) ? "reversed" : ""); if (showSeq) { FastAAccessor &A = *_acc1; FastAAccessor &B = *_acc2; // Save the position of the accessors // uint32 acc1pos = A._pos; uint32 acc2pos = B._pos; A.setPosition(A.getRangeBegin()); B.setPosition(B.getRangeBegin()); uint32 margin = 5; uint32 i = 0; char *seq = new char [_acc1->getRangeEnd() - _acc1->getRangeBegin() + margin + margin + 32]; char *las = seq; strcpy(seq, ">>> "); while (*las) las++; for (i=0; igetRangeEnd() - _acc1->getRangeBegin(); i++, ++A) if (A.isValid()) *las++ = *A; else *las++ = ' '; *las++ = ':'; for (i=0; i>> " : "<<< "); while (*las) las++; for (i=0; igetRangeEnd() - _acc1->getRangeBegin(); i++, ++B) if (B.isValid()) *las++ = *B; else *las++ = ' '; *las++ = ':'; for (i=0; i_pos = acc1pos; _acc2->_pos = acc2pos; } } kmer-code-2013-trunk/atac-driver/config.py0000644000000000000000000000037212431447644017201 0ustar rootroot#!/usr/local/packages/python-2.7.3/bin/python2.7 import sys import os import getopt from distutils import sysconfig print sysconfig.get_python_inc() # flags = ['-I' + , # '-I' + sysconfig.get_python_inc(plat_specific=True)] 
kmer-code-2013-trunk/atac-driver/atac.pl0000755000000000000000000007765212524124640016640 0ustar rootroot#!/usr/bin/env perl # # This file is part of A2Amapper. # Copyright (c) 2005-2009 J. Craig Venter Institute # Author: Brian Walenz # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received (LICENSE.txt) a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA use strict; use FindBin; my $id1 = undef; my $seq1 = undef; my $id2 = undef; my $seq2 = undef; my $ATACdir = undef; my $GENOMEdir = "default"; # Location of genome assemblies my $MERYLdir = "default"; # Location of genome mercount databases my $BINdir = "$FindBin::Bin"; my $LIBdir = "$FindBin::Bin/../lib"; my $mersize = 20; # the mer size my $minfill = 20; # the mimimum fill for a reported match. 
my $merlimit = 1; # unique mers only my $maxgap = 0; # the maximum substitution gap # annotates the resulting atac file with parameters # for cross species, also sets match extender options my $crossSpecies = 0; my $matchExtenderOpts = ""; my $filtername = "$LIBdir/filter-heavychains.so"; my $filteropts = "-S 100 -J 100000"; my $numSegments = 1; # More than one not supported in seatac; search for -use too my $numThreads = 4; my $merylThreads = 2; my $merylOnly = 0; # Check that we have everything we need to run # my $leaff = "$BINdir/leaff"; my $meryl = "$BINdir/meryl"; my $existDB = "$BINdir/existDB"; my $seatac = "$BINdir/seatac"; my $chainer = "$BINdir/AtacDriver.py"; my $correctgaps = "$BINdir/correctGaps"; my $statsgenerator = "$BINdir/statsGenerator"; my $makeplot = "perl $BINdir/makeplot.pl"; die "Can't run $leaff\n" if (! -x $leaff); die "Can't run $meryl\n" if (! -x $meryl); die "Can't run $existDB\n" if (! -x $existDB); die "Can't run $seatac\n" if (! -x $seatac); die "Can't find $chainer\n" if (! -e $chainer); die "Can't find $filtername\n" if (! -e $filtername); die "Can't run $correctgaps\n" if (! -x $correctgaps); die "Can't run $statsgenerator\n" if (! -x $statsgenerator); # Main begins here! # # We used to use a long descriptive name for the matches, which # encoded some parameters, but since we never really change those # parameters, we stop encoding. # # It used to be "${id1}vs${id2}.k$mersize.u$merlimit.f$minfill.g$maxgap" parseArgs(); findSources(); my $matches = "${id1}vs${id2}"; if (! -e "$ATACdir/$matches.atac") { my $mercount1 = countMers($id1, $mersize, $merlimit); my $mercount2 = countMers($id2, $mersize, $merlimit); buildMask($mercount1, $mercount2); my @segmentIDs = findHits(); extendMatches(@segmentIDs); makeChains(); closeGaps(); makeClumps(); generateStatistics(); rewriteUIDs("$ATACdir/$matches.atac"); print STDERR "\n"; print STDERR "Finished! 
Output is:\n"; print STDERR " matches and runs -- $ATACdir/$matches.atac\n"; print STDERR " clumps -- $ATACdir/$matches.*clump*.atac\n"; } # Subroutines below! sub usage { print STDERR "usage: $0 -dir AvsB -id1 A -seq1 A.fasta -id2 B -seq2 B.fasta -meryldir M [opts\n"; print STDERR "\n"; print STDERR "ATAC will compute and place results in the run-directory.\n"; print STDERR "The meryl directory is used to store assembly-specific\n"; print STDERR "intermediate files. Internally, atac uses an ID to refer to\n"; print STDERR "a assembly; if the same ID/seq pair is used across multiple\n"; print STDERR "runs, the assembly-specific intermediate files can be reused.\n"; print STDERR "\n"; print STDERR "A * indicates a required argument.\n"; print STDERR "\n"; print STDERR "* -dir run-directory -- path to the RESULTS directory\n"; print STDERR "* -meryldir path -- path to the MERYL directory\n"; print STDERR " -genomedir path -- path to the GENOMES directory\n"; print STDERR "\n"; print STDERR "* -id1 id1 -- ID of the A assembly\n"; print STDERR "* -seq1 seq1.fasta -- sequence of the A assembly\n"; print STDERR "* -id2 id2 -- ID of the B assembly\n"; print STDERR "* -seq2 seq2.fasta -- sequence of the B assembly\n"; print STDERR "\n"; print STDERR "NOTE: A hash table will be built for id1. 
For space and\n"; print STDERR " performance, this should usually be the smaller assembly.\n"; print STDERR "\n"; print STDERR "NOTE: It is generally assumed that id1 is the REFERENCE assembly.\n"; print STDERR "\n"; print STDERR " -numsegments s -- number of segments to do the search in\n"; print STDERR " (doubling segments halves memory usage)\n"; print STDERR " -numsegments NOT SUPPORTED; DO NOT USE\n"; print STDERR "\n"; print STDERR " -numthreads t -- number of threads to use per search\n"; print STDERR " (slight increase in memory usage)\n"; print STDERR "\n"; print STDERR " -merylonly -- only run the meryl components\n"; print STDERR " -merylthreads t -- number of threads to use for meryl\n"; print STDERR "\n"; print STDERR " -samespecies -- use magic values for same species\n"; print STDERR " -crossspecies -- use guesses for different species\n"; print STDERR "\n"; print STDERR " -segmentid x -- only run segment with id x\n"; print STDERR " (don't use)\n"; exit(1); } sub parseArgs { while (scalar(@ARGV) > 0) { my $arg = shift @ARGV; if ($arg eq "-dir") { $ATACdir = shift @ARGV; } elsif ($arg eq "-id1") { $id1 = shift @ARGV; } elsif ($arg eq "-seq1") { $seq1 = shift @ARGV; } elsif ($arg eq "-id2") { $id2 = shift @ARGV; } elsif ($arg eq "-seq2") { $seq2 = shift @ARGV; } elsif ($arg eq "-genomedir") { $GENOMEdir = shift @ARGV; } elsif ($arg eq "-meryldir") { $MERYLdir = shift @ARGV; } elsif ($arg eq "-numsegments") { $numSegments = shift @ARGV; } elsif ($arg eq "-numthreads") { $numThreads = shift @ARGV; } elsif ($arg eq "-merylonly") { $merylOnly = 1; } elsif ($arg eq "-merylthreads") { $merylThreads = shift @ARGV; } elsif ($arg eq "-samespecies") { $mersize = 20; # the mer size $merlimit = 1; # unique mers only $minfill = 20; # the mimimum fill for a reported match. 
$maxgap = 0; # the maximum substitution gap } elsif ($arg eq "-samespecies9") { $mersize = 20; # the mer size $merlimit = 9; # mostly unique mers only $minfill = 20; # the mimimum fill for a reported match. $maxgap = 0; # the maximum substitution gap } elsif ($arg eq "-crossspecies20") { $mersize = 20; # the mer size $merlimit = 9; # mostly unique mers only $minfill = 20; # the mimimum fill for a reported match. $maxgap = 0; # the maximum substitution gap $crossSpecies = 1; # extra parameters in the atac file } elsif ($arg eq "-crossspecies") { $mersize = 18; # the mer size $merlimit = 9; # mostly unique mers only $minfill = 18; # the mimimum fill for a reported match. $maxgap = 0; # the maximum substitution gap $crossSpecies = 1; # extra parameters in the atac file } elsif($arg eq "-filtername") { $filtername = shift @ARGV; } elsif($arg eq "-filteropts") { $filteropts = shift @ARGV; } elsif ($arg eq "-mersize") { $mersize = shift @ARGV; $minfill = $mersize; } elsif ($arg eq "-merlimit") { $merlimit = shift @ARGV; } elsif ($arg eq "-justtestingifitworks") { exit(0); } else { die "unknown option $arg\n"; } } # Search for -use too. die "-numsegments NOT SUPPORTED.\n" if ($numSegments != 1); if (!defined($id1) || !defined($id2)) { usage(); } my $pwd = `pwd`; $pwd =~ s/^\s+//; $pwd =~ s/\s+$//; $GENOMEdir = "$pwd/$GENOMEdir" if ($GENOMEdir !~ m!^/!); $MERYLdir = "$pwd/$MERYLdir" if ($MERYLdir !~ m!^/!); $ATACdir = "$pwd/$ATACdir" if ($ATACdir !~ m!^/!); die "Unset GENOMEdir?'\n" if (! defined($GENOMEdir)); die "Unset MERYLdir?'\n" if (! defined($MERYLdir)); die "Unset ATACdir?'\n" if (! defined($ATACdir)); if (!defined($seq1) || (!defined($seq2))) { die "Can't find the GENOMEdir '$GENOMEdir'\n" if (! -d $GENOMEdir); } if (defined($seq1)) { $seq1 = "$pwd/$seq1" if ($seq1 !~ m!^/!); } if (defined($seq2)) { $seq2 = "$pwd/$seq2" if ($seq2 !~ m!^/!); } system("mkdir $ATACdir") if (! -d "$ATACdir"); system("mkdir $ATACdir/work") if (! 
-d "$ATACdir/work"); system("mkdir $ATACdir/stats") if (! -d "$ATACdir/stats"); system("mkdir $MERYLdir") if (! -d "$MERYLdir"); } # Read the nickname file, set up symlinks to the data sources # sub findSources { my %GENOMEaliases; # Read all the *.atai files in the genome directory, save only # those nicknames that have actual files associated with them. # This lets us have multiple collections of assemblies, and also # lets us move the directory around (e.g., for running on a # laptop). # if (-d $GENOMEdir) { # What? No GENOMEdir? The main already checked that we know both # sequence files. Plus, we'd just fail below. open(A, "ls $GENOMEdir |"); while () { chomp; if (m/\.atai$/) { my $ataifile = "$GENOMEdir/$_"; open(F, "< $ataifile") or die "Can't open '$ataifile'\n"; while () { chomp; if (m/^!\s*format\s+atac\s+(.*)$/) { print STDERR "Found format $1\n"; } elsif (m/^S\s+(\S+)\s+(\S+)$/) { if (-e $2) { $GENOMEaliases{$1} = $2; } else { print STDERR "WARNING: File '$2' not found for alias '$1'.\n"; } } else { #die "Error parsing genome description.\n '$_'\n"; } } close(F); } } } close(A); # If the user gave both an id and a sequence, make sure that # the id is distinct. # die "No id1 supplied!\n" if (!defined($id1)); die "No id2 supplied!\n" if (!defined($id2)); die "id1 = '$id1' is already used by sequence '$GENOMEaliases{$id1}'\n" if (defined($GENOMEaliases{$id1}) && defined($seq1)); die "id2 = '$id2' is already used by sequence '$GENOMEaliases{$id2}'\n" if (defined($GENOMEaliases{$id2}) && defined($seq2)); $seq1 = $GENOMEaliases{$id1} if (!defined($seq1)); $seq2 = $GENOMEaliases{$id2} if (!defined($seq2)); die "Unknown alias $id1.\n" if (!defined($seq1)); die "Unknown alias $id2.\n" if (!defined($seq2)); die "File '$seq1' doesn't exist for alias $id1.\n" if (! -e $seq1); die "File '$seq2' doesn't exist for alias $id2.\n" if (! -e $seq2); #system("ln -s $seq1 $MERYLdir/$id1.fasta") if (! 
-e "$MERYLdir/$id1.fasta"); #system("ln -s $seq2 $MERYLdir/$id2.fasta") if (! -e "$MERYLdir/$id2.fasta"); #system("ln -s ${seq1}idx $MERYLdir/$id1.fastaidx") if (! -e "$MERYLdir/$id1.fastaidx") && (-e "${seq1}idx"); #system("ln -s ${seq2}idx $MERYLdir/$id2.fastaidx") if (! -e "$MERYLdir/$id2.fastaidx") && (-e "${seq2}idx"); removeNewLines($seq1, "$MERYLdir/$id1.fasta"); removeNewLines($seq2, "$MERYLdir/$id2.fasta"); } sub removeNewLines ($$) { my $in = shift @_; my $ot = shift @_; return if (-e "$ot"); print STDERR "Rewriting '$in' to '$ot', removing newlines.\n"; open(F, "< $in") or die "Failed to open '$in' for reading: $!\n"; open(O, "> $ot") or die "Failed to open '$ot' for writing: $!\n"; $_ = ; print O $_; while () { if (m/^>/) { print O "\n"; } else { chomp; } print O $_; } close(O); close(F); } # Check that meryl is finished for each of the inputs # sub countMers { my ($id, $mersize, $merlimit) = @_; # Using "-H 32" is needed if the two sequences aren't about the # same order of magnitude in size. This value is appropriate for # sequences that are genome size. if (! -e "$MERYLdir/$id.ms$mersize.mcdat") { my $cmd; $cmd = "$meryl -B -C "; $cmd .= "-threads $merylThreads "; $cmd .= "-m $mersize "; $cmd .= "-s $MERYLdir/$id.fasta "; $cmd .= "-o $MERYLdir/$id.ms$mersize "; #die "why rebuild $MERYLdir/$id.ms$mersize.mcdat\n"; if (runCommand($cmd)) { unlink "$MERYLdir/$id.ms$mersize.mcidx"; unlink "$MERYLdir/$id.ms$mersize.mcdat"; die "Failed to count mers in $id\n"; } } if (! 
-e "$MERYLdir/$id.ms$mersize.le$merlimit.mcdat") { my $cmd; $cmd = "$meryl -v "; $cmd .= "-M lessthanorequal $merlimit "; $cmd .= "-s $MERYLdir/$id.ms$mersize "; $cmd .= "-o $MERYLdir/$id.ms$mersize.le$merlimit "; #die "why rebuild $MERYLdir/$id.ms$mersize.le$merlimit.mcdat\n"; if (runCommand($cmd)) { unlink "$MERYLdir/$id.ms$mersize.le$merlimit.mcidx"; unlink "$MERYLdir/$id.ms$mersize.le$merlimit.mcdat"; die "Failed to count mers lessthanorequal $merlimit in $id\n"; } } return "$id.ms$mersize.le$merlimit"; } # Return the number of mers in a meryl file. # sub numberOfMers ($) { my $mers = 0; open(F, "$meryl -Dc -s $_[0] |"); while () { $mers = $1 if (m/Found\s(\d+)\smers/); } close(F); print STDERR "$_[0] has $mers mers.\n"; return($mers); } sub buildMask ($$) { my $mercount1 = shift @_; my $mercount2 = shift @_; return if (-e "$ATACdir/work/$matches.mask.done"); my $minFile="min.$mercount1.$mercount2"; # $mercount1 and $mercount2 are the mers we want to use for # searching. Obviously, only in-common mers can be found, we # make a file of those mers here. if (! -e "$ATACdir/work/$minFile.mcdat") { print STDERR "Finding the min count between $mercount1 and $mercount2.\n"; my $cmd; $cmd = "$meryl "; $cmd .= "-M min "; $cmd .= "-s $MERYLdir/$mercount1 "; $cmd .= "-s $MERYLdir/$mercount2 "; $cmd .= "-o $ATACdir/work/$minFile "; if (runCommand($cmd)) { unlink "$ATACdir/work/$minFile.mcidx"; unlink "$ATACdir/work/$minFile.mcdat"; die "Failed to find the min count between $mercount1 and $mercount2\n"; } } die "Failed to make the mask?\n" if (! -e "$ATACdir/work/$minFile.mcdat"); # From that list of in-common mers (in-common and below some # count) we want to make a list of the mers that can be used in # the search table. We can either make a positive (use these # mers) or negative (don't use these mers) list, we just want to # pick the smaller of the two. # # # The positive 'include' list is just the 'min' mers found above. 
# # The negative 'exclude' list is the min mers, removed from the mers in id1. # my $includeSize = (-s "$ATACdir/work/$minFile.mcdat"); my $excludeSize = (-s "$MERYLdir/$id1.ms$mersize.mcdat") - (-s "$ATACdir/work/$minFile.mcdat"); print STDERR "includeSize is proportional to $includeSize.\n"; print STDERR "excludeSize is proportional to $excludeSize.\n"; # But this sometimes breaks (if the mcidx files are different sizes), so we now # pay the cost of actually counting the number of mers. # $includeSize = numberOfMers("$ATACdir/work/$minFile"); $excludeSize = numberOfMers("$MERYLdir/$id1.ms$mersize") - $includeSize; print STDERR "includeSize is exactly $includeSize mers.\n"; print STDERR "excludeSize is exactly $excludeSize mers.\n"; if ($includeSize < $excludeSize) { rename "$ATACdir/work/$minFile.mcidx", "$ATACdir/work/$matches.include.mcidx"; rename "$ATACdir/work/$minFile.mcdat", "$ATACdir/work/$matches.include.mcdat"; } else { if (! -e "$ATACdir/work/$matches.exclude.mcdat") { print STDERR "Finding 'exclude' mers!\n"; # Our use of xor here is really just a subtraction. We # want to report those mers that are only in the first # file, not in the second. All mers in the second file # should be in the first file, by construction. my $cmd; $cmd = "$meryl "; $cmd .= "-M xor "; $cmd .= "-s $MERYLdir/$id1.ms$mersize "; $cmd .= "-s $ATACdir/work/$minFile "; $cmd .= "-o $ATACdir/work/$matches.exclude "; if (runCommand($cmd)) { unlink "$ATACdir/work/$matches.exclude.mcidx"; unlink "$ATACdir/work/$matches.exclude.mcdat"; die "Failed to make exclude mers!\n"; } } if (-e "$ATACdir/work/$matches.exclude.mcdat") { unlink "$ATACdir/work/$minFile.mcdat"; unlink "$ATACdir/work/$minFile.mcidx"; } else { die "Failed to find exclude mers?\n"; } } # Success! 
# open(F, "> $ATACdir/work/$matches.mask.done"); close(F); exit(0) if ($merylOnly == 1); } sub findHits { my $segmentID = "000"; my @segmentIDs; open(F, "$leaff --partitionmap $numSegments $MERYLdir/$id1.fasta |"); $numSegments = ; while() { my $segments = ""; my @pieces = split '\s+', $_; my $memory = shift @pieces; foreach my $piece (@pieces) { if ($piece =~ m/(\d+)\(\d+\)/) { $segments .= "$1\n"; } else { die "Error parsing segment: $piece\n"; } } open(S, "> $ATACdir/work/$matches-segment-$segmentID"); print S $segments; close(S); push @segmentIDs, $segmentID; $segmentID++; } close(F); die "No segments found?\n" if (scalar(@segmentIDs) == 0); # # Now, for each segment that hasn't run, run it. # foreach my $segmentID (@segmentIDs) { # For large runs, while developing, we found it very useful # to build the tables first, save them to disk, then do the # compute. This is also mandatory if one wants to segment # the other assembly to reduce the time each piece runs. # # However, doing so adds a lot of complexity to this script, # and isn't terribly useful anymore. # my $cmd; $cmd = "$seatac "; $cmd .= "-verbose "; $cmd .= "-mersize $mersize "; $cmd .= "-minlength $minfill "; $cmd .= "-maxgap $maxgap "; $cmd .= "-numthreads $numThreads "; $cmd .= "-table $MERYLdir/$id1.fasta "; $cmd .= "-stream $MERYLdir/$id2.fasta "; $cmd .= "-only $ATACdir/work/$matches.include " if (-e "$ATACdir/work/$matches.include.mcdat"); $cmd .= "-mask $ATACdir/work/$matches.exclude " if (-e "$ATACdir/work/$matches.exclude.mcdat"); # Until we fix the -use support in seatac. #$cmd .= "-use $ATACdir/work/$matches-segment-$segmentID "; $cmd .= "-output $ATACdir/work/$matches-segment-$segmentID.matches "; $cmd .= "-filtername $filtername " if (defined($filtername)); $cmd .= "-filteropts \"-1 $id1 -2 $id2 $filteropts\" "; $cmd .= "> $ATACdir/work/$matches-$segmentID.out 2>&1"; if (! 
-e "$ATACdir/work/$matches-segment-$segmentID.matches") { if (runCommand($cmd)) { unlink "$ATACdir/work/$matches-segment-$segmentID.matches.crash"; rename "$ATACdir/work/$matches-segment-$segmentID.matches", "$ATACdir/work/$matches-segment-$segmentID.matches.crash"; die "Failed to run $matches-$segmentID\n"; } } } return(@segmentIDs); } sub extendMatches (@) { my @segmentIDs = @_; return if (-e "$ATACdir/work/$matches.matches.extended"); # Check that each search finished. # foreach my $segmentID (@segmentIDs) { if (! -e "$ATACdir/work/$matches-segment-$segmentID.matches") { die "$ATACdir/work/$matches-segment-$segmentID.matches failed to complete.\n"; } } if ($crossSpecies) { $matchExtenderOpts = "-e 4 -b 5 -s 5 -i 0.70 -p 100 -d 25"; } # Build the header file. # open(ATACFILE, "> $ATACdir/work/$matches.header") or die; print ATACFILE "!format atac 1.0\n"; print ATACFILE "#\n"; print ATACFILE "# Legend:\n"; print ATACFILE "#\n"; print ATACFILE "# Field 0: the row class\n"; print ATACFILE "# Field 1: the match type u=ungapped, x=exact, ....\n"; print ATACFILE "# Field 2: the match instance index\n"; print ATACFILE "# Field 3: the parent index\n"; print ATACFILE "# Field 4: the FASTA sequence id in the first assembly\n"; print ATACFILE "# Field 5: the offset from the start of the sequence for the match\n"; print ATACFILE "# Field 6: the length of the match in the first assembly\n"; print ATACFILE "# Field 7: the orientation of the match sequence in the first assembly.\n"; print ATACFILE "# Field 8: the FASTA sequence id for the second assembly\n"; print ATACFILE "# Field 9: the offset from the start of the sequence for the match\n"; print ATACFILE "# Field 10: the length of the match in the second assembly\n"; print ATACFILE "# Field 11: the orientation of the match sequence in the second assembly.\n"; print ATACFILE "#\n"; print ATACFILE "/assemblyId1=$id1\n"; print ATACFILE "/assemblyId2=$id2\n"; print ATACFILE "/assemblyFile1=$MERYLdir/$id1.fasta\n"; print 
ATACFILE "/assemblyFile2=$MERYLdir/$id2.fasta\n"; # We used to trim off the fasta from the filename...why? my $seq1trimmed = $seq1; my $seq2trimmed = $seq2; $seq1trimmed = $1 if ($seq1trimmed =~ m/(.*).fasta$/); $seq2trimmed = $1 if ($seq2trimmed =~ m/(.*).fasta$/); print ATACFILE "/rawMatchMerSize=$mersize\n"; print ATACFILE "/rawMatchMerMaxDegeneracy=$merlimit\n"; print ATACFILE "/rawMatchAllowedSubstutionBlockSize=$maxgap\n"; print ATACFILE "/rawMatchMinFillSize=$minfill\n"; print ATACFILE "/heavyChainsOn=1\n"; print ATACFILE "/heavyMaxJump=100000\n"; print ATACFILE "/heavyMinFill=100\n"; print ATACFILE "/matchExtenderOn=1\n"; print ATACFILE "/uniqueFilterOn=1\n"; print ATACFILE "/fillIntraRunGapsOn=1\n"; if ($crossSpecies){ # The non-default parameters for Mouse versus Rat. print ATACFILE "/matchExtenderMinEndRunLen=4\n"; print ATACFILE "/matchExtenderMaxMMBlock=5\n"; print ATACFILE "/matchExtenderMinBlockSep=5\n"; print ATACFILE "/matchExtenderMinIdentity=0.7\n"; print ATACFILE "/matchExtenderMaxNbrSep=100\n"; print ATACFILE "/matchExtenderMaxNbrPathMM=25\n"; print ATACFILE "/globalMatchMinSize=20\n"; print ATACFILE "/fillIntraRunGapsErate=0.30\n"; } close(ATACFILE); # run matchExtender # my $cmd; $cmd = "$BINdir/matchExtender $matchExtenderOpts "; $cmd .= "$ATACdir/work/$matches.header "; foreach my $segmentID (@segmentIDs) { $cmd .= " $ATACdir/work/$matches-segment-$segmentID.matches"; } $cmd .= " > $ATACdir/work/$matches.matches.extended"; if (runCommand($cmd)) { rename "$ATACdir/work/$matches.matches.extended", "$ATACdir/work/$matches.matches.extended.FAILED"; die "Failed.\n"; } # Copy all the matches to the matchExtender. We take the liberty # of making new match uids for these, since seatac can't make # unique ids if it is run in multiple passes. 
# if (0) { my $uid = "000000000"; my $comma = $,; $, = " "; my $slash = $\; $\ = "\n"; foreach my $segmentID (@segmentIDs) { open(MATCHES, "< $ATACdir/work/$matches-segment-$segmentID.matches") or die "Failed to open '$ATACdir/work/$matches-segment-$segmentID.matches'\n"; while () { if (m/^M/) { my @v = split '\s+', $_; $v[2] = "m$uid"; undef $v[12]; undef $v[13]; undef $v[14]; undef $v[15]; print ATACFILE @v; $uid++; } } close(MATCHES); } $, = $comma; $\ = $slash; } } sub makeChains { return if (-e "$ATACdir/work/$matches.matches.extended.chained.atac"); if (!defined($ENV{"TMPDIR"})) { print STDERR "WARNING: TMPDIR not set, defaulting to '$ATACdir'.\n"; $ENV{"TMPDIR"} = $ATACdir; } # Path to the python shared-objects (in lib) and the python scripts. # $ENV{'PYTHONPATH'} = "$LIBdir"; if (runCommand("python $chainer $ATACdir/work/$matches.matches.extended")) { print STDERR "PYTHONPATH=$ENV{'PYTHONPATH'}\n"; die "Chainer failed.\n"; } } sub closeGaps { return if (-e "$ATACdir/$matches.atac"); my $cmd; $cmd = "$correctgaps "; $cmd .= " -m $ATACdir/work/$matches.matches.extended.chained.atac "; $cmd .= " -l $ATACdir/work/$matches.matches.extended.chained.gapsclosed.log"; $cmd .= " > $ATACdir/work/$matches.matches.extended.chained.gapsclosed.atac"; if (runCommand($cmd)) { rename "$ATACdir/work/$matches.matches.extended.chained.gapsclosed.atac", "$ATACdir/work/$matches.matches.extended.chained.gapsclosed.FAILED"; die "Failed to close gaps!\n"; } if (! -e "$ATACdir/work/$matches.atac") { system("ln -s $ATACdir/work/$matches.matches.extended.chained.gapsclosed.atac $ATACdir/work/$matches.atac"); } if (! -e "$ATACdir/$matches.atac") { system("ln $ATACdir/work/$matches.matches.extended.chained.gapsclosed.atac $ATACdir/$matches.atac"); } } sub makeClumps { my $cmd; my $ref; my $rid; my $clumpCost = 5000; $ref = 1; $rid = $id1; if (! 
-e "$ATACdir/$matches.ref=$rid.clumpCost=$clumpCost.atac") {
        $cmd  = "cd $ATACdir ";
        $cmd .= "&& ";
        $cmd .= "$BINdir/clumpMaker ";
        $cmd .= " -c $clumpCost ";
        $cmd .= " -$ref ";
        $cmd .= " -f $ATACdir/$matches.atac ";
        $cmd .= "> $ATACdir/$matches.ref=$rid.clumpCost=$clumpCost.atac";

        if (runCommand($cmd)) {
            rename "$ATACdir/$matches.ref=$rid.clumpCost=$clumpCost.atac", "$ATACdir/$matches.ref=$rid.clumpCost=$clumpCost.atac.FAILED";
            die "Failed to make clumps!\n";
        }
    }

    rewriteUIDs("$ATACdir/$matches.ref=$rid.clumpCost=$clumpCost.atac");

    #  Same again, with the second assembly as the reference.

    $ref = 2;
    $rid = $id2;

    if (! -e "$ATACdir/$matches.ref=$rid.clumpCost=$clumpCost.atac") {
        $cmd  = "cd $ATACdir ";
        $cmd .= "&& ";
        $cmd .= "$BINdir/clumpMaker ";
        $cmd .= " -c $clumpCost ";
        $cmd .= " -$ref ";
        $cmd .= " -f $ATACdir/$matches.atac ";
        $cmd .= "> $ATACdir/$matches.ref=$rid.clumpCost=$clumpCost.atac";

        if (runCommand($cmd)) {
            #  Move the partial output aside (as the first branch does) so
            #  the existence test above does not mistake it for a finished
            #  file on the next run.
            rename "$ATACdir/$matches.ref=$rid.clumpCost=$clumpCost.atac", "$ATACdir/$matches.ref=$rid.clumpCost=$clumpCost.atac.FAILED";
            die "Failed to make clumps!\n";
        }
    }

    rewriteUIDs("$ATACdir/$matches.ref=$rid.clumpCost=$clumpCost.atac");
}



#  Runs the statistics generator and the two dot plots (matches, runs)
#  on the final atac file.  Each step is skipped if its output already
#  exists; on failure the partial output is renamed or removed and the
#  script dies.
#
sub generateStatistics {
    my $cmd;

    if (! -e "$ATACdir/stats/$matches.stats") {
        $cmd  = "$statsgenerator ";
        $cmd .= "-a $ATACdir/$matches.atac ";
        $cmd .= "-p $ATACdir/stats/$matches ";
        $cmd .= "-g A ";
        $cmd .= "> $ATACdir/stats/$matches.stats";

        if (runCommand($cmd)) {
            rename "$ATACdir/stats/$matches.stats", "$ATACdir/stats/$matches.stats.FAILED";
            die "Failed to ganerate statistics.\n";
        }
    }

    if (! -e "$ATACdir/stats/$matches.matches.png") {
        $cmd = "$makeplot u $ATACdir/$matches.atac $ATACdir/stats/$matches.matches.png";

        if (runCommand($cmd)) {
            unlink "$ATACdir/stats/$matches.matches.png";
            unlink "$ATACdir/stats/$matches.matches.ps";
            die "Failed to ganerate dot plots.\n";
        }
    }

    if (! -e "$ATACdir/stats/$matches.runs.png") {
        $cmd = "$makeplot r $ATACdir/$matches.atac $ATACdir/stats/$matches.runs.png";

        if (runCommand($cmd)) {
            unlink "$ATACdir/stats/$matches.runs.png";
            unlink "$ATACdir/stats/$matches.runs.ps";
            die "Failed to ganerate dot plots.\n";
        }
    }
}



#  Reads an atac file with atac-format IDs, writes an atac file with
#  UIDs (the first word in the defline).
#
#  The output is written to "$infile.uids"; nothing is done if that file
#  already exists.  The two sequence files and assembly tags are taken
#  from the /assemblyFile1, /assemblyId1, /assemblyFile2 and /assemblyId2
#  lines of the input.  Only 'M' lines are rewritten (fields 4 and 8);
#  every other line is copied through.
#
#  Dies if the header keys are missing, if a file cannot be opened, or
#  if an 'M' line refers to a sequence that is not in the fasta file.  A
#  partially written output is removed before dying so that a later run
#  does not mistake it for a complete one.
#
sub rewriteUIDs ($) {
    my $infile = shift @_;
    my $otfile = "$infile.uids";

    return if (-e $otfile);

    my ($seqA, $tagA, %uidA);
    my ($seqB, $tagB, %uidB);
    my $iid;

    #  Scan the header for the four keys.  Stop at end of file too, so a
    #  file without them reaches the check below instead of looping forever.

    open(F, "<", $infile) or die "Failed to open '$infile'\n";
    while (!defined($seqA) || !defined($tagA) || !defined($seqB) || !defined($tagB)) {
        $_ = <F>;
        last if (!defined($_));

        $seqA = $1 if (m/^\/assemblyFile1=(.*)$/);
        $tagA = $1 if (m/^\/assemblyId1=(.*)$/);
        $seqB = $1 if (m/^\/assemblyFile2=(.*)$/);
        $tagB = $1 if (m/^\/assemblyId2=(.*)$/);
    }
    close(F);

    if (!defined($seqA) || !defined($tagA) || !defined($seqB) || !defined($tagB)) {
        die "Something fishy. Didn't find seqs or tags in '$infile'.\n";
    }

    #  Map "tag:iid" (iid = zero-based index of the defline in the fasta
    #  file) to the first word of that defline.

    $iid = 0;
    open(F, "<", $seqA) or die "Failed to open '$seqA'\n";
    while (<F>) {
        if (m/^>(\S+)\s*.*$/) {
            $uidA{"$tagA:$iid"} = $1;
            $iid++;
        }
    }
    close(F);

    $iid = 0;
    open(F, "<", $seqB) or die "Failed to open '$seqB'\n";
    while (<F>) {
        if (m/^>(\S+)\s*.*$/) {
            $uidB{"$tagB:$iid"} = $1;
            $iid++;
        }
    }
    close(F);

    open(F, "<", $infile) or die "Failed to open '$infile'\n";
    open(G, ">", $otfile) or die "Failed to open '$otfile' for writing: $!\n";
    while (<F>) {
        chomp $_;

        my @v = split '\s+', $_;

        if (m/^M/) {
            if (!defined($uidA{$v[4]}) || !defined($uidB{$v[8]})) {
                #  Don't leave a truncated output behind; the existence
                #  test at the top would skip the rewrite next time.
                close(G);
                close(F);
                unlink $otfile;

                die "Didn't find uidA for $v[4]\n"  if (!defined($uidA{$v[4]}));
                die "Didn't find uidB for $v[8]\n";
            }

            $v[4] = $uidA{$v[4]};
            $v[8] = $uidB{$v[8]};

            $_ = join ' ', @v;
        }

        print G "$_\n";
    }
    close(G);
    close(F);
}



#  Utility to run a command and check the exit status.  We used to try
#  to decode the exit status...sigh.
#
sub runCommand {
    my $cmd = shift @_;

    print STDERR "\n$cmd\n\n";

    if (system($cmd)) {
        return(1);
    }
    return(0);
}
kmer-code-2013-trunk/atac-driver/run-length-n50.pl0000644000000000000000000000215310236607065020375 0ustar rootroot#!/usr/bin/perl

#  Reads a list of numbers on stdin, computes the n50.
#
#  If there are two numbers per line, they are assumed to be
#  a begin-end pair, and the value used is the distance between them.
#
#  An optional first command line argument gives the total length to
#  use instead of the sum of the values.

#  grep "M u " ATAC/atac.shift.atac | cut -d' ' -f 7 | perl n50.pl 3076782067
#  grep "M u " ATAC/box2.shift.atac | cut -d' ' -f 7 | perl n50.pl 3076782067

my @values;
my $totalLength = 0;

while (<STDIN>) {
    s/^\s+//;
    s/\s+$//;

    my @vals = split '\s+', $_;
    my $val;

    if      (scalar(@vals) == 1) {
        $val = $vals[0];
    } elsif (scalar(@vals) == 2) {
        #  A begin-end pair.  The original tested '== 1' here too, so
        #  this branch could never be taken.
        $val = $vals[1] - $vals[0];
        $val = $vals[0] - $vals[1]  if ($val < 0);
    } else {
        #  Blank or malformed line; nothing to record.
        next;
    }

    push @values, $val;
}

if (scalar(@ARGV) > 0) {
    $totalLength = int($ARGV[0]);
} else {
    foreach my $v (@values) {
        $totalLength += $v;
    }
}

@values = sort { $a <=> $b } @values;

#  For each percentage, report the value at which the running sum of the
#  sorted (ascending) values first reaches that share of the total.
for (my $nvalue = 1; $nvalue <= 100; $nvalue += 1) {
    my $limit = $nvalue * $totalLength / 100;
    my $iter  = 0;
    my $sum   = 0;

    while (($sum < $limit) && ($iter < scalar(@values))) {
        $sum += $values[$iter++];
    }

    print STDOUT "$nvalue $limit : $values[$iter-1]\n";
}
kmer-code-2013-trunk/atac-driver/uniqueFilter/0000755000000000000000000000000012641613361020026 5ustar rootrootkmer-code-2013-trunk/atac-driver/uniqueFilter/Make.include0000644000000000000000000000100411512763666022255 0ustar rootroot# -*- makefile -*-

LIBUTL/ :=$(realpath $/../../libutil/)/
LIBBIO/ :=$(realpath $/../../libbio/)/
LIBSEQ/ :=$(realpath $/../../libseq/)/
LIBATAC/ :=$(realpath $/../libatac/)/

$/.CXX_EXES := $/uniqueFilter
$/.CXX_SRCS := $/uniqueFilter.C
$/.CLEAN :=$/*.o $/*~ $/core

$/uniqueFilter: $/uniqueFilter.o \
 ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a

$(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBATAC/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/})
kmer-code-2013-trunk/atac-driver/uniqueFilter/uniqueFilter.C0000644000000000000000000006170012322046702022605 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include "util++.H" #include "atac.H" // Kaz Kylheku library. #include "kazlib/dict.h" #include "kazlib/except.h" #include "kazlib/hash.h" #include "kazlib/list.h" #include "kazlib/sfx.h" // Filters out matches that have non-unique pieces. Does not discard // the whole match, but just trims out the non-unique section. // // Original implementation in Python by Clark Mobarry: // // sort the matches in X // apply the mask to the X // sort the matchs in Y // apply the mask to the Y // output the matches // // if we keep the coverage intervals in core, we get around sorting // the matches. how big can they be -- especially if we only keep // things with > 1 coverage! But, we also can't use an elegant // algorithm for trimming/splitting. // We can abuse this to subtract matches from other matches. The // operation done for removing non-unique is to first find any // overlapping intervals in the set, then subract those from the // input. 
If we instead just find all intervals in a set of matches, // and subtract those from the input, we get subtraction. // Reads the input, builds an interval list of the regions // that have coverage > 1. // struct coverage1_s { uint32 axis; uint32 position; int increment; }; struct coverage2_s { uint32 axis; uint32 beg; uint32 end; uint32 coverage; }; struct match_s { uint32 iid1, pos1, len1, ori1; uint32 iid2, pos2, len2, ori2; }; int sortCoverage1(const void *a, const void *b) { const coverage1_s *A = *((const coverage1_s * const *)a); const coverage1_s *B = *((const coverage1_s * const *)b); if (A->axis < B->axis) return(-1); if (A->axis > B->axis) return(1); if (A->position < B->position) return(-1); if (A->position > B->position) return(1); if (A->increment > B->increment) return(-1); if (A->increment < B->increment) return(1); return(0); } // Not a complete comparison, but we only use this for an interval // list. // int sortCoverage2(const void *a, const void *b) { const coverage2_s *A = *((const coverage2_s * const *)a); const coverage2_s *B = *((const coverage2_s * const *)b); if (A->axis < B->axis) return(-1); if (A->axis > B->axis) return(1); if (A->beg < B->beg) return(-1); if (A->beg > B->beg) return(1); return(0); } // Same as sortCoverage2, but the array being sorted is not // an array of pointers. // int sortCoverage3(const void *a, const void *b) { const coverage2_s *A = (const coverage2_s *)a; const coverage2_s *B = (const coverage2_s *)b; if (A->axis < B->axis) return(-1); if (A->axis > B->axis) return(1); if (A->beg < B->beg) return(-1); if (A->beg > B->beg) return(1); return(0); } // An interval list, searchable // class coverageIntervals { public: dict_t *_il; dict_load_t _load; public: coverageIntervals() { _il = dict_create(DICTCOUNT_T_MAX, sortCoverage2); }; ~coverageIntervals() { dict_free(_il); pfree(); }; // We want to return the first node that is before our thing. Our comparison // tests the start position. 
If there is no first node before our thing, // return the next node. // dnode_t *lookup(void *thing) { dnode_t *it = dict_upper_bound(_il, thing); if (it == 0L) it = dict_lower_bound(_il, thing); return(it); }; void addInterval(int axis, int beg, int end, int coverage) { dnode_t *node = (dnode_t *)palloc(sizeof(dnode_t)); coverage2_s *cov = (coverage2_s *)palloc(sizeof(coverage2_s)); cov->axis = axis; cov->beg = beg; cov->end = end; cov->coverage = coverage; // initialize the node with the value dnode_init(node, 0L); // insert the node into the tree using the key dict_insert(_il, node, (void *)cov); }; void beginLoad(void) { dict_load_begin(&_load, _il); }; void endLoad(void) { dict_load_end(&_load); }; void loadInterval(int axis, int beg, int end, int coverage) { dnode_t *node = (dnode_t *)palloc(sizeof(dnode_t)); coverage2_s *cov = (coverage2_s *)palloc(sizeof(coverage2_s)); cov->axis = axis; cov->beg = beg; cov->end = end; cov->coverage = coverage; // initialize the node with the value dnode_init(node, 0L); // insert the node into the tree using the key dict_load_next(&_load, node, (void *)cov); }; }; void offsetsToCoverage(uint32 minCov, bigQueue *I, coverageIntervals *L) { uint32 axis = ~uint32ZERO; uint32 position = ~uint32ZERO; uint32 coverage = 0; uint64 covered = 0; L->beginLoad(); speedCounter D(" %8.0f matches treed (%8.2f matches/sec)\r", 1, 511, false); while (I->next()) { coverage1_s *cov1 = (coverage1_s *)I->get(); if ((cov1->axis != axis) && (coverage != 0)) fprintf(stderr, "Sorting error -- have coverage at the end of an axis.\n"), exit(1); int length = cov1->position - position; if ((coverage >= minCov) && (length > 0)) { D.tick(); L->loadInterval(axis, position, position+length, coverage); covered += length; } // Occasionally, we get stung by insisting to use unsigned // numbers. This is one of them. 
// if ((coverage == 0) && (cov1->increment == -1)) fprintf(stderr, "Sorting error -- have negative coverage (axis="uint32FMT" position="uint32FMT")!\n", axis, position), exit(1); coverage += cov1->increment; axis = cov1->axis; position = cov1->position; } D.finish(); L->endLoad(); fprintf(stderr, "offsetsToCoverage()-- Found "uint64FMT" bases at coverage "uint32FMT" or greater.\n", covered, minCov); } void findCoverageIntervals(char const *fileName, uint32 minCov, coverageIntervals *Fint, coverageIntervals *Rint) { bigQueue F(sortCoverage1, 0L, 0L, 0L, sizeof(coverage1_s), 128, 0L); bigQueue R(sortCoverage1, 0L, 0L, 0L, sizeof(coverage1_s), 128, 0L); // // Read the input file, building a bigQueue of the interval offsets // atacFileStream AF(fileName); atacMatch *m = AF.nextMatch('u'); while (m) { coverage1_s *fbeg = (coverage1_s *)malloc(sizeof(coverage1_s)); coverage1_s *fend = (coverage1_s *)malloc(sizeof(coverage1_s)); coverage1_s *rbeg = (coverage1_s *)malloc(sizeof(coverage1_s)); coverage1_s *rend = (coverage1_s *)malloc(sizeof(coverage1_s)); fbeg->axis = m->iid1; fbeg->position = m->pos1; fbeg->increment = 1; fend->axis = m->iid1; fend->position = m->pos1 + m->len1; fend->increment = -1; rbeg->axis = m->iid2; rbeg->position = m->pos2; rbeg->increment = 1; rend->axis = m->iid2; rend->position = m->pos2 + m->len2; rend->increment = -1; F.add(fbeg); F.add(fend); R.add(rbeg); R.add(rend); m = AF.nextMatch('u'); } // Sort each bigQueue // F.sort(); R.sort(); // Convert the interval offsets into a coverage interval list // offsetsToCoverage(minCov, &F, Fint); offsetsToCoverage(minCov, &R, Rint); } void intersectTest(match_s *matches, uint32 matchesLen, coverageIntervals *Fint, coverageIntervals *Rint, uint32 matchNumber) { bool errors = false; for (uint32 i=0; ilookup(&thing); thing.axis = matches[i].iid2; thing.beg = matches[i].pos2; thing.end = matches[i].pos2 + matches[i].len2; thing.coverage = 0; dnode_t *node2 = Rint->lookup(&thing); // Keep iterating until 
the node returned from the tree // is empty, or it is after our region // while (node1 && node2) { const coverage2_s *key1 = 0L, *key2 = 0L; bool isect1=false, before1=false; bool isect2=false, before2=false; if (node1) { key1 = (const coverage2_s *)dnode_getkey(node1); isect1 = ((key1->axis == matches[i].iid1) && (matches[i].pos1 < key1->end) && (key1->beg < matches[i].pos1 + matches[i].len1)); before1 = ((key1->axis < matches[i].iid1) || ((key1->axis == matches[i].iid1) && (key1->beg < matches[i].pos1 + matches[i].len1))); } if (node2) { key2 = (const coverage2_s *)dnode_getkey(node2); isect2 = ((key2->axis == matches[i].iid2) && (matches[i].pos2 < key2->end) && (key2->beg < matches[i].pos2 + matches[i].len2)); before2 = ((key2->axis < matches[i].iid2) || ((key2->axis == matches[i].iid2) && (key2->beg < matches[i].pos2 + matches[i].len2))); } if (isect1) { fprintf(stderr, "Got fwd intersection on i="uint32FMT" matchNumber="uint32FMT"\n", i, matchNumber); fprintf(stdout, "--"uint32FMT" "uint32FMT" 1 "uint32FMT" "uint32FMT" %d\n", matches[i].pos1, matches[i].pos1 + matches[i].len1, matches[i].pos2, matches[i].pos2 + matches[i].len2, matches[i].ori2 ? 1 : -1); fprintf(stdout, "--key1 beg="uint32FMT" end="uint32FMT"\n", key1->beg, key1->end); errors = true; } if (isect2) { fprintf(stderr, "Got rev intersection on i="uint32FMT" matchNumber="uint32FMT"\n", i, matchNumber); fprintf(stdout, "--"uint32FMT" "uint32FMT" 1 "uint32FMT" "uint32FMT" %d\n", matches[i].pos1, matches[i].pos1 + matches[i].len1, matches[i].pos2, matches[i].pos2 + matches[i].len2, matches[i].ori2 ? 1 : -1); fprintf(stdout, "--key2 beg="uint32FMT" end="uint32FMT"\n", key2->beg, key2->end); errors = true; } // If we intersected or were before, move to the next, otherwise, // stop // if (isect1 || before1) node1 = dict_next(Fint->_il, node1); else node1 = 0L; if (isect2 || before2) node2 = dict_next(Rint->_il, node2); else node2 = 0L; } } if (errors) abort(); } // This is used all over the place. 
// #define D08D2 uint32FMTW(8)" "uint32FMTW(8) #define KEY1THING "key1 = "uint32FMTW(8)" "uint32FMTW(8)" "uint32FMTW(8)" thing = "D08D2" "D08D2"\n" #define KEY2THING "key2 = "uint32FMTW(8)" "uint32FMTW(8)" "uint32FMTW(8)" thing = "D08D2" "D08D2"\n" int main(int argc, char **argv) { char *inputName = 0L; char *subtractName = 0L; int arg = 1; while (arg < argc) { if (strcmp(argv[arg], "-i") == 0) { inputName = argv[++arg]; } else if (strcmp(argv[arg], "-s") == 0) { subtractName = argv[++arg]; } else { fprintf(stderr, "usage: %s [-h] [-s subtractFile] [-i inputFile]\n", argv[0]); fprintf(stderr, " -s instead of finding regions to remove by looking\n"); fprintf(stderr, " for duplicatd regions in inputFile, load them\n"); fprintf(stderr, " from subtractFile.\n"); exit(1); } arg++; } if (inputName == 0L) fprintf(stderr, "usage: %s [-i inputfile] [-o outputfile] [-h]\n", argv[0]), exit(1); coverageIntervals *Fint = new coverageIntervals; coverageIntervals *Rint = new coverageIntervals; if (subtractName) findCoverageIntervals(subtractName, 1, Fint, Rint); else findCoverageIntervals(inputName, 2, Fint, Rint); // The original implementation would then sort the matches in X, // merge the sorted intervals and the sorted matches together, resort // the matches by Y, and merge with Rcov. That is a lot of work to // avoid keeping two interval lists in memory. // // We build an in-core interval list for both assemblies, and // then stream the matches by it. 
// we need to ask the interval list: // return the intervals that are covered by this interval uint32 matchesLen = 0; uint32 matchesMax = 1024; match_s *matches = new match_s [matchesMax]; match_s extent; uint32 matchNumber = 0; atacFileStream AF(inputName); atacMatch *m = AF.nextMatch('u'); while (m) { matches[0].iid1 = m->iid1; matches[0].pos1 = m->pos1; matches[0].len1 = m->len1; matches[0].ori1 = m->fwd1; matches[0].iid2 = m->iid2; matches[0].pos2 = m->pos2; matches[0].len2 = m->len2; matches[0].ori2 = m->fwd2; // Save the original, we'll use this to test for intersections. // memcpy(&extent, matches, sizeof(match_s)); bool fwd = (matches[0].ori1 == matches[0].ori2); // Query the tree for the first interval intersecting iid1 // XXX: Should this be upper_bound instead? // // A scratch interval used for querying the list // coverage2_s thing; thing.axis = extent.iid1; thing.beg = extent.pos1; thing.end = extent.pos1 + extent.len1; thing.coverage = 0; dnode_t *node1start = Fint->lookup(&thing); dnode_t *node1 = node1start; thing.axis = extent.iid2; thing.beg = extent.pos2; thing.end = extent.pos2 + extent.len2; thing.coverage = 0; dnode_t *node2start = Rint->lookup(&thing); dnode_t *node2 = node2start; // while the node intersects the match, trim or split it, then // get the next node. // // any way I tried this, it's ugly. // // if there is one match, then it is in [0]. If there is more // than one match, then the trimmed match is in [0], but the // split matches are in [1] on. 
// XXX the problem is that we split off things, then move // to the next match, without checking previously split // things against this match // // Keep iterating until the node returned from the tree // is empty, or it is after our region // while (node1 || node2) { bool before1=false; bool before2=false; bool modified=false; const coverage2_s *key1 = 0L; const coverage2_s *key2 = 0L; if (node1) { key1 = (const coverage2_s *)dnode_getkey(node1); before1 = ((key1->axis < extent.iid1) || ((key1->axis == extent.iid1) && (key1->beg < extent.pos1 + extent.len1))); // Three cases: (1) we trim off the front, (2) trim off the // back or (3) split. And, (4) delete the whole damn thing. // // Further complicated by having multiple things to try. // // If anything is modified, reset the node to the start // for (uint32 i=0; ibeg <= matches[i].pos1) && ((matches[i].pos1 + matches[i].len1) <= key1->end)) { modified = true; // Trim the whole thing? // matches[i].pos1 = 0; matches[i].len1 = 0; matches[i].pos2 = 0; matches[i].len2 = 0; } else if ((matches[i].pos1 < key1->beg) && (key1->end < (matches[i].pos1 + matches[i].len1))) { modified = true; // Contained. Split it. 
// // The left half // int newLen = key1->beg - matches[i].pos1; matches[matchesLen].iid1 = matches[i].iid1; matches[matchesLen].pos1 = matches[i].pos1; matches[matchesLen].len1 = newLen; matches[matchesLen].ori1 = matches[i].ori1; if (fwd) { matches[matchesLen].iid2 = matches[i].iid2; matches[matchesLen].pos2 = matches[i].pos2; matches[matchesLen].len2 = newLen; matches[matchesLen].ori2 = matches[i].ori2; } else { matches[matchesLen].iid2 = matches[i].iid2; matches[matchesLen].pos2 = matches[i].pos2 + matches[i].len2 - newLen; matches[matchesLen].len2 = newLen; matches[matchesLen].ori2 = matches[i].ori2; } matchesLen++; // The right half // newLen = matches[i].pos1 + matches[i].len1 - key1->end; matches[matchesLen].iid1 = matches[i].iid1; matches[matchesLen].pos1 = key1->end; matches[matchesLen].len1 = newLen; matches[matchesLen].ori1 = matches[i].ori1; if (fwd) { matches[matchesLen].iid2 = matches[i].iid2; matches[matchesLen].pos2 = matches[i].pos2 + (key1->end - matches[i].pos1); matches[matchesLen].len2 = newLen; matches[matchesLen].ori2 = matches[i].ori2; } else { matches[matchesLen].iid2 = matches[i].iid2; matches[matchesLen].pos2 = matches[i].pos2; matches[matchesLen].len2 = newLen; matches[matchesLen].ori2 = matches[i].ori2; } matchesLen++; // Invalidate this match // matches[i].pos1 = 0; matches[i].len1 = 0; matches[i].pos2 = 0; matches[i].len2 = 0; } else if ((key1->beg <= matches[i].pos1) && (matches[i].pos1 < key1->end)) { modified = true; // Trim the begin? // int trimLen = key1->end - matches[i].pos1; matches[i].pos1 += trimLen; matches[i].len1 -= trimLen; if (fwd == true) matches[i].pos2 += trimLen; matches[i].len2 -= trimLen; } else if ((key1->beg < (matches[i].pos1 + matches[i].len1)) && ((matches[i].pos1 + matches[i].len1) <= key1->end)) { modified = true; // Trim the end? 
// int trimLen = matches[i].pos1 + matches[i].len1 - key1->beg; matches[i].len1 -= trimLen; if (fwd == false) matches[i].pos2 += trimLen; matches[i].len2 -= trimLen; } } } // isect if (node2) { key2 = (const coverage2_s *)dnode_getkey(node2); before2 = ((key2->axis < extent.iid2) || ((key2->axis == extent.iid2) && (key2->beg < extent.pos2 + extent.len2))); for (uint32 i=0; ibeg <= matches[i].pos2) && ((matches[i].pos2 + matches[i].len2) <= key2->end)) { modified = true; // Trim the whole thing? // matches[i].pos1 = 0; matches[i].len1 = 0; matches[i].pos2 = 0; matches[i].len2 = 0; } else if ((matches[i].pos2 < key2->beg) && (key2->end < (matches[i].pos2 + matches[i].len2))) { modified = true; // Contained. Split it. // // The left (forward strand) half // if (fwd) { int newLen = key2->beg - matches[i].pos2; matches[matchesLen].iid1 = matches[i].iid1; matches[matchesLen].pos1 = matches[i].pos1; matches[matchesLen].len1 = newLen; matches[matchesLen].ori1 = matches[i].ori1; matches[matchesLen].iid2 = matches[i].iid2; matches[matchesLen].pos2 = matches[i].pos2; matches[matchesLen].len2 = newLen; matches[matchesLen].ori2 = matches[i].ori2; } else { int newLen = matches[i].pos2 + matches[i].len2 - key2->end; matches[matchesLen].iid1 = matches[i].iid1; matches[matchesLen].pos1 = matches[i].pos1; matches[matchesLen].len1 = newLen; matches[matchesLen].ori1 = matches[i].ori1; matches[matchesLen].iid2 = matches[i].iid2; matches[matchesLen].pos2 = key2->end; matches[matchesLen].len2 = newLen; matches[matchesLen].ori2 = matches[i].ori2; } matchesLen++; // The right (forward strand) half // if (fwd) { int newLen = matches[i].pos2 + matches[i].len2 - key2->end; matches[matchesLen].iid1 = matches[i].iid1; matches[matchesLen].pos1 = matches[i].pos1 + key2->end - matches[i].pos2; matches[matchesLen].len1 = newLen; matches[matchesLen].ori1 = matches[i].ori1; matches[matchesLen].iid2 = matches[i].iid2; matches[matchesLen].pos2 = key2->end; matches[matchesLen].len2 = newLen; 
matches[matchesLen].ori2 = matches[i].ori2; } else { int newLen = key2->beg - matches[i].pos2; matches[matchesLen].iid1 = matches[i].iid1; matches[matchesLen].pos1 = matches[i].pos1 + matches[i].pos2 + matches[i].len2 - key2->beg; matches[matchesLen].len1 = newLen; matches[matchesLen].ori1 = matches[i].ori1; matches[matchesLen].iid2 = matches[i].iid2; matches[matchesLen].pos2 = matches[i].pos2; matches[matchesLen].len2 = newLen; matches[matchesLen].ori2 = matches[i].ori2; } matchesLen++; // Invalidate this match // matches[i].pos1 = 0; matches[i].len1 = 0; matches[i].pos2 = 0; matches[i].len2 = 0; } else if ((key2->beg <= matches[i].pos2) && (matches[i].pos2 < key2->end)) { modified = true; // Trim the begin? fwdOK, revOK // int trimLen = key2->end - matches[i].pos2; matches[i].pos2 += trimLen; matches[i].len2 -= trimLen; if (fwd == true) matches[i].pos1 += trimLen; matches[i].len1 -= trimLen; } else if ((key2->beg < (matches[i].pos2 + matches[i].len2)) && ((matches[i].pos2 + matches[i].len2) <= key2->end)) { modified = true; // Trim the end? // int trimLen = matches[i].pos2 + matches[i].len2 - key2->beg; matches[i].len1 -= trimLen; if (fwd == false) matches[i].pos1 += trimLen; matches[i].len2 -= trimLen; } } } // If we intersected or were before, move to the next, otherwise, // stop. 
// if (modified) node1 = node1start; else if (before1) node1 = dict_next(Fint->_il, node1); else node1 = 0L; if (modified) node2 = node2start; else if (before2) node2 = dict_next(Rint->_il, node2); else node2 = 0L; } // end of while (node1 || node2) // Nobody should be outside the extent // for (uint32 i=0; i 0) && (matches[i].len2 > 0)) { if ((matches[i].pos1 < extent.pos1) || (matches[i].pos1 + matches[i].len1 > extent.pos1 + extent.len1) || (matches[i].pos2 < extent.pos2) || (matches[i].pos2 + matches[i].len2 > extent.pos2 + extent.len2)) { fprintf(stderr, "match "uint32FMT" is outside the extent!\n", i); abort(); } } } // Print out all the modified matches // for (uint32 i=0; i 0) && (matches[i].len2 > 0)) { fprintf(stdout, "M %s %s."uint32FMT" . %s "uint32FMT" "uint32FMT" 1 %s "uint32FMT" "uint32FMT" %d\n", m->matchuid, m->parentuid, i, AF.labelA(), matches[i].pos1, matches[i].len1, AF.labelB(), matches[i].pos2, matches[i].len2, matches[i].ori2 ? 1 : -1); } } // Check that the modified matches do not intersect anything in // the tree. // intersectTest(matches, matchesLen, Fint, Rint, matchNumber); matchNumber++; } } kmer-code-2013-trunk/atac-driver/makeplot.pl0000644000000000000000000002453211436347437017542 0ustar rootroot#!/usr/bin/env perl # # This file is part of A2Amapper. # Copyright (c) 2008-2009 J. Craig Venter Institute # Author: Brian Walenz # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received (LICENSE.txt) a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # The reference MUST by the first sequence, and it MUST be a single sequence. # use strict; use FindBin; my $mm = shift @ARGV; my $in = shift @ARGV; my $ot = shift @ARGV; if (($mm ne "u") && ($mm ne "r")) { die "First arg must be 'u' (ungapped matches) or 'r' (runs).\n"; } if (!defined($ot) && ($in =~ m/(.*).atac/)) { $ot = $1; } if ($ot =~ m/^(.*).png/) { $ot = $1; } if ($ot =~ m/^(.*).ps/) { $ot = $1; } die if (!defined($in)); die if (!defined($ot)); my $version = `gnuplot -V`; if ($version =~ m/gnuplot\s+(\d+\.\d+)\s+/) { $version = $1; } else { chomp $version; print STDERR "WARNING: Unknown gnuplot version '$version'\n"; $version = 0; } if ($version < 4.2) { print STDERR "gnuplot version 4.2 is needed for plots.\n"; exit(0); } open(FD, "> $ot.fdat"); open(RD, "> $ot.rdat"); open(GP, "> $ot.gp"); print GP "set size 1,1\n"; print GP "set grid\n"; print GP "unset key\n"; print GP "set border 10\n"; print GP "set tics scale 0\n"; print GP "set xlabel \"REF\"\n"; print GP "set ylabel \"ASM\"\n"; print GP "set title \"$ot\"\n"; print GP "set format \"%.0f\"\n"; #print GP "set mouse format \"%.0f\"\n"; #print GP "set mouse mouseformat \"[%.0f, %.0f]\"\n"; #print GP "set mouse clipboardformat \"[%.0f, %.0f]\"\n"; print GP "set style line 1 lt 1 lw 1 pt 6 ps 1\n"; print GP "set style line 2 lt 3 lw 1 pt 6 ps 1\n"; print GP "set style line 3 lt 2 lw 1 pt 6 ps 1\n"; # We need to know the length of the reference so we can cycle # the coordinates # my $refLength = 0; # And we need to know the lengths of the scaffolds in the assembly. 
# my $asmFile1; my $asmId1; my $asmFile2; my $asmId2; open(IN, "< $in") or die; while () { if (m/assemblyId1=(.*)/) { $asmId1 = $1; } if (m/assemblyFile1=(.*)/) { $asmFile1 = $1; } if (m/assemblyFile2=(.*)/) { $asmFile2 = $1; } if (m/assemblyId2=(.*)/) { $asmId2 = $1; } } close(IN); # Figure out which scaffolds are reversed. my %reversed; my %goofed; { open(IN, "< $in") or die; while () { my @v = split '\s+', $_; if (($v[0] eq "M") && ($v[1] eq $mm)) { $reversed{$v[8]} += $v[10] * $v[11]; # length * orientation # Remember if there was a goofy inversion in this # scaffold. First time through we remember the # orientation of the first match, then later matches # check it is the same, reseting to a token value (2) if # they differ. if (!defined($goofed{$v[8]})) { $goofed{$v[8]} = $v[11]; } else { if ($goofed{$v[8]} != $v[11]) { $goofed{$v[8]} = 2; } } } } close(IN); foreach my $k (keys %reversed) { if ($reversed{$k} < 0) { $reversed{$k} = 1; } else { $reversed{$k} = 0; } if ($goofed{$k} == 2) { $goofed{$k} = 1; } else { $goofed{$k} = 0; } } } # Find the reference length my $refLength; open(F, "< $asmFile1") or die "Failed to open genome reference '$asmFile1'\n"; $_ = ; # defline while () { s/^\s+//; s/\s+$//; $refLength += length ($_); } close(F); # Find the assembly lengths -- ignore anything without a match. my %asmLength; my $asmLength; open(F, "$FindBin::Bin/leaff -F $asmFile2 -i $asmId2 |"); while () { my @v = split '\s+', $_; if ($v[0] eq "G") { $asmLength{$v[2]} = $v[3] if (exists($reversed{$v[2]})); $asmLength += $v[3]; } } close(F); print GP "set xrange [0:", $refLength+1, "]\n"; print GP "set yrange [0:", $asmLength+1, "]\n"; # Figure out where to split the reference - we shift both X and Y, so # we arbitrarily pick any assembly sequence and anchor it at the # origin. Well, not arbitrary. We pick the longest only so that # we're pretty sure we didn't pick some crappy tiny contig. 
my $refSplit = 0; { my $asmAnchorSequence; my $minAsm = 999999999; # Pick the longest sequence as the anchor -- skipping this block # will instead pick the first thing in the file. # # Don't pick anything with reversed crap on the ends; this # greatly screws up. # foreach my $k (keys %asmLength) { if ($goofed{$k}) { next; } if (!defined($asmAnchorSequence) || ($asmLength{$asmAnchorSequence} < $asmLength{$k})) { $asmAnchorSequence = $k; } } # Except when all scaffolds are goofed. *sigh* if (!defined($asmAnchorSequence)) { foreach my $k (keys %asmLength) { if (!defined($asmAnchorSequence) || ($asmLength{$asmAnchorSequence} < $asmLength{$k})) { $asmAnchorSequence = $k; } } } open(IN, "< $in") or die; while () { my @v = split '\s+', $_; if (($v[0] eq "M") && ($v[1] eq $mm)) { if ($reversed{$v[8]}) { $v[9] = $asmLength{$v[8]} - ($v[9] + $v[10]); $v[11] *= -1; } if (!defined($asmAnchorSequence)) { $asmAnchorSequence = $v[8]; } if ($v[8] eq $asmAnchorSequence) { if ($v[9] < $minAsm) { $minAsm = $v[9]; $refSplit = $v[5]; } } } } close(IN); } # Figure out how to place the assembly sequences # # $refSplit controls where we shift the reference origin. # %offsetRef controls where we place the assembly in the Y axis. # # We want to rotate the reference so the largest scaffold is placed # at the origin. my %offsetRef; { my %minY; my %minYbackup; open(IN, "< $in") or die; while () { my @v = split '\s+', $_; if (($v[0] eq "M") && ($v[1] eq $mm)) { if ($reversed{$v[8]}) { $v[9] = $asmLength{$v[8]} - ($v[9] + $v[10]); $v[11] *= -1; } # Confusing. Rotate the reference coordinate, then # remember the smallest for each scaffold. $v[5] -= $refSplit; $v[5] += $refLength if ($v[5] < 0); my $d = $v[5]; # Ignore if this is a tiny crappy little thing. This # allows us to place most of the real matches on the # diagonal, showing obvious small chimers. 
if ($v[10] >= 2000) { if (!exists($minY{$v[8]}) || ($d < $minY{$v[8]})) { $minY{$v[8]} = $d; } } else { if (!exists($minY{$v[8]}) || ($d < $minYbackup{$v[8]})) { $minYbackup{$v[8]} = $d; } } } } close(IN); # If we never found a large block, use the biggest we did find. foreach my $k (keys %minYbackup) { if (!exists($minY{$k})) { $minY{$k} = $minYbackup{$k}; } } my @sortme; my $lengthsum = 0; foreach my $k (keys %minY) { push @sortme, "$minY{$k}\0$k"; } @sortme = sort { $a <=> $b } @sortme; foreach my $v (@sortme) { my ($p, $k) = split '\0', $v; $offsetRef{$k} = $lengthsum; $lengthsum += $asmLength{$k}; } } print GP "set ytics ( \\\n"; { # Ugh, gross. my @keys = keys %offsetRef; while (scalar(@keys)) { my $k = shift @keys; if (scalar(@keys) > 0) { print GP " \"$k\" $offsetRef{$k},\\\n"; } else { print GP " \"$k\" $offsetRef{$k}\\\n"; } } } print GP ")\n"; print GP "set xtics ( \\\n"; #for (my $p=500000; $p<$refLength; $p += 500000) { # my $i = $p - $refSplit; # if ($i < 0) { # $i += $refLength; # } # print GP " \"$p\" $i,\\\n"; #} print GP " \"origin\" ", $refLength - $refSplit, "\\\n"; print GP ")\n"; my $hasFdat = 0; my $hasRdat = 0; open(IN, "< $in") or die; while () { my @v = split '\s+', $_; if (($v[0] eq "M") && ($v[1] eq $mm)) { if ($reversed{$v[8]}) { $v[9] = $asmLength{$v[8]} - ($v[9] + $v[10]); $v[11] *= -1; } my $abeg = $v[5]; my $aend = $v[5] + $v[6]; my $bbeg = $v[9]; my $bend = $v[9] + $v[10]; $abeg -= $refSplit; $aend -= $refSplit; if (($abeg < 0) || ($aend < 0)) { $abeg += $refLength; $aend += $refLength; } $bbeg += $offsetRef{$v[8]}; $bend += $offsetRef{$v[8]}; if ($v[11] == 1) { $hasFdat++; print FD "$abeg $bbeg\n"; print FD "$aend $bend\n"; print FD "\n\n"; } else { $hasRdat++; print RD "$abeg $bend\n"; print RD "$aend $bbeg\n"; print RD "\n\n"; } } } close(IN); close(FD); close(RD); print GP "set terminal png tiny size 800,800\n"; print GP "set output \"$ot.png\"\n"; if ($hasFdat && $hasRdat) { print GP "plot \\\n"; print GP " \"$ot.fdat\" w lp 
ls 1, \\\n"; print GP " \"$ot.rdat\" w lp ls 2\n"; #print GP "pause -1\n"; } elsif ($hasFdat) { print GP "plot \\\n"; print GP " \"$ot.fdat\" w lp ls 1\n"; #print GP "pause -1\n"; } elsif ($hasRdat) { print GP "plot \\\n"; print GP " \"$ot.rdat\" w lp ls 2\n"; #print GP "pause -1\n"; } else { # No matches?? #die; } print GP "set terminal postscript color\n"; print GP "set output \"$ot.ps\"\n"; print GP "replot\n"; close(GP); system("gnuplot $ot.gp"); #unlink "$ot.fdat"; #unlink "$ot.rdat"; #unlink "$ot.gp"; kmer-code-2013-trunk/atac-driver/chimera/0000755000000000000000000000000012641613360016761 5ustar rootrootkmer-code-2013-trunk/atac-driver/chimera/happy-clones-span-clumps.C0000644000000000000000000003002712415073322023726 0ustar rootroot#include #include using namespace std; #include #include "util++.H" // Reads a clump-annotated atac file, builds a search tree of all the // matches in those clumps. Then reads a list of happy clones mapped // to the sequence, figures out what clump each read in the clone is // in, and reports whenever the clone spans a clump. // Contains a list of intervalLists, one for each clump. The // intervalList stores the positions of the matches in this clump. // class atacClumpCoordTreeScaffold { public: atacClumpCoordTreeScaffold() { clumpsLen = 0; clumpsMax = 64; clumpID = new uint32 [clumpsMax]; clumps = new intervalList * [clumpsMax]; clumpmin = new uint32 [clumpsMax]; clumpmax = new uint32 [clumpsMax]; clumpconfirm = new uint32 [clumpsMax * clumpsMax]; for (uint32 i=0; iadd(begin, length); if (clumpmin[i] > begin) clumpmin[i] = begin; if (clumpmax[i] < begin + length) clumpmax[i] = begin + length; return; } } if (clumpsLen == clumpsMax) { fprintf(stderr, "ERROR: increase clumpsMax!\n"); exit(1); } // Didn't add to an existing clump, so must be a new clump. 
// clumpID[clumpsLen] = clumpid; clumps[clumpsLen] = new intervalList; clumps[clumpsLen]->add(begin, length); clumpmin[clumpsLen] = begin; clumpmax[clumpsLen] = begin + length; clumpsLen++; }; uint32 getClumpID(uint32 begin, uint32 end) { uint32 clumpid = 0; uint32 numhits = 0; // We can make this much quicker if we remember the extent of // each interval list. // // We want to allow partial matches, so check that the end is // above the min, and the begin is before the max. // // b-------e b-----e // -------clump------ // for (uint32 i=0; ioverlapping(begin, end, intervals, intervalsLen, intervalsMax) > 0) { clumpid = clumpID[i]; numhits++; } } else { // If you really want to check.... //if (clumps[i]->overlapping(begin, end, intervals, intervalsLen, intervalsMax) > 0) // fprintf(stderr, "WARNING: Found overlapping clump outside extent!\n"); } } if (numhits == 0) return(0); if (numhits == 1) return(clumpid); //fprintf(stderr, "FOUND MORE THAN ONE CLUMP MATCHING!\n"); return(~uint32ZERO); }; void sortClumps(void) { uint32 ciid; intervalList *cptr; uint32 cmin; uint32 cmax; uint32 i = 0; uint32 j = 0; // an insertion sort for (i=clumpsLen; i--; ) { ciid = clumpID[i]; cptr = clumps[i]; cmin = clumpmin[i]; cmax = clumpmax[i]; for (j=i+1; (j < clumpsLen) && (cmin > clumpmin[j]); j++) { clumpID[j-1] = clumpID[j]; clumps[j-1] = clumps[j]; clumpmin[j-1] = clumpmin[j]; clumpmax[j-1] = clumpmax[j]; } clumpID[j-1] = ciid; clumps[j-1] = cptr; clumpmin[j-1] = cmin; clumpmax[j-1] = cmax; } }; void confirm(uint32 ca, uint32 cb) { uint32 caidx = 0; uint32 cbidx = 0; for (uint32 i=0; i **clumps; uint32 *clumpmin; uint32 *clumpmax; uint32 *clumpconfirm; uint32 intervalsLen; uint32 intervalsMax; uint32 *intervals; }; class atacClumpCoordTree { public: atacClumpCoordTree() { scaffoldsMax = 262144; scaffolds = new atacClumpCoordTreeScaffold * [scaffoldsMax]; for (uint32 i=0; i= scaffoldsMax) { fprintf(stderr, "ERROR: increase scaffoldsMax "uint32FMT"\n", scaffoldid); exit(1); } if 
(scaffolds[scaffoldid] == 0L) scaffolds[scaffoldid] = new atacClumpCoordTreeScaffold; scaffolds[scaffoldid]->addMatch(clumpid, begin, length); }; void removeSingleClumpScaffolds(void) { uint32 deleted = 0; uint32 remain = 0; for (uint32 i=0; iclumpsLen < 2)) { delete scaffolds[i]; scaffolds[i] = 0L; deleted++; } if (scaffolds[i]) { scaffolds[i]->sortClumps(); remain++; } } fprintf(stderr, "Deleted "uint32FMT" scaffolds with less than 2 clumps.\n", deleted); fprintf(stderr, "Remain "uint32FMT" scaffolds with more than 2 clumps.\n", remain); }; void showMultipleClumpScaffolds(void) { for (uint32 i=0; iclumpsLen >= 2)) { fprintf(stdout, "\n"); for (uint32 j=0; jclumpsLen; j++) { bool overlap = false; if ((j+1 < scaffolds[i]->clumpsLen) && (scaffolds[i]->clumpmax[j] > scaffolds[i]->clumpmin[j+1])) overlap = true; fprintf(stdout, "scaffold "uint32FMT" clump "uint32FMT" begin "uint32FMT" end "uint32FMT"\n", i, scaffolds[i]->clumpID[j], scaffolds[i]->clumpmin[j], scaffolds[i]->clumpmax[j]); if (overlap) fprintf(stdout, "scaffold "uint32FMT" clump "uint32FMT" and clump "uint32FMT" OVERLAP\n", i, scaffolds[i]->clumpID[j], scaffolds[i]->clumpID[j+1]); for (uint32 b=0; bclumpsLen; b++) { uint32 cc = j * scaffolds[i]->clumpsMax + b; if (scaffolds[i]->clumpconfirm[cc]) { fprintf(stdout, "scaffold "uint32FMT" clump "uint32FMT" and "uint32FMT" confirmed by "uint32FMT" clones.\n", i, scaffolds[i]->clumpID[j], scaffolds[i]->clumpID[b], scaffolds[i]->clumpconfirm[cc]); } } } } } }; uint32 getClumpID(uint32 scaffoldid, uint32 begin, uint32 end) { if (scaffolds[scaffoldid]) return(scaffolds[scaffoldid]->getClumpID(begin, end)); return(0); }; void confirmClump(uint32 scaffoldid, uint32 ca, uint32 cb) { if (scaffolds[scaffoldid]) { scaffolds[scaffoldid]->confirm(ca, cb); } }; uint32 scaffoldsMax; atacClumpCoordTreeScaffold **scaffolds; }; atacClumpCoordTree* buildCoordTree(char *clumpFile) { atacClumpCoordTree *ct = new atacClumpCoordTree; FILE *inf; char inl[1024]; // We can't use the 
built-in atac reader, because it strips out // clump information. Bummer. errno = 0; inf = fopen(clumpFile, "r"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", clumpFile, strerror(errno)), exit(1); fgets(inl, 1024, inf); if (feof(inf)) return(0L); while (!feof(inf)) { if ((inl[0] == 'M') && (inl[2] == 'u')) { splitToWords S(inl); //fprintf(stderr, "%s", inl); if (S[12][0] != '#') fprintf(stderr, "no clump for '%s'\n", inl); if (S[13][0] != '-') { char *scfid = S[8]; while (*scfid != ':') scfid++; ct->addMatch(atoi(scfid + 1), atoi(S[13]), atoi(S[9]), atoi(S[10])); } } fgets(inl, 1024, inf); } fclose(inf); return(ct); } int main(int argc, char **argv) { char *clumpFile = 0L; char *happyFile = 0L; int arg=1; int err=0; while (arg < argc) { if (strcmp(argv[arg], "-clumps") == 0) { clumpFile = argv[++arg]; } else if (strcmp(argv[arg], "-happy") == 0) { happyFile = argv[++arg]; } else { err++; } arg++; } if (clumpFile == 0L) fprintf(stderr, "No -clumps supplied!\n"), err++; if (happyFile == 0L) fprintf(stderr, "No -happy clones supplied!\n"), err++; if (err) fprintf(stderr, "usage: %s ...\n", argv[0]), exit(1); atacClumpCoordTree *ct = buildCoordTree(clumpFile); ct->removeSingleClumpScaffolds(); ct->showMultipleClumpScaffolds(); //////////////////////////////////////// // // ugly hack -- read in the map from HUREF6A UID to scaffold // map UIDtoIID; { char *uidmapName = "/project/huref6/assembly/fasta/HUREF6A.info"; errno = 0; FILE *F = fopen(uidmapName, "r"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", uidmapName, strerror(errno)), exit(1); char L[1024]; fgets(L, 1024, F); while (!feof(F)) { if (L[0] == 'G') { splitToWords S(L); UIDtoIID[strtouint64(S[13]+1, 0L)] = strtouint32(S[10], 0L); } fgets(L, 1024, F); } } // //////////////////////////////////////// FILE *inf; char ina[1024]; char inb[1024]; speedCounter S("%9.0f clones (%6.1f clones/sec)\r", 1, 4096, true); errno = 0; inf = fopen(happyFile, "r"); if (errno) fprintf(stderr, "Failed to open 
'%s': %s\n", happyFile, strerror(errno)), exit(1); fgets(ina, 1024, inf); chomp(ina); fgets(inb, 1024, inf); chomp(inb); while (!feof(inf)) { splitToWords A(ina); splitToWords B(inb); // Some sanity checking. if (strcmp(A[2], B[2]) != 0) { fprintf(stderr, "ERROR: Different clone!\n%s\n%s\n", ina, inb); } uint32 scfa = UIDtoIID[strtouint64(A[7], 0L)]; uint32 scfb = UIDtoIID[strtouint64(B[7], 0L)]; uint32 cla = ct->getClumpID(scfa, atoi(A[8]), atoi(A[9])); uint32 clb = ct->getClumpID(scfb, atoi(B[8]), atoi(B[9])); if (cla == ~uint32ZERO) { fprintf(stdout, "%s spans clump in scaffold %s,"uint32FMT"\n", ina, A[7], scfa); cla = 0; } if (clb == ~uint32ZERO) { fprintf(stdout, "%s spans clump in scaffold %s,"uint32FMT" \n", inb, B[7], scfb); clb = 0; } if ((cla != 0) && (clb != 0) && (cla != clb)) { ct->confirmClump(scfa, (cla < clb) ? cla : clb, (cla < clb) ? clb : cla); fprintf(stdout, "scaffold %s,"uint32FMT" clump "uint32FMT" "uint32FMT" confirmed by %s\n", A[7], scfa, (cla < clb) ? cla : clb, (cla < clb) ? 
clb : cla, A[2]); } S.tick(); fgets(ina, 1024, inf); chomp(ina); fgets(inb, 1024, inf); chomp(inb); } fclose(inf); S.finish(); ct->showMultipleClumpScaffolds(); delete ct; } kmer-code-2013-trunk/atac-driver/chimera/Make.include0000644000000000000000000000106211512763666021215 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../../libutil/)/ LIBBIO/ :=$(realpath $/../../libbio/)/ LIBSEQ/ :=$(realpath $/../../libseq/)/ LIBATAC/ :=$(realpath $/../libatac/)/ $/.CXX_EXES := $/happy-clones-span-clumps $/.CXX_SRCS := $/happy-clones-span-clumps.C $/.CLEAN :=$/*.o $/*~ $/core $/happy-clones-span-clumps: $/happy-clones-span-clumps.o \ ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBATAC/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) kmer-code-2013-trunk/atac-driver/chimera/use-clumps-to-detect-chimera.pl0000644000000000000000000000661510537430472024722 0ustar rootroot#!/usr/bin/perl # Takes a path to a properly formatted atac file, uses that file to # detect potential chimeric scaffolds. use strict; my $atacFile = undef; my $reference = "A"; my $noiseLevel = 1; while (scalar(@ARGV)) { my $arg = shift @ARGV; if ($arg eq "-A") { $reference = "A"; } elsif ($arg eq "-B") { $reference = "B"; } elsif ($arg eq "-n") { $noiseLevel = shift @ARGV; } elsif (-e $arg) { $atacFile = $arg; } else { print STDERR "Unknown option (or input file) '$arg'\n"; } } if (! 
-e "$atacFile") { print STDERR "usage: $0 [-A | -B] file.atac\n"; print STDERR " -A use the first assembly as the reference (default)\n"; print STDERR " -B use the second assembly as the reference\n"; exit(1); } open(ATAC, "< $atacFile") or die; my @ATAC = ; chomp @ATAC; close(ATAC); my %ATACtoUID; foreach my $line (@ATAC) { if ($line =~ m/assemblyFile(\d)=(.*)$/) { chomp $line; my $sequenceFile; if (($1 == 1) && ($reference eq "B")) { $sequenceFile = $2; } if (($1 == 2) && ($reference eq "A")) { $sequenceFile = $2; } # If not defined, we don't need to read in these ID's. if (defined($sequenceFile)) { $sequenceFile =~ s/.fasta/.info/; die "Failed to find info on '$sequenceFile'\n" if (! -e $sequenceFile); print STDERR "Reading ATAC to UID map for '$sequenceFile'\n"; open(F, "< $sequenceFile"); while () { if (m/^G/) { my @vals = split '\s+', $_; $ATACtoUID{$vals[2]} = $vals[13]; } } close(F); } } } # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 # M u H4467431a11 r1 B35LC:0 56097 66 1 HUREF4:36734 812 66 -1 # 10867 # M u H4467431a10 r1 B35LC:0 56163 29 1 HUREF4:36734 782 29 -1 # 10867 # Note that our match below does not match the non-clump marker "-1" # Find the scaffolds with errors # # Save the clump id for the first instance of every scaffold. If # we've seen the scaffold before, and the clump id is now different, # remember this scaffold. my %scaffold; my %errors; foreach (@ATAC) { if (m/^M\su\s.*\s#\s(\d+)$/) { my @v = split '\s+', $_; if (!defined($scaffold{$v[8]})) { $scaffold{$v[8]} = $v[13]; } elsif ($scaffold{$v[8]} ne $v[13]) { $errors{$v[8]}++; } } } # Print them # # Go through the map again, remembering the number of times we see a # scaffold/clump pair. It's also useful to remember the sum of the # lengths for this pair, and the chromosome it maps to. 
my %counts; my %length; my %chrid; foreach (@ATAC) { if (m/^M\su\s.*\s#\s(\d+)$/) { my @v = split '\s+', $_; if (defined($errors{$v[8]})) { my $string = "$v[8]\t$ATACtoUID{$v[8]}\t$v[13]"; $counts{$string}++; $length{$string} += $v[10]; $chrid{$string} = $v[4]; } } } # We could provide a raw dump of this data, but we'd like # to first denoise it. A very simple denoser works - just # don't report anything with one match. open(F, "| sort -k3,3"); foreach my $s (keys %counts) { if ($counts{$s} > $noiseLevel) { print F "$counts{$s}\t$length{$s}\t$s\t$chrid{$s}\n"; } } close(F); kmer-code-2013-trunk/atac-driver/chainer/0000755000000000000000000000000012641613361016763 5ustar rootrootkmer-code-2013-trunk/atac-driver/chainer/python/0000755000000000000000000000000012641613361020304 5ustar rootrootkmer-code-2013-trunk/atac-driver/chainer/python/squeezeIntraRunGaps.py0000644000000000000000000005125210546555025024646 0ustar rootroot#!/usr/bin/env python import sys import string import MatchRecord #import MyFile import IdxStore #import localAlignerInterface #import shelve # True=1 False=0 theIsolatedSNPcount = 0 completefillednotXY = 0 completefilledXnotY = 0 completefilledYnotX = 0 completefilledXandY = 0 def analyzeGap(x,y,left,right,outfile,maxgap,margin): global theIsolatedSNPcount global completefillednotXY global completefilledXnotY global completefilledYnotX global completefilledXandY solidThreshold=20 inter_run_gap_count = 0 x_chCount = {} y_chCount = {} x_notACGT = 0 y_notACGT = 0 lp = 0 # We should modify the match instead! rp = 0 x_pos = 0 x_len = 0 y_pos = 0 y_len = 0 if( (left.x_scaf_uid == right.x_scaf_uid) and (left.y_scaf_uid == right.y_scaf_uid) and (left.runid == right.runid) ): # (left.sindex + 1 == right.sindex) ): # This is obsolete in Russell's file format. 
# sys.stderr.write("Intra-run gap\n") left_forward = (left.x_orientation == left.y_orientation) right_forward = (right.x_orientation == right.y_orientation) if( left_forward != right_forward): sys.stderr.write("Bad orientations in run\n") assert(left_forward == right_forward) sorted_by_x = (left.x_start <= right.x_start) dovetail_in_x = sorted_by_x and (left.x_start+left.x_length <= right.x_start+right.x_length) sorted_by_y = (left.y_start <= right.y_start) dovetail_in_y = sorted_by_y and (left.y_start+left.y_length <= right.y_start+right.y_length) if(not(sorted_by_x or sorted_by_y)): print >>sys.stderr, "bad sorting in runs" print >>sys.stderr, left print >>sys.stderr, right assert(sorted_by_x or sorted_by_y) # This concept of sorted allows neggaps but not containmant in both axes. if(sorted_by_x and not dovetail_in_x): print >>sys.stderr, "sorted_by_x and not dovetail_in_x" print >>sys.stderr, left print >>sys.stderr, right if(sorted_by_y and not dovetail_in_y): print >>sys.stderr, "sorted_by_y and not dovetail_in_y" print >>sys.stderr, left print >>sys.stderr, right if(not((not left_forward) or (sorted_by_x and sorted_by_y))): print >>sys.stderr, "bad sorting in runs" print >>sys.stderr, left print >>sys.stderr, right if(not((left_forward) or (not(sorted_by_x and sorted_by_y)))): print >>sys.stderr, "bad sorting in runs" print >>sys.stderr, left print >>sys.stderr, right assert((not left_forward) or (sorted_by_x and sorted_by_y)) assert((left_forward) or (not(sorted_by_x and sorted_by_y))) if(sorted_by_x): # Sorted by x positions. #print "Sorted by X" x_pos = left.x_start + left.x_length # Start of the intra-run gap. x_len = right.x_start - x_pos # Length of the intra-run gap. 
if(left_forward): y_pos = left.y_start + left.y_length y_len = right.y_start - y_pos else: y_pos = right.y_start + right.y_length y_len = left.y_start - y_pos # end if else: # Assume sorted by y positions #print "Sorted by Y" y_pos = left.y_start + left.y_length y_len = right.y_start - y_pos if(left_forward): x_pos = left.x_start + left.x_length x_len = right.x_start - x_pos else: x_pos = right.x_start + right.x_length x_len = left.x_start - x_pos # end if # end if # print "Left %d,%d Right %d,%d Width %d,%d" % (x_pos,y_pos,x_pos+x_len,y_pos+y_len,x_len,y_len) assert(left.x_start >= 0) assert(left.x_length > 0) assert(left.y_start >= 0) assert(left.y_length > 0) assert(right.x_start >= 0) assert(right.x_length > 0) assert(right.y_start >= 0) assert(right.y_length > 0) # Trim the intra-run neggaps to become proper gaps. if(0 and (x_len < 0 or y_len < 0)): sys.stderr.write("neggap x_uid= %s x_pos= %d x_len= %d y_uid= %s y_pos= %d y_len= %d\n" % (left.x_scaf_uid,x_pos,x_len,left.y_scaf_uid,y_pos,y_len)) trim_len = max( -x_len, -y_len) # Increase the intra-run gap length: x_len += trim_len; y_len += trim_len # Decrease the right-hand match length: right.x_length -= trim_len right.y_length -= trim_len # Adjust the right-hand gap ending position: if(left_forward): right.x_start += trim_len right.y_start += trim_len else: if(sorted_by_x): right.x_start += trim_len y_pos -= trim_len else: # assume sorted_by_y right.y_start += trim_len x_pos -= trim_len # end if #end if sys.stderr.write("newgap x_uid= %s x_pos= %d x_len= %d y_uid= %s y_pos= %d y_len= %d\n" % (left.x_scaf_uid,x_pos,x_len,left.y_scaf_uid,y_pos,y_len)) # end if assert(right.x_length > 0) assert(right.y_length > 0) # We now have a proper intra-run gap segment between two match segments. 
x_substring = "" if(x_len > 0): x_substring = string.upper( x.getStringFromFasta( sorted_by_x, left.x_scaf_uid, x_pos, x_len)); # end if y_substring = "" if(y_len > 0): y_substring = string.upper( y.getStringFromFasta( sorted_by_y, left.y_scaf_uid, y_pos, y_len)); # end if if(x_len > 0 and not(x_len == len(x_substring))): sys.stderr.write("x string lengths mismatch asked=%d got=%d\n" % (x_len,len(x_substring))) sys.stderr.write("x_uid= %s x_pos= %d x_len= %d y_uid= %s y_pos= %d y_len= %d\n" % (left.x_scaf_uid,x_pos,x_len,left.y_scaf_uid,y_pos,y_len)) print >>sys.stderr, "left match" print >>sys.stderr, left print >>sys.stderr, "right match" print >>sys.stderr, right # end if if(y_len > 0 and not(y_len == len(y_substring))): sys.stderr.write("y string lengths mismatch asked=%d got=%d\n" % (y_len,len(y_substring))) sys.stderr.write("x_uid= %s x_pos= %d x_len= %d y_uid= %s y_pos= %d y_len= %d\n" % (left.x_scaf_uid,x_pos,x_len,left.y_scaf_uid,y_pos,y_len)) print >>sys.stderr, "left match" print >>sys.stderr, left print >>sys.stderr, "right match" print >>sys.stderr, right # end if assert(x_len < 0 or x_len == len(x_substring)) assert(y_len < 0 or y_len == len(y_substring)) assert(lp == 0) assert(rp == 0) # Next we extend the raw matches to squeeze the intra-run gaps # with exactly matching sequence. if( lp+rp < x_len and lp+rp < y_len ): while(lp+rp < x_len and lp+rp < y_len): # modify lp x_ch = x_substring[lp]; y_ch = y_substring[lp]; is_a_match = (x_ch==y_ch) and \ (x_ch=="A" or x_ch=="C" or x_ch=="G" or x_ch=="T") if(is_a_match): lp += 1 else: break # end if # end while while(lp+rp < x_len and lp+rp < y_len): # modify rp x_ch = x_substring[-1-rp]; y_ch = y_substring[-1-rp]; is_a_match = (x_ch==y_ch) and \ (x_ch=="A" or x_ch=="C" or x_ch=="G" or x_ch=="T") if(is_a_match): rp += 1 else: break # end if # end while # end if # Next we extend the raw matches to squeeze the intra-run gaps. 
# Each mismatch character must be padded on both sides by # "solidThreshold" characters from {A,C,G,T}. if( x_len > lp+rp and y_len > lp+rp ): lq = lp; solid = solidThreshold; tentativeSNPCount = 0 while(lq+rp < x_len and lq+rp < y_len): x_ch = x_substring[lq]; y_ch = y_substring[lq]; is_a_match = (x_ch==y_ch) and \ (x_ch=="A" or x_ch=="C" or x_ch=="G" or x_ch=="T") if(solid >= solidThreshold): lp = lq theIsolatedSNPcount += tentativeSNPCount tentativeSNPCount = 0 if(is_a_match): solid += 1 lp += 1 else: solid = 0 tentativeSNPCount = 1 tentativeSNPposition = lq # end if else: if(is_a_match): solid += 1 else: # a second mismatch within 20 bp break # end if # end if lq += 1 # end while if(lq+rp == x_len and lq+rp == y_len): lp = lq theIsolatedSNPcount += tentativeSNPCount tentativeSNPCount = 0 # end if rq = rp; solid = solidThreshold; tentativeSNPCount = 0 while( lp+rq < x_len and lp+rq < y_len ): x_ch = x_substring[-1-rq]; y_ch = y_substring[-1-rq]; is_a_match = (x_ch==y_ch) and \ (x_ch=="A" or x_ch=="C" or x_ch=="G" or x_ch=="T") if(solid >= solidThreshold): rp = rq theIsolatedSNPcount += tentativeSNPCount tentativeSNPCount = 0 if(is_a_match): solid += 1 rp += 1 else: solid = 0 tentativeSNPCount = 1 # end if else: if(is_a_match): solid += 1 else: # a second mismatch within 20 bp break # end if # end if rq += 1 # end while if( lp+rq == x_len and lp+rq == y_len ): rp = rq theIsolatedSNPcount += tentativeSNPCount tentativeSNPCount = 0 # end if # end if # Next we close any remaining intra-run gaps that can form # ungapped alignments of a specified high quality. # Currently we have hard coded that there must be # 5 or less mismatches or # better than 95% identity # in the intrarun gap remaining after the previous gap closing. 
assert(lp >= 0) assert(rp >= 0) if( x_len == y_len and x_len > lp+rp): lq = lp; mismatchCount = 0 while(lq+rp < x_len and lq+rp < y_len): x_ch = x_substring[lq]; y_ch = y_substring[lq]; is_a_match = (x_ch==y_ch) and \ (x_ch=="A" or x_ch=="C" or x_ch=="G" or x_ch=="T") if(not is_a_match): mismatchCount += 1 lq += 1 # end while if(mismatchCount <= 5 or mismatchCount <= 0.05*(x_len-lp-rp) ): lp=lq # sys.stderr.write("# Closed gap by jumping\n"); # end if # end if if(0): sys.stderr.write( "rawX: %s\n" % x_substring) sys.stderr.write( "rawY: %s\n" % y_substring) sys.stderr.write( "squX: %s\n" % x_substring[:lp]+ \ string.lower(x_substring[lp:x_len-rp]) + \ x_substring[x_len-rp:x_len]) sys.stderr.write( "squY: %s\n" % y_substring[:lp]+ \ string.lower(y_substring[lp:y_len-rp]) + \ y_substring[y_len-rp:y_len]) sys.stderr.write( "sqeX: %s\n" % x_substring[lp:x_len-rp]) sys.stderr.write( "sqeY: %s\n" % y_substring[lp:y_len-rp]) sys.stderr.write( "x_seg=(%s %s %d %d)" % \ (left.x_orientation, left.x_scaf_uid, x_pos+lp, x_len-rp-lp)) sys.stderr.write( "y_seg=(%s %s %d %d)\n" % \ (left.y_orientation, left.y_scaf_uid, y_pos+lp, y_len-rp-lp)) if(lp+rp > x_len and x_len >= 0): sys.stderr.write("overfilledX ") sys.stderr.write( "x_seg=(%s %s %d %d) " % \ (left.x_orientation, left.x_scaf_uid, x_pos+lp, x_len-rp-lp)) sys.stderr.write( "y_seg=(%s %s %d %d)\n" % \ (left.y_orientation, left.y_scaf_uid, y_pos+lp, y_len-rp-lp)) if(lp+rp > y_len and y_len >= 0): sys.stderr.write("overfilledY ") sys.stderr.write( "x_seg=(%s %s %d %d) " % \ (left.x_orientation, left.x_scaf_uid, x_pos+lp, x_len-rp-lp)) sys.stderr.write( "y_seg=(%s %s %d %d)\n" % \ (left.y_orientation, left.y_scaf_uid, y_pos+lp, y_len-rp-lp)) if(lp+rp < x_len and lp+rp < y_len): completefillednotXY += 1 if(lp+rp == x_len and x_len < y_len): completefilledXnotY += 1 if(lp+rp == y_len and y_len < x_len): completefilledYnotX += 1 if(lp+rp == x_len and x_len == y_len): completefilledXandY += 1 # Print out abutting intervals to 
fill gaps. if(lp>0): left_fill = left.copy() if(left_forward): left_fill.subtype = "L" left_fill.x_start = x_pos left_fill.x_length = lp left_fill.y_start = y_pos left_fill.y_length = lp left_fill.matchid = left_fill.matchid + "L" else: if(sorted_by_x): left_fill.subtype = "L" left_fill.x_start = x_pos left_fill.x_length = lp left_fill.y_start = y_pos+y_len-lp left_fill.y_length = lp left_fill.matchid = left_fill.matchid + "L" else: # assume sorted_by_y left_fill.subtype = "L" left_fill.x_start = x_pos+x_len-lp left_fill.x_length = lp left_fill.y_start = y_pos left_fill.y_length = lp left_fill.matchid = left_fill.matchid + "L" # end if # end if # outfile.write(str(left_fill)) print >>outfile, left_fill # end if if(rp>0): right_fill = right.copy() if(left_forward): right_fill.subtype = "R" right_fill.x_start = x_pos+x_len-rp right_fill.x_length = rp right_fill.y_start = y_pos+y_len-rp right_fill.y_length = rp right_fill.matchid = right_fill.matchid + "R" else: if(sorted_by_x): right_fill.subtype = "R" right_fill.x_start = x_pos+x_len-rp right_fill.x_length = rp right_fill.y_start = y_pos right_fill.y_length = rp right_fill.matchid = right_fill.matchid + "R" else: # assume sorted_by_y right_fill.subtype = "R" right_fill.x_start = x_pos right_fill.x_length = rp right_fill.y_start = y_pos+y_len-rp right_fill.y_length = rp right_fill.matchid = right_fill.matchid + "R" # end if # end if #outfile.write(str(right_fill)) print >>outfile, right_fill # end if if(0): # Start gap composition diagnostics. 
if( (x_len > lp+rp) or (y_len > lp+rp) ): for ch in x_substring[lp:x_len-rp]: if(not(ch=='A' or ch=='C' or ch=='G' or ch=='T')): x_notACGT += 1 try: x_chCount[ch] += 1 except KeyError: x_chCount[ch] = 1 for ch in y_substring[lp:y_len-rp]: if(not(ch=='A' or ch=='C' or ch=='G' or ch=='T')): y_notACGT += 1 try: y_chCount[ch] += 1 except KeyError: y_chCount[ch] = 1 if(1 or x_notACGT > 0 or y_notACGT > 0): sys.stderr.write("Ncounts %d %d\n" % (x_notACGT,y_notACGT)) sys.stderr.write("x_gap_len= %d y_gap_len= %d\n" % (x_len-lp-rp,y_len-lp-rp)) sys.stderr.write("x_seg=(%s %s %d %d)\n" % \ (left.x_orientation, left.x_scaf_uid, x_pos+lp, x_len-lp-rp)) sys.stderr.write("y_seg=(%s %s %d %d)\n" % \ (left.y_orientation, left.y_scaf_uid, y_pos+lp, y_len-lp-rp)) # sys.stderr.write("x_chCount= ") # sys.stderr.write(x_chCount) # sys.stderr.write("y_chCount= ") # sys.stderr.write( y_chCount); else: # sys.stderr.write("Inter-run gap\n") inter_run_gap_count += 1 # sys.stderr.write("done\n") squeezed = lp+rp return (inter_run_gap_count, squeezed,x_len-squeezed,y_len-squeezed,x_notACGT,y_notACGT) # end def def mainLoop( inpfile, outfile, xIdx, yIdx): maxgap = 100000 # This should be set by an ATAC global. margin = 20 # This should be set by an ATAC global. 
countLines = 0 inter_run_gap_count_total = 0 closed_gap_count_total = 0 squeezed_total = 0 x_len_total = 0 y_len_total = 0 x_nonACGT_total = 0 y_nonACGT_total = 0 inpfile.seek(0) inpfileIter = iter(inpfile) left = None for line in inpfileIter: if(line[0] == 'M'): left = MatchRecord.MatchRecord(line) # outfile.write(str(left)) print >>outfile, left countLines += 1 break; for line in inpfileIter: if(line[0] == 'M'): right = MatchRecord.MatchRecord(line) #if( countLines % 10000 == 0): # sys.stderr.write("countLines=%d\n" % countLines) (inter_run_gap_count,squeezed,x_len,y_len,x_notACGT,y_notACGT) \ = analyzeGap(xIdx,yIdx,left,right, outfile, maxgap, margin) inter_run_gap_count_total += inter_run_gap_count squeezed_total += squeezed x_len_total += x_len y_len_total += y_len x_nonACGT_total += x_notACGT y_nonACGT_total += y_notACGT if(x_len == 0 and y_len == 0): closed_gap_count_total += 1 # Output the record which was possibly trimmed. #outfile.write(str(right)) print >>outfile, right countLines += 1 left = right # end if # end for sys.stderr.write( "countLines %d inter_run_gap_count %d closed_gap_count %d squeezed %d x_len %d y_len %d x_nonACGT %d y_nonACGT %d\n" % (countLines,inter_run_gap_count_total,closed_gap_count_total, squeezed_total,x_len_total,y_len_total,x_nonACGT_total,y_nonACGT_total)) sys.stderr.write("theIsolatedSNPcount = %d\n" % theIsolatedSNPcount) sys.stderr.write("completefillednotXY = %d\n" % completefillednotXY) sys.stderr.write("completefilledXnotY = %d\n" % completefilledXnotY) sys.stderr.write("completefilledYnotX = %d\n" % completefilledYnotX) sys.stderr.write("completefilledXandY = %d\n" % completefilledXandY) # end def # Allow each module to have its own main for testing. 
if __name__ == '__main__': inpname = sys.argv[1] outname = sys.argv[2] xname = sys.argv[3] yname = sys.argv[4] assemblyId1 = sys.argv[5] assemblyId2 = sys.argv[6] # mismatches = checkExactMatches( x, y, inpfile) # sys.stderr.write("mismatches = %d\n" % mismatches) xIdx = IdxStore.IdxStore(xname,assemblyId1) yIdx = IdxStore.IdxStore(yname,assemblyId2) inpfile = open(inpname) outfile = open(outname,"w") mainLoop( inpfile, outfile, xIdx, yIdx) outfile.close() # end if kmer-code-2013-trunk/atac-driver/chainer/python/PerfectRuns.py0000755000000000000000000002143710230663544023130 0ustar rootroot#!/usr/bin/env python # Looking in /usr/local/ir/bin on the Compaqs for the correct Python interpreter. # export PYTHONPATH=${PYTHONPATH}:$WORK/cds/IR/COMPASS/src/AtacPipeline """ Extensive documentation for the Python language is available at http://www.python.org. """ import sys import MyFile import MatchRecord def cvm(f,x,y): # A cvm variant (flag ? x : y) = (y,x)[f] if f : return x else: return y # end if # end def def createSignedEnumeration(inpfile): outfile = MyFile.myfile() p = 1 inpfile.seek(0) for line in inpfile: if(line[0] == 'M'): FM = MatchRecord.MatchRecord(line) forwardX = FM.x_orientation forwardY = FM.y_orientation srank = cvm(forwardX == forwardY, p, -p) p += 1 FM.extend['srank'] = srank print >>outfile, FM # end if # end while return outfile # end def def findPerfectRuns ( inpfile, maxJump, runIdPrefix ): outfile = MyFile.myfile() left = None runid = 1 inpfile.seek(0) for line in inpfile: if(line[0] == 'M'): right = MatchRecord.MatchRecord(line) pr = int(right.extend['srank']) del(right.extend['srank']) if(left != None): maxGapInXandY = 0 if(left.x_scaf_uid == right.x_scaf_uid and left.y_scaf_uid == right.y_scaf_uid ): # Find the maximum of the gap in x and y axis. x_rs = right.x_start x_re = x_rs + right.x_length x_ls = left.x_start x_le = x_ls + left.x_length assert(x_rs < x_re) assert(x_ls < x_le) # All matches are positive length. 
x_gapLeftBeforeRight = x_rs - x_le x_gapRightBeforeLeft = x_ls - x_re assert(not(x_gapLeftBeforeRight>0 and x_gapRightBeforeLeft>0)) x_gap = max(x_gapLeftBeforeRight,x_gapRightBeforeLeft) # x_gap == 0 is abutting # x_gap < 0 is overlapping y_rs = right.y_start y_re = y_rs + right.y_length y_ls = left.y_start y_le = y_ls + left.y_length assert(y_rs < y_re) assert(y_ls < y_le) y_gapLeftBeforeRight = y_rs - y_le y_gapRightBeforeLeft = y_ls - y_re assert(not(y_gapLeftBeforeRight>0 and y_gapRightBeforeLeft>0)) y_gap = max(y_gapLeftBeforeRight,y_gapRightBeforeLeft) # y_gap == 0 is abutting # y_gap < 0 is overlapping maxGapInXandY = max(x_gap,y_gap) if 1: # Check the sorting of the matches. sorted_by_x = (x_ls <= x_rs) sorted_by_y = (y_ls <= y_rs) if(not(sorted_by_x or sorted_by_y)): print >>sys.stderr, "bad sorting in findPerfectRuns" print >>sys.stderr, left print >>sys.stderr, right assert(sorted_by_x or sorted_by_y) dovetail_in_x = (x_ls <= x_rs) and (x_le <= x_re) dovetail_in_y = (y_ls <= y_rs) and (y_ls <= y_re) if(sorted_by_x and not(dovetail_in_x)): print >>sys.stderr, "contained in x in findPerfectRuns" print >>sys.stderr, left print >>sys.stderr, right if(sorted_by_y and not(dovetail_in_y)): print >>sys.stderr, "contained in y in findPerfectRuns" print >>sys.stderr, left print >>sys.stderr, right # endif if( (left.x_scaf_uid != right.x_scaf_uid) or # check first axis id (left.y_scaf_uid != right.y_scaf_uid) or # check second axis id (maxGapInXandY > maxJump) or (pr != lastpr + 1) # Using the signed rank NOT the run id !!!! ): runid += 1 # end if # end if lastpr = pr right.runid = "%s%d" % (runIdPrefix,runid,) # Assign the run id in the same slot as the signed rank. 
print >>outfile, right left = right # end if # end for return outfile # end def def formPerfectRuns ( inpfile, firstSort, secondSort, maxJump, runIdPrefix ): inpfile.seek(0) step = 0 print >>sys.stderr, 'formPerfectRuns step=' + str(step) step += 1 tmpfile = MyFile.myfile() firstSort( inpfile, tmpfile) print >>sys.stderr, 'formPerfectRuns step=' + str(step) step += 1 outfile = createSignedEnumeration(tmpfile) print >>sys.stderr, 'formPerfectRuns step=' + str(step) step += 1 tmpfile = MyFile.myfile() secondSort( outfile, tmpfile) print >>sys.stderr, 'formPerfectRuns step=' + str(step) step += 1 outfile = findPerfectRuns( tmpfile, maxJump, runIdPrefix) return outfile # end def def runsAsMatches(inpfile): outfile = MyFile.myfile() lastF = None firstF = None runFill = 0 inpfile.seek(0) for line in inpfile: if(line[0] == 'M'): curF = MatchRecord.MatchRecord(line) if ((lastF == None) or (curF.runid != lastF.runid)): if ((lastF != None) and (firstF.x_scaf_uid != lastF.x_scaf_uid)): print >>sys.stderr, firstF print >>sys.stderr, lastF # end if assert((lastF==None) or (firstF.x_scaf_uid == lastF.x_scaf_uid)) assert((lastF==None) or (firstF.y_scaf_uid == lastF.y_scaf_uid)) if (None != lastF): x1 = firstF.x_start x2 = lastF.x_start startX = cvm(x1 < x2, x1, x2) x1 += firstF.x_length x2 += lastF.x_length endX = cvm(x1 > x2, x1, x2) y1 = firstF.y_start y2 = lastF.y_start startY = cvm( y1 < y2, y1, y2) y1 += firstF.y_length y2 += lastF.y_length endY = cvm(y1 > y2, y1, y2) lastF.subtype = 'r' lastF.matchid = lastF.runid lastF.runid = "." 
# the agreed NULL value lastF.x_start = startX lastF.y_start = startY lastF.x_length = endX - startX lastF.y_length = endY - startY lastF.runFill = runFill print >>outfile, lastF # end if firstF = curF runFill = 0 # end if runFill += curF.x_length lastF = curF # end if # end for if (None != lastF): x1 = firstF.x_start x2 = lastF.x_start startX = cvm( x1 < x2, x1, x2) x1 += firstF.x_length x2 += lastF.x_length endX = cvm( x1 > x2, x1, x2) y1 = firstF.y_start y2 = lastF.y_start startY = cvm( y1 < y2, y1, y2) y1 += firstF.y_length y2 += lastF.y_length endY = cvm( y1 > y2, y1, y2) lastF.subtype = 'r' lastF.matchid = lastF.runid lastF.runid = "." # the agreed NULL value lastF.x_start = startX lastF.y_start = startY lastF.x_length = endX - startX lastF.y_length = endY - startY lastF.runFill = runFill print >>outfile, lastF # end if return outfile # end def def main(inpname, outname, maxJump, runIdPrefix): print >>sys.stderr, "Beware /tmp!\n" inpfile = open(inpname) tempdata1 = formPerfectRuns(inpfile, MatchRecord.sortInXorderAP, MatchRecord.sortInYorderAP, int(maxJump), runIdPrefix ) tempdata2 = runsAsMatches( tempdata1) # Argh! All our work is done in temporary files in /tmp, # but this wants to create hard links to save the last # result -- the output. 
tempdata1.link("/tmp/"+outname+".matches") tempdata2.link("/tmp/"+outname+".runs") if __name__ == '__main__': inpname = sys.argv[1] outname = sys.argv[2] maxJump = int(sys.argv[3]) runIdPrefix = sys.argv[4] # defaults, 100000, r main(inpname, outname, maxJump, runIdPrefix) kmer-code-2013-trunk/atac-driver/chainer/python/fillIntraRunGaps.py0000644000000000000000000003607210546555025024116 0ustar rootroot#!/usr/bin/env python import sys import string import MatchRecord import IdxStore import localAlignerInterface import halign #import shelve # True=1 False=0 def analyzeGap(x,y,left,right,outfile,maxgap,erate,margin): inter_run_gap_count = 0 x_pos = 0 x_len = 0 y_pos = 0 y_len = 0 if( (left.x_scaf_uid == right.x_scaf_uid) and (left.y_scaf_uid == right.y_scaf_uid) and (left.runid == right.runid) ): # (left.sindex + 1 == right.sindex) ): # This is obsolete in Russell's file format. # sys.stderr.write("Intra-run gap\n") left_forward = (left.x_orientation == left.y_orientation) right_forward = (right.x_orientation == right.y_orientation) if( left_forward != right_forward): sys.stderr.write("Bad orientations\n") assert(left_forward == right_forward) sorted_by_x = (left.x_start <= right.x_start) and \ (left.x_start+left.x_length <= right.x_start+right.x_length) sorted_by_y = (left.y_start <= right.y_start) and \ (left.y_start+left.y_length <= right.y_start+right.y_length) if(not(sorted_by_x or sorted_by_y)): print >>sys.stderr, "bad sorting in runs" print >>sys.stderr, left print >>sys.stderr, right assert(sorted_by_x or sorted_by_y) # This concept of sorted allows neggaps but not containmant in both axes. 
if(not((not left_forward) or (sorted_by_x and sorted_by_y))): print >>sys.stderr, "bad sorting in runs" print >>sys.stderr, left print >>sys.stderr, right if(not((left_forward) or (not(sorted_by_x and sorted_by_y)))): print >>sys.stderr, "bad sorting in runs" print >>sys.stderr, left print >>sys.stderr, right assert((not left_forward) or (sorted_by_x and sorted_by_y)) assert((left_forward) or (not(sorted_by_x and sorted_by_y))) if(sorted_by_x): # Sorted by x positions. x_pos = left.x_start + left.x_length # Start of the intra-run gap. x_len = right.x_start - x_pos # Length of the intra-run gap. if(left_forward): y_pos = left.y_start + left.y_length y_len = right.y_start - y_pos else: y_pos = right.y_start + right.y_length y_len = left.y_start - y_pos # end if else: # Assume sorted by y positions y_pos = left.y_start + left.y_length y_len = right.y_start - y_pos if(left_forward): x_pos = left.x_start + left.x_length x_len = right.x_start - x_pos else: x_pos = right.x_start + right.x_length x_len = left.x_start - x_pos # end if # end if # print "Left %d,%d Right %d,%d Width %d,%d" % (x_pos,y_pos,x_pos+x_len,y_pos+y_len,x_len,y_len) assert(left.x_start >= 0) assert(left.x_length > 0) assert(left.y_start >= 0) assert(left.y_length > 0) assert(right.x_start >= 0) assert(right.x_length > 0) assert(right.y_start >= 0) assert(right.y_length > 0) if( 1 and 0 < x_len and 0 < y_len and x_len < maxgap and y_len < maxgap ): if 0: sys.stderr.write("About to call local aligner with %d margins\n" % margin); sys.stderr.write("# left = %s\n" % str(left)) sys.stderr.write("# right= %s\n" % str(right)) sys.stderr.write("x_len=%d y_len=%d\n" % (x_len, y_len) ); # Why two orientation flags? We want the output matches # to be in the same sorted order as the left and right # matches. 
parent_x_start = x_pos - margin parent_y_start = y_pos - margin parent_x_length = x_len + 2*margin parent_y_length = y_len + 2*margin if 0: print >>sys.stderr, "parent_x_start=%d" % parent_x_start print >>sys.stderr, "parent_y_start=%d" % parent_y_start print >>sys.stderr, "parent_x_length=%d" % parent_x_length print >>sys.stderr, "parent_y_length=%d" % parent_y_length x_seq = "" if(x_len > 0): x_seq = string.upper( x.getStringFromFasta( sorted_by_x, left.x_scaf_uid, parent_x_start, parent_x_length)); # end if y_seq = "" if(y_len > 0): y_seq = string.upper( y.getStringFromFasta( sorted_by_y, left.y_scaf_uid, parent_y_start, parent_y_length)); # end if if 0: print >>outfile, "# STARTED localAlignerInterface.syntenicSegments" print >>outfile, "# left = %s" % str(left) print >>outfile, "# right= %s" % str(right) print >>sys.stderr, "x_seq="+x_seq print >>sys.stderr, "len(x_seq)=",len(x_seq) print >>sys.stderr, "y_seq="+y_seq print >>sys.stderr, "len(y_seq)=",len(y_seq) outfile.flush() try: localAlignerInterface.syntenicSegments(outfile, x_seq, 0, parent_x_length, y_seq, 0, parent_y_length, erate) FM = left parent_id = FM.matchid #FM.x_orientation = sorted_by_x #FM.y_orientation = sorted_by_y # Why two orientations and not just a flipped flag? # Because we want the resulting matches to come out in # the same sorted order as the input matches. 
ii = 0 for segment in iter(localAlignerInterface.iterateSegments,None): #print >>outfile, segment (bgn1,bgn2,len1,len2,fid) = segment assert(len1 >= 0) assert(len2 >= 0) assert(bgn1 >= 0) assert(bgn2 >= 0) if(not(bgn1 + len1 <= parent_x_length)): print >>sys.stdout,"# warn(not(bgn1 + len1 <= parent_x_length))" print >>sys.stdout,"# bgn1=%d len1=%d parent_x_length=%d" % (bgn1,len1,parent_x_length) print >>sys.stdout,"# left = %s" % str(left) print >>sys.stdout,"# right= %s" % str(right) print >>sys.stdout,"# bgn1,bgn2,len1,len2=", bgn1,bgn2,len1,len2 #print >>sys.stdout,"# xseq=%s" % x_seq[bgn1:bgn1+len1] #print >>sys.stdout,"# yseq=%s" % y_seq[bgn2:bgn2+len2] len1 = parent_x_length - bgn1 print >>sys.stdout, "# Change len1 = %d" % len1 if(not(bgn2 + len2 <= parent_y_length)): print >>sys.stdout,"# warn(not(bgn2 + len2 <= parent_y_length))" print >>sys.stdout,"# bgn2=%d len2=%d parent_y_length=%d" % (bgn2,len2,parent_y_length) print >>sys.stdout,"# left = %s" % str(left) print >>sys.stdout,"# right= %s" % str(right) print >>sys.stdout,"# bgn1,bgn2,len1,len2=", bgn1,bgn2,len1,len2 #print >>sys.stdout,"# xseq=%s" % x_seq[bgn1:bgn1+len1] #print >>sys.stdout,"# yseq=%s" % y_seq[bgn2:bgn2+len2] len2 = parent_y_length - bgn2 print >>sys.stdout,"# Change len2 = %d" % len2 if (len1 == 0): print >>sys.stdout,"# warn(len1 == 0)" print >>sys.stdout,"# bgn1,bgn2,len1,len2=", bgn1,bgn2,len1,len2 print >>sys.stdout,"# bgn1=%d len1=%d parent_x_length=%d" % (bgn1,len1,parent_x_length) continue if (len2 == 0): print >>sys.stdout,"# warn(len2 == 0)" print >>sys.stdout,"# bgn1,bgn2,len1,len2=", bgn1,bgn2,len1,len2 print >>sys.stdout,"# bgn2=%d len2=%d parent_y_length=%d" % (bgn2,len2,parent_y_length) continue assert(bgn1 >= 0) assert(bgn2 >= 0) assert(len1 > 0) assert(len2 > 0) assert(bgn1 + len1 <= parent_x_length); assert(bgn2 + len2 <= parent_y_length); # Filter by a minimum length? say four bp. 
ii += 1 FM.subtype = 'l' FM.matchid = parent_id + 'l' + str(ii) # FM.runid = parent_id child_x_start = parent_x_start + (parent_x_length-bgn1-len1,bgn1)[sorted_by_x] child_y_start = parent_y_start + (parent_y_length-bgn2-len2,bgn2)[sorted_by_y] child_x_length = len1 child_y_length = len2 #FM.identifier = " %f" % fid # CMM BEWARE FM.x_start = child_x_start FM.y_start = child_y_start FM.x_length = child_x_length FM.y_length = child_y_length #FM.extend['fid'] = str(fid) #print >>outfile, FM # Here we call the dedasher. #assert(len1 > 0) #assert(len2 > 0) #assert(bgn1 >= 0) #assert(bgn2 >= 0) #assert(bgn1+len1 <= parent_x_length) #assert(bgn2+len2 <= parent_y_length) if 0: print >>sys.stderr, "# x_seq=%s" % x_seq print >>sys.stderr, "# y_seq=%s" % y_seq print >>sys.stderr, "# bgn1,bgn2,len1,len2=", bgn1,bgn2,len1,len2 print >>sys.stderr, "# xseq=%s" % x_seq[bgn1:bgn1+len1] print >>sys.stderr, "# yseq=%s" % y_seq[bgn2:bgn2+len2] halign.halignStart(x_seq[bgn1:bgn1+len1], y_seq[bgn2:bgn2+len2]) outfile.flush() for hsegment in iter(halign.halignDedash,None): #print >>outfile, segment (bgn1h,bgn2h,len1h,len2h,nmat) = hsegment # Filter by a minimum length? say four bp. 
ii += 1 FM.subtype = 'u' FM.matchid = parent_id + 'a' + str(ii) # FM.runid = parent_id FM.x_start = child_x_start + (child_x_length-bgn1h-len1h,bgn1h)[sorted_by_x] FM.y_start = child_y_start + (child_y_length-bgn2h-len2h,bgn2h)[sorted_by_y] FM.x_length = len1h FM.y_length = len2h assert(len1h == len2h) mismatches = 0 for ic in range(len1h): if(x_seq[bgn1+bgn1h+ic] != y_seq[bgn2+bgn2h+ic]): mismatches += 1 FM.extend['mm'] = str(mismatches) #FM.identifier = "" # BEWARE print >>outfile, FM # localAlignerInterface.free() # print >>outfile,"# FINISHED localAlignerInterface.syntenicSegments" except RuntimeError: print >>outfile, "# NOTE syntenicSegments failed between these records" print >>outfile, "# STARTED localAlignerInterface.syntenicSegments" print >>outfile, "# left = %s" % str(left) print >>outfile, "# right= %s" % str(right) print >>sys.stderr, "NOTE syntenicSegments failed in fillIntraRunGaps for:" print >>sys.stderr, "x_seq="+x_seq print >>sys.stderr, "len(x_seq)=",len(x_seq) print >>sys.stderr, "y_seq="+y_seq print >>sys.stderr, "len(y_seq)=",len(y_seq) # end if else: # sys.stderr.write("Inter-run gap\n") inter_run_gap_count += 1 # sys.stderr.write("done\n") return (inter_run_gap_count,) # end def def mainLoop( inpfile, outfile, xIdx, yIdx, maxgap, erate): margin = 20 # This should be set by an ATAC global. 
countLines = 0 inter_run_gap_count_total = 0 closed_gap_count_total = 0 squeezed_total = 0 x_len_total = 0 y_len_total = 0 x_nonACGT_total = 0 y_nonACGT_total = 0 inpfile.seek(0) inpfileIter = iter(inpfile) sys.stderr.write("begin\n") left = None for line in inpfileIter: if(line[0] == 'M'): left = MatchRecord.MatchRecord(line) print >>outfile, left countLines += 1 break; sys.stderr.write("countLines=%d\n" % countLines) for line in inpfileIter: if(line[0] == 'M'): newRight = MatchRecord.MatchRecord(line) if( newRight.subtype == 'u' ): right = newRight #if( countLines % 10000 == 0): # sys.stderr.write("countLines=%d\n" % countLines) (inter_run_gap_count,) = analyzeGap( xIdx, yIdx, left,right, outfile, maxgap, erate, margin) inter_run_gap_count_total += inter_run_gap_count # Output the record which was possibly trimmed. print >>outfile, right countLines += 1 left = right # end if # end for sys.stderr.write("countLines %d inter_run_gap_count %d \n" % (countLines,inter_run_gap_count_total)) # end def import AtacFile import MyFile def main( inpname, outname): obj = AtacFile.AtacFile(inpname) assemblyId1 = obj.globals['assemblyId1'] assemblyId2 = obj.globals['assemblyId2'] assemblyFilePrefix1 = obj.globals['assemblyFilePrefix1'] assemblyFilePrefix2 = obj.globals['assemblyFilePrefix2'] if(not obj.globals.has_key('fillIntraRunGapsErate')): obj.globals['fillIntraRunGapsErate'] = 0.10 if(not obj.globals.has_key('fillIntraRunGapsMaxGap')): obj.globals['fillIntraRunGapsMaxGap'] = 100000 fillIntraRunGapsErate = float(obj.globals['fillIntraRunGapsErate']) fillIntraRunGapsMaxGap = int(obj.globals['fillIntraRunGapsMaxGap']) # mismatches = checkExactMatches( x, y, inpfile) # sys.stderr.write("mismatches = %d\n" % mismatches) xIdx = IdxStore.IdxStore(assemblyFilePrefix1,assemblyId1) yIdx = IdxStore.IdxStore(assemblyFilePrefix2,assemblyId2) tempfile = MyFile.myfile() mainLoop( obj.matches, tempfile, xIdx, yIdx, fillIntraRunGapsMaxGap, fillIntraRunGapsErate) obj.matches = tempfile 
def die(message):
    """Print message to STDERR and terminate the process with exit status 1."""
    print >>STDERR, message
    # The os module has no exit() function (only os._exit), so the former
    # os.exit(1) raised AttributeError instead of exiting.
    sys.exit(1)
def onlyKeepLongRuns ( inpfile, outname, lengthThreshold ):
    """Keep only the matches of runs whose summed x_length reaches lengthThreshold.

    The 'M' records of inpfile are read from the start.  Matches of a run
    are held back until the running total of x_length for that run reaches
    lengthThreshold; from then on the held matches and every later match of
    the run go to the returned file.  Matches still held back when the
    runid changes are written to a rejects file, which is closed before
    returning.  outname is not used.
    """
    keptFile = MyFile.myfile()
    rejectFile = MyFile.myfile()

    def dump(records, target):
        # Print every held-back record to the given file, in order.
        for rec in records:
            print >>target, rec

    prevMatch = None
    pending = []
    runLength = 0
    inpfile.seek(0)
    for line in inpfile:
        if line[0] != 'M':
            continue
        match = MatchRecord.MatchRecord(line)
        startsNewRun = (prevMatch is not None) and (prevMatch.runid != match.runid)
        if startsNewRun:
            # The previous run never got long enough: reject what is held.
            dump(pending, rejectFile)
            pending = []
            runLength = match.x_length
        else:
            runLength = runLength + match.x_length
        if runLength >= lengthThreshold:
            dump(pending, keptFile)
            pending = []
            print >>keptFile, match
        else:
            pending.append(match)
        prevMatch = match
    rejectFile.close()
    return keptFile
# end def
# Note that if record has an initial rank for the X and Y sorting,
# then re-sorting and box recovery are simplified.
# NOTE THAT outname is unused here.
def boxRecovery( inpfile, rawfile, outname):
    """Re-insert raw matches that fall inside the box between run neighbours.

    inpfile holds the chained matches and rawfile the raw matches; both
    are rewound and must be sorted in the same manner, because rawfile is
    consumed by a single forward iterator shared across all chained
    matches (a modified merge).  For each chained match the raw stream is
    advanced until a raw record sameAs() it is found, at which point the
    chained match is printed.  Raw records skipped on the way are printed
    only when the previous and current chained matches are in the same run
    and the raw record isInsideBox() of that pair; otherwise they are
    discarded.  Returns a new MyFile.myfile() with the result.

    As the original author noted, nothing is reported when the raw stream
    runs out before a record equal to the chained match is seen.
    """
    inpfile.seek(0)
    rawfile.seek(0)
    recovered = MyFile.myfile()
    rawLines = iter(rawfile)
    previous = None
    for line in inpfile:
        if line[0] != 'M':
            continue
        current = MatchRecord.MatchRecord(line)
        # Only neighbours within one run define a box worth filling.
        withinRun = ( previous != None and previous.inSameRunAs(current) )
        for rawline in rawLines:
            if rawline[0] != 'M':
                continue
            candidate = MatchRecord.MatchRecord(rawline)
            if candidate.sameAs(current):
                print >>recovered, current
                break
            if withinRun and candidate.isInsideBox(previous, current):
                print >>recovered, candidate
        previous = current
    return recovered
# end def
if(self.globals.has_key("boxRecoveryOn")): boxRecoveryOn = int(self.globals['boxRecoveryOn']) t0 = time.time() assemblyIdx1 = IdxStore.IdxStore(assemblyFile1,assemblyId1) assemblyIdx2 = IdxStore.IdxStore(assemblyFile2,assemblyId2) rawfile = None ################################################################### # Setup for checkpointing scheme. redo = 0 keep = 0 step = 0 if(self.globals.has_key("ckpKeep")): keep = int(self.globals['ckpKeep']) ckpName = "AllDone" ################################################################### print >>STDERR, 'Keep step=' + str(keep) print >>STDERR, 'At step=' + str(step) print >>STDERR, 'Time elapsed=' + str(time.time()-t0) outprefix = self.runName step += 1 print >>STDERR, 'At uniqueFilter, step=' + str(step) print >>STDERR, 'Time elapsed=' + str(time.time()-t0) if (redo or ((keep < step) and not self.globals.has_key(ckpName))): redo = 1 if(not(self.globals.has_key('uniqueFilterOn') and self.globals['uniqueFilterOn']=="0")): print >>STDERR, 'Running UniqueFilter' outfile = MyFile.myfile() UniqueFilter.main( self.matches, outfile) self.matches = outfile outprefix += '.uniq' self.checkpoint(outprefix) step += 1 print >>STDERR, 'At filterByMatchLength, step=' + str(step) print >>STDERR, 'Time elapsed=' + str(time.time()-t0) if (redo or ((keep < step) and not self.globals.has_key(ckpName))): redo = 1 print >>STDERR, 'Running filterByMatchLength' outfile = MyFile.myfile() filterByMatchLength( self.matches, outfile, opt_t) self.matches = outfile outprefix += '.t' + str(opt_t) self.checkpoint(outprefix) step += 1 print >>STDERR, 'At trimMatchOverlaps, step=' + str(step) print >>STDERR, 'Time elapsed=' + str(time.time()-t0) if (redo or ((keep < step) and not self.globals.has_key(ckpName))): redo = 1 print >>STDERR, "Start trimming for bp one-to-one-ness" tempdata = MyFile.myfile() TrimMatchOverlaps.trimMatchOverlapsInBoth(self.matches,tempdata,'u') self.matches = tempdata print >>STDERR, "Finished trimming for bp one-to-one-ness" 
outprefix += '.trim' self.checkpoint(outprefix) if( boxRecoveryOn == 1 ): # For box recovery later ... but what if we start from a checkpoint? rawfile = self.matches step += 1 print >>STDERR, 'At formPerfectRuns, step=' + str(step) print >>STDERR, 'Time elapsed=' + str(time.time()-t0) if (redo or ((keep < step) and not self.globals.has_key(ckpName))): redo = 1 print >>STDERR, 'from ' + outprefix + ' making ' + outprefix + '.p6' tempdata = PerfectRuns.formPerfectRuns(self.matches, MatchRecord.sortInXorderAP, MatchRecord.sortInYorderAP, maxdiff, 'r') self.matches = tempdata outprefix += ".p6" # end if step += 1 print >>STDERR, 'At onlyKeepLongRuns, step=' + str(step) print >>STDERR, 'Time elapsed=' + str(time.time()-t0) if (redo or ((keep < step) and not self.globals.has_key(ckpName))): redo = 1 print >>STDERR, 'from ' + outprefix + ' making ' + outprefix + '.l' + str(opt_l) tempdata = onlyKeepLongRuns( self.matches, outprefix, opt_l) self.matches = tempdata outprefix += '.l' + str(opt_l) self.checkpoint(outprefix) step += 1 print >>STDERR, 'At formPerfectRuns, step=' + str(step) print >>STDERR, 'Time elapsed=' + str(time.time()-t0) if (redo or ((keep < step) and not self.globals.has_key(ckpName))): redo = 1 print >>STDERR, 'Heal the perfect runs' tempdata = PerfectRuns.formPerfectRuns(self.matches, MatchRecord.sortInYorderAP, MatchRecord.sortInXorderAP, maxdiff, 'r') self.matches = tempdata outprefix += '.pr' self.checkpoint(outprefix) if(boxRecoveryOn == 1): # This is a box recovery step. 
step += 1 print >>STDERR, 'At boxRecovery, step=' + str(step) print >>STDERR, 'Time elapsed=' + str(time.time()-t0) if (redo or ((keep < step) and not self.globals.has_key(ckpName))): redo = 1 print >>STDERR, 'from ' + outprefix + ' making ' + outprefix + '.br' print >>STDERR, "Make sorted raw matches" outfile = MyFile.myfile() MatchRecord.sortInXorderAP( rawfile, outfile) rawfile = outfile print >>STDERR, "perform box recovery" tempdata = boxRecovery( self.matches, rawfile, outprefix) self.matches = tempdata outprefix += '.br' self.checkpoint(outprefix) # end if step += 1 print >>STDERR, 'At formPerfectRuns, step=' + str(step) print >>STDERR, 'Time elapsed=' + str(time.time()-t0) if (redo or ( (keep < step) and not self.globals.has_key(ckpName))): print >>STDERR, "form perfect runs" redo = 1 print >>STDERR, 'from ' + outprefix + ' to ' + outprefix + '.p6' tempdata = PerfectRuns.formPerfectRuns(self.matches, MatchRecord.sortInXorderAP, MatchRecord.sortInYorderAP, maxdiff, 'r') self.matches = tempdata outprefix += '.pr' self.checkpoint(outprefix) step += 1 print >>STDERR, 'At squeezeIntraRunGaps, step=' + str(step) print >>STDERR, 'Time elapsed=' + str(time.time()-t0) if (redo or ((keep < step) and not self.globals.has_key(ckpName))): redo = 1 print >>STDERR, 'from ' + outprefix + ' to ' + outprefix + '.sq' tempdata = MyFile.myfile() squeezeIntraRunGaps.mainLoop( self.matches, tempdata, assemblyIdx1, assemblyIdx2) tempy = MyFile.myfile() # Beware the current match subtypes are 'x', 'L', and 'R'! 
coalesceMatches( tempdata, tempy, 1) self.matches = tempy outprefix += '.sq' self.checkpoint(outprefix) step += 1 print >>STDERR, 'At TrimMatchOverlaps, step=' + str(step) print >>STDERR, 'Time elapsed=' + str(time.time()-t0) if (redo or ((keep < step) and not self.globals.has_key(ckpName))): redo = 1 print >>STDERR, "Start trimming for bp one-to-one-ness" tempdata = MyFile.myfile() TrimMatchOverlaps.trimMatchOverlapsInBoth(self.matches,tempdata,'u') self.matches = tempdata outprefix += '.trim' print >>STDERR, "Finished trimming for bp one-to-one-ness" step += 1 print >>STDERR, 'At RunsAsMatches, step=' + str(step) print >>STDERR, 'Time elapsed=' + str(time.time()-t0) if (redo or ((keep < step) and not self.globals.has_key(ckpName))): redo = 1 self.runs = PerfectRuns.runsAsMatches( self.matches) outprefix += '.runs' self.checkpoint(outprefix) # end if if(self.globals.has_key('fillIntraRunGapsOn') and self.globals['fillIntraRunGapsOn']=="1" ): # Next comes the DNA sequence dependent stuff. 
def main(runName):
    """Load the ATAC file runName, fill in default globals, chain, and checkpoint.

    Requires the globals assemblyId1/2 and assemblyFile1/2, builds the
    ".idxStore" index next to each assembly file when it is missing, runs
    AtacDriver.runOld() and writes the result to runName + ".chained.atac".
    """
    t0 = time.time()
    obj = AtacDriver(runName)
    t1 = time.time()
    print >>STDERR, "Read checkpoint in %d seconds." % (t1-t0)
    t0=t1

    # The following are required:
    assert(obj.globals.has_key('assemblyId1'))
    assert(obj.globals.has_key('assemblyId2'))
    assert(obj.globals.has_key('assemblyFile1'))
    assert(obj.globals.has_key('assemblyFile2'))

    assemblyId1 = obj.globals['assemblyId1']
    assemblyId2 = obj.globals['assemblyId2']
    assemblyFile1 = obj.globals["assemblyFile1"]
    assemblyFile2 = obj.globals["assemblyFile2"]

    assert(os.path.exists(assemblyFile1))
    assert(os.path.exists(assemblyFile2))

    # Each assembly file is indexed under its own id.  The first call used
    # to pass assemblyId2 together with assemblyFile1.
    if(not os.path.exists(assemblyFile1+".idxStore")):
        IdxStore.createIndexedFasta( assemblyFile1, assemblyId1)
    if(not os.path.exists(assemblyFile2+".idxStore")):
        IdxStore.createIndexedFasta( assemblyFile2, assemblyId2)

    assert(os.path.exists(assemblyFile1+".idxStore"))
    assert(os.path.exists(assemblyFile2+".idxStore"))

    if not obj.globals.has_key('matchesFile'):
        print >>STDERR, "We need to make the raw matches."

    # NOTE(review): the original indentation of the defaults below was not
    # recoverable; they are applied unconditionally here because runOld()
    # reads globalMatchMinSize, globalPerfectRunMinLen and
    # globalPerfectRunMaxGapLen regardless of matchesFile.  Confirm.
    if(not obj.globals.has_key('rawMatchMerSize')):
        obj.globals['rawMatchMerSize'] = 20
    if(not obj.globals.has_key('rawMatchMerMaxDegeneracy')):
        obj.globals['rawMatchMerMaxDegeneracy'] = 1
    # NOTE(review): the key tested (rawMatchMinFillSize) differs from the
    # key assigned (rawMatchMinSize); kept as found -- confirm which one is
    # intended.
    if(not obj.globals.has_key('rawMatchMinFillSize')):
        obj.globals['rawMatchMinSize'] = obj.globals['rawMatchMerSize']

    # Many 2*rawMatchMerSize-1 matches are due to isolated single
    # nucleotide mutations in otherwise perfect repeats.
    if(not obj.globals.has_key('globalMatchMinSize')):
        obj.globals['globalMatchMinSize'] = 2*int(obj.globals['rawMatchMerSize'])
    if(not obj.globals.has_key('globalPerfectRunMinLen')):
        obj.globals['globalPerfectRunMinLen'] = 100
    if(not obj.globals.has_key('globalPerfectRunMaxGapLen')):
        obj.globals['globalPerfectRunMaxGapLen'] = 100000
    if(not obj.globals.has_key('intraRunGapIsolatedMismatchLen')):
        obj.globals['intraRunGapIsolatedMismatchLen'] = 20

    obj.runOld()

    t1 = time.time()
    print >>STDERR, "Ran in %d seconds." % (t1-t0)

    obj.checkpoint(runName + ".chained.atac")
def convertFromAtacMatchFormat(self,line):
    """Fill this record's fields from one whitespace-separated text row.

    Rows starting with 'M' are ATAC match rows: fields 0-3 are rowtype,
    subtype, matchid and runid, followed by (scaf_uid, start, length,
    orientation) for x and then for y; the numeric fields are converted
    with int().  Rows starting with '-' are the older raw-match layout and
    are converted to an 'M'/'x' record.  Any other row leaves the object
    untouched.  Fields are assigned in order, so a short row raises
    IndexError and a non-numeric field raises ValueError after the earlier
    attributes have already been set.
    """
    fields = line.split()
    kind = line[0]
    if kind == 'M':
        # (attribute name, converter or None for plain text), in column order.
        layout = (
            ('rowtype', None), ('subtype', None),
            ('matchid', None), ('runid', None),
            ('x_scaf_uid', None), ('x_start', int),
            ('x_length', int), ('x_orientation', int),
            ('y_scaf_uid', None), ('y_start', int),
            ('y_length', int), ('y_orientation', int),
        )
        column = 0
        for (attr, convert) in layout:
            value = fields[column]
            if convert is not None:
                value = convert(value)
            setattr(self, attr, value)
            column += 1
        #self.mismatches = int(fields[12])
        return
    if kind != '-':
        return
    # The first token is '-' followed by a direction letter; 'f' is forward.
    isForward = (fields[0][1:] == 'f')
    self.rowtype = 'M'
    self.subtype = 'x'
    self.matchid = '.' # "BMX"+str(lineCount)
    self.runid = '.'
    # NOTE(review): assemblyId1 and assemblyId2 are not parameters or
    # locals; unless they exist as module globals this branch raises
    # NameError -- confirm where they are expected to come from.
    self.x_scaf_uid = assemblyId1 + ":" + fields[2]
    self.x_start = int(fields[3])
    self.x_length = int(fields[4])
    self.x_orientation = 1
    self.y_scaf_uid = assemblyId2 + ":" + fields[6]
    self.y_start = int(fields[7])
    self.y_length = int(fields[8])
    if isForward:
        self.y_orientation = 1
    else:
        self.y_orientation = -1
# end def
# end def def __init__ (self, line, *args): #print args #if(args): # (line,) = args if '>' in line: (line1, line2) = line.split('>') else: line1 = line line2 = "" try: self.convertFromAtacMatchFormat(line1) except IndexError: sys.stderr.write("MatchRecord-- IndexError: line did not split correctly: %s\n" % line1) raise except ValueError: sys.stderr.write("MatchRecord-- ValueError: line did not unpack correctly: %s\n" % line1) raise self.extend = {} extensions = line2.split('/') self.identifier = extensions[0].strip() for argpair in extensions[1:]: if '=' in argpair: (key,value) = argpair.split('=') self.extend[key] = value.strip() return def __str__ (self): extension = " >" + self.identifier for key in self.extend: extension += ' /' + key + '=' + str(self.extend[key]) if(len(extension)<3): extension = "" return "%s %s %s %s %s %d %d %d %s %d %d %d %s" % ( self.rowtype, self.subtype, self.matchid, self.runid, self.x_scaf_uid, self.x_start, self.x_length, self.x_orientation, self.y_scaf_uid, self.y_start, self.y_length, self.y_orientation, extension ) # end def # end class def convertBrianRecordFormat( inpfile, outfile, assemblyId1, assemblyId2): "Convert the match record format from Brian's to atac format." lineCount = 0 for line in inpfile: lineCount += 1 if(lineCount % 100000 == 0): print >>sys.stderr, "lineCount=%d" % lineCount FB = line.split() orientation = (FB[0][1:]=='f') FM = MatchRecord("M x . . . 0 0 0 . 0 0 0 0\n") FM.x_orientation = 1 FM.matchid = "BMX"+str(lineCount) FM.x_scaf_uid = assemblyId1 + ":" + FB[2] FM.x_start = int(FB[3]) FM.x_length = int(FB[4]) FM.y_orientation = (-1,1)[orientation] # A cvm variant (flag ? x : y). 
def sortInXorderAP( inpfile, outfile):
    """Sort the rows of inpfile into outfile with the external sort(1) command.

    Sort columns, 1-based: 1-2 row type and subtype, then the x axis
    (5 scaffold uid, 6 start ascending numeric, 7 length and 8 orientation
    descending numeric), then the same four for the y axis (9-12).
    Both arguments must be open files with a .name; they are rewound and
    flushed first, and rewound again afterwards.  Every shell command must
    return status 0.
    """
    # (x_scaf_uid, x_start, x_length, y_scaf_uid, y_start, y_length)
    # Use -u to remove the palindromes.
    # Use -k 7nr -k 11nr to remove abutting contained matches.
    sortKeys = ' '.join((
        '-k 1,1', '-k 2,2',
        '-k 5,5', '-k 6n', '-k 7nr', '-k 8nr',
        '-k 9,9', '-k 10n', '-k 11nr', '-k 12nr',
    ))

    def run(command):
        # Run one shell command and insist that it succeeded.
        status = os.system(command)
        assert(status == 0)

    for handle in (inpfile, outfile):
        handle.seek(0)
    for handle in (inpfile, outfile):
        handle.flush()
    run("sync;sync;sync")
    run("sort -T . %s %s > %s" % (sortKeys, inpfile.name, outfile.name))
    run("sync;sync;sync")
    for handle in (inpfile, outfile):
        handle.seek(0)
    return
# end def
class ListLikeFileIter:
    """Read a text file line by line through the iterator or the list protocol.

    The file is opened for reading in __init__ and closed in __del__.
    Lines are returned with their trailing newline, in file order; the
    index passed to __getitem__ is ignored.
    """
    # See http://www.python.org/peps/pep-0234.html
    # for file iterators.
    def __init__(self,filename):
        self._filename = filename
        self._fileptr = open(self._filename,"r")
        # readline() returns "" at end of file, which ends this iterator.
        self._fileIter = iter(self._fileptr.readline,"")
    def __del__(self):
        # _fileptr is missing when open() failed in __init__.
        fileptr = getattr(self, "_fileptr", None)
        if fileptr is not None:
            fileptr.close()
    def __iter__(self):
        return self
    def next(self):
        # Return the next line, or raise StopIteration at end of file.
        # (The old code spelled the exception "StopInteration".)
        for line in self._fileIter:
            return line
        raise StopIteration
    __next__ = next
    def __getitem__(self,ii):
        # For files, the list location ii is ignored.
        # End of file is reported as IndexError, as the sequence protocol
        # expects; StopIteration used to leak out of here instead.
        for line in self._fileIter:
            return line
        raise IndexError
    # end def
class DNA:
    """Class representing DNA as a string sequence.

    The sequence is stored unchanged in `self.seq`.  Every method returns a
    plain Python value (str, float or list of str), never a new DNA object.
    """

    # Complement of every symbol the class understands.  complement() and
    # reversecomplement() raise KeyError for any symbol not listed here.
    basecomplement = {'A':'T', 'C':'G', 'G':'C', 'T':'A',
                      'a':'t', 'c':'g', 'g':'c', 't':'a',
                      'M':'K', 'R':'Y', 'W':'W', 'S':'S', 'Y':'R', 'K':'M',
                      'm':'k', 'r':'y', 'w':'w', 's':'s', 'y':'r', 'k':'m',
                      'V':'B', 'H':'D', 'D':'H', 'B':'V',
                      'v':'b', 'h':'d', 'd':'h', 'b':'v',
                      'N':'N', 'X':'X', 'n':'n', 'x':'x',
                      '-':'-',}
    # IUB encoding:
    # M = A/C, R = A/G, W = A/T, S = C/G, Y = C/T, K = G/T,
    # V = A/C/G, H = A/C/T, D = A/G/T, B = C/G/T, N/X = A/C/G/T,
    # Celera encoding:
    # m = -/A/C, r = -/A/G, w = -/A/T, s = -/C/G, y = -/C/T, k = -/G/T,
    # v = -/A/C/G, h = -/A/C/T, d = -/A/G/T, b = -/C/G/T, n/x = -/A/C/G/T,

    def __init__(self, s):
        """Create DNA instance initialized to string s."""
        self.seq = s
        return

    def transcribe(self):
        """Return as RNA string (every upper-case 'T' replaced by 'U')."""
        return self.seq.replace('T','U')

    def reverse(self):
        """Return DNA string in reverse order."""
        letters = list(self.seq)
        # list.reverse() works in place and returns None, so its result
        # must not be assigned back to `letters`.
        letters.reverse()
        return ''.join(letters)

    def complement(self):
        """Return the complementary DNA string.

        Raises KeyError if the sequence holds a symbol that is not a key
        of `basecomplement`.
        """
        letters = [self.basecomplement[base] for base in self.seq]
        return ''.join(letters)

    def reversecomplement(self):
        """Return the reverse complement of the DNA string.

        Raises KeyError if the sequence holds a symbol that is not a key
        of `basecomplement`.
        """
        letters = list(self.seq)
        letters.reverse()
        letters = [self.basecomplement[base] for base in letters]
        return ''.join(letters)

    def gc(self):
        """Return the portion of DNA composed of G or C.

        Only upper-case 'G' and 'C' are counted.  An empty sequence yields
        0.0 rather than a ZeroDivisionError.
        """
        s = self.seq
        if len(s) == 0:
            return 0.0
        gc = s.count('G') + s.count('C')
        # "* 1." forces floating-point division on every Python version.
        return gc * 1. / len(s)

    def codons(self):
        """Return list of codons for the DNA string.

        The sequence is cut into consecutive three-letter pieces starting
        at offset 0; one or two trailing letters that do not fill a codon
        are dropped.
        """
        s = self.seq
        # Length of the longest prefix that is a whole number of codons.
        end = len(s) - (len(s) % 3)
        codons = [s[i:i+3] for i in range(0,end,3)]
        return codons
seq_offset = cur_offset seq_length = len(seqline) cur_offset += seq_length + 1; # remember the UNIX newline inserted by print. print >>IDXSTORE, the_uid, def_length, seq_length, def_offset, seq_offset # end if # Now process the new data. defline = line the_uid = line.split()[0][1:] seqline = "" # Clear any accumulated sequence. else: seqline += line # Accumulate more DNA sequence # end if # end for if(the_uid != None): # Now make sure that accumulated data makes it to disk. # If we are using a database, then this might be the place to register the data. # uid2defline[the_uid] = defline # uid2seqline[the_uid] = seqline def_offset = cur_offset def_length = len(defline) cur_offset += def_length + 1; # remember the UNIX newline inserted by print. seq_offset = cur_offset seq_length = len(seqline) cur_offset += seq_length + 1; # remember the UNIX newline inserted by print. print >>IDXSTORE, the_uid, def_length, seq_length, def_offset, seq_offset # end if FASTA.close() IDXSTORE.close() # end if # end def ####################################################### # End class methods ####################################################### class IdxStore: __doc__ = "Class for fast access to multiFASTA files." 
####################################################### # Begin instance methods ####################################################### def __init__(self,prefix,*optargs): __doc__ = "Create an instance of the class" if(optargs): self.nickname = optargs[0] else: self.nickname = None # end if self.uid2iid = {} # declare an empty mapping self.iid2uid = [] # empty self.iid2def_length = [] self.iid2seq_length = [] self.iid2def_offset = [] self.iid2seq_offset = [] filename = prefix + ".idxStore" idxstore = file(filename, "r"); the_iid=0; while 1: line = idxstore.readline(); if not line: break # sys.stderr.write("idxline %s\n" % line) cols = line.split() the_uid=cols[0]; def_length=int(cols[1]); seq_length=int(cols[2]); def_offset=eval(cols[3]); seq_offset=eval(cols[4]) if(self.nickname): self.uid2iid[self.nickname + ':' + str(the_iid)] = the_iid; # hashed self.uid2iid[the_uid] = the_iid; # hashed self.iid2uid.append(the_uid); # vector self.iid2def_length.append(def_length) # vector self.iid2seq_length.append(seq_length) self.iid2def_offset.append(def_offset) self.iid2seq_offset.append(seq_offset) the_iid += 1; filename = prefix self.seqstore = file(filename, "r"); return def getStringFromFasta(self, forward, scaf_uid, start, length ): try: scaf_iid = self.uid2iid[scaf_uid] except KeyError: scaf_iid = eval(scaf_uid) if(scaf_iid < 1000000000): # sys.stderr.write("Using scaf_uid as the index\n") pass else: sys.stderr.write("scaf_uid=<%s> is invalid.\n" % scaf_uid) return "" seq_length = self.iid2seq_length[scaf_iid]; seq_offset = self.iid2seq_offset[scaf_iid]; # print >>sys.stderr, "seq_length, seq_offset, start =", seq_length, seq_offset, start # print >>sys.stderr, "seek to offset =", seq_offset+start self.seqstore.seek(seq_offset+start, 0); # from the beginning of file substring = self.seqstore.read(length) if(not forward): # sys.stderr.write("Taking reversecomplement\n") try: substring = DNA.DNA(substring).reversecomplement() except KeyError: sys.stderr.write("KeyError 
def xorIntervals( inpname, outname):
    """Sweep start-ordered intervals and write the singly-covered stretches.

    Each line of the file `inpname` must begin with two integers, the
    start and the end of an interval.  A pair of pickets (leftPicket,
    rghtPicket) is carried from line to line; whenever the stretch from
    leftPicket to min(newstart, rghtPicket) has positive length it is
    written to the file `outname` as "left right".

    Raises AssertionError if an interval starts to the left of the current
    left picket, and ValueError/IndexError on a line that does not begin
    with two integers.
    """
    # not tested yet
    # NOTE(review): the stretch still held in (leftPicket, rghtPicket) when
    # the input ends is never written out -- confirm whether that is
    # intended before relying on the last interval appearing in the output.
    leftPicket = 0
    rghtPicket = 0
    inpfile = open(inpname,"r")
    try:
        outfile = open(outname,"w")
        try:
            for line in inpfile:
                fields = line.split()
                newstart = int(fields[0])
                newend = int(fields[1])
                assert(leftPicket <= newstart)
                rghtSide = min(newstart,rghtPicket)
                if rghtSide > leftPicket:
                    # interval has positive length
                    outfile.write("%s %s\n" % (leftPicket, rghtSide))
                # Both updates read the picket named rghtPicket; the
                # previous spelling "rightPicket" was an undefined name.
                leftPicket = max(newstart, min(rghtPicket,newend))
                rghtPicket = max(leftPicket,max(rghtPicket,newend))
        finally:
            outfile.close()
    finally:
        inpfile.close()
coverage_increment). # The output records are ("C", id, start_position, length, coverage_level). inpfile.seek(0) outfile.seek(0) oldaxis = None; oldposition = 0; cov = 0 for line in inpfile: ( recordtype, newaxis, newposition, newchange) = line.split() if(recordtype == "E"): newposition = int(newposition) newchange = int(newchange) if(newaxis != oldaxis and cov != 0): print >>sys.stderr, "Woops" len = newposition - oldposition if(cov>0 and len>0): print >>outfile, "C", oldaxis, oldposition, len, cov; cov += newchange; assert(cov >= 0) oldaxis = newaxis; oldposition = newposition; assert(cov == 0) outfile.flush() def findCoverageIntervals( inpfile, outfile, processFirstAxis): # The input file is an ATAC matches file. # The output file is an ATAC coverage intervals file. inpfile.seek(0) outfile.seek(0) t0 = time.time() tmpfile3 = MyFile.myfile() for line in inpfile: if(line[0]=="M"): fields = line.split() if(fields[1]=="u" or fields[1]=="x"): if(processFirstAxis): axis = fields[4] bgn = int(fields[5]) end = bgn+int(fields[6]) else: axis = fields[8] bgn = int(fields[9]) end = bgn+int(fields[10]) print >>tmpfile3, "E", axis,bgn,1 print >>tmpfile3, "E", axis,end,-1 tmpfile3.close() tmpname = tempfile.mktemp() cmd = "sort -T . -k 1,1 -k 2,2 -k 3n -k 4nr %s > %s" % (tmpfile3.name, tmpname) print >>sys.stderr, cmd iret = os.system(cmd); assert(iret==0) print >>sys.stderr,"time elapsed is ", (time.time() - t0) tmpfile4 = open(tmpname) t0 = time.time() findUniformCoverageIntervals( tmpfile4, outfile) print >>sys.stderr,"time elapsed is ", (time.time() - t0) tmpfile4.close() os.system("rm -f " + tmpname) outfile.seek(0) def applyOneKeepMask( inpfile, outfile, keepMaskFile, processFirstAxis): # Note that the following merge-like control structure is # influenced by the function property of keep intevals to matches. debug = 0 inpfile.seek(0) outfile.seek(0) keepMaskFile.seek(0) # Put the first valid match record into FM. 
Each input ATAC match # record produces zero, one or more output ATAC matches. FM = None; ma = None; ms = None; me = None qa = None; qs = None; ql = None; # the set of masking intervals, using the q variables and iline maskiter = iter(keepMaskFile) # the set of masked matches using the m variables and mline inpiter = iter(inpfile) iline = None mline = None last_matchid = None; subcount = 0 try: # StopIteration exception from either iterator gets us out while 1: if(iline == None): iline = maskiter.next() (subtype, qa, qs, ql, cov, ) = iline.split() assert(subtype=='C') cov= int(cov) if(cov != 1): iline = None continue qs = int(qs) ql = int(ql) qe = qs + ql if(mline == None): mline = inpiter.next() if(mline[0] != 'M'): # not a match record, so just pass it through print >>outfile, mline, mline = None continue FM = MatchRecord.MatchRecord(mline) assert(FM.subtype == "u" or FM.subtype == "x") if(processFirstAxis): ma = FM.x_scaf_uid ms = FM.x_start # match start me = ms + FM.x_length # match end else: ma = FM.y_scaf_uid ms = FM.y_start # match start me = ms + FM.y_length # match end # holding valid iline and mline data now if not(ma==qa): # not on same axis, need to get a new one if(ma < qa): mline = None else: iline = None elif not( (ms < qe) and (qs < me) ): # we are not overlapping, need to get a new one of them if(ms < qs): mline = None else: iline = None else: # processing for overlaps FT = FM.copy() mx = max(ms,qs) mn = min(me,qe) trimFromStart = mx - ms trimFromEnd = me - mn trimmedLength = mn - mx if( FT.x_orientation == FT.y_orientation): FT.x_start += trimFromStart FT.y_start += trimFromStart else: if(processFirstAxis): FT.x_start += trimFromStart FT.y_start += trimFromEnd else: FT.y_start += trimFromStart FT.x_start += trimFromEnd FT.x_length = trimmedLength FT.y_length = trimmedLength if debug: print >>sys.stdout, "# trimmed " print >>sys.stdout, FT # We must insure that the match identifier is still unique. 
if last_matchid == FM.matchid : subcount += 1 else: subcount = 0 # print >>sys.stderr, last_matchid, FM.matchid, subcount last_matchid = FM.matchid if(subcount > 0): if processFirstAxis : FT.matchid = FT.matchid + "x" + str(subcount) else: FT.matchid = FT.matchid + "y" + str(subcount) print >>outfile, FT # we need to get a new one if(qe < me): iline = None else: mline = None except StopIteration: # If there are any left over non-match lines, then output them! for mline in inpiter: if(mline[0] != "M"): print >>outfile, mline, def applyBothKeepMasks( inpfile, outfile ): # Maybe we can think of a masking implementation where each ATAC match # is treated atomicly. Assume that the keep mask intervals are sorted # by start postition. Assume that the ATAC matches are sorted by start # postion. Assert that all keep mask intervals are non-overlapping and # were cut from only one ATAC match. Thus the mapping from keep mask # intervals is a function. Note that this requires that we do not # coalesce abutting keep mask intervals that originate from multiple # matches. Note this still allows an ATAC match to overlap more than # one keep mask interval. Ignore all keep mask intervals with zero # length their creation has tie breaking problems. See notes on 2003 # Jul 29. debug = 0 debugnum = 0 inpfile.seek(0) outfile.seek(0) # Apply the keepMask for the first axis. # Make the sorted the keep mask intervals for the first axis. 
processFirstAxis = 1 keepMaskFile = MyFile.myfile() tmpfile2 = inpfile tmpfile3 = MyFile.myfile() tmpfile4 = MyFile.myfile() findCoverageIntervals( inpfile, keepMaskFile, processFirstAxis) if debug: debugnum += 1; debugfile = open("debugfile.%d" % debugnum, "w") for line in keepMaskFile: print >>debugfile, line, MatchRecord.sortInXorderAP(tmpfile2,tmpfile3) if debug: #tmpfile2.seek(0) #debugnum += 1; debugfile = open("debugfile.%d" % debugnum, "w") #for line in tmpfile2: print >>debugfile, line, tmpfile3.seek(0) debugnum += 1; debugfile = open("debugfile.%d" % debugnum, "w") for line in tmpfile3: print >>debugfile, line, applyOneKeepMask( tmpfile3, tmpfile4, keepMaskFile, processFirstAxis) if debug: tmpfile4.seek(0) debugnum += 1; debugfile = open("debugfile.%d" % debugnum, "w") for line in tmpfile4: print >>debugfile, line, # Apply the keepMask for the second axis. # Make the sorted the keep mask intervals for the second axis. processFirstAxis = 0 keepMaskFile = MyFile.myfile() tmpfile2 = tmpfile4 tmpfile3 = MyFile.myfile() tmpfile4 = outfile findCoverageIntervals( inpfile, keepMaskFile, processFirstAxis) if debug: debugnum += 1; debugfile = open("debugfile.%d" % debugnum, "w") for line in keepMaskFile: print >>debugfile, line, MatchRecord.sortInYorderAP(tmpfile2,tmpfile3) if debug: #tmpfile2.seek(0) #debugnum += 1; debugfile = open("debugfile.%d" % debugnum, "w") #for line in tmpfile2: print >>debugfile, line, tmpfile3.seek(0) debugnum += 1; debugfile = open("debugfile.%d" % debugnum, "w") for line in tmpfile3: print >>debugfile, line, applyOneKeepMask( tmpfile3, tmpfile4, keepMaskFile, processFirstAxis) if debug: tmpfile4.seek(0) debugnum += 1; debugfile = open("debugfile.%d" % debugnum, "w") for line in tmpfile4: print >>debugfile, line, def main( inpfile, outfile): applyBothKeepMasks( inpfile, outfile) # Should we check if the first and last characters of the masked # matches are matching? # Should we compute the percent identity in this module? 
# Allow each module to have its own main for testing. if __name__ == '__main__': inpname = sys.argv[1] outname = sys.argv[2] inpfile = open(inpname) outfile = open(outname,"w") main(inpfile, outfile) # end if kmer-code-2013-trunk/atac-driver/chainer/python/mkstats.py0000755000000000000000000000510010210717773022345 0ustar rootroot#!/usr/bin/env python # Must look in /usr/local/ir/bin on the Compaqs for the correct Python interpreter. # export PYTHONPATH=${PYTHONPATH}:$WORK/cds/IR/COMPASS/src/AtacPipeline """ Extensive documentation for the Python language is available at http://www.python.org. """ import os, sys, re, tempfile def main(glist): for inpname in glist: if(0): inpfile = open(inpname,'r') tmpname = tempfile.mktemp(".tmp") tmpfile = open(tmpname,'w') pattern = re.compile(r"^M [gl] ") for line in inpfile: if(pattern.search(line)): print >>tmpfile, line, tmpfile.close() os.system("celagram -c 7 -t 'gapped match lengths' %s" % (tmpname,)) if(0): inpfile = open(inpname,'r') tmpname = tempfile.mktemp(".tmp") tmpfile = open(tmpname,'w') pattern = re.compile(r"^M x ") for line in inpfile: if(pattern.search(line)): print >>tmpfile, line, tmpfile.close() os.system("celagram -c 7 -t 'exact match lengths' %s" % (tmpname,)) if(0): inpfile = open(inpname,'r') tmpname = tempfile.mktemp(".tmp") tmpfile = open(tmpname,'w') pattern = re.compile(r"^M u ") for line in inpfile: if(pattern.search(line)): print >>tmpfile, line, tmpfile.close() os.system("celagram -c 7 -t 'ungapped match lengths' %s" % (tmpname,)) inpfile = open(inpname,'r') tmpname = tempfile.mktemp(".tmp") tmpfile = open(tmpname,'w') # pattern = re.compile(r"^M\s*[xu]\s") pattern = re.compile(r"^M [xu] ") for line in inpfile: if(pattern.search(line)): print >>tmpfile, line, tmpfile.close() os.system("celagram -c 7 -t '%s ungapped match lengths' %s" % (inpname,tmpname)) inpfile = open(inpname,'r') tmpname = tempfile.mktemp(".tmp") tmpfile = open(tmpname,'w') pattern = re.compile(r"^M\s*r\s") for line in 
inpfile: if(pattern.search(line)): print >>tmpfile, line, tmpfile.close() os.system("celagram -c 7 -t '%s spans in 1st assembly' %s" % (inpname,tmpname)) os.system("celagram -c 11 -t '%s spans in 2nd assembly' %s" % (inpname,tmpname)) if __name__ == '__main__': #glist = [ "humR27vsB31-V2.atac", "humB31vsVAN-V1.atac", "humB31vsSC-V3.atac", ] main(sys.argv[1:]) kmer-code-2013-trunk/atac-driver/chainer/python/TrimMatchOverlaps.py0000644000000000000000000002540010344525010024252 0ustar rootroot#!/usr/bin/env python import sys import MyFile import MatchRecord def cvm(f,x,y): # A cvm variant (flag ? x : y) = (x,y)[f] if f : return x else: return y # end if # end def def coalesceMatches ( inpfile, outfile, needs_to_share_diagonal ): "Coalesce overlapping and abutting matches within the same run." firstF = None lastF = None lastLX = -3 lastLY = -4 lastForward = 0 lowHitPX = None lowHitPY = None hghHitPX = None hghHitPY = None inpfile.seek(0) outfile.seek(0) for line in inpfile: if(line[0] == 'M'): curF = MatchRecord.MatchRecord(line) px = curF.x_start nx = curF.x_length py = curF.y_start ny = curF.y_length assert(px >= 0) assert(nx >= 0) assert(py >= 0) assert(ny >= 0) if (not (not needs_to_share_diagonal or nx == ny)): print >>sys.stderr, 'Bombed on:' print >>sys.stderr, str(curF) print >>sys.stderr, 'needs_to_share_diagonal=' + str(needs_to_share_diagonal) print >>sys.stderr, 'nx=' + str(nx) + ' ny=' + str(ny) # end if assert((hghHitPX == None or (not needs_to_share_diagonal) or nx == ny)) forward = (curF.x_orientation == curF.y_orientation) lx = px ly = cvm( forward, py, py + ny) rx = px + nx ry = cvm( forward, py + ny, py) overlapping = ((lastF != None) and (curF.x_scaf_uid == lastF.x_scaf_uid) and (curF.y_scaf_uid == lastF.y_scaf_uid) and (((lx >= lowHitPX and lx <= hghHitPX) and (ly >= lowHitPY and ly <= hghHitPY)) or ((rx >= lowHitPX and rx <= hghHitPX) and (ry >= lowHitPY and ry <= hghHitPY)))) on_diagonal = ((forward == lastForward) and ((lx - lastLX) == ((ly - 
lastLY) * cvm(forward, 1, -1)))) # print >>sys.stdout, lastF, curF # print >>sys.stdout, lx,rx,ly,ry # print >>sys.stdout, lowHitPX,hghHitPX,lowHitPY,hghHitPY # print >>sys.stdout, "overlapping=",overlapping # print >>sys.stdout, "on_diagonal=",on_diagonal lowMerPX = px lowMerPY = py hghMerPX = px + nx hghMerPY = py + ny if (not (overlapping and (not needs_to_share_diagonal or on_diagonal))): if (firstF != None): # if (lastF == None or firstF.runid != lastF.runid): # end if firstF.subtype = ('g','u')[needs_to_share_diagonal] firstF.x_start = lowHitPX firstF.y_start = lowHitPY firstF.x_length = hghHitPX - lowHitPX firstF.y_length = hghHitPY - lowHitPY print >>outfile, firstF # end if firstF = curF lowHitPX = lowMerPX lowHitPY = lowMerPY hghHitPX = hghMerPX hghHitPY = hghMerPY # end if lowHitPX = cvm(lowHitPX < lowMerPX, lowHitPX, lowMerPX) lowHitPY = cvm(lowHitPY < lowMerPY, lowHitPY, lowMerPY) hghHitPX = cvm(hghHitPX > hghMerPX, hghHitPX, hghMerPX) hghHitPY = cvm(hghHitPY > hghMerPY, hghHitPY, hghMerPY) lastLX = lx lastLY = ly lastForward = forward lastF = curF # end if # end for if (firstF != None): firstF.subtype = ('g','u')[needs_to_share_diagonal] firstF.x_start = lowHitPX firstF.y_start = lowHitPY firstF.x_length = hghHitPX - lowHitPX firstF.y_length = hghHitPY - lowHitPY print >>outfile, firstF return # end def def trimMatchOverlapsInX(inpfile,outfile, trim_subtype): "Trim the match overlaps with respect to the X assembly." overlaps=0 abuts=0 posgaps=0 contained = 0 trimmed = 0 left = None picket = 0 # For each genomic axis we scan left to right using this picket # position to annihilating any part of the current match to the # left of this picket. 
inpfile.seek(0) for line in iter(inpfile): if(line[0] == 'M'): right = MatchRecord.MatchRecord(line) if( right.subtype != trim_subtype): print >>outfile, line, continue if( left == None or #left.x_scaf_uid < right.x_scaf_uid): left.x_scaf_uid != right.x_scaf_uid): picket = 0 else: assert(left != None) assert(right != None) if(left.x_scaf_uid > right.x_scaf_uid): print >>sys.stderr, "sequence ids out of x sorted order" print >>sys.stderr, left print >>sys.stderr, right assert(left.subtype == right.subtype) assert(left.x_scaf_uid == right.x_scaf_uid) if(not(left.x_start <= right.x_start)): print >>sys.stderr, "trimMatchOverlapsInX: Woops not sorted anymore!" print >>sys.stderr, left print >>sys.stderr, right #assert(0) thisbgn = right.x_start thisend = right.x_start + right.x_length if(picket < thisend): gaplen = thisbgn - picket if(gaplen > 0): posgaps += 1 if(gaplen == 0): abuts += 1 if(gaplen < 0): overlaps += 1 trimmed -= gaplen right.x_start -= gaplen # modify the match right.x_length += gaplen right.y_length += gaplen if(right.x_orientation == right.y_orientation): right.y_start -= gaplen # modify the match else: # picketed region contains right. #print >>sys.stderr, "trimMatchOverlapsInX: Contained" #print >>sys.stderr, left #print >>sys.stderr, right contained += 1 right = None # remove this match if(right != None): print >>outfile, right newpicket = right.x_start + right.x_length assert(picket < newpicket) picket = newpicket left = right else: print >>outfile, line, print >>sys.stderr, "trimMatchOverlapsInX:\n", print >>sys.stderr, "#posgaps, #abuts, #overlaps, #contained, bp_trimmed= %d %d %d %d %d\n" \ % (posgaps, abuts, overlaps, contained, trimmed, ) return def trimMatchOverlapsInY(inpfile,outfile, trim_subtype): "Trim the match overlaps with respect to the Y assembly." 
overlaps=0 abuts=0 posgaps=0 contained = 0 trimmed = 0 left = None picket = 0 # For each genomic axis we scan left to right using this picket # position to annihilating any part of the current match to the # left of this picket. inpfile.seek(0) for line in iter(inpfile): if(line[0] == 'M'): right = MatchRecord.MatchRecord(line) if( right.subtype != trim_subtype): print >>outfile, line, continue if( left == None or #left.y_scaf_uid < right.y_scaf_uid): left.y_scaf_uid != right.y_scaf_uid): picket = 0 else: assert(left != None) assert(right != None) if(left.y_scaf_uid > right.y_scaf_uid): print >>sys.stderr, "sequence ids out of y sorted order" print >>sys.stderr, left print >>sys.stderr, right assert(left.subtype == right.subtype) assert(left.y_scaf_uid == right.y_scaf_uid) if(not(left.y_start <= right.y_start)): print >>sys.stderr, "trimMatchOverlapsInY: Woops not sorted anymore!" print >>sys.stderr, left print >>sys.stderr, right #assert(0) thisbgn = right.y_start thisend = right.y_start + right.y_length if(picket < thisend): gaplen = thisbgn - picket if(gaplen > 0): posgaps += 1 if(gaplen == 0): abuts += 1 if(gaplen < 0): overlaps += 1 trimmed -= gaplen right.y_start -= gaplen # modify the match right.y_length += gaplen right.x_length += gaplen if(right.x_orientation == right.y_orientation): right.x_start -= gaplen # modify the match else: # picketed region contains right. 
#print >>sys.stderr, "trimMatchOverlapsInY: Contained" #print >>sys.stderr, left #print >>sys.stderr, right contained += 1 right = None # remove this match if(right != None): print >>outfile, right newpicket = right.y_start + right.y_length assert(picket < newpicket) picket = newpicket left = right else: print >>outfile, line, print >>sys.stderr, "trimMatchOverlapsInY:\n", print >>sys.stderr, "#posgaps, #abuts, #overlaps, #contained, bp_trimmed= %d %d %d %d %d\n" \ % (posgaps, abuts, overlaps, contained, trimmed, ) return def trimMatchOverlapsInBoth(inpfile,outfile,trim_subtype): gp = MyFile.myfile() MatchRecord.sortInXorderAP(inpfile,gp) # The following coalescing assumes perfect runs. hp = MyFile.myfile() coalesceMatches( gp, hp, ((trim_subtype == 'x') or (trim_subtype == 'u')) ) gp = MyFile.myfile() trimMatchOverlapsInX(hp,gp,trim_subtype) hp = MyFile.myfile() MatchRecord.sortInYorderAP(gp,hp) trimMatchOverlapsInY(hp,outfile,trim_subtype) return def main(inpname, outname, trim_subtype): inpfile = open(inpname) outfile = open(outname,"w") trimMatchOverlapsInBoth(inpfile,outfile,trim_subtype) if __name__ == '__main__': inpname = sys.argv[1] outname = sys.argv[2] trim_subtype = sys.argv[3] main(inpname, outname, trim_subtype) kmer-code-2013-trunk/atac-driver/chainer/python/AtacDriver.txt0000644000000000000000000002016410215770307023073 0ustar rootroot SET GLOBALS, if not already set, and if using samespecies, parameter set 1 obj.globals["heavyChainsOn"] = "1" obj.globals["matchExtenderOn"] = "1" obj.globals["uniqueFilterOn"] = "1" obj.globals["fillIntraRunGapsOn"] = "1" obj.globals["numsegments"] = "1" RUN BRIATAC HERE MAKE SURE MATCHES ARE IN ATAC FORMAT RUN HEAVYCHAINS, if enabled -g /assemblyId1=XXXX -g /assemblyId2=XXXX -g /heavyMaxJump=XXXX (100000) -g /heavyMinFill=XXXX (100) RUN GLOBAL CHAINING, if enabled # /work/assembly/floreald/ASM/src/Ross/chain-global # /work/assembly/floreald/ASM/src/break-chains All %s's are the prefix chain-global %s -M 30 -p DP > 
%s.M30.dp 2> %s.M30.dp.errs" break-chains %s.M30.dp -D 0 -M 10 -p DPR | grep -v 'M r ' > %s.M30.dp.runs 2> %s.M30.dp.runs.errs" RUN CHAIN CONSERVATION(?), if enabled # /work/assembly/floreald/ASM/src/Ross/chain-consv # /work/assembly/floreald/ASM/src/break-chains chain-consv %s -p CS > %s.cons 2> %s.cons.errs" % (inpname,inpname,inpname) break-chains %s.cons -diffrun -D 0 -M 10 -p TMP > %s.cons.runs.tmp 2> %s.cons.runs.tmp.errs" % (inpname,inpname,inpname) break-chains %s.cons.runs.tmp -D 0 -M 10 -p CSR | grep -v 'M r ' > %s.cons.runs 2> %s.cons.runs.errs" % (inpname,inpname,inpname) RUN CHAIN GREEDY, if enabled # /work/assembly/floreald/ASM/src/Ross/chain-greedy # /work/assembly/floreald/ASM/src/break_chains chain-greedy %s -p GR -M 10 -W 500 > %s.greedy 2> %s.greedy.errs" % (inpname, inpname, inpname) break-chains %s.greedy -D 0 -M 10 -p GRR | grep -v 'M r ' > %s.greedy.runs 2> %s.greedy.runs.errs" % (inpname, inpname, inpname) RUN MATCH EXTENDER, if enabled matchextender inpname outname SET SOME DEFAULTS (unless already set) obj.globals['rawMatchMerSize'] = 20 obj.globals['rawMatchMerMaxDegeneracy'] = 1 obj.globals['rawMatchMinSize'] = obj.globals['rawMatchMerSize'] # Many 2*rawMatchMerSize-1 matches are due to isolated single # nucleotide mutations in otherwise perfect repeats. 
# obj.globals['globalMatchMinSize'] = 2*int(obj.globals['rawMatchMerSize']) obj.globals['globalPerfectRunMinLen'] = 100 obj.globals['globalPerfectRunMaxGapLen'] = 100000 obj.globals['intraRunGapIsolatedMismatchLen'] = 20 RUN OLD self.globals['atacAlgorithmVersion'] = str(17) print >>STDERR, "runName = %s\n" % self.runName # The ATAC globals used by this script: opt_t = int(self.globals['globalMatchMinSize']) opt_l = int(self.globals['globalPerfectRunMinLen']) maxdiff = int(self.globals['globalPerfectRunMaxGapLen']) assemblyId1 = self.globals['assemblyId1'] assemblyId2 = self.globals["assemblyId2"] assemblyFilePrefix1 = self.globals['assemblyFilePrefix1'] assemblyFilePrefix2 = self.globals['assemblyFilePrefix2'] # Deprecated for same species comparisons 2003/09/09. boxRecoveryOn = 0 if(self.globals.has_key("boxRecoveryOn")): boxRecoveryOn = int(self.globals["boxRecoveryOn"]) BUILD IDXSTORE (assemblyIdx1) for the files GENERALLY, after each step, the outfile replaces self.matches STEP RUN UNIQUE FILTER UniqueFilter.main( self.matches, outfile) STEP RUN FILTER BY MATCH LENGTH only keep those M records with both pieces at least as long as opt_t STEP RUN TRIMMING for bp one-to-one-ness (rewrite-trimMatches) inpfile = self.matches trim_subtype = 'u' gp = MyFile.myfile() MatchRecord.sortInXorderAP(inpfile,gp) # The following coalescing assumes perfect runs. 
# this is the same as rewrite-coalesceMatches hp = MyFile.myfile() coalesceMatches( gp, hp, ((trim_subtype == 'x') or (trim_subtype == 'u')) ) gp = MyFile.myfile() trimMatchOverlapsInX(hp,gp,trim_subtype) hp = MyFile.myfile() MatchRecord.sortInYorderAP(gp,hp) trimMatchOverlapsInY(hp,outfile,trim_subtype) if boxRecoveryOn, save these self.matches (outfile from last step) for later STEP FORM PERFECT RUNS (rewrite-perfectRuns) tempdata = PerfectRuns.formPerfectRuns(self.matches, MatchRecord.sortInXorderAP, MatchRecord.sortInYorderAP, maxdiff, 'r') STEP ONLY KEEP LONG RUNS tempdata = onlyKeepLongRuns( self.matches, outprefix, opt_l) description: find all runs (matches with the same run id) that have a sum of lengths larger than opt_l details: if there is a last match, and it is a different runid than this match, dump all the saved matches, reset the length to zero add in the length of this match to the length if the length we've seen so far is less than lengthThreshold, save this match otherwise (the length is bigger) print all saved matches, and this match. clear the list of saved matches, but do not clear the length remember the runid of this match (call it lastId) STEP 'HEAL' THE PERFECT RUNS (rewrite-perfectRuns) tempdata = PerfectRuns.formPerfectRuns(self.matches, MatchRecord.sortInYorderAP, MatchRecord.sortInXorderAP, maxdiff, 'r') STEP DO BOX RECOVERY, if enabled (rewrite-boxRecovery) print >>STDERR, "Make sorted raw matches" outfile = MyFile.myfile() MatchRecord.sortInXorderAP( rawfile, outfile) rawfile = outfile print >>STDERR, "perform box recovery" tempdata = boxRecovery( self.matches, rawfile, outprefix) form perfect runs again tempdata = PerfectRuns.formPerfectRuns(self.matches, MatchRecord.sortInXorderAP, MatchRecord.sortInYorderAP, maxdiff, 'r') STEP SQUEEZE INTRA RUN GAPS squeezeIntraRunGaps.squeezeIntraRunGaps( self.matches, tempdata, assemblyIdx1, assemblyIdx2) tempy = MyFile.myfile() # Beware the current match subtypes are 'x', 'L', and 'R'! 
coalesceMatches( tempdata, tempy, 1) self.matches = tempy STEP TRIMMING FOR bp one-to-one-ness # THIS IS ALSO DONE ABOVE! trimMatchOverlapsInBoth was a metafunction TrimMatchOverlaps.trimMatchOverlapsInBoth(self.matches,tempdata,'u') STEP PERFECT RUNS AS MATCHES (rewrite-runsAsMatches) self.runs = PerfectRuns.runsAsMatches( self.matches) STEP FILL INTRA RUN GAPS, if enabled set defaults if not set self.globals['fillIntraRunGapsErate'] = 0.10 self.globals['fillIntraRunGapsMaxGap'] = 100000 fillIntraRunGapsErate = float(self.globals['fillIntraRunGapsErate']) fillIntraRunGapsMaxGap = int(self.globals['fillIntraRunGapsMaxGap']) fillIntraRunGaps.mainLoop( self.matches, tempdata, assemblyIdx1, assemblyIdx2, fillIntraRunGapsMaxGap, fillIntraRunGapsErate) print >>STDERR, "trim the overlaps" TrimMatchOverlaps.trimMatchOverlapsInBoth(self.matches,tempdata,'u') self.matches = tempdata STEP COUNT NUMBER OF SUBSTITUTIONS countMisMatches.countMisMatches(self.matches, tempdata, assemblyIdx1, assemblyIdx2) ALL DONE kmer-code-2013-trunk/atac-driver/chainer/python/AtacFile.py0000755000000000000000000000546710210717773022347 0ustar rootroot#!/usr/bin/env python # Looking in /usr/local/ir/bin on the Compaqs for the correct Python interpreter. # export PYTHONPATH=${PYTHONPATH}:$WORK/cds/IR/COMPASS/src/AtacPipeline """ Extensive documentation for the Python language is available at http://www.python.org. """ import os, sys, time, getopt, tempfile import MyFile import MatchRecord class AtacFile: # The data flow is a pipeline augmented by two read-only indexed # FASTA files. def __init__( self, runName): "You must supply a atac file called runName.atac." 
self.runName = runName self.comments = [] self.metacommands = [] self.globals = {} self.tableformat = {} self.tabledata = {} self.matches = MyFile.myfile() self.runs = MyFile.myfile() fp = open(runName,"r") for line in fp: self.atac_file_parse_line(line) def atac_file_parse_line( self, line): line = line.strip() if(not line): return # end if linetype = line[0] if(linetype == '#'): # Just a comment: squirrel away or ignore self.comments.append(line) return elif(linetype == '!'): self.metacommands.append(line) return elif(linetype == '/'): # Add to the globals dictionary (key,value) = line[1:].split('=') self.globals[key] = value.strip() return elif(linetype == '@'): list = line[1:].split() name = list[0] self.tableformat[name] = list[1:] self.tabledata[name] = [] # an empty list return elif(linetype == 'M'): fields = line.split() if(fields[1] == 'r'): print >>self.runs, line else: print >>self.matches, line return elif(line == ''): pass else: print >>sys.stderr, "The offending line:" print >>sys.stderr, line assert(0) # end if # end def def checkpoint(self, filename): self.globals["modificationDate"] = time.asctime() fp = open(filename,"w") for line in self.metacommands: print >>fp, line for line in self.comments: print >>fp, line # Output the globals in lexigraphical order. 
list = [] for key in self.globals: list.append("/" + key + "=" + str(self.globals[key])) list.sort() for line in list: print >>fp, line self.matches.seek(0) for line in self.matches: fp.write(line) self.matches.seek(0) self.runs.seek(0) for line in self.runs: fp.write(line) self.runs.seek(0) fp.close() kmer-code-2013-trunk/atac-driver/chainer/python/dedashMatches.py0000755000000000000000000001126710454403366023427 0ustar rootroot#!/usr/bin/env python # dedashMatches.py /prod/IR02/synteny/mus-vs-rat/mouse_celera_R13_chr_20030210-vs-rat_celera_R1_chr_20030507-V3.atac.t20.l100.br.squeezed.filled.coalesced mus-vs-rat.out /prod/IR05/GENOMES/mouse_celera_R13_chr_20030210 /prod/IR05/GENOMES/rat_celera_R1_chr_20030507 MR13 RR1 # dedashMatches.py mouse_celera_R13_chr_20030210-vs-rat_celera_R1_chr_20030507-V3.atac.t20.l100.br.squeezed.filled.coalesced mus-vs-rat.out mouse_celera_R13_chr_20030210 rat_celera_R1_chr_20030507 MR13 RR1 import sys import string import time import MatchRecord import IdxStore import halign #import shelve class dedasher: def __init__(self,xstr,ystr): pass def __iter__(self): return iter([1]) x = 3 def suba(): global x x = 7 def subb(): global x x -= 1 return (x,None)[x == 0] def subc(): suba() it = iter(subb,None) for y in it: print y def main( inpfile, outfile, xIdx, yIdx): inpfile.seek(0) outfile.seek(0) lineCount = 0 t0 = time.time() for line in inpfile: lineCount += 1 if((lineCount % 10000)==0): print >>sys.stderr, "lineCount=",lineCount," time=",time.time()-t0 if(line[0] == 'M'): FM = MatchRecord.MatchRecord(line) if(FM.subtype == 'g'): parentid = FM.matchid parent_x_forward = (FM.x_orientation == 1) parent_y_forward = (FM.y_orientation == 1) parent_x_start = FM.x_start parent_y_start = FM.y_start parent_x_length = FM.x_length parent_y_length = FM.y_length # Why two orientations and not just a flipped flag? # Because we want the resulting matches to come out in # the same sorted order as the input matches. 
x_substring = string.upper( xIdx.getStringFromFasta( parent_x_forward, FM.x_scaf_uid, FM.x_start, FM.x_length)); y_substring = string.upper( yIdx.getStringFromFasta( parent_y_forward, FM.y_scaf_uid, FM.y_start, FM.y_length)); ii = 0 # Here we call the dedasher. halign.halignStart(x_substring, y_substring) for segment in iter(halign.halignDedash,None): #print >>outfile, segment (bgn1,bgn2,len1,len2,nmat) = segment # Filter by a minimum length? say four bp. ii += 1 FM.subtype = 'u' FM.matchid = parentid + 'u' + str(ii) # FM.runid = parentid FM.x_start = parent_x_start + (parent_x_length-bgn1-len1,bgn1)[parent_x_forward] FM.y_start = parent_y_start + (parent_y_length-bgn2-len2,bgn2)[parent_y_forward] FM.x_length = len1 FM.y_length = len2 assert(len1 == len2) mismatches = 0 for ic in range(len1): if(x_seq[bgn1+ic] != y_seq[bgn2+ic]): mismatches += 1 FM.extend['mm'] = str(mismatches) FM.identifier = "" # BEWARE print >>outfile, FM else: print >>outfile, line, else: print >>outfile, line, def oldmain(): inpname = sys.argv[1] outname = sys.argv[2] xIndexName = sys.argv[3] yIndexName = sys.argv[4] assemblyId1 = sys.argv[5] assemblyId2 = sys.argv[6] # mismatches = checkExactMatches( x, y, inpfile) # sys.stderr.write("mismatches = %d\n" % mismatches) inpfile = open(inpname) outfile = open(outname,"w") xIdx = IdxStore.IdxStore(xIndexName,assemblyId1) yIdx = IdxStore.IdxStore(yIndexName,assemblyId2) main( inpfile, outfile, xIdx, yIdx) outfile.close() import AtacFile import MyFile def newmain(): inpname = sys.argv[1] outname = sys.argv[2] obj = AtacFile.AtacFile(inpname) xname = obj.globals["assemblyFilePrefix1"] yname = obj.globals["assemblyFilePrefix1"] assemblyId1 = obj.globals["assemblyId1"] assemblyId2 = obj.globals["assemblyId2"] xIdx = IdxStore.IdxStore(xname,assemblyId1) yIdx = IdxStore.IdxStore(yname,assemblyId2) inpfile = obj.matches outfile = MyFile.myfile() main( inpfile, outfile, xIdx, yIdx) obj.matches = outfile obj.checkpoint(outname) outfile.close() # Allow each 
module to have its own main for testing. if __name__ == '__main__': newmain() # end if kmer-code-2013-trunk/atac-driver/chainer/Make.include0000644000000000000000000000446412371234636021221 0ustar rootroot# -*- makefile -*- $/.CXX_SRCS := $/localalign/GF_ALN_dpaligner.C \ $/localalign/GF_ALN_local.C \ $/localalign/GF_ALN_overlap.C \ $/localalign/GF_ALN_loverlapper.C \ $/localalign/GF_ALN_pieceOlap.C \ $/localalign/localAlignerInterfacemodule.C \ $/halign/halign.C \ $/halign/halignmodule.C $/.CXX_SHLIBS := $/localAlignerInterfacemodule.so \ $/halignmodule.so $/.PY_EXES := $/python/AtacDriver.py $/.PY_LIBS := $/python/AtacDriver.py \ $/python/AtacFile.py \ $/python/DNA.py \ $/python/IdxStore.py \ $/python/MatchRecord.py \ $/python/MyFile.py \ $/python/PerfectRuns.py \ $/python/TrimMatchOverlaps.py \ $/python/UniqueFilter.py \ $/python/dedashMatches.py \ $/python/fillIntraRunGaps.py \ $/python/mkstats.py \ $/python/squeezeIntraRunGaps.py $/.CLEAN := $/*.o $/*/*.o $/*.so $/python/*.pyc # Dependency generation doesn't know about CFLAGS_PYTHON, so this can appear as a dependency # if Python.h isn't in the standard include paths, and then 'No rule to make target 'Python.h', # needed by ...' appears. We fix by explicitly pointing to Python.h # # Unfortunately, Python.h remains out of date (as it would with .PHONY), so we build every time. 
# Python.h: ${PYTHON_H} $/localalign/localAlignerInterfacemodule.o: $/localalign/localAlignerInterfacemodule.C ${-CXX} ${CXX} ${CXXFLAGS} ${CXXFLAGS_COMPILE} ${CFLAGS_PYTHON} -o $@ -c $< $/localAlignerInterfacemodule.so: $/localalign/GF_ALN_dpaligner.o \ $/localalign/GF_ALN_local.o \ $/localalign/GF_ALN_overlap.o \ $/localalign/GF_ALN_loverlapper.o \ $/localalign/GF_ALN_pieceOlap.o \ $/localalign/localAlignerInterfacemodule.o $/halign/halignmodule.o: $/halign/halignmodule.C ${-CXX} ${CXX} ${CXXFLAGS} ${CXXFLAGS_COMPILE} ${CFLAGS_PYTHON} -o $@ -c $< $/halignmodule.so: $/halign/halign.o \ $/halign/halignmodule.o kmer-code-2013-trunk/atac-driver/chainer/localalign/0000755000000000000000000000000012641613361021070 5ustar rootrootkmer-code-2013-trunk/atac-driver/chainer/localalign/GF_ALN_local.C0000644000000000000000000007367710750171365023362 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2004 Applera Corporation // Author: Clark Mobarry // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include #include #include #include "GF_ALN_local.H" // Note, KMERLEN 5, MINMATCH 20, MAXERROR 2, KTHRESH 6 is reasonable // for performing fragment against fragment comparisons; it permits // relatively small segments to be found; but it will not give // acceptable run time for large comparisons such as a BAC against a // BAC etc. So, ... #define KMERLEN 6 // Must be >= 1 #define MINMATCH 20 // (MINMATCH-KMERLEN) is the maximum jump distance. #define MAXERROR 2 // maximum slop in diagnols for chaining KMERLEN hits. // The minimum number kmer hits that constitutes an acceptable chain. #define KTHRESH (MINMATCH - (KMERLEN-1) - KMERLEN*MAXERROR) #define min(a,b) (ab?a:b) /* D.P. extension alignment scoring */ /* N.B.: the larger MAXIGAP, the longer subpart of a trapezoid (potential segment) can be missed; this occurs if (a) Align_Recursion starts in the subpart, with a bad region on both sides of the subpart; the forward pass extends across one of the low-quality regions into a long high-quality region but then the reverse pass doesn't get back across the low-quality region--or rather, it does, but the best score doesn't. Of course, the smaller MAXIGAP, the shorter the segments and the less the chance of extending alignments across gaps between trapezoids. Setting MAXIGAP small allows alignment extension to end relatively early, so that a short high-quality region bounded by low-quality regions can end up being its own segment. And, it also has the effect that if a low-quality region is small enough to get through, then it doesn't take that long a high-quality segment to increase the score back up to a new maximum value. 
*/ #define MAXIGAP_DEFAULT 3 static int MAXIGAP=MAXIGAP_DEFAULT; /*amount to subtract from score for mismatch */ //#define DIFFCOST 14 // we can define it to 14 in order not to extend the alignment // only at a high level of stringency #define DIFFCOST 3 /*amount to add to score for match*/ #define SAMECOST 1 static int diffcost=DIFFCOST; static int samecost=SAMECOST; /* Trapezoid merging padding */ #define DPADDING 2 #define BPADDING KMERLEN+2 static int BLOCKCOST = DIFFCOST*MAXIGAP_DEFAULT; static int MATCHCOST = DIFFCOST+SAMECOST; /* Major data types */ /* Hit Record: Description of indexed based seed-match */ typedef struct { int diagonal; /* Diagonal of hit */ int bstart; /* B position of start of hit */ int bfinish; /* B position of end of hit */ } HitRecord; /* Trapezoid Record: Description of trapezoidal match zone */ typedef struct _Trap_Tag { struct _Trap_Tag *next; /* Organized in a list linked on this field */ int top, bot; /* B-coords of top and bottom of trapzoidal zone */ int lft, rgt; /* Left and right diagonals of trapzoidal zone */ } Trapezoid; /*** UTILITY ROUTINES ***/ static void OutOfMemory(char const * const where) { fprintf(stderr,"COMPARE_LOCAL: Out of memory (%s)\n",where); exit (1); } static void Complement(char * const seq, int const len) { static char WCinvert[256]; static int Firstime = 1; if (Firstime) { /* Setup complementation array */ int i; Firstime = 0; for(i = 0; i < 256;i++){ WCinvert[i] = '?'; } WCinvert[(int)'a'] = 't'; WCinvert[(int)'c'] = 'g'; WCinvert[(int)'g'] = 'c'; WCinvert[(int)'t'] = 'a'; WCinvert[(int)'n'] = 'n'; WCinvert[(int)'A'] = 'T'; WCinvert[(int)'C'] = 'G'; WCinvert[(int)'G'] = 'C'; WCinvert[(int)'T'] = 'A'; WCinvert[(int)'N'] = 'N'; WCinvert[(int)'-'] = '-'; // added this to enable alignment of gapped consensi } /* Complement and reverse sequence */ { register char *s, *t; int c; s = seq; t = seq + (len-1); while (s < t) { c = *s; *s++ = WCinvert[(int) *t]; *t-- = WCinvert[c]; } if (s == t) *s = 
WCinvert[(int) *s]; } } /*** INDEX CONSTRUCTION AND APPLICATION TO FILTERING ***/ /* Shared index and filter arrays used in this subsection */ typedef struct { int minim; int maxim; int count; } DiagRecord; static int Kmask = -1; static int *Table = NULL; /* [0..Kmask+1] */ static int *Tuples = NULL; /* [0..-KMERLEN] */ static int Map[128]; static DiagRecord *DiagVec; /* [-(Alen-KMERLEN)..(Blen-KMERLEN) + MAXERROR] */ /* Reverse complement sequences -- so we do not recompute them over and over */ static char *BrevC=NULL; /* Build index table for sequence S of length Slen. */ static void TableBuild(char const * const S, int const Slen) { int i, c; int x, h; char const * const s = S+(KMERLEN-1); for (c = 0; c <= Kmask; c++) Table[c] = 0; h = -KMERLEN; c = 0; for (i = 0; i < KMERLEN-1; i++) { x = Map[(int) (S[i])]; if (x >= 0) c = (c << 2) | x; else { c <<= 2; h = i-(KMERLEN-1); } } for (i = 0; i <= Slen-KMERLEN; i++) { x = Map[(int) (s[i])]; if (x >= 0) c = ((c << 2) | x) & Kmask; else { c = (c << 2) & Kmask; h = i; } if (i >= h+KMERLEN) Table[c+1] += 1; } for (c = 2; c <= Kmask; c++) Table[c] += Table[c-1]; h = -KMERLEN; c = 0; for (i = 0; i < KMERLEN-1; i++) { x = Map[(int) (S[i])]; if (x >= 0) c = (c << 2) | x; else { c <<= 2; h = i-(KMERLEN-1); } } for (i = 0; i <= Slen-KMERLEN; i++) { x = Map[(int) (s[i])]; if (x >= 0) c = ((c << 2) | x) & Kmask; else { c = (c << 2) & Kmask; h = i; } if (i >= h+KMERLEN) Tuples[Table[c]++] = i; } for (c = Kmask; c >= 0; c--) Table[c+1] = Table[c]; Table[0] = 0; } /* Apply index to find filtered hits between sequences, returning pointer to array of HitRecords of length in the integer pointed at by Hitlen */ static int HSORT(const void *l, const void *r) { HitRecord *x, *y; x = (HitRecord *) l; y = (HitRecord *) r; return (x->bstart - y->bstart); } static HitRecord *Find_Hits (char const * const A, int const Alen, char const * const B, int const Blen, int * const Hitlen) { static int HitMax = -1; static HitRecord *HitList; int 
hits, disconnect; if (HitMax < 0) { HitMax = 10000; HitList = (HitRecord *) malloc(sizeof(HitRecord)*HitMax); if (HitList == NULL) OutOfMemory("Hit list"); } { int i, j, c; int x, h; char const * const b = B + (KMERLEN-1); for (j = -Alen; j <= Blen+MAXERROR; j++) { DiagRecord *dp; dp = DiagVec + j; dp->count = dp->maxim = 0; } hits = 0; disconnect = MINMATCH - KMERLEN; h = -KMERLEN; c = 0; for (i = 0; i < KMERLEN-1; i++) { x = Map[(int) (B[i])]; if (x >= 0) c = (c << 2) | x; else { c <<= 2; h = i-(KMERLEN-1); } } for (i = 0; i <= Blen-KMERLEN; i++) { x = Map[(int) (b[i])]; if (x >= 0) c = ((c << 2) | x) & Kmask; else { c = (c << 2) & Kmask; h = i; } if (i >= h+KMERLEN) for (j = Table[c]; j < Table[c+1]; j++) { DiagRecord *dp; int e, k; k = i-Tuples[j]; dp = DiagVec + k; for (e = 0; e <= MAXERROR; e++) { if (dp->maxim < i-disconnect) { if (dp->count >= KTHRESH) { HitRecord *hp; if (hits >= HitMax) { HitMax = (int)(1.2*hits) + 5000; HitList = (HitRecord *) realloc(HitList, sizeof(HitRecord)*HitMax); if (HitList == NULL) OutOfMemory("Hit list"); } hp = HitList + hits; hp->diagonal = k; hp->bstart = dp->minim; hp->bfinish = dp->maxim + KMERLEN; hits += 1; } dp->count = 0; } if (dp->count == 0) dp->minim = i; dp->count += 1; dp->maxim = i; dp += 1; } } } for (j = -Alen; j <= Blen+MAXERROR; j++) { DiagRecord *dp; dp = DiagVec + j; if (dp->count >= KTHRESH) { HitRecord *hp; if (hits >= HitMax) { HitMax = (int)(1.2*hits) + 5000; HitList = (HitRecord *)realloc(HitList,sizeof(HitRecord)*HitMax); if (HitList == NULL) OutOfMemory("Hit list"); } hp = HitList + hits; hp->diagonal = j; hp->bstart = dp->minim; hp->bfinish = dp->maxim + KMERLEN; hits += 1; } } } qsort(HitList,hits,sizeof(HitRecord),HSORT); *Hitlen = hits; return (HitList); } /*** FORWARD AND REVERSE D.P. EXTENSION ROUTINES ***/ /* Called at the mid-point of trapezoid -- mid X [lo,hi], the extension is computed to an end point and the lowest and highest diagonals are recorded. 
   These are returned in a partially filled Local_Segment
   record, that will be merged with that returned for extension in the
   opposite direction.  */

/* Forward extension: rows are A positions i = mid, mid+1, ... and columns
   are B positions j in the live band [lo,hi].

   Every cell pays diffcost; MATCHCOST is added first when the two
   characters have equal, non-negative Map codes.  The band is pruned each
   row to the cells scoring within BLOCKCOST of the best score so far, and
   the loop stops when the band is empty or A is exhausted.

   The result lives in a function-static record that the next call
   overwrites.  Only aepos, bepos, ldiag, hdiag and score are set here.
   NOTE(review): aepos receives the column index (mxj) and bepos the row
   index (mxi) -- confirm the intended a/b naming against the callers. */
Local_Segment *TraceForwardPath ( char const * const A, int const Alen,
                                  char const * const B, int const Blen,
                                  int const mid, int lo, int hi)
{
  static Local_Segment rez;
  int *V;
  int mxv, mxl, mxr, mxi, mxj;
  int i, j;
  int *Base1, *Base2;

  /* The two D.P. rows reuse the storage behind DiagVec as plain ints. */
  Base1 = ((int *) DiagVec);
  Base2 = Base1 + (Blen+1);

  /* Set basis from (mid,lo) .. (mid,hi) */

  V = Base1;
  if (lo < 0) lo = 0;
  if (hi > Blen) hi = Blen;

  for (j = lo; j <= hi; j++)
    V[j] = 0;
  /* Widen the start row by up to MAXIGAP columns, each costing diffcost. */
  hi += MAXIGAP;
  if (hi > Blen) hi = Blen;
  for (; j <= hi; j++)
    V[j] = V[j-1] - diffcost;

  mxv = 0;
  mxr = mid - lo;
  mxl = mid - hi;
  mxi = mid;
  mxj = lo;

  /* Advance to next row */

  for (i = mid; lo <= hi && i < Alen; i++) {
    int c, v;
    int *W;

    /* W is the previous row, V becomes the row being filled. */
    W = V;
    if (V == Base1)
      V = Base2;
    else
      V = Base1;

    v = W[lo];
    c = V[lo] = v - diffcost;
    for (j = lo+1; j <= hi; j++) {
      int r, t;

      /* t: cell to the left in this row, c: diagonal predecessor,
         v: cell above in the previous row. */
      t = c;
      c = v;
      v = W[j];
      if (Map[(int)A[i]] == Map[(int)B[j-1]] && Map[(int) (A[i])] >= 0)
        c += MATCHCOST;

      r = c;
      if (v > r) r = v;
      if (t > r) r = t;

      V[j] = c = r - diffcost;
      if (c >= mxv) {
        mxv = c;
        mxi = i+1;
        mxj = j;
        //printf("reset mxv = %d at [%d,%d]\n",mxv,mxi,mxj);
      }
    }

    /* One column past the old band edge, then keep extending rightwards
       along the row until the score falls out of the BLOCKCOST window. */
    if (j <= Blen) {
      int r;

      if (Map[(int)A[i]] == Map[(int)B[j-1]] && Map[(int) (A[i])] >= 0)
        v += MATCHCOST;

      r = v;
      if (c > r) r = c;

      V[j] = v = r - diffcost;
      if (v > mxv) {
        mxv = v;
        mxi = i+1;
        mxj = j;
        //printf("reset mxv = %d at [%d,%d]\n",mxv,mxi,mxj);
      }

      for (j++; j <= Blen; j++) {
        v -= diffcost;
        if (v < mxv - BLOCKCOST) break;
        V[j] = v;
      }
    }

    hi = j-1;

    /* Shrink the band from both sides to cells still within BLOCKCOST
       of the best score. */
    while (lo <= hi && V[lo] < mxv - BLOCKCOST)
      lo += 1;
    while (lo <= hi && V[hi] < mxv - BLOCKCOST)
      hi -= 1;

    /* Track the extreme (row - column) values reached by the band. */
    if ((i+1) - lo > mxr)
      mxr = (i+1) - lo;
    if ((i+1) - hi < mxl)
      mxl = (i+1) - hi;
  }

  rez.aepos = mxj;
  rez.bepos = mxi;
  rez.ldiag = mxl;
  rez.hdiag = mxr;
  rez.score = mxv;
  return (&rez);
}

/* Reverse extension: the mirror image of TraceForwardPath, walking rows
   i = top-1 down to 0 and extending the band leftwards.

   The pruning window is 'xfactor' until row 'bot' has been processed and
   BLOCKCOST from then on (immediately, if top-1 <= bot).

   The result lives in a function-static record that the next call
   overwrites.  Only abpos, bbpos, ldiag, hdiag and score are set here.
   NOTE(review): abpos receives the column index (mxj) and bbpos the row
   index (mxi) -- confirm the intended a/b naming against the callers. */
Local_Segment *TraceReversePath(char const * const A, int const Alen,
                                char const * const B, int const Blen,
                                int const top, int lo, int hi,
                                int const bot, int xfactor)
{
  static Local_Segment rez;
  int *V;
  int mxv, mxl, mxr, mxi, mxj;
  int i, j;
  int *Base1, *Base2;

  /* The two D.P. rows reuse the storage behind DiagVec as plain ints. */
  Base1 = ((int *) DiagVec);
  Base2 = Base1 + (Blen+1);

  /* Set basis from (top,lo) .. (top,hi) */

  V = Base1;
  if (lo < 0) lo = 0;
  if (hi > Blen) hi = Blen;

  for (j = hi; j >= lo; j--)
    V[j] = 0;
  /* Widen the start row by up to MAXIGAP columns on the left. */
  lo -= MAXIGAP;
  if (lo < 0) lo = 0;
  for (; j >= lo; j--)
    V[j] = V[j+1] - diffcost;

  mxv = 0;
  mxr = top - lo;
  mxl = top - hi;
  mxi = top;
  mxj = lo;

  /* Advance to next row */

  if (top-1 <= bot) xfactor = BLOCKCOST;

  for (i = top-1; lo <= hi && i >= 0; i--) {
    int c, v;
    int *W;

    /* W is the previous row, V becomes the row being filled. */
    W = V;
    if (V == Base1)
      V = Base2;
    else
      V = Base1;

    v = W[hi];
    c = V[hi] = v - diffcost;
    for (j = hi-1; j >= lo; j--) {
      int r, t;

      /* t: cell to the right in this row, c: diagonal predecessor,
         v: same column in the previous row. */
      t = c;
      c = v;
      v = W[j];
      if (Map[(int)A[i]] == Map[(int)B[j]] && Map[(int) (A[i])] >= 0)
        c += MATCHCOST;

      r = c;
      if (v > r) r = v;
      if (t > r) r = t;

      V[j] = c = r - diffcost;
      if (c >= mxv) {
        mxv = c;
        mxi = i;
        mxj = j;
        //printf("reset mxv = %d at [%d,%d]\n",mxv,mxi,mxj);
      }
    }

    /* One column past the old band edge, then keep extending leftwards
       along the row until the score falls out of the xfactor window. */
    if (j >= 0) {
      int r;

      if (Map[(int)A[i]] == Map[(int)B[j]] && Map[(int) (A[i])] >= 0)
        v += MATCHCOST;

      r = v;
      if (c > r) r = c;

      V[j] = v = r - diffcost;
      if (v > mxv) {
        mxv = v;
        mxi = i;
        mxj = j;
        //printf("reset mxv = %d at [%d,%d]\n",mxv,mxi,mxj);
      }

      for (j--; j >= 0; j--) {
        v -= diffcost;
        if (v < mxv - xfactor) break;
        V[j] = v;
      }
    }

    lo = j+1;

    /* Shrink the band from both sides to cells still within the window. */
    while (lo <= hi && V[lo] < mxv - xfactor)
      lo += 1;
    while (lo <= hi && V[hi] < mxv - xfactor)
      hi -= 1;

    /* From row 'bot' onwards the window is BLOCKCOST. */
    if (i == bot) xfactor = BLOCKCOST;

    /* Track the extreme (row - column) values reached by the band. */
    if (i-lo > mxr)
      mxr = i-lo;
    if (i-hi < mxl)
      mxl = i-hi;
  }

  rez.abpos = mxj;
  rez.bbpos = mxi;
  rez.ldiag = mxl;
  rez.hdiag = mxr;
  rez.score = mxv;
  return (&rez);
}

/*** MERGING INDEX HITS INTO TRAPEZOIDAL ZONES ***/

static Trapezoid *Build_Trapezoids(char const * const A, int const Alen,
                                   char const * const B, int const Blen,
                                   HitRecord const * const list,
                                   int const Hitlen, int * const Traplen)
{
  static Trapezoid *free = NULL;
  Trapezoid *traporder, *traplist, *tailend;
  Trapezoid *b, *f, *t;
  int i, inserted;
  int trapcount, traparea;

  trapcount
= 0; traparea = 0; traporder = NULL; traplist = NULL; for (i = 0; i < Hitlen; i++) { inserted = 0; f = NULL; for (b = traporder; b != NULL; b = t) { t = b->next; if (b->top < list[i].bstart - BPADDING) { trapcount += 1; traparea += (b->top - b->bot + 1) * (b->rgt - b->lft + 1); if (f == NULL) traporder = t; else f->next = t; b->next = traplist; traplist = b; } else if (list[i].diagonal > b->rgt + DPADDING) f = b; else if (list[i].diagonal >= b->lft - DPADDING) { if (list[i].diagonal < b->lft) b->lft = list[i].diagonal; if (list[i].diagonal > b->rgt) b->rgt = list[i].diagonal; if (list[i].bfinish > b->top) b->top = list[i].bfinish; if (f != NULL && f->rgt + DPADDING >= b->lft) { f->rgt = b->rgt; if (f->bot > b->bot) f->bot = b->bot; if (f->top < b->top) f->top = b->top; f->next = t; b->next = free; free = b; } else if (t != NULL && t->lft - DPADDING <= b->rgt) { b->rgt = t->rgt; if (b->bot > t->bot) b->bot = t->bot; if (b->top < t->top) b->top = t->top; b->next = t->next; t->next = free; free = t; t = b->next; f = b; } else f = b; inserted = 1; } else if (! inserted) { if (free == NULL) { free = (Trapezoid *)malloc(sizeof(Trapezoid)); if (free == NULL) OutOfMemory("Trapezoid scan list"); free->next = NULL; } if (f == NULL) f = traporder = free; else f = f->next = free; free = f->next; f->next = b; f->top = list[i].bfinish; f->bot = list[i].bstart; f->lft = f->rgt = list[i].diagonal; f = b; inserted = 1; } else f = b; } if (! 
inserted) { if (free == NULL) { free = (Trapezoid *)malloc(sizeof(Trapezoid)); if (free == NULL) OutOfMemory("Trapezoid scan list"); free->next = NULL; } if (f == NULL) f = traporder = free; else f = f->next = free; free = f->next; f->next = b; f->top = list[i].bfinish; f->bot = list[i].bstart; f->lft = f->rgt = list[i].diagonal; } } for (b = traporder; b != NULL; b = t) { t = b->next; trapcount += 1; traparea += (b->top - b->bot + 1) * (b->rgt - b->lft + 1); b->next = traplist; traplist = b; } { int lag, lst, lclip; int abot, atop; for (b = traplist; b != NULL; b = b->next) { lag = (b->bot-MAXIGAP)+1; if (lag < 0) lag = 0; lst = b->top+MAXIGAP; if (lst > Blen) lst = Blen; for (i = lag; i < lst; i++) { if (Map[(int) (B[i])] >= 0) { if (i-lag >= MAXIGAP) { if (lag - b->bot > 0) { if (free == NULL) { free = (Trapezoid *)malloc(sizeof(Trapezoid)); if (free == NULL) OutOfMemory("Trapezoid cutter"); free->next = NULL; } t = free->next; *free = *b; b->next = free; free = t; b->top = lag; b = b->next; b->bot = i; trapcount += 1; } else b->bot = i; } lag = i+1; } } if (i-lag >= MAXIGAP) b->top = lag; } tailend = NULL; for (b = traplist; b != NULL; b = b->next) { if (b->top - b->bot < KMERLEN) continue; abot = b->bot - b->rgt; atop = b->top - b->lft; lag = (abot - MAXIGAP) + 1; if (lag < 0) lag = 0; lst = atop + MAXIGAP; if (lst > Alen) lst = Alen; lclip = abot; for (i = lag; i < lst; i++) { if (Map[(int) (A[i])] >= 0) { if (i-lag >= MAXIGAP) { if (lag > lclip) { if (free == NULL) { free = (Trapezoid *)malloc(sizeof(Trapezoid)); if (free == NULL) OutOfMemory("Trapezoid cutter"); free->next = NULL; } t = free->next; *free = *b; b->next = free; free = t; { int x, m; x = lclip + b->lft; if (b->bot < x) b->bot = x; x = lag + b->rgt; if (b->top > x) b->top = x; m = (b->bot + b->top) / 2; x = m - lag; if (b->lft < x) b->lft = x; x = m - lclip; if (b->rgt > x) b->rgt = x; } b = b->next; trapcount += 1; } lclip = i; } lag = i+1; } } if (i-lag < MAXIGAP) lag = atop; { int x, m; x = 
lclip + b->lft; if (b->bot < x) b->bot = x; x = lag + b->rgt; if (b->top > x) b->top = x; m = (b->bot + b->top) / 2; x = m - lag; if (b->lft < x) b->lft = x; x = m - lclip; if (b->rgt > x) b->rgt = x; } tailend = b; } } if (tailend != NULL) { tailend->next = free; free = traplist; } *Traplen = trapcount; return (traplist); } /*** FINDING ALIGNMENTS WITHIN A TRAPEZOIDAL ZONE ***/ static int TSORT(const void *l, const void *r) { Trapezoid *x, *y; x = *((Trapezoid **) l); y = *((Trapezoid **) r); return (x->bot - y->bot); } static int StSORT(const void *l, const void *r) { Local_Segment *x, *y; x = (Local_Segment *) l; y = (Local_Segment *) r; if (x->abpos < y->abpos) return (-1); else if (x->abpos > y->abpos) return (1); else return (x->bbpos - y->bbpos); } static int FnSORT(const void *l, const void *r) { Local_Segment *x, *y; x = (Local_Segment *) l; y = (Local_Segment *) r; if (x->aepos < y->aepos) return (-1); else if (x->aepos > y->aepos) return (1); else return (x->bepos - y->bepos); } static Trapezoid **Tarray = NULL; static int *Covered; static Local_Segment *SegSols = NULL; static int SegMax = -1; static int NumSegs; static void Align_Recursion(char const * const A, int const Alen, char const * const B, int const Blen, Trapezoid const * const b, int const current, int const comp, int const MinLen, double const MaxDiff, int const Traplen) { int j, mid, indel; double pcnt; Local_Segment *hend, *lend; Trapezoid ltrp, htrp; mid = (b->bot + b->top) / 2; lend = TraceForwardPath(B,Blen,A,Alen,mid,mid-b->rgt,mid-b->lft); { int x = 0; do { x += 1; hend = TraceReversePath(B,Blen,A,Alen, lend->bepos,lend->aepos,lend->aepos, mid+MAXIGAP,BLOCKCOST+2*x*diffcost); } while (hend->bbpos > mid + x*MAXIGAP && hend->score < lend->score); hend->aepos = lend->aepos; hend->bepos = lend->bepos; } ltrp = htrp = *b; ltrp.top = min(b->top,hend->bbpos) - MAXIGAP; htrp.bot = max(b->bot,hend->bepos) + MAXIGAP; if (hend->bepos - hend->bbpos >= MinLen && hend->aepos - hend->abpos >= 
MinLen) { indel = abs( (hend->abpos - hend->bbpos) - (hend->aepos - hend->bepos) ); pcnt = (-hend->score+samecost*(hend->bepos-hend->bbpos))*1./ (1.*(MATCHCOST)*(hend->bepos-hend->bbpos)); if (pcnt <= MaxDiff) { hend->error = pcnt; for (j = current+1; j < Traplen; j++) { Trapezoid *t; int ta, tb, ua, ub; t = Tarray[j]; if (t->bot >= hend->bepos) break; tb = t->top - t->bot + 1; ta = t->rgt - t->lft + 1; if (t->lft < hend->ldiag) ua = hend->ldiag; else ua = t->lft; if (t->rgt > hend->hdiag) ub = hend->hdiag; else ub = t->rgt; if (ua > ub) continue; ua = ub - ua + 1; if (t->top > hend->bepos) ub = hend->bepos - t->bot + 1; else ub = tb; if (((1.*ua)/ta)*((1.*ub)/tb) > .99) Covered[j] = 1; } if (NumSegs >= SegMax) { SegMax = (int)(1.2*NumSegs) + 500; SegSols = (Local_Segment *) realloc(SegSols, sizeof(Local_Segment)*SegMax); if (SegSols == NULL) OutOfMemory("Segment Alignment array"); } { int d; d = hend->hdiag; /* Oops, diags to this point are b-a, not a-b. */ hend->hdiag = - (hend->ldiag); hend->ldiag = - d; if (comp) { hend->bbpos = Blen - hend->bbpos; hend->bepos = Blen - hend->bepos; hend->ldiag = Blen + hend->ldiag; hend->hdiag = Blen + hend->hdiag; } } SegSols[NumSegs++] = *hend; } } if (ltrp.top - ltrp.bot > MinLen && ltrp.top < b->top - MAXIGAP) Align_Recursion(A,Alen,B,Blen,<rp,current,comp,MinLen,MaxDiff,Traplen); if (htrp.top - htrp.bot > MinLen) Align_Recursion(A,Alen,B,Blen,&htrp,current,comp,MinLen,MaxDiff,Traplen); } static Local_Segment *Align_Trapezoids(char const * const A, int const Alen, char const * const B, int const Blen, Trapezoid const * const Traplist, int const Traplen, int const start, int const comp, int const MinLen, double const MaxDiff, int * const Seglen) { static int fseg; static int TarMax = -1; if (Traplen >= TarMax) { TarMax = (int)(1.2*Traplen) + 500; Tarray = (Trapezoid **) realloc(Tarray,(sizeof(Trapezoid *) + sizeof(int))*TarMax); if (Tarray == NULL) OutOfMemory("Trapezoid array"); Covered = (int *) (Tarray + TarMax); } if 
(SegMax < 0) { SegMax = 1000; SegSols = (Local_Segment *) malloc(sizeof(Local_Segment)*SegMax); if (SegSols == NULL) OutOfMemory("Segment Alignment array"); } { Trapezoid * b = (Trapezoid *)Traplist; int i; for (i = 0; i < Traplen; i++) { Tarray[i] = b; Covered[i] = 0; b = b->next; } } qsort(Tarray,Traplen,sizeof(Trapezoid *),TSORT); if (start) NumSegs = 0; fseg = NumSegs; { int i; for (i = 0; i < Traplen; i++) if (! Covered[i]) { Trapezoid * b = Tarray[i]; if (b->top - b->bot < KMERLEN) continue; //printf("Trying hit %d\n",i); Align_Recursion(A,Alen,B,Blen,b,i,comp,MinLen,MaxDiff,Traplen); } } if (NumSegs > fseg) { int i; int j=0; qsort(SegSols+fseg,NumSegs-fseg,sizeof(Local_Segment),StSORT); assert(j==0); for (i = fseg; i < NumSegs; i = j) { for (j = i+1; j < NumSegs; j++) { if (SegSols[j].abpos != SegSols[i].abpos) break; if (SegSols[j].bbpos != SegSols[i].bbpos) break; if (/* segments in opposite orientations */ ((SegSols[j].bepos-SegSols[j].bbpos) > 0 && (SegSols[i].bepos-SegSols[i].bbpos) < 0 ) || ((SegSols[j].bepos-SegSols[j].bbpos) < 0 && (SegSols[i].bepos-SegSols[i].bbpos) > 0 ) )break; if (SegSols[j].error <= MaxDiff && SegSols[i].error <= MaxDiff){ if (abs(SegSols[i].bepos-SegSols[i].bbpos)+abs(SegSols[i].aepos-SegSols[i].abpos) < abs(SegSols[j].bepos-SegSols[j].bbpos)+abs(SegSols[j].aepos-SegSols[j].abpos)) { SegSols[i].score=-1;i=j; } else { SegSols[j].score=-1; } } else { if(SegSols[j].error<=MaxDiff){ SegSols[i].score=-1; i=j; } else { SegSols[j].score=-1; } } } } qsort(SegSols+fseg,NumSegs-fseg,sizeof(Local_Segment),FnSORT); for ( i = fseg; i < NumSegs; i = j) { for (j = i+1; j < NumSegs; j++) { if (SegSols[j].abpos != SegSols[i].abpos) break; if (SegSols[j].bbpos != SegSols[i].bbpos) break; if (SegSols[j].score > SegSols[i].score) { SegSols[i].score = -1; i = j; } else SegSols[j].score = -1; } } for (i = fseg; i < NumSegs; i++) if (SegSols[i].score >= 0) SegSols[fseg++] = SegSols[i]; NumSegs = fseg; } *Seglen = NumSegs; return (SegSols); } /*** 
MASTER ROUTINE ***/ Local_Segment *Find_Local_Segments(char const * const A, int const Alen, char const * const B, int const Blen, int const Action, int const MinLen, double const MaxDiff, int * const Seglen) { static int DagMax = -1; static int BseqLen = -1; int numhit = 0; HitRecord *hits = 0L; int numtrap = 0; Trapezoid *traps = 0L; int numseg = 0; Local_Segment *segs = 0L; // Defining this causes scoring to be set so that nearly all // extensions terminate within the user-defined error rate; this // means we don't completely miss a high-quality segment that is // part of a larger segment of lower quality--e.g., we could find // more-conserved portions of a repeat even if the repeat as a // whole was lower fidelity, due to varying selectional pressure at // different positions; the down-side to this is that we won't // chain together perfect matches across a few bases of // lower-fidelity sequence. Leaving it undefined uses the #define // values of DIFFCOST and SAMECOST // samecost = (int)ceil(100.0 * MaxDiff); diffcost = 100 - samecost; MATCHCOST = samecost + diffcost; BLOCKCOST = diffcost * MAXIGAP; if (Action != LOCAL_FORW) { if(BseqLen= DagMax || Blen >= DagMax) { if (Kmask < 0) { int i; for (i = 0; i < 128; i++) Map[i] = -1; Map[(int)'a'] = Map[(int)'A'] = 0; Map[(int)'c'] = Map[(int)'C'] = 1; Map[(int)'g'] = Map[(int)'G'] = 2; Map[(int)'t'] = Map[(int)'T'] = 3; Kmask = (1 << (2*KMERLEN)) - 1; Table = (int *)malloc(sizeof(int)*(Kmask+2)); if (Table == NULL) OutOfMemory("K-mer index"); } if (Alen > Blen) DagMax = (int)(1.2*Alen) + 5000; else DagMax = (int)(1.2*Blen) + 5000; DagMax += sizeof(DiagRecord) - (DagMax % sizeof(DiagRecord)); Tuples = (int *)realloc(Tuples,sizeof(int)*DagMax + sizeof(DiagRecord)*(2*DagMax+MAXERROR+1)); if (Tuples == NULL) OutOfMemory("K-mer index"); DiagVec = ((DiagRecord *) (Tuples + DagMax)) + (DagMax+1); } TableBuild(A,Alen); int start = 1; if (Action != LOCAL_REVR) { hits = Find_Hits(A,Alen,B,Blen,&numhit); traps = 
Build_Trapezoids(A,Alen,B,Blen,hits,numhit,&numtrap); segs = Align_Trapezoids(A,Alen,B,Blen,traps,numtrap, start,0,MinLen,MaxDiff,&numseg); start = 0; } if (Action != LOCAL_FORW) { hits = Find_Hits(A,Alen,BrevC,Blen,&numhit); traps = Build_Trapezoids(A,Alen,BrevC,Blen,hits,numhit,&numtrap); segs = Align_Trapezoids(A,Alen,BrevC,Blen,traps,numtrap, start,1,MinLen,MaxDiff,&numseg); } *Seglen = numseg; return (segs); } kmer-code-2013-trunk/atac-driver/chainer/localalign/GF_ALN_pieceOlap.C0000644000000000000000000004173111040247445024147 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2004 Applera Corporation // Author: Clark Mobarry // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include #include "GF_ALN_local.H" typedef struct { char *aseg; char *bseg; } PAIRALIGN; // safely copy a substring of a string into static space which is // enlarged as needed // static int safe_substr(char **seg, int *segspace, const char *seq, int beg, int end){ if(*segspacechain[piece].piece.abpos, O->chain[piece].piece.aepos); if(iret == 0){ fprintf(stderr,"EXCEPTION get_trace: For aseg: len(aseg)=%d, len(bseg)=%d, alen=%d, blen=%d\n", (int)strlen(aseg),(int)strlen(bseg), alen,blen); return NULL; } iret = safe_substr(&bseg,&bsegspace,bseq,O->chain[piece].piece.bbpos, O->chain[piece].piece.bepos); if(iret == 0){ fprintf(stderr,"EXCEPTION get_trace: For bseg: len(aseg)=%d, len(bseg)=%d, alen=%d, blen=%d\n", (int)strlen(aseg),(int)strlen(bseg), alen,blen); return NULL; } alen=O->chain[piece].piece.aepos-O->chain[piece].piece.abpos; blen=O->chain[piece].piece.bepos-O->chain[piece].piece.bbpos; //printf("get_trace: len(aseg)=%d, len(bseg)=%d, alen=%d, blen=%d\n", // strlen(aseg),strlen(bseg), alen,blen); /* get trace for the segment from AS_ALN_OKNAlign */ spnt=0; /* subtract because Gene likes to point to one before string start */ aseg--; bseg--; segdiff=(int)((O->chain[piece].piece.aepos-O->chain[piece].piece.abpos) *(1.5*O->chain[piece].piece.error) +10); tmptrace=AS_ALN_OKNAlign(aseg,alen,bseg,blen,&spnt,segdiff); if(spnt!=0){ if(spnt>0){ O->chain[piece].agap+=spnt; O->chain[piece].piece.abpos+=spnt; i=0; while(tmptrace[i]!=0){ if(tmptrace[i]<0){ tmptrace[i]+=spnt; } i++; } } else { O->chain[piece].bgap+=-spnt; O->chain[piece].piece.bbpos+=-spnt; i=0; while(tmptrace[i]!=0){ if(tmptrace[i]>0){ tmptrace[i]+=spnt; } i++; } } } aseg++; /* restore because need to know where memory block is allocated, and so that next 
time around strncpy will work right! */ bseg++; i=0; while(tmptrace[i]!=0){ segtrace[which][i]=tmptrace[i]; i++; if(i==tracespace[which]){ tracespace[which]*=2; segtrace[which]=(int*)ckrealloc(segtrace[which], sizeof(int)*tracespace[which]); } } segtrace[which][i]=0; return(segtrace[which]); } static void safe_add_to_seg(char **seg,int pos,char c,int *len){ if(pos==*len){ (*len)=(*len)*2; *seg=(char*)ckrealloc(*seg,sizeof(char)*((*len)+1)); } (*seg)[pos]=c; } static PAIRALIGN *construct_pair_align(const char *aseq, const char *bseq, Local_Overlap *O, int piece, int *trace, int which){ static char *aseg[2]={NULL,NULL},*bseg[2]={NULL,NULL}; static int alen[2]={0,0},blen[2]={0,0}; static PAIRALIGN pairalign[2]; int starta,startb; int offseta,offsetb; int tpos,apos,bpos; if(aseg[which]==NULL){ alen[which]=blen[which]=1000; aseg[which]=(char*)ckalloc((alen[which]+1)*sizeof(char)); bseg[which]=(char*)ckalloc((blen[which]+1)*sizeof(char)); } starta=offseta=O->chain[piece].piece.abpos; startb=offsetb=O->chain[piece].piece.bbpos; tpos=0; apos=0; bpos=0; while(trace[tpos]!=0){ if(trace[tpos]<0){ for(;offseta<-trace[tpos]+starta-1;apos++,offseta++){ safe_add_to_seg(&(aseg[which]),apos,aseq[offseta],&(alen[which])); } safe_add_to_seg(&(aseg[which]),apos,'-',&(alen[which])); apos++; } else { for(;offsetbchain[piece].piece.aepos;apos++,offseta++){ safe_add_to_seg(&(aseg[which]),apos,aseq[offseta],&(alen[which])); } for(;offsetbchain[piece].piece.bepos;bpos++,offsetb++){ safe_add_to_seg(&(bseg[which]),bpos,bseq[offsetb],&(blen[which])); } assert(offseta==O->chain[piece].piece.aepos); assert(offsetb==O->chain[piece].piece.bepos); assert(offseta-O->chain[piece].piece.abpos+ offsetb-O->chain[piece].piece.bbpos+ tpos == apos+bpos); safe_add_to_seg(&(aseg[which]),apos,'\0',&(alen[which])); safe_add_to_seg(&(bseg[which]),bpos,'\0',&(blen[which])); pairalign[which].aseg=aseg[which]; pairalign[which].bseg=bseg[which]; return(pairalign+which); } static PAIRALIGN *get_align(const char 
*aseq,const char *bseq,Local_Overlap *O,int piece, int which){ int *trace=get_trace(aseq,bseq,O,piece,which); if(trace == NULL) return NULL; PAIRALIGN *pairalign = construct_pair_align(aseq,bseq,O,piece,trace,which); return(pairalign); } void fix_overlapping_pieces(const char *aseq, const char *bseq, Local_Overlap *O,int piece0, int piece1){ PAIRALIGN *pair_align1,*pair_align2; int offseta1,offsetb1,offseta2,offsetb2; int bestend1a,bestend1b,bestbeg2a,bestbeg2b; int into1,into2,bestinto2=0; int errs1,errs2,minerrs; assert(O->chain[piece0].piece.aepos>=O->chain[piece1].piece.abpos|| O->chain[piece0].piece.bepos>=O->chain[piece1].piece.bbpos); assert(O->chain[piece0].piece.aepos<=O->chain[piece1].piece.aepos); assert(O->chain[piece0].piece.bepos<=O->chain[piece1].piece.bepos); /* create alignments for the two segments */ pair_align1=get_align(aseq,bseq,O,piece0,0); pair_align2=get_align(aseq,bseq,O,piece1,1); if(pair_align1 == NULL || pair_align2 == NULL){ fprintf(stderr,"EXCEPTION pair_align1=%p pair_align2=%p\n", pair_align1, pair_align2); fprintf(stderr,"EXCEPTION while fixing gap(%d,%d) (%d,%d)---(%d,%d) vs. 
gap(%d,%d) (%d,%d)---(%d,%d)\n", O->chain[piece0].agap, O->chain[piece0].bgap, O->chain[piece0].piece.abpos,O->chain[piece0].piece.bbpos, O->chain[piece0].piece.aepos,O->chain[piece0].piece.bepos, O->chain[piece1].agap, O->chain[piece1].bgap, O->chain[piece1].piece.abpos,O->chain[piece1].piece.bbpos, O->chain[piece1].piece.aepos,O->chain[piece1].piece.bepos); } if(pair_align1 == NULL){ fprintf(stderr,"EXCEPTION Fixing by pseudo-deleting piece0.\n"); O->chain[piece0].agap=0; O->chain[piece0].bgap=0; if(piece0>0){ O->chain[piece0].piece.abpos=O->chain[piece0-1].piece.aepos; O->chain[piece0].piece.aepos=O->chain[piece0-1].piece.aepos; O->chain[piece0].piece.bbpos=O->chain[piece0-1].piece.bepos; O->chain[piece0].piece.bepos=O->chain[piece0-1].piece.bepos; } else { O->chain[piece0].piece.abpos=0; O->chain[piece0].piece.aepos=0; O->chain[piece0].piece.bbpos=0; O->chain[piece0].piece.bepos=0; } O->chain[piece1].agap = O->chain[piece1].piece.abpos - O->chain[piece0].piece.aepos; O->chain[piece1].bgap = O->chain[piece1].piece.bbpos - O->chain[piece0].piece.bepos; return; } if(pair_align2 == NULL){ fprintf(stderr,"EXCEPTION Fixing by pseudo-deleting piece1.\n"); O->chain[piece1].agap=0; O->chain[piece1].bgap=0; O->chain[piece1].piece.abpos = O->chain[piece0].piece.aepos; O->chain[piece1].piece.aepos = O->chain[piece0].piece.aepos; O->chain[piece1].piece.bbpos = O->chain[piece0].piece.bepos; O->chain[piece1].piece.bepos = O->chain[piece0].piece.bepos; if(piece1+1<=O->num_pieces){ O->chain[piece1+1].agap=O->chain[piece1+1].piece.abpos - O->chain[piece0].piece.aepos; O->chain[piece1+1].bgap=O->chain[piece1+1].piece.bbpos - O->chain[piece0].piece.bepos; } return; } /* if, in finding the alignments, we shift the ends of the alignment of the first segment to after the starts of the alignment of the second segment, then the overlap has been resolved, so we do nothing more */ if(!(O->chain[piece0].piece.aepos>=O->chain[piece1].piece.abpos|| 
O->chain[piece0].piece.bepos>=O->chain[piece1].piece.bbpos)){ return; } /* if, in finding the alignments, we shift the end of the alignment of the second segment to before the start of the alignment of the first segment, then the second is contained in the first and we need to do something exceptional; the most heuristic, but consistent with the practice elsewhere in the local overlapper, is to pseudo-delete the second segment */ if(!(O->chain[piece0].piece.aepos<=O->chain[piece1].piece.aepos)|| !(O->chain[piece0].piece.bepos<=O->chain[piece1].piece.bepos)){ O->chain[piece1].agap=0; O->chain[piece1].bgap=0; O->chain[piece1].piece.abpos=O->chain[piece0].piece.aepos; O->chain[piece1].piece.aepos=O->chain[piece0].piece.aepos; O->chain[piece1].piece.bbpos=O->chain[piece0].piece.bepos; O->chain[piece1].piece.bepos=O->chain[piece0].piece.bepos; if(piece1+1<=O->num_pieces){ O->chain[piece1+1].agap=O->chain[piece1+1].piece.abpos- O->chain[piece0].piece.aepos; O->chain[piece1+1].bgap=O->chain[piece1+1].piece.bbpos- O->chain[piece0].piece.bepos; } return; } /* if, in finding the alignments, we shift the start of the alignment of the first segment to after the start of the alignment of the second segment, then the first is contained in the second and we need to do something exceptional; the most heuristic, but consistent with the practice elsewhere in the local overlapper, is to pseudo-delete the first segment */ if(O->chain[piece0].piece.abpos>O->chain[piece1].piece.abpos|| O->chain[piece0].piece.bbpos>O->chain[piece1].piece.bbpos){ O->chain[piece0].agap=0; O->chain[piece0].bgap=0; if(piece0>0){ O->chain[piece0].piece.abpos=O->chain[piece0-1].piece.aepos; O->chain[piece0].piece.aepos=O->chain[piece0-1].piece.aepos; O->chain[piece0].piece.bbpos=O->chain[piece0-1].piece.bepos; O->chain[piece0].piece.bepos=O->chain[piece0-1].piece.bepos; } else { O->chain[piece0].piece.abpos=0; O->chain[piece0].piece.aepos=0; O->chain[piece0].piece.bbpos=0; O->chain[piece0].piece.bepos=0; } 
O->chain[piece1].agap=O->chain[piece1].piece.abpos- O->chain[piece0].piece.aepos; O->chain[piece1].bgap=O->chain[piece1].piece.bbpos- O->chain[piece0].piece.bepos; return; } /* find start of region for evaluation in first alignment */ /* when done, offseta1 and offsetb1 should be the offsets into the sequences such that they correspond to a column in the alignment of the first segment and that column contains the first possible overlap with the second segment */ offseta1=O->chain[piece0].piece.abpos; offsetb1=O->chain[piece0].piece.bbpos; into1=0; while(offseta1chain[piece1].piece.abpos&& offsetb1chain[piece1].piece.bbpos){ assert(pair_align1->aseg[into1]!='\0'); assert(pair_align1->bseg[into1]!='\0'); if(pair_align1->aseg[into1]!='-')offseta1++; if(pair_align1->bseg[into1]!='-')offsetb1++; into1++; } // if(pair_align1->aseg[into1-1]!='-')offseta1--; // if(pair_align1->bseg[into1-1]!='-')offsetb1--; /* count mismatches in the second alignment */ into2=0; errs2=0; while(pair_align2->aseg[into2]!='\0'){ assert(pair_align2->bseg[into2]!='\0'); if(pair_align2->aseg[into2]!=pair_align2->bseg[into2]){ errs2++; } into2++; } /* initialize solution variables and auxiliaries */ into2=0; errs1 = (pair_align1->aseg[into1]!=pair_align1->bseg[into1] ? 1 : 0); minerrs=errs2; offseta2=O->chain[piece1].piece.abpos; offsetb2=O->chain[piece1].piece.bbpos; bestend1a=offseta1 - (pair_align1->aseg[into1-1]!='-' ? 1 : 0); bestend1b=offsetb1 - (pair_align1->bseg[into1-1]!='-' ? 1 : 0); bestbeg2a=offseta2; bestbeg2b=offsetb2; /* while there is potential overlap still to come ... */ while(pair_align1->aseg[into1]!='\0'&&pair_align2->aseg[into2]!='\0'){ // Once, we did the following assert, assuming that the alignment // of pair_align2 would not run out before pair_align1, since otherwise // there would be a containment or some such that shouldn't happen; // But, as luck would have it, alignment trimming quirks etc can // make it happen. So ... 
no more assert // // assert(pair_align2->aseg[into2]!='\0'); /* while a position in the second segment is no greater than the position in the first segment, check for mismatch in second segment, counting errors, incrementing the sequence position counters as appropriate; advance the second segment position */ while(offseta1>=offseta2||offsetb1>=offsetb2){ errs2-= (pair_align2->aseg[into2]!=pair_align2->bseg[into2] ? 1 : 0); offseta2+= ( pair_align2->aseg[into2]!='-' ? 1 : 0 ); offsetb2+= ( pair_align2->bseg[into2]!='-' ? 1 : 0 ); into2++; if(pair_align2->aseg[into2]=='\0'){ break; } // assert(pair_align2->aseg[into2]!='\0'); // assert(pair_align2->bseg[into2]!='\0'); } if(errs1+errs2<=minerrs&& pair_align1->aseg[into1]==pair_align1->bseg[into1]){ minerrs=errs1+errs2; bestend1a=offseta1 /* -(pair_align1->aseg[into1-1]!='-' ? 1 : 0 )*/; bestend1b=offsetb1 /* -(pair_align1->bseg[into1-1]!='-' ? 1 : 0 )*/; bestbeg2a=offseta2; bestbeg2b=offsetb2; bestinto2=into2; } /* while the positions in the first segment are no greater than the positions in the second segment, check for mismatch in first segment, counting errors, incrementing the sequence position counters as appropriate; advance the first segment position */ while(offseta1aseg[into1]!='-' ? 1 : 0 ); offsetb1+= ( pair_align1->bseg[into1]!='-' ? 1 : 0 ); into1++; errs1+= (pair_align1->aseg[into1]!=pair_align1->bseg[into1] ? 
1 : 0); if(pair_align1->aseg[into1]=='\0'){ break; } } } if(bestend1achain[piece0].piece.aepos) bestend1a++; if(bestend1bchain[piece0].piece.bepos) bestend1b++; O->chain[piece0].piece.aepos=bestend1a; O->chain[piece0].piece.bepos=bestend1b; O->chain[piece1].piece.abpos=bestbeg2a; O->chain[piece1].piece.bbpos=bestbeg2b; O->chain[piece1].agap=bestbeg2a-bestend1a; O->chain[piece1].bgap=bestbeg2b-bestend1b; assert(O->chain[piece1].agap>=0); assert(O->chain[piece1].bgap>=0); assert(O->chain[piece1].agap==0||O->chain[piece1].bgap==0); // now, adjust the beginning of the second piece to skip any mismatches while(pair_align2->aseg[bestinto2]!=pair_align2->bseg[bestinto2]&& pair_align2->aseg[bestinto2]!='\0'){ bestbeg2a += ( pair_align2->aseg[bestinto2]!='-' ? 1 : 0 ); bestbeg2b += ( pair_align2->bseg[bestinto2]!='-' ? 1 : 0 ); bestinto2++; } O->chain[piece1].piece.abpos=bestbeg2a; O->chain[piece1].piece.bbpos=bestbeg2b; O->chain[piece1].agap=bestbeg2a-bestend1a; O->chain[piece1].bgap=bestbeg2b-bestend1b; assert(O->chain[piece1].piece.abpos<=O->chain[piece1].piece.aepos); assert(O->chain[piece1].piece.bbpos<=O->chain[piece1].piece.bepos); } kmer-code-2013-trunk/atac-driver/chainer/localalign/GF_ALN_loverlapper.C0000644000000000000000000004601511040247445024601 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2004 Applera Corporation // Author: Clark Mobarry // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include #include "GF_ALN_local.H" #define min(a,b) (ab?a:b) /* Handle Local_Overlap pieces (local segments) which overlap, * by trimming them back until they abut, * such that the number of mismatches is minimized */ void fix_overlapping_pieces(const char *aseq, const char *bseq, Local_Overlap *O, int piece0, int piece1); //maximum number of matching segments that can be pieced together int MaxGaps= 3; //maximum allowed mismatch at end of overlap int MaxBegGap= 200; //maximum allowed mismatch at end of overlap int MaxEndGap= 200; //biggest gap internal to overlap allowed int MaxInteriorGap=400; //whether to treat the beginning of the b fragment and // the end of the a fragment as allowed to have more error int asymmetricEnds=0; //amount of mismatch at end of the overlap that can cause // an overlap to be rejected int MaxFreeFlap=20; // set useSizeToOrderBlocks to 1 to get block mismatches resolved such that // the smaller block comes first: // // .........AA------......... // .........--BBBBBB........ // // or set it to 0 to get resolution with the A block always first int useSizeToOrderBlocks = 1; /* Create a trace to be interpreted as with DP_Compare_AS, but based on a Local_Overlap record. A Local_Segment within the overlap will be aligned using AS_ALN_OKNAlign(), generating a subtrace. 
Subtraces, with their indices appropriately adjusted, will be spliced together by an encoding of the gaps between segments; for now, we'll simply insert gaps as follows: A "gap" with x bases in A and y bases in B will become a section of the alignment x+y positions long, with the A fragment first and the B fragment second (with '=' indicating an aligned match): ====AAAAAAAAAA--------------====== ====----------BBBBBBBBBBBBBB====== Obviously, a more compact treatment is possible, but this makes clear the presumptive blocks involved in the mismatch; also, Karin says that it will make Consensus happy. One slight change Assumptions: (do they matter?) By the usual conventions, the ahang should be nonnegative, the bhang negative only if the ahang is positive, and both sequences should be in the forward orientation. */ int *AS_Local_Trace(Local_Overlap *O, const char *aseq, const char *bseq){ static int *TraceBuffer=NULL; int i,j,k,segdiff,*segtrace; int lastgood=-1; static int allocatedspace=0; int tracespace=0; static char *aseg=NULL,*bseg=NULL; // Not computing traces generates slight differences. Why? // const int computeTraceFlag = 0; static int aseglen=0,bseglen=0; int abeg=0,bbeg=0; /* begining of segment; overloaded */ int tracep=0; /* index into TraceBuffer */ int spnt=0; /* to pass to AS_ALN_OKNAlign */ { int n=O->num_pieces; assert(O->num_pieces>0); O->chain[n].piece.abpos=O->chain[n].agap+O->chain[n-1].piece.aepos; O->chain[n].piece.bbpos=O->chain[n].bgap+O->chain[n-1].piece.bepos; } if(computeTraceFlag){ /*Estimate length required to store trace*/ tracespace=0; tracespace+=abs(O->begpos)+abs(O->endpos); for(i=0;i<=O->num_pieces;i++){ tracespace+=max(O->chain[i].agap,1); tracespace+=max(O->chain[i].bgap,1); tracespace+=(int)((O->chain[i].piece.aepos -O->chain[i].piece.abpos) *1.5*O->chain[i].piece.error); tracespace+=1000; } /*(Re)allocate space for the trace as necessary; Note that this is persistent storage so ... ... 
it doesn't need to get allocated on every call ... it shouldn't get freed ... results stored here need to be copied elsewhere if they are to be saved */ if(allocatedspacenum_pieces;i++){ /* if conditions indicate the segment was deleted in previous loop, skip! */ if(O->chain[i].agap==0 && O->chain[i].bgap==0 && O->chain[i].piece.abpos==O->chain[i].piece.aepos && O->chain[i].piece.bbpos==O->chain[i].piece.bepos){ continue; } /* guesstimate the required number of diagonals/edits to consider to get optimal alignment */ segdiff=1+(int)((O->chain[i].piece.aepos -O->chain[i].piece.abpos) *1.5*O->chain[i].piece.error); /* Building an alignment/trace under the usual assumptions does not allow a given position in one sequence to simultaneously align to two or more positions in the other sequence. However, the Find_Local_Overlap() routine can chain together local alignment segments that overlap. In order to make the local overlaps compatible with everything else we do, we need to trim back the overlaps. Since we will "output" this segment at the end of the loop, we need to fix its overlap with the following segment in this cycle through the loop */ k=i+1; while(knum_pieces){ /* if conditions indicate the segment was deleted previously, skip! 
*/ if(O->chain[k].agap==0 && O->chain[k].bgap==0 && O->chain[k].piece.abpos==O->chain[k].piece.aepos && O->chain[k].piece.bbpos==O->chain[k].piece.bepos){ k++; continue; } if(O->chain[k].piece.abposchain[i].piece.aepos|| O->chain[k].piece.bbposchain[i].piece.bepos){ /* handle possibility of the first segment being contained within the second;originally simply asserted against this; now, try to handle by deleting first segment */ if(O->chain[i].piece.abpos>O->chain[k].piece.abpos|| O->chain[i].piece.bbpos>O->chain[k].piece.bbpos){ O->chain[i].agap=0; O->chain[i].bgap=0; if(lastgood>=0){ O->chain[i].piece.abpos=O->chain[lastgood].piece.aepos; O->chain[i].piece.aepos=O->chain[lastgood].piece.aepos; O->chain[i].piece.bbpos=O->chain[lastgood].piece.bepos; O->chain[i].piece.bepos=O->chain[lastgood].piece.bepos; } else { O->chain[i].piece.abpos=0; O->chain[i].piece.aepos=0; O->chain[i].piece.bbpos=0; O->chain[i].piece.bepos=0; } O->chain[k].agap=O->chain[k].piece.abpos- O->chain[i].piece.aepos; O->chain[k].bgap=O->chain[k].piece.bbpos- O->chain[i].piece.bepos; if(lastgood<0){ //printf("Shrinking gaps for segment %d\n",k); O->chain[k].agap--; O->chain[k].bgap--; } } else /* otherwise, check for 2nd piece contained within first */ if(O->chain[i].piece.aepos>O->chain[k].piece.aepos|| O->chain[i].piece.bepos>O->chain[k].piece.bepos){ /* if the next piece is completely within current piece, effectively remove it */ O->chain[k].agap = 0; O->chain[k].bgap = 0; O->chain[k].piece.abpos=O->chain[i].piece.aepos; O->chain[k].piece.aepos=O->chain[i].piece.aepos; O->chain[k].piece.bbpos=O->chain[i].piece.bepos; O->chain[k].piece.bepos=O->chain[i].piece.bepos; if(k+1<=O->num_pieces){ int l; l=k-1; while(O->chain[l].agap==0 && O->chain[l].bgap==0 && O->chain[l].piece.abpos==O->chain[l].piece.aepos && O->chain[l].piece.bbpos==O->chain[l].piece.bepos){ l--; assert(l>=0); } O->chain[k+1].agap=O->chain[k+1].piece.abpos- O->chain[l].piece.aepos; O->chain[k+1].bgap=O->chain[k+1].piece.bbpos- 
O->chain[l].piece.bepos; } /* else, fix the overlap */ } else { fix_overlapping_pieces(aseq, bseq, O,i,k); // if the second piece disappeared if(O->chain[k].piece.abpos==O->chain[k].piece.aepos|| O->chain[k].piece.bbpos==O->chain[k].piece.bepos){ O->chain[k].agap = 0; O->chain[k].bgap = 0; O->chain[k].piece.abpos=O->chain[i].piece.aepos; O->chain[k].piece.aepos=O->chain[i].piece.aepos; O->chain[k].piece.bbpos=O->chain[i].piece.bepos; O->chain[k].piece.bepos=O->chain[i].piece.bepos; if(k+1<=O->num_pieces){ int l; l=k-1; while(O->chain[l].agap==0 && O->chain[l].bgap==0 && O->chain[l].piece.abpos==O->chain[l].piece.aepos && O->chain[l].piece.bbpos==O->chain[l].piece.bepos){ l--; assert(l>=0); } O->chain[k+1].agap=O->chain[k+1].piece.abpos- O->chain[l].piece.aepos; O->chain[k+1].bgap=O->chain[k+1].piece.bbpos- O->chain[l].piece.bepos; } } else { // if the first piece disappeared if (O->chain[i].piece.abpos==O->chain[i].piece.aepos|| O->chain[i].piece.bbpos==O->chain[i].piece.bepos){ O->chain[i].agap=0; O->chain[i].bgap=0; if(lastgood>=0){ O->chain[i].piece.abpos=O->chain[lastgood].piece.aepos; O->chain[i].piece.aepos=O->chain[lastgood].piece.aepos; O->chain[i].piece.bbpos=O->chain[lastgood].piece.bepos; O->chain[i].piece.bepos=O->chain[lastgood].piece.bepos; } else { O->chain[i].piece.abpos=0; O->chain[i].piece.aepos=0; O->chain[i].piece.bbpos=0; O->chain[i].piece.bepos=0; } O->chain[k].agap=O->chain[k].piece.abpos- O->chain[i].piece.aepos; O->chain[k].bgap=O->chain[k].piece.bbpos- O->chain[i].piece.bepos; if(lastgood<0){ //printf("Shrinking gaps for segment %d\n",k); O->chain[k].agap--; O->chain[k].bgap--; } } } } } k++; } /* if conditions indicate the segment was deleted previously, skip! 
*/ if(O->chain[i].agap==0 && O->chain[i].bgap==0 && O->chain[i].piece.abpos==O->chain[i].piece.aepos && O->chain[i].piece.bbpos==O->chain[i].piece.bepos){ continue; } /* set up positions before which gaps are inserted to handle the gap portion of a chain piece */ /* put gaps before beginning of aligned piece (but after the portion of aseq in the gap); location is relative to the beginning of the alignment (i.e., ignores ahang worth of positions) */ if(i!=O->num_pieces){ abeg=O->chain[i].piece.abpos; } else { assert(lastgood>=0&&lastgoodnum_pieces); abeg=O->chain[lastgood].piece.aepos+ O->chain[i].agap; } /*handle boundary case to prevent gaps preceding the b sequence*/ if((i==0||lastgood<0)&&O->chain[i].bgap>0){ assert(O->chain[i].agap>=0); if(O->begpos>=0){ O->begpos=O->chain[i].piece.abpos-1; assert(O->begpos>=0); O->chain[i].agap=0; // Instead of asserting, an ifdef previously printed stuff // out and continued happily along. // assert( ( i==0&& O->chain[i].bgap==O->chain[i].piece.bbpos-1) ||( i>0&&lastgood<0&&O->chain[i].bgap==O->chain[i].piece.bbpos-1)) ; if(lastgood<0){ O->chain[i].bgap=O->chain[i].piece.bbpos-1; } } else { if(i==0){ O->begpos-=O->chain[i].bgap; } else{ O->begpos=-O->chain[i].bgap; } O->chain[i].bgap=0; } } /* now prevent gaps at end of A sequence */ // if(i==O->num_pieces&&O->endpos>=0){ // O->endpos+=O->chain[i].bgap; // O->chain[i].bgap=0; // } /* now make sure that end mismatches are treated by tucking the shorter tail into a gap before the longer tail, or, ifdef FORCEPOSITIVEBHANG, by tucking the A tail into a gap before the B tail */ if(i==O->num_pieces){ if(O->endpos>=0){ O->endpos+=O->chain[i].bgap; O->chain[i].bgap=0; }else { O->endpos-=O->chain[i].agap; abeg-=O->chain[i].agap; O->chain[i].agap=0; } } /* put gaps before the portion of bseq in the gap; for the first piece, this means before position 0 */ if(i==0 || lastgood<0){ bbeg = 1-min(O->begpos,0); } else { assert(lastgoodnum_pieces); bbeg = O->chain[lastgood].piece.bepos; } /* 
now insert the right number of gaps! */ if(i==O->num_pieces){ if(O->endpos<0){ O->chain[i].agap+=-O->endpos; O->endpos=0; } else { O->chain[i].bgap+=O->endpos; O->endpos=0; } if(O->chain[i].agap <= O->chain[i].bgap){ O->endpos=O->chain[i].bgap; O->chain[i].bgap=0; }else{ O->endpos=-O->chain[i].agap; O->chain[i].agap=0; } } if (computeTraceFlag) { if(O->chain[i].agap <= O->chain[i].bgap || ! useSizeToOrderBlocks ){ /* start by putting len(agap) gaps before the chunk of B in the gap */ for(j=0; jchain[i].agap ;j++) TraceBuffer[tracep++]=bbeg; /* then put len(bgap) gaps before the chunk of A in the gap */ for(j=0; jchain[i].bgap ;j++) TraceBuffer[tracep++]=-abeg; } else { // if the bgap is smaller, abeg-=O->chain[i].agap; bbeg+=O->chain[i].bgap; /* start by putting len(bgap) gaps before the chunk of A in the gap */ for(j=0;jchain[i].bgap ;j++) TraceBuffer[tracep++]=-abeg; /* then put len(agap) gaps before the chunk of B in the gap */ for(j=0;jchain[i].agap ;j++) TraceBuffer[tracep++]=bbeg; } } else { // Not computing traces! if(O->chain[i].agap <= O->chain[i].bgap || ! useSizeToOrderBlocks ){ } else { abeg-=O->chain[i].agap; bbeg+=O->chain[i].bgap; } } /////////////////////////////////////// /* if last piece, there is no aligned segment */ if(i==O->num_pieces)break; /* set bbeg to beginning of aligned segment for piece */ abeg=O->chain[i].piece.abpos; bbeg=O->chain[i].piece.bbpos; /* set lengths of segments */ int alen=O->chain[i].piece.aepos-abeg; /* check +1?? */ int blen=O->chain[i].piece.bepos-bbeg; /* check +1?? */ /* create strings for just the parts of the sequences in the aligned segment */ /* make sure there is (persistant) space for the strings */ if(aseglen\n", aseg); fprintf(stderr,"EXCEPTION bseg=<%s>\n", bseg); fprintf(stderr,"EXCEPTION aseq=<%s>\n", aseq + 1); fprintf(stderr,"EXCEPTION bseq=<%s>\n", bseq + 1); return NULL; // Return an exceptional value. 
} /* guesstimate the required number of diagonals/edits to consider to get optimal alignment */ segdiff = 1 + (int)((O->chain[i].piece.aepos - O->chain[i].piece.abpos) * 1.5 * O->chain[i].piece.error); /* get trace for the segment from AS_ALN_OKNAlign */ spnt=0; /* subtract from aseg, bseg because Gene likes to index from 1, not 0 */ segtrace=AS_ALN_OKNAlign(aseg-1,alen,bseg-1,blen,&spnt,segdiff); // This adjusts the beginning coordinates so that segment is // consistent with the back-trace. if(spnt>0){ O->chain[i].agap+=spnt; O->chain[i].piece.abpos+=spnt; } else { O->chain[i].bgap-=spnt; O->chain[i].piece.bbpos-=spnt; } /* get trace for the segment from AS_ALN_OKNAffine */ /* Seems like it should be a good idea, but doesn't work as well as we might expect! */ //bpnt=0; //epnt=0; //segtrace=AS_ALN_OKNAffine(aseg,alen,bseg,blen,&bpnt,&epnt,segdiff); assert(segtrace!=NULL); /* Now copy the segment trace into master trace, adjusting positions */ j=0; if(spnt<0){ for(int ctr=0;ctr= allocatedspace) fprintf(stderr,"ERROR memory is already corrupted in %s at %d.\n", __FILE__, __LINE__); assert(tracep < allocatedspace); } return(TraceBuffer); } kmer-code-2013-trunk/atac-driver/chainer/localalign/GF_ALN_dpaligner.C0000644000000000000000000001123410546446644024222 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2004 Applera Corporation // Author: Clark Mobarry // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
//
// You should have received (LICENSE.txt) a copy of the GNU General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

/* Dynamic programming sequence comparison of two fragments.  General
   purpose utility that uses bit-vector d.p. for detection (see, "A Fast
   Bit-Vector Algorithm for Approximate String Matching on Dynamic
   Programming" J. ACM., to appear, by Gene Myers.) and the O(kn) greedy
   algorithm for alignment delivery (see "An O(ND) Difference Algorithm
   and Its Variations" Algorithmica 1 (1986), 251-266, by Gene Myers.)
   Both papers can be downloaded from
   "http://www.cs.arizona.edu/people/gene/vita.html"
*/

/* NOTE(review): in this dump the include block reads
 *     #include #include #include #include #include #include "GF_ALN_local.H"
 * i.e. the five system header names were lost.  Only <stdlib.h> (for
 * realloc) is needed by the code below; restore the remaining includes,
 * including "GF_ALN_local.H", from the pristine source.
 */
#include <stdlib.h>

/* O(kn) identity-based alignment algorithm.  Find alignment between
   a and b (of lengths alen and blen), that begins at finishing
   boundary position *spnt.  Return at *spnt the diagonal at which the
   alignment starts.

   Both sequences are indexed from 1: a[1..alen] and b[1..blen] are read,
   a[0] and b[0] are never examined.

   diff is the caller's estimate of the number of differences; it is used
   only to size the work area (and diff == 0 short-circuits to an empty
   trace).

   Returns a zero-terminated trace held in static storage that is reused
   by the next call: a negative entry -i and a positive entry j are the
   two kinds of gap record, a substitution produces no entry.

   Returns NULL when the work area cannot be allocated, or when the
   alignment needs more difference waves than the work area was sized
   for.  The original code kept writing past the end of the work area in
   the latter case.
*/
int *AS_ALN_OKNAlign(const char *a, int alen, const char *b, int blen,
                     int *spnt, int diff)
{ int diag, wpos, level;
  int fcell, infinity;

  static int  Wtop = -1;        /* Waves 0..Wtop+1 fit in the work area */
  static int *Wave;             /* NULL until the first allocation      */
  static int *TraceBuffer;      /* Lives directly behind the wave cells */

  /* Space for diff wave?  Also allocate when nothing has been allocated
     yet, which the "diff >= Wtop" test alone misses for diff < -1. */

  if (Wave == NULL || diff >= Wtop)
    { int     cap, *newp;
      size_t  del;

      cap = (int)(1.2 * (diff > 0 ? diff : 0)) + 50;

      /* Wave l occupies 2*l+5 cells, so waves 0..cap need (cap+5)*(cap+1)
         cells; the trace needs at most cap entries plus the terminator.
         The product is formed in size_t so that it cannot overflow int. */

      del  = (size_t)(cap + 5) * (size_t)(cap + 1);
      newp = (int *) realloc(Wave, (del + (size_t)(cap + 1)) * sizeof(int));
      if (newp == NULL)
        return (NULL);

      Wtop        = cap - 1;
      Wave        = newp;
      TraceBuffer = Wave + del;
    }

  diag     = (alen-blen) + (*spnt);    /* Finish diagonal. */
  infinity = blen+2;

  /* Process 0-wave. */

  { int i, j;

    if (diff == 0) goto zeroscript;

    if ((*spnt) < 0)                   /* (i,j) = initial boundary pt. */
      j = blen;
    else
      j = blen - (*spnt);
    i = diag + j;

    while (1)                          /* Slide back along the diagonal */
      { if (i <= 0 || j <= 0) goto zeroscript;
        if (a[i] != b[j]) break;
        i -= 1;
        j -= 1;
      }

    Wave[0] = Wave[1] = infinity;
    Wave[2] = j;
    Wave[3] = Wave[4] = infinity;
  }

  /* Compute waves 1 through d-1 do, each wave has
     two boundary cells at each of its ends.       */

  { int m, n, k;

    m = 5;                             /* Next cell to write          */
    n = 0;                             /* Read cursor, previous wave  */
    for (level = 1; 1; level++)
      {
        /* The work area holds waves 0..Wtop+1 only; give up rather than
           write beyond it. */
        if (level > Wtop + 1)
          return (NULL);

        Wave[m++] = infinity;
        Wave[m++] = infinity;
        n += 1;
        for (k = -level; k <= level; k++)
          { int i, j;

            j = Wave[n] - 1;                    /* same diagonal      */
            if ((i = Wave[n-1]-1) < j)          /* from diagonal k-1  */
              j = i;
            if ((i = Wave[n+1]) < j)            /* from diagonal k+1  */
              j = i;

            i = (diag+k) + j;
            while (1)
              { if (i <= 0 || j <= 0)           /* Reached a boundary */
                  { if (i <= 0)
                      *spnt = -j;
                    else
                      *spnt = i;
                    goto madeit;
                  }
                if (a[i] != b[j]) break;
                i -= 1;
                j -= 1;
              }

            Wave[m++] = j;
            n += 1;
          }
        Wave[m++] = infinity;
        Wave[m++] = infinity;
        n += 1;
      }

madeit:
    fcell = n;
    wpos  = k;
  }

  /* Trace back through wave structure and record
     trace of the alignment traced.                */

  { int d, n, k, t;

    t = 0;
    n = fcell;
    k = wpos;
    for (d = level-1; d >= 0; d--)
      { int i, j, m;

        j = Wave[m=n]-1;
        if ((i = Wave[n-1]-1) < j)
          { j = i; m = n-1; }
        if ((i = Wave[n+1]) < j)
          { j = i; m = n+1; }

        if (m < n)
          { TraceBuffer[t++] = - ((diag+k) + (j+1));
            k -= 1;
          }
        else if (m > n)
          { TraceBuffer[t++] = j+1;
            k += 1;
          }

        n = m - (2*d+4);               /* Same diagonal, one wave earlier */
      }
    TraceBuffer[t] = 0;
  }

  return (TraceBuffer);

  /* If perfect match, you're done. */

zeroscript:
  TraceBuffer[0] = 0;
  *spnt = diag;
  return (TraceBuffer);
}

/* Archive member header found at this point in the dump:
 *   kmer-code-2013-trunk/atac-driver/chainer/localalign/GF_ALN_local.H0000644000000000000000000003037410454652152023351 0ustar rootroot
 */

/**************************************************************************
 * This file is part of A2Amapper.
 * Copyright (c) 2004 Applera Corporation
 * Author: Clark Mobarry
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
/* (Remainder of the GF_ALN_local.H license header.)
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received (LICENSE.txt) a copy of the GNU General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 **************************************************************************/

#ifndef CA_ALN_local_h
#define CA_ALN_local_h

/* NOTE(review): the name after the single original "#include" was lost in
   this dump.  These three headers are what the helpers below need. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Allocate size bytes or terminate the program.
 *
 * size must be greater than zero (asserted).  Never returns NULL: on
 * allocation failure a message is written to stderr and abort() is called.
 *
 * The allocation is deliberately performed outside of assert(); the
 * original wrote assert(NULL != (newp = malloc(size))), which allocates
 * nothing and returns NULL once NDEBUG is defined.
 *
 * static inline so that the header can be included from both C and C++
 * translation units without needing a separate external definition.
 */
static inline void *
ckalloc(size_t size) {
  void *newp;

  assert(size > 0);

  newp = malloc(size);
  if (newp == NULL) {
    fprintf(stderr, "ckalloc: out of memory allocating %lu bytes\n",
            (unsigned long)size);
    abort();
  }
  return(newp);
}

/* Resize the allocation ptr to size bytes or terminate the program.
 *
 * ptr may be NULL, in which case this behaves like ckalloc().  size must
 * be greater than zero (asserted).  Never returns NULL; failure handling
 * is the same as for ckalloc(), and for the same reason realloc() is
 * called outside of assert().
 */
static inline void *
ckrealloc(void* ptr, size_t size) {
  void *newp;

  assert(size > 0);

  newp = realloc(ptr, size);
  if (newp == NULL) {
    fprintf(stderr, "ckrealloc: out of memory allocating %lu bytes\n",
            (unsigned long)size);
    abort();
  }
  return(newp);
}

/* O(kn) identity-based alignment algorithm.  Find alignment between
   a and b (of lengths alen and blen), that begins at finishing
   boundary position *spnt.  Return at *spnt the diagonal at which the
   alignment starts.                                                   */

int *AS_ALN_OKNAlign(const char *a, int alen, const char *b, int blen,
                     int *spnt, int diff);

/* Local alignment record:
     Coordinates are in terms of the d.p. matrix that go from (0,0)
     to (|A|,|B|), where A is the A-sequence argument and B is the
     B-sequence argument.  A coordinate is a position between chars
     of the sequence.  For example, an alignment from (3,5) to (6,9)
     aligns characters 4-5 of A with characters 7-9 of B.

     If the B start coordinate is greater than the B end coordinate
     then the alignment is of A versus the complement of B, i.e. the
     alignment runs along an anti-diagonal, not a diagonal.

     Diagonal k is the set of coordinates (a,b) s.t. a-b = k.
     Anti-diagonal k is the set of coordinates (a,b) s.t. a+b = k.
*/

typedef struct {
  int    abpos, bbpos;  /* Start coordinate of local alignment */
  int    aepos, bepos;  /* End coordinate of local alignment */
  int    ldiag, hdiag;  /* Alignment is between (anti)diagonals ldiag & hdiag */
  int    score;         /* Score of alignment where match = 1, difference = -3 */
  double error;         /* Lower bound on error rate of match */
} Local_Segment;

#define LOCAL_FORW 0  /* Compare A to B only */
#define LOCAL_REVR 1  /* Compare A to complement(B) only */
#define LOCAL_BOTH 2  /* Compare A to both B and its complement */

/* Find_Local_Segments compares sequence A of length Alen against sequence
   B of length Blen, in the same, opposite, or both orientations depending
   on the setting of Action, and returns a pointer to an array of local
   alignment records, where the number of such records is in the integer
   pointed at by NumSegs.  Find_Local_Segments only reports alignments that
   are longer than MinLen bps. and contain less than MaxDiff errors as a
   fraction of the alignment's length.  Find_Local_Segments reuses the
   storage for the array of local alignment segments with each successive
   call; a user should copy the array if they wish for it to persist beyond
   a given invocation.

   NOTE(review): the text above calls the count argument "NumSegs" while
   the prototype below names its last parameter "Seglen" -- presumably the
   same argument; confirm against the implementation.

   Find_Local_Segments finds alignments that contain at least 36bp that
   match at 95% or better.  The encompassing local alignment has to match
   at about 75% or better as matching chars are scored SAMECOST (1) and
   differences are scored -DIFFCOST (3).  Extension of a local alignment
   in a given direction ends at a cumulative maximum from which all
   extensions drop by BLOCKCOST (15 = DIFFCOST(3)*MAXIGAP(5)) in score,
   the equivalent of MAXIGAP(5) consecutive differences.

   Find_Local_Segments is most efficiently applied to large sequences.
   Any application that is doing an all-against-all of smaller fragment
   sequences, would best utilize Find_Local_Segments by concatenating the
   fragments, applying Find_Local_Segments to the large concatenations, and
   then mapping the local alignments back to the fragments and coordinates
   to which they pertain.  By separating fragments in the concatenation
   by MAXIGAP+1(6) N's, one guarantees that local alignments do not span
   the boundaries between fragment sequences.

   Find_Local_Segments builds an index of the A sequence as part of its
   acceleration method.  If successive calls to Find_Local_Segments involve
   the same A-sequence, this table is built only once, improving efficiency.
*/

Local_Segment *Find_Local_Segments
(char const * const A, int const Alen,
 char const * const B, int const Blen,
 int const Action, int const MinLen, double const MaxDiff,
 int * const Seglen);

/* Local_Overlap Record:
     A local overlap is a chain of Local_Segments computed by
     Find_Local_Segments that when strung together form an overlap between
     the two sequences involved.  The Local_Overlap record contains the
     number of segments, a pointer to an ordered array of the segments in
     the chain, and the following parameters:

       score: The score of a chain is the total number of indels
              required to build an overlap out of the chained elements.
       begpos,endpos: As for DP_Compare, the diagonals on which the
              overlap begins and ends.
       diffs: The number of substitutions and indels required to build
              an overlap out of the chained elements.
       length: (|A-seg|+|B-seg|)/2.

     The field chain points to an array of num_pieces+1 Local_Chain
     records.  Records 0..num_pieces encode the nature of the gap between
     segments so that record 0 gives the gap to the start border (if any),
     record num_pieces gives the gap to the finish border (if any), and
     record i gives the gap between the segment of record i-1 and record i.
     Record num_pieces does not contain a segment description.

     The parameters agap and bgap give the delta in the A- and
     B-coordinates of the end of the previous segment and the start of
     the next one.  The coordinates can be negative and both can be zero
     only for the boundary gaps (first and last).  Each segment is
     identified by its position in the array of segments passed to the
     routine, and if the segment was complemented in order to form part of
     the chain, then and only then is the field reversed set to a non-zero
     value.

     The type field gives an indication of the type of the gap as follows:

       LOCAL_BOUNDARY -- if both agap and bgap are zero at a boundary gap
                      then this indication is given.
       LOCAL_MINOR -- if the a- and b-gaps are less than a user-supplied
                      limit "MinorThresh", then the gap is considered a
                      minor break between two segments of similarity.
       LOCAL_INDEL -- if the gap in one sequence is minor, but major,
                      positive, and at least 4 times as large in the
                      other, then the gap is considered an indel.
       LOCAL_REPEAT -- if the gap in one sequence is minor or negative,
                      but major and negative then the gap is considered
                      a repeat gap in the sense that a tandem repeat must
                      occur in one or both of the sequences around the
                      junction between the two adjacent segments.
       LOCAL_REPnDEL -- if the gap in one sequence is major and negative,
                      and the other is major and positive, then there is
                      a repeated element on both sides of the sequence
                      with the inserted sequence.
       LOCAL_DISAGREE -- anything else, i.e. both gap deltas are positive,
                      at least one is major, and if the other is minor
                      then the ratio is less than 1 to 4.
*/

#define LOCAL_BOUNDARY 0x0  /* No gap, at boundary */
#define LOCAL_MINOR    0x1  /* Small break in alignment */
#define LOCAL_DISAGREE 0x2  /* The two sequences significantly disagree */
#define LOCAL_INDEL    0x3  /* One sequence has missing/added sequence */
#define LOCAL_REPEAT   0x4  /* A tandem repeat occurs at the junction */
#define LOCAL_REPnDEL  0x5  /* Both a tandem repeat and an indel */

typedef struct {
  int agap, bgap;       /* A- and B-seq deltas from last segment to this one */
  short type;           /* Type of gap as given by the defined cons. above */
  short reversed;       /* Is segment reversed for inclusion in chain */
  Local_Segment piece;  /* Segment in the chain */
} Local_Chain;

typedef struct {
  int begpos;           /* Entry diagonal of boundary point (a,b) on which
                           overlap starts, where diagonal = a - b. */
  int endpos;           /* Exit diagonal of boundary point (a,b) on which
                           overlap ends, where diagonal = (|B|-b) - (|A|-a) */
  int length;           /* Length of overlap (|A|+|B|)/2 */
  int diffs;            /* Estimated number of differences in overlap */
  int comp;             /* B sequence was complemented for this comparison */
  int indif;            /* Estimated number of diffs in segments of overlap */
  int score;            /* Sum of all gap lengths */
  int num_pieces;       /* # of segments in overlap chain */
  int next;             /* for iteration through the chain - CMM */
  Local_Chain *chain;   /* chain[0..num_pieces] describe each gap between
                           local segments in the overlap chain */
} Local_Overlap;

/* Find_Local_Overlap takes an array of local alignments as returned by
   Compare_Local and finds the best scoring local overlap between the
   underlying sequences.  One must pass in the length of the two sequences
   from which Compare_Local produced the local alignments as well as the
   number of local alignments in the array.  If the parameter comp is
   nonzero then the comparison will effectively be between A and the
   complement of B.

   Normally, the parameter nextbest is zero -- after such a call, a second
   alternate overlap, third alternate, and so on can be generated by
   subsequent calls with nextbest set to a nonzero value.  The alternates
   are the best scoring overlaps that start with a segment not in any
   previous overlap.

   The best overlap is returned as a pointer to the local overlap structure
   described above.  Unlike many of my routines, the reclamation of the
   storage for this data structure is the responsibility of the caller and
   requires simply calling free on it, as the entire structure, including
   the chain array, is in a single memory block.

   The parameter MinorThresh determines whether a gap delta is considered
   minor or major (see the description above on gap types).  An overlap is
   returned only if the ratio of the difference to the length of the
   overlap is less than GapThresh, otherwise NULL is returned.
*/

Local_Overlap *Find_Local_Overlap(int Alen, int Blen, int comp, int nextbest,
                                  Local_Segment *Segs, int NumSegs,
                                  int MinorThresh, double GapThresh);

/* Create a trace to be interpreted as with DP_Compare_AS, but based
   on a Local_Overlap record.  A Local_Segment within the overlap
   will be aligned using OKNAlign(), generating a subtrace.  Subtraces,
   with their indices appropriately adjusted, will be spliced together
   by an encoding of the gaps between segments; for now, we'll simply insert
   gaps as follows:

      A "gap" with x bases in A and y bases in B will become a section
      of the alignment x+y positions long, with the A fragment first
      and the B fragment second:

          AAAAAAAAAA--------------
          ----------BBBBBBBBBBBBBB

   Obviously, a more compact treatment is possible!

   Assumptions: both sequences should be in the forward orientation
   and all segments are forward.
*/

int *AS_Local_Trace(Local_Overlap *local_overlap, const char *aseq, const char *bseq);

#endif

/* Archive member header found at this point in the dump:
 *   kmer-code-2013-trunk/atac-driver/chainer/localalign/GF_ALN_overlap.C0000644000000000000000000005453410546446644023737 0ustar rootroot
 */

// This file is part of A2Amapper.
// Copyright (c) 2004 Applera Corporation
// Author: Clark Mobarry
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received (LICENSE.txt) a copy of the GNU General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

/* NOTE(review): the directives that follow the license in this dump are
 * reproduced here as found.  The system header names and part of the macro
 * text were lost, so they must be restored from the pristine source:
 *
 *     #include #include #include #include #include #include "GF_ALN_local.H"
 *     #define max(x,y) ((xy) ?
 */
/* NOTE(review): the fragment on the next line is the tail of a max/min macro
   pair whose beginning was damaged in this dump; it is kept exactly as found. */
(y):(x))

#define MIN_USABLE 3 /* Smallest subpart of a segment usable for chaining */

int MIN_ALIGNED_COLS=30; /* minimum length of a local overlap in the following sense:
                            an overlap is defined by a set of segments;
                            each segment has a length (ignoring minor troubles in
                            determining length in the presence of small indels);
                            let the sum of the overlap for this purpose be the
                            sum of the lengths of the segments that make up the
                            overlap */

#define BIG_INT 0x7FFFFFFF

/* Payload of one tree node.  The tree routines below compare search
   positions against start, minimise over base, and keep in best the
   smallest base found in the node's whole subtree (see NEW). */
typedef struct {
  int start;
  int base;
  int segment;
  int best;
} Candidate;

/* One entry of the event list ordered by SSORT: a segment plus a flag that
   selects whether its begin (isadd != 0) or end coordinates are the key. */
typedef struct {
  Local_Segment *item;
  int            isadd;
} Event;

typedef struct {
  int value;
  int source;
  int start;
  int colsAligned;
} TraceElement;


/*** AVL-TREE LIST ROUTINES ***/

/* Report an allocation failure for the structure named by where on stderr
   and terminate the program with exit status 1. */
static void OutOfMemory(char *where)
{ fprintf(stderr,"COMPARE_LOCAL: Out of memory (%s)\n",where);
  exit (1);
}

/* Tree node.
     RC    reference count (raised by AVLinc, lowered by AVLdec)
     LN    number of NIL leaves below the node, i.e. item count + 1
           (NIL has LN == 1 and NEW sets LN = l->LN + r->LN)
     H     height (NIL has H == 0)
     L, R  children
     V     the candidate stored in the node                            */
typedef struct _AVLnode {
  int              RC, LN;
  short            H;
  struct _AVLnode *L, *R;
  Candidate        V;
} AVLnode;

/* 1-based position of v's own item within the subtree rooted at v. */
#define CP(v) ((v)->L->LN)

static AVLnode *freept;   /* Free list of released nodes, linked through L */
static AVLnode *NIL;      /* Shared sentinel used in place of a NULL child */

#define INC AVLinc
#define DEC AVLdec
#define SEL AVLselect
#define RNK AVLrank
#define ADD AVLinsert
#define DEL AVLdelete

/* Empty the free list and create the NIL sentinel (one reference, height 0,
   base and best set to BIG_INT so that it never wins a minimum).
   NOTE(review): NIL->L, NIL->R and NIL->V.start are left unset here. */
static void AVLinit(void)
{ freept = NULL;
  NIL = (AVLnode *) malloc(sizeof(AVLnode));
  if (NIL == NULL)
    OutOfMemory("Candidate list");
  NIL->LN = NIL->RC = 1;
  NIL->H  = 0;
  NIL->V.base = BIG_INT;
  NIL->V.best = BIG_INT;
}

/* Take an additional reference on v; returns v so calls can be nested. */
static AVLnode *AVLinc(AVLnode *v)
{ v->RC++;
  return (v);
}

/* Drop one reference on v.  When the count reaches zero the references
   held on both children are dropped too and v is pushed on the free list. */
static void AVLdec(AVLnode *v)
{ v->RC--;
  if (v->RC == 0)
    { DEC(v->L);
      DEC(v->R);
      v->L = freept;
      freept = v;
    }
}

/* Number of items in the tree; drops the caller's reference on v.
   NOTE(review): v->LN is read after the DEC. */
static int AVLlength(AVLnode *v)
{ DEC(v);return (v->LN - 1);
}

/* Build a node holding *x with children l and r, reusing a node from the
   free list when one is available.  The references the caller holds on l
   and r are taken over (no INC is done here).  LN, H and V.best are derived
   from the children. */
static AVLnode *NEW(AVLnode *l, Candidate *x, AVLnode *r)
{ AVLnode *v;
  int      b;

  if (freept == NULL)
    { v = (AVLnode *) malloc(sizeof(AVLnode));
      if (v == NULL)
        OutOfMemory("Candidate list");
    }
  else
    { v = freept;
      freept = v->L;
    }
  v->RC = 1;
  v->V = *x;
  v->L = l;
  v->R = r;
  v->LN = l->LN + r->LN;
  v->H = (l->H < r->H ? r->H : l->H) + 1;
  b = v->V.base;                       /* best = min(own base, children's best) */
  if (v->L->V.best < b) b = v->L->V.best;
  if (v->R->V.best < b) b = v->R->V.best;
  v->V.best = b;
  return (v);
}

/* Join l, *x and r into a new tree, choosing between a plain node and the
   four rotation shapes according to the children's heights.  The new nodes
   take their own references on the parts they share; the caller's
   references on l and r are dropped before returning. */
static AVLnode *BAL(AVLnode *l, Candidate *x, AVLnode *r)
{ AVLnode *t;

  if (l->H - r->H >= -1 && l->H - r->H <= 1)
    t = NEW(INC(l),x,INC(r));
  else if (l->H > r->H)
    if (l->L->H >= l->R->H)
      t = NEW(INC(l->L),&(l->V),NEW(INC(l->R),x,INC(r)));
    else
      t = NEW(NEW(INC(l->L),&(l->V),INC(l->R->L)),&(l->R->V),
              NEW(INC(l->R->R),x,INC(r)));
  else
    if (r->R->H >= r->L->H)
      t = NEW(NEW(INC(l),x,INC(r->L)),&(r->V),INC(r->R));
    else
      t = NEW(NEW(INC(l),x,INC(r->L->L)),&(r->L->V),
              NEW(INC(r->L->R),&(r->V),INC(r->R)));
  DEC(l);
  DEC(r);
  return (t);
}

/* Pointer to the k'th item (counting from 1) of the tree; drops the
   caller's reference on v.
   NOTE(review): the returned pointer refers into a node whose reference
   has just been dropped. */
static Candidate *AVLselect(AVLnode *v, int k)
{ Candidate *x;

  if (k < CP(v))
    x = SEL(INC(v->L),k);
  else if (k > CP(v))
    x = SEL(INC(v->R),k-CP(v));
  else
    x = &(v->V);
  DEC(v);
  return (x);
}

/* Number of items that lie at or before pos when descending by V.start
   (0 for an empty tree); drops the caller's reference on v.
   Presumably the items are kept ordered by start -- verify against the
   caller. */
static int AVLrank(AVLnode *v, int pos)
{ int k;

  if (v == NIL)
    k = 0;
  else if (pos < v->V.start)
    k = RNK(INC(v->L),pos);
  else
    k = CP(v) + RNK(INC(v->R),pos);
  DEC(v);
  return (k);
}

/* Search the part of the tree with start <= hgh for a node whose base is
   below bst, using the cached best values to skip subtrees; NIL is returned
   for an empty tree.  Drops the caller's reference on v.
   NOTE(review): looks like a "minimum base over a prefix" query -- confirm
   the exact contract before relying on it. */
static AVLnode *AVLminprf(AVLnode *v, int hgh, int bst)
{ AVLnode *r;
  int      b = 0;

  if (v == NIL)
    r = v;
  else if (hgh < v->V.start)
    r = AVLminprf(INC(v->L),hgh,bst);
  else
    { if (v->L->V.best < bst)
        b = v->L->V.best;
      if (v->V.base < bst)
        b = v->V.base;
      r = AVLminprf(INC(v->R),hgh,b);
      if (r->V.base < bst)
        bst = r->V.base;
      if (v->V.base < bst)
        { r = v;
          bst = v->V.base;
        }
      if (v->L->V.best < bst)
        r = AVLminprf(INC(v->L),hgh,bst);
    }
  DEC(v);
  return (r);
}

/* Mirror image of AVLminprf: the same search over the part of the tree
   with start >= low, with the roles of the left and right child exchanged. */
static AVLnode *AVLminsuf(AVLnode *v, int low, int bst)
{ AVLnode *r;
  int      b = 0;

  if (v == NIL)
    r = v;
  else if (low > v->V.start)
    r = AVLminsuf(INC(v->R),low,bst);
  else
    { if (v->R->V.best < bst)
        b = v->R->V.best;
      if (v->V.base < bst)
        b = v->V.base;
      r = AVLminsuf(INC(v->L),low,b);
      if (r->V.base < bst)
        bst = r->V.base;
      if (v->V.base < bst)
        { r = v;
          bst = v->V.base;
        }
      if (v->R->V.best < bst)
        r = AVLminsuf(INC(v->R),low,bst);
    }
  DEC(v);
  return (r);
}

/* Range query built from the two routines above: descend to the first node
   whose start lies in [low,hgh], then compare it with the result of
   AVLminprf on its right subtree and of AVLminsuf on its left subtree,
   keeping whichever has the smaller base.  Returns NIL for an empty tree
   and drops the caller's reference on v. */
static AVLnode *AVLminrng(AVLnode *v, int low, int hgh)
{ AVLnode *r, *t;

  if (v == NIL)
    r = v;
  else if (hgh < v->V.start)
    r = AVLminrng(INC(v->L),low,hgh);
  else if (low > v->V.start)
    r = AVLminrng(INC(v->R),low,hgh);
  else
    { r = v;
      t = AVLminprf(INC(v->R),hgh,r->V.base);
      if (t->V.base < r->V.base)
        r = t;
      t = AVLminsuf(INC(v->L),low,r->V.base);
      if (t->V.base < r->V.base)
        r = t;
    }
  DEC(v);
  return (r);
}

/* Return a tree equal to v with *x inserted directly after its first k
   items.  The result is assembled from new nodes (via BAL) that share the
   untouched subtrees; the caller's reference on v is dropped. */
static AVLnode *AVLinsert(AVLnode *v, int k, Candidate *x)
{ AVLnode *t;

  if (v == NIL)
    t = BAL(INC(NIL),x,INC(NIL));
  else if (k < CP(v))
    t = BAL(ADD(INC(v->L),k,x),&(v->V),INC(v->R));
  else
    t = BAL(INC(v->L),&(v->V),ADD(INC(v->R),k-CP(v),x));
  DEC(v);
  return (t);
}

/* Return a tree equal to v without its k'th item (counting from 1).  When
   the item sits in an inner node it is replaced by its predecessor from the
   left subtree, or by its successor when there is no left subtree.  The
   caller's reference on v is dropped. */
static AVLnode *AVLdelete(AVLnode *v, int k)
{ AVLnode *t;

  if (v->L == NIL && v->R == NIL)
    t = INC(NIL);
  else if (k <= CP(v) && v->L != NIL)
    if (k == CP(v))
      t = BAL(DEL(INC(v->L),k-1),SEL(INC(v->L),k-1),INC(v->R));
    else
      t = BAL(DEL(INC(v->L),k),&(v->V),INC(v->R));
  else
    if (k == CP(v))
      t = BAL(INC(v->L),SEL(INC(v->R),1),DEL(INC(v->R),1));
    else
      t = BAL(INC(v->L),&(v->V),DEL(INC(v->R),k-CP(v)));
  DEC(v);
  return (t);
}

/* Comparison function with the qsort signature for Event records.  The key
   of an event is the begin point of its segment when isadd is set and the
   end point otherwise.  Order: A coordinate first; on a tie events with the
   smaller isadd value come first; finally the B coordinate. */
static int SSORT(const void *l, const void *r)
{ Event *x, *y;
  int ax, ay, bx, by;

  x = (Event *) l;
  y = (Event *) r;
  if (x->isadd)
    { ax = x->item->abpos;
      bx = x->item->bbpos;
    }
  else
    { ax = x->item->aepos;
      bx = x->item->bepos;
    }
  if (y->isadd)
    { ay = y->item->abpos;
      by = y->item->bbpos;
    }
  else
    { ay = y->item->aepos;
      by = y->item->bepos;
    }
  if (ax < ay)
    return (-1);
  else if (ax > ay)
    return (1);
  else if (x->isadd != y->isadd)
    return (x->isadd - y->isadd);
  else
    return (bx - by);
}

/* Bring the segments into the orientation used for chaining.  With comp
   set, every B coordinate is first mirrored to Blen - coordinate.  Then each
   segment whose B begin exceeds its B end has the two swapped and its score
   replaced by -score-1, which marks it as reversed.  Alen is not used. */
static void convert_segs(Local_Segment *Segs,int NumSegs,int comp,
                         int Alen,int Blen)
{ int i;

  /* Mark and reverse all complemented local segs */

  if (comp)
    for (i = 0; i < NumSegs; i++)
      { Segs[i].bbpos = Blen - Segs[i].bbpos;
        Segs[i].bepos = Blen - Segs[i].bepos;
      }
  for (i = 0; i < NumSegs; i++)
    if (Segs[i].bbpos > Segs[i].bepos)
      { int x;
        x = Segs[i].bbpos;
        Segs[i].bbpos = Segs[i].bepos;
        Segs[i].bepos = x;
        Segs[i].score = -Segs[i].score-1;
      }
}

static void restore_segs(Local_Segment
*Segs,int NumSegs,int comp,int Alen,int Blen) { int i; /* Unmark and reverse all complemented local segs */ for (i = 0; i < NumSegs; i++) if (Segs[i].score < 0) { int x; x = Segs[i].bbpos; Segs[i].bbpos = Segs[i].bepos; Segs[i].bepos = x; Segs[i].score = -Segs[i].score-1; } if (comp) { for (i = 0; i < NumSegs; i++) { Segs[i].bbpos = Blen - Segs[i].bbpos; Segs[i].bepos = Blen - Segs[i].bepos; } } } Local_Overlap *Find_Local_Overlap(int Alen, int Blen, int comp, int nextbest, Local_Segment *Segs, int NumSegs, int MinorThresh, double GapThresh) { static Candidate Cvals; static int MaxTrace = -1; static TraceElement *Trace = NULL; static Event *EventList; Local_Overlap *Descriptor; Local_Chain *Chain; if (NumSegs == 0) return (NULL); if (nextbest) { if (Trace == NULL) return (NULL); convert_segs(Segs,NumSegs,comp,Alen,Blen); goto Gen_Overlap; } if (MaxTrace < 0) AVLinit(); if (NumSegs > MaxTrace) { MaxTrace = (int)(1.3*NumSegs) + 500; Trace = (TraceElement *) realloc(Trace,(sizeof(Event)+2*sizeof(TraceElement))*MaxTrace); if (Trace == NULL) OutOfMemory("Overlap Trace Array"); EventList = (Event *) (Trace + MaxTrace); { // We have to make sure that EventList is aligned on an appropriate boundary. // It is derived from Trace which has looser alignment constraints. long address = (long)EventList; // By convention "long" int is big as the size of a pointer long offset = (address % sizeof(void *)); int pad = sizeof(void *) - offset; // This is how much we need to add to get things aligned. 
if(offset){ // fprintf(stderr,"* Eventlist is %p adding %d up to ", EventList, pad); EventList = (Event *)(((char *)EventList) + pad); // fprintf(stderr," %p\n", EventList); } } } convert_segs(Segs,NumSegs,comp,Alen,Blen); { int i; for (i = 0; i < NumSegs; i++) { EventList[2*i].item = Segs+i; EventList[2*i].isadd = 1; EventList[2*i+1].item = Segs+i; EventList[2*i+1].isadd = 0; } } qsort(EventList,2*NumSegs,sizeof(Event),SSORT); { int e; AVLnode *elist, *ilist, *olist; elist = AVLinc(NIL); ilist = AVLinc(NIL); olist = AVLinc(NIL); for (e = 0; e < 2*NumSegs; e++) { int i, bb, be, ab, ae; double err; /* Determine least gapped path to i'th segment */ i = EventList[e].item - Segs; bb = Segs[i].bbpos; be = Segs[i].bepos; ab = Segs[i].abpos; ae = Segs[i].aepos; err = Segs[i].error; if (EventList[e].isadd) /* Segment begins */ { int clen, best, srce; // this definition of best differs from the original (below) // it is designed to encourage global alignment // best = ab+bb; /* Best from boundary */ //best = ab; /* Best from boundary */ //if (best > bb) // best = bb; //best *= 2; srce = -1; clen = AVLlength(AVLinc(elist)); { int p; /* Examine bests from elist */ p = AVLrank(AVLinc(elist),bb); /* Best @ start of seg */ if (p > 0) { Candidate *cand; int altr; cand = AVLselect(AVLinc(elist),p); altr = cand->base + (ab + bb); if (altr < best) { best = altr; srce = cand->segment; } } while (++p <= clen) /* Bests @ midpoints of seg */ { Candidate *cand; int altr; cand = AVLselect(AVLinc(elist),p); if (cand->start > be - MIN_USABLE) break; altr = cand->base + 2*cand->start + (ab - bb); if (altr < best) { best = altr; srce = cand->segment; } } } /* Examine bests from ilist and olist */ { AVLnode *m; int bdiag, ldiag, altr; bdiag = bb - ab; ldiag = bdiag + ((ae-ab) - MIN_USABLE); m = AVLminprf(AVLinc(ilist),bdiag,BIG_INT); if (m != NIL) { altr = m->V.base + bdiag; if (altr < best) { srce = m->V.segment; best = altr; } } m = AVLminrng(AVLinc(olist),-ldiag,-bdiag); if (m != NIL) { 
altr = m->V.base - bdiag; if (altr < best) { srce = m->V.segment; best = altr; } } } /* Record best linkage for segment */ Trace[i].value = best; Trace[i].source = srce; Trace[i].colsAligned = (int)((1.-err)*(double)(min(ae-ab,be-bb)+1)); if (srce >= 0){ Trace[i].start = Trace[srce].start; Trace[i].colsAligned += Trace[srce].colsAligned; } else Trace[i].start = i; /* Add segment to ilist and olist */ { int p, d; d = be - ae; Cvals.segment = i; Cvals.start = d; Cvals.base = best - d; p = AVLrank(AVLinc(ilist),d); ilist = AVLinsert(ilist,p,&Cvals); d = -d; Cvals.start = d; Cvals.base = best - d; p = AVLrank(AVLinc(olist),d); olist = AVLinsert(olist,p,&Cvals); } } else /* Segment ends */ { int best, clen; best = Trace[i].value; clen = AVLlength(AVLinc(elist)); /* Add candidate (if any) created by i'th segment */ { Candidate *cand; int p, off; off = be + Segs[i].aepos; p = AVLrank(AVLinc(elist),be); if (p != 0) cand = AVLselect(AVLinc(elist),p); if (p == 0 || best < cand->base + off) { p += 1; while (p <= clen) { cand = AVLselect(AVLinc(elist),p); if (cand->base + off < best) break; elist = AVLdelete(elist,p); clen -= 1; } p -= 1; if (p > 0) { cand = AVLselect(AVLinc(elist),p); if (cand->start == be) elist = AVLdelete(elist,p--); } Cvals.start = be; Cvals.base = best - off; Cvals.segment = i; elist = AVLinsert(elist,p,&Cvals); } } /* Remove candidates from ilist and olist */ { int p, d; d = be-ae; p = AVLrank(AVLinc(ilist),d); while (AVLselect(AVLinc(ilist),p)->segment != i) p -= 1; ilist = AVLdelete(ilist,p); p = AVLrank(AVLinc(olist),-d); while (AVLselect(AVLinc(olist),p)->segment != i) p -= 1; olist = AVLdelete(olist,p); } } } AVLdec(elist); AVLdec(ilist); AVLdec(olist); } Gen_Overlap: { int i, npiece; int best, end, beg; best = BIG_INT; /* Determine best overall overlap */ end = -1; for (i = 0; i < NumSegs; i++){ // if (Trace[i].start >= 0) if (Trace[i].start >= 0&&Trace[i].colsAligned >= MIN_ALIGNED_COLS) { int sfx; // this definition of sfx differs from the 
original (below) // it is designed to encourage global alignment // sfx = Alen - Segs[i].aepos + Blen - Segs[i].bepos; //sfx = Alen - Segs[i].aepos; //if (Blen - Segs[i].bepos < sfx) // sfx = Blen - Segs[i].bepos; //sfx *= 2; // The "- 2 * Trace[i].colsAligned" makes us encourage longer alignments // if (Trace[i].value + sfx - 2*Trace[i].colsAligned < best) { best = Trace[i].value - 2*Trace[i].colsAligned + sfx; end = i; } } } if (end < 0) { restore_segs(Segs,NumSegs,comp,Alen,Blen); return (NULL); } beg = Trace[end].start; /* How many segments in the best overlap? */ npiece = 0; for (i = end; i >= 0; i = Trace[i].source) npiece += 1; /* Allocate result data structures in a single memory block */ Descriptor = (Local_Overlap *) malloc(sizeof(Local_Overlap) + (npiece+1)*sizeof(Local_Chain)); if (Descriptor == NULL) OutOfMemory("Overlap descriptor"); Chain = (Local_Chain *) (Descriptor + 1); /* Fill out the description of the chain */ { int n; n = npiece; for (i = end; i >= 0; i = Trace[i].source) Chain[--n].piece = Segs[i]; } #define ALLOW_DUP_SEGS_IN_NEXT /* allow all but the first segment to be used in later attempts */ #ifndef ALLOW_DUP_SEGS_IN_NEXT for (i = 0; i < NumSegs; i++) if (Trace[i].start == beg) Trace[i].start = -1; /* this seems to prevent reuse of segments in subsequent calls, and/or if we reject this segment as too noisy and jump back to the top */ #else Trace[end].start = -1; /* this seems to prevent reuse of segments in subsequent calls, and/or if we reject this segment as too noisy and jump back to the top */ #define REUSE_CURRENT_LAST_AS_NONTERMINAL_SEG #ifndef REUSE_CURRENT_LAST_AS_NONTERMINAL_SEG for (i = 0; i < NumSegs; i++) if (Trace[i].source == beg) Trace[i].source = -1; #endif #endif {// The last segment doesn't describe an alignment, only a gap. 
Initialize it to reasonable values Local_Segment *lastseg = &Chain[npiece].piece; lastseg->abpos = lastseg->bbpos = -1; lastseg->aepos = lastseg->bepos = -1; lastseg->ldiag = lastseg->hdiag = -1; lastseg->score = -1; lastseg->error = -1.0; } { int gl; /* there's basically a bug here: abpos = 1 means starts at first char of A; so, agap should be 0, but gets set to 1; i.e., every first gap size gets set to one too many; but there's existing code that relies on this fact, so leave it alone for now */ gl = Chain[0].piece.abpos; if (gl > Chain[0].piece.bbpos) gl = Chain[0].piece.bbpos; Chain[0].agap = gl; Chain[0].bgap = gl; } for (i = 1; i < npiece; i++) { Chain[i].agap = Chain[i].piece.abpos - Chain[i-1].piece.aepos; Chain[i].bgap = Chain[i].piece.bbpos - Chain[i-1].piece.bepos; } { int gl; gl = Alen - Chain[npiece-1].piece.aepos; if (gl > Blen - Chain[npiece-1].piece.bepos) gl = Blen - Chain[npiece-1].piece.bepos; Chain[npiece].agap = gl; Chain[npiece].bgap = gl; } for (i = 0; i <= npiece; i++) { if (abs(Chain[i].agap) <= MinorThresh) { if (abs(Chain[i].bgap) <= MinorThresh) { if (Chain[i].agap != 0 || Chain[i].bgap != 0) Chain[i].type = LOCAL_MINOR; else Chain[i].type = LOCAL_BOUNDARY; } else if (Chain[i].bgap < 0) Chain[i].type = LOCAL_REPEAT; else if (Chain[i].bgap > 4*Chain[i].agap) Chain[i].type = LOCAL_INDEL; else Chain[i].type = LOCAL_DISAGREE; } else if (Chain[i].agap < 0) { if (Chain[i].bgap < MinorThresh) Chain[i].type = LOCAL_REPEAT; else Chain[i].type = LOCAL_REPnDEL; } else { if (abs(Chain[i].bgap) < MinorThresh) if (Chain[i].agap > 4*Chain[i].bgap) Chain[i].type = LOCAL_INDEL; else Chain[i].type = LOCAL_DISAGREE; else if (Chain[i].bgap < 0) Chain[i].type = LOCAL_REPnDEL; else Chain[i].type = LOCAL_DISAGREE; } } /* Fill out overlap descriptor */ Descriptor->num_pieces = npiece; Descriptor->score = best; Descriptor->chain = Chain; Descriptor->comp = comp; { Local_Segment *sg; int ln; Descriptor->indif = 0; for (i = 0; i < npiece; i++) { sg = 
&(Chain[i].piece); ln = ((sg->aepos - sg->abpos) + (sg->bepos - sg->bbpos)) / 2; if (i > 0 && Chain[i-1].piece.error < sg->error) { if (Chain[i].agap < Chain[i].bgap) { if (Chain[i].agap < 0) ln += Chain[i].agap; } else { if (Chain[i].bgap < 0) ln += Chain[i].bgap; } } if (i < npiece-1 && Chain[i+1].piece.error <= sg->error) { if (Chain[i+1].agap < Chain[i+1].bgap) { if (Chain[i+1].agap < 0) ln += Chain[i+1].agap; } else { if (Chain[i+1].bgap < 0) ln += Chain[i+1].bgap; } } if (ln > 0) Descriptor->indif += (int)(ln * sg->error); } } Descriptor->diffs = Descriptor->indif; for (i = 0; i <= npiece; i++) { int d; if (Chain[i].agap < 0 || Chain[i].bgap < 0) d = abs( (Chain[i].piece.bbpos - Chain[i].piece.abpos) - (Chain[i-1].piece.bepos - Chain[i-1].piece.aepos)); else { d = Chain[i].agap; if (d < Chain[i].bgap) d = Chain[i].bgap; } Descriptor->diffs += d; } { int overa, overb; overa = (Chain[npiece-1].piece.aepos + Chain[npiece].agap) - (Chain[0].piece.abpos - Chain[0].agap); overb = (Chain[npiece-1].piece.bepos + Chain[npiece].bgap) - (Chain[0].piece.bbpos - Chain[0].bgap); Descriptor->length = (overa + overb) / 2; } Descriptor->begpos = Chain[0].piece.abpos - Chain[0].piece.bbpos; Descriptor->endpos = (Blen - Chain[npiece-1].piece.bepos) - (Alen - Chain[npiece-1].piece.aepos); for (i = 0; i < npiece; i++) if (Chain[i].piece.score < 0) { int x; x = Chain[i].piece.bbpos; Chain[i].piece.bbpos = Chain[i].piece.bepos; Chain[i].piece.bepos = x; Chain[i].piece.score = - Chain[i].piece.score-1; Chain[i].reversed = 1; } else Chain[i].reversed = 0; } restore_segs(Segs,NumSegs,comp,Alen,Blen); /* undo comp and rc changes */ return (Descriptor); } kmer-code-2013-trunk/atac-driver/chainer/localalign/localAlignerInterfacemodule.C0000644000000000000000000001615510546446644026641 0ustar rootroot// This file is part of A2Amapper. 
// Copyright (c) 2004 Applera Corporation // Author: Clark Mobarry // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include #include #include "GF_ALN_local.H" static Local_Overlap *desc = NULL; // This should be in the "library" not in the client. Sigh. void syntenicSegments(char const * const Aseq, int const Astart, int const Astop, char const * const Bseq, int const Bstart, int const Bstop, double const erate) { desc = NULL; // In case an early exit happens! 
// Key data types ("Local_Segment" and "Local_Overlap") are defined // in "CA_ALN_local.H" assert(Astop >= Astart); assert(Bstop >= Bstart); // Step 1: get local segments: char const * const Ausable = Aseq + Astart; char const * const Busable = Bseq + Bstart; int const alen = Astop - Astart; int const blen = Bstop - Bstart; int NumSegs = 0; /* number of local matches returned */ Local_Segment *local_results = Find_Local_Segments(Ausable, /* sequence A */ alen, Busable, blen, LOCAL_FORW, /* whether to compute a forward search , reverse, or both */ 16, /* minimum length of a reportable match */ erate, /* maximum error for a match to be returned */ &NumSegs); /* number of local matches returned */ if(NumSegs==0) return; // Step 2: get a chain of local segments: Local_Overlap *Ov = Find_Local_Overlap(alen, /* length of sequence A */ blen, /* length of sequence B */ 0, /* comp==0 -> fwd orientation */ 0, /* nextbest==0 -> find best overlap*/ local_results, /* the input set of local segments */ NumSegs, /* number of input local segments */ 20 - 6, /* shortest "overlap" to report" */ 1.0); /* fraction of overlap not in a match -- needs to be large to allow substantial mismatches */ if(Ov == NULL) return; // Step 3 (optional): // // NOT optional! AS_Local_Trace seems to have been extended to // clean up segments. // // a) fix the chain of segments so that the segments don't overlap. // It must be a 1-1 mapping. (can either trim or delete segments--or // leave them completely alone) // // b) construct an alignment "trace" // // The "trace" is the standard "AS" encoding of an alignment. // coordinate munge between Gene's local aligner and // DP_Compare()-related routines coordinates from Find_Local // routines will be one off from those expected by the trace // routines, so adjust them! 
for(int i=0;i<=Ov->num_pieces;i++){ if(inum_pieces){ Ov->chain[i].piece.abpos++; Ov->chain[i].piece.bbpos++; Ov->chain[i].piece.aepos++; Ov->chain[i].piece.bepos++; } } // AS_Local_Trace assumes string pointer one before start of string! // // The original used to complain if no trace was returned, but we // don't care...and, in fact, we disabled trace generation anyway! // AS_Local_Trace(Ov, Ausable - 1, Busable - 1); for(int i=0;i<=Ov->num_pieces;i++){ if(inum_pieces){ Ov->chain[i].piece.abpos--; Ov->chain[i].piece.bbpos--; Ov->chain[i].piece.aepos--; Ov->chain[i].piece.bepos--; } } Ov->next = 0; desc = Ov; } int iterate_Local_Overlap(int &seg_abpos, int &seg_bbpos, int &seg_alen, int &seg_blen, double &seg_error) { if (desc == NULL) return(0); Local_Chain *chain = desc->chain; assert(NULL != desc->chain); for(; 0 <= desc->next && desc->next < desc->num_pieces; ) { int the_piece = (desc->next)++; Local_Segment *seg = &(chain[the_piece].piece); assert(NULL != seg); assert(!chain[the_piece].reversed); // Set the return data seg_abpos = seg->abpos; seg_alen = seg->aepos - seg->abpos; seg_bbpos = seg->bbpos; seg_blen = seg->bepos - seg->bbpos; seg_error = seg->error; // Skip over the "deleted in-place" segments. if((seg->aepos <= seg->abpos)&&(seg->bepos <= seg->bbpos)) continue; // the data is valid return(1); } // Nothing left. return(0); } static PyObject * spam_syntenicSegments(PyObject *self, PyObject *args) { char *Aseq = "undefined"; int Astart = -1; int Astop = -1; // substring of Aseq char *Bseq = "undefined"; int Bstart = -1; int Bstop = -1; // substring of Bseq double erate = 1.0 / 3.0; PyObject *py_outfile = NULL; if (!PyArg_ParseTuple(args, "Osiisiid", &py_outfile, &Aseq, &Astart, &Astop, &Bseq, &Bstart, &Bstop, &erate)) return NULL; try { syntenicSegments(Aseq, Astart, Astop, // substring of Aseq Bseq, Bstart, Bstop, // substring of Bseq erate); } catch (...) 
{ PyErr_SetString(PyExc_RuntimeError,"sytenicSegments failed"); return(Py_None); } Py_INCREF(Py_None); // This is a module function returning void. return(Py_None); } static PyObject * spam_iterateSegments(PyObject *self, PyObject *args) { int seg_bgn1 = 0; int seg_bgn2 = 0; int seg_len1 = 0; int seg_len2 = 0; double seg_error = 0.0; if (iterate_Local_Overlap(seg_bgn1, seg_bgn2, seg_len1, seg_len2, seg_error)) return(Py_BuildValue("(iiiid)", seg_bgn1, seg_bgn2, seg_len1, seg_len2, seg_error)); Py_INCREF(Py_None); // This is a module function returning void. return(Py_None); } static PyMethodDef registration_table[] = { {"syntenicSegments", spam_syntenicSegments, METH_VARARGS, "Compute syntenic segments"}, {"iterateSegments", spam_iterateSegments, METH_VARARGS, "Iterator returning syntenic segments"}, {NULL, NULL, 0, NULL} }; extern "C" void initlocalAlignerInterface() { Py_InitModule("localAlignerInterface", registration_table); } kmer-code-2013-trunk/atac-driver/chainer/halign/0000755000000000000000000000000012641613361020225 5ustar rootrootkmer-code-2013-trunk/atac-driver/chainer/halign/halign.H0000644000000000000000000000366010454403366021607 0ustar rootroot/************************************************************************** * This file is part of A2Amapper. * Copyright (c) 2004 Applera Corporation * Author: Clark Mobarry * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
*
 * You should have received (LICENSE.txt) a copy of the GNU General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 **************************************************************************/

#ifndef _ALIGN__HALIGN_H_
#define _ALIGN__HALIGN_H_

// CMM 2004 Feb 05: The variable "first" does not really belong in
// the alignment object. We should put it into a separate iterator
// over alignment struct/class. Currently, this is called by Python
// so I would need to learn how to make C++ glue code for Python
// objects. Currently I just make glue code for Python modules.

// Result of one halignStart() call, plus the "restart" flag used by
// iterateUngappedAlignSharpEnds().
//
// An instance must start out zero-filled: convertScriptToAlignment() in
// halign.C frees and reallocates scriptAsArray based on scriptAsArrayMax.
typedef struct H_Alignment_t {
  int offset1;           // one-based start position in sequence 1 (halignStart stores 0 + START)
  int offset2;           // one-based start position in sequence 2
  int len1;              // strlen() of sequence 1 as given to halignStart
  int len2;              // strlen() of sequence 2 as given to halignStart
  int score;             // value returned by the alignment recursion (diff) in halign.C
  int first;             // set to 1 by halignStart; cleared by the iterator when it (re)starts its cursor
  int scriptAsArrayMax;  // number of ints allocated for scriptAsArray
  int *scriptAsArray;    // [0] = number of entries that follow; then (op type, count) pairs
} H_Alignment_t;

// Align string1 against string2 and store the edit script in *alignment.
// Returns without touching *alignment if either string is empty.
void halignStart(char *string1, char *string2, H_Alignment_t *alignment);

// Step through the ungapped segments of *aln.  Returns 1 and fills the
// outputs (bgn1/bgn2 are zero-based starts, len1/len2 the extents in each
// sequence) while segments remain; returns 0 when exhausted or aln is NULL.
//
// NOTE(review): nmat looks intended to be the number of matching positions
// in the segment, but halign.C adds 1 (not the run length) for the first
// matching run -- confirm before relying on the value.
int iterateUngappedAlignSharpEnds(H_Alignment_t *aln, int &bgn1, int &bgn2, int &len1, int &len2, int &nmat);

#endif
kmer-code-2013-trunk/atac-driver/chainer/halign/halign.C0000644000000000000000000003147412036742702021604 0ustar rootroot// This file is part of A2Amapper.
// Copyright (c) 2004 Applera Corporation
// Author: Clark Mobarry
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include #include #include #include #include "halign.H" #if defined (__SVR4) && defined (__sun) // Solaris defines SS in sys/regset.h #undef SS #endif #define min(x,y) ((x)<=(y) ? (x):(y)) #define max(x,y) ((x)>=(y) ? (x):(y)) typedef enum { DEL, INS, SUB, MOV } OpType; #define START 1 // The position of the first character. #define GAP_OPEN 1 // These are costs. #define GAP_EXTEND 1 #define MISMATCH 1 class EditScript { public: EditScript(OpType op, int nm, EditScript *nx) { op_type = op; num = nm; next = nx; }; OpType op_type; // SUB, MOV, INS, or DEL int num; // Number of operations EditScript *next; }; void convertScriptToAlignment(EditScript *head, H_Alignment_t *aln) { EditScript *tp = head; EditScript *tp1; int scriptLen = 3; // Three sets of comments, from three different sources: // // Condense_script - merge contiguous operations of the same type // together. Remove the leftmost dummy script (Ascript[0][0]) // // Condense the script in block operations; this will modify the // script and will cut off the dummy edit op from the linked list // // Condense any repeat operations. For example, condense this // script: "DEL 5; DEL 3" into this array representation: "DEL 8". while (tp && tp->next) { scriptLen += 2; while (((tp1 = tp->next) != NULL) && (tp->op_type == tp1->op_type)) { tp->num += tp1->num; tp->next = tp1->next; free(tp1); } tp = tp->next; } // Allocate space for the alignment // if (aln->scriptAsArrayMax <= scriptLen) { free(aln->scriptAsArray); aln->scriptAsArrayMax = scriptLen; aln->scriptAsArray = (int *)malloc(scriptLen * sizeof(int)); assert(aln->scriptAsArray != NULL); } aln->scriptAsArray[0] = 0; // Convert. 
// int arySize = 0; EditScript *tpdel = 0L; tp = head; while (tp != NULL) { aln->scriptAsArray[++arySize] = tp->op_type; aln->scriptAsArray[++arySize] = tp->num; tpdel = tp; tp = tp->next; delete tpdel; } aln->scriptAsArray[0] = arySize; } static int diff(const char *s1, const char *s2, int len1, int len2, int *CC, int *DD, int *RR, int *SS, int g, int h, int x, int start_cgap, int end_cgap, int free_start, int free_end, int tb, int te, EditScript **head, EditScript **tail) { int i, j, s, t, c, e, tmp; const char *a, *b; int mincost, mintype, midi, midj; EditScript *tmp_head = 0L; EditScript *tmp_tail = 0L; if (len1==0 && len2==0) { *head = *tail = NULL; return 0; } else if (len2 == 0) { int tmpb, tmpe; *head = *tail = new EditScript(DEL,len1,NULL); tmpb = (len1 <= start_cgap) ? 0 : tb+h*(len1-start_cgap); tmpe = (len1 <= end_cgap) ? 0 : te+h*(len1-end_cgap); return min(tmpb,tmpe); } else if (len1 == 0) { *head = *tail = new EditScript(INS,len2,NULL); return ((free_start || free_end) ? 0 : g+(len2*h)); } else if (len1 == 1) { int tmpcost; char ch; /* insert B, delete A; or delete A, insert B */ mincost = (start_cgap ? 0 : min(tb,te)+h) + ((free_start || free_end) ? 0 : g+len2*h); mintype = 2; midj = (free_start ? len2 : 0); /* ... or insert some B, substitute A, insert the rest of B */ for (j=0, ch=*s1; j0) ? new EditScript(INS,midj,aux) : aux; } return mincost; } else { int tmph, tmpg; midi = (int)(len1/2); /* compute CC and DD in the forward phase */ tmph = free_start ? 0 : h; tmpg = free_start ? 0 : g; for (CC[0]=0, t=tmpg, j=1; j<=len2; j++) { /* if free_start, allow gap-free ends in the genomic sequence */ CC[j] = DD[j] = t = t+tmph; DD[j] += tmpg; } for (a=s1, i=1; i<=midi; i++, a++) { s = CC[0]; CC[0] = c = t = max(i-start_cgap,0)*h + (i>start_cgap)*tb; e = t + g; for (b=s2, j=1; j<=len2; j++, b++) { e = min(e, c+g) + h; DD[j] = (j==len2 && i>=len1-end_cgap+1) ? 
min(DD[j], CC[j]) : (min(DD[j]+(i==start_cgap+1)*g, CC[j]+g) + h); c = min(DD[j], min(e, s+x*(*a!=*b))); s = CC[j]; CC[j] = c; } } DD[0] = CC[0]; /* compute RR and SS in the reverse phase */ tmph = free_end ? 0 : h; tmpg = free_end ? 0 : g; for (RR[len2]=0, t=tmpg, j=len2-1; j>=0; --j) { /* if free_end, allow gap-free ends in the genomic sequence */ RR[j] = SS[j] = t = t+tmph; SS[j] += tmpg; } for (a=s1+len1-1, i=len1-1; i>=midi; --i, --a) { s = RR[len2]; RR[len2] = c = t = max((len1-end_cgap)-i,0)*h + (i=0; --j, --b) { e = min(e, c+g) + h; SS[j] = (j==0 && istart_cgap)) { mincost = CC[0]+RR[0]; mintype = 1; } else { mincost = DD[0]+SS[0]-g*(midi>start_cgap); mintype = 2; } for (j=1; j tmp) { mincost = tmp; midj = j; mintype = (tmp == CC[j]+RR[j]) ? 1:2; } } tmp = min(CC[len2]+RR[len2],DD[len2]+SS[len2]-g*(midi tmp) { mincost = tmp; midj = len2; mintype = (tmp==CC[len2]+RR[len2]) ? 1:2; } /* compute recursively in the two subregions */ if (mintype==1) { int cost1, cost2; cost1 = diff(s1, s2, midi, midj, CC, DD, RR, SS, g, h, x, min(start_cgap,midi), max(end_cgap-len1+midi,0), free_start, 0, tb, g, head, &tmp_tail); cost2 = diff(s1+midi, s2+midj, len1-midi, len2-midj, CC, DD, RR, SS, g, h, x, max(0,start_cgap-midi), min(end_cgap,len1-midi), 0, free_end, g, te, &tmp_head, tail); if (*head) tmp_tail->next = tmp_head; else *head = tmp_head; assert(NULL != *tail); } else { EditScript *aux; int cost1, cost2; cost1 = diff(s1, s2, midi-1, midj, CC, DD, RR, SS, g, h, x, min(start_cgap,midi-1), max(end_cgap-len1+midi-1,0), free_start, 0, tb, 0, head, &tmp_tail); aux = new EditScript(DEL,2,NULL); if (*head) tmp_tail->next = aux; else tmp_tail = *head = aux; cost2 = diff(s1+midi+1, s2+midj, len1-midi-1, len2-midj, CC, DD, RR, SS, g, h, x, max(0,start_cgap-midi-1), min(end_cgap,len1-midi-1), 0, free_end, 0, te, &tmp_head, tail); aux->next = tmp_head; if (*tail==NULL) *tail = aux; } return mincost; } void halignStart(char *s1, char *s2, H_Alignment_t *alignment) { int const 
offset1 = 0; // Sequence coordinates are base-based, starting from 0 int const offset2 = 0; // but start from 1 in Liliana's code. if ((s1[0] == 0) || (s2[0] == 0)) return; int len1 = strlen(s1); int len2 = strlen(s2); int start_cgap = 0; int end_cgap = 0; int free_start = 0; int free_end = 0; int score = 0; EditScript *Script_head=NULL; EditScript *Script_tail=NULL; int *CC = (int *)malloc(4 * (len2+1) * sizeof(int)); assert(NULL != CC); score = diff(s1, s2, len1, len2, CC, CC+1*(len2+1), CC+2*(len2+1), CC+3*(len2+1), GAP_OPEN, GAP_EXTEND, MISMATCH, start_cgap, end_cgap, free_start, free_end, GAP_OPEN, GAP_OPEN, &Script_head, &Script_tail); free(CC); assert(NULL != Script_head); assert(NULL != Script_tail); Script_tail->next = NULL; convertScriptToAlignment(Script_head, alignment); alignment->offset1 = offset1+START; // Convert from zero to one start sequence. alignment->offset2 = offset2+START; // Convert from zero to one start sequence. alignment->len1 = len1; alignment->len2 = len2; alignment->score = score; alignment->first = 1; } int iterateUngappedAlignSharpEnds(H_Alignment_t *aln, int &bgn1, int &bgn2, int &len1, int &len2, int &nmatInSeg) { // Returns zero when exhasted. // Returns one when the args are valid. static int *lastS, *endS; static int i, j; nmatInSeg = 0; if(aln == NULL) return 0; // not valid output if(aln->first){ aln->first = 0; i = aln->offset1; j = aln->offset2; lastS = aln->scriptAsArray + 1; endS = aln->scriptAsArray + aln->scriptAsArray[0]; } while (lastS <= endS) { int b1, l1, b2, l2; int nmat; switch (*lastS) { case SUB: ++lastS; i += *(lastS); j += *(lastS); ++lastS; break; case MOV: nmat = *(lastS+1); nmatInSeg ++; b1 = i; b2 = j; lastS++; i += *lastS; j += *lastS; l1 = i-b1; l2 = j-b2; lastS++; while (lastS<=endS && (*lastS==SUB || *lastS==MOV)) { nmat += (*lastS==MOV) ? *(lastS+1) : 0; nmatInSeg += (*lastS==MOV) ? 
*(lastS+1) : 0; lastS++; i += *lastS; j += *lastS; if (*(lastS-1) == MOV) { l1 = i-b1; l2 = j-b2; } lastS++; } bgn1=b1-START; bgn2=b2-START; len1=l1; len2=l2; return 1; // valid output break; case INS: j += *(++lastS); ++lastS; break; case DEL: i += *(++lastS); ++lastS; break; default : fprintf(stderr, "Unrecognized opcode in alignment.\n"); exit(1); break; } } return 0; // not valid output } kmer-code-2013-trunk/atac-driver/chainer/halign/halignmodule.C0000644000000000000000000000340510546446644023015 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2004 Applera Corporation // Author: Clark Mobarry // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include "halign.H" H_Alignment_t aln = { 0, 0, 0, 0, 0, 0, 0, 0}; static PyObject * spam_halignStart(PyObject *self, PyObject *args) { char *seq1 = 0L; char *seq2 = 0L; if (!PyArg_ParseTuple(args, "ss", &seq1, &seq2)) return(NULL); halignStart(seq1, seq2, &aln); Py_INCREF(Py_None); return(Py_None); } static PyObject * spam_halignDedash( PyObject *self, PyObject *args) { int bgn1=0, bgn2=0, len1=0, len2=0, nmat=0; if (iterateUngappedAlignSharpEnds(&aln, bgn1, bgn2, len1, len2, nmat)) return(Py_BuildValue("(iiiii)", bgn1, bgn2, len1, len2, nmat)); Py_INCREF(Py_None); return(Py_None); } static PyMethodDef registration_table[] = { {"halignStart", spam_halignStart, METH_VARARGS, "initialize halign"}, {"halignDedash", spam_halignDedash, METH_VARARGS, "dedashed subalignment"}, {NULL, NULL, 0, NULL} }; extern "C" void inithalign(void) { Py_InitModule("halign", registration_table); } kmer-code-2013-trunk/atac-driver/chainer/halign/halignDriver.C0000644000000000000000000000375010546446644022766 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2004 Applera Corporation // Author: Clark Mobarry // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "halign.H" int main(int argc, char **argv) { char *seq1 = "ATCGTCCGGATGAAAATGTCTCGGGGGGGGGGGTCGGG"; char *seq2 = "ATCGTCTGGATGAAAAAGTCTCAAGGG"; // This is the setting for the coordinate system to be just the match. int offset1 = 0; int offset2 = 0; int bgn1, bgn2, len1, len2, nmat; H_Alignment_t * aln_ptr = NULL; // Sequence coordinates are base-based, starting from 0 halignStart(seq1+offset1, // This is the first base in the comparison. seq2+offset2, offset1, offset2, strlen(seq1), strlen(seq2), &aln_ptr); //printUngappedAlign(aln_ptr); //printUngappedAlignSharpEnds(aln_ptr); printUngappedAlignSharpEndsOnConsole(aln_ptr, seq1, seq2, 0); printUngappedAlignSharpEndsOnConsole(aln_ptr, seq1, seq2, 1); printUngappedAlignSharpEndsOnConsole(aln_ptr, seq1, seq2, 2); while(iterateUngappedAlignSharpEnds(aln_ptr, bgn1, bgn2, len1, len2, nmat)) printf("%d %d %d %d\n", bgn1, bgn2, len1, len2 ); if(aln_ptr != NULL) Free_align(aln_ptr); // Must call for each halign() but after printing output. 
exit(0); } kmer-code-2013-trunk/atac-driver/Make.include0000644000000000000000000000077511676744271017621 0ustar rootroot# -*- makefile -*- $(eval $(call Include,$/libatac/)) $(eval $(call Include,$/alignOverlap/)) $(eval $(call Include,$/gapShifter/)) $(eval $(call Include,$/lengthFilter/)) $(eval $(call Include,$/matchExtender/)) $(eval $(call Include,$/mismatchCounter/)) $(eval $(call Include,$/statsGenerator/)) $(eval $(call Include,$/uniqueFilter/)) $(eval $(call Include,$/clumpMaker/)) $(eval $(call Include,$/chainer/)) $(eval $(call Include,$/chimera/)) $/.PERL_EXES := $/atac.pl \ $/makeplot.pl kmer-code-2013-trunk/atac-driver/runatac.pl0000644000000000000000000000320110454423673017345 0ustar rootroot#!/usr/bin/perl my $genomeDir = "/bioinfo/assembly/walenz/GENOMES"; my $id1; my $id2; while (scalar(@ARGV) > 0) { my $arg = shift @ARGV; if ($arg =~ m/^-g/) { $genomeDir = shift @ARGV; } elsif ($arg =~ m/^-1/) { $id1 = shift @ARGV; } elsif ($arg =~ m/^-2/) { $id2 = shift @ARGV; } } die "usage: $0 [-genomedir path] -1 id1 -2 id2\n" if (!defined($id1) || !defined($id2)); die "No bin dir?\n" if (! -e "/bioinfo/assembly/walenz/src/genomics/linux64/bin"); die "No bin?\n" if (! 
-x "/bioinfo/assembly/walenz/src/genomics/linux64/bin/snapper2"); $genomeDir = "$ENV{'PWD'}/$genomeDir" if ($genomeDir !~ m!^/!); my $name = "${id1}vs${id2}"; my $cmd; $cmd .= "perl /bioinfo/assembly/walenz/src/genomics/atac-driver/briatac.pl "; $cmd .= " -dir $name "; $cmd .= " -id1 $id1 -id2 $id2 "; $cmd .= " -genomedir $genomeDir "; $cmd .= " -meryldir $genomeDir "; $cmd .= " -bindir /bioinfo/assembly/walenz/src/genomics/linux64/bin "; $cmd .= " -merylthreads 4 "; $cmd .= " -numsegments 2 "; $cmd .= " -numthreads 4 "; $cmd .= " -samespecies"; print "$cmd\n"; system($cmd) and die "Failed to briatac.pl!\n"; $cmd = "cd $name && ln -s $name.k20.u1.f20.g0.matches.sorted.extended $name.atac && "; $cmd .= "time sh /bioinfo/assembly/walenz/src/genomics/atac/atacdriver.sh $name.atac && "; $cmd .= "grep ^M $name.atac.ckpLast | cut -d' ' -f 1-12 | sort -k5,5 -k6n > $name.atac.ckpLast.sorted && "; $cmd .= "/bioinfo/assembly/walenz/src/genomics/atac-driver/clumpMaker/clumpMaker -c 5000 -2 -S -f $name.atac.ckpLast.sorted > $name.atac.ckpLast.clumps"; system($cmd) and die "Failed to atacdriver.sh!\n"; kmer-code-2013-trunk/atac-driver/clumpMaker/0000755000000000000000000000000012641613360017451 5ustar rootrootkmer-code-2013-trunk/atac-driver/clumpMaker/clumpMaker.C0000644000000000000000000002445212322046702021660 0ustar rootroot#include #include #include "util.h" #include "atac.H" // Aaron Halpern's clumpMaker algorithm. // // To reproduce the original clumpMaker exactly, assuming that your // atac mapping is for a QUERYvsREFERENCE: // // in=VISD6vsB35LC/VISD6vsB35LC // cut -d' ' -f 1-12 < $in.atac.ckpLast | grep "^M u" | sort -k5,5 -k6n > tmp.a.clumpMaker // $clumpMaker -S -c 50000 -2 -f tmp.a.clumpMaker > $in.50000.clumps // // That is, use only the first 12 columns of info, only ungapped // matches (the original also allows gapped matches, but we don't // have any of those), then sort by the QUERY iid and position. Yes, // the sort is supposed to be alphanumeric. 
// // Then, run clumpMaker DISABLING it's sort (which sorts iids // numerically), using the second sequence as the reference. // class tClumpHit { public: void set(atacMatch *m, bool seq1IsRef) { match = *m; matchIID = m->matchiid; if (seq1IsRef) { refIID = m->iid1; refBeg = m->pos1; refEnd = m->pos1 + m->len1; qryIID = m->iid2; qryBeg = m->pos2; qryEnd = m->pos2 + m->len2; } else { refIID = m->iid2; refBeg = m->pos2; refEnd = m->pos2 + m->len2; qryIID = m->iid1; qryBeg = m->pos1; qryEnd = m->pos1 + m->len1; } ori = m->fwd2 ? 1 : -1; bestStart = -1; bestExtend = -1; scoreStart = 0; scoreExtend = 0; clump = -1; }; int64 get_bestScore() const { return(max(scoreStart, scoreExtend)); }; atacMatch match; uint32 matchIID; uint32 refIID; int32 refBeg; int32 refEnd; uint32 qryIID; int32 qryBeg; int32 qryEnd; int32 ori; int32 scoreStart; int32 bestStart; int32 scoreExtend; int32 bestExtend; int32 clump; }; int clumpHitCompareQry(const void *A, const void *B) { const tClumpHit *a = (const tClumpHit *)A; const tClumpHit *b = (const tClumpHit *)B; if (a->qryIID > b->qryIID) return(1); if (a->qryIID < b->qryIID) return(-1); if (a->qryBeg > b->qryBeg) return(1); if (a->qryBeg < b->qryBeg) return(-1); if (a->qryEnd > b->qryEnd) return(1); if (a->qryEnd < b->qryEnd) return(-1); if (a->refIID > b->refIID) return(1); if (a->refIID < b->refIID) return(-1); if (a->refBeg > b->refBeg) return(1); if (a->refBeg < b->refBeg) return(-1); if (a->refEnd > b->refEnd) return(1); if (a->refEnd < b->refEnd) return(-1); return(0); } int clumpHitCompareIID(const void *A, const void *B) { const tClumpHit *a = (const tClumpHit *)A; const tClumpHit *b = (const tClumpHit *)B; if (a->matchIID > b->matchIID) return(1); if (a->matchIID < b->matchIID) return(-1); return(0); } bool chainable(tClumpHit *a, tClumpHit *b, int32 maxjump) { // return false if // hits are to different chromosomes // hits are not similarly oriented // hits are too far apart on query axis // hits are too far apart on reference 
axis // hits are "out of order" (we're sorted by the qry) // return(!((a->refIID != b->refIID) || (a->qryIID != b->qryIID) || (a->ori != b->ori) || (b->qryBeg - a->qryEnd > maxjump) || (a->ori * (b->refBeg - a->refEnd) > maxjump) || (a->ori * (b->refBeg - a->refBeg) < 0))); } int32 score_all_hits(tClumpHit *hits, int32 clumpcost, int32 maxjump, uint32 num_hits){ // location of best score so far (to which we point whenever starting a new clump) int32 bestEnd = -1; // best scores so far internal to a reference unit (scaffold, chromosome, etc) int32 bestEndThis = -1; int32 bestScoreThis = -clumpcost; // furthest back still accessible ... uint32 furthest_back=0; for(uint32 i=0; i= 0) && (bestScoreThis >= 0)) { // start new clump that is not the first for this reference unit hits[i].scoreStart = hits[i].qryEnd - hits[i].qryBeg + bestScoreThis - clumpcost; hits[i].bestStart = bestEndThis; } else { // clump would be first (to be used) for this reference unit hits[i].scoreStart = hits[i].qryEnd - hits[i].qryBeg - clumpcost; hits[i].bestStart = bestEnd; } // find best way of extending a clump, if any if (furthest_back < i) { int32 cutoff = hits[i].qryBeg - maxjump; while ((hits[furthest_back].qryIID != hits[i].qryIID) || (hits[furthest_back].qryEnd < cutoff)) furthest_back++; } int32 extendScore = -clumpcost; int32 extendprev = -1; for (uint32 j=furthest_back; j bestScoreThis) { bestScoreThis = tmpscore; bestEndThis = i; } } return(bestEndThis); } int main(int argc, char **argv) { int32 clumpcost = 50000; int32 maxjump = 200000; bool seq1IsRef = false; char *filename = 0L; bool isSorted = false; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-c") == 0) { clumpcost = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-j") == 0) { maxjump = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-1") == 0) { seq1IsRef = true; } else if (strcmp(argv[arg], "-2") == 0) { seq1IsRef = false; } else if (strcmp(argv[arg], "-f") == 0) { filename = argv[++arg]; } else if 
(strcmp(argv[arg], "-S") == 0) { isSorted = true; } else { fprintf(stderr, "Unknown argument '%s'\n", argv[arg]); } arg++; } if (filename == 0L) { fprintf(stderr, "usage: %s [] -f filename\n", argv[0]); fprintf(stderr, " -c x penalty for clump start, default 50000\n"); fprintf(stderr, " -j x max jump between consistent hits in a clump, default 200000\n"); fprintf(stderr, " -1 the reference assembly is the first one.\n"); fprintf(stderr, " -2 the reference assembly is the second one (default).\n"); fprintf(stderr, " -S assume the input is already sorted by the query IID, position.\n"); fprintf(stderr, " this will also make the output sorted by queryIID, queryPosition\n"); exit(1); } fprintf(stderr, "1 load the matches\n"); atacFile *AF = new atacFile(filename); uint32 hitsLen = AF->matches()->numberOfMatches(); tClumpHit *hits = new tClumpHit [hitsLen]; for (uint32 i=0; imatches()->getMatch(i), seq1IsRef); fprintf(stderr, "2 sort the matches\n"); qsort(hits, hitsLen, sizeof(tClumpHit), clumpHitCompareQry); fprintf(stderr, "3 score the matches\n"); int32 bestEnd = score_all_hits(hits, clumpcost, maxjump, hitsLen); // Mark the clumps // fprintf(stderr, "4 mark clumps\n"); uint32 clump = 0; while(bestEnd >= 0) { hits[bestEnd].clump = clump; if (hits[bestEnd].scoreExtend > hits[bestEnd].scoreStart) { bestEnd = hits[bestEnd].bestExtend; } else { bestEnd = hits[bestEnd].bestStart; clump++; } } // Sort the hits by iid, then merge into the output // fprintf(stderr, "5 sort the matches\n"); qsort(hits, hitsLen, sizeof(tClumpHit), clumpHitCompareIID); // For each clump, find the min/max extent in both sequences. We // use this to output the clump match record. 
// int32 *clumpLoRef = new int32 [clump]; int32 *clumpHiRef = new int32 [clump]; int32 *clumpLoQry = new int32 [clump]; int32 *clumpHiQry = new int32 [clump]; bool *clumpOut = new bool [clump]; for (uint32 xx=0; xx= 0) { if (hits[xx].refBeg < clumpLoRef[cc]) clumpLoRef[cc] = hits[xx].refBeg; if (hits[xx].refEnd > clumpHiRef[cc]) clumpHiRef[cc] = hits[xx].refEnd; if (hits[xx].qryBeg < clumpLoQry[cc]) clumpLoQry[cc] = hits[xx].qryBeg; if (hits[xx].qryEnd > clumpHiQry[cc]) clumpHiQry[cc] = hits[xx].qryEnd; } } // Dump the clumps // fprintf(stderr, "6 output matches with clumps\n"); AF->writeHeader(stdout); for (uint32 mm=0; mm= 0) && (clumpOut[cc] == false)) { atacMatch C; sprintf(C.matchuid, "clump"int32FMTW(06), cc); sprintf(C.parentuid, "."); C.matchiid = 0; C.type[0] = 'c'; C.type[1] = 0; C.iid1 = hits[mm].match.iid1; C.iid2 = hits[mm].match.iid2; // Set the position and length based on the correct reference // -- in particular, since we get the IID and orientation from // the copy of the match we don't need to listen to the // seq1IsRef flag for those. 
if (seq1IsRef) { C.pos1 = clumpLoRef[cc]; C.len1 = clumpHiRef[cc] - clumpLoRef[cc]; C.pos2 = clumpLoQry[cc]; C.len2 = clumpHiQry[cc] - clumpLoQry[cc]; } else { C.pos1 = clumpLoQry[cc]; C.len1 = clumpHiQry[cc] - clumpLoQry[cc]; C.pos2 = clumpLoRef[cc]; C.len2 = clumpHiRef[cc] - clumpLoRef[cc]; } C.fwd1 = hits[mm].match.fwd1; C.fwd2 = hits[mm].match.fwd2; C.print(stdout, AF->labelA(), AF->labelB()); clumpOut[cc] = true; } if (cc >= 0) sprintf(hits[mm].match.parentuid, "clump"int32FMTW(06), cc); else sprintf(hits[mm].match.parentuid, "."); hits[mm].match.print(stdout, AF->labelA(), AF->labelB()); } return(0); } kmer-code-2013-trunk/atac-driver/clumpMaker/Make.include0000644000000000000000000000077611512763666021720 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../../libutil/)/ LIBBIO/ :=$(realpath $/../../libbio/)/ LIBSEQ/ :=$(realpath $/../../libseq/)/ LIBATAC/ :=$(realpath $/../libatac/)/ $/.CXX_EXES := $/clumpMaker $/.CXX_SRCS := $/clumpMaker.C $/.CLEAN :=$/*.o $/*~ $/core $/clumpMaker: $/clumpMaker.o \ ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBATAC/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) kmer-code-2013-trunk/atac-driver/statsGenerator/0000755000000000000000000000000012641613360020356 5ustar rootrootkmer-code-2013-trunk/atac-driver/statsGenerator/stats-to-xls.pl0000644000000000000000000004517410425334631023307 0ustar rootroot#!/usr/bin/perl # Convers the output of statsGenerator into a nice Excel spreadsheet. use strict; use lib "/home/bwalenz/linux/lib/perl5/site_perl/5.8.0"; use Spreadsheet::WriteExcel; use Spreadsheet::WriteExcel::Big; if (scalar(@ARGV) != 1) { die "usage: $0 stats-prefix\n"; } my $prefix = shift @ARGV; ################################################################################ # # First, suck in the big ugly stdout from statsGenerator. # if (! 
-e "$prefix.out") {
  die "I looked for the stdout from statsGenerator in '$prefix.out', but didn't find it.\n";
}

#  The workbook is written to "$prefix.xls"; the first sheet is the summary.
my $workbook = Spreadsheet::WriteExcel::Big->new("$prefix.xls");
die "Failed to create '$prefix.xls': $!\n" if (!defined($workbook));

my $summary  = $workbook->add_worksheet("Summary");

#  Format for whole-number cells (built-in number format index 1).
my $format = $workbook->add_format();
$format->set_size(10);
$format->set_color('black');
$format->set_num_format(1);

#  Format for floating point cells: same size and colour, but no number
#  format.  These two setters used to be applied to $format by mistake,
#  which left $formatFP entirely unconfigured.
my $formatFP = $workbook->add_format();
$formatFP->set_size(10);
$formatFP->set_color('black');

#  Format for row and column headings.
my $format_heading = $workbook->add_format();
$format_heading->set_size(10);
$format_heading->set_bold();
$format_heading->set_color('black');
$format_heading->set_num_format(1);

#  NOTE(review): the label and comment formats were each followed by a
#  repeat of the four $format_heading setters above, so neither of them
#  was ever configured.  Applying those settings to them would turn every
#  label and comment bold; they are left as default formats here, which
#  keeps the spreadsheet looking as it always has.  Confirm the intent.
my $format_label   = $workbook->add_format();
my $format_comment = $workbook->add_format();

$summary->set_column(0, 2, 20);
$summary->set_column(3, 3, 30);

my %stats;  # scratch space

open(F, "< $prefix.out") or die "Failed to open '$prefix.out' for reading: $!\n";
while (!eof(F)) {
  $_ = <F>;

  if (m/^\s*$/) {
    # Nop;
  }

  if (m/^SEQUENCE$/) {
    $summary->write(1, 0, "Input Sequences", $format_heading);
    $summary->write(2, 0, "totalLength", $format_label);
    $summary->write(2, 3, "all letters, including N", $format_comment);
    $summary->write(3, 0, "totalLength", $format_label);
    $summary->write(3, 3, "ACGT only", $format_comment);

    $_ = <F>;
    if (m/totalLength\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)\s+#\s+all\s+letters/) {
      $summary->write(0, 1, $1, $format_heading);
      $summary->write(0, 2, $3, $format_heading);

      # remember which column is for which assembly
      $stats{$1} = 1;
      $stats{$3} = 2;

      # and which assembly is in which column
      $stats{1} = $1;
      $stats{2} = $3;

      $summary->write(2, 1, "$2", $format);
      $summary->write(2, 2, "$4", $format);
    } else {
      die "Parse error $_\n";
    }

    $_ = <F>;
    if (m/totalLength\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)\s+#\s+ACGT/) {
      $summary->write(3, 1, "$2", $format);
$summary->write(3, 2, "$4", $format); } else { die "Parse error $_\n"; } } if (m/^TANDEM REPEATS in (.*)$/) { my $asm = $1; $summary->write(5, 0, "Tandem Repeats", $format_heading); $summary->write(6, 0, "number", $format_label); $summary->write(7, 0, "totalLength", $format_label); $summary->write(8, 0, "coveredLength", $format_label); $_ = ; if (m/numberOfItems\s+(\d+)/) { $summary->write(6, $stats{$asm}, "$1", $format); } else { die "Parse error $_\n"; } $_ = ; if (m/totalLength\s+(\d+)\s+#\s+sum\s+of\s+lengths/) { $summary->write(7, $stats{$asm}, "$1", $format); } else { die "Parse error $_\n"; } $_ = ; if (m/coveredLength\s+(\d+)\s+#\s+sequence\s+covered/) { $summary->write(8, $stats{$asm}, "$1", $format); } else { die "Parse error $_\n"; } } if (m/^MATCHES IN RUNS$/) { $summary->write(10, 0, "Matches in Runs", $format_heading); $summary->write(11, 0, "runMissingFull", $format_label); $summary->write(11, 3, "covered by a run, not by a match, including N", $format_comment); $summary->write(12, 0, "runMissingACGT", $format_label); $summary->write(12, 3, "covered by a run, not by a match, ACGT only", $format_comment); $_ = ; if (m/runMissingFull\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)\s+#\s+sequence\s+in\s+run,\s+not\s+covered,\s+including\s+N/) { $summary->write(11, 1, "$2", $format); $summary->write(11, 2, "$4", $format); } else { die "Parse error $_\n"; } $_ = ; if (m/runMissingFull\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)\s+#\s+sequence\s+in\s+run,\s+not\s+covered,\s+ACGT\s+only/) { $summary->write(12, 1, "$2", $format); $summary->write(12, 2, "$4", $format); } else { die "Parse error $_\n"; } } if ((m/^MATCHES$/) || (m/^RUNS$/)) { my $begin; my $chrcov; if (m/^MATCHES$/) { $begin = 14; $summary->write($begin, 0, "Matches", $format_heading); $chrcov = $workbook->add_worksheet("Chr Cov Match"); } else { $begin = 26; $summary->write($begin, 0, "Runs", $format_heading); $chrcov = $workbook->add_worksheet("Chr Cov Run"); } $chrcov->set_column(0, 0, 10); $chrcov->set_column(1, 6, 
20); $summary->write($begin+1, 0, "number", $format_label); $summary->write($begin+2, 0, "totalLength", $format_label); $summary->write($begin+3, 1, "histogram", $format_label); $summary->write($begin+4, 2, "histogram", $format_label); $summary->write($begin+5, 0, "coveredLengthFull", $format_label); $summary->write($begin+6, 0, "coveredLengthACGT", $format_label); $summary->write($begin+7, 0, "coveredLengthNonACGT", $format_label); $summary->write($begin+8, 1, "histogram", $format_label); $summary->write($begin+9, 2, "histogram", $format_label); $_ = ; if (m/numberOfItems\s+(\d+)/) { $summary->write($begin+1, 1, "$1", $format); } else { die "Parse error $_\n"; } $_ = ; if (m/matchLength\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)\s#\sSum\s+of\s+lengths/) { $summary->write($begin+2, 1, "$2", $format); $summary->write($begin+2, 2, "$4", $format); } else { die "Parse error $_\n"; } histogram($begin+3); histogram($begin+4); $_ = ; if (m/coveredLength\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)\s/) { $summary->write($begin+5, 1, "$2", $format); $summary->write($begin+5, 2, "$4", $format); } else { die "Parse error $_\n"; } $_ = ; if (m/coveredLength\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)\s/) { $summary->write($begin+6, 1, "$2", $format); $summary->write($begin+6, 2, "$4", $format); } else { die "Parse error $_\n"; } $_ = ; if (m/coveredLength\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)\s/) { $summary->write($begin+7, 1, "$2", $format); $summary->write($begin+7, 2, "$4", $format); } else { die "Parse error $_\n"; } histogram($begin+8); histogram($begin+9); # chromosome covered $chrcov->write(0, 1, "all sequence covered", $format_heading); $chrcov->write(0, 2, "all sequence length", $format_heading); $chrcov->write(0, 3, "percent covered", $format_heading); $chrcov->write(0, 4, "ACGT sequence covered", $format_heading); $chrcov->write(0, 5, "ACGT sequence length", $format_heading); $chrcov->write(0, 6, "percent covered", $format_heading); for (my $i=1; $i<23; $i++) { $chrcov->write($i, 0, "Chr$i", 
$format_heading); } $chrcov->write(23, 0, "ChrX", $format_heading); $chrcov->write(24, 0, "ChrY", $format_heading); $chrcov->write(25, 0, "ChrMT", $format_heading); $chrcov->write(26, 0, "ChrUn", $format_heading); $_ = ; while (m/chrCoveredLength\[\s*(\d+)\]\s+\S+\s+(\d+)\s+(\d+)\s+(\d+.\d+)%\s+(\d+)\s+(\d+)\s+(\d+.\d+)%\s+/) { my $i = $1 + 1; my $j = $1 + 2; $chrcov->write($i, 1, "$2", $format); $chrcov->write($i, 2, "$3", $format); $chrcov->write($i, 3, "=B$j / C$j * 100", $formatFP); $chrcov->write($i, 4, "$5", $format); $chrcov->write($i, 5, "$6", $format); $chrcov->write($i, 6, "=E$j / F$j * 100", $formatFP); $_ = ; } } } close(F); sub histogram ($) { my $x = shift @_; $_ = ; if (m/histogram\s+\S+\s+(\d+)\s+items\s+(\d+.\d+)\s+average\s+(\d+.\d+)\s+std.dev./) { $summary->write($x, 3, "items, average, std.dev.", $format_comment); $summary->write($x, 4, "$1", $format); $summary->write($x, 5, "$2", $formatFP); $summary->write($x, 6, "$3", $formatFP); } else { die "Parse error $_\n"; } } ################################################################################ # # Nx # my $nx = $workbook->add_worksheet("Nx"); $nx->write(0, 1, "matches", $format_heading); $nx->write(0, 2, "runs", $format_heading); open(A, "< $prefix-matches.Nx"); open(B, "< $prefix-runs.Nx"); while (!eof(A)) { my $a = ; my $b = ; my ($ai, $an) = split '\s+', $a; my ($bi, $bn) = split '\s+', $b; die "Nx error: ai=$ai != bi=$bi\n" if ($ai != $bi); $nx->write($ai, 0, $ai); $nx->write($ai, 1, $an); $nx->write($ai, 2, $bn); } close(B); close(A); ################################################################################ # # Histograms of lengths # # my $sheet = $workbook->add_worksheet("Matches Histogram"); $sheet->set_column(0, 4, 25); $sheet->write(0, 1, "$stats{1} length", $format_heading); $sheet->write(0, 2, "$stats{2} length", $format_heading); $sheet->write(0, 3, "$stats{1} covered N", $format_heading); $sheet->write(0, 4, "$stats{2} covered N", $format_heading); dumpHistogram($sheet, 
"$prefix-matches.AmatchLength.histogramdat", "$prefix-matches.BmatchLength.histogramdat", "$prefix-matches.AcoveredN.histogramdat", "$prefix-matches.BcoveredN.histogramdat"); my $sheet = $workbook->add_worksheet("Runs Histogram"); $sheet->set_column(0, 4, 25); $sheet->write(0, 1, "$stats{1} length", $format_heading); $sheet->write(0, 2, "$stats{2} length", $format_heading); $sheet->write(0, 3, "$stats{1} covered N", $format_heading); $sheet->write(0, 4, "$stats{2} covered N", $format_heading); dumpHistogram($sheet, "stats-runs.AmatchLength.histogramdat", "stats-runs.BmatchLength.histogramdat", "stats-runs.AcoveredN.histogramdat", "stats-runs.BcoveredN.histogramdat"); my $sheet = $workbook->add_worksheet("Run Missing Histogram"); $sheet->set_column(0, 4, 25); $sheet->write(0, 1, "$stats{1} full missing", $format_heading); $sheet->write(0, 3, "$stats{2} full missing", $format_heading); $sheet->write(0, 2, "$stats{1} ACGT missing", $format_heading); $sheet->write(0, 4, "$stats{2} ACGT missing", $format_heading); dumpHistogram($sheet, "stats.ARunMissingFull.histogramdat", "stats.BRunMissingFull.histogramdat", "stats.ARunMissingACGT.histogramdat", "stats.BRunMissingACGT.histogramdat"); sub dumpHistogram { my $sheet = shift @_; my @files = @_; my $col = 1; my $idx = 0; my @range; # I can't seem to find any way of deleting a cell once it's # written (opposed to simply clearing the cell). We want # to know what the maximum value in any histogram is, so # we can stop writing after that point. # my $maxIdx = 0; foreach my $f (@files) { $idx = 0; open(F, "< $f") or die "Failed to open dumpHistogram1 '$f'\n"; my @lines = ; close(F); # Don't use the last line in the file - this is the number # of things bigger than the max, we always report this. pop @lines; foreach my $l (@lines) { my ($r, $v) = split '\s+', $l; $maxIdx = $idx if ($v > 0) && ($maxIdx < $idx); $idx++; } } # Read the range from the first file -- we'll check that all the other files # use the same range. 
# $idx = 0; open(F, "< $files[0]") or die "Failed to open dumpHistogram2 '$files[0]'\n"; while () { my ($r, $v) = split '\s+', $_; $range[$idx] = $r; $sheet->write($idx+1, 0, "$r", $format); $idx++; last if ($idx > $maxIdx); } my $lastVal; while () { my ($r, $v) = split '\s+', $_; $lastVal = $r; } $sheet->write($idx+1, 0, "$lastVal", $format); close(F); foreach my $f (@files) { $idx = 0; open(F, "< $f") or die "Failed to open dumpHistogram3 '$f'\n"; while () { my ($r, $v) = split '\s+', $_; die "range error in file '$f' at idx $idx; $range[$idx] != $r\n" if ($range[$idx] != $r); $sheet->write($idx+1, $col, "$v", $format); $idx++; last if ($idx > $maxIdx); } my $lastVal; while () { my ($r, $v) = split '\s+', $_; $lastVal = $v; } $sheet->write($idx+1, $col, "$lastVal", $format); close(F); $col++; } } ################################################################################ # # By chromosome histograms of lengths # my $sheet = $workbook->add_worksheet("Matches Chr Histogram"); $sheet->set_column(0, 32, 12); for (my $i=1; $i<23; $i++) { $sheet->write(0, $i, "Chr$i ACGT", $format_heading); } $sheet->write(0, 23, "ChrX ACGT", $format_heading); $sheet->write(0, 24, "ChrY ACGT", $format_heading); $sheet->write(0, 25, "ChrMT ACGT", $format_heading); $sheet->write(0, 26, "ChrUn ACGT", $format_heading); dumpHistogram($sheet, "stats-matches.chr00acgt.histogramdat", "stats-matches.chr01acgt.histogramdat", "stats-matches.chr02acgt.histogramdat", "stats-matches.chr03acgt.histogramdat", "stats-matches.chr04acgt.histogramdat", "stats-matches.chr05acgt.histogramdat", "stats-matches.chr06acgt.histogramdat", "stats-matches.chr07acgt.histogramdat", "stats-matches.chr08acgt.histogramdat", "stats-matches.chr09acgt.histogramdat", "stats-matches.chr10acgt.histogramdat", "stats-matches.chr11acgt.histogramdat", "stats-matches.chr12acgt.histogramdat", "stats-matches.chr13acgt.histogramdat", "stats-matches.chr14acgt.histogramdat", "stats-matches.chr15acgt.histogramdat", 
"stats-matches.chr16acgt.histogramdat", "stats-matches.chr17acgt.histogramdat", "stats-matches.chr18acgt.histogramdat", "stats-matches.chr19acgt.histogramdat", "stats-matches.chr20acgt.histogramdat", "stats-matches.chr21acgt.histogramdat", "stats-matches.chr22acgt.histogramdat", "stats-matches.chr23acgt.histogramdat", "stats-matches.chr24acgt.histogramdat", "stats-matches.chr25acgt.histogramdat"); my $sheet = $workbook->add_worksheet("Runs Chr ACGT Histogram"); $sheet->set_column(0, 32, 12); for (my $i=1; $i<23; $i++) { $sheet->write(0, $i, "Chr$i ACGT", $format_heading); } $sheet->write(0, 23, "ChrX ACGT", $format_heading); $sheet->write(0, 24, "ChrY ACGT", $format_heading); $sheet->write(0, 25, "ChrMT ACGT", $format_heading); $sheet->write(0, 26, "ChrUn ACGT", $format_heading); dumpHistogram($sheet, "stats-runs.chr00acgt.histogramdat", "stats-runs.chr01acgt.histogramdat", "stats-runs.chr02acgt.histogramdat", "stats-runs.chr03acgt.histogramdat", "stats-runs.chr04acgt.histogramdat", "stats-runs.chr05acgt.histogramdat", "stats-runs.chr06acgt.histogramdat", "stats-runs.chr07acgt.histogramdat", "stats-runs.chr08acgt.histogramdat", "stats-runs.chr09acgt.histogramdat", "stats-runs.chr10acgt.histogramdat", "stats-runs.chr11acgt.histogramdat", "stats-runs.chr12acgt.histogramdat", "stats-runs.chr13acgt.histogramdat", "stats-runs.chr14acgt.histogramdat", "stats-runs.chr15acgt.histogramdat", "stats-runs.chr16acgt.histogramdat", "stats-runs.chr17acgt.histogramdat", "stats-runs.chr18acgt.histogramdat", "stats-runs.chr19acgt.histogramdat", "stats-runs.chr20acgt.histogramdat", "stats-runs.chr21acgt.histogramdat", "stats-runs.chr22acgt.histogramdat", "stats-runs.chr23acgt.histogramdat", "stats-runs.chr24acgt.histogramdat", "stats-runs.chr25acgt.histogramdat"); my $sheet = $workbook->add_worksheet("Runs Chr Full Histogram"); $sheet->set_column(0, 32, 12); for (my $i=1; $i<23; $i++) { $sheet->write(0, $i, "Chr$i Full", $format_heading); } $sheet->write(0, 23, "ChrX FULL", 
$format_heading); $sheet->write(0, 24, "ChrY FULL", $format_heading); $sheet->write(0, 25, "ChrMT FULL", $format_heading); $sheet->write(0, 26, "ChrUn FULL", $format_heading); dumpHistogram($sheet, "stats-runs.chr00full.histogramdat", "stats-runs.chr01full.histogramdat", "stats-runs.chr02full.histogramdat", "stats-runs.chr03full.histogramdat", "stats-runs.chr04full.histogramdat", "stats-runs.chr05full.histogramdat", "stats-runs.chr06full.histogramdat", "stats-runs.chr07full.histogramdat", "stats-runs.chr08full.histogramdat", "stats-runs.chr09full.histogramdat", "stats-runs.chr10full.histogramdat", "stats-runs.chr11full.histogramdat", "stats-runs.chr12full.histogramdat", "stats-runs.chr13full.histogramdat", "stats-runs.chr14full.histogramdat", "stats-runs.chr15full.histogramdat", "stats-runs.chr16full.histogramdat", "stats-runs.chr17full.histogramdat", "stats-runs.chr18full.histogramdat", "stats-runs.chr19full.histogramdat", "stats-runs.chr20full.histogramdat", "stats-runs.chr21full.histogramdat", "stats-runs.chr22full.histogramdat", "stats-runs.chr23full.histogramdat", "stats-runs.chr24full.histogramdat", "stats-runs.chr25full.histogramdat"); kmer-code-2013-trunk/atac-driver/statsGenerator/Make.include0000644000000000000000000000101211512763666022605 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../../libutil/)/ LIBBIO/ :=$(realpath $/../../libbio/)/ LIBSEQ/ :=$(realpath $/../../libseq/)/ LIBATAC/ :=$(realpath $/../libatac/)/ $/.CXX_EXES := $/statsGenerator $/.CXX_SRCS := $/statsGenerator.C $/.CLEAN :=$/*.o $/*~ $/core $/statsGenerator: $/statsGenerator.o \ ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBATAC/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) kmer-code-2013-trunk/atac-driver/statsGenerator/statsGenerator.C0000644000000000000000000005665512415073322023505 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005, 2006 J. 
Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // Compute some simple statistics on a set of matches #include #include #include #include #include "atac.H" #include "util++.H" #include "bio++.H" #include "seqCache.H" bool noHistogramPlots = true; // Sort uint32 backwards int uint32compare(const void *a, const void *b) { const uint32 A = *((const uint32 *)a); const uint32 B = *((const uint32 *)b); if (A < B) return(1); if (A > B) return(-1); return(0); } class histogram { public: histogram(uint64 blockSize, uint64 maxSize) { _b = blockSize; _m = maxSize; _l = 0; _h = new uint32 [maxSize / blockSize + 1]; for (uint32 i=0; i= _eMax) { _eMax *= 2; uint32 *e = new uint32 [_eMax]; memcpy(e, _e, sizeof(uint32) * _eLen); delete [] _e; _e = e; } _e[_eLen++] = x; if (x > _m) _l++; else _h[x/_b]++; }; void show(char const *label) { double average = 0; double stddev = 0; for (uint32 i=0; i<_eLen; i++) average += _e[i]; average /= _eLen; for (uint32 i=0; i<_eLen; i++) stddev += (_e[i] - average) * (_e[i] - average); stddev = sqrt(stddev / _eLen); fprintf(stdout, "histogram %s "uint32FMT" items %8.3f average %8.3f std.dev.\n", label, _eLen, average, stddev); }; void dump(char const *prefix, char const *label) { if (noHistogramPlots) return; char filename[1024]; 
sprintf(filename, "%s.%s.histogramdat", prefix, label); FILE *out = fopen(filename, "w"); for (uint64 i=0; i<_m / _b; i++) fprintf(out, uint64FMT" "uint32FMT"\n", i * _b, _h[i]); fprintf(out, ">"uint64FMT" "uint32FMT"\n", _m, _l); fclose(out); } void plot(char const *prefix, char const *label) { if (noHistogramPlots) return; // Find max's of the data uint64 maxx = 0; uint64 maxy = 0; for (uint64 i=0; i<_m / _b; i++) { if (_h[i] > 0) maxx = i * _b; if (maxy < _h[i]) maxy = _h[i]; } if ((maxx == 0) || (maxy == 0)) return; char filename[1024]; sprintf(filename, "%s.%s.histogram.gnuplot", prefix, label); FILE *out = fopen(filename, "w"); fprintf(out, "set terminal postscript color\n"); fprintf(out, "set output \"%s.%s.histogram.ps\"\n", prefix, label); fprintf(out, "set xlabel \"length bp\"\n"); fprintf(out, "set ylabel \"number of matches\"\n"); fprintf(out, "plot [0:"uint64FMT"][0:"uint64FMT"] \"%s.%s.histogramdat\" using 2 with lines\n", maxx, maxy, prefix, label); fprintf(out, "set output \"%s.%s.histogram.closeup.ps\"\n", prefix, label); fprintf(out, "plot [0:"uint64FMT"][0:"uint64FMT"] \"%s.%s.histogramdat\" using 2 with lines\n", maxx/10, maxy, prefix, label); fprintf(out, "quit\n"); fclose(out); sprintf(filename, "gnuplot < %s.%s.histogram.gnuplot", prefix, label); if (system(filename)) fprintf(stderr, "Failed to execute '%s'\n", filename); }; private: uint64 _b; // blockSize uint64 _m; // maximum element size uint32 _l; // number of things bigger than _m uint32 *_h; // the histogram uint32 _eMax; uint32 _eLen; uint32 *_e; // the elements -- for computing the stats; }; // Compute the total gapped and ungapped length of the input // sequences. Uses atacMatchList only to access the underlying fasta // sequences. 
// void totalLength(atacFile &AF, seqCache *A, seqCache *B) { uint64 length1 = 0; uint64 length2 = 0; for (uint32 i=0; igetNumberOfSequences(); i++) length1 += A->getSequenceLength(i); for (uint32 i=0; igetNumberOfSequences(); i++) length2 += B->getSequenceLength(i); fprintf(stdout, "totalLength %s "uint64FMT" %s "uint64FMT" # all letters, including N\n", AF.labelA(), length1, AF.labelB(), length2); length1 = 0; length2 = 0; for (uint32 i=0; igetNumberOfSequences(); i++) { seqInCore *S = A->getSequenceInCore(i); char *s = S->sequence(); for (uint32 j=0; jsequenceLength(); j++) if (letterToBits[s[j]] != 0xff) length1++; } for (uint32 i=0; igetNumberOfSequences(); i++) { seqInCore *S = B->getSequenceInCore(i); char *s = S->sequence(); for (uint32 j=0; jsequenceLength(); j++) if (letterToBits[s[j]] != 0xff) length2++; } fprintf(stdout, "totalLength %s "uint64FMT" %s "uint64FMT" # ACGT only\n", AF.labelA(), length1, AF.labelB(), length2); } uint64 tandemRepeatACGTLength(intervalList &il, uint64 *offset, seqCache *A) { // s -- the sequence // i -- the interval list index il.merge(); uint64 length = 0; uint64 unknown[256] = {0}; for (uint32 i=0, s=0; igetSequenceInCore(s)->sequence(); uint64 lo = il.lo(i) - offset[s]; uint64 hi = il.hi(i) - offset[s]; for (uint64 j=lo; j < hi; j++) if (letterToBits[S[j]] != 0xff) length++; else unknown[S[j]]++; } //fprintf(stderr, "tandemRepeatACGTLength: "uint64FMT"\n", length); //for (uint32 i=0; i<256; i++) // if (unknown[i] > 0) // fprintf(stderr, "tandemRepeatACGTLength["uint32FMT"] = "uint64FMT" (%c)\n", i, unknown[i], i); return(length); } uint64 * buildOffset(seqCache *F) { uint64 *offset = new uint64 [F->getNumberOfSequences() + 1]; offset[0] = 1000000; for (uint32 i=0; igetNumberOfSequences(); i++) offset[i+1] = offset[i] + F->getSequenceLength(i) + 1; return(offset); } void tandemRepeatStats(atacFileStream &featuresA, atacFileStream &featuresB, atacFile &AF, seqCache *A, seqCache *B) { intervalList ifa, ifb; intervalList ima, 
imb; intervalList mma, mmb; atacMatchList &matches = *AF.matches(); uint64 *offset1 = buildOffset(A); uint64 *offset2 = buildOffset(B); // ifa, ifb are intervalLists, storing the intervals labeled as // tandem repeats. They are using the offset[] to encode the // entire sequence as one consecutive string. // atacFeature *f = 0L; while ((f = featuresA.nextFeature("tr")) != 0L) ifa.add(offset1[f->iid] + f->pos, f->len); while ((f = featuresB.nextFeature("tr")) != 0L) ifb.add(offset2[f->iid] + f->pos, f->len); // ima, imb, like if?, encode the matches in one string. // for (uint32 m=0; miid1] + (uint64)matches[m]->pos1, (uint64)matches[m]->len1); for (uint32 m=0; miid2] + (uint64)matches[m]->pos2, (uint64)matches[m]->len2); fprintf(stdout, "\nTANDEM REPEATS in %s\n", AF.labelA()); fprintf(stdout, "numberOfItems "uint64FMT"\n", (uint64)ifa.numberOfIntervals()); fprintf(stdout, "totalLength "uint64FMT" # sum of lengths of all features\n", ifa.sumOfLengths()); ifa.merge(); fprintf(stdout, "numberOfItems "uint64FMT" # after merging overlapping regions\n", (uint64)ifa.numberOfIntervals()); fprintf(stdout, "coveredLength "uint64FMT" # sequence covered by a feature, including N\n", ifa.sumOfLengths()); fprintf(stdout, "coveredLength "uint64FMT" # sequence covered by a feature, ACGT only\n", tandemRepeatACGTLength(ifa, offset1, A)); mma.intersect(ifa, ima); fprintf(stdout, "numberOfItems "uint64FMT" # after merging overlapping regions, only in matches\n", (uint64)mma.numberOfIntervals()); fprintf(stdout, "inMatches "uint64FMT" # sequence covered by a feature and in a match, including N\n", mma.sumOfLengths()); fprintf(stdout, "inMatches "uint64FMT" # sequence covered by a feature and in a match, ACGT only\n", tandemRepeatACGTLength(mma, offset1, A)); fprintf(stdout, "\nTANDEM REPEATS in %s\n", AF.labelB()); fprintf(stdout, "numberOfItems "uint64FMT"\n", (uint64)ifb.numberOfIntervals()); fprintf(stdout, "totalLength "uint64FMT" # sum of lengths of all features\n", 
ifb.sumOfLengths()); ifb.merge(); fprintf(stdout, "numberOfItems "uint64FMT" # after merging overlapping regions\n", (uint64)ifb.numberOfIntervals()); fprintf(stdout, "coveredLength "uint64FMT" # sequence covered by a feature, including N\n", ifb.sumOfLengths()); fprintf(stdout, "coveredLength "uint64FMT" # sequence covered by a feature, ACGT only\n", tandemRepeatACGTLength(ifb, offset2, B)); mmb.intersect(ifb, imb); fprintf(stdout, "numberOfItems "uint64FMT" # after merging overlapping regions, only in matches\n", (uint64)mmb.numberOfIntervals()); fprintf(stdout, "inMatches "uint64FMT" # sequence covered by a feature and in a match, including N\n", mmb.sumOfLengths()); fprintf(stdout, "inMatches "uint64FMT" # sequence covered by a feature and in a match, ACGT only\n", tandemRepeatACGTLength(mmb, offset2, B)); delete [] offset1; delete [] offset2; } void mappedLengths(atacFile &AF, atacMatchList &matches, seqCache *A, seqCache *B, char *prefix) { histogram h1(100, 1000000); histogram h2(100, 1000000); // For the coverage to work correctly, we need to either have one // intervalList per input sequence, or build a table of the chained // sequence positions. 
// uint64 *offset1 = buildOffset(AF.fastaA()); uint64 *offset2 = buildOffset(AF.fastaB()); intervalList intervalA; intervalList intervalB; for (uint32 m=0; miid1] + (uint64)matches[m]->pos1, (uint64)matches[m]->len1); intervalB.add(offset2[matches[m]->iid2] + (uint64)matches[m]->pos2, (uint64)matches[m]->len2); h1.add(matches[m]->len1); h2.add(matches[m]->len2); } fprintf(stdout, "numberOfItems "uint64FMT"\n", (uint64)matches.numberOfMatches()); fprintf(stdout, "matchLength %s "uint64FMT" %s "uint64FMT" # Sum of lengths of sequence in matches\n", AF.labelA(), (uint64)intervalA.sumOfLengths(), AF.labelB(), (uint64)intervalB.sumOfLengths()); h1.show("AmatchLength"); h2.show("BmatchLength"); h1.dump(prefix, "AmatchLength"); h1.plot(prefix, "AmatchLength"); h2.dump(prefix, "BmatchLength"); h2.plot(prefix, "BmatchLength"); intervalA.merge(); intervalB.merge(); fprintf(stdout, "coveredLength %s "uint64FMT" %s "uint64FMT" # sequence covered by a match, including N\n", AF.labelA(), (uint64)intervalA.sumOfLengths(), AF.labelB(), (uint64)intervalB.sumOfLengths()); fprintf(stdout, "coveredLength %s "uint64FMT" %s "uint64FMT" # sequence covered by a match, ACGT only (new)\n", AF.labelA(), tandemRepeatACGTLength(intervalA, offset1, A), AF.labelB(), tandemRepeatACGTLength(intervalB, offset2, B)); delete [] offset1; delete [] offset2; } // Generate an Nx plot void NxOfMapped(atacFile &AF, atacMatchList &matches, uint64 genomeSize, char *prefix) { uint32 *n50 = new uint32 [matches.numberOfMatches()]; for (uint32 i=0; ilen1; // Compute the total length of the sequence uint64 totalLength = 0; switch (genomeSize) { case 0: for (uint32 i=0; igetNumberOfSequences(); i++) totalLength += AF.fastaA()->getSequenceLength(i); break; case 1: for (uint32 i=0; igetNumberOfSequences(); i++) totalLength += AF.fastaB()->getSequenceLength(i); break; default: totalLength = genomeSize; break; } // Sort the n50 list of lengths qsort(n50, matches.numberOfMatches(), sizeof(uint32), uint32compare); // 
It's slow and obvious and, yes, there is a better way. Dump the // Nx plot as it's being generated. // char filename[1024]; sprintf(filename, "%s.Nx", prefix); FILE *out = fopen(filename, "w"); for (uint64 n=1; n<100; n++) { uint64 limit = totalLength / 100 * n; uint64 iter = 0; uint64 sum = 0; while ((sum < limit) && (iter < matches.numberOfMatches())) sum += n50[iter++]; fprintf(out, uint64FMT" "uint32FMT"\n", n, n50[iter-1]); } fclose(out); // Now plot it. // if (noHistogramPlots == false) { sprintf(filename, "%s.Nx.gnuplot", prefix); out = fopen(filename, "w"); fprintf(out, "set terminal postscript color\n"); fprintf(out, "set output \"%s.Nx.ps\"\n", prefix); fprintf(out, "set xlabel \"N\"\n"); fprintf(out, "set ylabel \"match length\"\n"); fprintf(out, "plot \"%s.Nx\" using 2 with lines\n", prefix); fclose(out); sprintf(filename, "gnuplot < %s.Nx.gnuplot", prefix); if (system(filename)) fprintf(stderr, "Failed to execute '%s'\n", filename); } delete [] n50; } // Computes the percentage of each chromosome (assumes chromosomes are A) // that is mapped, with and without N's. 
// void MappedByChromosome(atacFile &AF, atacMatchList &matches, seqCache *A, seqCache *B, char *prefix) { uint32 maxIID1 = A->getNumberOfSequences(); intervalList *il1full; intervalList *il1acgt; histogram **hist1full; histogram **hist1acgt; if (A->getNumberOfSequences() > 24) { fprintf(stderr, "WARNING: too many sequences to be chromosomes, only using the first 24.\n"); maxIID1 = 24; } // We could cache this when we compute the totalLength() above uint64 *nonNlength = new uint64 [maxIID1+1]; for (uint32 i=0; igetSequenceInCore(i); char *s = S->sequence(); nonNlength[i] = 0; for (uint32 j=0; jsequenceLength(); j++) if (letterToBits[s[j]] != 0xff) nonNlength[i]++; } il1full = new intervalList [maxIID1 + 1]; il1acgt = new intervalList [maxIID1 + 1]; hist1full = new histogram * [maxIID1 + 1]; hist1acgt = new histogram * [maxIID1 + 1]; for (uint32 i=0; iiid1 < maxIID1) { il1full[matches[m]->iid1].add(matches[m]->pos1, matches[m]->len1); hist1full[matches[m]->iid1]->add(matches[m]->len1); seqInCore *Sa = A->getSequenceInCore(matches[m]->iid1); char *sa = Sa->sequence() + matches[m]->pos1; uint32 length = 0; for (uint32 j=0; jlen1; j++) { bool invalid = (letterToBits[sa[j]] == 0xff); if (!invalid) length++; if (length && invalid) { // Last time we were ACGT, this time not. 
il1acgt[matches[m]->iid1].add(matches[m]->pos1 + j - length, length); hist1acgt[matches[m]->iid1]->add(length); length = 0; } } if (length) { il1acgt[matches[m]->iid1].add(matches[m]->pos1 + matches[m]->len1 - length, length); hist1acgt[matches[m]->iid1]->add(length); } } } for (uint32 c=0; cgetSequenceLength(c), 100.0 * il1full[c].sumOfLengths() / A->getSequenceLength(c), il1acgt[c].sumOfLengths(), nonNlength[c], 100.0 * il1acgt[c].sumOfLengths() / nonNlength[c]); } for (uint32 c=0; cdump(prefix, label); hist1full[c]->plot(prefix, label); sprintf(label, "chr"uint32FMTW(02)"acgt", c); hist1acgt[c]->dump(prefix, label); hist1acgt[c]->plot(prefix, label); } delete [] il1full; delete [] il1acgt; for (uint32 i=0; i *IL, histogram *HI) { char *s = S->sequence() + beg; uint32 length = 0; for (uint32 j=0; jadd(beg + j - length, length); if (HI) HI->add(length); length = 0; } } if (length) { if (IL) IL->add(beg + len - length, length); if (HI) HI->add(length); } } // Computes the amount of ACGT in runs that is unmapped // void unmappedInRuns(atacFile &AF, seqCache *A, seqCache *B, char *prefix) { atacMatchList &matches = *AF.matches(); // We must sort by the location and not the parentID; when we // stream through, we check that the pair of matches are in the // same parent. 
// atacMatchOrder MO(matches); MO.sortA(); intervalList il1full, il2full; intervalList il1acgt, il2acgt; histogram hist1full(100, 1000000), hist2full(100, 1000000); histogram hist1acgt(100, 1000000), hist2acgt(100, 1000000); for (uint32 i=1; iparentuid, MO[i]->parentuid) == 0) { uint32 l1, r1, l2, r2; if (MO[i]->fwd2 == 1) { l1 = MO[i-1]->pos1 + MO[i-1]->len1; r1 = MO[i]->pos1; l2 = MO[i-1]->pos2 + MO[i-1]->len2; r2 = MO[i]->pos2; } else { l1 = MO[i-1]->pos1 + MO[i-1]->len1; r1 = MO[i]->pos1; l2 = MO[i]->pos2 + MO[i]->len2; r2 = MO[i-1]->pos2; } il1full.add(l1, r1-l1); il2full.add(l2, r2-l2); hist1full.add(r1-l1); hist2full.add(r2-l2); statsInACGT(A->getSequenceInCore(MO[i]->iid1), l1, r1-l1, &il1acgt, &hist1acgt); statsInACGT(B->getSequenceInCore(MO[i]->iid2), l2, r2-l2, &il2acgt, &hist2acgt); } } // Dump the stats fprintf(stdout, "runMissingFull %s "uint64FMT" %s "uint64FMT" # sequence in run, not covered, including N\n", AF.labelA(), (uint64)il1full.sumOfLengths(), AF.labelB(), (uint64)il2full.sumOfLengths()); fprintf(stdout, "runMissingFull %s "uint64FMT" %s "uint64FMT" # sequence in run, not covered, ACGT only\n", AF.labelA(), (uint64)il1acgt.sumOfLengths(), AF.labelB(), (uint64)il2acgt.sumOfLengths()); hist1full.dump(prefix, "ARunMissingFull"); hist1full.plot(prefix, "ARunMissingFull"); hist2full.dump(prefix, "BRunMissingFull"); hist2full.plot(prefix, "BRunMissingFull"); hist1acgt.dump(prefix, "ARunMissingACGT"); hist1acgt.plot(prefix, "ARunMissingACGT"); hist2acgt.dump(prefix, "BRunMissingACGT"); hist2acgt.plot(prefix, "BRunMissingACGT"); } int main(int argc, char **argv) { uint64 genomeSize = 0; char *atacFileName = 0L; char *prefix = 0L; char *trFile1 = 0L; char *trFile2 = 0L; char prefixFull[1024]; bool error = false; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-g") == 0) { ++arg; if (argv[arg][0] == 'A') { genomeSize = 0; } else if (argv[arg][0] == 'B') { genomeSize = 1; } else { genomeSize = strtouint64(argv[arg], 0L); } } else if 
(strcmp(argv[arg], "-a") == 0) { atacFileName = argv[++arg]; } else if (strcmp(argv[arg], "-p") == 0) { prefix = argv[++arg]; } else if (strcmp(argv[arg], "-ta") == 0) { trFile1 = argv[++arg]; } else if (strcmp(argv[arg], "-tb") == 0) { trFile2 = argv[++arg]; } else { error = true; } arg++; } if (!atacFileName || !prefix || error) { fprintf(stderr, "usage: %s -a -p [-ta trfile] [-tb trfile] [-g {A | B | g}]\n", argv[0]); fprintf(stderr, " -a read input from 'file.atac'\n"); fprintf(stderr, " -p write stats to files prefixed with 'outprefix'\n"); fprintf(stderr, " -g use a genome size of g for the Nx computation, defaults to\n"); fprintf(stderr, " the length of the A sequence. Or use the actual length\n"); fprintf(stderr, " of sequence A or B.\n"); fprintf(stderr, " -ta read tandem repeats for A from trfile\n"); fprintf(stderr, " -tb read tandem repeats for B from trfile\n"); exit(1); } atacFile AF(atacFileName); atacMatchList &matches = *AF.matches(); atacMatchList &runs = *AF.runs(); atacMatchList &clumps = *AF.clumps(); // We end up using sequences a lot here, so just bite it and load them in a cache. // seqCache *A = new seqCache(AF.assemblyFileA(), 0, true); seqCache *B = new seqCache(AF.assemblyFileB(), 0, true); A->loadAllSequences(); B->loadAllSequences(); fprintf(stdout, "\nSEQUENCE\n"); totalLength(AF, A, B); if (trFile1 && trFile2) { atacFileStream tr1(trFile1); atacFileStream tr2(trFile2); tandemRepeatStats(tr1, tr2, AF, A, B); } // XXX unmappedInRuns only works on runs, and if we have clumps in // the input it fails. 
// if ((runs.numberOfMatches() > 0) && (clumps.numberOfMatches() == 0)) { fprintf(stdout, "\nMATCHES IN RUNS\n"); unmappedInRuns(AF, A, B, prefix); } if (matches.numberOfMatches() > 0) { fprintf(stdout, "\nMATCHES\n"); sprintf(prefixFull, "%s-matches", prefix); mappedLengths(AF, matches, A, B, prefixFull); NxOfMapped(AF, matches, genomeSize, prefixFull); MappedByChromosome(AF, matches, A, B, prefixFull); } if (runs.numberOfMatches() > 0) { fprintf(stdout, "\nRUNS\n"); sprintf(prefixFull, "%s-runs", prefix); mappedLengths(AF, runs, A, B, prefixFull); NxOfMapped(AF, runs, genomeSize, prefixFull); MappedByChromosome(AF, runs, A, B, prefixFull); } if (clumps.numberOfMatches() > 0) { fprintf(stdout, "\nCLUMPS\n"); sprintf(prefixFull, "%s-clumps", prefix); mappedLengths(AF, clumps, A, B, prefixFull); NxOfMapped(AF, clumps, genomeSize, prefixFull); MappedByChromosome(AF, clumps, A, B, prefixFull); } delete A; delete B; return(0); } kmer-code-2013-trunk/atac-driver/run-length-histogram.pl0000644000000000000000000000164010236607065021770 0ustar rootroot#!/usr/bin/perl # Reads a list of numbers on stdin, computes a (blocked) histogram. # # If there are two numbers per line, they are assumed to be # a begin-end pair. 
#  grep "M u " ATAC/atac.shift.atac | cut -d' ' -f 7 | perl run-length-histogram.pl > atac.histogram
#  grep "M u " ATAC/box2.shift.atac | cut -d' ' -f 7 | perl run-length-histogram.pl > box2.histogram

my @histogram;
my $blocksize = 1000;

#  NOTE(review): the input handle was stripped in extraction; STDIN is
#  restored from the header comment ("Reads a list of numbers on stdin").
while (<STDIN>) {
    s/^\s+//;
    s/\s+$//;

    my @vals = split /\s+/, $_;
    my $val;

    if      (scalar(@vals) == 1) {
        #  A single length.
        $val = $vals[0];
    } elsif (scalar(@vals) == 2) {
        #  A begin-end pair, in either order.  (This branch used to test
        #  for one value again, so it could never be taken.)
        $val = $vals[1] - $vals[0];
        $val = $vals[0] - $vals[1] if ($val < 0);
    } else {
        #  Blank or malformed line; skip it instead of counting it in bin 0.
        next;
    }

    $histogram[int($val / $blocksize)]++;
}

#  One bin past the last used one is printed, so the histogram ends in a zero.
my $max = scalar(@histogram) + 1;

for (my $i=0; $i<$max; $i++) {
    $histogram[$i] = 0 if (!defined($histogram[$i]));
    print "$i $histogram[$i]\n";
}
kmer-code-2013-trunk/atac-driver/alignOverlap/0000755000000000000000000000000012641613360017774 5ustar rootrootkmer-code-2013-trunk/atac-driver/alignOverlap/overlap-process.C0000644000000000000000000000677212322046702023234 0ustar rootroot
// This file is part of A2Amapper.
// Copyright (c) 2005 J. Craig Venter Institute
// Author: Brian Walenz
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "overlap.H" // I really wanted this to be parameterized with two macros, but the // preprocessor merges, then replaces: // #define INDEXA 1 // #define INDEXB 2 // #define NODE node ## INDEXA // results in 'nodeINDEXA' not 'node1' void NAME(FILE *outfile, spanTree *S, atacMatchList *M1, atacMatchList *M2, overlapStats &stats, annoList *AL, uint32 &ALlen, uint32 &ALmax) { dnode_t *node = dict_first(S->_tree); while (node) { span_t *span = (span_t *)dnode_getkey(node); uint32 spanLen = span->_end - span->_beg; if (span->_matchesLen == 0) { stats.unmapped += spanLen; printAnno(outfile, AL, ALlen, 'U', INDEX, span); } else if (span->_matchesLen == 1) { uint32 match = span->_matches[0]; atacMatch *m; if (match >> COLORSHIFT) { m = M2->getMatch(match & COLORMASK); stats.map2unique += spanLen; } else { m = M1->getMatch(match & COLORMASK); stats.map1unique += spanLen; } printAnno(outfile, AL, ALlen, '1', INDEX, span, match, m); } else if ((span->_matchesLen == 2) && ((span->_matches[0] >> COLORSHIFT) == (span->_matches[1] >> COLORSHIFT))) { stats.inconsistent += spanLen; printAnno(outfile, AL, ALlen, '?', INDEX, span); } else if (span->_matchesLen == 2) { uint32 match1 = span->_matches[0]; uint32 match2 = span->_matches[1]; if (match1 >> COLORSHIFT) { match1 = span->_matches[1]; match2 = span->_matches[0]; } atacMatch *m1 = M1->getMatch(match1 & COLORMASK); atacMatch *m2 = M2->getMatch(match2 & COLORMASK); if (m1->iid2 == m2->iid2) { uint32 off1 = span->_beg - m1->POS1; uint32 pos1l = m1->POS2 + off1; uint32 pos1r = m1->POS2 + m1->LEN2 - off1; uint32 off2 = span->_beg - m2->POS1; uint32 pos2l = m2->POS2 + off2; uint32 pos2r = m2->POS2 + m2->LEN2 - off2; if ((pos1l == pos2l) || (pos1r == pos2r)) { stats.same += spanLen; printAnno(outfile, AL, 
ALlen, 'Y', INDEX, span, match1, m1, match2, m2); } else { stats.different += spanLen; printAnno(outfile, AL, ALlen, 'N', INDEX, span, match1, m1, match2, m2); } } else { // Wildly different matches! Mapped to different scaffolds! stats.wilddiff += spanLen; printAnno(outfile, AL, ALlen, '!', INDEX, span, match1, m1, match2, m2); } } else { stats.inconsistent += spanLen; printAnno(outfile, AL, ALlen, '?', INDEX, span); } node = dict_next(S->_tree, node); } } kmer-code-2013-trunk/atac-driver/alignOverlap/summarizeDisagree.pl0000644000000000000000000000610310232510526024003 0ustar rootroot#!/usr/bin/perl use strict; # Computes the number and cumulative length of regions where two atac # mappings disagree. # # Reports when the region maps to: # small -- same sequence, close together # large -- same scaffold, not close together # major -- different scaffold # # Automagically generates a plot # if (scalar(@ARGV != 2)) { print STDERR "usage: $0 some.atac outprefix\n"; exit(1); } my $filename = shift @ARGV; my $outprefix = shift @ARGV; my $smallLimit = 400; my @smallH; my @smallHLen; my $large = 0; my $largeLen = 0; my $major = 0; my $majorLen = 0; open(F, "< $filename") or die "Failed to open $filename.\n"; while () { if (m/^[N!]\s+(\d+):(\d+)-(\d+)\[\s*\d+\].*\s(\d+):\s*(\d+)-\s*(\d+)\).*\s(\d+):\s*(\d+)-\s*(\d+)\)/) { my ($id1, $b1, $e1) = ($1, $2, $3); my ($id2a, $b2a, $e2a) = ($4, $5, $6); my ($id2b, $b2b, $e2b) = ($7, $8, $9); if ($id2a == $id2b) { my $diff; $diff = $b2b - $b2a; $diff = $b2a - $b2b if ($b2a > $b2b); if ($diff < $smallLimit) { $smallH[$diff]++; $smallHLen[$diff] += $e1 - $b1; } else { $large++; $largeLen += $e1 - $b1; } } else { $major++; $majorLen += $e1 - $b1; } } } close(F); # output is # # distance away # number of regions # number of bp in those regions # cumulative number of regions # cumulative number of bp in those regions # my $sumH = 0; my $sumHLen = 0; open(F, "> $outprefix.dat"); for (my $i=1; $i<$smallLimit; $i++) { $sumH += $smallH[$i]; 
$sumHLen += $smallHLen[$i] / 10; print F "$i $smallH[$i] $smallHLen[$i] $sumH $sumHLen\n" if (defined($smallH[$i])); } close(F); print STDERR "at most $smallLimit bp away: $sumH regions $sumHLen bp\n"; print STDERR "at least $smallLimit bp away: $large regions $largeLen bp\n"; print STDERR "different sequence: $major regions $majorLen bp\n"; open(F, "> $outprefix.gnuplot"); print F "set terminal postscript color\n"; print F "set output \"$outprefix.ps\"\n"; print F "set xlabel \"bp Difference in Match Location\"\n"; print F "set ylabel \"\"\n"; print F "plot [][0:300000] \"$outprefix.dat\" using 2 with lines title \"Number of Regions\", \\\n"; print F " \"$outprefix.dat\" using 3 with lines title \"bp in Regions\", \\\n"; print F " \"$outprefix.dat\" using 4 with lines title \"Cumulative Number of Regions\", \\\n"; print F " \"$outprefix.dat\" using 5 with lines title \"Cumulative bp in Regions / 10\"\n"; print F "plot [0:100][0:300000] \"$outprefix.dat\" using 2 with lines title \"Number of Regions\", \\\n"; print F " \"$outprefix.dat\" using 3 with lines title \"bp in Regions\", \\\n"; print F " \"$outprefix.dat\" using 4 with lines title \"Cumulative Number of Regions\", \\\n"; print F " \"$outprefix.dat\" using 5 with lines title \"Cumulative bp in Regions / 10\"\n"; close(F); system("gnuplot < $outprefix.gnuplot"); kmer-code-2013-trunk/atac-driver/alignOverlap/overlap-printAnno.C0000644000000000000000000000623212322046702023515 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. 
// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "overlap.H" void printAnno(FILE *F, annoList *AL, uint32 &ALlen, char label, uint32 axis, span_t *span, uint32 match1, atacMatch *m1, uint32 match2, atacMatch *m2) { // If we're just given match1, make it match2 if it is the second mapping // if ((match1 >> COLORSHIFT) && (match2 == uint32ZERO)) { match2 = match1; m2 = m1; match1 = 0; m1 = 0; } uint32 len = span->_end - span->_beg; // axis is 1 or 2; if we're the first axis (B35 centric) make a // list of the matches for later processing if (axis == 1) AL[ALlen++].add(label, span->_iid, span->_beg, len, match1 & COLORMASK, m1, match2 & COLORMASK, m2); fprintf(F, "%c "uint32FMTW(4)":"uint32FMTW(09)"-"uint32FMTW(09)"["uint32FMTW(6)"] ", label, span->_iid, span->_beg, span->_end, len); if (m1) { fprintf(F, "%s ", m1->matchuid); uint32 off1 = span->_beg - m1->pos1; if (axis == 1) { uint32 sta = m1->pos2 + off1; uint32 end = m1->pos2 + off1 + len; if (m1->fwd2 == 0) { sta = m1->pos2 + m1->len2 - off1; end = m1->pos2 + m1->len2 - off1 - len; } fprintf(F, "("uint32FMTW(8)": "uint32FMTW(9)"-"uint32FMTW(9)") ", m1->iid2, sta, end); } else { fprintf(F, "("uint32FMTW(8)": "uint32FMTW(9)"-"uint32FMTW(9)") ", m1->iid1, m1->pos1 + off1, m1->pos1 + off1 + len); } } else { fprintf(F, uint32FMTW(07)" ", uint32ZERO); fprintf(F, "("uint32FMTW(8)": "uint32FMTW(9)"-"uint32FMTW(9)") ", uint32ZERO, uint32ZERO, uint32ZERO); } if (m2) { fprintf(F, "%s ", m2->matchuid); uint32 off2 = span->_beg - m2->pos1; if (axis == 1) { uint32 sta = m2->pos2 + off2; uint32 end = 
m2->pos2 + off2 + len; if (m2->fwd2 == 0) { sta = m2->pos2 + m2->len2 - off2; end = m2->pos2 + m2->len2 - off2 - len; } fprintf(F, "("uint32FMTW(8)": "uint32FMTW(9)"-"uint32FMTW(9)") ", m2->iid2, sta, end); } else { fprintf(F, "("uint32FMTW(8)": "uint32FMTW(9)"-"uint32FMTW(9)") ", m2->iid1, m2->pos1 + off2, m2->pos1 + off2 + len); } } else { fprintf(F, uint32FMTW(07)" ", uint32ZERO); fprintf(F, "("uint32FMTW(8)": "uint32FMTW(9)"-"uint32FMTW(9)") ", uint32ZERO, uint32ZERO, uint32ZERO); } fprintf(F, "\n"); } kmer-code-2013-trunk/atac-driver/alignOverlap/overlap-find.C0000644000000000000000000001646412322046702022475 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "overlap.H" // Looks for 1's surrounded by U's void findIsolatedUnique(annoList *AL, uint32 ALlen) { bool only1 = true; uint32 sumA = 0, tA=0; uint32 sumB = 0, tB=0; for (uint32 i=1; i \n", argv[0]); exit(1); } atacFile *AF1 = new atacFile(argv[1]); atacFile *AF2 = new atacFile(argv[2]); atacMatchList *M1 = AF1->matches(); atacMatchList *M2 = AF2->matches(); char *OP = argv[3]; // We want to annotate the two assembies with: // a) mapped by both, the same // b) mapped by both, differently // c) mapped by the first, unmapped by the second // d) mapped by the second, unmapped by the first // e) unmapped by both // // If unmapped, we could further annotate with the reason it was // unmapped -- not found, or found multiple times. // // Our annotation datastructure is a tree of spans. Each span is a // sequence, and an interval on that sequence. We assume that the // tree contains the spans for the whole sequence, that is, that we // never need to increase a span, just split. // spanTree *S1 = new spanTree(); spanTree *S2 = new spanTree(); // Initialize the tree of spans by inserting a single span for each // sequence in the file. // for (uint32 i=0; ifastaA()->getNumberOfSequences(); i++) S1->addNewSpan(i, AF1->fastaA()->getSequenceLength(i)); for (uint32 i=0; ifastaB()->getNumberOfSequences(); i++) S2->addNewSpan(i, AF1->fastaB()->getSequenceLength(i)); // Add every match to the spanTrees. for (uint32 i=0; inumberOfMatches(); i++) { S1->addMatch(M1->getMatch(i), 0, 0); S2->addMatch(M1->getMatch(i), 1, 0); } for (uint32 i=0; inumberOfMatches(); i++) { S1->addMatch(M2->getMatch(i), 0, 1); S2->addMatch(M2->getMatch(i), 1, 1); } // Dump each spanTree: For each span, we need to check that // it has matches? 
// only one match, or only matches from one mapping? // matches from both mappings? need to check that // the span in the other tree also has the same matches // // Doesn't handle weird stuff like this span (on sequence 1) // mapping onto seq2 correctly, but the span in seq2 having an // extra match to somewhere else in seq1. // // we want to find the single span in the other spanTree that // corresponds to this span. once we do that, we can verify that // all the matches are the same. // // because we are gapless matches, we can, for each match, // compute the exact location this span should occur on the other // sequence. then, do a lookup() to get that span, or just // verify that everybody is the same location. char outname[1024]; FILE *outfile; overlapStats statsA; uint32 ALmax = (uint32)dict_count(S1->_tree); uint32 ALlen = 0; annoList *AL = new annoList [ ALmax ]; sprintf(outname, "%s.map1annotation", OP); errno = 0; outfile = fopen(outname, "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", outname, strerror(errno)); process1(outfile, S1, M1, M2, statsA, AL, ALlen, ALmax); fclose(outfile); overlapStats statsB; uint32 BLmax = (uint32)dict_count(S1->_tree); uint32 BLlen = 0; annoList *BL = new annoList [ ALmax ]; sprintf(outname, "%s.map2annotation", OP); errno = 0; outfile = fopen(outname, "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", outname, strerror(errno)); process2(outfile, S2, M1, M2, statsB, BL, BLlen, BLmax); fclose(outfile); fprintf(stderr, "unmapped: A:"uint32FMTW(10)" B:"uint32FMTW(10)"\n", statsA.unmapped.getSum(), statsB.unmapped.getSum()); fprintf(stderr, "unique mapping 1: A:"uint32FMTW(10)" B:"uint32FMTW(10)"\n", statsA.map1unique.getSum(), statsB.map1unique.getSum()); fprintf(stderr, "unique mapping 2: A:"uint32FMTW(10)" B:"uint32FMTW(10)"\n", statsA.map2unique.getSum(), statsB.map2unique.getSum()); fprintf(stderr, "different: A:"uint32FMTW(10)" B:"uint32FMTW(10)"\n", statsA.different.getSum(), 
statsB.different.getSum()); fprintf(stderr, "wild diff: A:"uint32FMTW(10)" B:"uint32FMTW(10)"\n", statsA.wilddiff.getSum(), statsB.wilddiff.getSum()); fprintf(stderr, "same: A:"uint32FMTW(10)" B:"uint32FMTW(10)"\n", statsA.same.getSum(), statsB.same.getSum()); fprintf(stderr, "inconsistent: A:"uint32FMTW(10)" B:"uint32FMTW(10)"\n", statsA.inconsistent.getSum(), statsB.inconsistent.getSum()); // Dump the histograms for each of the labelings // sprintf(outname, "%s.asm1histogram", OP); statsA.writeHistogram(outname); sprintf(outname, "%s.asm2histogram", OP); statsB.writeHistogram(outname); // Draw some pretty pictures // sprintf(outname, "%s.histogram.gnuplot", OP); errno = 0; outfile = fopen(outname, "w"); if (errno) fprintf(stderr, "failed to open '%s': %s\n", outname, strerror(errno)), exit(1); fprintf(outfile, "set terminal postscript color\n"); fprintf(outfile, "set output \"%s.unmapped.histogram.ps\"\n", OP); fprintf(outfile, "set ylabel \"number of regions\"\n"); fprintf(outfile, "set xlabel \"length of region\"\n"); fprintf(outfile, "plot [0:10000][0:400] \\\n"); fprintf(outfile, " \"%s.asm1histogram.unmapped\" using 2 title \"assembly 1 unmapped\" with lines, \\\n", OP); fprintf(outfile, " \"%s.asm2histogram.unmapped\" using 2 title \"assembly 2 unmapped\" with lines\n", OP); fprintf(outfile, "set output \"%s.same.histogram.ps\"\n", OP); fprintf(outfile, "plot [0:20000][0:2000] \\\n"); fprintf(outfile, " \"%s.asm1histogram.same\" using 2 title \"assembly 1 same\" with lines, \\\n", OP); fprintf(outfile, " \"%s.asm2histogram.same\" using 2 title \"assembly 2 same\" with lines\n", OP); fprintf(outfile, "set output \"%s.histogram.ps\"\n", OP); fprintf(outfile, "plot [0:2000][0:100] \\\n"); fprintf(outfile, " \"%s.asm1histogram.different\" using 2 title \"assembly 1 different\" with lines, \\\n", OP); fprintf(outfile, " \"%s.asm2histogram.different\" using 2 title \"assembly 2 different\" with lines, \\\n", OP); fprintf(outfile, " \"%s.asm1histogram.wilddiff\" 
using 2 title \"assembly 1 wildly diff\" with lines, \\\n", OP); fprintf(outfile, " \"%s.asm2histogram.wilddiff\" using 2 title \"assembly 2 wildly diff\" with lines\n", OP); fprintf(outfile, "set output \"%s.unique.histogram.ps\"\n", OP); fprintf(outfile, "plot [0:2000][0:100] \\\n"); fprintf(outfile, " \"%s.asm1histogram.map1unique\" using 2 title \"map 1, assembly 1 unique\" with lines, \\\n", OP); fprintf(outfile, " \"%s.asm1histogram.map2unique\" using 2 title \"map 2, assembly 1 unique\" with lines, \\\n", OP); fprintf(outfile, " \"%s.asm2histogram.map1unique\" using 2 title \"map 1, assembly 2 unique\" with lines, \\\n", OP); fprintf(outfile, " \"%s.asm2histogram.map2unique\" using 2 title \"map 2, assembly 2 unique\" with lines\n", OP); fclose(outfile); sprintf(outname, "gnuplot < %s.histogram.gnuplot", OP); if (system(outname)) fprintf(stderr, "Failed to '%s'\n", outname); #if 0 findIsolatedUnique(AL, ALlen); findExtended(AL, ALlen); #endif // Deleting the spanTrees takes a long time, so we don't bother with any cleanup. return(0); } kmer-code-2013-trunk/atac-driver/alignOverlap/findDifferentScaffold.pl0000644000000000000000000001034310232510526024535 0ustar rootroot
#!/usr/bin/perl

use strict;

#  Examines an atac mapping, counts the number of times a scaffold is
#  mapped to wildly different places -- wildly being more than a few
#  bp away on the same chromosome (mind the gap, please!) or (gasp!) a
#  different chromosome.
#
#  Assumes that the Aannotation primary axis is chromosomes.
#
#  Change the leading character class in the first m// below to
#  restrict to specific types of regions, e.g., N's.  Useful choices
#  here are:
#    .  - all regions
#    U  - unmapped (will do nothing)
#    1  - has only one mapping
#    Y  - just those they agree on
#    N  - disagree, but on the same destination
#    !  - disagree, and on different destinations
#    ?  - inconsistent mapping (OK, this one isn't useful)
#
#    1Y   - the regions that have no disagreement
#    1YN! - all consistent regions

#  Exactly two arguments are required: the annotation file and the
#  prefix used to name the two output files.
#
if (scalar(@ARGV) != 2) {
    print STDERR "usage: $0 some.Aannotation outprefix\n";
    exit(1);
}

my $filename  = shift @ARGV;
my $outprefix = shift @ARGV;

#  scafA is really scaffolds-from-map1, and scafB is scaffolds-from-map2.
#
#  *_to_chr maps a scaffold id to "chrId\0line" for the first line that
#  placed it; *_to_chr_mismatch accumulates "\1chrId\0line" records for
#  every later line that placed the same scaffold on another chromosome.

my (%scafA_to_chr, %scafA_to_chr_mismatch);
my (%scafB_to_chr, %scafB_to_chr_mismatch);

open(F, "< $filename") or die "Failed to open '$filename': $!\n";
while (<F>) {
    chomp;

    if (m/^[1YN!]\s+(\d+):(\d+)-(\d+)\[\s*\d+\].*\s(\d+):\s*(\d+)-\s*(\d+)\).*\s(\d+):\s*(\d+)-\s*(\d+)\)/) {
        my ($id1,  $b1,  $e1)  = ($1, $2, $3);
        my ($id2a, $b2a, $e2a) = ($4, $5, $6);
        my ($id2b, $b2b, $e2b) = ($7, $8, $9);

        #  If we have a mapping from method A or method B, save the
        #  chromosome that the scaffold mapped to.  If we've already
        #  mapped this scaffold to some other chromosome, call it a
        #  mismatch.
        #
        #  The stored value starts with the chromosome id, so the
        #  numeric != below compares just that leading id.

        if (($id2a > 0) && ($e2a > 0)) {
            if (defined($scafA_to_chr{$id2a})) {
                if ($scafA_to_chr{$id2a} != $id1) {
                    $scafA_to_chr_mismatch{$id2a} = $scafA_to_chr{$id2a} if (! defined($scafA_to_chr_mismatch{$id2a}));
                    $scafA_to_chr_mismatch{$id2a} .= "\1$id1\0$_";
                }
            } else {
                $scafA_to_chr{$id2a} = "$id1\0$_";
            }
        }

        if (($id2b > 0) && ($e2b > 0)) {
            if (defined($scafB_to_chr{$id2b})) {
                if ($scafB_to_chr{$id2b} != $id1) {
                    $scafB_to_chr_mismatch{$id2b} = $scafB_to_chr{$id2b} if (! defined($scafB_to_chr_mismatch{$id2b}));
                    $scafB_to_chr_mismatch{$id2b} .= "\1$id1\0$_";
                }
            } else {
                $scafB_to_chr{$id2b} = "$id1\0$_";
            }
        }
    }
}
close(F);

#  Count the number of things in *_mismatch that are the same.
#
my %merge;
my $both = 0;   #  start at zero so the report prints "0", not an empty string

foreach my $f (keys %scafA_to_chr_mismatch) {
    $merge{$f}++;
}
foreach my $f (keys %scafB_to_chr_mismatch) {
    $merge{$f}++;
}
foreach my $f (keys %merge) {
    if (defined($scafA_to_chr_mismatch{$f}) && defined($scafB_to_chr_mismatch{$f})) {
        $both++;
    }
}

print "num scafA:  ", scalar(keys %scafA_to_chr_mismatch), "\n";
print "num scafB:  ", scalar(keys %scafB_to_chr_mismatch), "\n";
print "num both:   ", $both, "\n";

#  Run through the input again, pulling out matches that map a
#  single scaffold to two different chromosomes, then what?  we
#  saved the iid of the scaffold that maps to different
#  chromosomes as the key, so just parse the matches again,
#  pulling out all those scaffolds.

open(A, "| sort -k6n -k7n > $outprefix.scaffold-consistency.map1dups")
    or die "Failed to start sort for '$outprefix.scaffold-consistency.map1dups': $!\n";
open(B, "| sort -k11n -k12n > $outprefix.scaffold-consistency.map2dups")
    or die "Failed to start sort for '$outprefix.scaffold-consistency.map2dups': $!\n";

my $matchesA = 0;
my $matchesB = 0;

open(F, "< $filename") or die "Failed to open '$filename': $!\n";
while (<F>) {
    #  No chomp here: the line is printed back out with its newline.
    if (m/^.\s+(\d+):(\d+)-(\d+)\[\s*\d+\].*\s(\d+):\s*(\d+)-\s*(\d+)\).*\s(\d+):\s*(\d+)-\s*(\d+)\)/) {
        my ($id1,  $b1,  $e1)  = ($1, $2, $3);
        my ($id2a, $b2a, $e2a) = ($4, $5, $6);
        my ($id2b, $b2b, $e2b) = ($7, $8, $9);

        #  NOTE(review): each mismatch table is probed with the scaffold
        #  ids from BOTH mappings, as in the original -- confirm intended.

        if (defined($scafA_to_chr_mismatch{$id2a}) || defined($scafA_to_chr_mismatch{$id2b})) {
            print A $_;
            $matchesA++;
        }

        if (defined($scafB_to_chr_mismatch{$id2a}) || defined($scafB_to_chr_mismatch{$id2b})) {
            print B $_;
            $matchesB++;
        }
    }
}
close(F);

close(A);
close(B);

print STDERR "matches for map1:  $matchesA\n";
print STDERR "matches for map2:  $matchesB\n";
kmer-code-2013-trunk/atac-driver/alignOverlap/overlap-sort.C0000644000000000000000000000413010415627702022535 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version.
// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "overlap.H" int sortMatches1(const void *a, const void *b) { const atacMatch *A = *((const atacMatch * const *)a); const atacMatch *B = *((const atacMatch * const *)b); if (A->iid1 < B->iid1) return(-1); if (A->iid1 > B->iid1) return(1); if (A->pos1 < B->pos1) return(-1); if (A->pos1 > B->pos1) return(1); if (A->len1 > B->len1) return(-1); if (A->len1 < B->len1) return(1); if (A->fwd1 > B->fwd1) return(-1); if (A->fwd1 < B->fwd1) return(1); return(0); } int sortMatches2(const void *a, const void *b) { const atacMatch *A = *((const atacMatch * const *)a); const atacMatch *B = *((const atacMatch * const *)b); if (A->iid2 < B->iid2) return(-1); if (A->iid2 > B->iid2) return(1); if (A->pos2 < B->pos2) return(-1); if (A->pos2 > B->pos2) return(1); if (A->len2 > B->len2) return(-1); if (A->len2 < B->len2) return(1); if (A->fwd2 > B->fwd2) return(-1); if (A->fwd2 < B->fwd2) return(1); return(0); } int spanCompare(const void *a, const void *b) { const span_t *A = *((const span_t * const *)a); const span_t *B = *((const span_t * const *)b); if (A->_iid < B->_iid) return(-1); if (A->_iid > B->_iid) return(1); if (A->_beg < B->_beg) return(-1); if (A->_beg > B->_beg) return(1); return(0); } kmer-code-2013-trunk/atac-driver/alignOverlap/Make.include0000644000000000000000000000251011513002456022207 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../../libutil/)/ LIBBIO/ :=$(realpath $/../../libbio/)/ LIBSEQ/ :=$(realpath $/../../libseq/)/ LIBATAC/ :=$(realpath $/../libatac/)/ THIS/ :=$(realpath 
$/)/ $/.CXX_SRCS := $/overlap.C $/overlap-sort.C $/overlap-printAnno.C $/overlap-find.C $/.CXX_EXES := $/overlap $/.CLEAN :=$/*.o $/*~ $/core $/overlap-process1.C $/overlap-process2.C $/overlap: $/overlap.o \ $/overlap-find.o \ $/overlap-matchTree.o \ $/overlap-printAnno.o \ $/overlap-sort.o \ $/overlap-process1.o \ $/overlap-process2.o \ ${LIBATAC/}libatac.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a # Symlinks below don't work, they either get the source or the destination wrong. # Hardlinks aren't as obvious as to what's going on. $/overlap-process1.o: CXXFLAGS+=-DINDEX=1 -DNAME=process1 -DPOS1=pos1 -DPOS2=pos2 -DLEN2=len2 $/overlap-process1.C: $/overlap-process.C ln -f ${THIS/}overlap-process.C ${THIS/}overlap-process1.C $/overlap-process2.o: CXXFLAGS+=-DINDEX=2 -DNAME=process2 -DPOS1=pos2 -DPOS2=pos1 -DLEN2=len1 $/overlap-process2.C: $/overlap-process.C ln -f ${THIS/}overlap-process.C ${THIS/}overlap-process2.C $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBATAC/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) kmer-code-2013-trunk/atac-driver/alignOverlap/overlap-annoList.H0000644000000000000000000000366012322046702023343 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #ifndef OVERLAP_ANNOLIST_H #define OVERLAP_ANNOLIST_H #include "overlap.H" // List of the annotation. Used for classifying each piece of the // annotation, e.g., U followed by 1 followed by U means that // somebody really did map something uniquely, where Y followed by 1 // is probably just an extension. // // This only works if assemblyA is the reference! // class annoList { public: char type; uint32 iid1, pos1, len1; // The position on the reference axis uint32 iid2a, pos2a, len2a; // The position on mapping 1 uint32 iid2b, pos2b, len2b; // The position on mapping 2 void add(char type_, uint32 iid1_, uint32 pos1_, uint32 len1_, uint32 match1, atacMatch *m1, uint32 match2, atacMatch *m2) { type = type_; iid1 = iid1_; pos1 = pos1_; len1 = len1_; iid2a = match1; pos2a = 0; len2a = 0; if (m1) { pos2a = m1->pos2; len2a = m1->len2; } iid2b = match2; pos2b = 0; len2b = 0; if (m2) { pos2b = m2->pos2; len2b = m2->len2; } } }; #endif // OVERLAP_ANNOLIST_H kmer-code-2013-trunk/atac-driver/alignOverlap/overlap-matchTree.C0000644000000000000000000000461212322046702023461 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "overlap.H" matchTree::matchTree(atacMatchList *L, uint32 side) { // Construct a list of pointers to the atacMatchList data // // kazlib was modified to be qsort() compatible and so it passes a // pointer to whatever it is sorting. Since kazlib operates on // pointers anyway, this means that it passes the compare function // a pointer to a pointer to the object. // // Which really fails in this case. We have a list of pointers to // objects that we sort, then want to load. // // Uhhh, no, this is correct. We give kazlib a pointer to the // object, it gives the compare function a pointer to that pointer. // // qsort() below sorts pointers to objects, and does the same. atacMatch **matchPointers = new atacMatch * [L->numberOfMatches()]; for (uint32 i=0; inumberOfMatches(); i++) matchPointers[i] = L->getMatch(i); // Choose a comparison function based on the side we want int (*sortMatches)(const void *, const void *) = sortMatches1; if (side == 1) sortMatches = sortMatches2; // Sort qsort(matchPointers, L->numberOfMatches(), sizeof(atacMatch *), sortMatches); // Load the tree (use DICTCOUNT_T_MAX for max nodes) _tree = dict_create(L->numberOfMatches(), sortMatches); dict_allow_dupes(_tree); dict_load_begin(&_load, _tree); for (uint32 i=0; inumberOfMatches(); i++) { dnode_t *node = (dnode_t *)malloc(sizeof(dnode_t)); dnode_init(node, 0L); dict_load_next(&_load, node, matchPointers[i]); } dict_load_end(&_load); // Clean up delete [] matchPointers; } kmer-code-2013-trunk/atac-driver/alignOverlap/overlap-span.H0000644000000000000000000000537512322046702022522 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. 
Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #ifndef OVERLAP_SPAN_H #define OVERLAP_SPAN_H #include "overlap.H" #define COLORSHIFT 24 #define COLORMASK 0x00ffffff class span_t { public: uint32 _iid; uint32 _beg; uint32 _end; uint32 _matchesLen; uint32 _matchesMax; uint32 *_matches; span_t(uint32 iid, uint32 beg, uint32 end) { _iid = iid; _beg = beg; _end = end; _matchesLen = 0; _matchesMax = 0; _matches = 0L; }; ~span_t() { delete [] _matches; }; // The top X bits of the _matches is for storing the color. This // does cut down the number of matches we can store. Human-Human // is ~1 million matches. void addMatch(uint32 matchiid, uint32 color) { if (_matchesLen >= _matchesMax) { if (_matchesMax == 0) _matchesMax = 2; _matchesMax *= 2; uint32 *X = new uint32 [_matchesMax]; memcpy(X, _matches, sizeof(uint32) * _matchesLen); delete [] _matches; _matches = X; } if (matchiid >> COLORSHIFT) fprintf(stderr, "ERROR! 
span_t::addMatch()-- match id too big, decrease the color space.\n"), exit(1); _matches[_matchesLen++] = (color << COLORSHIFT) | (matchiid); }; // Split this span at position, return two new spans // void split(uint32 position, span_t* &l, span_t* &r) { if ((position < _beg) || (_end < position)) { fprintf(stderr, "span_t::split()-- _beg="uint32FMT" _end="uint32FMT" postition="uint32FMT"?\n", _beg, _end, position); exit(1); } l = new span_t(_iid, _beg, position); r = new span_t(_iid, position, _end); l->_matchesLen = _matchesLen; l->_matchesMax = _matchesMax; l->_matches = new uint32 [_matchesMax]; memcpy(l->_matches, _matches, sizeof(uint32) * _matchesLen); r->_matchesLen = _matchesLen; r->_matchesMax = _matchesMax; r->_matches = new uint32 [_matchesMax]; memcpy(r->_matches, _matches, sizeof(uint32) * _matchesLen); }; }; #endif // OPERLAP_SPAN_H kmer-code-2013-trunk/atac-driver/alignOverlap/overlap-spanTree.H0000644000000000000000000000653012322046702023334 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #ifndef OVERLAP_SPANTREE_H #define OVERLAP_SPANTREE_H #include "overlap.H" class spanTree { public: spanTree() { _tree = dict_create(DICTCOUNT_T_MAX, spanCompare); }; ~spanTree() { dict_free_nodes(_tree); dict_free(_tree); }; void addNewSpan(uint32 iid, uint32 len) { span_t *span = new span_t(iid, 0, len); dict_alloc_insert(_tree, span, 0L); }; uint32 size(void) { return((uint32)dict_count(_tree)); }; void addMatch(atacMatch *match, uint32 side, uint32 color) { // Query the tree for the first match before this position. // We're guaranteed to find one before, since the tree was // initialized with a span for the whole sequence. // span_t *span = 0L; uint32 beg = 0; uint32 end = 0; if (side == 0) { span = new span_t(match->iid1, match->pos1, match->pos1 + match->len1); beg = match->pos1; end = match->pos1 + match->len1; } else { span = new span_t(match->iid2, match->pos2, match->pos2 + match->len2); beg = match->pos2; end = match->pos2 + match->len2; } dnode_t *node = dict_upper_bound(_tree, span); delete span; span = (span_t *)dnode_getkey(node); // We need to split the span pointed to by node, iterate through // all the spans, and split the last one. if (span->_beg != beg) { span_t *l = 0L; span_t *r = 0L; span->split(beg, l, r); // Kill this node, insert the new ones dict_delete(_tree, node); dnode_destroy(node); dict_alloc_insert(_tree, l, 0L); dict_alloc_insert(_tree, r, 0L); delete span; span = r; // Argh! Now find the node we just inserted... 
node = dict_lookup(_tree, r); } // Until we hit the last span, add the match to the span while (span->_end < end) { span->addMatch(match->matchiid, color); node = dict_next(_tree, node); span = (span_t *)dnode_getkey(node); } // We're at the last span, so split it like the beginning if (span->_end != end) { span_t *l = 0L; span_t *r = 0L; span->split(end, l, r); // Kill this node, insert the new ones dict_delete(_tree, node); dnode_destroy(node); dict_alloc_insert(_tree, l, 0L); dict_alloc_insert(_tree, r, 0L); delete span; span = l; } // FInally, add the match to the last span span->addMatch(match->matchiid, color); }; dict_t *_tree; dict_load_t _load; }; #endif // OVERLAP_SPANTREE_H kmer-code-2013-trunk/atac-driver/alignOverlap/overlap.H0000644000000000000000000000446312322046702021560 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #ifndef OVERLAP_H #define OVERLAP_H #include #include #include #include "bio++.H" #include "util++.H" #include "atac.H" // Kaz Kylheku library. 
#include "kazlib/dict.h" #include "kazlib/except.h" #include "kazlib/hash.h" #include "kazlib/list.h" #include "kazlib/sfx.h" int sortMatches1(const void *a, const void *b); int sortMatches2(const void *a, const void *b); int spanCompare(const void *a, const void *b); #include "overlap-span.H" #include "overlap-matchTree.H" #include "overlap-spanTree.H" #include "overlap-annoList.H" #include "overlap-stats.H" void process1(FILE *outfile, spanTree *S, atacMatchList *M1, atacMatchList *M2, overlapStats &stats, annoList *AL, uint32 &ALlen, uint32 &ALmax); void process2(FILE *outfile, spanTree *S, atacMatchList *M1, atacMatchList *M2, overlapStats &stats, annoList *AL, uint32 &ALlen, uint32 &ALmax); void printAnno(FILE *F, annoList *AL, uint32 &ALlen, char label, uint32 axis, span_t *span, uint32 match1=uint32ZERO, atacMatch *m1=0L, uint32 match2=uint32ZERO, atacMatch *m2=0L); void findIsolatedUnique(annoList *AL, uint32 ALlen); void findExtended(annoList *AL, uint32 ALlen); #endif // OVERLAP_H kmer-code-2013-trunk/atac-driver/alignOverlap/overlap-stats.H0000644000000000000000000000511212322046702022704 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2005 J. Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #ifndef OVERLAP_STATS_H #define OVERLAP_STATS_H #include "overlap.H" // Statistics and Histograms // // Index 1 is the assembly, 2 is the mapping. Stats count the // number of bases covered, histograms are of the block sizes. // class histogram { public: histogram(uint32 max=65536) { histMax = max; hist = new uint32 [histMax]; sum = 0; for (uint32 i=0; i histMax) ? 0 : length ]++; }; void operator+=(uint32 length) { update(length); }; uint32 getSum(void) { return(sum); }; void writeHistogram(char const *prefix, char const *label) { char filename[1024]; sprintf(filename, "%s.%s", prefix, label); errno = 0; FILE *out = fopen(filename, "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", filename, strerror(errno)), exit(1); for (uint32 i=0; i; $seqA = $1 if (m/^\/assemblyFile1=(.*)$/); $tagA = $1 if (m/^\/assemblyId1=(.*)$/); $seqB = $1 if (m/^\/assemblyFile2=(.*)$/); $tagB = $1 if (m/^\/assemblyId2=(.*)$/); } close(F); if (!defined($seqA) || !defined($tagA) || !defined($seqB) || !defined($tagB)) { die "Something fishy. 
Didn't find seqs or tags in '$atacfile'.\n"; } $iid = 0; open(F, "< $seqA") or die "Failed to open '$seqA'\n"; while () { if (m/^>(\S+)\s*.*$/) { #chomp; #print STDERR "$tagA:$iid -> $_\n"; $uidA{"$tagA:$iid"} = $1; $iid++; } } close(F); $iid = 0; open(F, "< $seqB") or die "Failed to open '$seqA'\n"; while () { if (m/^>(\S+)\s*.*$/) { #chomp; #print STDERR "$tagB:$iid -> $_\n"; $uidB{"$tagB:$iid"} = $1; $iid++; } } close(F); $, = " "; $\ = "\n"; open(F, "< $atacfile") or die; while () { chomp $_; my @v = split '\s+', $_; if (m/^M/) { die "Didn't find uidA for $v[4]\n" if (!defined($uidA{$v[4]})); die "Didn't find uidB for $v[8]\n" if (!defined($uidB{$v[8]})); $v[4] = $uidA{$v[4]}; $v[8] = $uidB{$v[8]}; print @v; } else { print $_; } } close(F); kmer-code-2013-trunk/libsim4/0000755000000000000000000000000012641613357014521 5ustar rootrootkmer-code-2013-trunk/libsim4/sim4.H0000644000000000000000000000041211445534703015501 0ustar rootroot#include "sim4polish/sim4polish.H" #include "sim4polish/sim4polishList.H" #include "sim4polish/sim4polishFile.H" #include "sim4polish/sim4polishBuilder.H" #include "sim4polish/sim4polishReader.H" #include "sim4polish/sim4polishWriter.H" #include "sim4core/sim4.H" kmer-code-2013-trunk/libsim4/sim4polish/0000755000000000000000000000000012641613357016614 5ustar rootrootkmer-code-2013-trunk/libsim4/sim4polish/sim4polish-exons.C0000644000000000000000000000310112322046702022126 0ustar rootroot#include "sim4polish.H" #include #include #include #include #include "memory.h" void sim4polish::s4p_swapExons(uint32 a, uint32 b) { sim4polishExon copyofa = _exons[a]; _exons[a] = _exons[b]; _exons[b] = copyofa; } // Insert a single exon into the list at position a void sim4polish::s4p_insertExon(uint32 a, uint32 intronori, sim4polishExon *e) { sim4polish p; p._numExons = 1; p._exons = e; s4p_insertExons(a, intronori, &p); } // Inserts all the exons in e into the list at position a. 
void sim4polish::s4p_insertExons(uint32 a, uint32 intronori, sim4polish *e) { sim4polishExon *ne = new sim4polishExon [_numExons + e->_numExons]; // Copy exons up to the insert point. for (uint32 i=0; i_numExons; i++) ne[a+i].s4p_copyExon(e->_exons+i); // Copy the rest. for (uint32 i=a; i<_numExons; i++) { ne[i+e->_numExons] = _exons[i]; _exons[i].s4p_clearExon(); } // All done with the copy, get rid of the old stuff. s4p_clearExon() above is critical here; // without it we would delete the alignment strings. delete [] _exons; _exons = ne; _numExons += e->_numExons; // We trust that the user has set the intron orientation in the new exon, and that 'intronori' is // the correct orientation for the previous intron. // if (a > 0) _exons[a-1]._intronOrientation = intronori; } kmer-code-2013-trunk/libsim4/sim4polish/sim4polish-updatescores.C0000644000000000000000000001340012322046702023476 0ustar rootroot#include #include "sim4polish.H" void sim4polish::s4p_updateAlignmentScores(void) { uint32 ni = 0, numInDel = 0; uint32 ne = 0, numEdits = 0; uint32 nn = 0, numMatchesN = 0; uint32 nm = 0, numMatches = 0; uint32 al = 0, alignmentLength = 0; uint32 nc = 0, numCovered = 0; uint32 estn = 0; uint32 genn = 0; for (uint32 exon=0; exon<_numExons; exon++) { char *est = _exons[exon]._estAlignment; char *gen = _exons[exon]._genAlignment; al = 0; ni = 0; ne = 0; nn = 0; nm = 0; if (est && gen) { while (*est && *gen) { estn = (*est == 'N') || (*est == 'n'); genn = (*gen == 'N') || (*gen == 'n'); if ((*est == '-') || (*gen == '-')) { ni++; ne++; *est = toupper(*est); *gen = toupper(*gen); } else if (estn && genn) { // Both are N. It isn't a match and it isn't an edit. // nn++; *est = toupper(*est); *gen = toupper(*gen); } else if (estn || genn) { // One is an N. Someone has low quality sequence, and we // should penalize. We need to special case this because // IUPACidentity[][] claims N matches all. 
// ne++; *est = toupper(*est); *gen = toupper(*gen); } else if (IUPACidentity[(int)*est][(int)*gen]) { // Got a match. nm++; *est = tolower(*est); *gen = tolower(*gen); } else { // Got a substitution ne++; *est = toupper(*est); *gen = toupper(*gen); } est++; gen++; } } _exons[exon]._numMatches = nm; _exons[exon]._numMatchesN = nn; al = (_exons[exon]._genTo - _exons[exon]._genFrom + 1 + _exons[exon]._estTo - _exons[exon]._estFrom + 1 + ne); nc = (_exons[exon]._estTo - _exons[exon]._estFrom + 1); _exons[exon]._percentIdentity = s4p_percentIdentityApprox(ne, al); numInDel += ni; numEdits += ne; numMatchesN += nn; numMatches += nm; alignmentLength += al; numCovered += nc; } _numMatches = numMatches; _numMatchesN = numMatchesN; _numCovered = numCovered; #if 0 fprintf(stderr, "numInDel = %d\n", numInDel); fprintf(stderr, "numEdits = %d\n", numEdits); fprintf(stderr, "numMatchesN = %d\n", numMatchesN); fprintf(stderr, "numMatches = %d\n", numMatches); fprintf(stderr, "alignLen = %d\n", alignmentLength); fprintf(stderr, "numCovered = %d\n", numCovered); #endif _percentIdentity = s4p_percentIdentityApprox(numEdits, alignmentLength); _querySeqIdentity = s4p_percentCoverageApprox(); } int sim4polish::s4p_percentCoverageApprox(void) { int ret; if (_numCovered == _estLen - _estPolyA - _estPolyT) return(100); return(((ret=(int)round(100.0 * _numCovered / (double)(_estLen - _estPolyA - _estPolyT))) < 100) ? ret : 99); } int sim4polish::s4p_percentIdentityApprox(int numEdits, int alignmentLength) { int ret; if (alignmentLength == 0) return(0); if (numEdits == 0) return(100); return(((ret=(int)round(100.0 * (1 - 2.0 * numEdits / alignmentLength))) < 100) ? 
ret : 99); } double sim4polish::s4p_percentCoverageExact(void) { return( 100 * (double)(_numCovered) / (double)(_estLen - _estPolyA - _estPolyT) ); } double sim4polish::s4p_percentIdentityExact(void) { uint32 ni = 0, numInDel = 0; uint32 ne = 0, numEdits = 0; uint32 nn = 0, numMatchesN = 0; uint32 nm = 0, numMatches = 0; uint32 al = 0, alignmentLength = 0; uint32 nc = 0, numCovered = 0; uint32 estn = 0; uint32 genn = 0; double ret = 0.0; for (uint32 exon=0; exon<_numExons; exon++) { char *est = _exons[exon]._estAlignment; char *gen = _exons[exon]._genAlignment; al = 0; ni = 0; ne = 0; nn = 0; nm = 0; if (est && gen) { while (*est && *gen) { estn = (*est == 'N') || (*est == 'n'); genn = (*gen == 'N') || (*gen == 'n'); if ((*est == '-') || (*gen == '-')) { ni++; ne++; } else if (estn && genn) { // Both are N. It isn't a match and it isn't an edit. // nn++; } else if (estn || genn) { // One is an N. Someone has low quality sequence, and we // should penalize. We need to special case this because // IUPACidentity[][] claims N matches all. // ne++; } else if (IUPACidentity[(int)*est][(int)*gen]) { // Got a match. 
nm++; } else { // Got a substitution ne++; } est++; gen++; } } #if 0 _exons[exon]._numMatches = nm; _exons[exon]._numMatchesN = nn; #endif al = (_exons[exon]._genTo - _exons[exon]._genFrom + 1 + _exons[exon]._estTo - _exons[exon]._estFrom + 1 + ne); nc = (_exons[exon]._genTo - _exons[exon]._genFrom + 1); #if 0 _exons[exon]._percentIdentity = s4p_percentIdentityApprox(ne, al); #endif numInDel += ni; numEdits += ne; numMatchesN += nn; numMatches += nm; alignmentLength += al; numCovered += nc; } #if 0 _numMatches = numMatches; _numMatchesN = numMatchesN; _numCovered = numCovered; #endif #if 0 fprintf(stderr, "numInDel = %d\n", numInDel); fprintf(stderr, "numEdits = %d\n", numEdits); fprintf(stderr, "numMatchesN = %d\n", numMatchesN); fprintf(stderr, "numMatches = %d\n", numMatches); fprintf(stderr, "alignLen = %d\n", alignmentLength); fprintf(stderr, "numCovered = %d\n", numCovered); #endif if (alignmentLength > 0) ret = 100.0 * (1 - 2.0 * numEdits / (double)(alignmentLength)); return(ret); } kmer-code-2013-trunk/libsim4/sim4polish/sim4polishReader.C0000644000000000000000000000421211467324535022137 0ustar rootroot#include "sim4polishReader.H" #include "util++.H" #include #include #include #include #include #include "sim4polishWriter.H" sim4polishReader::sim4polishReader(const char *name, sim4polishWriter *writer) { if (name) _rb = new readBuffer(name); else _rb = new readBuffer(writer->surrenderToReader()); // Attempt to decide on the style of the input, based on the first line. char firstLine[1024]; splitToWords firstWords; _rb->read(firstLine, 1024, '\n'); _rb->seek(0); // This fixes a bug in split to words, that white space at the end isn't trimmed. 
chomp(firstLine); //fprintf(stderr, "sim4polishReader()-- '%s'\n", firstLine); firstWords.split(firstLine); if (strcmp(firstWords[0], "sim4begin") == 0) { _style = sim4polishS4DB; } else if (strcmp(firstWords[0], "##gff-version") == 0) { if (strcmp(firstWords[1], "3") == 0) _style = sim4polishGFF3; else fprintf(stderr, "sim4polishReader()-- GFF format version %s not supported; only version 3 is supported.\n", firstWords[1]), exit(1); } else if ((strcmp(firstWords[0], "!format") == 0) && (strcmp(firstWords[1], "atac") == 0)) { if (strcmp(firstWords[2], "1.0") == 0) _style = sim4polishATAC; else fprintf(stderr, "sim4polishReader()-- ATAC format version %s not supported; only version 1.0 is supported.\n", firstWords[2]), exit(1); } else { fprintf(stderr, "sim4polishReader()-- Failed to open '%s' for reading: unknown format.\n", _rb->filename()), exit(1); } } sim4polishReader::~sim4polishReader() { delete _rb; _rb = 0L; } sim4polish * sim4polishReader::nextAlignment(void) { sim4polish *p = 0L; if (_rb->eof()) return(p); p = new sim4polish(_rb, _style); if (p->_numExons == 0) { delete p; p = 0L; } return(p); } bool sim4polishReader::nextAlignment(sim4polish * &p) { delete p; p = 0L; if (_rb->eof()) return(false); p = new sim4polish(_rb, _style); if (p->_numExons == 0) { delete p; p = 0L; return(false); } return(true); } kmer-code-2013-trunk/libsim4/sim4polish/sim4polish-stringtopolish.C0000644000000000000000000003623312322046702024076 0ustar rootroot#include "sim4polish.H" #include #include #include #include #include #include void sim4polish::s4p_linesToPolishS4DB(uint32 startPosition, uint32 maxLines, char **lines, uint32 *lengths) { char mOri[65]; char sOri[65]; assert(_comment == 0L); assert(_estDefLine == 0L); assert(_genDefLine == 0L); assert(_exons == 0L); assert(_numExons == 0); if (strcmp(lines[0], "sim4begin")) { fprintf(stderr, "sim4polish::s4p_linesToPolishS4DB()-- Invalid sim4db format, got '%s' instead of sim4begin. 
Cannot convert.\n", lines[0]); return; } uint32 cl = 1; // Convert '-' into ' ', on the assumption that this is the description line. This allows us to // use scanf properly. // for (uint32 i=0; i", &_estID, &_estLen, &_estPolyA, &_estPolyT, &_genID, &_genRegionOffset, &_genRegionLength, &_numMatches, &_numMatchesN, &_percentIdentity, mOri, sOri); if (r != 12) { fprintf(stderr, "sim4polish::s4p_linesToPolishS4DB()-- byte "uint32FMT": '%s'\n", startPosition, lines[cl]); fprintf(stderr, "sim4polish::s4p_linesToPolishS4DB()-- Expecting description line, found %d tokens instead of 12.\n", r); } switch (mOri[0]) { case 'f': _matchOrientation = SIM4_MATCH_FORWARD; break; case 'c': _matchOrientation = SIM4_MATCH_COMPLEMENT; break; case 'r': // BUG FIX -- old version of sim4 used "reverse-intractable" // instead of "complement-intractable" _matchOrientation = SIM4_MATCH_COMPLEMENT; break; default: fprintf(stderr, "sim4polish::s4p_linesToPolishS4DB()-- byte "uint32FMT": '%s'\n", startPosition, lines[cl]); fprintf(stderr, "sim4polish::s4p_linesToPolishS4DB()-- unknown match orientation\n"); break; } switch (sOri[2]) { case 'r': _strandOrientation = SIM4_STRAND_POSITIVE; break; case 'v': _strandOrientation = SIM4_STRAND_NEGATIVE; break; case 'k': _strandOrientation = SIM4_STRAND_UNKNOWN; break; case 't': _strandOrientation = SIM4_STRAND_INTRACTABLE; break; case 'i': _strandOrientation = SIM4_STRAND_FAILED; break; default: fprintf(stderr, "sim4polish::s4p_linesToPolishS4DB()-- byte "uint32FMT": '%s'\n", startPosition, lines[cl]); fprintf(stderr, "sim4polish::s4p_linesToPolishS4DB()-- unknown strand orientation\n"); break; } cl++; _comment = 0L; if (strncmp(lines[cl], "comment", 7) == 0) { _comment = new char [lengths[cl] - 7]; strcpy(_comment, lines[cl] + 8); cl++; } _estDefLine = 0L; if (strncmp(lines[cl], "edef", 4) == 0) { _estDefLine = new char [lengths[cl] - 4]; strcpy(_estDefLine, lines[cl] + 5); cl++; } _genDefLine = 0L; if (strncmp(lines[cl], "ddef", 4) == 0) { 
_genDefLine = new char [lengths[cl] - 4]; strcpy(_genDefLine, lines[cl] + 5); cl++; } // // While we get exons, make exons. // sim4polishExon exon; uint32 maxExons = 1024; _numExons = 0; _exons = new sim4polishExon [maxExons]; _numCovered = 0; while (sscanf(lines[cl], ""uint32FMT"-"uint32FMT" ("uint32FMT"-"uint32FMT") <"uint32FMT"-"uint32FMT"-"uint32FMT">", &exon._estFrom, &exon._estTo, &exon._genFrom, &exon._genTo, &exon._numMatches, &exon._numMatchesN, &exon._percentIdentity) == 7) { // Dang, out of space! This would be a chore, except we don't have alignments yet, and so can // get by with a shallow copy. // if (_numExons >= maxExons) { maxExons *= 2; sim4polishExon *nnn = new sim4polishExon [maxExons]; memcpy(nnn, _exons, sizeof(sim4polishExon) * _numExons); delete [] _exons; _exons = nnn; } _exons[_numExons] = exon; _exons[_numExons]._intronOrientation = SIM4_INTRON_NONE; if ((lines[cl][lengths[cl]-2] == '-') && (lines[cl][lengths[cl]-1] == '>')) _exons[_numExons]._intronOrientation = SIM4_INTRON_POSITIVE; if ((lines[cl][lengths[cl]-2] == '<') && (lines[cl][lengths[cl]-1] == '-')) _exons[_numExons]._intronOrientation = SIM4_INTRON_NEGATIVE; if ((lines[cl][lengths[cl]-2] == '-') && (lines[cl][lengths[cl]-1] == '-')) _exons[_numExons]._intronOrientation = SIM4_INTRON_AMBIGUOUS; if ((lines[cl][lengths[cl]-2] == '=') && (lines[cl][lengths[cl]-1] == '=')) _exons[_numExons]._intronOrientation = SIM4_INTRON_GAP; _exons[_numExons]._estAlignment = 0L; _exons[_numExons]._genAlignment = 0L; _numCovered += _exons[_numExons]._estTo - _exons[_numExons]._estFrom + 1; _numExons++; cl++; } if (_numExons == 0) { fprintf(stderr, "sim4polish::s4p_linesToPolishS4DB()-- byte "uint32FMT": '%s'\n", startPosition, lines[cl]); fprintf(stderr, "sim4polish::s4p_linesToPolishS4DB()-- WARNING: found ZERO exons?\n"); } _querySeqIdentity = s4p_percentCoverageApprox(); // Now, if we are not at 'sim4end', assume that there are alignment lines for each exon. 
// // We used to check that we didn't hit 'sim4end' before reading all the alignment lines, and if // we did, we'd compain about it and remove the alignment lines. Too much work. // if (strcmp(lines[cl], "sim4end") != 0) { for (uint32 el=0; el<_numExons; el++) { _exons[el]._estAlignment = new char [lengths[cl] + 1]; strcpy(_exons[el]._estAlignment, lines[cl]); cl++; _exons[el]._genAlignment = new char [lengths[cl] + 1]; strcpy(_exons[el]._genAlignment, lines[cl]); cl++; } } } // NOTE: This alters the lines array, with strtok() void sim4polish::s4p_linesToPolishGFF3(uint32 startPosition, uint32 maxLines, char **lines, uint32 *lengths) { char mOri; char sOri; char *clptr; int matchID; char *tok, *crttok; int dummy1, dummy2; char dummybuf[1000]; uint32 r; bool ok = true; assert(_comment == 0L); assert(_estDefLine == 0L); assert(_genDefLine == 0L); assert(_exons == 0L); assert(_numExons == 0); // Don't need to store matchID; re-assigned when file changes uint32 cl = 0; for (cl=0; lines[cl] && (lines[cl][0]=='#'); cl++); if (lines[cl] == NULL) { fprintf(stderr, "sim4polish::s4p_linesToPolishGFF3()-- Empty record. Cannot convert (%s).\n", lines[0]); return; } if (!strcmp(lines[0], "\tsim4db\tmRNA")) { fprintf(stderr, "sim4polish::s4p_linesToPolishGFF3()-- Invalid GFF3 format, got '%s' instead of GFF3 mRNA line. Cannot convert.\n", lines[0]); return; } cl = 0; while (lines[cl] && (lines[cl][0] == '#')) cl++; if (lines[cl] == NULL) { fprintf(stderr, "sim4polish::s4p_linesToPolishGFF3()-- ERROR: Critical error when reading GFF3 record. 
Skipping.\n"); return; } // Scan mRNA line _genDefLine = new char [lengths[cl]]; r = sscanf(lines[cl], ""uint32FMT":%s\tsim4db\tmRNA\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t%c\t.\t", &_genID, _genDefLine, &dummy1, &dummy2, &_percentIdentity, &sOri); if (r != 6) { fprintf(stderr, "sim4polish::s4p_linesToPolishGFF3()-- byte "uint32FMT": '%s'\n", startPosition, lines[cl]); fprintf(stderr, "sim4polish::s4p_linesToPolishGFF3()-- Expecting description line, found %d tokens instead of 6.\n", r); } switch (sOri) { case '+' : _strandOrientation = SIM4_STRAND_POSITIVE; break; case '-' : _strandOrientation = SIM4_STRAND_NEGATIVE; break; case '.' : _strandOrientation = SIM4_STRAND_UNKNOWN; break; default : ok = false; } if (ok == true) { // skip over the first eight columns in the GFF3 format clptr = lines[cl]; while (*clptr!='\t') clptr++; clptr++; while (*clptr!='\t') clptr++; clptr++; while (*clptr!='\t') clptr++; clptr++; while (*clptr!='\t') clptr++; clptr++; while (*clptr!='\t') clptr++; clptr++; while (*clptr!='\t') clptr++; clptr++; while (*clptr!='\t') clptr++; clptr++; while (*clptr!='\t') clptr++; clptr++; tok = strtok(clptr, "\n"); crttok = strtok(tok, ";"); while (crttok) { if (!strncmp(crttok, "ID=sim4db", 9)) { r = sscanf(crttok, "ID=sim4db"uint32FMT"", &matchID); if (r != 1) ok = false; } else if (!strncmp(crttok, "Name", 4)) { if (_estDefLine == 0L) _estDefLine = new char [lengths[cl]]; r = sscanf(crttok, "Name="uint32FMT":%s", &_estID, _estDefLine); if (r != 2) ok = false; } else if (!strncmp(crttok, "Target", 6)) { if (_estDefLine == 0L) _estDefLine = new char [lengths[cl]]; r = sscanf(crttok, "Target="uint32FMT":%s "uint32FMT" "uint32FMT" %c", &_estID, _estDefLine, &dummy1, &dummy2, &mOri); if (r != 5) ok = false; if (mOri == '+') _matchOrientation = SIM4_MATCH_FORWARD; else if (mOri == '-') _matchOrientation = SIM4_MATCH_COMPLEMENT; else ok = false; } else if (!strncmp(crttok, "targetLen", 9)) { r = sscanf(crttok, "targetLen="uint32FMT"", &_estLen); if (r 
!= 1) ok = false; } else if (!strncmp(crttok, "pA", 2)) { r = sscanf(crttok, "pA="uint32FMT"", &_estPolyA); if (r != 1) ok = false; } else if (!strncmp(crttok, "pT", 2)) { r = sscanf(crttok, "pT="uint32FMT"", &_estPolyT); if (r != 1) ok = false; } else if (!strncmp(crttok, "genRegion", 9)) { r = sscanf(crttok, "genRegion="uint32FMT"-"uint32FMT"", &_genRegionOffset, &dummy1); if (r != 2) ok = false; else _genRegionLength = dummy1 - _genRegionOffset + 1; } crttok = strtok(NULL, ";"); } // Check that we read what we should have read so far if ((ok == false) || !_estDefLine || !_genDefLine || !_estLen || !_matchOrientation || !_strandOrientation) { fprintf(stderr, "sim4polish::s4p_linesToPolishGFF3()-- byte "uint32FMT": '%s'\n", startPosition, lines[cl]); fprintf(stderr, "sim4polish::s4p_linesToPolishGFF3()-- Expecting mRNA description line, %s.\n", (ok==false) ? "failed":"incomplete"); } } // // While we get exons, make exons. // sim4polishExon exon; uint32 maxExons = 1024; _numExons = 0; _exons = new sim4polishExon [maxExons]; _numCovered = 0; cl++; while (lines[cl] && (lines[cl][0] == '#')) cl++; while (lines[cl] && strstr(lines[cl], "\tsim4db\texon\t")) { ok = true; exon._intronOrientation = SIM4_INTRON_NONE; r = sscanf(lines[cl], ""uint32FMT":%s\tsim4db\texon\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t%c\t.\t", &dummy1, dummybuf, &exon._genFrom, &exon._genTo, &exon._percentIdentity, &sOri); if (r != 6) { fprintf(stderr, "sim4polish::s4p_linesToPolishGFF3()-- byte "uint32FMT": '%s'\n", startPosition, lines[cl]); fprintf(stderr, "sim4polish::s4p_linesToPolishGFF3()-- Expecting exon description line, found %d tokens instead of 6.\n", r); } if ((dummy1 != _genID) || strcmp(dummybuf, _genDefLine) || (sOri != '+') && (sOri != '-') && (sOri != '.')) ok = false; if (ok) { clptr = lines[cl]; while (*clptr!='\t') clptr++; clptr++; while (*clptr!='\t') clptr++; clptr++; while (*clptr!='\t') clptr++; clptr++; while (*clptr!='\t') clptr++; clptr++; while (*clptr!='\t') clptr++; 
clptr++; while (*clptr!='\t') clptr++; clptr++; while (*clptr!='\t') clptr++; clptr++; while (*clptr!='\t') clptr++; clptr++; tok = strtok(clptr, "\n"); crttok = strtok(tok, ";"); while (crttok) { if (!strncmp(crttok, "Parent=sim4db", 13)) { r = sscanf(crttok, "Parent=sim4db"uint32FMT"", &dummy1); if ((r != 1) || (dummy1 != matchID)) ok = false; } else if (!strncmp(crttok, "Target=", 7)) { r = sscanf(crttok, "Target=%s "uint32FMT" "uint32FMT" %c", &dummybuf, &exon._estFrom, &exon._estTo, &mOri); if ((r != 4) || ((mOri == '+') && (_matchOrientation == SIM4_MATCH_COMPLEMENT)) || ((mOri == '-') && (_matchOrientation == SIM4_MATCH_FORWARD))) ok = false; } else if (!strncmp(crttok, "nMatches=", 9)) { r = sscanf(crttok, "nMatches="uint32FMT"", &exon._numMatches); if (r != 1) ok = false; } else if (!strncmp(crttok, "Gap=", 4)) { ; // Handle this later or, better yet, just skip alignment } else if (!strncmp(crttok, "intron=", 7)) { r = sscanf(crttok, "intron=%s", &dummybuf); if (r != 1) ok = false; if (!strcmp(dummybuf, "->")) exon._intronOrientation = SIM4_INTRON_POSITIVE; else if (!strcmp(dummybuf, "<-")) exon._intronOrientation = SIM4_INTRON_NEGATIVE; else if (!strcmp(dummybuf, "--")) exon._intronOrientation = SIM4_INTRON_AMBIGUOUS; else if (!strcmp(dummybuf, "==")) exon._intronOrientation = SIM4_INTRON_GAP; else ok = false; } crttok = strtok(NULL, ";"); } } // Check that we read what we should have read so far if ((ok == false) || !exon._estFrom || !exon._estTo || !exon._numMatches) { fprintf(stderr, "sim4polish::s4p_linesToPolishGFF3()-- byte "uint32FMT": '%s'\n", startPosition, lines[cl]); fprintf(stderr, "sim4polish::s4p_linesToPolishGFF3()-- Expecting exon description line, %s.\n", (ok==false) ? "failed":"incomplete"); } // Now load everything into the real exons array: // Dang, out of space! This would be a chore, except we don't have alignments yet, and so can // get by with a shallow copy. 
// if (_numExons >= maxExons) { maxExons *= 2; sim4polishExon *nnn = new sim4polishExon [maxExons]; memcpy(nnn, _exons, sizeof(sim4polishExon) * _numExons); delete [] _exons; _exons = nnn; } _exons[_numExons] = exon; _exons[_numExons]._numMatchesN = 0; // Most likely! _exons[_numExons]._estAlignment = 0L; _exons[_numExons]._genAlignment = 0L; _numCovered += _exons[_numExons]._estTo - _exons[_numExons]._estFrom + 1; _numMatches += _exons[_numExons]._numMatches; _numMatchesN += _exons[_numExons]._numMatchesN; _numExons++; cl++; while (lines[cl] && (lines[cl][0] == '#')) cl++; } if (_numExons == 0) { fprintf(stderr, "sim4polish::s4p_linesToPolishGFF3()-- byte "uint32FMT": '%s'\n", startPosition, lines[cl]); fprintf(stderr, "sim4polish::s4p_linesToPolishGFF3()-- WARNING: found ZERO exons?\n"); } _querySeqIdentity = s4p_percentCoverageApprox(); } kmer-code-2013-trunk/libsim4/sim4polish/sim4polishList.H0000644000000000000000000000147012322046702021644 0ustar rootroot#ifndef SIM4_POLISH_LIST_H #define SIM4_POLISH_LIST_H #include "sim4polish.H" // // A list of sim4polishes // class sim4polishList { public: sim4polishList(); sim4polishList(char const *filename); ~sim4polishList(); void push(sim4polish *p); void remove(uint32 i); uint32 length(void) { return(len); }; sim4polish *operator[](uint32 i) { if (i >= len) return(0L); return(list[i]); }; sim4polish *get(uint32 i) { if (i >= len) return(0L); return(list[i]); }; void sortBycDNAIID(void); void sortByGenomicIID(void); // Removes polishes below the specified quality void filterByQuality(uint32 minI, uint32 minC); private: uint32 len; uint32 max; sim4polish **list; }; #endif // SIM4_POLISH_LIST_H kmer-code-2013-trunk/libsim4/sim4polish/sim4polish-copy.C0000644000000000000000000000650212322046702021754 0ustar rootroot#include "sim4polish.H" #include "memory.h" #include #include void sim4polishExon::s4p_copyExon(sim4polishExon *orig) { if (orig == 0L) return; _estFrom = orig->_estFrom; _estTo = orig->_estTo; _genFrom = 
orig->_genFrom; _genTo = orig->_genTo; _numMatches = orig->_numMatches; _numMatchesN = orig->_numMatchesN; _percentIdentity = orig->_percentIdentity; _intronOrientation = orig->_intronOrientation; delete [] _estAlignment; delete [] _genAlignment; _estAlignment = NULL; _genAlignment = NULL; if (orig->_estAlignment) { uint32 len = strlen(orig->_estAlignment) + 1; _estAlignment = new char [len]; memcpy(_estAlignment, orig->_estAlignment, sizeof(char) * len); } if (orig->_genAlignment) { uint32 len = strlen(orig->_genAlignment) + 1; _genAlignment = new char [len]; memcpy(_genAlignment, orig->_genAlignment, sizeof(char) * len); } } void sim4polish::s4p_copyPolish(sim4polish *orig, uint32 exonNum) { if (orig == 0L) return; _estID = orig->_estID; _estLen = orig->_estLen; _estPolyA = orig->_estPolyA; _estPolyT = orig->_estPolyT; _genID = orig->_genID; _genRegionOffset = orig->_genRegionOffset; _genRegionLength = orig->_genRegionLength; _numMatches = orig->_numMatches; _numMatchesN = orig->_numMatchesN; _numCovered = orig->_numCovered; _percentIdentity = orig->_percentIdentity; _querySeqIdentity = orig->_querySeqIdentity; _matchOrientation = orig->_matchOrientation; _strandOrientation = orig->_strandOrientation; delete [] _comment; delete [] _estDefLine; delete [] _genDefLine; _comment = NULL; _estDefLine = NULL; _genDefLine = NULL; delete [] _exons; _numExons = 0; _exons = NULL; // Well, that was easy. Onto the deep copy! if (orig->_comment) { uint32 len = strlen(orig->_comment) + 1; _comment = new char [len]; memcpy(_comment, orig->_comment, sizeof(char) * len); } if (orig->_estDefLine) { uint32 len = strlen(orig->_estDefLine) + 1; _estDefLine = new char [len]; memcpy(_estDefLine, orig->_estDefLine, sizeof(char) * len); } if (orig->_genDefLine) { uint32 len = strlen(orig->_genDefLine) + 1; _genDefLine = new char [len]; memcpy(_genDefLine, orig->_genDefLine, sizeof(char) * len); } // No exons? We're done here. Should never happen... 
if (orig->_numExons == 0) return; // If told to copy one exon, just copy one exon....and then rebuild statistics. if (exonNum < orig->_numExons) { _numExons = 1; _exons = new sim4polishExon [_numExons]; _exons[0].s4p_copyExon(orig->_exons + exonNum); // Rebuild stats _numMatches = _exons[0]._numMatches; _numMatchesN = _exons[0]._numMatchesN; _numCovered = _exons[0]._estTo - _exons[0]._estFrom + 1; _percentIdentity = _exons[0]._percentIdentity; _querySeqIdentity = s4p_percentCoverageApprox(); return; } // Otherwise, copy all exons into the new polish _numExons = orig->_numExons; _exons = new sim4polishExon [_numExons]; for (uint32 i=0; i<_numExons; i++) _exons[i].s4p_copyExon(orig->_exons + i); } kmer-code-2013-trunk/libsim4/sim4polish/sim4polish-deleteexon.C0000644000000000000000000000603512322046702023137 0ustar rootroot#include "sim4polish.H" #include #include #include #include void sim4polish::s4p_deleteExon(uint32 a) { char *ed, *gd; int editDistance = 0; int alignmentLength = 0; // Warn if we don't have alignments -- this is now done by the // driver (e.g., cleanPolishes.C) // #if 0 if ((p->exons[0]._estAlignment == 0L) || (p->exons[0]._genAlignment == 0L)) fprintf(stderr, "s4p_deleteExon()-- Need alignments to recompute scores correctly!\n"); #endif // Set the intron orientation for the exon before the one we are // deleting: // If we are deleting the first exon, there is no previous exon // If we are deleting the last exon, set the previous to SIM4_INTRON_NONE // Otherwise, set the previous to SIM4_INTRON_GAP // if (_numExons > 1) { if (a == _numExons - 1) _exons[a-1]._intronOrientation = SIM4_INTRON_NONE; else if (a > 0) _exons[a-1]._intronOrientation = SIM4_INTRON_GAP; } // Update the match scores // _numMatches -= _exons[a]._numMatches; _numMatchesN -= _exons[a]._numMatchesN; // Erase the exon we're removing, but save a copy so we can stash it in the // soon-to-be-emptied last location. 
// _exons[a].s4p_clearExon(); sim4polishExon d = _exons[a]; // Shift all the exons down by one, and decrement the number of // exons present in the list. // for (uint32 i=a+1; i<_numExons; i++) _exons[i-1] = _exons[i]; _numExons--; // Stash the now deleted exon in the last spot, just to clear out the old contents. // _exons[_numExons] = d; // The strand orientation becomes unknown if we delete internal // exons, or we end up with only one exon. // if (((0 < a) && (a < _numExons)) || (_numExons == 1)) _strandOrientation = SIM4_STRAND_UNKNOWN; // Compute the alignment length and the number of edits. // alignmentLength = 0; editDistance = 0; _numCovered = 0; for (uint32 i=0; i<_numExons; i++) { ed = _exons[i]._estAlignment; gd = _exons[i]._genAlignment; if (ed && gd) { alignmentLength += 2 * strlen(ed); for (; *ed && *gd; ed++, gd++) { if (*ed != *gd) editDistance++; } } else { int len = _exons[i]._estTo - _exons[i]._estFrom + 1 + _exons[i]._estTo - _exons[i]._estFrom + 1; alignmentLength += len; editDistance += len / 2 - _exons[i]._numMatches - _exons[i]._numMatchesN; } _numCovered += _exons[i]._genTo - _exons[i]._genFrom + 1; } #if 0 fprintf(stdout, "Found (new)alignLen = %d\n", alignmentLength); fprintf(stdout, "Found (new)editDist = %d\n", editDistance); #endif // Fix the scores for the match. Special case; if there is only // one exon left, the score for the exon is the score for the // match. 
// if (_numExons == 1) _percentIdentity = _exons[0]._percentIdentity; else _percentIdentity = s4p_percentIdentityApprox(editDistance, alignmentLength); // Update the query sequence identity // _querySeqIdentity = s4p_percentCoverageApprox(); } kmer-code-2013-trunk/libsim4/sim4polish/sim4polish.C0000644000000000000000000000133012322046702020776 0ustar rootroot#include "sim4polish.H" bool sim4polish::s4p_makeForward(void) { if (_matchOrientation == SIM4_MATCH_FORWARD) return(false); for (uint32 e=0; e < _numExons; e++) { uint32 t = _estLen - _exons[e]._estFrom + 1; _exons[e]._estFrom = _estLen - _exons[e]._estTo + 1; _exons[e]._estTo = t; } _matchOrientation = SIM4_MATCH_FORWARD; return(true); } bool sim4polish::s4p_makeReverse(void) { if (_matchOrientation == SIM4_MATCH_COMPLEMENT) return(false); for (uint32 e=0; e < _numExons; e++) { uint32 t = _estLen - _exons[e]._estFrom + 1; _exons[e]._estFrom = _estLen - _exons[e]._estTo + 1; _exons[e]._estTo = t; } _matchOrientation = SIM4_MATCH_COMPLEMENT; return(true); } kmer-code-2013-trunk/libsim4/sim4polish/sim4polish.pm0000644000000000000000000001512111676744271021254 0ustar rootroot#!/usr/local/bin/perl # Confidential -- Do Not Distribute # Copyright (c) 2002 PE Corporation (NY) through the Celera Genomics Group # All Rights Reserved. package sim4polish; use strict; use POSIX "sys_wait_h"; $| = 1; sub import () { } ###################################################################### # # Returns a modified 'raw' string, using the current values for the # info line. DOES NOT rewrite the exons. 
# sub updatePolishInfoLine { my %p = @_; my @L = split '\n', $p{'raw'}; my $l; shift @L; shift @L; $l = "sim4begin\n"; $l .= "$p{'estID'}\[$p{'estLen'}-$p{'pA'}-$p{'pT'}\] "; $l .= "$p{'dbID'}\[$p{'dbLo'}-$p{'dbHi'}\] "; $l .= "<$p{'numMatches'}-$p{'numMatchesN'}-$p{'percentID'}-$p{'matchOrientation'}-$p{'strandPrediction'}>\n"; foreach my $x (@L) { $l .= "$x\n"; } return($l); } sub updatePolish { my %p = @_; my $l; $l = "sim4begin\n"; $l .= "$p{'estID'}\[$p{'estLen'}-$p{'pA'}-$p{'pT'}\] "; $l .= "$p{'dbID'}\[$p{'dbLo'}-$p{'dbHi'}\] "; $l .= "<$p{'numMatches'}-$p{'numMatchesN'}-$p{'percentID'}-$p{'matchOrientation'}-$p{'strandPrediction'}>\n"; $l .= "comment=$p{'comment'}\n" if defined($p{'comment'}); $l .= "edef=$p{'estDefLine'}\n" if defined($p{'estDefLine'}); $l .= "ddef=$p{'dbDefLine'}\n" if defined($p{'estDefLine'}); foreach my $exon (@{@p{'exons'}}) { my $e; $e = "$exon->{'cDNAstart'}-$exon->{'cDNAend'} "; $e .= "($exon->{'GENOMICstart'}-$exon->{'GENOMICend'}) "; $e .= "<$exon->{'numMatches'}-$exon->{'numMatchesN'}-$exon->{'percentID'}> "; $e .= "$exon->{'intronOrientation'}"; $e =~ s/^\s+//; $e =~ s/\s+$//; $l .= "$e\n"; } foreach my $exon (@{@p{'exons'}}) { $l .= "$exon->{'cDNAalign'}\n"; $l .= "$exon->{'GENOMICalign'}\n"; } $l .= "sim4end\n"; return($l); } ###################################################################### # # Subroutine to read a single sim4 polish, and return it as a structure. # sub readPolish (*) { local *READPOLISHFH = shift; my %p; my $line; my $save; # These are the fields returned. 
# $p{'raw'} = undef; $p{'estID'} = undef; $p{'estDefLine'} = undef; $p{'estLen'} = undef; $p{'pA'} = undef; $p{'pT'} = undef; $p{'dbID'} = undef; $p{'dbDefLine'} = undef; $p{'dbLen'} = undef; $p{'dbLo'} = undef; $p{'dbHi'} = undef; $p{'comment'} = undef; $p{'numMatches'} = undef; $p{'numMatchesN'} = undef; $p{'percentID'} = undef; $p{'coveragebp'} = undef; $p{'coverage'} = undef; $p{'matchOrientation'} = undef; $p{'strandPrediction'} = undef; # An array of references to hashes, one hash for each exon. $p{'exons'} = (); # Skip lines until the next match. If used properly, on a proper # file, this should be silent. After the loop, we are at the # start of a polish; the line should be "sim4begin". # $line = ; while (defined($line) && ($line !~ m/^sim4begin$/)) { chomp $line; print STDERR "Skipped: '$line'\n"; $line = ; } $save = $line; # Return now if were are out of file # return(%p) if (eof(READPOLISHFH)); # Read the description line # $line = ; $save .= $line; if ($line =~ m/^(\d+)\[(\d+)-+(\d+)-+(\d+)\]\s+(\d+)\[(\d+)-(\d+)\]\s+\<(\d+)-(\d+)-(\d+)-(\w+)-(\w+)\>$/) { $p{'estID'} = $1; $p{'estLen'} = $2; $p{'pA'} = $3; $p{'pT'} = $4; $p{'dbID'} = $5; $p{'dbLo'} = $6; $p{'dbHi'} = $7; $p{'numMatches'} = $8; $p{'numMatchesN'} = $9; $p{'percentID'} = $10; $p{'matchOrientation'} = $11; $p{'strandPrediction'} = $12; } else { print STDERR "expecting description line, got: '$line'\n"; return(%p); } # Read the two deflines, if they exist. 
# $line = ; if ($line =~ m/^comment=\s*(.*)\s*$/) { $p{'comment'} = $1; $save .= $line; $line = ; } else { #print STDERR "libBri::readPolish()-- WARNING: Didn't get comment!\n"; #print STDERR "libBri::readPolish()-- WARNING: $line"; } if ($line =~ m/^edef=(.*)$/) { $p{'estDefLine'} = $1; $save .= $line; $line = ; } else { #print STDERR "libBri::readPolish()-- WARNING: Didn't get edef!\n"; #print STDERR "libBri::readPolish()-- WARNING: $line"; } if ($line =~ m/^ddef=(.*)$/) { $p{'dbDefLine'} = $1; $save .= $line; $line = ; } else { #print STDERR "libBri::readPolish()-- WARNING: Didn't get ddef!\n"; #print STDERR "libBri::readPolish()-- WARNING: $line"; } # Read the exons # my $exonAlign = 0; my $exonAlignFirst = 1; my $exonCoverage = 0; while (defined($line) && ($line !~ m/^sim4end$/)) { # If this match succeeds, we have an exon description. # Otherwise, it's an alignment line. # if ($line =~ /^(\d+)-(\d+)\s+\((\d+)-(\d+)\)\s+\<(\d+)-(\d+)-(\d+)\>\s+(.*)$/) { my $e = {}; $exonCoverage += $2 - $1 + 1; $e->{'cDNAstart'} = $1; $e->{'cDNAend'} = $2; $e->{'GENOMICstart'} = $3; $e->{'GENOMICend'} = $4; $e->{'numMatches'} = $5; $e->{'numMatchesN'} = $6; $e->{'percentID'} = $7; $e->{'intronOrientation'} = $8; push @{$p{'exons'}}, $e; } else { if ($exonAlignFirst) { $p{'exons'}[$exonAlign]->{'cDNAalign'} = $line; chomp $p{'exons'}[$exonAlign]->{'cDNAalign'}; $exonAlignFirst = 0; } else { $p{'exons'}[$exonAlign]->{'GENOMICalign'} = $line; chomp $p{'exons'}[$exonAlign]->{'GENOMICalign'}; $exonAlignFirst = 1; $exonAlign++; } } $save .= $line; $line = ; } $save .= $line; if (($p{'pA'} + $p{'pT'}) >= $p{'estLen'}) { $p{'coverage'} = 0; } else { $p{'coveragebp'} = $exonCoverage; $p{'coverage'} = 100.0 * $exonCoverage / ($p{'estLen'} - $p{'pA'} - $p{'pT'}); } $p{'raw'} = $save; return(%p); } 1; kmer-code-2013-trunk/libsim4/sim4polish/sim4polishWriter.H0000644000000000000000000000365612322046702022215 0ustar rootroot#ifndef SIM4POLISHWRITER #define SIM4POLISHWRITER #include 
"sim4polish.H" // Simple class for writing a file of alignments. The file can be either sim4db, gff3 or atac // format. This class makes sure that the file has a header (if needed) and takes care of // generating unique IDs for each gff3 file. class sim4polishWriter { public: sim4polishWriter(const char *name, sim4polishStyle style, bool hidden=false); ~sim4polishWriter(); private: // If this was opened as a 'hidden' file, this is the only way to ever see the results again. // The destructor complains if this method is never called on a hidden file. // FILE *surrenderToReader(void); void s4p_putHeaderS4DB(); void s4p_putHeaderGFF3(); void s4p_putHeaderATAC(); friend class sim4polishReader; public: // The source name is listed in column 2 of a GFF3. It is supposed to be the name of the // program that generated these alignments. // // The source name MUST be shorter than 32 letters, and if it contains /'s (a path) only the // last component is used. // void setSourceName(const char *sourceName); // The match ID prefix is used in column 9, when constructing the file-unique ID for each // alignment. If not set, it will default to the sourceName + a short salt derived from the // current time and process id + an integer count starting at zero. The default is chosen so // that the ID's in resulting files are more-or-less globally unique. // // The match ID prefix MUST be shorter than 32 letters. // void setMatchIDPrefix(const char *prefix); // Add an alignment to the file. 
// void writeAlignment(sim4polish *out); private: char _otName[FILENAME_MAX]; FILE *_otFile; sim4polishStyle _style; char _sourceName[32]; char _matchIDprefix[32]; char _matchIDsalt[8]; uint64 _matchID; }; #endif // SIM4POLISHWRITER kmer-code-2013-trunk/libsim4/sim4polish/sim4polish.H0000644000000000000000000001743612322046702021021 0ustar rootroot#ifndef SIM4_POLISH_H #define SIM4_POLISH_H // // Datastructures for writing, processing and reading the output of sim4 // #include #include #include "util++.H" #include "bio++.H" //#include "bio.h" #define SIM4_INTRON_ERROR '?' // '??' #define SIM4_INTRON_POSITIVE '>' // '->' #define SIM4_INTRON_NEGATIVE '<' // '<-' #define SIM4_INTRON_AMBIGUOUS '-' // '--' #define SIM4_INTRON_GAP '=' // '==' #define SIM4_INTRON_NONE '.' // ' ' #define SIM4_MATCH_ERROR '?' #define SIM4_MATCH_FORWARD 'f' #define SIM4_MATCH_COMPLEMENT 'c' #define SIM4_STRAND_ERROR '?' #define SIM4_STRAND_POSITIVE 'p' #define SIM4_STRAND_NEGATIVE 'n' #define SIM4_STRAND_UNKNOWN 'u' #define SIM4_STRAND_INTRACTABLE 'I' #define SIM4_STRAND_FAILED 'F' #define S4P_POLISH_S4DB 100 #define S4P_POLISH_GFF3 101 #define S4P_POLISH_ATAC 102 enum sim4polishStyle { sim4polishS4DB, sim4polishGFF3, sim4polishATAC }; extern sim4polishStyle sim4polishStyleDefault; // sim4polishExon and sim4polish constructors should be private (with whatever builds them a // friend) but snapper2 needs to create empty objects and fill them manually. We could provide a // constructor for that, but time is finite. 
class sim4polishExon { public: sim4polishExon() { _estFrom = 0; _estTo = 0; _genFrom = 0; _genTo = 0; _numMatches = 0; _numMatchesN = 0; _percentIdentity = 0; _intronOrientation = 0; _estAlignment = NULL; _genAlignment = NULL; }; ~sim4polishExon() { delete [] _estAlignment; delete [] _genAlignment; }; void s4p_clearExon(void) { _estFrom = 0; _estTo = 0; _genFrom = 0; _genTo = 0; _numMatches = 0; _numMatchesN = 0; _percentIdentity = 0; _intronOrientation = 0; _estAlignment = 0L; _genAlignment = 0L; }; void s4p_copyExon(sim4polishExon *orig); public: uint32 _estFrom; uint32 _estTo; uint32 _genFrom; uint32 _genTo; uint32 _numMatches; uint32 _numMatchesN; uint32 _percentIdentity; uint32 _intronOrientation; char *_estAlignment; char *_genAlignment; }; class sim4polish { public: void clear(void) { _estID = 0; _estLen = 0; _estPolyA = 0; _estPolyT = 0; _genID = 0; _genRegionOffset = 0; _genRegionLength = 0; _numMatches = 0; _numMatchesN = 0; _numCovered = 0; _percentIdentity = 0; _querySeqIdentity = 0; _matchOrientation = 0; _strandOrientation = 0; _comment = NULL; _estDefLine = NULL; _genDefLine = NULL; _numExons = 0; _exons = NULL; }; sim4polish() { clear(); }; friend class sim4polishBuilder; friend class sim4polishReader; friend class sim4polishWriter; public: // OBSOLETE //sim4polish(FILE *F) { // fprintf(stderr, "OBSOLETE.\n"); // exit(1); //}; sim4polish(readBuffer *rb, sim4polishStyle style) { clear(); switch (style) { case sim4polishS4DB: s4p_readPolishS4DB(rb); break; case sim4polishGFF3: s4p_readPolishGFF3(rb); break; case sim4polishATAC: s4p_readPolishATAC(rb); break; default: fprintf(stderr, "sim4polish()-- ERROR: unknown style '%d'\n", style); exit(1); } }; sim4polish(sim4polish *orig) { clear(); s4p_copyPolish(orig); }; sim4polish(sim4polish *orig, uint32 exon) { clear(); s4p_copyPolish(orig, exon); }; ~sim4polish() { delete [] _comment; delete [] _estDefLine; delete [] _genDefLine; delete [] _exons; }; private: void s4p_readPolishS4DB(readBuffer *rb); void 
s4p_readPolishGFF3(readBuffer *rb); void s4p_readPolishATAC(readBuffer *rb); void s4p_linesToPolishS4DB(uint32 lineNumber, uint32 maxLines, char **lines, uint32 *lengths); void s4p_linesToPolishGFF3(uint32 lineNumber, uint32 maxLines, char **lines, uint32 *lengths); void s4p_linesToPolishATAC(uint32 lineNumber, uint32 maxLines, char **lines, uint32 *lengths); void s4p_copyPolish(sim4polish *orig, uint32 exonNum=2147483648); public: // Note that there is no (public) mechanism to convert these strings back to a sim4polish. The // only mechanism is through a readBuffer (aka, a file). // char *s4p_polishToString(sim4polishStyle style); // STYLE - add =sim4polishS4DB private: char *s4p_polishToStringS4DB(void); char *s4p_polishToStringGFF3(void); char *s4p_polishToStringATAC(void); public: void s4p_removeAlignments(void) { for (uint32 i=0; i<_numExons; i++) { delete [] _exons[i]._estAlignment; _exons[i]._estAlignment = 0L; delete [] _exons[i]._genAlignment; _exons[i]._genAlignment = 0L; } }; void s4p_removeDefLines(void) { delete [] _estDefLine; _estDefLine = 0L; delete [] _genDefLine; _genDefLine = 0L; }; // Reverse complement an input polish, returns true of it was reversed. // bool s4p_makeForward(void); bool s4p_makeReverse(void); // Update the alignment scores based on the alignments that are present. // void s4p_updateAlignmentScores(void); // Approximate (integer) percent identity and coverage. // int s4p_percentIdentityApprox(int numEdits, int alignmentLength); int s4p_percentCoverageApprox(void); // A very expensive and accurate calculation of the percent identity. 
// double s4p_percentIdentityExact(void); double s4p_percentCoverageExact(void); void s4p_swapExons(uint32 a, uint32 b); void s4p_deleteExon(uint32 a); void s4p_insertExon(uint32 a, uint32 intronori, sim4polishExon *e); void s4p_insertExons(uint32 a, uint32 intronori, sim4polish *e); public: uint32 _estID; uint32 _estLen; uint32 _estPolyA; uint32 _estPolyT; uint32 _genID; uint32 _genRegionOffset; uint32 _genRegionLength; uint32 _numMatches; uint32 _numMatchesN; uint32 _numCovered; // Number of bp covered in alignments uint32 _percentIdentity; uint32 _querySeqIdentity; // numCovered / (estLen - pA -pT) uint32 _matchOrientation; uint32 _strandOrientation; char *_comment; char *_estDefLine; char *_genDefLine; uint32 _numExons; sim4polishExon *_exons; }; int s4p_genIDcompare(const void *a, const void *b); int s4p_estIDcompare(const void *a, const void *b); int s4p_genDEFcompare(const void *a, const void *b); int s4p_estDEFcompare(const void *a, const void *b); bool s4p_compatable(sim4polish *A, sim4polish *B); bool s4p_IsSameRegion(sim4polish *A, sim4polish *B, int tolerance); bool s4p_IsRegionOverlap(sim4polish *A, sim4polish *B); bool s4p_IsSameExonModel(sim4polish *A, sim4polish *B, int tolerance); void s4p_compareExons_Overlap(sim4polish *A, sim4polish *B, double overlapThreshold, uint32 *numSame, uint32 *numAOnly, uint32 *numBOnly); void s4p_compareExons_Ends(sim4polish *A, sim4polish *B, int32 tolerance, uint32 *numSame, uint32 *numAOnly, uint32 *numBOnly); #endif // SIM4_POLISH_H kmer-code-2013-trunk/libsim4/sim4polish/sim4polishWriter.C0000644000000000000000000001122712322046702022201 0ustar rootroot#include "sim4polishWriter.H" #include "util++.H" #include #include #include #include #include static const char base64[65] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-"; sim4polishWriter::sim4polishWriter(const char *name, sim4polishStyle style, bool hidden) { if (hidden) { // We are supposed to be a hidden file. 
strcpy(_otName, "(hidden)"); _otFile = makeTempFile(NULL); } else if ((name == 0L) || ((name[0] == '-') && (name[1] == 0))) { // We are stdout. strcpy(_otName, "(stdout)"); _otFile = stdout; } else { // Nope, just a regular ol' file. if (strlen(name) > FILENAME_MAX) fprintf(stderr, "sim4polishWriter()-- Failed to open '%s' for writing: file name too long.\n", name), exit(1); strncpy(_otName, name, FILENAME_MAX); errno = 0; _otFile = fopen(name, "w"); if (errno) fprintf(stderr, "sim4polishWriter()-- Failed to open '%s' for writing: %s\n", _otName, strerror(errno)), exit(1); } _style = style; switch (_style) { case sim4polishS4DB: s4p_putHeaderS4DB(); break; case sim4polishGFF3: s4p_putHeaderGFF3(); break; case sim4polishATAC: s4p_putHeaderATAC(); break; } memset(_sourceName, 0, sizeof(char) * 32); memset(_matchIDprefix, 0, sizeof(char) * 32); memset(_matchIDsalt, 0, sizeof(char) * 8); _matchID = 0; // Construct a match ID salt based on the current time and process ID. We make a 48-bit // number from the combination of process ID and curent time, then convert that to base-64. 
uint64 saltTime = (uint64)getTime(); // returns a double, fraction of seconds uint64 saltPID = (uint64)getpid(); uint64 saltInteger = (saltPID << 32) | (saltTime); uint64 saltMask = uint64MASK(6); _matchIDsalt[0] = base64[saltInteger & saltMask]; saltInteger >>= 6; // 6 bits _matchIDsalt[1] = base64[saltInteger & saltMask]; saltInteger >>= 6; // 12 bits _matchIDsalt[2] = base64[saltInteger & saltMask]; saltInteger >>= 6; // 18 bits _matchIDsalt[3] = base64[saltInteger & saltMask]; saltInteger >>= 6; // 24 bits _matchIDsalt[4] = base64[saltInteger & saltMask]; saltInteger >>= 6; // 30 bits _matchIDsalt[5] = base64[saltInteger & saltMask]; saltInteger >>= 6; // 36 bits _matchIDsalt[6] = base64[saltInteger & saltMask]; saltInteger >>= 6; // 42 bits _matchIDsalt[7] = 0; #if DEBUG_WRITER fprintf(stderr, "SALT: "uint64FMT" + "uint64FMT" = %s\n", saltPID, saltTime, _matchIDsalt); #endif } void sim4polishWriter::s4p_putHeaderS4DB() { return; } void sim4polishWriter::s4p_putHeaderATAC() { return; } void sim4polishWriter::s4p_putHeaderGFF3() { fputs( "##gff-version 3\n", _otFile); return; } sim4polishWriter::~sim4polishWriter() { if (strcmp(_otName, "(hidden)") == 0) { if (_otFile) fprintf(stderr, "sim4polishWriter()-- WARNING: Hidden output file was lost; surrenderToReader() never called.\n"); } else { errno = 0; if (_otFile) fclose(_otFile); if (errno) fprintf(stderr, "sim4polishWriter()-- WARNING: Failed to close '%s': %s\n", _otName, strerror(errno)); } _otFile = NULL; } FILE * sim4polishWriter::surrenderToReader(void) { FILE *retval = _otFile; _otFile = 0L; fflush(retval); rewind(retval); return(retval); } void sim4polishWriter::setSourceName(const char *sourceName) { // Find the last slash, if any. const char *lastSlash = strrchr(sourceName, '/'); // If found, advance one letter to the first letter in the name, otherwise // reset lastSlash to the first letter in the sourceName. 
if (lastSlash) lastSlash++; else lastSlash = sourceName; if (lastSlash[0] == 0) fprintf(stderr, "sim4polishWriter()-- source name is empty, or ends in a '/'; no source name used.\n"); if (strlen(lastSlash) > 32) fprintf(stderr, "sim4polishWriter()-- source name too long, truncating to 31 letters.\n"); strncpy(_sourceName, lastSlash, 32); _sourceName[31] = 0; } void sim4polishWriter::setMatchIDPrefix(const char *prefix) { if (strlen(prefix) > 32) fprintf(stderr, "sim4polishWriter()-- ID prefix too long, truncating to 31 letters.\n"); strncpy(_matchIDprefix, prefix, 32); _matchIDprefix[31] = 0; } void sim4polishWriter::writeAlignment(sim4polish *out) { char *str = 0L; switch (_style) { case sim4polishS4DB: str = out->s4p_polishToStringS4DB(); break; case sim4polishGFF3: str = out->s4p_polishToStringGFF3(); break; case sim4polishATAC: str = out->s4p_polishToStringATAC(); break; } fputs(str, _otFile); delete [] str; } kmer-code-2013-trunk/libsim4/sim4polish/sim4polishBuilder.C0000644000000000000000000001505712322046702022320 0ustar rootroot#include #include #include #include #include #include "bio++.H" #include "sim4polishBuilder.H" sim4polishBuilder::sim4polishBuilder() { it = 0L; exPos = 0; exMax = 32; exAli = 0; ex = new sim4polishExon * [exMax]; for (uint32 i=0; i_estID = estid; it->_estLen = estlen; it->_estPolyA = 0; it->_estPolyT = 0; it->_genID = genid; it->_genRegionOffset = genlo; it->_genRegionLength = genhi - genlo; it->_numMatches = 0; it->_numMatchesN = 0; it->_numCovered = 0; it->_percentIdentity = 0; it->_querySeqIdentity = 0; it->_matchOrientation = SIM4_MATCH_ERROR; it->_strandOrientation = SIM4_STRAND_ERROR; it->_comment = 0L; it->_estDefLine = 0L; it->_genDefLine = 0L; it->_numExons = 0; it->_exons = 0L; } void sim4polishBuilder::setPolyTails(uint32 pa, uint32 pt) { it->_estPolyA = pa; it->_estPolyT = pt; } void sim4polishBuilder::setESTdefline(char *defline) { if (it == 0L) { fprintf(stderr, "sim4polishBuilder::setESTdefline()-- no polish to build; 
create() not called\n"); return; } delete [] it->_estDefLine; it->_estDefLine = new char [strlen(defline) + 1]; memcpy(it->_estDefLine, defline, sizeof(char) * (strlen(defline) + 1)); } void sim4polishBuilder::setGENdefline(char *defline) { if (it == 0L) { fprintf(stderr, "sim4polishBuilder::setGENdefline()-- no polish to build; create() not called\n"); return; } delete [] it->_genDefLine; it->_genDefLine = new char [strlen(defline) + 1]; memcpy(it->_genDefLine, defline, sizeof(char) * (strlen(defline) + 1)); } void sim4polishBuilder::setNumberOfMatches(uint32 nummatches, uint32 nummatchesN) { if (it == 0L) { fprintf(stderr, "sim4polishBuilder::setNumberOfMatches()-- no polish to build; create() not called\n"); return; } it->_numMatches = nummatches; it->_numMatchesN = nummatchesN; } void sim4polishBuilder::setPercentIdentity(uint32 id) { if (it == 0L) { fprintf(stderr, "sim4polishBuilder::setPercentIdentitysetPercentIdentity()-- no polish to build; create() not called\n"); return; } it->_percentIdentity = id; } void sim4polishBuilder::setMatchOrientation(char o) { if (it == 0L) { fprintf(stderr, "sim4polishBuilder::setMatchOrientation()-- no polish to build; create() not called\n"); return; } switch (o) { case SIM4_MATCH_ERROR: case SIM4_MATCH_FORWARD: case SIM4_MATCH_COMPLEMENT: it->_matchOrientation = o; break; default: fprintf(stderr, "sim4polishBuilder::setMatchOrientation()-- invalid match orientation\n"); break; } } void sim4polishBuilder::setStrandOrientation(char o) { if (it == 0L) { fprintf(stderr, "sim4polishBuilder::setStrandOrientation()-- no polish to build; create() not called\n"); return; } switch (o) { case SIM4_STRAND_ERROR: case SIM4_STRAND_POSITIVE: case SIM4_STRAND_NEGATIVE: case SIM4_STRAND_UNKNOWN: case SIM4_STRAND_INTRACTABLE: case SIM4_STRAND_FAILED: it->_strandOrientation = o; break; default: fprintf(stderr, "sim4polishBuilder::setStrandOrientation()-- invalid match orientation\n"); break; } } void sim4polishBuilder::addExon(uint32 estlo, 
uint32 esthi, uint32 genlo, uint32 genhi, uint32 nummatches, uint32 nummatchesN, uint32 percentid, char intronorientation) { if (it == 0L) { fprintf(stderr, "sim4polishBuilder::addExon()-- no polish to build; create() not called\n"); return; } // If we need more space for exons, reallocate the list of pointers // if (exPos >= exMax) { exMax *= 2; sim4polishExon **t = new sim4polishExon* [exMax]; memcpy(t, ex, exPos * sizeof(sim4polishExon *)); delete [] ex; ex = t; for (uint32 i=exPos; i_estAlignment; delete [] ex[exPos]->_genAlignment; } ex[exPos]->_estAlignment = 0L; ex[exPos]->_genAlignment = 0L; ex[exPos]->_estFrom = estlo; ex[exPos]->_estTo = esthi; ex[exPos]->_genFrom = genlo + it->_genRegionOffset; ex[exPos]->_genTo = genhi + it->_genRegionOffset; ex[exPos]->_numMatches = nummatches; ex[exPos]->_numMatchesN = nummatchesN; ex[exPos]->_percentIdentity = percentid; ex[exPos]->_intronOrientation = intronorientation; ex[exPos]->_estAlignment = 0L; ex[exPos]->_genAlignment = 0L; exPos++; } void sim4polishBuilder::addExonAlignment(char *estalign, char *genalign) { if (it == 0L) { fprintf(stderr, "sim4polishBuilder::addExonAlignment()-- no polish to build; create() not called\n"); return; } if (exAli >= exPos) { fprintf(stderr, "sim4polishBuilder::addExonAlignment()-- tried to add alignment for exon %u which doesn't exist\n", exAli); exit(1); } ex[exAli]->_estAlignment = (char *)memdup(estalign, (strlen(estalign) + 1) * sizeof(char)); ex[exAli]->_genAlignment = (char *)memdup(genalign, (strlen(genalign) + 1) * sizeof(char)); exAli++; } sim4polish* sim4polishBuilder::release(void) { sim4polish *retval = it; if (it == 0L) { fprintf(stderr, "sim4polishBuilder::release()-- no polish to build; create() not called\n"); return(0L); } if (exPos == 0) return(0L); it->_numCovered = 0; it->_numExons = exPos; it->_exons = new sim4polishExon [exPos]; for (uint32 i=0; i_exons + i, ex[i], sizeof(sim4polishExon)); ex[i]->_estAlignment = 0L; // Owned by 'it' now ex[i]->_genAlignment 
= 0L; it->_numCovered += (ex[i]->_estTo - ex[i]->_estFrom + 1); } // Last, compute the querySeqIdentity using other fields (like our // just updated numCovered). // it->_querySeqIdentity = it->s4p_percentCoverageApprox(); it = 0L; exPos = 0; exAli = 0; return(retval); } kmer-code-2013-trunk/libsim4/sim4polish/sim4polish-compare.C0000644000000000000000000002276212322046702022436 0ustar rootroot#include "sim4polish.H" #include #include #include #include // // Routines for comparing sim4polish structures. // // Many of these routines assume that the iid's are consistent for // the pair of polishes. In particular, that they are mapped to the // same set of genomic sequences. // int s4p_estIDcompare(const void *a, const void *b) { sim4polish *A = (*(sim4polish **)a); sim4polish *B = (*(sim4polish **)b); if (A == 0L) return(1); if (B == 0L) return(-1); if (A->_estID < B->_estID) return(-1); if (A->_estID > B->_estID) return(1); if (A->_genID < B->_genID) return(-1); if (A->_genID > B->_genID) return(1); if (A->_exons[0]._genFrom < B->_exons[0]._genFrom) return(-1); if (A->_exons[0]._genFrom > B->_exons[0]._genFrom) return(1); return(0); } int s4p_genIDcompare(const void *a, const void *b) { sim4polish *A = (*(sim4polish **)a); sim4polish *B = (*(sim4polish **)b); if (A == 0L) return(1); if (B == 0L) return(-1); if (A->_genID < B->_genID) return(-1); if (A->_genID > B->_genID) return(1); if (A->_exons[0]._genFrom < B->_exons[0]._genFrom) return(-1); if (A->_exons[0]._genFrom > B->_exons[0]._genFrom) return(1); if (A->_estID < B->_estID) return(-1); if (A->_estID > B->_estID) return(1); return(0); } int s4p_estDEFcompare(const void *a, const void *b) { sim4polish *A = (*(sim4polish **)a); sim4polish *B = (*(sim4polish **)b); int e = 0; if (A == 0L) return(1); if (B == 0L) return(-1); if (A->_estDefLine == 0L) return(1); if (B->_estDefLine == 0L) return(-1); e = strcmp(A->_estDefLine, B->_estDefLine); if (e < 0) return(-1); if (e > 0) return(1); if (A->_genDefLine == 0L) 
return(1); if (B->_genDefLine == 0L) return(-1); e = strcmp(A->_genDefLine, B->_genDefLine); if (e < 0) return(-1); if (e > 0) return(1); if (A->_exons[0]._genFrom < B->_exons[0]._genFrom) return(-1); if (A->_exons[0]._genFrom > B->_exons[0]._genFrom) return(1); return(0); } int s4p_genDEFcompare(const void *a, const void *b) { sim4polish *A = (*(sim4polish **)a); sim4polish *B = (*(sim4polish **)b); int e = 0; if (A == 0L) return(1); if (B == 0L) return(-1); if (A->_genDefLine == 0L) return(1); if (B->_genDefLine == 0L) return(-1); e = strcmp(A->_genDefLine, B->_genDefLine); if (e < 0) return(-1); if (e > 0) return(1); if (A->_estDefLine == 0L) return(1); if (B->_estDefLine == 0L) return(-1); e = strcmp(A->_estDefLine, B->_estDefLine); if (e < 0) return(-1); if (e > 0) return(1); if (A->_exons[0]._genFrom < B->_exons[0]._genFrom) return(-1); if (A->_exons[0]._genFrom > B->_exons[0]._genFrom) return(1); return(0); } // Return false if not from the same EST/GEN pair, or mapped to // different strands, true otherwise. 
// bool s4p_compatable(sim4polish *A, sim4polish *B) { if ((A->_estID != B->_estID) || (A->_genID != B->_genID) || (A->_matchOrientation != B->_matchOrientation)) return(false); else return(true); } // Returns true if the two polishes are on about the same genomic // region // bool s4p_IsSameRegion(sim4polish *A, sim4polish *B, int tolerance) { int32 Alo=0, Ahi=0; int32 Blo=0, Bhi=0; int32 Dlo=0, Dhi=0; if (A->_numExons > 0) { Alo = (int32)A->_exons[0]._genFrom; Ahi = (int32)A->_exons[A->_numExons-1]._genTo; } if (B->_numExons > 0) { Blo = (int32)B->_exons[0]._genFrom; Bhi = (int32)B->_exons[B->_numExons-1]._genTo; } Dlo = Blo - Alo; Dhi = Bhi - Ahi; if ((Dlo < -tolerance) || (Dlo > tolerance) || (Dhi < -tolerance) || (Dhi > tolerance)) return(false); else return(true); } // Returns true if the two polishes overlap genomic regions // bool s4p_IsRegionOverlap(sim4polish *A, sim4polish *B) { int32 Alo=0, Ahi=0; int32 Blo=0, Bhi=0; if (A->_genID != B->_genID) return(false); if (A->_numExons > 0) { Alo = (int32)A->_exons[0]._genFrom; Ahi = (int32)A->_exons[A->_numExons-1]._genTo; } if (B->_numExons > 0) { Blo = (int32)B->_exons[0]._genFrom; Bhi = (int32)B->_exons[B->_numExons-1]._genTo; } if (((Alo <= Blo) && (Blo <= Ahi)) || ((Blo <= Alo) && (Alo <= Bhi))) return(true); else return(false); } // Returns true if the two polishes have the same number of exons, // and each exon is mapped to about the same genomic region. 
//
bool
s4p_IsSameExonModel(sim4polish *A, sim4polish *B, int tolerance) {
  int32 Alo=0, Ahi=0;
  int32 Blo=0, Bhi=0;
  int32 Dlo=0, Dhi=0;

  if (A->_numExons != B->_numExons)
    return(false);

  //  Exons are compared pairwise, in order.  Both ends of every pair
  //  must agree to within 'tolerance' (inclusive).
  //
  for (uint32 i=0; i<A->_numExons; i++) {
    Alo = (int32)A->_exons[i]._genFrom;
    Ahi = (int32)A->_exons[i]._genTo;
    Blo = (int32)B->_exons[i]._genFrom;
    Bhi = (int32)B->_exons[i]._genTo;

    Dlo = Blo - Alo;
    Dhi = Bhi - Ahi;

    if ((Dlo < -tolerance) || (Dlo > tolerance) ||
        (Dhi < -tolerance) || (Dhi > tolerance))
      return(false);
  }

  return(true);
}


//  Compare every exon of A against every exon of B by genomic overlap.
//
//  A pair matches when (length of the shared interval) / (length of
//  the combined interval) is at least overlapThreshold.
//
//    numSame    - number of matching (A exon, B exon) pairs
//    numAMissed - number of A exons that matched no B exon
//    numBMissed - number of B exons that matched no A exon
//
//  Any of the three output pointers may be NULL; that count is then
//  not reported.
//
void
s4p_compareExons_Overlap(sim4polish *A,
                         sim4polish *B,
                         double      overlapThreshold,
                         uint32     *numSame,
                         uint32     *numAMissed,
                         uint32     *numBMissed) {
  uint32    i, j;
  uint32    al=0, ah=0, bl=0, bh=0;
  uint32   *foundA  = 0L;
  uint32   *foundB  = 0L;
  double    overlap = 0;

  if (numSame)     *numSame    = 0;
  if (numAMissed)  *numAMissed = 0;
  if (numBMissed)  *numBMissed = 0;

  //  One allocation holds both hit counters; foundB is the tail of it.
  //  operator new[] reports failure by throwing, and errno says nothing
  //  about a successful allocation, so errno is deliberately not tested.
  //
  foundA = new uint32 [A->_numExons + B->_numExons];
  foundB = foundA + A->_numExons;

  for (i=0; i<A->_numExons; i++)
    foundA[i] = 0;

  for (i=0; i<B->_numExons; i++)
    foundB[i] = 0;

  //  If they overlap, declare a match
  //
  for (i=0; i<A->_numExons; i++) {
    for (j=0; j<B->_numExons; j++) {
      al = A->_exons[i]._genFrom;
      ah = A->_exons[i]._genTo;
      bl = B->_exons[j]._genFrom;
      bh = B->_exons[j]._genTo;

      overlap = 0;

      //  Compute the percent overlapping as:
      //
      //     ----------
      //            ----------
      //            ^^^ = 3
      //     ^^^^^^^^^^^^^^^^^ = 17
      //
      //  overlap = 3/17
      //
      //  When both tests below are true (for example al == bl) the
      //  second result replaces the first.
      //
      if ((al <= bl) && (bl <= ah)) {
        //  B starts somewhere in A
        //
        if (ah < bh) {
          //  B ends outside A
          //
          //  aaaaaaaaaaa
          //       bbbbbbbbbbbbb
          overlap = (double)(ah-bl) / (double)(bh-al);
        } else {
          //  B ends inside A
          //
          //  aaaaaaaaaaa
          //       bbbbb
          overlap = (double)(bh-bl) / (double)(ah-al);
        }
      }

      if ((bl <= al) && (al <= bh)) {
        //  B ends somewhere in A
        //
        if (bh < ah) {
          //  B starts outside A
          //
          //       aaaaaaaaaaa
          //  bbbbbbbbbbbbb
          overlap = (double)(bh-al) / (double)(ah-bl);
        } else {
          //  B starts inside A
          //
          //       aaaa
          //  bbbbbbbbbbbbb
          overlap = (double)(ah-al) / (double)(bh-bl);
        }
      }

      if (overlap >= overlapThreshold) {
        foundA[i]++;
        foundB[j]++;

        if (numSame)
          (*numSame)++;
      }
    }
  }

  for (i=0; i<A->_numExons; i++) {
    //if (foundA[i] > 1) fprintf(stderr, "WARNING: Found exon %d %d times in A!\n", i, foundA[i]);

    if (numAMissed && (foundA[i] == 0))
      (*numAMissed)++;
  }

  for (i=0; i<B->_numExons; i++) {
    //if (foundB[i] > 1) fprintf(stderr, "WARNING: Found exon %d %d times in B!\n", i, foundB[i]);

    if (numBMissed && (foundB[i] == 0))
      (*numBMissed)++;
  }

  delete [] foundA;
}


//  Compare every exon of A against every exon of B by end points.
//
//  A pair matches when both the begin and the end differ by strictly
//  less than 'tolerance'.  The comparison is exclusive, so a tolerance
//  of zero matches nothing.
//
//    numSame    - number of matching (A exon, B exon) pairs
//    numAMissed - number of A exons that matched no B exon
//    numBMissed - number of B exons that matched no A exon
//
//  Any of the three output pointers may be NULL; that count is then
//  not reported.
//
void
s4p_compareExons_Ends(sim4polish *A,
                      sim4polish *B,
                      int32       tolerance,
                      uint32     *numSame,
                      uint32     *numAMissed,
                      uint32     *numBMissed) {
  uint32    i, j;
  int32     Dlo=0, Dhi=0;
  uint32   *foundA  = 0L;
  uint32   *foundB  = 0L;

  if (numSame)     *numSame    = 0;
  if (numAMissed)  *numAMissed = 0;
  if (numBMissed)  *numBMissed = 0;

  //  One allocation holds both hit counters; foundB is the tail of it.
  //  errno is not tested here: it was never cleared before the
  //  allocation, so a stale value left by any earlier call used to
  //  abort the program with a bogus "Can't allocate" message.
  //  operator new[] reports failure by throwing.
  //
  foundA = new uint32 [A->_numExons + B->_numExons];
  foundB = foundA + A->_numExons;

  for (i=0; i<A->_numExons; i++)
    foundA[i] = 0;

  for (i=0; i<B->_numExons; i++)
    foundB[i] = 0;

  //  If they have similar end points, declare a match
  //
  for (i=0; i<A->_numExons; i++) {
    for (j=0; j<B->_numExons; j++) {
      Dlo = (int32)(B->_exons[j]._genFrom) - (int32)(A->_exons[i]._genFrom);
      Dhi = (int32)(B->_exons[j]._genTo)   - (int32)(A->_exons[i]._genTo);

      if ((Dlo > -tolerance) && (Dlo < tolerance) &&
          (Dhi > -tolerance) && (Dhi < tolerance)) {
        foundA[i]++;
        foundB[j]++;

        if (numSame)
          (*numSame)++;
      }
    }
  }

  for (i=0; i<A->_numExons; i++) {
    //if (foundA[i] > 1) fprintf(stderr, "WARNING: Found exon %d %d times in A!\n", i, foundA[i]);

    if (numAMissed && (foundA[i] == 0))
      (*numAMissed)++;
  }

  for (i=0; i<B->_numExons; i++) {
    //if (foundB[i] > 1) fprintf(stderr, "WARNING: Found exon %d %d times in B!\n", i, foundB[i]);

    if (numBMissed && (foundB[i] == 0))
      (*numBMissed)++;
  }

  delete [] foundA;
}
kmer-code-2013-trunk/libsim4/sim4polish/sim4polishFile.H0000644000000000000000000000553112322046702021612 0ustar rootroot#ifndef SIM4POLISHFILE #define SIM4POLISHFILE #include "sim4polish.H" #include "sim4polishList.H" // // (original motivation) // Needed: something to return polishes from a file without reading // in everything, and without doing lots of I/O. // // polishFile A(name) // sim4polishList l = A.getEST(id); // sim4polishList l = A.getGEN(id, lo=0, hi=0); // // sim4polish x = A.getNext(); // // A.seek(polish-ordinal to seek to); // // On first invocation, it reads the whole file, building a map of // ESTid, GENid to file position. Maybe also store GENlo and GENhi // in this map. Map is stored on disk as "file.polishFileMap" // // // The first call to getEST(), getGEN() or seek() will cause a // while file scan to be performed. From this, we build a list of // all the polishes, and their iid's. This index is cached on disk // as '${path}.sim4polishFile' // class sim4polishFile { public: sim4polishFile(char *path, sim4polishStyle style); ~sim4polishFile(); // Number of EST's actually present // uint32 maxIID(void) { return(_maxEST); }; // Returns all matches with: // ESTid == iid // GENid == iid AND that overlap the range lo...hi // // N.B. getNext() doesn't really mean much after these. // sim4polishList *getEST(uint32 iid); sim4polishList *getGEN(uint32 iid, uint32 lo=0, uint32 hi=~uint32ZERO); // Returns the next polish in the file. // sim4polish *getNext(void); // Positions the file pointer to the ordinal'th polish in the file. // getNext() will then return that polish. // void setPosition(uint32 ordinal); // 24 bytes/record. A typical large EST set has ~10 million // matches, this fits into 240MB. That's reasonable. // // I really hate to make this public, but sorting needs it.
// struct polishRecord { off_t _fileposition; uint32 _ESTiid; uint32 _GENiid; uint32 _GENlo; uint32 _GENhi; }; private: char *_path; readBuffer *_file; // One record for each polish, in the order they are in the file. // One integer pointer into _polishRecord, sorted by either the EST // or GEN iid. // uint32 _polishRecordLen; uint32 _polishRecordMax; polishRecord *_polishRecord; uint32 *_polishRecordEST; uint32 *_polishRecordGEN; sim4polishStyle _style; // One integer pointer for each iid we've seen. Pointer into // _polishRecordEST or _polishRecordGEN. If memory is tight, we // could binary search those arrays instead. // uint32 _maxEST; uint32 _maxGEN; uint32 *_ESTiidLocation; uint32 *_GENiidLocation; void loadIndex(void); void saveIndex(void); void buildIndex(void); }; #endif // SIM4POLISHFILE kmer-code-2013-trunk/libsim4/sim4polish/Make.include0000644000000000000000000000217711512763666021052 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../../libutil/))/ LIBBIO/ :=$(realpath $/../../libbio/))/ src :=$/sim4polish.C \ $/sim4polish-copy.C \ $/sim4polish-compare.C \ $/sim4polish-deleteexon.C \ $/sim4polish-exons.C \ $/sim4polish-polishtostring.C \ $/sim4polish-read.C \ $/sim4polish-stringtopolish.C \ $/sim4polish-updatescores.C \ $/sim4polish.H \ $/sim4polishList.H \ $/sim4polishList.C \ $/sim4polishBuilder.H \ $/sim4polishBuilder.C \ $/sim4polishFile.H \ $/sim4polishFile.C \ $/sim4polishReader.C \ $/sim4polishReader.H \ $/sim4polishWriter.C \ $/sim4polishWriter.H $/.C_SRCS :=$(filter %.c,${src}) $/.C_INCS :=$(filter %.h,${src}) $/.CXX_SRCS :=$(filter %.C,${src}) $/.CXX_INCS :=$(filter %.H,${src}) $/.CXX_LIBS :=$/libsim4polish.a $/.CLEAN :=$/*.o $/libsim4polish.a : ${$/.C_SRCS:.c=.o} ${$/.CXX_SRCS:.C=.o} $(eval $/%.d $/%.o: CFLAGS +=-I${LIBUTL/} -I${LIBBIO/}) $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBUTL/} -I${LIBBIO/}) kmer-code-2013-trunk/libsim4/sim4polish/sim4polishReader.H0000644000000000000000000000140411466015417022137 0ustar rootroot#ifndef 
SIM4POLISHREADER #define SIM4POLISHREADER #include "sim4polish.H" #include "util++.H" // Simple class to read the contents of a file of alignments. The file can be either sim4db, gff3 // or atac format. No support for random access is provided, just sequential access. class sim4polishWriter; class sim4polishReader { public: sim4polishReader(const char *name, sim4polishWriter *writer=0L); ~sim4polishReader(); // Returns the next alignment in the file. NULL is returned if there are no more alignments. // sim4polish *nextAlignment(void); bool nextAlignment(sim4polish * &p); sim4polishStyle getsim4polishStyle(void) { return _style; } private: readBuffer *_rb; sim4polishStyle _style; }; #endif // SIM4POLISHREADER kmer-code-2013-trunk/libsim4/sim4polish/sim4polishFile.C0000644000000000000000000002007412322046702021604 0ustar rootroot#include #include #include #include #include "sim4polishFile.H" // Global pointer used during construction of the index. // (polishRecordSortArray) // sim4polishFile::polishRecord *__prsa; int __prsaEST(const void *a, const void *b) { uint32 aid = __prsa[ *((uint32*)a) ]._ESTiid; uint32 bid = __prsa[ *((uint32*)b) ]._ESTiid; if (aid < bid) return(-1); if (aid > bid) return(1); return(0); } int __prsaGEN(const void *a, const void *b) { uint32 aid = __prsa[ *((uint32*)a) ]._GENiid; uint32 bid = __prsa[ *((uint32*)b) ]._GENiid; if (aid < bid) return(-1); if (aid > bid) return(1); return(0); } sim4polishFile::sim4polishFile(char *path, sim4polishStyle style) { _path = new char [strlen(path) + 1]; strcpy(_path, path); _file = new readBuffer(path); _style = style; _polishRecordLen = 0; _polishRecordMax = 0; _polishRecord = 0L; _polishRecordEST = 0L; _polishRecordGEN = 0L; _maxEST = 0; _maxGEN = 0; _ESTiidLocation = 0L; _GENiidLocation = 0L; } sim4polishFile::~sim4polishFile() { delete [] _path; delete [] _polishRecord; delete [] _polishRecordEST; delete [] _polishRecordGEN; delete [] _ESTiidLocation; delete [] _GENiidLocation; } sim4polishList* 
sim4polishFile::getEST(uint32 iid) { sim4polishList *l = new sim4polishList(); if (iid >= _maxEST) //fprintf(stderr, "Invalid EST iid "uint32FMT", max is "uint32FMT"\n", iid, _maxEST), exit(1); return(l); sim4polish *p = 0L; uint32 i = _ESTiidLocation[iid]; if (i != ~uint32ZERO) { setPosition(_polishRecordEST[i]); p = new sim4polish(_file, _style); while ((p) && (p->_numExons > 0) && (p->_estID == iid)) { l->push(p); i++; setPosition(_polishRecordEST[i]); p = new sim4polish(_file, _style); } delete p; } return(l); } sim4polishList* sim4polishFile::getGEN(uint32 iid, uint32 lo, uint32 hi) { fprintf(stderr, "sim4polishFile::getGEN() not implemented. Sorry.\n"); exit(1); return(0L); } sim4polish* sim4polishFile::getNext(void) { return(new sim4polish(_file, _style)); } void sim4polishFile::setPosition(uint32 ordinal) { if (_polishRecord == 0L) buildIndex(); if (ordinal >= _polishRecordLen) fprintf(stderr, "Failed to reposition %s to record "uint32FMT", only "uint32FMT" records\n", _path, ordinal, _polishRecordLen), exit(1); _file->seek(_polishRecord[ordinal]._fileposition); } void sim4polishFile::loadIndex(void) { char magic[8] = {0}; char cigam[8] = { 's', '4', 'p', 'F', 'i', 'l', 'e', '1'}; int len = strlen(_path) + 32; char *nam = new char [len]; sprintf(nam, "%s.sim4polishFile", _path); if (fileExists(nam)) { errno = 0; FILE *F = fopen(nam, "r"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", nam, strerror(errno)), exit(1); fread(&magic, sizeof(char), 8, F); if (strncmp(magic, cigam, 8) != 0) fprintf(stderr, "Failed to open '%s': Not a sim4polishFile!\n", nam), exit(1); fread(&_polishRecordLen, sizeof(uint32), 1, F); _polishRecord = new polishRecord [_polishRecordLen]; _polishRecordEST = new uint32 [_polishRecordLen]; _polishRecordGEN = new uint32 [_polishRecordLen]; fread( _polishRecord, sizeof(polishRecord), _polishRecordLen, F); fread( _polishRecordEST, sizeof(uint32), _polishRecordLen, F); fread( _polishRecordGEN, sizeof(uint32), _polishRecordLen, F); 
fread(&_maxEST, sizeof(uint32), 1, F); fread(&_maxGEN, sizeof(uint32), 1, F); _ESTiidLocation = new uint32 [_maxEST]; _GENiidLocation = new uint32 [_maxGEN]; fread( _ESTiidLocation, sizeof(uint32), _maxEST, F); fread( _GENiidLocation, sizeof(uint32), _maxGEN, F); if (errno) fprintf(stderr, "Failed to read '%s': %s\n", nam, strerror(errno)), exit(1); fclose(F); } delete [] nam; } void sim4polishFile::saveIndex(void) { char cigam[8] = { 's', '4', 'p', 'F', 'i', 'l', 'e', '1'}; int len = strlen(_path) + 32; char *nam = new char [len]; sprintf(nam, "%s.sim4polishFile", _path); errno = 0; FILE *F = fopen(nam, "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", nam, strerror(errno)), exit(1); fwrite(&cigam, sizeof(char), 8, F); fwrite(&_polishRecordLen, sizeof(uint32), 1, F); fwrite( _polishRecord, sizeof(polishRecord), _polishRecordLen, F); fwrite( _polishRecordEST, sizeof(uint32), _polishRecordLen, F); fwrite( _polishRecordGEN, sizeof(uint32), _polishRecordLen, F); fwrite(&_maxEST, sizeof(uint32), 1, F); fwrite(&_maxGEN, sizeof(uint32), 1, F); fwrite( _ESTiidLocation, sizeof(uint32), _maxEST, F); fwrite( _GENiidLocation, sizeof(uint32), _maxGEN, F); if (errno) fprintf(stderr, "Failed to write '%s': %s\n", nam, strerror(errno)), exit(1); fclose(F); delete [] nam; } void sim4polishFile::buildIndex(void) { loadIndex(); if (_polishRecord == 0L) { fprintf(stderr, "sim4polishFile::buildIndex()-- building index for '%s'\n", _path); _file->seek(0); // Allocate a bunch of space for stuff // _polishRecordLen = 0; _polishRecordMax = 3355443; // ~128MB for all three _polishRecord = new polishRecord [_polishRecordMax]; // Read all polishes, storing stuff, reallocating more space if // needed. 
// off_t fp = _file->tell(); sim4polish *p = new sim4polish(_file, _style); while (p) { if (_polishRecordLen >= _polishRecordMax) { _polishRecordMax *= 2; polishRecord *n = new polishRecord [_polishRecordMax]; memcpy(n, _polishRecord, sizeof(polishRecord) * _polishRecordLen); delete [] _polishRecord; _polishRecord = n; } _polishRecord[_polishRecordLen]._fileposition = fp; _polishRecord[_polishRecordLen]._ESTiid = p->_estID; _polishRecord[_polishRecordLen]._GENiid = p->_genID; _polishRecord[_polishRecordLen]._GENlo = p->_exons[0]._genFrom; _polishRecord[_polishRecordLen]._GENhi = p->_exons[p->_numExons-1]._genTo; _polishRecordLen++; if ((_polishRecordLen & 0xfff) == 0) { fprintf(stderr, "polishes: "uint32FMT"\r", _polishRecordLen); fflush(stderr); } delete p; fp = _file->tell(); if (_file->eof()) p = NULL; else p = new sim4polish(_file, _style); } // Sort the indices by EST and GEN iid's. Pain in the butt, we // need to access _polishRecord to sort *EST and *GEN, but // qsort() doesn't support that. 
// // Three solutions: // 1) use a custom sort // 2) use a global pointer to _polishRecord // 3) use a temporary array holding the sort key and position // _polishRecordEST = new uint32 [_polishRecordLen]; _polishRecordGEN = new uint32 [_polishRecordLen]; for (uint32 i=0; i<_polishRecordLen; i++) _polishRecordEST[i] = _polishRecordGEN[i] = i; __prsa = _polishRecord; qsort(_polishRecordEST, _polishRecordLen, sizeof(uint32), __prsaEST); qsort(_polishRecordGEN, _polishRecordLen, sizeof(uint32), __prsaGEN); __prsa = 0L; // Scan the sorted lists, record the first location of each iid // _maxEST = _polishRecord[ _polishRecordEST[_polishRecordLen-1] ]._ESTiid + 1; _maxGEN = _polishRecord[ _polishRecordGEN[_polishRecordLen-1] ]._GENiid + 1; _ESTiidLocation = new uint32 [_maxEST]; _GENiidLocation = new uint32 [_maxGEN]; for (uint32 i=0; i<_maxEST; i++) _ESTiidLocation[i] = ~uint32ZERO; for (uint32 i=0; i<_polishRecordLen; i++) { uint32 iid = _polishRecord[ _polishRecordEST[i] ]._ESTiid; if (_ESTiidLocation[iid] == ~uint32ZERO) _ESTiidLocation[iid] = i; } for (uint32 i=0; i<_maxGEN; i++) _GENiidLocation[i] = ~uint32ZERO; for (uint32 i=0; i<_polishRecordLen; i++) { uint32 iid = _polishRecord[ _polishRecordGEN[i] ]._GENiid; if (_GENiidLocation[iid] == ~uint32ZERO) _GENiidLocation[iid] = i; } // Save the index // saveIndex(); // Be nice, reposition the file to the start. 
// _file->seek(0); } } kmer-code-2013-trunk/libsim4/sim4polish/sim4polish-polishtostring.C0000644000000000000000000003063212322046702024073 0ustar rootroot#include "sim4polish.H" #include #include #include #include //#define DEBUG_CIGAR const char *mOriFWD = "forward"; const char *mOriCMP = "complement"; const char *mOriERR = "error"; const char *mOriDEF = "UNKNOWN"; const char *sOriFWD = "forward"; const char *sOriREV = "reverse"; const char *sOriUNK = "unknown"; const char *sOriINT = "intractable"; const char *sOriABT = "aborted"; const char *sOriERR = "error"; const char *sOriDEF = "UNKNOWN"; const char *iOriPOS = " ->"; const char *iOriNEG = " <-"; const char *iOriAMB = " --"; const char *iOriGAP = " =="; const char *iOriERR = " ??"; const char *iOriNOO = ""; bool sim4polishStyleSet = false; sim4polishStyle sim4polishStyleDefault = sim4polishS4DB; uint32 sim4polishPolishID = 0; char * encodeGap(char *ref, char *tgt) { if ((ref == 0L) || (tgt == 0L)) return(0L); uint32 lenref = strlen(ref); uint32 lentgt = strlen(tgt); assert(lenref == lentgt); char *gap = new char [3 * lenref]; char *gpp = gap; char gaptyp = 0; uint32 gapcnt = 0; for (uint32 i=0; i\n", _estID, _estLen, _estPolyA, _estPolyT, _genID, _genRegionOffset, _genRegionLength, _numMatches, _numMatchesN, _percentIdentity, mOri, sOri); while (*outc) outc++; if (_comment) { sprintf(outc, "comment=%s\n", _comment); while (*outc) outc++; } if (_estDefLine) { sprintf(outc, "edef=%s\n", _estDefLine); while (*outc) outc++; } if (_genDefLine) { sprintf(outc, "ddef=%s\n", _genDefLine); while (*outc) outc++; } for (uint32 i=0; i<_numExons; i++) { switch (_exons[i]._intronOrientation) { case SIM4_INTRON_POSITIVE: iOri = iOriPOS; break; case SIM4_INTRON_NEGATIVE: iOri = iOriNEG; break; case SIM4_INTRON_AMBIGUOUS: iOri = iOriAMB; break; case SIM4_INTRON_GAP: iOri = iOriGAP; break; case SIM4_INTRON_ERROR: iOri = iOriERR; break; default: iOri = iOriNOO; break; } sprintf(outc, ""uint32FMT"-"uint32FMT" 
("uint32FMT"-"uint32FMT") <"uint32FMT"-"uint32FMT"-"uint32FMT">%s\n", _exons[i]._estFrom, _exons[i]._estTo, _exons[i]._genFrom, _exons[i]._genTo, _exons[i]._numMatches, _exons[i]._numMatchesN, _exons[i]._percentIdentity, iOri); while (*outc) outc++; } for (uint32 i=0; i<_numExons; i++) { if (_exons[i]._estAlignment) { strcpy(outc, _exons[i]._estAlignment); while (*outc) outc++; *outc++ = '\n'; } if (_exons[i]._genAlignment) { strcpy(outc, _exons[i]._genAlignment); while (*outc) outc++; *outc++ = '\n'; } } strcpy(outc, "sim4end\n"); return(outs); } char * sim4polish::s4p_polishToStringGFF3(void) { // 9 columns, tab separated // tab, newline, cr and control MUST be escaped // reserved letters: ; = % & , // spaces ARE ALLOWED in fields // undefined values should use '.' // // 1 seqid, genome name (a-zA-Z0-9.:^*$@!+_?-|), no whitespace (??) and not begin with > // 2 source ("sim4db") // 3 type ("mRNA" or "exon") // 4 begin, 1-based // 5 end, zero-length start=end, to the right of this base // 6 score (percent identity) // 7 strand // 8 phase // 9 attributes // ID (unique within scope of file) // Name (display name) // Parent () // Target // Gap // Derives_from // Note // Dbxref // Ontology_term // Is_circular // others, user-defined (lowercase first letter; see below) // // Example: // 0:arm_2L sim4db mRNA 2372455 2373234 98 - . ID=sim4db0;Name=61728:gb|CA807305;Target=61728:gb|CA807305 22 685 +;targetLen=685;pA=0;pT=21;genRegion=2370482-2375223 // 0:arm_2L sim4db exon 2372455 2372770 99 - . Parent=sim4db0;Target=61728:gb|CA807305 22 337 +;Gap=M316;nMatches=313;intron=<- // 0:arm_2L sim4db exon 2372830 2373076 96 - . Parent=sim4db0;Target=61728:gb|CA807305 338 584 +;Gap=M74 D1 M2 I1 M170;nMatches=238;intron=<- // 0:arm_2L sim4db exon 2373134 2373234 99 - . Parent=sim4db0;Target=61728:gb|CA807305 585 685 +;Gap=M101;nMatches=100 // // Make a decent estimate of how much space we'll need to store the string // uint32 spaceNeeded = (1024 + 128 * _numExons + ((_comment) ? 
strlen(_comment) : 0) + ((_estDefLine) ? strlen(_estDefLine) : 0) + ((_genDefLine) ? strlen(_genDefLine) : 0)); for (uint32 i=0; i<_numExons; i++) if (_exons[i]._estAlignment) spaceNeeded += 2 * strlen(_exons[i]._estAlignment); char *outs = new char [spaceNeeded]; char *outc = outs; // Find extents of this match. uint32 estbgn = _exons[0]._estFrom; uint32 estend = _exons[_numExons-1]._estTo; uint32 genbgn = _exons[0]._genFrom; uint32 genend = _exons[_numExons-1]._genTo; for (uint32 i=0; i<_numExons; i++) { if (_exons[i]._genFrom < genbgn) genbgn = _exons[i]._genFrom; if (_exons[i]._genTo < genbgn) genbgn = _exons[i]._genTo; if (genend < _exons[i]._genFrom) genend = _exons[i]._genFrom; if (genend < _exons[i]._genTo) genend = _exons[i]._genTo; if (_exons[i]._estFrom < estbgn) estbgn = _exons[i]._estFrom; if (_exons[i]._estTo < estbgn) estbgn = _exons[i]._estTo; if (estend < _exons[i]._estFrom) estend = _exons[i]._estFrom; if (estend < _exons[i]._estTo) estend = _exons[i]._estTo; } // Find the orientation char mOri = '?'; if (_matchOrientation == SIM4_MATCH_FORWARD) mOri = '+'; if (_matchOrientation == SIM4_MATCH_COMPLEMENT) mOri = '-'; // Find the strand char sOri = '?'; switch (_strandOrientation) { case SIM4_STRAND_POSITIVE: sOri = '+'; break; case SIM4_STRAND_NEGATIVE: sOri = '-'; break; case SIM4_STRAND_UNKNOWN: case SIM4_STRAND_INTRACTABLE: case SIM4_STRAND_FAILED: case SIM4_STRAND_ERROR: sOri = '.'; break; default: fprintf(stderr, "sim4reader: Unknown strandOrientation '"uint32FMT"' in printPolishGFF3()\n", _matchOrientation); sOri = '.'; break; } // Get rid of spaces in the names (and do it non-destructively). 
uint32 estDefSpace = 0; uint32 genDefSpace = 0; while ((_estDefLine[estDefSpace]) && (isspace(_estDefLine[estDefSpace]) == 0)) estDefSpace++; while ((_genDefLine[genDefSpace]) && (isspace(_genDefLine[genDefSpace]) == 0)) genDefSpace++; char estDefChar = _estDefLine[estDefSpace]; char genDefChar = _genDefLine[genDefSpace]; _estDefLine[estDefSpace] = 0; _genDefLine[genDefSpace] = 0; // The main mRNA match line. sprintf(outc, uint32FMT":%s\tsim4db\tmRNA\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t%c\t.\t", _genID, _genDefLine, genbgn, genend, _percentIdentity, sOri); while (*outc) outc++; sprintf(outc, "ID=sim4db"uint32FMT";Name="uint32FMT":%s;Target="uint32FMT":%s "uint32FMT" "uint32FMT" %c;", sim4polishPolishID, _estID, _estDefLine, _estID, _estDefLine, estbgn, estend, mOri); while (*outc) outc++; sprintf(outc, "targetLen="uint32FMT";pA="uint32FMT";pT="uint32FMT";genRegion="uint32FMT"-"uint32FMT"\n", _estLen, _estPolyA, _estPolyT, _genRegionOffset, _genRegionOffset + _genRegionLength -1); while (*outc) outc++; // Exons. 
for (uint32 i=0; i<_numExons; i++) { char *gap = encodeGap(_exons[i]._genAlignment, _exons[i]._estAlignment); sprintf(outc, uint32FMT":%s\tsim4db\texon\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t%c\t.\t", _genID, _genDefLine, _exons[i]._genFrom, _exons[i]._genTo, _exons[i]._percentIdentity, sOri); while (*outc) outc++; if (gap) sprintf(outc, "Parent=sim4db"uint32FMT";Target="uint32FMT":%s "uint32FMT" "uint32FMT" %c;Gap=%s;nMatches="uint32FMT"", sim4polishPolishID, _estID, _estDefLine, _exons[i]._estFrom, _exons[i]._estTo, mOri, gap, _exons[i]._numMatches); else sprintf(outc, "Parent=sim4db"uint32FMT";Target="uint32FMT":%s "uint32FMT" "uint32FMT" %c;nMatches="uint32FMT"", sim4polishPolishID, _estID, _estDefLine, _exons[i]._estFrom, _exons[i]._estTo, mOri, _exons[i]._numMatches); while (*outc) outc++; delete [] gap; switch (_exons[i]._intronOrientation) { // +1 to exclude the front blank space case SIM4_INTRON_POSITIVE: sprintf(outc, ";intron=%s\n", iOriPOS +1); break; case SIM4_INTRON_NEGATIVE: sprintf(outc, ";intron=%s\n", iOriNEG +1); break; case SIM4_INTRON_AMBIGUOUS: sprintf(outc, ";intron=%s\n", iOriAMB +1); break; case SIM4_INTRON_GAP: sprintf(outc, ";intron=%s\n", iOriGAP +1); break; case SIM4_INTRON_ERROR: sprintf(outc, ";intron=%s\n", iOriERR +1); break; default: sprintf(outc, "\n"); break; } while (*outc) outc++; } sim4polishPolishID++; _estDefLine[estDefSpace] = estDefChar; _genDefLine[genDefSpace] = genDefChar; return(outs); } char * sim4polish::s4p_polishToStringATAC(void) { return(0L); } kmer-code-2013-trunk/libsim4/sim4polish/sim4polish-read.C0000644000000000000000000001203112322046702021707 0ustar rootroot#include "sim4polish.H" #include #include #include #include #include #include void sim4polish::s4p_readPolishS4DB(readBuffer *rb) { // Clear this polish. 
_numExons = 0; delete [] _comment; _comment = 0L; delete [] _estDefLine; _estDefLine = 0L; delete [] _genDefLine; _genDefLine = 0L; delete [] _exons; _exons = 0L; // Decide the type of record we're reading. // Read it. uint64 startPosition = rb->tell(); uint64 thisLineMax = 1048576; uint64 thisLineLen = 0; char *thisLine = new char [thisLineMax]; uint32 numLines = 10240; uint32 curLine = 0; char **lines = new char * [numLines + 1]; uint32 *lengths = new uint32 [numLines + 1]; memset(lines, 0, sizeof(char *) * numLines); memset(lengths, 0, sizeof(uint32) * numLines); thisLineLen = rb->read(thisLine, thisLineMax, '\n'); chompL(thisLine, thisLineLen); while (!rb->eof() && strcmp(thisLine, "sim4begin")) { fprintf(stderr, "sim4reader: Got '%s', expecting 'sim4begin' at byte "uint64FMT"\n", thisLine, startPosition); thisLineLen = rb->read(thisLine, thisLineMax, '\n'); chompL(thisLine, thisLineLen); } // Stash the 'sim4begin' line into the lines array. lines[curLine] = new char [thisLineLen + 1]; lengths[curLine] = thisLineLen; memcpy(lines[curLine++], thisLine, sizeof(char) * (thisLineLen + 1)); // Until we hit 'sim4end' stash lines into lines. Yes, we test the previous line, then read the // next. At the end of the loop, we'll read 'sim4end', stash it in lines[], then test. while (!rb->eof() && strcmp(thisLine, "sim4end")) { thisLineLen = rb->read(thisLine, thisLineMax, '\n'); chompL(thisLine, thisLineLen); if (curLine >= numLines) { #warning LAZY PROGRAMMER did not extend an array fprintf(stderr, "ERROR: too many lines, lazy programmer.\n"); exit(1); } // Stash the line in the lines array. 
lines[curLine] = new char [thisLineLen + 1]; lengths[curLine] = thisLineLen; memcpy(lines[curLine++], thisLine, sizeof(char) * (thisLineLen + 1)); } delete [] thisLine; if (numLines > 0) s4p_linesToPolishS4DB(startPosition, numLines, lines, lengths); for (uint32 i=0; itell(); uint64 thisLineMax = 1048576; uint64 thisLineLen = 0; char *thisLine = new char [thisLineMax]; uint32 numLines = 10240; uint32 curLine = 0; bool firstLine = true; char **lines = new char * [numLines + 1]; uint32 *lengths = new uint32 [numLines + 1]; memset(lines, 0, sizeof(char *) * numLines); memset(lengths, 0, sizeof(uint32) * numLines); thisLineLen = rb->read(thisLine, thisLineMax, '\n'); chompL(thisLine, thisLineLen); while (!rb->eof() && (!strstr(thisLine, "\tsim4db\tmRNA") || (thisLine[0]=='#'))) { if (thisLine[0]!='#') fprintf(stderr, "sim4reader: Got '%s', expecting GFF3 mRNA line at byte "uint64FMT"\n", thisLine, startPosition); thisLineLen = rb->read(thisLine, thisLineMax, '\n'); chompL(thisLine, thisLineLen); } // Check the mRNA line (!), then stash into the lines array. lines[curLine] = new char [thisLineLen + 1]; lengths[curLine] = thisLineLen; memcpy(lines[curLine++], thisLine, sizeof(char) * (thisLineLen + 1)); // Read the GFF3 record, till the next mRNA line. // We expect 'intron' on each exon line but the last; until we hit an intron-less line, // stash lines into lines. Yes, we test the previous line, then read the next. // At the end of the loop, we'll read the intron-less line, stash it in lines[], then test. 
while (!rb->eof() && (firstLine || strstr(thisLine, "\tsim4db\texon\t"))) { if ((firstLine == false) && !strstr(thisLine, "intron=")) break; thisLineLen = rb->read(thisLine, thisLineMax, '\n'); chompL(thisLine, thisLineLen); if (curLine >= numLines) { #warning LAZY PROGRAMMER did not extend an array fprintf(stderr, "ERROR: too many lines, lazy programmer.\n"); exit(1); } // If not a comment, stash the line in the lines array if (thisLine[0] == '#') continue; lines[curLine] = new char [thisLineLen + 1]; lengths[curLine] = thisLineLen; memcpy(lines[curLine++], thisLine, sizeof(char) * (thisLineLen + 1)); firstLine = false; } delete [] thisLine; if (curLine > 0) s4p_linesToPolishGFF3(startPosition, numLines, lines, lengths); for (uint32 i=0; i #include #include #include #include #include "bio++.H" #include "sim4polishList.H" #include "sim4polishReader.H" sim4polishList::sim4polishList() { len = 0; max = 4; list = new sim4polish* [max]; } sim4polishList::sim4polishList(char const *filename) { len = 0; max = 4; list = new sim4polish* [max]; sim4polishReader *R = new sim4polishReader(filename); sim4polish *p = 0L; while (R->nextAlignment(p)) push(p); delete R; } sim4polishList::~sim4polishList() { for (uint32 i=0; i= max) { max *= 2; sim4polish **l = new sim4polish* [max]; memcpy(l, list, len * sizeof(sim4polish*)); delete [] list; list = l; } list[len++] = p; } void sim4polishList::remove(uint32 i) { if (i >= len) return; delete list[i]; len--; for ( ; i < len; i++) list[i] = list[i+1]; } void sim4polishList::sortBycDNAIID(void) { qsort(list, len, sizeof(sim4polish *), s4p_estIDcompare); } void sim4polishList::sortByGenomicIID(void) { qsort(list, len, sizeof(sim4polish *), s4p_genIDcompare); } void sim4polishList::filterByQuality(uint32 minI, uint32 minC) { uint32 save = 0; uint32 next = 0; while (next < len) { if ((list[next]->_percentIdentity >= minI) && (list[next]->_querySeqIdentity >= minC)) { list[save++] = list[next++]; } else { delete list[next]; list[next++] = 
0L; } } len = save; } kmer-code-2013-trunk/libsim4/sim4polish/sim4polishBuilder.H0000644000000000000000000000242512322046702022320 0ustar rootroot#ifndef SIM4_POLISH_BUILDER_H #define SIM4_POLISH_BUILDER_H #include "sim4polish.H" class sim4polishBuilder { public: sim4polishBuilder(); ~sim4polishBuilder(); void create(uint32 estid, uint32 estlen, uint32 genid, uint32 genlo, uint32 genhi); void setPolyTails(uint32 pa, uint32 pt); void setESTdefline(char *defline); void setGENdefline(char *defline); void setNumberOfMatches(uint32 nummatches, uint32 nummatchesN); void setPercentIdentity(uint32 id); void setMatchOrientation(char o); void setStrandOrientation(char o); void addExon(uint32 estlo, uint32 esthi, uint32 genlo, uint32 genhi, uint32 nummatches, uint32 nummatchesN, uint32 percentid, char intronorientation); void addExonAlignment(char *estalign, char *genalign); sim4polish *release(void); private: sim4polish *it; uint32 exMax; // maximum number of exons available uint32 exPos; // next exon uint32 exAli; // next exon without alignment sim4polishExon **ex; }; #endif // SIM4_POLISH_BUILDER_H kmer-code-2013-trunk/libsim4/Make.include0000644000000000000000000000532011676744271016752 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../libutil/)/ LIBBIO/ :=$(realpath $/../libbio/)/ LIBSEQ/ :=$(realpath $/../libseq/)/ LIBS4P/ :=$(realpath $/sim4polish/)/ s4csrc := $/sim4core/sim4command.C \ $/sim4core/sim4parameters.C \ $/sim4core/sim4string.C \ $/sim4core/Xtend1.C \ $/sim4core/align.C \ $/sim4core/exon.H \ $/sim4core/exon_cores.C \ $/sim4core/extend.C \ $/sim4core/glimmerSplice.C \ $/sim4core/greedy.C \ $/sim4core/mspManager.C \ $/sim4core/mspManager.H \ $/sim4core/pluri_align.C \ $/sim4core/poly.C \ $/sim4core/sim4.H \ $/sim4core/sim4b1.C \ $/sim4core/sim4b1a.C \ $/sim4core/sim4b1-1.C \ $/sim4core/sim4b1-2.C \ $/sim4core/sim4b1-3.C \ $/sim4core/sim4b1-4.C \ $/sim4core/sim4b1_s.C \ $/sim4core/sim4defines.H \ $/sim4core/sim4parameters.H \ $/sim4core/sites.C \ 
$/sim4core/sites_donor.C \ $/sim4core/sites_acceptor.C \ $/sim4core/sites_score.C \ $/sim4core/splice.C \ $/sim4core/table.C \ $/sim4core/util.C s4psrc :=$/sim4polish/sim4polish-compare.C \ $/sim4polish/sim4polish-copy.C \ $/sim4polish/sim4polish-deleteexon.C \ $/sim4polish/sim4polish-exons.C \ $/sim4polish/sim4polish-polishtostring.C \ $/sim4polish/sim4polish-read.C \ $/sim4polish/sim4polish-stringtopolish.C \ $/sim4polish/sim4polish-updatescores.C \ $/sim4polish/sim4polish.C \ $/sim4polish/sim4polish.H \ $/sim4polish/sim4polishList.C \ $/sim4polish/sim4polishList.H \ $/sim4polish/sim4polishBuilder.C \ $/sim4polish/sim4polishBuilder.H \ $/sim4polish/sim4polishFile.C \ $/sim4polish/sim4polishFile.H \ $/sim4polish/sim4polishReader.C \ $/sim4polish/sim4polishReader.H \ $/sim4polish/sim4polishWriter.C \ $/sim4polish/sim4polishWriter.H $/.C_SRCS := $(filter %.c,${s4csrc}) $(filter %.c,${s4psrc}) $/.C_INCS := $(filter %.h,${s4csrc}) $(filter %.h,${s4psrc}) $/.CXX_SRCS := $(filter %.C,${s4csrc}) $(filter %.C,${s4psrc}) $/.CXX_INCS := $(filter %.H,${s4csrc}) $(filter %.H,${s4psrc}) $/.CXX_LIBS := $/libsim4.a $/.CLEAN := $/*.o $/sim4core/*.o $/sim4polish/*.o $/.CXX_LIBS := $/libsim4.a $/.PERL_LIBS := $/sim4polish/sim4polish.pm $/libsim4.a: ${$/.C_SRCS:.c=.o} ${$/.CXX_SRCS:.C=.o} $(eval $/%.d $/%.o: CFLAGS += -I${LIBUTL/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBS4P/}) $(eval $/%.d $/%.o: CXXFLAGS += -I${LIBUTL/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBS4P/}) kmer-code-2013-trunk/libsim4/sim4core/0000755000000000000000000000000012641613357016246 5ustar rootrootkmer-code-2013-trunk/libsim4/sim4core/Xtend1.C0000644000000000000000000005172411512744633017524 0ustar rootroot#include "sim4.H" // This is used if _accurateSequences is enabled....and it's never // enabled. The memory allocations here are NOT optimized. 
#include #include #include #include "bio.h" void Sim4::Xextend_link_to_data_list(void *data, ValNodePtr *head, ValNodePtr *prev) { ValNodePtr curr; curr = (ValNodePtr)ckalloc(sizeof(struct ValNode)); curr->data = data; curr->next = NULL; if(*prev == NULL) *head = curr; else (*prev)->next = curr; *prev = curr; } void Sim4::Xextend_ValNodeFreeData(ValNodePtr data_list) { ValNodePtr tmp_node; while ((tmp_node=data_list)!=NULL) { ckfree(tmp_node->data); data_list = data_list->next; ckfree(tmp_node); } } int Sim4::Xextend_bw(char *s1, char *s2, int m, int n, int offset1, int offset2, int *line1, int *line2) { int col, /* column number */ row, /* row number */ max_d, /* bound on the length of the edit script */ d, /* current compressed distance */ k, /* current diagonal */ DELTA, /* n-m */ ORIGIN, lower, upper; int *last_d, *temp_d; /* column containing the last p */ int *min_row, *min_diag; /* min (b)/ max (f) row (and diagonal) */ /* reached for cost d=0, ... m. */ coords ***trace_AG, ***trace_AC; coords *AG_cell, *AC_cell, *newcoords; ValNodePtr data_list = NULL, prev = NULL; DELTA = n-m; max_d = m+1; trace_AG = (coords ***)ckalloc((max_d+1)*sizeof(coords **)); trace_AC = (coords ***)ckalloc((max_d+1)*sizeof(coords **)); for (d=0; d<=max_d; d++) { trace_AG[d] = (coords **)ckalloc((m+n+1)*sizeof(coords *)); trace_AC[d] = (coords **)ckalloc((m+n+1)*sizeof(coords *)); } ORIGIN = m; trace_AG[0][ORIGIN+DELTA] = &last_AG; trace_AC[0][ORIGIN+DELTA] = &last_AC; for (row=m, col=n; row>0 && col>0 && (s1[row-1]==s2[col-1]); row--,col--) /*LINTED empty loop body*/; for (k=n; (k>=2) && (k>=col); k--) if (!strncmp((char *)(s2+k-2),"AG",2)) { newcoords = (coords *)ckalloc(sizeof(coords)); Xextend_link_to_data_list((void *)newcoords, &data_list, &prev); newcoords->pos2 = k-DELTA+offset1 +1; /* to compensate for -1 */ newcoords->pos1 = k+offset2 +1; /* refer to sim4b1.c */ trace_AG[0][ORIGIN+DELTA] = newcoords; } else if (!strncmp((char *)(s2+k-2),"AC",2)) { newcoords = (coords 
*)ckalloc(sizeof(coords)); Xextend_link_to_data_list((void *)newcoords, &data_list, &prev); newcoords->pos2 = k-DELTA+offset1 +1; newcoords->pos1 = k+offset2 +1; trace_AC[0][ORIGIN+DELTA] = newcoords; } if ((row == 0) || (col == 0)) { *line1 = row+offset1; *line2 = col+offset2; (void)memcpy(&last_AG,trace_AG[0][ORIGIN+DELTA],sizeof(coords)); (void)memcpy(&last_AC,trace_AC[0][ORIGIN+DELTA],sizeof(coords)); Xextend_ValNodeFreeData(data_list); free_coords(trace_AG,max_d+1); free_coords(trace_AC,max_d+1); return 0; } last_d = (int *)ckalloc((m+n+1)*sizeof(int)); temp_d = (int *)ckalloc((m+n+1)*sizeof(int)); for (k=0; k<=m+n; ++k) last_d[k]=m+1; last_d[ORIGIN+DELTA] = row; lower = ORIGIN + DELTA - 1; upper = ORIGIN + DELTA + 1; min_row = (int *)ckalloc((m+1)*sizeof(int)); min_diag = (int *)ckalloc((m+1)*sizeof(int)); for (d=1; d<=m; d++) min_row[d] = m+1; min_row[0] = last_d[ORIGIN+DELTA]; min_diag[0] = ORIGIN + DELTA; d = 0; while ((++d<=max_d) && ((d-1<=good_ratio(m-min_row[d-1])) || ((d>=2) && (d-2<=good_ratio(m-min_row[d-2]))))) { /* for each relevant diagonal ... 
*/ for (k = lower; k <= upper; k++) { /* find a d on diagonal k */ if (k==-d+DELTA+ORIGIN) { /* move down from the last d-1 on diagonal k+1 */ row = last_d[k+1]; /* op = INSERT; */ AG_cell = trace_AG[d-1][k+1]; AC_cell = trace_AC[d-1][k+1]; } else if (k==d+DELTA+ORIGIN) { /* move right from the last d-1 on diagonal k-1 */ row = last_d[k-1]-1; /* op = DELETE; */ AG_cell = trace_AG[d-1][k-1]; AC_cell = trace_AC[d-1][k-1]; } else if ((last_d[k]-1<=last_d[k+1]) && (last_d[k]-1<=last_d[k-1]-1)) { /* substitution */ row = last_d[k]-1; /* op = SUBSTITUTE; */ AG_cell = trace_AG[d-1][k]; AC_cell = trace_AC[d-1][k]; } else if ((last_d[k-1]-1<=last_d[k+1]) && (last_d[k-1]-1<=last_d[k]-1)) { /* move right from the last d-1 on diagonal k-1 */ row = last_d[k-1]-1; /* op = DELETE; */ AG_cell = trace_AG[d-1][k-1]; AC_cell = trace_AC[d-1][k-1]; } else { /* move left from the last d-1 on diagonal k+1 */ row = last_d[k+1]; /* op = INSERT; */ AG_cell = trace_AG[d-1][k+1]; AC_cell = trace_AC[d-1][k+1]; } /* code common to the three cases */ /* slide down the diagonal */ col = row+k-ORIGIN; trace_AG[d][k] = AG_cell; trace_AC[d][k] = AC_cell; while ((row > 0) && (col > 0) && (s1[row-1]==s2[col-1])) { if ((col>1) && !strncmp((char *)(s2+col-2),"AG",2)) { newcoords = (coords *)ckalloc(sizeof(coords)); Xextend_link_to_data_list((void *)newcoords, &data_list, &prev); newcoords->pos1 = row + k - ORIGIN + offset2 +1; newcoords->pos2 = row + offset1 +1; trace_AG[d][k] = newcoords; } else if ((col>1) && !strncmp((char *)(s2+col-2),"AC",2)) { newcoords = (coords *)ckalloc(sizeof(coords)); Xextend_link_to_data_list((void *)newcoords, &data_list, &prev); newcoords->pos1 = row + k - ORIGIN + offset2 +1; newcoords->pos2 = row + offset1 +1; trace_AC[d][k] = newcoords; } row--; col--; } if ((col>1) && !strncmp((char *)(s2+col-2),"AG",2)) { newcoords = (coords *)ckalloc(sizeof(coords)); Xextend_link_to_data_list((void *)newcoords, &data_list, &prev); newcoords->pos1 = row + k - ORIGIN + offset2 +1; 
newcoords->pos2 = row + offset1 +1; trace_AG[d][k] = newcoords; } else if ((col>1) && !strncmp((char *)(s2+col-2),"AC",2)) { newcoords = (coords *)ckalloc(sizeof(coords)); Xextend_link_to_data_list((void *)newcoords, &data_list, &prev); newcoords->pos1 = row + k - ORIGIN + offset2 +1; newcoords->pos2 = row + offset1 +1; trace_AC[d][k] = newcoords; } temp_d[k] = row; if ((row == 0) && (col == 0)) { /* hit southeast corner; have the answer */ (void)memcpy(&last_AG,trace_AG[d][k],sizeof(coords)); (void)memcpy(&last_AC,trace_AC[d][k],sizeof(coords)); ckfree(last_d); ckfree(temp_d); ckfree(min_row); ckfree(min_diag); Xextend_ValNodeFreeData(data_list); free_coords(trace_AG,max_d+1); free_coords(trace_AC,max_d+1); *line1 = row+offset1; *line2 = col+offset2; return d; } if (row == 0) { /* hit first row; don't look further */ (void)memcpy(&last_AG,trace_AG[d][k],sizeof(coords)); (void)memcpy(&last_AC,trace_AC[d][k],sizeof(coords)); ckfree(last_d); ckfree(temp_d); ckfree(min_row); ckfree(min_diag); Xextend_ValNodeFreeData(data_list); free_coords(trace_AG,max_d+1); free_coords(trace_AC,max_d+1); *line1 = row+offset1; *line2 = col+offset2; return d; } if (col == 0) { /* hit last column; don't look further */ (void)memcpy(&last_AG,trace_AG[d][k],sizeof(coords)); (void)memcpy(&last_AC,trace_AC[d][k],sizeof(coords)); ckfree(last_d); ckfree(temp_d); ckfree(min_row); ckfree(min_diag); Xextend_ValNodeFreeData(data_list); free_coords(trace_AG,max_d+1); free_coords(trace_AC,max_d+1); *line1 = row+offset1; *line2 = col+offset2; return d; } } min_row[d] = last_d[ORIGIN+DELTA]; min_diag[d] = ORIGIN+DELTA; for (k=lower; k<=upper; ++k) if (temp_d[k]0) && (min_row[d-1]-min_row[d]<3)) d--; *line1 = min_row[d]+offset1; *line2 = min_row[d]+min_diag[d]-ORIGIN+offset2; (void)memcpy(&last_AG,trace_AG[d][min_diag[d]],sizeof(coords)); (void)memcpy(&last_AC,trace_AC[d][min_diag[d]],sizeof(coords)); ckfree(min_row); ckfree(min_diag); ckfree(last_d); ckfree(temp_d); 
Xextend_ValNodeFreeData(data_list); free_coords(trace_AG,max_d+1); free_coords(trace_AC,max_d+1); return d; } int Sim4::Xextend_fw(char *s1, char *s2, int m, int n, int offset1, int offset2, int *line1, int *line2) { int col, /* column number */ row, /* row number */ max_d, /* bound on the length of the edit script */ d, /* current compressed distance */ k, /* current diagonal */ ORIGIN, lower, upper; int *last_d, *temp_d; /* column containing the last p */ int *max_row, *max_diag; /* min (b)/ max (f) row (and diagonal) */ /* reached for cost d=0, ... m. */ coords ***trace_GT, ***trace_CT; coords *GT_cell, *CT_cell, *newcoords; ValNodePtr data_list = NULL, prev = NULL; max_d = m+1; trace_GT = (coords ***)ckalloc((max_d+1)*sizeof(coords **)); trace_CT = (coords ***)ckalloc((max_d+1)*sizeof(coords **)); for (d=0; d<=max_d; d++) { trace_GT[d] = (coords **)ckalloc((m+n+1)*sizeof(coords *)); trace_CT[d] = (coords **)ckalloc((m+n+1)*sizeof(coords *)); } ORIGIN = m; trace_GT[0][ORIGIN] = &last_GT; trace_CT[0][ORIGIN] = &last_CT; for (row=0, col=0; colpos2 = k+offset1; newcoords->pos1 = k+offset2; trace_GT[0][ORIGIN] = newcoords; } else if (!strncmp((char *)(s2+k),"CT",2)) { newcoords = (coords *)ckalloc(sizeof(coords)); Xextend_link_to_data_list((void *)newcoords, &data_list, &prev); newcoords->pos2 = k+offset1; newcoords->pos1 = k+offset2; trace_CT[0][ORIGIN] = newcoords; } if ((row == m) || (col == n)){ *line1 = row+offset1; *line2 = col+offset2; (void)memcpy(&last_GT,trace_GT[0][ORIGIN],sizeof(coords)); (void)memcpy(&last_CT,trace_CT[0][ORIGIN],sizeof(coords)); Xextend_ValNodeFreeData(data_list); free_coords(trace_GT,max_d+1); free_coords(trace_CT,max_d+1); return 0; } last_d = (int *)ckalloc((m+n+1)*sizeof(int)); temp_d = (int *)ckalloc((m+n+1)*sizeof(int)); for (k=0; k<=m+n; ++k) last_d[k]=-1; last_d[ORIGIN] = row; lower = ORIGIN - 1; upper = ORIGIN + 1; max_row = (int *)ckalloc((m+1)*sizeof(int)); max_diag = (int *)ckalloc((m+1)*sizeof(int)); for (d=1; d<=m; d++) 
max_row[d] = -1; max_row[0] = last_d[ORIGIN]; max_diag[0] = ORIGIN; d = 0; while ((++d<=max_d) && ((d-1<=good_ratio(max_row[d-1])) || ((d>=2) && (d-2<=good_ratio(max_row[d-2]))))) { /* for each relevant diagonal ... */ for (k = lower; k <= upper; k++) { /* find a d on diagonal k */ if (k==-d+ORIGIN) { /* move down from the last d-1 on diagonal k+1 */ row = last_d[k+1]+1; /* op = DELETE; */ GT_cell = trace_GT[d-1][k+1]; CT_cell = trace_CT[d-1][k+1]; } else if (k==d+ORIGIN) { /* move right from the last d-1 on diagonal k-1 */ row = last_d[k-1]; /* op = INSERT; */ GT_cell = trace_GT[d-1][k-1]; CT_cell = trace_CT[d-1][k-1]; } else if ((last_d[k]>=last_d[k+1]) && (last_d[k]+1>=last_d[k-1])) { /* substitution */ row = last_d[k]+1; /* op = SUBSTITUTE; */ GT_cell = trace_GT[d-1][k]; CT_cell = trace_CT[d-1][k]; } else if ((last_d[k+1]+1>=last_d[k-1]) && (last_d[k+1]>=last_d[k])) { /* move down from the last d-1 on diagonal k+1 */ row = last_d[k+1]+1; /* op = DELETE; */ GT_cell = trace_GT[d-1][k+1]; CT_cell = trace_CT[d-1][k+1]; } else { /* move right from the last d-1 on diagonal k-1 */ row = last_d[k-1]; /* op = INSERT; */ GT_cell = trace_GT[d-1][k-1]; CT_cell = trace_CT[d-1][k-1]; } /* code common to the three cases */ /* slide down the diagonal */ col = row+k-ORIGIN; trace_GT[d][k] = GT_cell; trace_CT[d][k] = CT_cell; if (row>=0) while ((row < m) && (col < n) && (s1[row]==s2[col])) { if ((colpos1 = row + k - ORIGIN + offset2; newcoords->pos2 = row + offset1; trace_GT[d][k] = newcoords; } else if ((colpos1 = row + k - ORIGIN + offset2; newcoords->pos2 = row + offset1; trace_CT[d][k] = newcoords; } row++; col++; } if ((colpos1 = row + k - ORIGIN + offset2; newcoords->pos2 = row + offset1; trace_GT[d][k] = newcoords; } else if ((colpos1 = row + k - ORIGIN + offset2; newcoords->pos2 = row + offset1; trace_CT[d][k] = newcoords; } temp_d[k] = row; if ((row == m) && (col == n)) { /* hit southeast corner; have the answer */ (void)memcpy(&last_GT,trace_GT[d][k],sizeof(coords)); 
(void)memcpy(&last_CT,trace_CT[d][k],sizeof(coords)); Xextend_ValNodeFreeData(data_list); free_coords(trace_GT,max_d+1); free_coords(trace_CT,max_d+1); ckfree(last_d); ckfree(temp_d); ckfree(max_row); ckfree(max_diag); *line1 = row+offset1; *line2 = col+offset2; return d; } if (row == m) { /* hit last row; don't look further */ (void)memcpy(&last_GT,trace_GT[d][k],sizeof(coords)); (void)memcpy(&last_CT,trace_CT[d][k],sizeof(coords)); Xextend_ValNodeFreeData(data_list); free_coords(trace_GT,max_d+1); free_coords(trace_CT,max_d+1); ckfree(temp_d); ckfree(last_d); ckfree(max_row); ckfree(max_diag); *line1 = row+offset1; *line2 = col+offset2; return d; } if (col == n) { /* hit last column; don't look further */ (void)memcpy(&last_GT,trace_GT[d][k],sizeof(coords)); (void)memcpy(&last_CT,trace_CT[d][k],sizeof(coords)); Xextend_ValNodeFreeData(data_list); free_coords(trace_GT,max_d+1); free_coords(trace_CT,max_d+1); ckfree(temp_d); ckfree(last_d); ckfree(max_row); ckfree(max_diag); *line1 = row+offset1; *line2 = col+offset2; return d; } } max_row[d] = last_d[ORIGIN]; max_diag[d] = ORIGIN; for (k=lower; k<=upper; ++k) if (temp_d[k]>max_row[d]) { max_row[d] = temp_d[k]; max_diag[d] = k; } for (k=lower; k<=upper; k++) { last_d[k] = temp_d[k]; } --lower; ++upper; } /* report here the previous maximal match, stored in max_diag and max_row */ while ((d>0) && (max_row[d]-max_row[d-1]<3)) d--; *line1 = max_row[d]+offset1; *line2 = max_row[d]+max_diag[d]-ORIGIN+offset2; (void)memcpy(&last_GT,trace_GT[d][max_diag[d]],sizeof(coords)); (void)memcpy(&last_CT,trace_CT[d][max_diag[d]],sizeof(coords)); ckfree(max_row); ckfree(max_diag); ckfree(last_d); ckfree(temp_d); Xextend_ValNodeFreeData(data_list); free_coords(trace_GT,max_d+1); free_coords(trace_CT,max_d+1); return d; } kmer-code-2013-trunk/libsim4/sim4core/sites_acceptor.H0000644000000000000000000000035011415365503021357 0ustar rootroot#ifndef SITES_ACCEPTOR_H #define SITES_ACCEPTOR_H /* DO NOT REMOVE or MODIFY !!!! 
*/ #define NUM_MODELS_ACC 25 #define NUM_VALUES_ACC 928 extern double acc[NUM_MODELS_ACC][NUM_VALUES_ACC]; #endif /* SITES_ACCEPTOR_H */ kmer-code-2013-trunk/libsim4/sim4core/sim4b1-3.C0000644000000000000000000000751512415066336017617 0ustar rootroot#include "sim4.H" int Sim4::SIM4_block3(bool good_match, Exon* &tmp_Lblock, Exon* &tmp_Rblock, Exon* &tmp_block, Exon* &tmp_block1) { int I, J; int rollbflag = 0; int cost; //fprintf(stderr, "Called SIM4_block3()\n"); /* start of seq; find last_AG, last_AC */ if (_accurateSequences) findLastAGandAC(tmp_block1); // These two blocks should do the same thing. The first one isn't readable. #if 0 int diff = (int)(tmp_block1->frEST - tmp_block->toEST - 1); diff = (int)(MIN(diff,(int)(MAX_GRINIT/2))); int u = MIN(4*diff,tmp_block1->frGEN-tmp_block->toGEN-1); cost = EXTEND_BW(_estSeq+tmp_block->toEST+ (tmp_block1->frEST-tmp_block->toEST-1)-diff, _genSeq+tmp_block->toGEN+ (tmp_block1->frGEN-tmp_block->toGEN-1)-u, (int)diff, u, tmp_block->toEST+ (tmp_block1->frEST-tmp_block->toEST-1)-diff, tmp_block->toGEN+ (tmp_block1->frGEN-tmp_block->toGEN-1)-u, &I, &J); #else int diff = MIN(tmp_block1->frEST - tmp_block->toEST - 1, MAX_GRINIT/2); int u = MIN(4*diff, tmp_block1->frGEN - tmp_block->toGEN - 1); cost = EXTEND_BW(_estSeq + tmp_block1->frEST - 1 - diff, _genSeq + tmp_block1->frGEN - 1 - u, diff, u, tmp_block1->frEST - 1 - diff, tmp_block1->frGEN - 1 - u, &I, &J); #endif if ((good_match==0) || tmp_block->flag || (J==0) || (I==0)) { tmp_block1->frEST = I+1; tmp_block1->frGEN = J+1; tmp_block1->edist += cost; tmp_block1->length = tmp_block1->toEST-tmp_block1->frEST+1; } /* use blast if marginal gap still exists, and this is first scan */ if (!(diff=(int)(tmp_block1->frEST-tmp_block->toEST-1)) || tmp_block->flag) { /* blast-treated region or no gap */ tmp_Rblock = tmp_Lblock = NULL; } else { exon_cores(_genSeq+tmp_block->toGEN-1, _estSeq+tmp_block->toEST-1, tmp_block1->frGEN-tmp_block->toGEN-1, diff, tmp_block->toGEN+1, 
tmp_block->toEST+1, 1, spacedSeedExtMSS, mspThreshold2, TEMP); //PRINTEXONS("2\n", exon_list); tmp_block -> flag = 1; tmp_Lblock = tmp_Rblock = exon_list; while (tmp_Rblock && tmp_Rblock->next_exon) tmp_Rblock = tmp_Rblock->next_exon; if ((!tmp_Lblock && tmp_block1->frGEN-tmp_block->toGEN>50000) || (tmp_Lblock && (tmp_Lblock->frEST-tmp_block->toEST>100) && (tmp_Lblock->frGEN-tmp_block->frGEN>50000)) || (tmp_Lblock && (tmp_block1->frEST-tmp_Rblock->toEST>100) && (tmp_block1->frGEN-tmp_Rblock->frGEN>50000))) { /* possible large intron; increase the score weight */ //freeExonList(tmp_Lblock); garbage collected exon_list = _mspManager.doLinking(globalParams->_relinkWeight, DEFAULT_DRANGE, tmp_block->toGEN + 1, tmp_block->toEST + 1, 1, true, _genSeq, _estSeq); //PRINTEXONS("2a\n", exon_list); tmp_Lblock = tmp_Rblock = exon_list; while ((tmp_Rblock!=NULL) && (tmp_Rblock->next_exon!=NULL)) tmp_Rblock = tmp_Rblock->next_exon; } _mspManager.clear(); if (tmp_Lblock) { rollbflag = 1; } else { tmp_block1->frEST = I+1; tmp_block1->frGEN = J+1; tmp_block1->edist += cost; tmp_block1->length = tmp_block1->toEST-tmp_block1->frEST+1; } } return(rollbflag); } kmer-code-2013-trunk/libsim4/sim4core/sim4b1-1.C0000644000000000000000000000671010265062055017604 0ustar rootroot#include "sim4.H" // resolve overlap using the GT-AG criterion // int Sim4::resolve_overlap(Exon *tmp_block, Exon *tmp_block1, char *seq) { int diff, best_u, l0, l1, u, cost; int GTAG_score, CTAC_score; char *s1, *s2, *e1; diff = tmp_block1->frEST-tmp_block->toEST-1; if (diff>=0) return (tmp_block1->frEST-1); /* u-1 = actual position in the sequence */ l0 = tmp_block->length-diff; l1 = tmp_block1->length; best_u = u = tmp_block1->frEST-1; s1 = seq+tmp_block->toGEN-(tmp_block->toEST-u); s2 = seq-2+tmp_block1->frGEN+u-tmp_block1->frEST; cost = 0; e1 = seq+tmp_block->toGEN; while (s1<=e1) { GTAG_score = CTAC_score = 0; GTAG_score += ((char)(*s1)=='G') ? 1 : 0; GTAG_score += ((char)(*(s1+1))=='T') ? 
1 : 0; GTAG_score += ((char)(*s2)=='A') ? 1 : 0; GTAG_score += ((char)(*(s2+1))=='G') ? 1 : 0; if (GTAG_score > abs(cost) && ((l0>=8) || (l1>=8))) { cost = GTAG_score; best_u = u; if (cost == 4) break; } CTAC_score += ((char)(*s1)=='C') ? 1 : 0; CTAC_score += ((char)(*(s1+1))=='T') ? 1 : 0; CTAC_score += ((char)(*s2)=='A') ? 1 : 0; CTAC_score += ((char)(*(s2+1))=='C') ? 1 : 0; if (CTAC_score > abs(cost)) { cost = -CTAC_score; best_u = u; if (cost == 4) break; } u++; s1++; s2++; l0++; l1--; } return best_u; } int Sim4::SIM4_block1(Exon* &Lblock, Exon* &tmp_block, Exon* &tmp_block1) { int rollbflag = 0; // Try to resolve the overlap int best_u = resolve_overlap(tmp_block,tmp_block1,_genSeq); tmp_block1->frGEN += best_u + 1 - tmp_block1->frEST; tmp_block1->frEST = best_u + 1; //fprintf(stderr, "sim4_block1()-- Lblock=%p tmp_block=%p tmp_block1=%p\n", Lblock, tmp_block, tmp_block1); // If the block is really short, remove it. if (((tmp_block1->toEST - tmp_block1->frEST + 1) < 8) || ((tmp_block1->toGEN - tmp_block1->frGEN + 1) < 8)) { tmp_block->next_exon = tmp_block1->next_exon; tmp_block->flag = tmp_block1->flag; rollbflag = 1; //freeExon(tmp_block1); garbage collected tmp_block1 = NULL; } tmp_block->toGEN -= tmp_block->toEST-best_u; tmp_block->toEST = best_u; if (((tmp_block->toEST - tmp_block->frEST + 1) < 8) || ((tmp_block->toGEN - tmp_block->frGEN + 1) < 8)) { Exon *prev = find_previous(Lblock, tmp_block); if (prev == 0L) { fprintf(stderr, "SIM4_block1(): Corrupted exon list, cannot find the previous exon.\n"); for (; Lblock; Lblock = Lblock->next_exon) if (tmp_block == Lblock) fprintf(stderr, " GEN f=%8d t=%8d EST f=%8d t=%8d flag=%d <- tried to find previous of this one\n", Lblock->frGEN, Lblock->toGEN, Lblock->frEST, Lblock->toEST, Lblock->flag); else fprintf(stderr, " GEN f=%8d t=%8d EST f=%8d t=%8d flag=%d\n", Lblock->frGEN, Lblock->toGEN, Lblock->frEST, Lblock->toEST, Lblock->flag); kill(getpid(), SIGKILL); } prev->next_exon = tmp_block->next_exon; 
prev->flag = tmp_block->flag; if ((tmp_block->toEST - tmp_block->frEST + 1) > 0) rollbflag = 1; //freeExon(tmp_block); garbage collected tmp_block = prev; } if (tmp_block->toGEN) tmp_block->length = tmp_block->toEST - tmp_block->frEST + 1; if (tmp_block1 && tmp_block1->toGEN) tmp_block1->length = tmp_block1->toEST - tmp_block1->frEST + 1; return(rollbflag); } kmer-code-2013-trunk/libsim4/sim4core/poly.C0000644000000000000000000003646410453350136017342 0ustar rootroot#include #include "sim4.H" #define MIN_EXON 12 void Sim4::get_polyAT(char *seq, int len, int *pT, int *pA, int flag) { register int i, sum10, sum20; register char *s, *t, *v; int last10; int MAX10 = 2; int MAX20 = 5; char encodingA[128]; char encodingT[128]; if (flag!=T_ONLY) { memset(encodingA, (char)1, 128); encodingA[(int)'A'] = encodingA[(int)'X'] = encodingA[(int)'N'] = 0; for (i=0, s=seq+len, sum10=0, last10=len+1; i<10 && s>seq && sum10<=MAX20; i++) { sum10 += encodingA[(int)*(--s)]; /* if (!encodingA[*s] && sum10<=MAX10) last10 = s-seq+1; */ } t = v = seq+len; sum20 = sum10; for ( ; s>=seq && (sum10<=MAX10 || sum20<=MAX20); ) { if (!encodingA[(int)*s] && sum10<=MAX10 && (seq+len>=s+20 || sum20seq) { sum10 += encodingA[(int)*s] - encodingA[(int)*(--t)]; sum20 += encodingA[(int)*s] -(((seq+len)-s>20) ? encodingA[(int)*(--v)] : 0); } } if (last10>len-10) *pA = len+1; else { s = seq+last10+8; while (s >= seq && !encodingA[(int)*s]) s--; if ((s-seq+1)-last10+1<=5) *pA = (int)(s-seq+2); else *pA = last10; } } else *pA = len+1; *pA = len-(*pA)+1; if (flag!=A_ONLY) { memset(encodingT, (char)1, 128); encodingT[(int)'T'] = encodingT[(int)'X'] = encodingT[(int)'N'] = 0; for (i=0, s=seq-1, sum10=0, last10=0; i<10 && i=19 || sum20=20) ? 
encodingT[(int)*(++v)] : 0); } } if (last10<=10) *pT = 0; else { s = seq+last10-10; while (s < seq+len && !encodingT[(int)*s]) s++; if (last10-(s-seq)+1<=5) *pT = (int)(s-seq); else *pT = last10; } } else *pT = 0; } void Sim4::trim_polyA_align(struct edit_script_list **Sptr, Exon *lblock, Exon **exons, const int bc, int *pA, char *s1,char *s2) { edit_script_list *head = *Sptr; edit_script *tp; int tmpi = 0, num, idents = 0, identsN = 0; char *a, *b; Exon *prev; int i, j; /* i index in the cDNA */ if (bc>head->offset2+head->len2-1) { *pA = bc; return; } if (bc==head->offset2) { /* cDNA gap: remove the entire script; is this properly sorted? LLL */ *Sptr = head->next_script; Free_script(head->script); ckfree(head); while ((*exons)->frEST>=bc) { prev = find_previous(lblock,*exons); if (prev == 0L) { fprintf(stderr, "trim_polyA_align(): Corrupted exon list, cannot find the previous exon (remove entire script).\n"); for (; lblock; lblock = lblock->next_exon) fprintf(stderr, " GEN f=%8d t=%8d EST f=%8d t=%8d flag=%d\n", lblock->frGEN, lblock->toGEN, lblock->frEST, lblock->toEST, lblock->flag); kill(getpid(), SIGKILL); } prev->next_exon = (*exons)->next_exon; //freeExon(*exons); garbage collected *exons = prev; } *pA = bc; return; } Flip_script(&(head->script)); i = head->offset2 + head->len2 -1; j = head->offset1 + head->len1 -1; tp = head->script; while (i>=bc && tp) { num = tp->num; switch (tp->op_type) { case INSERT: if (i>=bc && bc>i-num+1) { (*exons)->numInDel -= i - bc + 1; (*exons)->numEdits -= i - bc + 1; tmpi += i-bc+1; tp->num -= i-bc+1; i = bc-1; } else { (*exons)->numInDel -= num; (*exons)->numEdits -= num; tmpi += num; i -= num; head->script = tp->next; ckfree(tp); tp = head->script; } break; case DELETE: (*exons)->numInDel -= num; (*exons)->numEdits -= num; j -= num; tmpi += num; head->script = tp->next; ckfree(tp); tp = head->script; break; case SUBSTITUTE: if (i>=bc && bc>i-num+1) { a = s2+i-1; b = s1+j-1; while (a>=s2+bc-1) { if (*a != *b) { 
(*exons)->numEdits--; tmpi++; } else { if (*a == 'N') { (*exons)->numNs--; identsN++; } else { (*exons)->numMatches--; idents++; } } a--; b--; } j -= i-bc+1; tp->num -= i-bc+1; i = bc-1; } else { /* at most 1 nt remaining */ a = s2+i-1; b = s1+j-1; while (a>=s2+i-num) { if (*a != *b) { (*exons)->numEdits--; tmpi++; } else { if (*a == 'N') { (*exons)->numNs--; identsN++; } else { (*exons)->numMatches--; idents++; } } a--; b--; } i -= num; j -= num; head->script = tp->next; ckfree(tp); tp = head->script; } break; #if 0 default: fatalf("Unrecognized opcode %d.\n",tp->op_type); #endif } /* indel walk */ } assert(i==bc-1); while ((tp != 0L) && (tp->op_type != SUBSTITUTE) && (j+1 >= (*exons)->frGEN)) { if (tp->op_type==INSERT) { i -= tp->num; tmpi += tp->num; (*exons)->numInDel -= tp->num; (*exons)->numEdits -= tp->num; } else if (j<(*exons)->frGEN && i<(*exons)->frEST) { j -= tp->num; } else { j -= tp->num; tmpi += tp->num; (*exons)->numInDel -= tp->num; (*exons)->numEdits -= tp->num; } head->script = tp->next; ckfree(tp); tp = head->script; } if (head->script==NULL) { *Sptr = head->next_script; ckfree(head); } else { head->len1 = j-head->offset1+1; head->len2 = i-head->offset2+1; head->score -= tmpi; Flip_script(&(head->script)); } if ((*exons)->frEST>i) { prev = find_previous(lblock,*exons); if (prev == 0L) { fprintf(stderr, "trim_polyA_align(): Corrupted exon list, cannot find the previous exon (frEST).\n"); for (; lblock; lblock = lblock->next_exon) fprintf(stderr, " GEN f=%8d t=%8d EST f=%8d t=%8d flag=%d\n", lblock->frGEN, lblock->toGEN, lblock->frEST, lblock->toEST, lblock->flag); kill(getpid(), SIGKILL); } prev->next_exon = (*exons)->next_exon; //freeExon(*exons); garbage collected *exons = prev; } else { (*exons)->toEST = i; (*exons)->toGEN = j; (*exons)->length = (*exons)->toEST-(*exons)->frEST+1; (*exons)->alignmentLength = ((*exons)->toGEN - (*exons)->frGEN + 1 + (*exons)->toEST - (*exons)->frEST + 1 + (*exons)->numInDel); (*exons)->percentID = 
// Trim a poly-A tail from the 3' end of an alignment.
//
// Sptr   - list of edit scripts for the alignment; may be modified
//          (and is temporarily flipped if it is not in the order this
//          routine needs)
// Exons  - head of the exon list; the first real exon is Exons->next_exon
// s1, s2 - the two aligned sequences, passed through to
//          trim_polyA_align(); s2 is the one scanned for 'A's here
// l2     - length used to initialise *lastA
// lastA  - output.  Set to l2+1 on the early-return path (no exons);
//          otherwise overwritten at the end with the amount cut
//          (startPos - <position reported by trim_polyA_align> + 1, or
//          0 if nothing was trimmed).
//          NOTE(review): the two exit paths therefore report different
//          kinds of value through lastA -- confirm callers expect this.
//
void
Sim4::remove_polyA_back(struct edit_script_list **Sptr, Exon *Exons, char *s1, char *s2, int l2, int *lastA) {
  Exon *t;
  Exon *exons_tail;
  char *b, *end;
  int numA, pA, dummy, trim_p, reverse_script=0;
  int startPos=0, cutAmount=0;

  *lastA = l2+1;
  pA = 0;

  // Nothing to do without at least one real exon (toGEN == 0 marks a
  // sentinel / end-of-list exon in the tests below).
  if (!Exons || ! Exons->next_exon || ! Exons->next_exon->toGEN)
    return;

  // Make sure the script list is in the order trim_polyA_align() walks
  // it; undo the flip before returning.
  if ((*Sptr)->next_script && (*Sptr)->offset1<(*Sptr)->next_script->offset1) {
    reverse_script = 1;
    script_flip_list(Sptr);
  }

  // Find the last real exon in the list.
  exons_tail = Exons->next_exon;
  while (exons_tail->next_exon && exons_tail->next_exon->toGEN)
    exons_tail=exons_tail->next_exon;

  trim_p = 1;

  if (exons_tail) {
    startPos = exons_tail->toEST;

    // Keep trimming while whole exons are being removed.  When an exon
    // is removed entirely, trim_polyA_align() moves exons_tail to the
    // previous exon and the loop examines that one next; when the tail
    // exon survives (t == exons_tail), trim_p is cleared and we stop.
    while ((t=exons_tail)!=NULL && t->toGEN && trim_p) {
      /* compute the 'A' contents of the exon */
      b = s2 + t->toEST-1;
      end = s2+t->frEST-1;
      numA = 0;

      // Scan backwards, giving up as soon as the remaining bases could
      // no longer bring numA up to the poly-tail threshold.
      while (b>=end && numA+(b-end)>=globalParams->_polyTailPercent*t->length) {
        numA += (*b--=='A');
      }

      // Determine how much of the cut stuff was actually
      // poly-containing.  The first method below returns the number of
      // bases cut from the end of the est, while the second returns the
      // number of bases cut from the end of the alignment.
      //
      //cutAmount = l2 - *lastA + 1;

      if (numA>=globalParams->_polyTailPercent*t->length) {
        /* remove the entire exon */
        trim_polyA_align(Sptr,Exons,&exons_tail,t->frEST,lastA,s1,s2);
        cutAmount = startPos - *lastA + 1;
      } else {
        // NOTE(review): *Sptr is dereferenced here after a possible
        // earlier trim_polyA_align() call, which can advance *Sptr to
        // head->next_script -- confirm that cannot be NULL at this point.
        get_polyAT(s2+(*Sptr)->offset2-1,(*Sptr)->len2,&dummy,&pA,A_ONLY);
        if (pA) {
          int ct_pA;
          /* first position to be removed */
          ct_pA = t->toEST-pA+1;

          // If trimming would leave MIN_EXON or fewer bases past frEST,
          // remove the exon from its start instead.
          ct_pA = (ct_pA-t->frEST>MIN_EXON) ? ct_pA : t->frEST;

          /* note: pA is the last (innermost) position in the tail */
          trim_polyA_align(Sptr,Exons,&exons_tail,ct_pA,lastA,s1,s2);
          cutAmount = startPos - *lastA + 1;
        }
        if (t==exons_tail) trim_p = 0;
      }
    }
  }

  *lastA = cutAmount;
  if (reverse_script) script_flip_list(Sptr);
}
(tp->op_type!=SUBSTITUTE) && (j-1<=(*exons)->toGEN)) { if (tp->op_type==INSERT) { i += tp->num; tmpi += tp->num; (*exons)->numInDel -= tp->num; (*exons)->numEdits -= tp->num; } else if (j>=(*exons)->toGEN && i>=(*exons)->toEST) { j += tp->num; } else { j += tp->num; tmpi += tp->num; (*exons)->numInDel -= tp->num; (*exons)->numEdits -= tp->num; } head->script = tp->next; ckfree(tp); tp = head->script; } if (head->script==NULL) { *Sptr = head->next_script; ckfree(head); } else { head->len1 -= j-head->offset1; head->len2 -= i-head->offset2; head->offset2 = i; head->offset1 = j; head->score -= tmpi; } if ((*exons)->toESTnext_exon; //freeExon(t); garbage collected } else { (*exons)->frEST = i; (*exons)->frGEN = j; (*exons)->length = (*exons)->toEST-(*exons)->frEST+1; (*exons)->alignmentLength = ((*exons)->toGEN - (*exons)->frGEN + 1 + (*exons)->toEST - (*exons)->frEST + 1 + (*exons)->numInDel); (*exons)->percentID = computePercentIdentity((*exons)->numEdits, (*exons)->alignmentLength); } *pT = i-1; return; } void Sim4::remove_polyT_front(struct edit_script_list **Sptr, Exon *Exons, char *s1, char *s2, int *lastT) { Exon *t, *exons_head; /* start from Lblock */ char *b, *end; int numT, dummy, trim_p, reverse_script=0, pT; int startPos=0, cutAmount=0; *lastT = pT = 0; if (!Exons || !Exons->next_exon || !Exons->next_exon->toGEN) return; if ((*Sptr)->next_script && (*Sptr)->offset1>(*Sptr)->next_script->offset1) { script_flip_list(Sptr); reverse_script = 1; } exons_head = Exons->next_exon; trim_p = 1; if (exons_head) { startPos = exons_head->frEST; while ((t=exons_head)!=NULL && t->toGEN && trim_p) { /* compute the 'T' contents of the exon */ b = s2 + t->frEST-1; end = s2+t->toEST; numT = 0; while (btoEST-(b-s2)>=globalParams->_polyTailPercent*t->length)) { numT += (*b++=='T'); } // Determine how much of the cut stuff was actually // poly-containing. 
The first method below returns the number of // bases cut from the end of the est, while the second return the // number of bases cut from the end of the alignment. // //cutAmount = l2 - *lastT + 1; if (numT>=globalParams->_polyTailPercent*t->length) { /* remove the entire exon */ trim_polyT_align(Sptr,&exons_head,t->toEST,lastT,s1,s2); cutAmount = *lastT - startPos + 1; } else { get_polyAT(s2+(*Sptr)->offset2-1,(*Sptr)->len2,&pT,&dummy,T_ONLY); if (pT) { int ct_pT; ct_pT = pT + (*Sptr)->offset2-1; ct_pT = (t->toEST-ct_pT>MIN_EXON) ? ct_pT : t->toEST; trim_polyT_align(Sptr,&exons_head,ct_pT,lastT,s1,s2); cutAmount = *lastT - startPos + 1; } if (t==exons_head) trim_p = 0; } } } Exons->next_exon = exons_head; *lastT = cutAmount; if (reverse_script) script_flip_list(Sptr); } kmer-code-2013-trunk/libsim4/sim4core/pluri_align.C0000644000000000000000000002231112415066336020654 0ustar rootroot#include #include "sim4.H" // Condense_both_Ends -- merge contiguous operations of the same type // together; return both new ends of the chain. 
// void Sim4::Condense_both_Ends(edit_script **head, edit_script **tail, edit_script **prev) { edit_script *tp, *tp1; tp = *head; *prev = NULL; while (tp != NULL) { while (((tp1 = tp->next) != NULL) && (tp->op_type == tp1->op_type)) { tp->num = tp->num + tp1->num; tp->next = tp1->next; ckfree(tp1); } if (tp->next) *prev = tp; else *tail = tp; tp = tp->next; } } void Sim4::pluri_align(int *dist_ptr, Exon *theExons, struct edit_script_list **Aligns, sim4_stats_t *st) { int i, end1, end2, diff, ali_dist; char *a, *b; Exon *thisExon = theExons; Exon *nextExon; int EditDistance = 0; // Sum of all tmpi, previously known as TMPI int AlignmentLength = 0; struct edit_script_list *enew; struct edit_script *head; struct edit_script *tmp_script; struct edit_script *left; struct edit_script *right; struct edit_script *prev; st->numberOfMatches = 0; st->numberOfNs = 0; head = 0L; *Aligns = 0L; *dist_ptr = ali_dist = 0; end1 = _genLen; end2 = _estLen; nextExon = thisExon->next_exon; while (nextExon && nextExon->toGEN) { diff = thisExon->frEST - nextExon->toEST - 1; if (diff != 0) { if (thisExon->toGEN) { enew = (edit_script_list *)ckalloc(sizeof(edit_script_list)); enew->next_script = *Aligns; *Aligns = enew; (*Aligns)->script = head; (*Aligns)->offset1 = thisExon->frGEN; (*Aligns)->offset2 = thisExon->frEST; (*Aligns)->len1 = end1-(*Aligns)->offset1+1; (*Aligns)->len2 = end2-(*Aligns)->offset2+1; (*Aligns)->score = ali_dist; ali_dist = 0; head = NULL; } end1 = nextExon->toGEN; end2 = nextExon->toEST; } else { diff = thisExon->frGEN - nextExon->toGEN - 1; if (diff != 0) { if (thisExon->toGEN) { struct edit_script *newthing; newthing = (edit_script *) ckalloc(sizeof(edit_script)); newthing->op_type = DELETE; newthing->num = diff; newthing->next = head; head = newthing; } else { end1 = nextExon->toGEN; } } } if (globalParams->_interspecies) { diff = get_dist(nextExon->frGEN-1, nextExon->frEST-1, nextExon->toGEN, nextExon->toEST, MAX(1000, 
(int)(globalParams->_percentError*(nextExon->toEST - nextExon->frEST + 1)))); } else { // original diff = align_get_dist(nextExon->frGEN-1, nextExon->frEST-1, nextExon->toGEN, nextExon->toEST, MAX(1000, (int)(.2*(nextExon->toEST - nextExon->frEST + 1)))); } // Return if the alignment fails. // if (diff < 0) { st->numberOfMatches = 0; st->numberOfNs = 0; st->percentID = -1; *Aligns = 0L; return; } #ifdef STATS if (diff > P * (nextExon->toEST - nextExon->frEST + 1)) (void)printf("Warning: Distance threshold on segment exceeded.\n"); #endif if (globalParams->_interspecies) { path(nextExon->frGEN-1, nextExon->frEST-1, SUBSTITUTE, nextExon->toGEN, nextExon->toEST, SUBSTITUTE, diff, &left, &right); } else { // original align_path(nextExon->frGEN-1, nextExon->frEST-1, nextExon->toGEN, nextExon->toEST, diff, &left, &right); } // Return if the alignment fails -- this occurred once aligning // dros frags to dros using snapper. Snapper was giving the wrong // sequence for the seeds it also supplied. 
// if ((left == 0L) || (right == 0L)) { st->numberOfMatches = 0; st->numberOfNs = 0; st->percentID = -1; *Aligns = 0L; return; } Condense_both_Ends(&left, &right, &prev); if (!thisExon->toGEN && right->op_type == DELETE) { /* remove gaps at end of alignment */ diff -= 0+right->num; /* subtract GAP_OPEN = 0 */ nextExon->toGEN -= right->num; end1 -= right->num; if (head && (head->op_type == DELETE)) head->num += right->num; ckfree(right); prev->next = NULL; right = prev; } if ((!nextExon->next_exon || !nextExon->next_exon->toGEN) && left && (left->op_type == DELETE)) { diff -= 0+left->num; /* subtract GAP_OPEN = 0 */ nextExon->frGEN += left->num; tmp_script = left->next; if (right == left) right = tmp_script; ckfree(left); left = tmp_script; } *dist_ptr += diff; ali_dist += diff; a = _genSeq + nextExon->frGEN - 1; b = _estSeq + nextExon->frEST - 1; nextExon->numMatches = 0; nextExon->numNs = 0; nextExon->numInDel = 0; nextExon->numEdits = 0; tmp_script = left; // These are used during SUBSTITUTE below to tell if the base at // a (b) is N (upper or lower case). // bool an = false; bool bn = false; while (tmp_script) { switch (tmp_script->op_type) { case DELETE: nextExon->numInDel += tmp_script->num; nextExon->numEdits += tmp_script->num; a += tmp_script->num; break; case INSERT: nextExon->numInDel += tmp_script->num; nextExon->numEdits += tmp_script->num; b += tmp_script->num; break; case SUBSTITUTE: // Count the number of matches and edits. // // An edit is a true substitute -- a base for a different base, // not a base for an 'n'. // for (i=0; inum; ++i, ++a, ++b) { an = (*a == 'N') || (*a == 'n'); bn = (*b == 'N') || (*b == 'n'); if (an && bn) { // Both are N. It isn't a match and it isn't an edit. // nextExon->numNs++; } else if (an || bn) { // One is an N. Someone has low quality sequence, and we // should penalize. We need to special case this because // IUPACidentity[][] claims N matches all. 
// nextExon->numEdits++; } else if (IUPACidentity[(int)*a][(int)*b]) { // Got a match. nextExon->numMatches++; } else { // Got a substitution nextExon->numEdits++; } } break; } tmp_script = tmp_script->next; } nextExon->alignmentLength = (nextExon->toGEN - nextExon->frGEN + 1 + nextExon->toEST - nextExon->frEST + 1 + nextExon->numInDel); nextExon->percentID = computePercentIdentity(nextExon->numEdits, nextExon->alignmentLength); st->numberOfMatches += nextExon->numMatches; st->numberOfNs += nextExon->numNs; EditDistance += nextExon->numEdits; AlignmentLength += (nextExon->toGEN - nextExon->frGEN + 1 + nextExon->toEST - nextExon->frEST + 1 + nextExon->numInDel); right->next = head; head = left; thisExon = nextExon; nextExon = thisExon->next_exon; } /* at the beginning of the sequences */ if (nextExon!=NULL) { if ((diff=thisExon->frEST-nextExon->toEST-1)!=0 && (diff != _estLen)) { enew = (edit_script_list *)ckalloc(sizeof(edit_script_list)); enew->next_script = *Aligns; *Aligns = enew; (*Aligns)->offset1 = thisExon->frGEN; (*Aligns)->offset2 = thisExon->frEST; (*Aligns)->len1 = end1-(*Aligns)->offset1+1; (*Aligns)->len2 = end2-(*Aligns)->offset2+1; (*Aligns)->script = head; (*Aligns)->score = ali_dist; } else if (diff != _estLen) { /* modified to cut introns at the beginning of the sequence */ enew = (edit_script_list *)ckalloc(sizeof(edit_script_list)); enew->next_script = *Aligns; *Aligns = enew; (*Aligns)->offset1 = thisExon->frGEN; (*Aligns)->offset2 = 1; (*Aligns)->len1 = end1-(*Aligns)->offset1+1; (*Aligns)->len2 = end2-(*Aligns)->offset2+1; (*Aligns)->script = head; (*Aligns)->score = ali_dist; } } st->percentID = computePercentIdentity(EditDistance, AlignmentLength); } void Sim4::updateStatistics(Exon *theExon, sim4_stats_t *st) { theExon = theExon->next_exon; st->numberOfMatches = 0; st->numberOfNs = 0; int EditDistance = 0; int AlignmentLength = 0; while (theExon && theExon->toGEN) { st->numberOfMatches += theExon->numMatches; st->numberOfNs += 
theExon->numNs; EditDistance += theExon->numEdits; AlignmentLength += (theExon->toGEN - theExon->frGEN + 1 + theExon->toEST - theExon->frEST + 1 + theExon->numInDel); theExon = theExon->next_exon; } st->percentID = computePercentIdentity(EditDistance, AlignmentLength); } kmer-code-2013-trunk/libsim4/sim4core/mspManager.H0000644000000000000000000001374112322046702020445 0ustar rootroot#ifndef MSP_MANAGER_H #define MSP_MANAGER_H #include #include #include #include "util++.H" #include "exon.H" #include "sim4b1_s.H" struct msp { int len; int pos1; int pos2; int score; int linkingScore; int prev; }; // // How to handle memory allocation? // // Just use an array of msp objects, reallocate // when needed. Allocate a large number of these // initially. // class mspManager { public: mspManager(); ~mspManager(); // Returns true if x=a and y=g x=g and y=a // x=c and y=t x=t and y=c // // This used to be an array of size [256][256], that was initialized on each construction of this // object. That was killing performance in snapper. 
// int transitionFunction(int x, int y) { int xa = ((x == 'a') || (x == 'A')); int xc = ((x == 'c') || (x == 'C')); int xg = ((x == 'g') || (x == 'G')); int xt = ((x == 't') || (x == 'T')); int ya = ((y == 'a') || (y == 'A')); int yc = ((y == 'c') || (y == 'C')); int yg = ((y == 'g') || (y == 'G')); int yt = ((y == 't') || (y == 'T')); return((xa && yg) || (xg && ya) || (xc && yt) || (xt && yc)); }; void setParameters(int match, int imismatch, int vmismatch, double percenterror, int wordextallow) { _imismatch = imismatch; _vmismatch = vmismatch; _match = match; _imatchdiff = match - imismatch; _vmatchdiff = match - vmismatch; _percentError = percenterror; _wordExtAllow = wordextallow; }; void setExonSource(exonManager *em) { _exonManager = em; }; void setLimits(uint32 a, double p) { _mspLimitAbsolute = a; _mspLimitPercent = p; }; void setScoreThreshold(int K, int interspecies); bool tooManyMSPs(void) { return(_tooManyMSPs); }; uint32 numberOfMSPs(void) { return(_numMSPs); }; void setLength(int l) { _cDNALength = l; }; void clear(void) { _numMSPs = 0; }; void clearDiagonal(int genlen, int estlen) { // XXX: These aren't always the real EST and GENOMIC lengths. If // we are working in a subsequence of the whole sequence they // will be the length of the subsequence. // _GENlen = genlen; _ESTlen = estlen; // Allocate more space, if needed. // if (_GENlen + _ESTlen + 1 > _diagMax) { delete _diagExt; // Allocate space for the list of extension ends. Each diagonal // remembers the lowest position that it has been extended to. This // lets us throw out new hits without extending or merging in new // extensions. Assumes that hits are added in order. // _diagMax = _GENlen + _ESTlen + 1; _diagExt = new int [_diagMax]; } // Using the obvious for loop for this hurts. Don't do it. // bzero(_diagExt, sizeof(int) * (_GENlen + _ESTlen + 1)); }; // add an extended MSP to the list // void addMSP(int l, int p1, int p2, int sc); // add a single unextended hit to the list. 
// this will do extensions if we haven't already extended through it. // void addHit(char *genSeq, char *estSeq, int genLen, int estLen, int genPos, int estPos, mss_t &MSS) { #if 0 // We'd like to tie this into DEBUG_EXTENSION, but I want // to keep those defines in the source file. Oh well. // fprintf(stderr, "mspManager::addHit()-- adding hit from GEN %d to %d and EST %d to %d (length = %d) diag=%d lim=%d have %d\n", genPos-MSS.seedLength, genPos, estPos-MSS.seedLength, estPos, MSS.seedLength, estLen + genPos - estPos - 1, _diagExt[estLen + genPos - estPos - 1], genPos); #endif if (_diagExt[estLen + genPos - estPos - 1] <= genPos) addHit_(genSeq, estSeq, genLen, estLen, genPos, estPos, MSS); }; Exon *doLinking(int weight, int drange, int offset1, int offset2, int flag, int relinkFlag, char *s1, char *s2); private: void addHit_(char *genSeq, char *estSeq, int genLen, int estLen, int genPos, int estPos, mss_t &MSS); bool _sorted; int _ESTlen; int _GENlen; uint32 _allocMSPs; uint32 _numMSPs; msp *_allMSPs; bool _tooManyMSPs; int _cDNALength; double _mspLimitPercent; uint32 _mspLimitAbsolute; int _match; int _imismatch; int _vmismatch; int _imatchdiff; int _vmatchdiff; double _percentError; int _wordExtAllow; exonManager *_exonManager; int _minMSPScore; int _diagMax; int *_diagExt; }; inline void mspManager::addMSP(int l, int p1, int p2, int sc) { // Allocate more MSPs, if we need to. 
// if (_numMSPs >= _allocMSPs) { _allocMSPs *= 2; msp *n = new msp [_allocMSPs]; for (uint32 i=0; i<_numMSPs; i++) { n[i].len = _allMSPs[i].len; n[i].pos1 = _allMSPs[i].pos1; n[i].pos2 = _allMSPs[i].pos2; n[i].score = _allMSPs[i].score; n[i].linkingScore = _allMSPs[i].linkingScore; n[i].prev = _allMSPs[i].prev; } delete _allMSPs; _allMSPs = n; } #ifdef DEBUG_MSPS fprintf(stdout, "ADDMSP: p1=%8d p2=%8d l=%8d sc=%8d\n", p1, p2, l, sc); #endif _allMSPs[_numMSPs].len = l; _allMSPs[_numMSPs].pos1 = p1; _allMSPs[_numMSPs].pos2 = p2; _allMSPs[_numMSPs].score = sc; _allMSPs[_numMSPs].linkingScore = 0; _allMSPs[_numMSPs].prev = 0; _numMSPs++; _sorted = false; } #endif // MSP_MANAGER_H kmer-code-2013-trunk/libsim4/sim4core/sim4command.C0000644000000000000000000001411712322046702020557 0ustar rootroot#include #include #include #include "sim4command.H" #include using namespace std; // Run a single EST against a genomic range // // XXX: We should pull out the EST and GEN from the seqCache, // and store them as the "two char*" method. 
// sim4command::sim4command(uint32 ESTid, seqCache *ESTs, uint32 GENid, uint32 GENlo, uint32 GENhi, seqCache *GENs, bool doFor, bool doRev) { _estIdx = ESTid; _ESTs = ESTs; _ESTloaded = 0L; _ESTsequence = 0L; _ESTsequenceLength = 0; _genIdx = GENid; _genLo = GENlo; _genHi = GENhi; _GENs = GENs; _GENloaded = 0L; _GENsequence = 0L; _GENsequenceLength = 0; _doForward = doFor; _doReverse = doRev; _externalSeedsLen = 0; _externalSeedsMax = 0; _externalSeeds = 0L; } sim4command::sim4command(seqInCore *EST, seqInCore *GEN, uint32 GENlo, uint32 GENhi, bool doFor, bool doRev) { _estIdx = EST->getIID(); _ESTs = 0L; _ESTloaded = EST; _ESTsequence = 0L; _ESTsequenceLength = 0; _genIdx = GEN->getIID(); _genLo = GENlo; _genHi = GENhi; _GENs = 0L; _GENloaded = GEN; _GENsequence = 0L; _GENsequenceLength = 0; _doForward = doFor; _doReverse = doRev; _externalSeedsLen = 0; _externalSeedsMax = 0; _externalSeeds = 0L; } // Use two char*'s for sequence sources // sim4command::sim4command(char *EST, uint32 ESTlen, char *GEN, uint32 GENlen, uint32 GENlo, uint32 GENhi, bool doFor, bool doRev) { _estIdx = 0; _ESTs = 0L; _ESTloaded = 0L; _ESTsequence = EST; _ESTsequenceLength = ESTlen; _genIdx = 0; _genLo = GENlo; _genHi = GENhi; _GENs = 0L; _GENloaded = 0L; _GENsequence = GEN; _GENsequenceLength = GENlen; _doForward = doFor; _doReverse = doRev; _externalSeedsLen = 0; _externalSeedsMax = 0; _externalSeeds = 0L; } sim4command::~sim4command() { if (_ESTs) delete _ESTloaded; if (_GENs) delete _GENloaded; delete [] _externalSeeds; } // Make absolutely sure that the genomic sequence start and end // positions are within the actual sequence. Ideally, this should // be checked by whatever generates the input, but it probably // isn't. // // If the end position is too big, make it the same as the sequence // length. // // If the start position is bigger than the (corrected) end // position, make it 100K less than the end position. // // This has the side-effect of loading the genomic sequence. 
// void sim4command::finalize(void) { if (_genHi > getGENlength()) _genHi = getGENlength(); if (_genLo > _genHi) if (_genHi > 100000) _genLo = _genHi - 100000; else _genLo = 0; } // get() routines have multple cases // // if no fastaBase, they can quickly return // otherwise // if nothing loaded or the thing loaded isn't right: // delete the current // load the correct // void sim4command::loadEST(void) { if ((_ESTloaded == 0L) || (_ESTloaded->getIID() != _estIdx)) { delete _ESTloaded; _ESTloaded = _ESTs->getSequenceInCore(_estIdx); } } uint32 sim4command::getESTidx(void) { if (_ESTsequence) return(0); return(_estIdx); } char* sim4command::getESTheader(void) { static char *xxx = "anonymous cDNA sequence"; if (_ESTsequence) return(xxx); loadEST(); return(_ESTloaded->header()); } char* sim4command::getESTsequence(void) { if (_ESTsequence) return(_ESTsequence); loadEST(); return(_ESTloaded->sequence()); } uint32 sim4command::getESTlength(void) { if (_ESTsequence) return(_ESTsequenceLength); loadEST(); return(_ESTloaded->sequenceLength()); } void sim4command::loadGEN(void) { if ((_GENloaded == 0L) || (_GENloaded->getIID() != _genIdx)) { delete _GENloaded; _GENloaded = _GENs->getSequenceInCore(_genIdx); } } char* sim4command::getGENheader(void) { char *xxx = "anonymous genomic sequence"; if (_GENsequence) return(xxx); loadGEN(); return(_GENloaded->header()); } char* sim4command::getGENsequence(void) { if (_GENsequence) return(_GENsequence); loadGEN(); return(_GENloaded->sequence()); } uint32 sim4command::getGENlength(void) { if (_GENsequence) return(_GENsequenceLength); loadGEN(); return(_GENloaded->sequenceLength()); } //////////////////////////////////////// // // This expects base-based seeds. // This expects that the position of the seed is the base in the seed. // This expects that GENpos is relative to the genomic subsequence. // // If reverse-complement match, the EST is reversed, the GEN is forward. 
// void sim4command::addSeed(uint32 GENpos, uint32 ESTpos, uint32 length) { if (_externalSeedsLen >= _externalSeedsMax) { if (_externalSeedsMax == 0) _externalSeedsMax = 256; _externalSeedsMax *= 2; externalSeed *n = new externalSeed [_externalSeedsMax]; memcpy(n, _externalSeeds, sizeof(externalSeed) * _externalSeedsLen); delete [] _externalSeeds; _externalSeeds = n; } _externalSeeds[_externalSeedsLen]._GENposition = GENpos; _externalSeeds[_externalSeedsLen]._ESTposition = ESTpos; _externalSeeds[_externalSeedsLen]._length = length; // fprintf(stderr, "sim4command::addSeed()-- GEN="uint32FMT" EST="uint32FMT" of length "uint32FMT"\n", GENpos, ESTpos, length); _externalSeedsLen++; } void sim4command::sortExternalSeeds(void) { sort(_externalSeeds, _externalSeeds + _externalSeedsLen); } kmer-code-2013-trunk/libsim4/sim4core/sites_donor.H0000644000000000000000000000033511415365503020703 0ustar rootroot#ifndef SITES_DONOR_H #define SITES_DONOR_H /* DO NOT REMOVE or MODIFY !!!! */ #define NUM_MODELS_DON 25 #define NUM_VALUES_DON 928 extern double don[NUM_MODELS_DON][NUM_VALUES_DON]; #endif /* SITES_DONOR_H */ kmer-code-2013-trunk/libsim4/sim4core/exon_cores.C0000644000000000000000000000650412322046702020511 0ustar rootroot#include "sim4.H" #include // exon_cores() must have seq-1 passed in. search() offsets this. void Sim4::exon_cores(char *s1, char *s2, int l1, int l2, int offset1, int offset2, int flag, mss_t MSS, int K, int type) { _mspManager.clear(); _mspManager.clearDiagonal(l1, l2); _mspManager.setScoreThreshold(K, globalParams->_interspecies); //mss_t MSS = masks_shifts(seed); LLL DELETE bld_table(s2,l2,MSS,type); search(s1,s2,l1,l2,MSS); // Cleaning up after the bld_table() is done at the next call, or // in the destructor. 
// hashtable = 0L; exon_list = _mspManager.doLinking(DEFAULT_WEIGHT, DEFAULT_DRANGE, offset1, offset2, flag, false, s1, s2); } void Sim4::search(char *s1, char *s2, int l1, int l2, mss_t MSS) { struct hash_node *h; char *t; uint64 ecode; int masked_ecode; int i, p, j; // Too short? Abort! // if (l1 < MSS.seedLength) return; t = s1+1; i = 0; int validEncoding = 1 - MSS.seedLength; int pos1; ecode = uint64ZERO; // 5% win (tested on on small examples) if we use t[] instead of *t below. // Scan from low to high position in the genomic sequence // if (MSS.type == CONTINUOUS_SEED) { for (i=0; i < l1; i++) { pos1 = (int)(t-s1) + i; if (encoding[(int)t[i]] >= 0) { validEncoding++; ecode &= mask; ecode <<= 2; ecode |= encoding[(int)t[i]]; masked_ecode = (int)ecode; if (validEncoding > 0) { for (h = hashtable->table[masked_ecode & HASH_SIZE]; h; h = h->link) { if (h->ecode == masked_ecode) { // These positions are from high to low (see table.C) // for (p = h->pos; p >= 0; p = hashtable->nextPos[p]) _mspManager.addHit(s1, s2, l1, l2, pos1, p, MSS); break; } } } } else { validEncoding = 1 - MSS.seedLength; } } } else { /* SPACED_SEED */ for (i=0; i < l1; i++) { pos1 = (int)(t-s1) + i; if (encoding[(int)t[i]] >= 0) { validEncoding++; ecode &= MSS.mask; ecode <<= 2; ecode |= encoding[(int)t[i]]; #if 0 masked_ecode = mask_shift(ecode,MSS); #else // 40% cheaper for cross-species, 53% cheaper for same species for (j=masked_ecode=0; j> MSS.shifts[j]; #endif if (validEncoding > 0) { for (h = hashtable->table[masked_ecode & HASH_SIZE]; h; h = h->link) { if (h->ecode == masked_ecode) { // These positions are from high to low (see table.C) // for (p = h->pos; p >= 0; p = hashtable->nextPos[p]) _mspManager.addHit(s1, s2, l1, l2, pos1, p, MSS); break; } } } } else { validEncoding = 1 - MSS.seedLength; } } } } kmer-code-2013-trunk/libsim4/sim4core/sim4command.H0000644000000000000000000001044612322046702020565 0ustar rootroot#ifndef SIM4COMMAND_H #define SIM4COMMAND_H #include "bio++.H" 
#include "seqCache.H" // // Contains the variable stuff for an execution of sim4 // access to sequences (via seqCache) // genomic iid // genomic range // est iid (maybe more than one) // forward only // reverse only // class sim4command { public: // Run a single EST against a genomic range // sim4command(uint32 ESTid, seqCache *ESTs, uint32 GENid, uint32 GENlo, uint32 GENhi, seqCache *GENs, bool doForward, bool doReverse); // Single EST against a genomic range, using alternative // interface. // sim4command(seqInCore *EST, seqInCore *GEN, uint32 GENlo, uint32 GENhi, bool doForward, bool doReverse); // Use two char*'s for sequence sources -- both sequence deflines // and iid's are undefined! // sim4command(char *EST, uint32 ESTlen, char *GEN, uint32 GENlen, uint32 GENlo, uint32 GENhi, bool doForward, bool doReverse); ~sim4command(); // These methods allow the initial seed detection to be // done outside Sim4::run(). If used: // each seed is extended as before. // for interative alignments, seeds are masked out // // addSeed() takes coordinates relative to the start of the GEN // sequence supplied to the constructor. // void addSeed(uint32 GENpos, uint32 ESTpos, uint32 length); void sortExternalSeeds(void); bool externalSeedsExist(void) { return(_externalSeedsLen > 0); }; uint32 numberOfExternalSeeds(void) { return(_externalSeedsLen); }; uint32 externalSeedESTPosition(uint32 i) { return(_externalSeeds[i]._ESTposition); }; uint32 externalSeedGENPosition(uint32 i) { return(_externalSeeds[i]._GENposition); }; uint32 externalSeedLength(uint32 i) { return(_externalSeeds[i]._length); }; void maskExternalSeed(uint32 i) { _externalSeeds[i]._length = 0; }; // Load the sequences, make some checks. This isn't done in the // constructor so that it is possible to make a big list of // commands, then give them to a processor. If we loaded all the // genomics at creation.... 
// void finalize(void); void setForward(bool x) { _doForward = x; }; void setReverse(bool x) { _doReverse = x; }; bool doForward(void) { return(_doForward); }; bool doReverse(void) { return(_doReverse); }; void setGenomic(uint32 idx, uint32 lo, uint32 hi) { _genIdx = idx; _genLo = lo; _genHi = hi; }; uint32 getESTidx(); char *getESTheader(); char *getESTsequence(); uint32 getESTlength(); uint32 getGENidx(void) { return(_genIdx); }; uint32 getGENlo(void) { return(_genLo); }; uint32 getGENhi(void) { return(_genHi); }; char *getGENheader(void); char *getGENsequence(void); uint32 getGENlength(void); private: void loadEST(void); void loadGEN(void); uint32 _estIdx; seqCache *_ESTs; seqInCore *_ESTloaded; char *_ESTsequence; uint32 _ESTsequenceLength; // valid only for _ESTsequence uint32 _genIdx; uint32 _genLo; uint32 _genHi; seqCache *_GENs; seqInCore *_GENloaded; char *_GENsequence; uint32 _GENsequenceLength; bool _doForward; bool _doReverse; // For external seeding // class externalSeed { public: uint32 _GENposition; uint32 _ESTposition; uint32 _length; bool operator<(const externalSeed &that) const { return(_GENposition < that._GENposition); }; }; uint32 _externalSeedsLen; uint32 _externalSeedsMax; externalSeed *_externalSeeds; }; #endif // SIM4COMMAND_H kmer-code-2013-trunk/libsim4/sim4core/greedy.C0000644000000000000000000002560512415066336017637 0ustar rootroot#include "sim4.H" //#define ANNOUNCEEXIT(S) fprintf(stdout, S); #define ANNOUNCEEXIT(S) int Sim4::greedy(char *s1, char *s2, int m, int n0, int OFFSET1, int OFFSET2, Exon **lblock, Exon **rblock) { int col, /* column number */ d, /* current distance */ k, /* current diagonal */ Cost, blower,flower, /* boundaries for searching diagonals */ bupper,fupper, row; /* row number */ int flip = 0; /* swap sequences for narrow gaps with interspecies */ int max_d; /* bound on size of edit script */ int back, forth; /* backward and forward limits at exit */ int *blast_d, *flast_d; /* rows containing the last d (at crt 
step, d-1) */ int *btemp_d, *ftemp_d; /* rows containing tmp values for the last d */ int *min_row, *min_diag; /* min (b)/ max (f) row (and diagonal) */ int *max_row, *max_diag; /* reached for cost d=0, ... m. */ const int MAX_D = max_d = MAX(wordSize,(int)(globalParams->_percentError * m + 1)); if (n0 < m) { if (m < (int)MIN(wordSize, (1 + globalParams->_percentError) * n0)) { *lblock = *rblock = _exonManager.newExon(OFFSET2+1,OFFSET1+1,OFFSET2+n0,OFFSET1+m, m,n0-m+(int)(globalParams->_percentError * m + 1),0,NULL); ANNOUNCEEXIT("greedy-1\n"); return(m-n0+(int)(globalParams->_percentError * n0 + 1)); } else if (m > (int)MIN(wordSize, (1 + globalParams->_percentError) * n0)) { if (globalParams->_interspecies) { /* flip coordinates */ d = m; m = n0; n0 = d; d = OFFSET1; OFFSET1 = OFFSET2; OFFSET2 = d; char *s = s1; s1 = s2; s2 = s; flip = 1; } else { *lblock = *rblock = 0L; ANNOUNCEEXIT("greedy-2\n"); return(MAX_D+1); } } } const int n1 = MIN(m+max_d+1, n0); const int n2 = n1; const int DELTA = n2-m; const int l_offset1 = OFFSET1; const int r_offset1 = OFFSET1; const int l_offset2 = OFFSET2; const int r_offset2 = OFFSET2 + n0 - n2; const int L_ORIGIN = MAX_D; const int R_ORIGIN = MAX_D - DELTA; const char *l_s1 = s1; const char *r_s1 = s1; const char *l_s2 = s2; const char *r_s2 = s2 + n0 - n2; for (row=m, col=n2; row>0 && col>0 && (r_s1[row-1]==r_s2[col-1]); row--,col--) /*LINTED empty loop body*/; if (row == 0) { /* hit last row; stop search */ if (flip) { d = m; m = n0; n0 = d; d = OFFSET1; OFFSET1 = OFFSET2; OFFSET2 = d; char *s = s1; s1 = s2; s2 = s; } *lblock = *rblock = _exonManager.newExon(r_offset2-m+n2+1,r_offset1+1,r_offset2+n2, r_offset1+m,m,0,0,NULL); ANNOUNCEEXIT("greedy-3\n"); return 0; } // Instead of doing eight calls to ckalloc, we do one, and dish out // that in pieces. 
// int *allocdSpace = (int *)ckalloc((4*(MAX_D+n2+1) + 4*(MAX_D+1)) * sizeof(int)); blast_d = allocdSpace; // MAX_D+n2+1 btemp_d = blast_d + (MAX_D+n2+1); // MAX_D+n2+1 flast_d = btemp_d + (MAX_D+n2+1); // MAX_D+n2+1 ftemp_d = flast_d + (MAX_D+n2+1); // MAX_D+n2+1 max_row = ftemp_d + (MAX_D+n2+1); // MAX_D+1 min_row = max_row + (MAX_D+1); // MAX_D+1 max_diag = min_row + (MAX_D+1); // MAX_D+1 min_diag = max_diag + (MAX_D+1); // MAX_D+1 for (k=0; k<=MAX_D+n2; ++k) { blast_d[k] = m+1; btemp_d[k] = m+1; } blast_d[R_ORIGIN+DELTA] = row; blower = R_ORIGIN + DELTA - 1; bupper = R_ORIGIN + DELTA + 1; for (row=0; row 0 && col > 0 && (r_s1[row-1]==r_s2[col-1])) { --row; --col; } btemp_d[k] = row; #if 0 if (row == 0 || col == 0) max_d = d; #endif } /* for k */ min_row[d] = btemp_d[DELTA+R_ORIGIN]; min_diag[d] = DELTA+R_ORIGIN; for (k=blower; k<=bupper; ++k) { blast_d[k] = btemp_d[k]; btemp_d[k] = m+1; if (blast_d[k] d+Cost) || (max_d==d+Cost && (forth<0)))) { max_d = d+Cost; back = d; forth = Cost; break; } } --blower; ++bupper; /* for each relevant diagonal ... 
*/ for (k = flower; k <= fupper; k++) { /* get space for the next edit instruction */ /* find a d on diagonal k */ if (k==-d+L_ORIGIN) { /* move down from the last d-1 on diagonal k+1 */ row = flast_d[k+1]+1; } else if (k==d+L_ORIGIN) { /* move right from the last d-1 on diagonal k-1 */ row = flast_d[k-1]; } else if ((flast_d[k]>=flast_d[k+1]) && (flast_d[k]+1>=flast_d[k-1])) { /* substitution */ row = flast_d[k]+1; } else if ((flast_d[k+1]+1>=flast_d[k-1]) && (flast_d[k+1]>=flast_d[k])) { /* move left from the last d-1 on diagonal k+1 */ row = flast_d[k+1]+1; } else { /* move right from the last d-1 on diagonal k-1 */ row = flast_d[k-1]; } /* code common to the three cases */ col = row + k - L_ORIGIN; /* slide down the diagonal */ if (row>=0) while (row < m && col < n1 && (l_s1[row]==l_s2[col])) { ++row; ++col; } ftemp_d[k] = row; #if 0 if (row == m || col == n1) max_d = d; #endif } /* for k */ max_row[d] = ftemp_d[L_ORIGIN]; max_diag[d] = L_ORIGIN; for (k=flower; k<=fupper; ++k) { flast_d[k] = ftemp_d[k]; ftemp_d[k] = -1; if (flast_d[k]>max_row[d]) { max_row[d] = flast_d[k]; max_diag[d] = k; } } /* record backward and forward limits, if minimum combined * cost in overlapping. Note: it suffices to search up to * Cost=MIN(d,(max_d-d)). */ for (Cost=0; Cost<=d; Cost++) { if ((min_row[Cost]<=max_row[d]) && ((max_d>d+Cost) || (max_d==d+Cost && (forth<0)))) { max_d = d+Cost; back = Cost; forth = d; break; } } --flower; ++fupper; ++d; /* for d */ } if (d>MAX_D) { *lblock = *rblock = NULL; ckfree(allocdSpace); ANNOUNCEEXIT("greedy-5\n"); return d; } // XXX: Quick fix! 
// if ((back < 0) || (forth < 0)) { *rblock = *lblock = 0L; fprintf(stdout, "Choke!\n"); return(MAX_D+1); } if (flip) { /* Cost is within allocated limit */ d = m; m = n0; n0 = d; d = OFFSET1; OFFSET1 = OFFSET2; OFFSET2 = d; char *s = s1; s1 = s2; s2 = s; *lblock = *rblock = _exonManager.newExon(OFFSET2+1,OFFSET1+1,OFFSET2+n0,OFFSET1+m,m,back+forth,0,NULL); ckfree(allocdSpace); ANNOUNCEEXIT("greedy-6\n"); return back+forth; } if (m-min_row[back]>=max_row[forth]) { if ((r_offset2+1+min_diag[back]-R_ORIGIN) < (l_offset2+max_diag[forth]-L_ORIGIN)) { *rblock = *lblock = _exonManager.newExon(l_offset2+1,l_offset1+1, l_offset2+n0,l_offset1+m, m,back+forth,0,NULL); } else { *rblock = _exonManager.newExon(r_offset2+1+min_row[back]+min_diag[back]-R_ORIGIN, r_offset1+1+min_row[back], r_offset2+n2,r_offset1+m, m-min_row[back],back,0,NULL); *lblock = _exonManager.newExon(l_offset2+1,l_offset1+1, l_offset2+min_row[back]+max_diag[forth]-L_ORIGIN, l_offset1+min_row[back], min_row[back],forth,0,*rblock); } } else { if ((r_offset2+1+min_diag[back]-R_ORIGIN) < (l_offset2+max_diag[forth]-L_ORIGIN)) { *rblock = *lblock = _exonManager.newExon(l_offset2+1,l_offset1+1, l_offset2+n0,l_offset1+m, m,back+forth,0,NULL); } else { *rblock = _exonManager.newExon(r_offset2+1+max_row[forth]+min_diag[back]-R_ORIGIN, r_offset1+1+max_row[forth], r_offset2+n2,r_offset1+m,m-max_row[forth],back,0,NULL); *lblock = _exonManager.newExon(l_offset2+1,l_offset1+1, l_offset2+max_row[forth]+max_diag[forth]-L_ORIGIN, l_offset1+max_row[forth],max_row[forth],forth,0,*rblock); } } ckfree(allocdSpace); ANNOUNCEEXIT("greedy-7\n"); return back+forth; } kmer-code-2013-trunk/libsim4/sim4core/sim4b1-2.C0000644000000000000000000000545512415066336017617 0ustar rootroot#include "sim4.H" int Sim4::SIM4_block2(Exon* &tmp_Lblock, Exon* &tmp_Rblock, Exon* &tmp_block, Exon* &tmp_block1) { int cost; int rollbflag = 0; int diff = (int)(tmp_block1->frEST - tmp_block->toEST - 1); //fprintf(stderr, "Called SIM4_block2()\n"); if 
(diff <= MAX_GRINIT) { cost = greedy(_estSeq + tmp_block->toEST, _genSeq + tmp_block->toGEN, diff, tmp_block1->frGEN-tmp_block->toGEN-1, tmp_block->toEST,tmp_block->toGEN, &tmp_Lblock, &tmp_Rblock); #if 0 printf("greedy returned cost %d (limit:%d)\n", cost, MAX(wordSize,(int)(globalParams->_percentError * diff + 1))); #endif } else { cost = MAX(wordSize,(int)(globalParams->_percentError * diff + 1))+1; } //PRINTEXONS("greedy\n", tmp_Lblock); if (cost>MAX(wordSize,(int)(globalParams->_percentError * diff + 1))) { if (!tmp_block->flag && !tmp_block1->flag) { exon_cores(_genSeq+tmp_block->toGEN-1, _estSeq+tmp_block->toEST-1, tmp_block1->frGEN-tmp_block->toGEN-1, diff, tmp_block->toGEN+1, tmp_block->toEST+1, 1, spacedSeedIntMSS, mspThreshold2, TEMP); //PRINTEXONS("1\n", exon_list); tmp_Lblock = tmp_Rblock = exon_list; while ((tmp_Rblock!=NULL) && (tmp_Rblock->next_exon!=NULL)) tmp_Rblock = tmp_Rblock->next_exon; if ((!tmp_Lblock && tmp_block1->frGEN-tmp_block->toGEN>50000) || (tmp_Lblock && (tmp_Lblock->frEST-tmp_block->toEST>100) && (tmp_Lblock->frGEN-tmp_block->frGEN>50000)) || (tmp_Lblock && (tmp_block1->frEST-tmp_Rblock->toEST>100) && (tmp_block1->frGEN-tmp_Rblock->frGEN>50000))) { /* possible large intron; increase the score weight */ //freeExonList(tmp_Lblock); garbage collected exon_list = _mspManager.doLinking(globalParams->_relinkWeight, DEFAULT_DRANGE, tmp_block->toGEN + 1, tmp_block->toEST + 1, 1, true, _genSeq, _estSeq); //PRINTEXONS("1a\n", exon_list); tmp_Lblock = tmp_Rblock = exon_list; while ((tmp_Rblock!=NULL) && (tmp_Rblock->next_exon!=NULL)) tmp_Rblock = tmp_Rblock->next_exon; } _mspManager.clear(); if (tmp_Lblock) rollbflag = 1; else rollbflag = 0; /* already 0 */ } else { tmp_Lblock = tmp_Rblock = NULL; } } return(rollbflag); } kmer-code-2013-trunk/libsim4/sim4core/sim4parameters.H0000644000000000000000000001125512322046702021311 0ustar rootroot#ifndef SIM4_PARAMETERS_H #define SIM4_PARAMETERS_H #include "mspManager.H" #include "sim4defines.H" 
#include "../sim4polish/sim4polish.H" #define SPACED_SEED_MAX_LEN 64 class sim4parameters { public: sim4parameters(); ~sim4parameters(); void setFindAllExons(bool x=true) { _findAllExons = x; }; void setMinCoverage(double x) { _minCoverage = x; }; void setMinCoverageLength(int l) { _minCoverageLength = l; }; void setMinPercentExonIdentity(int l) { _minPercentExonIdentity = l; }; void setIncludeDefLine(bool x=true) { _includeDefLine = x; }; void setPrintAlignments(bool x=true) { _printAlignments = x; }; void setAlwaysReport(int n) { _alwaysReport = n; }; void setIgnorePolyTails(bool x) { _ignorePolyTails = x; }; void setPolyTailPercent(double x) { _polyTailPercent = x; }; void setMSPThreshold1(int t) { _mspThresh1 = t; }; void setMSPThreshold2(int t) { _mspThresh2 = t; }; void setMSPLimitAbsolute(uint32 t) { _mspLimitAbsolute = t; }; void setMSPLimitPercent(double p) { _mspLimitPercent = p; }; void setRelinkWeight(int x) { _relinkWeight = x; }; void setWordSize(int w) { _wordSize = w; }; void setWordSizeInt(int w) { _wordSizeInt = w; }; void setWordSizeExt(int w) { _wordSizeExt = w; }; void setSpacedSeed(char *z) { assert(strlen(z) < SPACED_SEED_MAX_LEN); strcpy(_spacedSeed, z); _isSetSpacedSeed = true; }; void setSpliceModel(int j) { _spliceModel = j; _isSetSpliceModel = true; }; void setDontForceCanonicalSplicing(bool x=true) { _dontForceCanonicalSplicing = x; }; void setForceStrandPrediction(bool x=true) { _forceStrandPrediction = x; }; void setSlideIntrons(bool x=true) { _slideIntrons = x; }; void setInterspecies(bool x=true) { _interspecies = x; if (_interspecies) { _percentError = 0.45; _match = 1; _imismatch = -1; _vmismatch = -3; if (_isSetSpliceModel == false) _spliceModel = SPLICE_GENESPLICER; if (_isSetSpacedSeed == false) strcpy(_spacedSeed, DEFAULT_SPACED_SEED); strcpy(_spacedSeedInt, DEFAULT_SPACED_SEED_INT); strcpy(_spacedSeedExt, DEFAULT_SPACED_SEED_EXT); } else { _percentError = 0.20; _match = 1; _imismatch = -5; _vmismatch = -5; } } void 
setOutputFormat(int styleCode) { switch (styleCode) { case S4P_POLISH_S4DB: _style = sim4polishS4DB; break; case S4P_POLISH_GFF3: _style = sim4polishGFF3; break; case S4P_POLISH_ATAC: _style = sim4polishS4DB; break; // Not yet implemented default: fprintf(stderr, "sim4parameters::setOutputFormat() error: unrecognized output format; re-setting to default.\n"); } } int setSpliceMutex(void) { return pthread_mutex_init(&_splice_mutex,NULL); } sim4polishStyle getOutputFormat(void) { return _style; } private: double _minCoverage; int _minCoverageLength; int _minPercentExonIdentity; int _alwaysReport; bool _findAllExons; bool _includeDefLine; bool _printAlignments; bool _dontForceCanonicalSplicing; bool _forceStrandPrediction; bool _ignorePolyTails; double _polyTailPercent; int _mspThresh1; int _mspThresh2; // For aborting expensive polishes // double _mspLimitPercent; // Same as below, as percentage of length of cDNA uint32 _mspLimitAbsolute; // Number of MSPs allowed per hit int _relinkWeight; int _wordSize; int _wordSizeInt; int _wordSizeExt; char _spacedSeed[SPACED_SEED_MAX_LEN]; char _spacedSeedInt[SPACED_SEED_MAX_LEN]; char _spacedSeedExt[SPACED_SEED_MAX_LEN]; bool _isSetSpacedSeed; int _spliceModel; bool _isSetSpliceModel; pthread_mutex_t _splice_mutex; // Interspecies comparison options. // _percentError is the former #defined P // _match is the former MATCH // _misMatch is the former MISMATCH // bool _slideIntrons; // Interspecies comparison options. 
// _percentError is the former #defined P // _match is the former MATCH // _misMatch is the former MISMATCH // bool _interspecies; sim4polishStyle _style; double _percentError; int _match; int _imismatch; int _vmismatch; int _imatchdiff; int _vmatchdiff; friend class Sim4; }; #endif // SIM4_PARAMETERS_H kmer-code-2013-trunk/libsim4/sim4core/mspManager.C0000644000000000000000000004065612415066336020455 0ustar rootroot#include #include #include #include #include #include "sim4.H" #define DEFAULT_L 8 mspManager::mspManager() { _sorted = true; _ESTlen = 0; _GENlen = 0; _allocMSPs = 16384; _numMSPs = 0; _allMSPs = new msp [_allocMSPs]; // The following four variables are for aborting expensive // polishes -- ones that have proven to be large chunks of // genomic labeled as cDNA, and that have (ESTmapper) signals // across entire scafflds. // _tooManyMSPs = false; _cDNALength = 0; _mspLimitPercent = 0.0; _mspLimitAbsolute = 0; // These need to be reset with setParameters. The code will die // during link() if they are not set. 
// _match = 0; _percentError = 0.0; _imismatch = 0; _vmismatch = 0; _imatchdiff = 0; _vmatchdiff = 0; _wordExtAllow = 0; _exonManager = 0L; _minMSPScore = 0; _diagMax = 0; _diagExt = 0L; } mspManager::~mspManager() { delete [] _allMSPs; delete [] _diagExt; } static int get_edist(int f1, int f2, int t1, int t2, char *seq1, char *seq2) { char *s1, *s2, *q1, *q2; int dist=0; s1 = seq1+f1+1; /* bc at this stage, the msp pos do not have added +1 */ s2 = seq2+f2+1; q1 = seq1+t1+1; q2 = seq2+t2+1; while (s1<=q1 && s2<=q2) { dist += (*s1!=*s2); s1++; s2++; } return dist; } static int mspManager_msp_compare(const void *A, const void *B) { msp const *a = (msp const *)A; msp const *b = (msp const *)B; if (a->pos2 < b->pos2) return(-1); if (a->pos2 > b->pos2) return(1); if (a->pos1 < b->pos1) return(-1); if (a->pos1 > b->pos1) return(1); return(0); } static int find_log_entry(const int *log4s, int n, int len, int offset) { int a; a = n/2; if ((len=log4s[a-1]))) return MAX(0,(a-1))+offset; else if ((len>=log4s[a]) && ((a==n-1) || (lenlog4s[a]) return find_log_entry(log4s+a+1,n-a-1,len, offset+a+1); return -1; } Exon* mspManager::doLinking(int weight, int drange, int offset1, int offset2, int flag, int relinkFlag, char *s1, char *s2) { // Ensure the MSP's are sorted // if (_sorted == false) qsort(_allMSPs, _numMSPs, sizeof(struct msp), mspManager_msp_compare); _sorted = true; // // Assumes the exon list is cleared // // If this ever occurs, you (the programmer) forgot to call // mspManager::setParameters() with the correct values. Unless the // code was really hacked, this should never occur. See // Sim4::Sim4(). // if ((_match == 0) && (_imatchdiff == 0) && (_vmatchdiff == 0) && (_percentError == 0.0)) { fprintf(stderr, "sim4::link()-- ERROR; mspManager parameters not set! 
This is an algorithm error.\n"); exit(1); } // Check if this match looks suspiciously expensive // if ((_cDNALength > 0) && (_mspLimitAbsolute > 0) && (_mspLimitAbsolute < _numMSPs) && (_mspLimitPercent > 0.0) && (_mspLimitPercent * _cDNALength < _numMSPs)) { _tooManyMSPs = true; return(0L); } int f1, f2, best, diag, diff_diag, best_sc, tryval; best = -1; best_sc = INT_MIN; #if 0 for (uint32 i = 0; i < _numMSPs; ++i) { fprintf(stderr, "LINK MSP %d -- %d-%d %d-%d score=%d,%d\n", i, _allMSPs[i].pos1, _allMSPs[i].pos1 + _allMSPs[i].len, _allMSPs[i].pos2, _allMSPs[i].pos2 + _allMSPs[i].len, _allMSPs[i].score, _allMSPs[i].linkingScore); } #endif for (uint32 i = 0; i < _numMSPs; ++i) { f1 = _allMSPs[i].pos1; /* start position in seq1 */ f2 = _allMSPs[i].pos2; /* start position in seq2 */ diag = f1 - f2; _allMSPs[i].prev = -1; _allMSPs[i].linkingScore = 0; #ifdef SHOW_LINKING fprintf(stderr, "link %d\r", i); fflush(stderr); #endif for (uint32 j = 0; j < i; ++j) { // 12 == default word size. A Magic Value. int WS = 12; int vL = DEFAULT_L; if ((_allMSPs[i].pos2 + _allMSPs[i].len - _allMSPs[j].pos2 - _allMSPs[j].len > 2 * WS) && (_allMSPs[i].pos2 - _allMSPs[j].pos2 > 2 * WS)) vL *= 2; diff_diag = diag - _allMSPs[j].pos1 + _allMSPs[j].pos2; // Abort if the difference is too big // if ((diff_diag < -drange) || ((diff_diag > drange) && (diff_diag < MIN_INTRON)) || (_allMSPs[j].pos2 + _allMSPs[j].len - 1 - f2 > vL) || (_allMSPs[j].pos1 + _allMSPs[j].len - 1 - f1 > vL)) continue; int n = abs(diff_diag); tryval = _allMSPs[j].linkingScore - n; if (relinkFlag) tryval = _allMSPs[j].linkingScore - ((n <= 100000) ? 
n : (100000+(int)(10*log((double)(n-100000))))); if (tryval > _allMSPs[i].linkingScore) { _allMSPs[i].linkingScore = tryval; _allMSPs[i].prev = j; } } _allMSPs[i].linkingScore += (weight * _allMSPs[i].score); if (_allMSPs[i].linkingScore > best_sc) { best = i; best_sc = _allMSPs[i].linkingScore; } } if (best < 0) return(0L); int last_msp = best; int diag_dist; int diff; msp *mp = _allMSPs + last_msp; Exon *elist = _exonManager->newExon(mp->pos1, mp->pos2, mp->pos1+mp->len-1, mp->pos2+mp->len-1, -1, (mp->len * _match - mp->score) / _vmatchdiff, 0, 0L); last_msp = mp->prev; while (last_msp >= 0) { mp = _allMSPs + last_msp; int l1 = elist->frEST - elist->frGEN; int l2 = mp->pos2 - mp->pos1; if (l1 > l2) diag_dist = l1 - l2; else diag_dist = l2 - l1; if ((diag_dist <= DEFAULT_L) && (elist->frEST - (mp->pos2 + mp->len - 1)) < MAX_INTERNAL_GAP) { /* merge with previous exon */ elist->edist += diag_dist; elist->edist += (mp->len * _match - mp->score) / _vmatchdiff; if ((diff=mp->pos2+mp->len-elist->frEST)>0) { /* overlap */ int dist1, dist2; dist1 = get_edist(elist->frGEN,mp->pos2+mp->len-diff, elist->frGEN+diff-1,mp->pos2+mp->len-1,s1,s2); dist2 = get_edist(mp->pos1+mp->len-diff,mp->pos2+mp->len-diff, mp->pos1+mp->len-1,mp->pos2+mp->len-1,s1,s2); elist->edist -= MAX(dist1,dist2); } else if (diff<0) { /* gap */ elist->edist += (int)(0.5 * _percentError * (-1) * diff); } elist->toGEN = MAX(elist->toGEN,mp->pos1+mp->len-1); elist->toEST = MAX(elist->toEST,mp->pos2+mp->len-1); elist->frGEN = MIN(elist->frGEN,mp->pos1); elist->frEST = MIN(elist->frEST,mp->pos2); } else { elist = _exonManager->newExon(mp->pos1, mp->pos2, mp->pos1+mp->len-1, mp->pos2+mp->len-1, -1, (mp->len * _match - mp->score) / _vmatchdiff, 0, elist); } last_msp = mp->prev; } // Fix them? What does this do?? 
// Exon *tmp_block = elist; while (tmp_block != 0L) { tmp_block->length = tmp_block->toEST-tmp_block->frEST+1; tmp_block->toGEN += offset1; tmp_block->frGEN += offset1; tmp_block->toEST += offset2; tmp_block->frEST += offset2; tmp_block->flag = flag; tmp_block = tmp_block->next_exon; } return(elist); } // The log4 arrays were computed to mimick the behaviour of the log formula // for computing the msp threshold in exon_cores(). For genomic_log4s, // entry i stores the value for the length of a genomic sequence // for which the contribution to the msp threshold is i/2, i.e.: // 1.4*log_4(3/4*len1) = i/2; // // Similarly, cDNA_log4s entries store lengths of the cDNA sequence for which // the contribution to the msp threshold is i/2, i.e.: // 1.4*log_4(len2) = i/2; // // Both arrays are sorted in increasing order, and can be searched with // binary search. // #define GEN_LOG4_ENTRIES 45 #define CDNA_LOG4_ENTRIES 25 const int genomic_log4s[GEN_LOG4_ENTRIES]= {1, 2, 3, 5, 9, 15, 26, 42, 70, 114, 188, 309, 507, 832, 1365, 1365, 2240, 2240, 3675, 6029, 9892, 16231, 26629, 43690, 71681, 117606, 192953, 316573, 519392, 852152, 1398101, 2293823, 3763409, 6174516, 10130347, 16620564, 27268873, 44739242, 73402365, 120429110, 197584514, 324171126, 531858072, 872603963, 1431655765 }; const int cDNA_log4s[CDNA_LOG4_ENTRIES]= {1, 1, 2, 4, 7, 11, 19, 32, 52, 86, 141, 231, 380, 624, 1024, 1680, 2756, 4522, 7419, 12173, 19972, 32768, 53761, 88204, 144715 }; #if 0 // The original used a binary search but with so few entries brute // force works better. // LLL 4/9/2009: does not return the same result as the original, // and gives false positive matches for interspecies comparisons; // restored original version // int get_msp_threshold(int len1, int len2) { int i, j; // Find the index of the largest value smaller than our lengths. // i = 0; while (i len1) break; i++; } i--; j = 0; while (j len2) break; j++; } j--; // // XXX: This looks suspicious! 
// if ((i % 2) == 0) return(i/2+j/2); if ((j % 2) == 0) return(i/2+j/2); return(i/2+j/2+1); } #endif int get_msp_threshold(int len1, int len2) { int i, j; i = find_log_entry(genomic_log4s, GEN_LOG4_ENTRIES, len1, 0); j = find_log_entry(cDNA_log4s, CDNA_LOG4_ENTRIES, len2, 0); if (!(i % 2)) return (int)(i/2+j/2); else if (!(j % 2)) return (int)(i/2+j/2); else return (int)(i/2+j/2+1); } void mspManager::setScoreThreshold(int K, int interspecies) { if (interspecies) { if (K <= 0) { // _minMSPScore = (int)(((int)(log(.75*(double)_GENlen)+log((double)_ESTlen))/log(4.0)) * 1.0); _minMSPScore = get_msp_threshold(_GENlen, _ESTlen); } else { _minMSPScore = K; } } else { if (K <= 0) { _minMSPScore = get_msp_threshold(_GENlen, _ESTlen); // compensate for the rounding in the log formula if (_minMSPScore >= 0) _minMSPScore--; } else { _minMSPScore = K; } } } void mspManager::addHit_(char *genSeq, char *estSeq, int genLen, int estLen, int genPos, int estPos, mss_t &MSS) { char *genBeg = 0L; char *estBeg = 0L; char *genEnd = 0L; char *genTmp = 0L; char *estTmp = 0L; int right_sum = 0; int middle_sum = 0; int left_sum = 0; int sum = 0; int score = 0; #ifdef DEBUG_EXTENSION fprintf(stderr, "mspManager::addHit()-- extending hit from GEN %d to %d and EST %d to %d (length = %d)\n", genPos-W, genPos, estPos-W, estPos, W); #endif #ifdef DEBUG_EXTENSION { char L[41], M[41], R[41]; int x; if (genPos-MSS.seedLength > 20) genTmp = genSeq + 1 + genPos - MSS.seedLength - 20; else genTmp = genSeq + 1; x=0; while (genTmp < genSeq + 1 + genPos - MSS.seedLength) L[x++] = *genTmp++; L[x] = 0; x=0; while (genTmp < genSeq + 1 + genPos) M[x++] = *genTmp++; M[x] = 0; x=0; while (genTmp < genSeq + 1 + genPos + 20) R[x++] = *genTmp++; R[x] = 0; fprintf(stderr, "GEN=%8d %s:%s:%s\n", genPos, L, M, R); if (estPos-MSS.seedLength > 20) estTmp = estSeq + 1 + estPos - MSS.seedLength - 20; else estTmp = estSeq + 1; x=0; while (estTmp < estSeq + 1 + estPos - MSS.seedLength) L[x++] = *estTmp++; L[x] = 0; x=0; 
while (estTmp < estSeq + 1 + estPos) M[x++] = *estTmp++; M[x] = 0; x=0; while (estTmp < estSeq + 1 + estPos + 20) R[x++] = *estTmp++; R[x] = 0; fprintf(stderr, "EST=%8d %s:%s:%s\n", estPos, L, M, R); } #endif // We use diagonals directly -- original version offset the array of // diagonal positions by the constant value included below. // Extend to the right // left_sum = 0; sum = 0; genTmp = genSeq + 1 + genPos; estTmp = estSeq + 1 + estPos; genEnd = genTmp; while ((*genTmp) && (*estTmp) && (estTmp <= estSeq + estLen) && (genTmp <= genSeq + genLen) && (sum >= left_sum - _wordExtAllow)) { sum += _match; if (*estTmp != *genTmp) sum -= (transitionFunction(*estTmp, *genTmp) ? _imatchdiff : _vmatchdiff); estTmp++; genTmp++; if (sum > left_sum) { left_sum = sum; genEnd = genTmp; } } #ifdef TEST_SEEDS_IN_EXTENSION // Check the bases that the seed supposedly matched // middle_sum = 0; sum = 0; genTmp = genSeq + 1 + genPos - 1; estTmp = estSeq + 1 + estPos - 1; for (int x=0; x estSeq + 1) && (genTmp > genSeq + 1) && (sum >= right_sum - _wordExtAllow)) { estTmp--; genTmp--; sum += _match; if (*estTmp != *genTmp) sum -= (transitionFunction(*estTmp, *genTmp) ? _imatchdiff : _vmatchdiff); if (sum > right_sum) { right_sum = sum; estBeg = estTmp; genBeg = genTmp; } } score = middle_sum + left_sum + right_sum; #ifdef DEBUG_MSPS printf("TESTMSP: p1 = %7d p2 = %7d l = %7d sc = %7d (%d-%d-%d) ", (int)(genBeg - (genSeq + 1)), (int)(estBeg - (estSeq + 1)), (int)(genEnd - genBeg), score, left_sum, middle_sum, right_sum); printf("g: "); for (s=genBeg; s= _minMSPScore) addMSP((int)(genEnd - genBeg), (int)(genBeg - (genSeq + 1)), (int)(estBeg - (estSeq + 1)), score); #ifdef DEBUG_EXTENSION fprintf(stderr, "mspManager::addHit()-- added from GEN %d to %d and EST %d to ? 
(length = %d) with score %d (needed %d) l,m,r sums %d %d %d\n", (int)(genBeg - (genSeq + 1)), (int)(genEnd - (genSeq + 1)) + W, (int)(estBeg - (estSeq + 1)), MSS.seedLength, score, _minMSPScore, left_sum, middle_sum, right_sum); #endif // Remember the highest point that this diagonal has been extended // to. We use this to short circuit useless mer extensions (if // we've already extended through it). // _diagExt[estLen + genPos - estPos - 1] = (int)(genEnd - genSeq - 1 + MSS.seedLength); } kmer-code-2013-trunk/libsim4/sim4core/CHANGES0000644000000000000000000000176707605326113017247 0ustar rootrootFri Apr 26 14:24:29 EDT 2002 Test for overlapping exons in sim4string.C. If SHOW_OVERLAPPING_EXONS is defined, they will be printed to stderr. Mon Apr 29 03:57:10 EDT 2002 (build 2333) Added '-V' option to print script lines as they are processed. Changed the wording of the status ('-v') output. Wed Aug 14 12:59:42 EDT 2002 Added -forcestrand to force the strand prediction to always be 'forward' or 'reverse'. Changes to sim4string.C/run() and util.C/slide_intron(), and sim4db.H. Added dbParams._forceStrandPrediction flag to enable/disable this behavior. Wed Aug 14 16:18:03 EDT 2002 Fixed sync_slide_intron to increase the limit of Glist, Clist and oris from 200 exons to anything. titin mapped to ncbi human genome had more than 200 exons. Wed Aug 28 14:03:50 EDT 2002 Fixed get_stats() to _not_ check/reset the strand prediction when -forcestrand is in effect. In addition, fixed run() to set the orientation of 'unknown' matches to FWD first. For complement matches, this will later become BWD. 
kmer-code-2013-trunk/libsim4/sim4core/sim4string.C0000644000000000000000000006021012322046702020442 0ustar rootroot#include "sim4.H" #include "sim4polishBuilder.H" //#define SHOW_OVERLAPPING_EXONS static void add_offset_exons(Exon *exons, int offset) { if (!offset || !exons) return; for (; exons; exons = exons->next_exon) { if (exons->toGEN) { exons->frEST += offset; exons->toEST += offset; } } } #if 0 static void add_offset_aligns(edit_script_list *aligns, int offset) { if (!offset || !aligns) return; for (; aligns; aligns = aligns->next_script) aligns->offset2 += offset; } #endif void Sim4::maskExonsFromSeeds(sim4command *cmd, Exon *theExon) { while (theExon) { if (theExon->toGEN) { for (uint32 x=0; xnumberOfExternalSeeds(); x++) { uint32 pos = cmd->externalSeedGENPosition(x); if (((uint32)theExon->frGEN <= pos + 1) && (pos <= (uint32)theExon->toGEN + cmd->externalSeedLength(x))) cmd->maskExternalSeed(x); } } theExon = theExon->next_exon; } } void Sim4::maskExonsFromGenomic(Exon *theExon, char *f, char *r, int l) { while (theExon) { if (theExon->toGEN) { for (int i=theExon->frGEN-1; itoGEN; i++) f[i] = 'N'; for (int i=l-theExon->frGEN; i>=l-theExon->toGEN; i--) r[i] = 'N'; } theExon = theExon->next_exon; } } sim4polishList* Sim4::run(sim4command *cmd) { sim4polishBuilder B; sim4polishList *L = new sim4polishList; int dist, match_ori; int g_pA=0, f_pA=0, r_pA=0; int g_pT=0, f_pT=0, r_pT=0; Exon *fExons = NULL; Exon *rExons = NULL; edit_script_list *fAligns = NULL; edit_script_list *rAligns = NULL; int matchesPrinted = 0; char touppercache[256]; for (int i=0; i<256; i++) touppercache[i] = (char)toupper(i); cmd->finalize(); uint32 dblen = cmd->getGENhi() - cmd->getGENlo(); char *dbseq = 0L; char *dbrev = 0L; char *dbseqorig = cmd->getGENsequence(); int estlen = 0; char *estseq = 0L; char *estrev = 0L; char *estseqorig = 0L; //mss_t MSS; LLL DELETE // Allocate space for temporary sequence storage. 
We need // to allocate space for two copies of the database, and space // for the longest EST (in case we need to print it out // reverse complemented). // char *seqStorage = 0L; uint32 seqStorageSize = 0; seqStorageSize = 2 * dblen + 2 * cmd->getESTlength() + 8; seqStorage = new char [seqStorageSize]; // Original, forward, reverse, cdna // dbseq = seqStorage; dbrev = seqStorage + dblen + 2; estseq = seqStorage + dblen + 2 + dblen + 2; estrev = seqStorage + dblen + 2 + dblen + 2 + cmd->getESTlength() + 2; // Prepare the database sequence // // Trimming to the correct range // Convert to uppercase // Reverse complement // for (uint32 i=0, j=cmd->getGENlo(), k=dblen-1; jgetGENhi(); i++, j++, k--) { dbseq[i] = touppercache[(int)dbseqorig[j]]; dbrev[k] = complementSymbol[(int)dbseq[i]]; } dbseq[dblen] = 0; dbrev[dblen] = 0; sim4_stats_t st, rev_st; estseqorig = cmd->getESTsequence(); estlen = cmd->getESTlength(); for (int i=0; i_ignorePolyTails) { get_polyAT(estseq, estlen, &g_pT, &g_pA); } // GRRR! 
XXXXX This needs to be defined outside the loop, and before the goto's bool pleaseContinueComputing = false; if (estlen - g_pA - g_pT <= 0) goto abort; matchesPrinted = 0; do { //fprintf(stderr, "sim4string::main loop begins!\n"); int nmatches = 0; double coverage = 0; int percentid = 0; pleaseContinueComputing = false; B.create(cmd->getESTidx(), estlen, cmd->getGENidx(), cmd->getGENlo(), cmd->getGENhi()); if (globalParams->_includeDefLine) { B.setESTdefline(cmd->getESTheader()); B.setGENdefline(cmd->getGENheader()); } memset(&st, 0, sizeof(sim4_stats_t)); memset(&rev_st, 0, sizeof(sim4_stats_t)); if (cmd->externalSeedsExist() == false) { // MSS = masks_shifts(globalParams->_spacedSeed); LLL DELETE bld_table(estseq - 1 + g_pT, estlen - g_pA - g_pT, spacedSeedMSS, INIT); } if (cmd->doForward()) { // Initialize the sequences and lengths // // genSeq was seq1 // estSeq was seq2 // _genSeq = dbseq; _estSeq = estseq + g_pT; _genLen = dblen; _estLen = estlen - g_pT - g_pA; // This should be in a better spot. _mspManager.setLength(_estLen); _mspManager.clearDiagonal(_genLen, _estLen); _mspManager.setScoreThreshold(mspThreshold1, globalParams->_interspecies); #ifdef SHOW_EXTERNAL_SEEDING fprintf(stderr, "FWD: estLen = %d genLen = %d\n", _estLen, _genLen); #endif // Find the seeds. 
// if (cmd->externalSeedsExist() == false) { exon_cores(_genSeq-1, _estSeq-1, _genLen, _estLen, 1, 1, 0, spacedSeedMSS, mspThreshold1, PERM); } else { #ifdef SHOW_EXTERNAL_SEEDING fprintf(stderr, "FWD: Using external seeds -- adding "uint32FMT" seeds to sim4.\n", cmd->numberOfExternalSeeds()); #endif cmd->sortExternalSeeds(); for (uint32 x=0; xnumberOfExternalSeeds(); x++) if (cmd->externalSeedLength(x) > 0) _mspManager.addHit(_genSeq-1, _estSeq-1, _genLen, _estLen, cmd->externalSeedGENPosition(x), cmd->externalSeedESTPosition(x), spacedSeedMSS); // LLL 6-17/10 This doesn't make sense here (seed is probably 20mer, but not used anyway // cmd->externalSeedLength(x)); LLL: MUST CHANGE, using spaced seeds exon_list = _mspManager.doLinking(DEFAULT_WEIGHT, DEFAULT_DRANGE, 1, 1, 0, false, _genSeq, _estSeq); #ifdef SHOW_EXTERNAL_SEEDING fprintf(stderr, "FWD: Added and chained, starting SIM4() run.\n"); #endif } fAligns = SIM4(&dist, &fExons, &f_pA, &f_pT, &st); // Continued from util.C :: slide_intron() // // If we are forcing the strand prediction, and we are still unknown, // set the strand prediction to the match orientation. Since this // will be reversed later on, set it to FWD here. // if ((globalParams->_forceStrandPrediction) && (st.orientation == BOTH)) st.orientation = FWD; // If the match was deemed expensive, report // if (st.tooManyMSPs) { B.setNumberOfMatches(0, 0); B.setPercentIdentity(0); B.setMatchOrientation(SIM4_MATCH_FORWARD); B.setStrandOrientation(SIM4_STRAND_INTRACTABLE); B.addExon(1, estlen, 1, cmd->getGENhi() - cmd->getGENlo(), st.numberOfMatches, 0, 0, SIM4_INTRON_NONE); goto fail; } } if (cmd->doReverse()) { // Initialize the sequences and lengths // // genSeq was seq1 // estSeq was seq2 // _genSeq = dbrev; _estSeq = estseq + g_pT; _genLen = dblen; _estLen = estlen - g_pT - g_pA; // This should be in a better spot. 
_mspManager.setLength(_estLen); _mspManager.clearDiagonal(_genLen, _estLen); _mspManager.setScoreThreshold(mspThreshold1, globalParams->_interspecies); #ifdef SHOW_EXTERNAL_SEEDING fprintf(stderr, "BWD: estLen = %d genLen = %d g_pT=%d g_pA=%d\n", _estLen, _genLen, g_pT, g_pA); #endif // Find the seeds. // if (cmd->externalSeedsExist() == false) { exon_cores(_genSeq-1, _estSeq-1, _genLen, _estLen, 1, 1, 0, spacedSeedMSS, mspThreshold1, PERM); } else { #ifdef SHOW_EXTERNAL_SEEDING fprintf(stderr, "BWD: Using external seeds -- adding "uint32FMT" seeds to sim4.\n", cmd->numberOfExternalSeeds()); #endif cmd->sortExternalSeeds(); // We have sorted the seeds in incresing genomic position, // but we need to reverse everything. We can do this by just // adding the seeds backwards! // // for (uint32 x=cmd->numberOfExternalSeeds(); x--; ) // // Not sure _why_ we wanted to add them backwards, but it // screws up the addHit logic of skipping seeds we have // extended through. I vaguely remember some piece of sim4 // external seeding needing to be done backwards. // Apparently, this isn't it. // for (uint32 x=0; xnumberOfExternalSeeds(); x++) if (cmd->externalSeedLength(x) > 0) _mspManager.addHit(_genSeq-1, _estSeq-1, _genLen, _estLen, cmd->externalSeedGENPosition(x), cmd->externalSeedESTPosition(x), spacedSeedMSS); // 6-17-10 LLL This doesn't make sense here; seed must probably be a 20-mer, but the code is unused anyway // cmd->externalSeedLength(x)); LLL: MUST CHANGE, using spaced seeds exon_list = _mspManager.doLinking(DEFAULT_WEIGHT, DEFAULT_DRANGE, 1, 1, 0, false, _genSeq, _estSeq); #ifdef SHOW_EXTERNAL_SEEDING fprintf(stderr, "BWD: Added and chained, starting SIM4() run.\n"); #endif } rAligns = SIM4(&dist, &rExons, &r_pA, &r_pT, &rev_st); // Continued from util.C :: slide_intron() // // If we are forcing the strand prediction, and we are still unknown, // set the strand prediction to the match orientation. 
// if ((globalParams->_forceStrandPrediction) && (rev_st.orientation == BOTH)) rev_st.orientation = FWD; // If the match was deemed expensive, report if (rev_st.tooManyMSPs) { B.setNumberOfMatches(0, 0); B.setPercentIdentity(0); B.setMatchOrientation(SIM4_MATCH_COMPLEMENT); B.setStrandOrientation(SIM4_STRAND_INTRACTABLE); B.addExon(1, estlen, 1, cmd->getGENhi() - cmd->getGENlo(), rev_st.numberOfMatches, 0, 0, SIM4_INTRON_NONE); goto fail; } } if (st.numberOfMatches >= rev_st.numberOfMatches) { match_ori = FWD; if (globalParams->_ignorePolyTails) { add_offset_exons(fExons, g_pT); //add_offset_aligns(fAligns, g_pT); for (edit_script_list *aligns = fAligns; aligns; aligns = aligns->next_script) aligns->offset2 += g_pT; } B.setPolyTails(g_pA + f_pA, g_pT + f_pT); if (fExons) { // We used to mask the seeds down with the masking of the // genomic, but reverse exons are flipped here, and we need // unflipped exons to mask. // if (cmd->externalSeedsExist() && globalParams->_findAllExons) maskExonsFromSeeds(cmd, fExons); if (checkExonsForOverlaps(fExons)) { #ifdef SHOW_OVERLAPPING_EXONS B.setNumberOfMatches(0, 0); B.setPercentIdentity(0); B.setMatchOrientation(SIM4_MATCH_FORWARD); B.setStrandOrientation(SIM4_STRAND_FAILED); // XXX: result contains the exons and alignments //B.addExon(1, estlen, 1, cmd->getGENhi() - cmd->getGENlo(), rev_st.numberOfMatches, 0, SIM4_INTRON_NONE); #endif goto fail; } } } else { match_ori = BWD; if (globalParams->_ignorePolyTails) { add_offset_exons(rExons, g_pT); //add_offset_aligns(rAligns, g_pT); for (edit_script_list *aligns = rAligns; aligns; aligns = aligns->next_script) aligns->offset2 += g_pT; } B.setPolyTails(g_pA + r_pA, g_pT + r_pT); if (rAligns && rAligns->next_script) script_flip_list(&rAligns); if (rExons) { if (cmd->externalSeedsExist() && globalParams->_findAllExons) maskExonsFromSeeds(cmd, rExons); // This used to be right before appendExons() in // the reverse match section, but we need it // before we test for overlapping 
exons // complement_exons(&rExons, dblen, estlen); if (checkExonsForOverlaps(rExons)) { #ifdef SHOW_OVERLAPPING_EXONS B.setNumberOfMatches(0, 0); B.setPercentIdentity(0); B.setMatchOrientation(SIM4_MATCH_COMPLEMENT); B.setStrandOrientation(SIM4_STRAND_FAILED); // XXX: result contains the exons and alignments //B.addExon(1, estlen, 1, cmd->getGENhi() - cmd->getGENlo(), rev_st.numberOfMatches, 0, SIM4_INTRON_NONE); #endif goto fail; } } } if (match_ori == FWD) { nmatches = st.numberOfMatches; percentid = st.percentID; } else { nmatches = rev_st.numberOfMatches; percentid = rev_st.percentID; } coverage = (double)nmatches / (double)estlen; // Is this match decent? // pleaseContinueComputing = ((coverage >= globalParams->_minCoverage) && (percentid >= globalParams->_minPercentExonIdentity) && (nmatches >= globalParams->_minCoverageLength) && (nmatches > 0)); // If we're supposed to print at least _alwaysReport things, // and we found a match, keep going. // if ((matchesPrinted < globalParams->_alwaysReport) && (nmatches > 0)) pleaseContinueComputing = true; // However, if we have printed enough stuff, and the last one is // below the thresholds, stop. 
// if ((matchesPrinted >= globalParams->_alwaysReport) && ((coverage < globalParams->_minCoverage) || (percentid < globalParams->_minPercentExonIdentity))) pleaseContinueComputing = false; if (pleaseContinueComputing) { matchesPrinted++; if (match_ori == FWD) { B.setNumberOfMatches(st.numberOfMatches, st.numberOfNs); B.setPercentIdentity(st.percentID); B.setMatchOrientation(SIM4_MATCH_FORWARD); switch (st.orientation) { case FWD: B.setStrandOrientation(SIM4_STRAND_POSITIVE); break; case BWD: B.setStrandOrientation(SIM4_STRAND_NEGATIVE); break; default: B.setStrandOrientation(SIM4_STRAND_UNKNOWN); break; } } else { B.setNumberOfMatches(rev_st.numberOfMatches, rev_st.numberOfNs); B.setPercentIdentity(rev_st.percentID); B.setMatchOrientation(SIM4_MATCH_COMPLEMENT); B.setStrandOrientation(SIM4_STRAND_FAILED); switch (rev_st.orientation) { case FWD: B.setStrandOrientation(SIM4_STRAND_NEGATIVE); break; case BWD: B.setStrandOrientation(SIM4_STRAND_POSITIVE); break; default: B.setStrandOrientation(SIM4_STRAND_UNKNOWN); break; } } // If we have external seeds, we need to mask out seeds that we // used BEFORE we print alignments -- printing reverse // alignments also switches from reverse-complemented genomic // to reverse-complemented EST, and then we can't (easily) mask // seeds! // // Likewise, we can't do the normal masking before we print the // alignments, else we'd just print out N's for the genome. // if (match_ori == FWD) { appendExons(B, fExons); if (globalParams->_printAlignments) { appendAlignments(B, estseq, dbseq, estlen, dblen, fAligns, fExons, FWD); } if (globalParams->_findAllExons) maskExonsFromGenomic(fExons, dbseq, dbrev, dblen); } else { appendExons(B, rExons); if (globalParams->_printAlignments) { for (int i=0, k=estlen-1; i_findAllExons) maskExonsFromGenomic(rExons, dbseq, dbrev, dblen); } } fail: // These are NOT garbage collected! 
if (fAligns) free_align(fAligns); if (rAligns) free_align(rAligns); // These ARE garbage collected //freeExonList(fExons); //freeExonList(rExons); fAligns = rAligns = 0L; fExons = rExons = 0L; L->push(B.release()); } while (globalParams->_findAllExons && pleaseContinueComputing); abort: delete [] seqStorage; return(L); } //////////////////////////////////////////////////////////// // // Exons // //////////////////////////////////////////////////////////// bool Sim4::checkExonsForOverlaps(Exon *theExons) { Exon *a = theExons; Exon *b = theExons->next_exon; while (b && b->toGEN) { if ((b->frGEN <= a->toGEN) || (b->frEST <= a->toEST)) { return(true); } a = b; b = b->next_exon; } return(false); } void Sim4::appendExons(sim4polishBuilder &B, Exon *theExons) { Exon *theExon = theExons; while (theExon) { if (theExon->toGEN) { #ifdef SPLSCORE // Save the splice score (theExon->splScore); // "%d-%d (%d-%d) <%d-%d-%d> %1.2f %s" #error I do not know how to save the splice score! #endif char ori = SIM4_INTRON_NONE; if ((theExon->next_exon) && (theExon->next_exon->toGEN)) { switch (theExon->ori) { case 'C': // <- ori = SIM4_INTRON_NEGATIVE; break; case 'E': // == ori = SIM4_INTRON_GAP; break; case 'G': // -> ori = SIM4_INTRON_POSITIVE; break; case 'N': // -- ori = SIM4_INTRON_AMBIGUOUS; break; default: ori = SIM4_INTRON_ERROR; break; } } B.addExon(theExon->frEST, theExon->toEST, theExon->frGEN, theExon->toGEN, theExon->numMatches, theExon->numNs, theExon->percentID, ori); } theExon = theExon->next_exon; } } //////////////////////////////////////////////////////////// // // Alignments // //////////////////////////////////////////////////////////// void Sim4::IDISPLAY(sim4polishBuilder &builder, char *aString, char *bString, char *A, char *B, int M, int N, int *S, int AP, int BP, int est_strand, Exon *exons) { Exon *t0; register int i, j, op; int starti, is_intron=0; if ((exons==NULL) || (!exons->toGEN && (exons->next_exon==NULL))) { builder.addExonAlignment("Empty exon list; no 
alignment possible!", "Empty exon list; no alignment possible!"); return; } /* find the starting exon for this alignment */ t0 = exons; while (t0 && (((est_strand==2) && ((t0->frGEN!=AP) || (t0->frEST!=BP))) || ((est_strand==1) && ((t0->frGEN!=BP) || (t0->frEST!=AP))))) { t0 = t0->next_exon; } if (!t0) { builder.addExonAlignment("Alignment fragment not found; no alignment possible!", "Alignment fragment not found; no alignment possible!"); return; } i = j = op = 0; starti = (t0->next_exon && t0->next_exon->toGEN) ? (t0->toGEN+1):-1; char *a = aString; char *b = bString; #if 0 fprintf(stderr, "M=%d N=%d\n", M, N); fprintf(stderr, "aString=0x%p\nbString=0x%p\n", aString, bString); #endif while (i < M || j < N) { *a = *b = 0; #if 0 fprintf(stderr, "i=%d < M=%d and j=%d < N=%d\n", i, M, j, N); fprintf(stderr, "a=%s\n", aString); fprintf(stderr, "b=%s\n", bString); #endif if (op == 0 && *S == 0) { op = *S++; i++; j++; if (A[i] == B[j]) { *a++ = (char)(A[i] + 'a' - 'A'); *b++ = (char)(B[j] + 'a' - 'A'); } else { *a++ = A[i]; *b++ = B[j]; } } else { if (op == 0) op = *S++; if (op > 0) { if (est_strand==2) { *a++ = '-'; *b++ = B[++j]; op--; } else { if (j+BP==starti) { /* detected intron */ t0 = t0->next_exon; starti=(t0->next_exon && t0->next_exon->toGEN)?(t0->toGEN+1):-1; /* print entire exon */ is_intron = 1; j += op; op = 0; } else { *a++ = '-'; *b++ = B[++j]; op--; } } } else { if (est_strand==1) { *a++ = A[++i]; *b++ = '-'; op++; } else { if (i+AP==starti) { /* detected intron */ t0 = t0->next_exon; starti=(t0->next_exon && t0->next_exon->toGEN)?(t0->toGEN+1):-1; is_intron = 1; i += -op; op = 0; } else { *a++ = A[++i]; *b++ = '-'; op++; } } } } if (is_intron || ((i >= M) && (j >= N))) { *a = 0; *b = 0; builder.addExonAlignment(aString, bString); a = aString; b = bString; is_intron = 0; } } } void Sim4::S2A(edit_script *head, int *S) { edit_script *tp; int *lastS, i; tp = head; lastS = S; while (tp != NULL) { if (tp->op_type == SUBSTITUTE) { for (i=0; inum; ++i) 
*lastS++ = 0; } else if (tp->op_type == INSERT) { *lastS++ = -tp->num; } else { /* DELETE */ *lastS++ = tp->num; } tp = tp->next; } *(S-1) = (int)(lastS - S); } void Sim4::appendAlignments(sim4polishBuilder &builder, char *s1, char *s2, int l1, int l2, edit_script_list *Aligns, Exon *Exons, int match_ori) { if (Aligns==NULL) return; // Detemine the maximum length of an alignment by finding the // longest exon. // int maxAlignmentLength = 0; Exon *theExon = Exons; while (theExon) { if (theExon->toGEN) { if (maxAlignmentLength < (theExon->toGEN - theExon->frGEN + theExon->toEST - theExon->frEST)) maxAlignmentLength = theExon->toGEN - theExon->frGEN + theExon->toEST - theExon->frEST; } theExon = theExon->next_exon; } char *aString = new char [maxAlignmentLength + 4]; char *bString = new char [maxAlignmentLength + 4]; for(edit_script_list *aligns = Aligns; aligns; aligns = aligns->next_script) { int *S = (int *)ckalloc((2 * aligns->len2 + 1 + 1) * sizeof(int)); S++; S2A(aligns->script, S); if (match_ori==FWD) { IDISPLAY(builder, aString, bString, s1 + aligns->offset2 - 1 - 1, s2 + aligns->offset1 - 1 - 1, aligns->len2, aligns->len1, S, aligns->offset2, aligns->offset1, 1, Exons); } else { align_reverse(S); IDISPLAY(builder, aString, bString, s1 + l1 + 1 - (aligns->offset2 + aligns->len2 - 1) - 1 - 1, s2 + l2 + 1 - (aligns->offset1 + aligns->len1 - 1) - 1 - 1, aligns->len2, aligns->len1, S, l1 + 1 - (aligns->offset2+aligns->len2 - 1), l2 + 1 - (aligns->offset1+aligns->len1 - 1), 1, Exons); } ckfree(S-1); } delete [] aString; delete [] bString; } kmer-code-2013-trunk/libsim4/sim4core/sim4b1.C0000644000000000000000000002325012415066336017451 0ustar rootroot#include "sim4.H" #ifdef DEBUG_EXONS #define PRINTEXONS(S, L) (L)->printList(S) #else #define PRINTEXONS(S, L) #endif Sim4::edit_script_list * Sim4::SIM4(int *dist_ptr, Exon **Exons, int *pA, int *pT, sim4_stats_t *st) { int rollbflag; Exon *Lblock=0L, *tmp_Lblock=0L; Exon *Rblock=0L, *tmp_Rblock=0L; Exon *tmp_block=0L; 
Exon *tmp_block1=0L; *dist_ptr = 0; *Exons = 0L; *pA = 0; *pT = 0; // // The call to exon_cores() that used to be here is now done in sim4string. // // See if there are too many MSPs found. If so, fail. // st->tooManyMSPs = false; if (_mspManager.tooManyMSPs()) { st->tooManyMSPs = true; st->numberOfMatches = _mspManager.numberOfMSPs(); return(0L); } PRINTEXONS("initial exon set\n", exon_list); tmp_block = Lblock = exon_list; while (tmp_block) { if (tmp_block->next_exon==NULL) Rblock = tmp_block; tmp_block = tmp_block->next_exon; } if (Lblock && ((Lblock->frGEN>50000 && Lblock->frEST>100) || ((_genLen - Rblock->toGEN > 50000) && (_estLen - Rblock->toEST > 100)))) { //freeExonList(exon_list); garbage collected exon_list = _mspManager.doLinking(globalParams->_relinkWeight, DEFAULT_DRANGE, 1, 1, 0, true, _genSeq, _estSeq); PRINTEXONS("relink the initial stuff\n", exon_list); tmp_block = Lblock = exon_list; while (tmp_block) { if (tmp_block->next_exon==NULL) Rblock = tmp_block; tmp_block = tmp_block->next_exon; } } _mspManager.clear(); tmp_block = Lblock = exon_list; while (tmp_block) { if (tmp_block->next_exon==NULL) Rblock = tmp_block; tmp_block = tmp_block->next_exon; } PRINTEXONS("initial exon set after possibly relinking\n", exon_list); /* enclose the current path in the (0,0,0,0) and (M+1,N+1,0,0) brackets */ #ifdef SHOW_PROGRESS fprintf(stderr, "exon bracket at start\n"); #endif Lblock = _exonManager.newExon(0,0,0,0,0,0,0,Lblock); if (Rblock == NULL) Rblock = Lblock; #ifdef SHOW_PROGRESS fprintf(stderr, "exon bracket at end; Lblock = 0x%08lx, Rblock = 0x%08lx\n", Lblock, Rblock); #endif Rblock->next_exon = _exonManager.newExon(_genLen+1,_estLen+1,0,0,0,0,0,NULL); PRINTEXONS("initial exon set after inserting brackets\n", Lblock); /* compute current statistics */ bool good_match = get_match_quality(Lblock, Rblock, st, _estLen); PRINTEXONS("after get_match_quality\n", Lblock); #ifdef SHOW_PROGRESS fprintf(stderr, "before big nasty while loop\n"); #endif tmp_block = 
Lblock; while ((tmp_block1 = tmp_block->next_exon)!=NULL) { PRINTEXONS("start of loop to fill in missing pieces\n", Lblock); rollbflag = 0; // This is the distance from this exon to the next exon // in the EST // int diff = (int)(tmp_block1->frEST - tmp_block->toEST - 1); #ifdef SHOW_PROGRESS fprintf(stdout, "tmp_block: %8d %8d %8d %8d %d diff=%d\n", tmp_block->frGEN, tmp_block->toGEN, tmp_block->frEST, tmp_block->toEST, tmp_block->flag, diff); #endif if (diff) { if (diff < 0) { // If the diff is less than zero, then there is an overlap in // the EST. Wobble the boundary using GTAG signals (so // obviously, this won't work correctly if we are not cDNA). // #ifdef SHOW_PROGRESS fprintf(stderr, "Called SIM4_block1() with diff=%d\n", diff); #endif rollbflag = SIM4_block1(Lblock, tmp_block, tmp_block1); } else { // Otherwise, there is a gap in the EST, and we need to fill // it in. This is done only if there is no overlap in the // genomic. // if (tmp_block1->frGEN - tmp_block->toGEN - 1 > 0) { if (tmp_block1->toEST && tmp_block->toEST) { // We are not the first or last gap -- an interior gap // between two exons. // #ifdef SHOW_PROGRESS fprintf(stderr, "Called SIM4_block2()\n"); #endif rollbflag = SIM4_block2(tmp_Lblock, tmp_Rblock, tmp_block, tmp_block1); } else if (tmp_block1->toGEN) { // Not the last gap, so must be the first gap. // #ifdef SHOW_PROGRESS fprintf(stderr, "Called SIM4_block3()\n"); #endif rollbflag = SIM4_block3(good_match, tmp_Lblock, tmp_Rblock, tmp_block, tmp_block1); } else { // By default, the last gap. // #ifdef SHOW_PROGRESS fprintf(stderr, "Called SIM4_block4()\n"); #endif rollbflag = SIM4_block4(good_match, tmp_Lblock, tmp_Rblock, tmp_block, tmp_block1); } } else { // Overlapping genomic. What these do when set to // NULL is unknown. 
// tmp_Rblock = tmp_Lblock = NULL; } // Merge block in the exon list; make connections to the // previous list of blocks; maintain increasing order // if (tmp_Lblock) { tmp_block->next_exon = tmp_Lblock; tmp_Rblock->next_exon = tmp_block1; PRINTEXONS("before merge tmp_block\n", tmp_block); PRINTEXONS("before merge tmp_block1\n", tmp_block1); PRINTEXONS("before merge tmp_Lblock\n", tmp_Lblock); PRINTEXONS("before merge tmp_Rblock\n", tmp_Rblock); merge(&tmp_block,&tmp_block1); } } } // If this exon block was not removed, move to the next. If it was removed, // we're already there. // if (rollbflag == 0) tmp_block = tmp_block1; } PRINTEXONS("all done -- final Lblock\n", Lblock); #ifdef SHOW_PROGRESS fprintf(stderr, "sim4b1 -- before compact_list\n"); #endif /* compaction step; note: it resets the right end of the list to */ /* the last item in the block list */ compact_list(&(Lblock->next_exon), &Rblock, (globalParams->_interspecies ? SHORT_INTRON : wordSize)); if (globalParams->_interspecies) filter(&Lblock, &Rblock); #ifdef SHOW_PROGRESS fprintf(stderr, "sim4b1 -- before small block at start removal\n"); #endif /* eliminate marginal small blocks at the start of the sequence; */ /* resets the empty alignment to one block (Lblock) only */ tmp_block = Lblock->next_exon; while ((tmp_block!=NULL) && (tmp_block->lengthtoGEN) { tmp_block1 = tmp_block; tmp_block = tmp_block->next_exon; //freeExon(tmp_block1); garbage collected } Lblock->next_exon = tmp_block; PRINTEXONS("all done -- after removing small blocks at the start\n", Lblock); // eliminate marginal small blocks at the end of the sequence // XXX: Yes, there is a leak here. That's why we garbage collect! 
#ifdef SHOW_PROGRESS fprintf(stderr, "Rblock before end of list removal 0x%08lx\n", Rblock); #endif Exon *last = Lblock->next_exon; tmp_block = last; while (tmp_block!=NULL) { if (tmp_block->length>=wordSize) last = tmp_block; tmp_block = tmp_block->next_exon; } if (last && last->toGEN) last->next_exon = Rblock->next_exon; Rblock = last; #ifdef SHOW_PROGRESS fprintf(stderr, "Rblock after end of list removal 0x%08lx\n", Rblock); #endif PRINTEXONS("all done -- after removing small blocks at the end\n", Lblock); /* if high accuracy requirement, adjust boundaries of marginal exons */ if (_accurateSequences) adjustBoundariesOfMarginalExons(Lblock); /* Slide exon boundaries for optimal intron signals */ if (globalParams->_slideIntrons) { if (globalParams->_interspecies == 1) { SLIDE_INTRON(MIN(15,MAX_SLIDE), Lblock->next_exon, Rblock, spliceModel, st, 1); } else { if (get_sync_flag(Lblock, Rblock, 6) == 1) SLIDE_INTRON(6, Lblock->next_exon, Rblock, SPLICE_ORIGINAL, st, 1); else SLIDE_INTRON(6, Lblock->next_exon, Rblock, SPLICE_ORIGINAL, st, 0); } } else { // Set orientation flag on introns to be unknown -- this has an // undesired side effect of forcing the resulting match to have a // strand orientation the same as the intron orientation (if one // exon) instead of 'unknown'. 
Exon *t0 = Lblock->next_exon; Exon *t1 = NULL; while (t0 && (t1=t0->next_exon) && t1->toGEN) { t0->ori = 'E'; t0 = t1; } } /* decreasingly; script will be in reverse order */ struct edit_script_list *Shead = NULL; flip_list(&Lblock, &Rblock); pluri_align(dist_ptr, Lblock, &Shead, st); flip_list(&Lblock, &Rblock); /* increasingly */ *pT = 0; *pA = 0; if (Shead) { if (globalParams->_ignorePolyTails) { remove_polyT_front(&Shead, Lblock, _genSeq, _estSeq, pT); remove_polyA_back(&Shead, Lblock, _genSeq, _estSeq, _estLen, pA); if (*pA || *pT) updateStatistics(Lblock, st); } get_stats(Lblock, st); *Exons = Lblock->next_exon; //freeExon(Lblock); garbage collected } else { *Exons = 0L; //freeExonList(Lblock); garbage collected } // Memory leak when Script_head == 0L -- see pluri_align, too! return(Shead); } kmer-code-2013-trunk/libsim4/sim4core/splice.C0000644000000000000000000006724612415066336017646 0ustar rootroot#include #include "sim4.H" #define GENESPLICER_SPAN 80 #define GLIMMER_XSPAN 30 #define GLIMMER_ISPAN 20 #define GLIMMER_SPAN 30 #define S4_SPAN 0 /* #define MAX_SPAN 80 Now defined in sim4.H */ static int spl_encode[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; static int rev_compl[256] = { 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 
84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 71, 84, 84, 84, 67, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 65, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 71, 84, 84, 84, 67, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 65, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84 }; static int spliceInit = 0; int const gt[5][5] = {{0, 0, 0, 2, 0}, {0, 0, 0, 2, 0}, {2, 3, 2, 5, 2}, {0, 0, 0, 2, 0}, {0, 0, 0, 2, 0}}; int const ct[5][5] = {{0, 0, 0, 2, 0}, {2, 2, 2, 5, 2}, {0, 0, 0, 2, 0}, {0, 0, 0, 2, 0}, {0, 0, 0, 2, 0}}; int const ag[5][5] = {{2, 2, 5, 2, 2}, {0, 0, 2, 0, 0}, {0, 0, 2, 0, 0}, {0, 0, 2, 0, 0}, {0, 0, 2, 0, 0}}; int const ac[5][5] = {{2, 5, 2, 2, 2}, {0, 2, 0, 0, 0}, {0, 3, 0, 0, 0}, {0, 2, 0, 0, 0}, {0, 2, 0, 0, 0}}; #if 0 int const gt[4][4] = {{0, 0, 0, 2},{0, 0, 0, 2},{2, 2, 2, 5},{0, 0, 0, 2}}; int const ct[4][4] = {{0, 0, 0, 2},{2, 2, 2, 5},{0, 0, 0, 2},{0, 0, 0, 2}}; int const ag[4][4] = {{2, 2, 5, 2},{0, 0, 2, 0},{0, 0, 2, 0},{0, 0, 2, 0}}; int const ac[4][4] = {{2, 5, 2, 2},{0, 2, 0, 0},{0, 2, 0, 0},{0, 2, 0, 0}}; #endif /* GLIMMER functions - move to glimmer.h? 
*/ static char Glimmer_TRAIN_DIR[] = "./GlimmerModels/"; static char Glimmer_posDonModelPath[] = "donors.162.pos.icm"; static char Glimmer_negDonModelPath[] = "donors.162.neg.icm"; static char Glimmer_posAccModelPath[] = "acceptors.162.pos.icm"; static char Glimmer_negAccModelPath[] = "acceptors.162.neg.icm"; struct Fixed_Length_ICM_t donor_pos_model, donor_neg_model; struct Fixed_Length_ICM_t acceptor_pos_model, acceptor_neg_model; int donor_pos_model_len, donor_neg_model_len; int acceptor_pos_model_len, acceptor_neg_model_len; int initGlimmerModel = 0; void Sim4::loadGlimmerModel (char *train_dir) { char filename[1000]; if (initGlimmerModel) return; /* LLL is this still needed? Yes, since it is initialized in the class Sim4*/ sprintf(filename, "%s/%s", train_dir, Glimmer_posDonModelPath); readModel (&donor_pos_model, filename); sprintf(filename, "%s/%s", train_dir, Glimmer_negDonModelPath); readModel (&donor_neg_model, filename); sprintf(filename, "%s/%s", train_dir, Glimmer_posAccModelPath); readModel (&acceptor_pos_model, filename); sprintf(filename, "%s/%s", train_dir, Glimmer_negAccModelPath); readModel (&acceptor_neg_model, filename); donor_pos_model_len = getModelLength (donor_pos_model); donor_neg_model_len = getModelLength (donor_neg_model); acceptor_pos_model_len = getModelLength (acceptor_pos_model); acceptor_neg_model_len = getModelLength (acceptor_neg_model); if (donor_pos_model_len!=donor_neg_model_len) fatal ("ERROR: Positive and negative donor model lengths differ\n"); if (acceptor_pos_model_len!=acceptor_neg_model_len) fatal ("ERROR: Positive and negative acceptor model lengths differ\n"); initGlimmerModel = 1; } double Sim4::ScoreDonor_Glimmer (char *asegment, char *train_dir) { double pos_score, neg_score, diff; pos_score = Score_Window (donor_pos_model, asegment, GLIMMER_XSPAN); neg_score = Score_Window (donor_neg_model, asegment, GLIMMER_XSPAN); diff = pos_score - neg_score; // printf ("%s %9.5f %9.5f %9.5f\n", string, pos_score, neg_score, 
diff); return diff; } double Sim4::ScoreAcceptor_Glimmer (char *asegment, char *train_dir) { double pos_score, neg_score, diff; pos_score = Score_Window (acceptor_pos_model, asegment, GLIMMER_ISPAN); neg_score = Score_Window (acceptor_neg_model, asegment, GLIMMER_ISPAN); diff = pos_score - neg_score; // printf ("%s %9.5f %9.5f %9.5f\n", string, pos_score, neg_score, diff); return diff; } /* Generic splice scoring functions: new_splice(), splice_donor(), splice_donor_uni(), splice_acceptor(), splice_acceptor_uni(), splice_init() */ Sim4::splice_t * Sim4::new_splice(char c, int xs, int xe, int ys, int ye, double score, splice_t *next) { splice_t *sp = (splice_t *)ckalloc(sizeof(splice_t)); sp->type = c; sp->xs = xs; sp->xe = xe; sp->ys = ys; sp->ye = ye; sp->score = score; sp->next = next; return sp; } void Sim4::splice_init(int spl_model) { if (spliceInit) return; #if 0 // Enable this to generate the spl_encode and rev_compl data // initialized at the top of this file. for (int i=0; i<256; spl_encode[i]=0, rev_compl[i]='T', i++) ; spl_encode[(int)'A'] = spl_encode[(int)'a'] = 0; spl_encode[(int)'C'] = spl_encode[(int)'c'] = 1; spl_encode[(int)'G'] = spl_encode[(int)'g'] = 2; spl_encode[(int)'T'] = spl_encode[(int)'t'] = 3; rev_compl[(int)'A'] = rev_compl[(int)'a'] = 'T'; rev_compl[(int)'C'] = rev_compl[(int)'c'] = 'G'; rev_compl[(int)'G'] = rev_compl[(int)'g'] = 'C'; rev_compl[(int)'T'] = rev_compl[(int)'t'] = 'A'; for (int i=0; i<256; i++) fprintf(stdout, "%2d, ", spl_encode[i]); fprintf(stdout, "\n"); for (int i=0; i<256; i++) fprintf(stdout, "%2d, ", rev_compl[i]); fprintf(stdout, "\n"); exit(1); #endif if ((spl_model != SPLICE_GENESPLICER) && (spl_model != SPLICE_GLIMMER)) { spliceInit = 1; return; } // This really needs to be moved out of the Sim4 class. Sim4 should take as a parameter // the model to use, which should be initialized by the client -- before it starts doing // any sim4 work. 
if (spliceInit == 1) // Data already loaded, no need to involve a mutex here. return; pthread_mutex_lock(&(globalParams->_splice_mutex)); // If after getting the mutex the data still isn't loaded, load it. Otherwise, someone // already loaded the data for us and we just exit. if (spliceInit == 0) { if (spl_model == SPLICE_GENESPLICER) loadGeneSplicerModel(); if (spl_model == SPLICE_GLIMMER) loadGlimmerModel(Glimmer_TRAIN_DIR); spliceInit = 1; } pthread_mutex_unlock(&(globalParams->_splice_mutex)); } void Sim4::splice_donor(char *xseq, char *yseq, int M, int N, double *gt_score, double *ct_score, double **max_Gf, double **max_Cf, int **start_Gi, int **start_Ci) { int *CCf, *Xt; double *mG, *mC, tmpf; int *sC, *sG; int i, j, tmpi, ss, ssx, cx, c; char *s, *t; CCf = (int *)ckalloc((M+1)*sizeof(int)); Xt = (int *)ckalloc((M+1)*sizeof(int)); mG = *max_Gf = (double *)ckalloc((N+1)*sizeof(double)); sG = *start_Gi = (int *)ckalloc((N+1)*sizeof(int)); mC = *max_Cf = (double *)ckalloc((N+1)*sizeof(double)); sC = *start_Ci = (int *)ckalloc((N+1)*sizeof(int)); t = yseq; Xt[0] = CCf[0] = 0; for (j=1; j<=M; j++) { CCf[j] = j; Xt[j] = 0; } mG[0] = mC[0] = -999999; for (j=0; j<=M; j++) { if ((100*gt_score[j])>mG[0]) { mG[0] = 100*gt_score[j]; sG[0] = j; } if ((100*ct_score[j])>mC[0]) { mC[0] = 100*ct_score[j]; sC[0] = j; } } for (i=1; i<=N; i++, t++) { s = xseq; ss = CCf[0]; ssx = Xt[0]; c = ++CCf[0]; cx = Xt[0]; for (j=1; j<=M; j++, s++) { tmpi=MIN(MIN(CCf[j]+1, ss+(*t!=*s)),c+1); if (tmpi==c+1); else if (tmpi==CCf[j]+1) cx = Xt[j]; else cx = ssx + (*t==*s); c = tmpi; ss = CCf[j]; CCf[j] = c; ssx = Xt[j]; Xt[j] = cx; } /* compute max_Gf and max_Cf */ mG[i] = mC[i] = -999999; for (j=0; j<=M; j++) { assert(Xt[j]+CCf[j]!=0); tmpf = (int)(stepct(j)*Xt[j]/(double)(Xt[j]+CCf[j])*100); if ((tmpf+100*gt_score[j])>mG[i]) { mG[i] = tmpf+100*gt_score[j]; sG[i] = j; #if 0 fprintf(stderr, "%2d: mG[i]=%1.6f tmpf=%1.6f gt_score[%2d]=%1.6f\n", i, mG[i], tmpf, j, gt_score[j]); #endif } if 
((tmpf+100*ct_score[j])>mC[i]) { mC[i] = tmpf+100*ct_score[j]; sC[i] = j; } } } ckfree(CCf); ckfree(Xt); } void Sim4::splice_donor_uni(char *xseq, char *yseq, int M, int N, double *It_score, double **max_If, int **start_Ii) { int *CCf, *Xt, tmpi; double *mI, tmpf; int *sI; int i, j, ss, ssx, cx, c; char *s, *t; CCf = (int *)ckalloc((M+1)*sizeof(int)); Xt = (int *)ckalloc((M+1)*sizeof(int)); mI = *max_If = (double *)ckalloc((N+1)*sizeof(double)); sI = *start_Ii = (int *)ckalloc((N+1)*sizeof(int)); t = yseq; Xt[0] = CCf[0] = 0; for (j=1; j<=M; j++) { CCf[j] = j; Xt[j] = 0; } mI[0] = -999999; for (j=0; j<=M; j++) if ((100*It_score[j])>mI[0]) { mI[0] = 100*It_score[j]; sI[0] = j; } for (i=1; i<=N; i++, t++) { s = xseq; ss = CCf[0]; ssx = Xt[0]; c = ++CCf[0]; cx = Xt[0]; for (j=1; j<=M; j++, s++) { tmpi=MIN(MIN(CCf[j]+1, ss+(*t!=*s)),c+1); if (tmpi==c+1); else if (tmpi==CCf[j]+1) cx = Xt[j]; else cx = ssx + (*t==*s); c = tmpi; ss = CCf[j]; CCf[j] = c; ssx = Xt[j]; Xt[j] = cx; } /* compute max_If */ mI[i] = -999999; for (j=0; j<=M; j++) { assert(Xt[j]+CCf[j]!=0); tmpf = (int)(stepct(j)*Xt[j]/(double)(Xt[j]+CCf[j])*100)+100*It_score[j]; if (tmpf>mI[i]) { mI[i] = tmpf; sI[i] = j; } } } ckfree(CCf); ckfree(Xt); } void Sim4::splice_acceptor(char *xseq, char *yseq, int M, int N, double *ag_score, double *ac_score, double **max_Gb, double **max_Cb, int **end_Gi, int **end_Ci) { int *CCb, *Xt; double *mC, *mG, tmpf; int *eC, *eG; int tmpi, i, j, ss, ssx, cx, c; char *t, *s; CCb = (int *)ckalloc((M+1)*sizeof(int)); Xt = (int *)ckalloc((M+1)*sizeof(int)); mG = *max_Gb = (double *)ckalloc((N+1)*sizeof(double)); eG = *end_Gi = (int *)ckalloc((N+1)*sizeof(int)); mC = *max_Cb = (double *)ckalloc((N+1)*sizeof(double)); eC = *end_Ci = (int *)ckalloc((N+1)*sizeof(int)); t = yseq+N-1; CCb[M] = Xt[M] = 0; for (j=M-1; j>=0; j--) { CCb[j] = M-j; Xt[j] = 0; } mG[N] = mC[N] = -999999; for (j=M; j>=0; j--) { if ((100*ag_score[j])>mG[N]) { mG[N] = 100*ag_score[j]; eG[N] = j+1; } if 
((100*ac_score[j])>mC[N]) { mC[N] = 100*ac_score[j]; eC[N] = j+1; } } for (i=N-1; i>=0; i--, t--) { s = xseq+M-1; ss = CCb[M]; ssx = Xt[M]; c = ++CCb[M]; cx = Xt[M]; for (j=M-1; j>=0; j--, s--) { tmpi=MIN(MIN(CCb[j]+1, ss+(*t!=*s)),c+1); if (tmpi==c+1) ; else if (tmpi==CCb[j]+1) cx = Xt[j]; else cx = ssx + (*t==*s); c = tmpi; ss = CCb[j]; CCb[j] = c; ssx = Xt[j]; Xt[j] = cx; } /* compute max_Gb and max_Cb */ mG[i] = -999999; mC[i] = -999999; for (j=M; j>=0; j--) { assert(CCb[j]+Xt[j]!=0); tmpf = (int)(stepct(M-j)*Xt[j]/(double)(CCb[j]+Xt[j])*100); if ((tmpf+100*ag_score[j])>mG[i]) { mG[i] = tmpf+100*ag_score[j]; eG[i] = j+1; } if ((tmpf+100*ac_score[j])>mC[i]) { mC[i] = tmpf+100*ac_score[j]; eC[i] = j+1; } } } ckfree(CCb); ckfree(Xt); } void Sim4::splice_acceptor_uni(char *xseq, char *yseq, int M, int N, double *aI_score, double **max_Ib, int **end_Ii) { int *CCb, *Xt; double *mI, tmpf; int *eI; int tmpi, i, j, ss, ssx, cx, c; char *t, *s; CCb = (int *)ckalloc((M+1)*sizeof(int)); Xt = (int *)ckalloc((M+1)*sizeof(int)); mI = *max_Ib = (double *)ckalloc((N+1)*sizeof(double)); eI = *end_Ii = (int *)ckalloc((N+1)*sizeof(int)); t = yseq+N-1; CCb[M] = Xt[M] = 0; for (j=M-1; j>=0; j--) { CCb[j] = M-j; Xt[j] = 0; } mI[N] = -999999; for (j=M; j>=0; j--) if ((100*aI_score[j])>mI[N]) { mI[N] = 100*aI_score[j]; eI[N] = j+1; } for (i=N-1; i>=0; i--, t--) { s = xseq+M-1; ss = CCb[M]; ssx = Xt[M]; c = ++CCb[M]; cx = Xt[M]; for (j=M-1; j>=0; j--, s--) { tmpi=MIN(MIN(CCb[j]+1, ss+(*t!=*s)),c+1); if (tmpi==c+1) ; else if (tmpi==CCb[j]+1) cx = Xt[j]; else cx = ssx + (*t==*s); c = tmpi; ss = CCb[j]; CCb[j] = c; ssx = Xt[j]; Xt[j] = cx; } /* compute max_Ib */ mI[i] = -999999; for (j=M; j>=0; j--) { assert(CCb[j]+Xt[j]!=0); tmpf = (int)(stepct(M-j)*Xt[j]/(double)(CCb[j]+Xt[j])*100)+100*aI_score[j]; if (tmpf>mI[i]) { mI[i] = tmpf; eI[i] = j+1; } } } ckfree(CCb); ckfree(Xt); } void Sim4::splice(char *in_seqx, int ls, int us, int le, int ue, char *in_seqy, int ys, int ye, splice_t **gcell, 
splice_t **ccell, int ori, int spl_model) { double *gtscore=NULL, *ctscore=NULL, *agscore=NULL, *acscore=NULL; int i; double tmpf, maxCscore, maxGscore; int Gxs, Gxe, Gy, Cxs, Cxe, Cy; double *max_Cf=NULL, *max_Gf=NULL, *max_Cb=NULL, *max_Gb=NULL; int *start_Gi=NULL, *start_Ci=NULL, *end_Gi=NULL, *end_Ci=NULL; char *nsegmentL=NULL, *nsegmentR=NULL, *asegmentL=NULL, *asegmentR=NULL; // Initialize the encoding. This isn't quite as wonderful as // it should be, as there is a chance that two different threads // could initialize the encoding twice, but then again, // it doesn't matter. // //splice_init(spl_model); LLL nsegmentL = (char *) ckalloc(2*MAX_SPAN + 2*MAX_SLIDE + 3); nsegmentR = (char *) ckalloc(2*MAX_SPAN + 2*MAX_SLIDE + 3); /* Obs: for Glimmer scoring, need only remember the reverse complemented segments; but for now we allocate two arrays */ if (spl_model==SPLICE_GLIMMER) { asegmentL = (char *) ckalloc(2*MAX_SPAN + 2*MAX_SLIDE + 3); asegmentR = (char *) ckalloc(2*MAX_SPAN + 2*MAX_SLIDE + 3); } if (ori==FWD || ori==BOTH) { gtscore = (double *)ckalloc(((us-ls+2)+(ue-le+2))*sizeof(double)); agscore = gtscore+(us-ls+2); } if (ori==BWD || ori==BOTH) { ctscore = (double *)ckalloc(((us-ls+2)+(ue-le+2))*sizeof(double)); acscore = ctscore+(us-ls+2); } switch (spl_model) { case SPLICE_ORIGINAL: splice_original(in_seqx,ls,us,le,ue,in_seqy,ys,ye,gtscore,agscore,ctscore,acscore,ori,nsegmentL,nsegmentR); break; case SPLICE_GENESPLICER: splice_GeneSplicer(in_seqx,ls,us,le,ue,in_seqy,ys,ye,gtscore,agscore,ctscore,acscore,ori,nsegmentL,nsegmentR); break; case SPLICE_GLIMMER: splice_Glimmer(in_seqx,ls,us,le,ue,in_seqy,ys,ye,gtscore,agscore,ctscore,acscore,ori,nsegmentL,nsegmentR,asegmentL,asegmentR); break; default: fprintf(stderr, "Unrecognized splice model (%d). 
Using original.\n", spl_model); splice_original(in_seqx,ls,us,le,ue,in_seqy,ys,ye,gtscore,agscore,ctscore,acscore,ori,nsegmentL,nsegmentR); break; } if (ori==FWD) { splice_donor_uni(in_seqx+ls-1, in_seqy+ys-1, us-ls+1, ye-ys+1, gtscore, &max_Gf, &start_Gi); splice_acceptor_uni(in_seqx+le-1, in_seqy+ys-1, ue-le+1, ye-ys+1, agscore, &max_Gb, &end_Gi); ckfree(gtscore); /* ckfree(agscore) */ } else if (ori==BWD) { splice_donor_uni(in_seqx+ls-1, in_seqy+ys-1, us-ls+1, ye-ys+1, ctscore, &max_Cf, &start_Ci); splice_acceptor_uni(in_seqx+le-1, in_seqy+ys-1, ue-le+1, ye-ys+1, acscore, &max_Cb, &end_Ci); ckfree(ctscore); /* ckfree(acscore) */ } else { splice_donor(in_seqx+ls-1, in_seqy+ys-1, us-ls+1, ye-ys+1, gtscore, ctscore, &max_Gf, &max_Cf, &start_Gi, &start_Ci); splice_acceptor(in_seqx+le-1, in_seqy+ys-1, ue-le+1, ye-ys+1, agscore, acscore, &max_Gb, &max_Cb, &end_Gi, &end_Ci); ckfree(gtscore); /* ckfree(agscore); */ ckfree(ctscore); /* ckfree(acscore); */ } #if 0 for (i=0; i<=ye-ys+1; i++) { fprintf(stderr, "%3d: max_Gf=%1.6f max_Cf=%1.6f max_Gb=%1.6f max_Cb=%1.6f\n", i, max_Gf[i], max_Cf[i], max_Gb[i], max_Cb[i]); } #endif maxCscore = -999999; maxGscore = -999999; Gxs = Gxe = Gy = Cxs = Cxe = Cy = -1; if (ori==FWD || ori==BOTH) { for (i=0; i<=ye-ys+1; i++) { if ((tmpf=max_Gf[i]+max_Gb[i])>maxGscore) { maxGscore = tmpf; /* save (i, start_Gi[i], end_Gi[i]); */ Gxs = ls+start_Gi[i]-1; Gxe = le+end_Gi[i]-1; Gy = ys+i-1; } } ckfree(max_Gf); ckfree(max_Gb); ckfree(start_Gi); ckfree(end_Gi); } if (ori==BWD || ori==BOTH) { for (i=0; i<=ye-ys+1; i++) { if ((tmpf=max_Cf[i]+max_Cb[i])>maxCscore) { maxCscore = tmpf; /* save (i, start_Ci[i], end_Ci[i]); */ Cxs = ls+start_Ci[i]-1; Cxe = le+end_Ci[i]-1; Cy = ys+i-1; } } ckfree(max_Cf); ckfree(max_Cb); ckfree(start_Ci); ckfree(end_Ci); } #if 0 fprintf(stderr, "%8d %8d %8d %8d %8f\n%8d %8d %8d %8d %f\n", Gxs, Gxe, Gy, Gy+1, maxGscore, Cxs, Cxe, Cy, Cy+1, maxCscore); #endif *gcell = new_splice('G', Gxs, Gxe, Gy, Gy+1, maxGscore, NULL); 
*ccell = new_splice('C', Cxs, Cxe, Cy, Cy+1, maxCscore, NULL); #ifdef DEBUG printf("Type: %c sx: %d se: %d ys: %d score: %d\n", gcell.type, gcell.xs, gcell.xe, gcell.ys, gcell.score); printf("Type: %c sx: %d se: %d ys: %d score: %d\n", ccell.type, ccell.xs, ccell.xe, ccell.ys, ccell.score); #endif ckfree(nsegmentL); ckfree(nsegmentR); if (spl_model==SPLICE_GLIMMER) { ckfree(asegmentL); ckfree(asegmentR); } return; } /* Customized splice signal scoring functions: splice_original(), splice_GeneSplicer(), splice_Glimmer() */ void Sim4::splice_original(char *in_seqx, int ls, int us, int le, int ue, char *in_seqy, int ys, int ye, double *gtscore, double *agscore, double *ctscore, double *acscore, int ori, char *nsegmentL, char *nsegmentR) { int p, q, i; char *s,*t, ch; /* changed MAX_SPAN to S4_SPAN; see main fix to out of bounds problems in util.C */ for (i=0, s=in_seqx+ls-S4_SPAN-1; i<2*S4_SPAN+us-ls+3; nsegmentL[i++] = spl_encode[(int)(*s++)]); for (i=0, s=in_seqx+le-2-S4_SPAN-1; i<2*S4_SPAN+ue-le+3; nsegmentR[i++] = spl_encode[(int)(*s++)]); if (ori==FWD || ori==BOTH) { if (globalParams->_dontForceCanonicalSplicing) { for (p=0, s=nsegmentL+S4_SPAN; p<=us-ls+1; p++, s++) gtscore[p] = 0; for (q=ue-le+1, s=nsegmentR+S4_SPAN+ue-le+2; q>=0; q--, s--) agscore[q] = 0; } else { for (p=0, s=nsegmentL+S4_SPAN; p<=us-ls+1; p++, s++) gtscore[p] = gt[(int)*s][(int)*(s+1)]; for (q=ue-le+1, s=nsegmentR+S4_SPAN+ue-le+2; q>=0; q--, s--) agscore[q] = ag[(int)*(s-1)][(int)*s]; } } if (ori==BWD || ori==BOTH) { /* reverse complement the nsegments, 0-3 alphabet */ for (s=nsegmentL, t=nsegmentL+2*S4_SPAN+us-ls+3-1; s_dontForceCanonicalSplicing) { for (p=0, s=nsegmentL+S4_SPAN+us-ls+2; p<=us-ls+1; p++, s++) ctscore[p] = 0; for (q=ue-le+1, s=nsegmentR+S4_SPAN; q>=0; q--, s--) acscore[q] = 0; } else { for (p=0, s=nsegmentL+S4_SPAN+us-ls+2; p<=us-ls+1; p++, s--) ctscore[p] = ag[(int)*(s-1)][(int)*s]; for (q=ue-le+1, s=nsegmentR+S4_SPAN; q>=0; q--, s++) acscore[q] = gt[(int)*s][(int)*(s+1)]; } 
} return; } void Sim4::splice_GeneSplicer(char *in_seqx, int ls, int us, int le, int ue, char *in_seqy, int ys, int ye, double *gtscore, double *agscore, double *ctscore, double *acscore, int ori, char *nsegmentL, char *nsegmentR) { int p, q, i; char *s,*t, ch; /* changed MAX_SPAN to GENESPLICER_SPAN; see main fix to out of bounds problems in util.C */ for (i=0, s=in_seqx+ls-GENESPLICER_SPAN-1; i<2*GENESPLICER_SPAN+us-ls+3; nsegmentL[i++] = spl_encode[(int)(*s++)]); for (i=0, s=in_seqx+le-2-GENESPLICER_SPAN-1; i<2*GENESPLICER_SPAN+ue-le+3; nsegmentR[i++] = spl_encode[(int)(*s++)]); if (ori==FWD || ori==BOTH) { for (p=0, s=nsegmentL+GENESPLICER_SPAN; p<=us-ls+1; p++, s++) { gtscore[p] = ScoreDonor_GeneSplicer(s-GENESPLICER_SPAN); if (gtscore[p] < -14) gtscore[p] = -14.0; if (gtscore[p] > 19) gtscore[p] = 19; gtscore[p] = 5.0*(gtscore[p]+14.0)/33.0; gtscore[p] = 0.4*gtscore[p] + 0.6*gt[(int)*s][(int)*(s+1)]; } for (q=ue-le+1, s=nsegmentR+GENESPLICER_SPAN+ue-le+2; q>=0; q--, s--) { agscore[q] = ScoreAcceptor_GeneSplicer(s-GENESPLICER_SPAN-1); if (agscore[q] < -23) agscore[q] = -23.0; if (agscore[q] > 20) agscore[q] = 20.0; agscore[q] = 5.0*(agscore[q]+23.0)/43.0; agscore[q] = 0.4*agscore[q] + 0.6*ag[(int)*(s-1)][(int)*s]; } #if 0 printf("gtscore:"); for (p=0; p<=us-ls+1; p++) printf(" %f", gtscore[p]); printf("\n"); printf("agscore:"); for (q=ue-le+1; q>=0; q--) printf(" %f", agscore[q]); printf("\n"); #endif } if (ori==BWD || ori==BOTH) { /* reverse complement the nsegments, 0-3 alphabet */ for (s=nsegmentL, t=nsegmentL+2*GENESPLICER_SPAN+us-ls+3-1; s 20) ctscore[p] = 20.0; ctscore[p] = 5.0*(ctscore[p]+23.0)/43.0; ctscore[p] = 0.4*ctscore[p] + 0.6*ag[(int)*(s-1)][(int)*s]; } for (q=ue-le+1, s=nsegmentR+GENESPLICER_SPAN; q>=0; q--, s++) { acscore[q] = ScoreDonor_GeneSplicer(s-GENESPLICER_SPAN); if (acscore[q] < -14) acscore[q] = -14.0; if (acscore[q] > 19) acscore[q] = 19.0; acscore[q] = 5.0*(acscore[q]+14.0)/33.0; acscore[q] = 0.4*acscore[q] + 
0.6*gt[(int)*s][(int)*(s+1)]; } #if 0 printf("ctscore:"); for (p=0; p<=us-ls+1; p++) printf(" %f", ctscore[p]); printf("\n"); printf("acscore:"); for (q=ue-le+1; q>=0; q--) printf(" %f", acscore[q]); printf("\n"); #endif } return; } void Sim4::splice_Glimmer(char *in_seqx, int ls, int us, int le, int ue, char *in_seqy, int ys, int ye, double *gtscore, double *agscore, double *ctscore, double *acscore, int ori, char *nsegmentL, char *nsegmentR, char *asegmentL, char *asegmentR) { int p, q, i; char *s,*t, ch; /* changed MAX_SPAN to GLIMMER_SPAN; see also main fix to out of bounds problems in util.C */ for (i=0, s=in_seqx+ls-GLIMMER_SPAN-1; i<2*GLIMMER_SPAN+us-ls+3; nsegmentL[i++] = spl_encode[(int)(*s++)]); for (i=0, s=in_seqx+le-2-GLIMMER_SPAN-1; i<2*GLIMMER_SPAN+ue-le+3; nsegmentR[i++] = spl_encode[(int)(*s++)]); /* Glimmer specific matrices */ for (i=0, s=in_seqx+ls-GLIMMER_SPAN-1; i<2*GLIMMER_SPAN+us-ls+3; asegmentL[i++] = *s++); for (i=0, s=in_seqx+le-2-GLIMMER_SPAN-1; i<2*GLIMMER_SPAN+ue-le+3; asegmentR[i++] = *s++); if (ori==FWD || ori==BOTH) { for (p=0, s=nsegmentL+GLIMMER_SPAN, t=asegmentL+GLIMMER_SPAN; p<=us-ls+1; p++, s++, t++) { gtscore[p] = ScoreDonor_Glimmer(t-GLIMMER_XSPAN, Glimmer_TRAIN_DIR); if (gtscore[p] < 0) gtscore[p] = 0.0; if (gtscore[p] > 0.31) gtscore[p] = 0.31; gtscore[p] = 5.0*(gtscore[p]+0.0)/0.31; gtscore[p] = 0.2*gtscore[p] + 0.8*gt[(int)*s][(int)*(s+1)]; } for (q=ue-le+1, s=nsegmentR+GLIMMER_SPAN+ue-le+2, t=asegmentR+GLIMMER_SPAN+ue-le+2; q>=0; q--, s--, t--) { agscore[q] = ScoreAcceptor_Glimmer(t-GLIMMER_ISPAN-1, Glimmer_TRAIN_DIR); if (agscore[q] < -0.16) agscore[q] = -0.16; if (agscore[q] > 0.23) agscore[q] = 0.23; agscore[q] = 5.0*(agscore[q]+0.16)/0.39; agscore[q] = 0.2*agscore[q] + 0.8*ag[(int)*(s-1)][(int)*s]; } } if (ori==BWD || ori==BOTH) { /* reverse complement the nsegments, 0-3 alphabet */ for (s=nsegmentL, t=nsegmentL+2*GLIMMER_SPAN+us-ls+3-1; s 0.23) ctscore[p] = 0.23; ctscore[p] = 5.0*(ctscore[p]+0.16)/0.39; ctscore[p] = 
0.2*ctscore[p] + 0.8*ag[(int)*(s-1)][(int)*s]; } for (q=ue-le+1, s=nsegmentR+GLIMMER_SPAN, t=asegmentR+GLIMMER_SPAN; q>=0; q--, s++, t++) { acscore[q] = ScoreDonor_Glimmer(t-GLIMMER_XSPAN, Glimmer_TRAIN_DIR); if (acscore[q] < 0) acscore[q] = 0.0; if (acscore[q] > 0.31) acscore[q] = 0.31; acscore[q] = 5.0*(acscore[q]+0.0)/0.31; acscore[q] = 0.2*acscore[q] + 0.8*gt[(int)*s][(int)*(s+1)]; } } return; } void Sim4::splice_close () { UnLoadSites_GeneSplicer(); spliceInit = 0; } kmer-code-2013-trunk/libsim4/sim4core/sites_donor.C0000644000000000000000000023076711415365503020714 0ustar rootroot#include "sim4.H" /* DO NOT REMOVE or MODIFY !!!! */ double don[NUM_MODELS_DON][NUM_VALUES_DON] = {/*, don[0]=..., */ {-1.194022, 0.000100, 0.000100, 0.000100, -1.488549, 0.000100, 0.000100, 0.000100, -1.590126, 0.000100, 0.000100, 0.000100, -1.319010, 0.000100, 0.000100, 0.000100, -1.117603, -1.599586, -1.315587, -1.596323, -0.968354, -1.333230, -2.393915, -1.326530, -0.973824, -1.341693, -1.554107, -1.899903, -1.563394, -1.337397, -1.211732, -1.468084, -0.998394, -1.139395, -1.632676, -2.153211, -0.818638, -1.064793, -2.641517, -1.945501, -0.946117, -1.071815, -1.639264, -2.587007, -1.473391, -0.957669, -1.381619, -1.995872, -0.454442, -2.214835, -2.129729, -1.986628, -0.293988, -2.510470, -2.991967, -2.093170, -0.434752, -2.092543, -2.115746, -2.219543, -1.262582, -1.757518, -1.302403, -1.299283, -2.384259, -3.736880, -0.175496, -3.101671, -1.513713, -2.694443, -0.710651, -1.509572, -1.858238, -2.956849, -0.358565, -2.370948, -3.019029, -3.316280, -0.183299, -2.496840, -16.187622, -16.187622, -0.000000, -16.187622, -15.027452, -15.027452, -0.000001, -15.027452, -18.177462, -18.177462, 0.000000, -18.177462, -15.831747, -15.831747, -0.000000, -15.831747, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -18.420681, -18.420681, -18.420681, 0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, 
-1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -0.478520, -3.470547, -1.146963, -3.454598, -0.415031, -2.375713, -2.259474, -1.949628, -0.328148, -2.520914, -3.174837, -1.847972, -0.183708, -3.100533, -2.491223, -3.219190, -0.935461, -2.922524, -1.451677, -1.140622, -2.447088, -3.145141, -0.202256, -2.927916, -1.341329, -1.830612, -0.989187, -1.578387, -2.560945, -3.098087, -0.182121, -3.120559, -2.420583, -2.470345, -0.358279, -2.058100, -1.370294, -1.902511, -1.226400, -1.192613, -1.203973, -1.382792, -2.171248, -1.093363, -1.716096, -2.018802, -1.959097, -0.604343, -1.670320, -2.090491, -1.139263, -0.999314, -1.061786, -1.735801, -1.364941, -1.502794, -1.011423, -1.508000, -2.117894, -1.221927, -1.121546, -1.432701, -1.521532, -1.527011, -1.171592, -1.915998, -1.144701, -1.493377, -1.360640, -1.488907, -1.564893, -1.175145, -1.235830, -1.235830, -2.437877, -1.104227, -1.434473, -1.351348, -1.381335, -1.379815, -1.703031, -1.583154, -1.339306, -1.048351, -1.244275, -1.529555, -1.482686, -1.316055, -1.292946, -1.202040, -2.470091, -1.077688, -1.391529, -1.361451, -1.216870, -1.615149, -1.828850, -1.450746, -1.320862, -1.084409, -1.227998, -1.585702, -1.417540, -1.347041, -1.264398, -1.287256, -2.297413, -1.075741, -1.543335, -1.419175, -1.116076, -1.528520, -1.754513, -1.551330, -1.286488, -1.082335, -1.185089, -1.636288, -1.392199, -1.382098, -1.196416, -1.309063, -2.332659, -1.106804, -1.515828, -1.347139, -1.162641, -1.571509, -1.763248, -1.524356, -1.332962, -1.058267, -1.278840, 0.000100, 0.000100, 0.000100, -1.577067, 0.000100, 0.000100, 0.000100, -1.585331, 0.000100, 0.000100, 0.000100, -1.170603, 0.000100, 0.000100, 0.000100, -1.189564, -1.653730, -1.417380, -1.339552, -1.076494, -1.348020, -2.793821, -1.083908, -1.335665, -1.489445, -1.400451, -1.327900, -1.564922, -1.589900, -1.259134, -1.193858, -1.165974, -1.762895, -1.387537, -1.319968, -1.060081, -1.440125, -2.778800, -1.036874, -1.356823, -1.631915, -1.476295, 
-1.144179, -1.552356, -1.644394, -1.317984, -1.116427, -1.128674, -1.567696, -1.470539, -1.434596, -0.991671, -1.306884, -2.611551, -1.255403, -1.266786, -1.487478, -1.395204, -1.408377, -1.601340, -1.509013, -1.205371, -1.281340, -1.131920, -2.753591, -1.393460, -1.006017, -0.901514, -2.705863, -2.954560, -0.744164, -1.265302, -2.731009, -1.407433, -0.896680, -1.460255, -2.900763, -1.202876, -0.885489, -18.855316, -18.855316, 0.000000, -18.855316, -17.256889, -17.256889, -0.000000, -17.256889, -18.498844, -18.498844, 0.000000, -18.498844, -19.144358, -19.144358, 0.000000, -19.144358, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -20.030119, -20.030119, -20.030119, 0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.518318, -1.650989, -1.228402, -1.216383, -1.156200, -1.788296, -1.501707, -1.219676, -1.085152, -1.409175, -2.928513, -1.009700, -1.359202, -1.665857, -1.398529, -1.180433, -1.588399, -1.610113, -1.532489, -0.967886, -1.195646, -1.629226, -1.437924, -1.331844, -1.045906, -1.402171, -2.878129, -1.060378, -1.304552, -1.549151, -1.341843, -1.366829, -1.600224, -1.601107, -1.285358, -1.139672, -1.175097, -1.835931, -1.409790, -1.246354, -1.099655, -1.419533, -2.840918, -1.002897, -1.366974, -1.666650, -1.447375, -1.136135, -1.574024, -1.622230, -1.372203, -1.073574, -1.158644, -1.718377, -1.426295, -1.322253, -0.994002, -1.441001, -2.797775, -1.101803, -1.285864, -1.604010, -1.342650, -1.341906, -1.551531, -1.705969, -1.221082, -1.166133, -1.173539, -1.749182, -1.426353, -1.285074, -1.020789, -1.462937, -2.878790, -1.044319, -1.337747, -1.600078, -1.440283, -1.207906, -1.534570, -1.623598, -1.400524, -1.076472, -1.155850, -1.752763, -1.407940, -1.319500, -1.056948, -1.422322, -2.716046, -1.063643, -1.242005, -1.579490, -1.397563, -1.355118, -1.566043, -1.664274, -1.253461, 
-1.151105, -1.178903, -1.767702, -1.417641, -1.275203, -1.017385, -1.426898, -2.732214, -1.098612, -1.337898, -1.663610, -1.408090, -1.192244, -1.532042, -1.609303, -1.347952, -1.126681, -1.170419, -1.682621, -1.438948, -1.321631, -1.011322, -1.395969, -2.827050, -1.110233, -1.257048, -1.544841, -1.414122, -1.350840, -1.561129, -1.634762, -1.267677, -1.159581}, /*, don[1][]=NULL, */ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /*, don[2][]=..., */ {-1.127750, 0.000100, 0.000100, 0.000100, -1.544002, 0.000100, 0.000100, 0.000100, -1.618277, 0.000100, 0.000100, 0.000100, -1.330011, 0.000100, 0.000100, 0.000100, -0.951229, -1.629670, -1.539015, -1.593824, -1.003500, -1.345906, -2.201571, -1.337608, -0.923325, -1.405163, -1.728389, -1.715317, -1.479855, -1.370656, -1.427408, -1.278485, -0.959851, -1.258190, -1.581417, -2.062003, -0.747872, -1.308530, -2.617591, -1.695911, -0.855382, -1.230075, -1.695132, -2.312292, -1.441921, -1.047820, -1.459016, -1.712796, -0.827904, -1.335001, -1.662214, -2.205829, -0.520725, -1.510318, -2.659481, -2.162079, -0.685304, -1.251451, -1.848454, -2.947063, -1.273415, -1.251192, -1.355332, -1.736699, -0.557657, -1.910278, -16.137899, -1.275069, -0.837769, -2.018499, -15.532907, -0.833628, -0.658662, -1.757273, -15.045153, -1.171372, -1.232144, -1.529395, -14.690981, -0.709955, -16.187622, -16.187622, -0.000000, -16.187622, -15.027452, -15.027452, -0.000001, -15.027452, -1.386294, -1.386294, -1.386294, -1.386294, -15.831747, -15.831747, -0.000000, -15.831747, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.887741, -16.887741, -16.887741, -0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -0.442502, -5.038336, -1.076170, -4.586354, -0.196115, -3.322874, -3.045243, -2.359701, -0.559632, -1.540440, -11.849426, -1.540440, -0.061645, -4.036274, -4.036274, -3.710854, -0.788466, -2.397863, -1.299284, -1.704741, -4.264841, -5.913483, -0.022412, -5.220346, -2.022278, 
-2.610056, -0.307489, -2.833194, -2.833198, -3.749459, -0.125166, -3.344010, -2.970405, -1.553348, -0.349377, -3.440401, -0.926765, -2.484890, -1.386294, -1.306252, -2.420352, -0.280308, -3.806571, -2.014895, -2.528913, -2.659819, -3.100874, -0.216561, -1.011607, -1.299284, -1.704741, -1.704741, -0.873410, -1.771350, -1.630271, -1.530188, -1.188058, -1.542229, -2.286667, -0.968429, -0.977777, -1.257361, -1.918757, -1.646824, -0.915692, -2.017303, -1.312957, -1.620887, -1.537694, -1.438396, -1.758094, -0.979649, -1.212187, -1.380178, -2.742371, -0.950616, -1.504077, -1.287854, -1.548137, -1.240388, -1.796079, -1.534714, -1.524764, -0.914138, -1.304949, -1.447449, -1.592631, -1.236998, -1.243320, -1.113909, -2.899902, -1.113909, -1.448900, -1.362825, -1.228006, -1.530817, -1.762735, -1.456568, -1.429466, -1.032954, -1.164345, -1.656242, -1.576199, -1.236984, -1.316000, -1.215286, -2.441458, -1.055165, -1.442202, -1.536512, -1.227091, -1.365241, -1.762345, -1.616928, -1.331128, -1.006020, -1.171985, -1.716286, -1.399769, -1.332328, -1.164384, -1.268283, -2.524701, -1.119360, -1.564220, -1.435966, -1.107462, -1.502977, -1.909182, -1.533276, -1.437966, -0.919895, -1.277291, 0.000100, 0.000100, 0.000100, -1.598323, 0.000100, 0.000100, 0.000100, -1.608086, 0.000100, 0.000100, 0.000100, -1.143497, 0.000100, 0.000100, 0.000100, -1.165809, -1.660611, -1.464417, -1.320155, -1.052982, -1.381888, -2.799833, -1.081225, -1.326450, -1.481961, -1.433251, -1.313571, -1.558245, -1.601781, -1.260439, -1.189321, -1.142379, -1.779521, -1.433138, -1.295858, -1.044407, -1.476549, -2.864414, -1.014294, -1.373199, -1.697648, -1.510690, -1.070511, -1.522434, -1.671029, -1.363123, -1.084988, -1.120639, -1.402516, -1.528047, -1.555932, -1.063424, -1.073184, -2.579814, -1.439602, -1.324647, -1.259512, -1.442013, -1.542435, -1.646956, -1.349078, -1.252099, -1.339483, -0.846615, -2.468286, -18.492071, -0.720712, -0.848006, -2.652355, -18.499954, -0.690656, -0.984568, -2.450274, -18.256395, 
-0.615946, -1.103110, -2.543618, -18.330427, -0.528343, -18.855316, -18.855316, 0.000000, -18.855316, -17.256889, -17.256889, -0.000000, -17.256889, -1.386294, -1.386294, -1.386294, -1.386294, -19.144358, -19.144358, 0.000000, -19.144358, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -19.786440, -19.786440, -19.786440, 0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.503070, -1.656781, -1.237191, -1.215359, -1.161706, -1.788209, -1.518696, -1.201338, -1.092477, -1.408889, -2.945510, -1.000691, -1.369898, -1.660583, -1.481816, -1.112871, -1.566330, -1.624434, -1.559759, -0.957165, -1.188177, -1.643856, -1.449267, -1.319579, -1.037927, -1.418877, -2.862330, -1.059322, -1.289732, -1.546512, -1.390807, -1.336394, -1.594081, -1.631040, -1.269707, -1.138606, -1.176095, -1.811145, -1.444429, -1.230414, -1.107634, -1.429539, -2.923137, -0.976900, -1.393215, -1.665943, -1.507297, -1.075275, -1.559653, -1.603848, -1.422090, -1.057104, -1.140343, -1.705881, -1.455260, -1.326748, -1.000762, -1.420308, -2.853456, -1.099263, -1.304924, -1.589897, -1.383572, -1.293975, -1.544923, -1.725755, -1.207386, -1.172295, -1.153519, -1.758848, -1.456136, -1.276196, -1.007714, -1.473459, -2.933208, -1.042450, -1.329920, -1.616170, -1.483927, -1.170868, -1.528306, -1.635882, -1.436988, -1.047898, -1.125503, -1.769071, -1.440820, -1.315701, -1.052560, -1.440463, -2.739929, -1.051069, -1.229160, -1.574676, -1.437955, -1.335698, -1.563993, -1.669438, -1.252803, -1.149972, -1.159321, -1.775033, -1.432411, -1.279810, -1.014332, -1.447764, -2.758530, -1.082123, -1.374357, -1.662961, -1.441048, -1.137120, -1.516585, -1.625889, -1.361823, -1.115932, -1.151594, -1.669701, -1.484179, -1.313506, -1.006378, -1.410411, -2.831643, -1.104126, -1.254718, -1.548698, -1.437382, -1.328892, -1.547701, -1.672813, -1.258016, 
-1.154143}, /*, don[3][]=NULL, */ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /*, don[4][]=..., */ {-1.084913, 0.000100, 0.000100, 0.000100, -1.549219, 
0.000100, 0.000100, 0.000100, -1.861738, 0.000100, 0.000100, 0.000100, -1.223316, 0.000100, 0.000100, 0.000100, -1.050795, -1.537275, -1.423721, -1.637026, -0.872875, -1.291378, -2.669827, -1.435084, -0.735450, -1.368698, -1.664162, -2.564947, -1.325670, -1.405222, -1.319805, -1.505641, -0.859737, -0.978275, -1.998415, -2.730028, -0.835375, -0.784300, -3.010997, -2.803358, -0.833135, -0.880889, -1.921437, -5.402628, -1.246842, -0.827697, -1.520844, -2.864576, -0.167783, -3.276570, -3.189559, -2.583425, -0.096419, -4.097666, -3.809986, -2.934520, -0.101907, -4.280108, -3.027364, -3.363833, -0.665998, -2.494116, -1.600305, -1.600305, -16.747238, -16.747238, -0.000000, -16.747238, -13.321220, -13.321220, -0.000005, -13.321220, -13.732133, -13.732133, -0.000003, -13.732133, -14.193950, -14.193950, -0.000002, -14.193950, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.895124, -16.895124, -0.000000, -16.895124, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.895124, -16.895124, -16.895124, -0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -0.151090, -4.465904, -2.179452, -4.158421, -0.554549, -1.557343, -2.650891, -1.935272, -0.820987, -1.272967, -3.218792, -1.427116, -0.399387, -1.535039, -2.940376, -2.797276, -1.734596, -1.734596, -1.223777, -1.041457, -0.760663, -1.431905, -16.346027, -1.224891, -0.884203, -1.369053, -15.341568, -1.100789, -0.773191, -1.288656, -14.260199, -1.336284, -1.159856, -1.539345, -14.924074, -0.750889, -1.399088, -1.880313, -1.218914, -1.187166, -1.145405, -1.572050, -2.104514, -1.043043, -1.386294, -1.386294, -1.386294, -1.386294, -1.699803, -2.128798, -1.125893, -0.983666, -1.050633, -1.727693, -1.607549, -1.301175, -0.968053, -1.585976, -2.323573, -1.147254, -1.280235, 
-1.657791, -1.621751, -1.096866, -1.236612, -1.939148, -1.319384, -1.208960, -1.337921, -1.554144, -1.794679, -1.021490, -1.155771, -1.290503, -2.469156, -1.122710, -1.311232, -1.482050, -1.379635, -1.379635, -1.426361, -1.612463, -1.560791, -1.048528, -1.261436, -1.517115, -1.651772, -1.185153, -1.130161, -1.392525, -2.624666, -1.032522, -1.351723, -1.432575, -1.371331, -1.391332, -1.681933, -1.448318, -1.581850, -0.985034, -1.173274, -1.558594, -1.492454, -1.364941, -1.210530, -1.472237, -2.496739, -0.941106, -1.487714, -1.371181, -1.275871, -1.422474, -1.681933, -1.623523, -1.528857, -0.916291, -1.071511, -1.629790, -1.770019, -1.233715, -1.146035, -1.280513, -2.697577, -1.088141, -1.297741, -1.306252, -1.493714, -1.463255, -1.654771, -1.584813, -1.525779, -0.950814, -1.266370, 0.000100, 0.000100, 0.000100, -1.491946, 0.000100, 0.000100, 0.000100, -1.541319, 0.000100, 0.000100, 0.000100, -1.276135, 0.000100, 0.000100, 0.000100, -1.283218, -1.642509, -1.262101, -1.401191, -1.173902, -1.223035, -2.889842, -1.076104, -1.345962, -1.513016, -1.333223, -1.363204, -1.613285, -1.490518, -1.302845, -1.191536, -1.218639, -1.739363, -1.243837, -1.425202, -1.112642, -1.368903, -2.525986, -1.087835, -1.259947, -1.459995, -1.413163, -1.424116, -1.653757, -1.556271, -1.188394, -1.227450, -1.179645, -2.718660, -1.272526, -1.059798, -0.790659, -3.028704, -2.778654, -0.830219, -1.111371, -2.961830, -1.332975, -1.034296, -1.418085, -2.887375, -1.028677, -1.065311, -17.125324, -17.125324, -0.000000, -17.125324, -15.363074, -15.363074, -0.000001, -15.363074, -16.861937, -16.861937, -0.000000, -16.861937, -17.244972, -17.244972, -0.000000, -17.244972, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -18.246090, -18.246090, 0.000000, -18.246090, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -18.246090, -18.246090, -18.246090, 0.000000, -1.386294, -1.386294, 
-1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.569521, -1.634518, -1.243400, -1.176336, -1.117089, -1.582905, -1.518193, -1.393183, -1.168783, -1.210916, -3.006781, -1.073295, -1.323099, -1.459069, -1.232734, -1.561867, -1.698206, -1.393268, -1.567844, -1.021008, -0.939280, -1.294295, -16.906553, -1.093625, -1.020407, -1.290232, -16.837399, -1.009655, -0.995359, -1.202025, -16.654589, -1.109195, -1.326903, -1.187006, -17.008043, -0.844973, -1.171385, -1.934166, -1.290290, -1.308124, -1.073367, -1.386707, -2.606201, -1.095319, -1.386294, -1.386294, -1.386294, -1.386294, -1.630663, -1.695422, -1.202359, -1.138829, -1.251045, -1.707330, -1.377310, -1.272291, -0.991367, -1.460693, -2.644101, -1.121514, -1.260420, -1.583989, -1.249350, -1.493351, -1.545626, -1.638939, -1.337873, -1.107958, -1.249782, -1.696255, -1.374846, -1.283068, -1.078600, -1.409146, -2.718208, -1.051012, -1.376973, -1.540098, -1.340531, -1.303454, -1.523039, -1.586829, -1.315751, -1.174072, -1.310988, -1.679441, -1.297958, -1.306082, -1.069397, -1.347161, -2.716063, -1.106626, -1.242621, -1.566901, -1.327945, -1.436910, -1.526374, -1.642559, -1.311676, -1.139981, -1.232082, -1.765550, -1.362632, -1.268560, -1.035108, -1.340604, -2.609841, -1.172545, -1.205373, -1.636560, -1.352253, -1.397904, -1.555996, -1.585555, -1.310958, -1.156373, -1.230524, -1.701391, -1.356071, -1.317666, -1.006103, -1.351503, -2.788383, -1.158403, -1.273042, -1.501938, -1.403574, -1.379891, -1.608615, -1.505108, -1.344353, -1.148433}, /*, don[5][]=NULL, */ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /*, don[6][]=..., */ {-1.166591, 0.000100, 0.000100, 0.000100, -1.526530, 0.000100, 0.000100, 0.000100, -1.571106, 0.000100, 0.000100, 0.000100, -1.333818, 0.000100, 0.000100, 0.000100, -1.215445, -1.511711, -1.302457, -1.555771, -0.914930, -1.439454, -2.408852, -1.300192, -0.919144, -1.383449, -1.641278, -1.853452, -1.679233, -1.269761, -1.192800, -1.473101, -1.077916, -1.039555, -1.630572, -2.204857, -0.843300, -1.000248, -2.564947, 
-2.079441, -1.077724, -1.101254, -1.426677, -2.441027, -1.538635, -0.904137, -1.343735, -2.123892, -0.274809, -3.006387, -2.554403, -2.179710, -0.171953, -3.161895, -3.475552, -2.468750, -0.217724, -3.001958, -2.639054, -2.596495, -1.339288, -1.973010, -1.319086, -1.103975, -16.345232, -16.345232, -0.000000, -16.345232, -13.774693, -13.774693, -0.000003, -13.774693, -14.122998, -14.122998, -0.000002, -14.122998, -14.518610, -14.518610, -0.000002, -14.518610, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.642233, -16.642233, -0.000000, -16.642233, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.642233, -16.642233, -16.642233, -0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -0.463983, -3.227198, -1.420625, -2.408011, -16.178249, -2.085107, -0.860886, -0.792042, -13.415039, -1.496642, -1.719784, -0.515817, -15.221608, -1.667461, -0.543208, -1.467972, -14.234223, -2.826648, -0.963438, -0.581231, -1.386294, -1.386294, -1.386294, -1.386294, -14.661381, -14.661381, -0.000001, -14.661381, -15.838383, -15.838383, -0.000000, -15.838383, -15.761421, -15.761421, -0.000000, -15.761421, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.695320, -1.959621, -1.918463, -0.637305, -1.386294, -1.386294, -1.386294, -1.386294, -1.193278, -1.517064, -1.317732, -1.562185, -1.223776, -1.361397, -2.071072, -1.128466, -1.323774, -1.354546, -1.386294, -1.488077, -1.230066, -1.896747, -1.048383, -1.574231, -1.425759, -1.486914, -1.523615, -1.153019, -1.215249, -1.215249, -2.531861, -1.117132, -1.555586, -1.292476, -1.378131, -1.338285, -1.804900, -1.604229, -1.194445, -1.103848, -1.269971, -1.612066, -1.410200, -1.288839, -1.432701, -1.201589, -2.300200, -1.020591, -1.488077, -1.365017, 
-1.173962, -1.562185, -1.866016, -1.538512, -1.224162, -1.088988, -1.171270, -1.534175, -1.383603, -1.497361, -1.386294, -1.275069, -1.950823, -1.113428, -1.577611, -1.381332, -1.061978, -1.627004, -1.708693, -1.595752, -1.193529, -1.161577, -1.243874, -1.812378, -1.307823, -1.279914, -1.215768, -1.342061, -2.202261, -1.103650, -1.689105, -1.432385, -1.026921, -1.521113, -1.802809, -1.577002, -1.245100, -1.076873, -1.382168, 0.000100, 0.000100, 0.000100, -1.545379, 0.000100, 0.000100, 0.000100, -1.379818, 0.000100, 0.000100, 0.000100, -1.258390, 0.000100, 0.000100, 0.000100, -1.207902, -1.563453, -1.298499, -1.519490, -1.126704, -1.283708, -2.251290, -1.225439, -1.511144, -1.500562, -1.218158, -1.344909, -1.632572, -1.687231, -1.076001, -1.278028, -1.568616, -1.535826, -1.170586, -1.323493, -1.159405, -1.150894, -2.487589, -1.248750, -1.441864, -1.432385, -1.205476, -1.490654, -1.876316, -1.521113, -1.071944, -1.251638, -1.161048, -2.804674, -1.315199, -1.027517, -0.827369, -2.981340, -2.560128, -0.832910, -1.146403, -2.931190, -1.015498, -1.321756, -1.453063, -2.971525, -0.983059, -1.076585, -15.516617, -15.516617, -0.000001, -15.516617, -13.721204, -13.721204, -0.000003, -13.721204, -15.352379, -15.352379, -0.000001, -15.352379, -15.588767, -15.588767, -0.000001, -15.588767, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.644598, -16.644598, -0.000000, -16.644598, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.644598, -16.644598, -16.644598, -0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.599447, -1.892994, -1.006152, -1.266741, -15.045153, -3.126755, -0.837599, -0.647425, -14.751605, -2.173966, -1.877701, -0.310156, -15.638446, -2.666903, -0.644280, -0.902653, -15.377857, -2.835309, 
-0.787619, -0.720779, -1.386294, -1.386294, -1.386294, -1.386294, -13.955276, -13.955276, -0.000003, -13.955276, -15.802014, -15.802014, -0.000000, -15.802014, -15.954400, -15.954400, -0.000000, -15.954400, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.294373, -1.653514, -1.292220, -1.347483, -1.386294, -1.386294, -1.386294, -1.386294, -1.149452, -2.114532, -1.191125, -1.352393, -0.960462, -1.755391, -2.522644, -1.010059, -1.165304, -1.837972, -1.151605, -1.546917, -1.742969, -1.452046, -1.116962, -1.333185, -1.287854, -1.700699, -1.196563, -1.429824, -1.061257, -1.476772, -2.530930, -1.061257, -1.454040, -1.521699, -1.171042, -1.435521, -1.759183, -1.583292, -1.072467, -1.271705, -1.230449, -1.725144, -1.289043, -1.369804, -1.201112, -1.344212, -2.271551, -1.092898, -1.459255, -1.673266, -1.113650, -1.379918, -1.774291, -1.736070, -1.055502, -1.183623, -1.420696, -1.583215, -1.289668, -1.280934, -1.040457, -1.333804, -2.766615, -1.138312, -1.335864, -1.770436, -1.149452, -1.386294, -1.793840, -1.413693, -1.199134, -1.241393, -1.312867, -1.820747, -1.098613, -1.445135, -1.148623, -1.325829, -2.866271, -1.020448, -1.189250, -1.726050, -1.151509, -1.602436, -1.690179, -1.532787, -1.177084, -1.233043}, /*, don[7][]=don[8][]=NULL, */ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /*, don[9][]=..., */ {-1.304321, 0.000100, 0.000100, 0.000100, -1.532105, 0.000100, 0.000100, 0.000100, -1.609438, 0.000100, 0.000100, 0.000100, -1.162950, 0.000100, 0.000100, 0.000100, -1.132514, -1.686398, -1.167605, -1.706601, -0.927987, -1.327587, -2.480263, -1.363305, -1.215768, -1.401484, -1.215768, -1.827568, -1.505686, -1.409060, -1.185916, -1.477113, -1.129072, -1.069054, -1.441103, -2.336484, -0.812009, -0.969401, -2.674144, -2.227859, -0.991301, -1.114361, -1.434304, -2.772584, -1.255182, -0.810931, -1.669157, -2.499501, -0.000001, -15.079638, -15.079638, -15.079638, -0.000001, -15.115704, -15.115704, -15.115704, -0.000002, -14.441451, -14.441451, -14.441451, -0.000003, -13.676253, -13.676253, -13.676253, -16.113083, -16.113083, -0.000000, -16.113083, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, 
-1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.113083, -16.113083, -0.000000, -16.113083, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.113083, -16.113083, -16.113083, -0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -0.000000, -16.113083, -16.113083, -16.113083, -0.000000, -16.113083, -16.113083, -16.113083, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.113083, -16.113083, -0.000000, -16.113083, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.121999, -1.726593, -1.482208, -1.311756, -1.386294, -1.386294, -1.386294, -1.386294, -0.897942, -1.848917, -1.476678, -1.576051, -0.845417, -1.620801, -2.778248, -1.168817, -1.001695, -1.431551, -1.508512, -1.756973, -1.371479, -1.877414, -1.184268, -1.247182, -1.104199, -1.772448, -1.318974, -1.464482, -1.266672, -1.173146, -2.365280, -1.155446, -1.390913, -1.409605, -1.409605, -1.336846, -1.665608, -1.643629, -1.181812, -1.168206, -1.062112, -1.766069, -1.467577, -1.375204, -1.216396, -1.297741, -2.379543, -1.084819, -1.271771, -1.398926, -1.317581, -1.584643, -1.864448, -1.670293, -1.271386, -0.977147, -1.152680, -1.594512, -1.393842, -1.456362, -1.180984, -1.243504, -2.598045, -1.107959, -1.259543, -1.490654, -1.243795, -1.596014, -1.599216, -1.400088, -1.270035, -1.307306, -1.128466, -1.511457, -1.431415, -1.528264, -1.223776, -1.361397, -2.476535, -1.006363, -1.459054, -1.335001, -1.302211, -1.459054, -1.657255, 
-1.720434, -1.179628, -1.130236, -1.234746, 0.000100, 0.000100, 0.000100, -2.215564, 0.000100, 0.000100, 0.000100, -1.368276, 0.000100, 0.000100, 0.000100, -1.062896, 0.000100, 0.000100, 0.000100, -1.386294, -11.982954, -0.693160, -1.386294, -0.405507, -1.791726, -11.002167, -1.791726, -1.029628, -1.945889, -1.252766, -1.540440, -1.152684, -1.845814, -1.335002, -1.335002, -1.335002, -1.335002, -1.558141, -1.335002, -0.405507, -1.098629, -11.002167, -11.002167, -1.223779, -1.446918, -1.223779, -1.734591, -1.466334, -1.178661, -1.466334, -1.466334, -0.000018, -12.043577, -12.043577, -12.043577, -0.000020, -11.918417, -11.918417, -11.918417, -0.000025, -11.695280, -11.695280, -11.695280, -0.000027, -11.608273, -11.608273, -11.608273, -13.217681, -13.217681, -0.000005, -13.217681, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -13.217681, -13.217681, -0.000005, -13.217681, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -13.217681, -13.217681, -13.217681, -0.000005, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -0.000005, -13.217681, -13.217681, -13.217681, -0.000005, -13.217681, -13.217681, -13.217681, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -13.217681, -13.217681, -0.000005, -13.217681, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.442383, -1.442383, -1.368276, -1.299284, -1.386294, -1.386294, -1.386294, -1.386294, 
-1.466334, -11.775321, -0.773204, -1.178661, -0.773204, -2.564880, -2.564880, -0.955522, -1.029628, -2.638986, -1.540440, -1.029628, -2.014880, -2.014880, -1.098619, -0.916301, -0.980838, -1.673968, -1.386294, -1.673968, -0.693197, -0.693197, -10.596735, -10.596735, -0.628623, -1.321758, -2.014880, -2.707977, -1.049828, -2.995652, -1.386294, -1.049828, -1.056058, -1.526054, -1.189587, -2.036866, -0.223171, -11.512965, -11.512965, -1.609428, -2.397832, -1.299286, -0.788474, -1.704734, -1.704734, -1.704734, -1.011612, -1.299286, -1.335002, -2.251263, -0.998536, -1.335002, -0.693167, -1.609428, -11.512965, -1.203979, -1.163156, -2.079417, -0.980838, -1.673968, -1.203979, -1.203979, -1.203979, -2.302525, -0.944470, -2.197197, -1.504075, -1.280936, -0.810950, -2.197169, -11.407610, -0.810950, -1.163156, -1.386294, -0.826689, -11.982954, -1.791743, -1.386294, -1.791743, -0.875482}, /*, don[10][]=..., */ {-1.256549, 0.000100, 0.000100, 0.000100, -1.426557, 0.000100, 0.000100, 0.000100, -1.495300, 0.000100, 0.000100, 0.000100, -1.382105, 0.000100, 0.000100, 0.000100, -1.272123, -1.837015, -0.997687, -1.651613, -1.021263, -1.178655, -2.190254, -1.512857, -1.012538, -1.412861, -1.353142, -2.003728, -1.850295, -1.396379, -0.872130, -1.730151, -1.190420, -0.998529, -1.302211, -2.890368, -0.755553, -0.859232, -2.593829, -3.441120, -1.032473, -0.962947, -1.530053, -3.088192, -1.467874, -0.735507, -1.372564, -3.300445, -0.000001, -15.228934, -15.228934, -15.228934, -0.000001, -15.390358, -15.390358, -15.390358, -0.000001, -14.711600, -14.711600, -14.711600, -0.000006, -13.161592, -13.161592, -13.161592, -16.292889, -16.292889, -0.000000, -16.292889, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.292889, -16.292889, -0.000000, -16.292889, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, 
-1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.292889, -16.292889, -16.292889, -0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.292889, -1.988798, -0.227140, -2.713100, -1.386294, -1.386294, -1.386294, -1.386294, -0.000002, -14.304093, -14.304093, -14.304093, -0.000000, -16.065750, -16.065750, -16.065750, -0.000004, -13.579793, -13.579793, -13.579793, -16.292889, -16.292889, -0.000000, -16.292889, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.525720, -1.725962, -1.581290, -0.919234, -1.386294, -1.386294, -1.386294, -1.386294, -1.294148, -1.772638, -1.057019, -1.567844, -1.066127, -1.549923, -1.549923, -1.464766, -1.131811, -1.252763, -1.358124, -2.004749, -1.435927, -1.781288, -0.911042, -1.652455, -1.211941, -1.486378, -1.374460, -1.499623, -1.487479, -1.094437, -2.180625, -1.119755, -1.358409, -1.368459, -1.209989, -1.659514, -2.246894, -1.512927, -1.094217, -1.081145, -1.323556, -1.500487, -1.310133, -1.422928, -1.593177, -1.010782, -2.424473, -1.066352, -1.298187, -1.482921, -1.111858, -1.765783, -2.133507, -1.239691, -0.980830, -1.527373, -1.498212, -1.480820, -1.210530, -1.382380, -1.255798, -1.245216, -2.215572, -1.145133, -1.695299, -1.278406, -1.055262, -1.661397, -1.956308, -1.435774, -1.105533, -1.239064, -1.347074, -1.609437, -1.044125, -1.671313, -1.161862, -1.342123, -2.153052, -1.172224, -1.513556, -1.240263, -1.079921, -1.890850, -1.757219, -1.415470, -1.181855, -1.280295, -1.352718, 0.000100, 0.000100, 0.000100, -1.431190, 0.000100, 0.000100, 0.000100, -1.279959, 0.000100, 0.000100, 0.000100, -1.494368, 0.000100, 0.000100, 0.000100, -1.572395, -1.572395, -1.137080, -1.331235, -1.058609, -1.493924, -2.793183, 
-1.001451, -0.952011, -1.845823, -1.098614, -2.097134, -0.938273, -2.036874, -1.343735, -1.526055, -1.128467, -1.734599, -1.128467, -1.734599, -0.971864, -0.971864, -2.224609, -2.001471, -1.734598, -0.753775, -1.629238, -1.852379, -1.326871, -1.326871, -0.947384, -2.505509, -0.000005, -13.270790, -13.270790, -13.270790, -0.000005, -13.353481, -13.353481, -13.353481, -0.000005, -13.217681, -13.217681, -13.217681, -0.000010, -12.577650, -12.577650, -12.577650, -14.533352, -14.533352, -0.000001, -14.533352, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -14.533352, -14.533352, -0.000001, -14.533352, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -14.533352, -14.533352, -14.533352, -0.000001, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -14.533352, -0.834374, -0.928562, -1.767661, -1.386294, -1.386294, -1.386294, -1.386294, -0.000003, -13.698981, -13.698981, -13.698981, -0.000004, -13.604795, -13.604795, -13.604795, -0.000009, -12.765699, -12.765699, -12.765699, -14.533352, -14.533352, -0.000001, -14.533352, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.195876, -1.685423, -1.164127, -1.609437, -1.386294, -1.386294, -1.386294, -1.386294, -0.949083, -1.488076, -1.642226, -1.642226, -0.641859, -1.691672, -2.251277, -1.691672, -0.980831, -1.856294, -1.519825, -1.386294, -1.767657, -1.634128, -1.315677, -1.005525, -1.321756, -1.919589, -1.021653, -1.484274, -0.955515, -1.584118, -12.873912, -0.890977, -1.228667, 
-1.634128, -1.228667, -1.516346, -1.832577, -1.714795, -1.203974, -1.021654, -1.116963, -2.215564, -1.174121, -1.368276, -0.639086, -2.197211, -2.890333, -1.185626, -0.944464, -2.379534, -1.791756, -1.044547, -1.529394, -1.696447, -1.049824, -1.386294, -1.372308, -1.623621, -1.777771, -0.966845, -1.178658, -1.871793, -2.564915, -0.773197, -1.326871, -1.183772, -1.406914, -1.694593, -1.679640, -1.369487, -1.880309, -0.899486, -0.967587, -1.832577, -1.078812, -2.120255, -0.875472, -1.568614, -2.772564, -1.163152, -1.609435, -1.358124, -0.916295, -1.945902, -1.711715, -1.018571, -1.280934, -1.711715}, /*, don[11][]=don[12][]=[13][]=don[14][]=NULL, */ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /*, don[15][]=..., */ {-1.326871, 0.000100, 0.000100, 0.000100, -1.372427, 0.000100, 0.000100, 0.000100, -1.380938, 0.000100, 0.000100, 0.000100, -1.470283, 0.000100, 0.000100, 0.000100, -1.398515, -1.725198, -1.127362, -1.382254, -1.133099, -1.438480, -2.630614, -0.997924, -1.164862, -1.344447, -1.447988, -1.648658, -1.931988, -1.255102, -1.117481, -1.414732, -1.018889, -1.873878, -1.514505, -1.325914, -1.033230, -1.641477, -2.224621, -1.071944, -1.112218, -1.139998, -1.765090, -1.713797, -2.286453, -1.330944, -1.050985, -1.256837, -14.786291, -1.747307, -1.121602, -0.693148, -14.585621, -1.443453, -1.546637, -0.596156, -14.513647, -1.453157, -1.144422, -0.803496, -14.731803, -1.783790, -0.967585, -0.794074, -1.386294, -1.386294, -1.386294, -1.386294, -14.436090, -14.436090, -0.000002, -14.436090, -14.883665, -14.883665, -0.000001, -14.883665, -15.328439, -15.328439, -0.000001, -15.328439, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.046600, -16.046600, -0.000000, -16.046600, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.046600, -16.046600, -16.046600, -0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -0.824992, -3.468961, -0.685657, -3.617380, -0.000001, -15.221608, -15.221608, -15.221608, -0.000010, -12.577650, -12.577650, -12.577650, -0.000001, -15.360944, -15.360944, -15.360944, -0.000012, -12.429232, 
-12.429232, -12.429232, -16.046600, -16.046600, -0.000000, -16.046600, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.046600, -16.046600, -16.046600, -0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.456362, -1.832313, -0.975473, -1.470283, -1.354546, -1.595707, -1.508696, -1.145791, -1.132746, -1.393028, -2.364884, -1.091924, -1.454067, -1.429969, -1.245666, -1.429969, -1.782456, -1.494775, -1.559313, -0.923325, -1.275543, -1.432385, -1.243795, -1.641477, -1.283755, -1.353959, -1.993995, -1.112797, -1.466337, -1.178655, -1.226665, -1.782189, -1.901759, -1.293513, -1.345472, -1.152101, -1.229452, -1.745668, -1.424761, -1.229452, -1.207965, -1.248787, -2.124254, -1.221388, -1.720149, -1.373279, -1.002906, -1.605080, -1.738793, -1.487479, -1.302076, -1.119755, -1.282754, -1.609437, -1.219241, -1.481604, -1.223776, -1.390830, -2.220107, -1.054358, -1.800492, -1.120592, -1.203973, -1.566878, -1.710533, -1.417547, -1.243194, -1.243194, -1.494775, 0.000100, 0.000100, 0.000100, -1.305533, 0.000100, 0.000100, 0.000100, -1.728388, 0.000100, 0.000100, 0.000100, -1.117482, 0.000100, 0.000100, 0.000100, -1.098616, -1.568613, -1.568613, -1.386294, -0.882395, -1.980990, -3.367210, -0.882395, -2.251263, -1.335002, -1.152684, -1.152684, -1.358124, -1.763583, -1.157455, -1.358124, -2.047681, -1.131405, -1.131405, -1.488076, -1.897107, -1.049828, -2.995652, -0.798517, -2.036866, -1.189587, -1.749192, -0.938276, -1.704744, -1.550595, -1.299284, -1.098615, -11.982954, -2.772514, -0.693160, -0.826689, -12.644341, -2.047681, -1.236764, -0.543623, -12.388411, -2.484873, -0.780166, -0.780166, -12.793871, -2.197211, -0.944466, -0.693153, -1.386294, -1.386294, -1.386294, 
-1.386294, -11.608273, -11.608273, -0.000027, -11.608273, -12.948019, -12.948019, -0.000007, -12.948019, -13.199332, -13.199332, -0.000006, -13.199332, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -13.883173, -13.883173, -0.000003, -13.883173, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -13.883173, -13.883173, -13.883173, -0.000003, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.453953, -1.305533, -1.305533, -1.494775, -0.000012, -12.429232, -12.429232, -12.429232, -0.000010, -12.577650, -12.577650, -12.577650, -0.000010, -12.577650, -12.577650, -12.577650, -0.000012, -12.388411, -12.388411, -12.388411, -13.883173, -13.883173, -0.000003, -13.883173, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -13.883173, -13.883173, -13.883173, -0.000003, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.677095, -2.274928, -0.801629, -1.340624, -0.916298, -1.609433, -1.609433, -1.609433, -1.299286, -1.299286, -1.299286, -1.704734, -1.163152, -2.079433, -1.568614, -1.037990, -1.945899, -0.847304, -1.722761, -1.386294, -1.321757, -1.098616, -1.098616, -2.708014, -1.609434, -1.139438, -1.832572, -1.139438, -1.299284, -1.704741, -1.704741, -1.011607, -2.014891, -1.098616, -1.321757, -1.321757, -1.526054, -1.749192, -1.749192, -0.832916, -1.163153, -1.386294, -1.856290, -1.268513, -0.773197, -2.564915, -1.466336, -1.466336, -1.648654, -1.178658, -1.466336, -1.312187, -1.856290, -1.268513, -0.900791, -1.856290, -1.011607, 
-1.145136, -3.090961, -1.299284, -1.435084, -1.945896, -0.847306, -1.658222, -1.268513, -1.673972, -1.067844, -1.673972}, /*, don[16][]=..., */ {-1.390956, 0.000100, 0.000100, 0.000100, -1.318853, 0.000100, 0.000100, 0.000100, -1.354255, 0.000100, 0.000100, 0.000100, -1.489074, 0.000100, 0.000100, 0.000100, -1.191589, -1.395684, -1.376992, -1.628306, -1.007263, -1.343735, -2.302583, -1.294945, -1.168571, -1.033230, -1.713797, -1.876316, -1.866660, -1.108976, -1.124724, -1.656940, -1.013620, -1.402555, -1.752228, -1.524444, -0.800246, -1.434304, -2.654802, -1.418043, -0.798509, -1.301137, -1.825660, -2.148432, -1.559566, -1.258461, -1.219241, -1.559566, -14.987994, -1.500986, -1.095521, -0.814808, -14.608505, -1.569521, -1.636962, -0.515362, -14.166170, -1.218158, -1.004584, -1.084627, -14.369398, -1.603707, -0.896376, -0.939548, -1.386294, -1.386294, -1.386294, -1.386294, -14.483342, -14.483342, -0.000002, -14.483342, -14.823470, -14.823470, -0.000001, -14.823470, -15.179049, -15.179049, -0.000001, -15.179049, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -15.967274, -15.967274, -0.000000, -15.967274, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -15.967274, -15.967274, -15.967274, -0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -0.593618, -4.191976, -0.848850, -5.370614, -0.000001, -15.373656, -15.373656, -15.373656, -0.000023, -11.775321, -11.775321, -11.775321, -0.000001, -15.118424, -15.118424, -15.118424, -0.000075, -10.596735, -10.596735, -10.596735, -15.967274, -15.967274, -0.000000, -15.967274, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, 
-1.386294, -1.386294, -0.957840, -1.219598, -1.136532, -15.967273, -1.386294, -1.386294, -1.386294, -1.386294, -1.098613, -1.828800, -1.135654, -1.688218, -0.864506, -1.426460, -2.103345, -1.530001, -0.849717, -1.343735, -1.493266, -2.442344, -1.386294, -1.386294, -1.386294, -1.386294, -1.383314, -1.208961, -1.336794, -1.670995, -1.255455, -1.082613, -2.281305, -1.293921, -1.443157, -1.182431, -1.604425, -1.361479, -1.945908, -1.722765, -0.847299, -1.386294, -1.143733, -1.489478, -1.511457, -1.446919, -1.283347, -1.123004, -2.426408, -1.173648, -1.420696, -1.221845, -1.056053, -2.154663, -2.040219, -1.560647, -1.049823, -1.171183, -1.475906, -1.562918, -1.252763, -1.286665, -1.218572, -1.263692, -2.393554, -1.107346, -1.786985, -1.581134, -0.864998, -1.581134, -2.173802, -1.480657, -0.982411, -1.257514, -1.232961, -1.380596, -1.358124, -1.609437, -1.366492, -1.220780, -1.864329, -1.220780, -1.402555, -1.524444, -1.002570, -1.775758, -1.830473, -1.324926, -1.191394, -1.309177, -1.189987, 0.000100, 0.000100, 0.000100, -1.623622, 0.000100, 0.000100, 0.000100, -1.465398, 0.000100, 0.000100, 0.000100, -1.318241, 0.000100, 0.000100, 0.000100, -1.463255, -1.504077, -1.248145, -1.349927, -1.203974, -1.203974, -3.149855, -1.029621, -1.410987, -1.410987, -1.228666, -1.516347, -1.558144, -1.914817, -1.057371, -1.221673, -1.020362, -2.056448, -1.196252, -1.563975, -1.002154, -1.479076, -2.983134, -1.037245, -1.291984, -1.677645, -1.178656, -1.466337, -1.599387, -1.704747, -1.039773, -1.337023, -13.864305, -2.862188, -0.741939, -0.762142, -13.369229, -2.212965, -2.549431, -0.207644, -13.742944, -2.923146, -0.819029, -0.682454, -13.742944, -2.923146, -0.771401, -0.725939, -1.386294, -1.386294, -1.386294, -1.386294, -12.345852, -12.345852, -0.000013, -12.345852, -14.144817, -14.144817, -0.000002, -14.144817, -14.473033, -14.473033, -0.000002, -14.473033, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -15.082459, -15.082459, -0.000001, 
-15.082459, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -15.082459, -15.082459, -15.082459, -0.000001, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.667425, -1.199289, -1.127186, -1.682463, -0.000004, -13.415039, -13.415039, -13.415039, -0.000003, -13.883173, -13.883173, -13.883173, -0.000003, -13.955276, -13.955276, -13.955276, -0.000005, -13.400002, -13.400002, -13.400002, -15.082459, -15.082459, -0.000001, -15.082459, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -0.981769, -1.490091, -0.916291, -15.082459, -1.386294, -1.386294, -1.386294, -1.386294, -1.201470, -1.799305, -1.040202, -1.712294, -0.798510, -1.817075, -2.590255, -1.163152, -1.218158, -1.820331, -1.105680, -1.554629, -1.386294, -1.386294, -1.386294, -1.386294, -1.159767, -2.131624, -1.133099, -1.403389, -0.969403, -1.662545, -2.961804, -0.969403, -1.108664, -1.609437, -1.139435, -1.897117, -1.536234, -1.425009, -1.191395, -1.425009, -1.053763, -1.918757, -1.290151, -1.472472, -0.863049, -1.519825, -3.465692, -1.114362, -1.439216, -1.483668, -1.108976, -1.578978, -2.044752, -1.446919, -1.041455, -1.264598, -1.098613, -1.887067, -1.459625, -1.262916, -0.780161, -1.791757, -4.276572, -1.018571, -1.211091, -1.904235, -1.176000, -1.407800, -1.609437, -1.666595, -1.167606, -1.203974, -1.317302, -2.233588, -0.957300, -1.422662, -1.170073, -1.227231, -2.961804, -1.064713, -1.349927, -1.349927, -1.561235, -1.303407, -1.508896, -1.553348, -1.147884, -1.386294}, /*, don[17..24][]=NULL, */ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, }; kmer-code-2013-trunk/libsim4/sim4core/sim4.H0000644000000000000000000004271212415066336017237 0ustar rootroot#ifndef SIM4_H #define SIM4_H #include #include #include #include #include #include #include #include #include #include #include "util++.H" #include "sim4parameters.H" #include "sim4command.H" #include "exon.H" #include "mspManager.H" #include "sim4defines.H" #include "glimmerSplice.H" #include "sim4b1_s.H" #include "../sim4polish/sim4polish.H" #include "../sim4polish/sim4polishList.H" #include "../sim4polish/sim4polishBuilder.H" // // A wrapper for the sim4 subroutines // // Define this to use our own memory management -- one that doesn't bother to // release memory during the 
compute (but does release it all at the end). // // Define this to get a detailed report on hit extension (in mspManager.C) // //#define DEBUG_EXTENSION // Define this to test the seeds when doing extension -- it tests // if the seed is infact an exact match (in mspManager.C) // //#define TEST_SEEDS_IN_EXTENSION // Debug of sim4b1.C and related. // // SHOW_PROGRESS -- write the progress of Sim4::SIM4 to stderr // DEBUG_EXONS -- dump the exons at various places // //#define SHOW_PROGRESS //#define DEBUG_EXONS // Show what is being done for external seeding in sim4string.C // //#define SHOW_EXTERNAL_SEEDING enum { INIT = 0, PERM = 1, TEMP = 2}; enum { FWD = 0, BWD = 1, BOTH = 2 }; enum { OK = 0, FREE_START = 1, FREE_END = 2, FREE_BOTH_ENDS = 3}; #define GEN_LOG4_ENTRIES 45 #define CDNA_LOG4_ENTRIES 25 #define HASH_SIZE 32767 /* 2**15 - 1 */ class Sim4 { private: struct coords { int pos1; int pos2; }; /* used only in the alignment stage */ struct edit_script { int op_type; /* SUB, INS, or DEL */ int num; /* Number of operations */ struct edit_script *next; }; struct edit_script_list { int offset1, offset2; int len1, len2; int score; struct edit_script *script; struct edit_script_list *next_script; }; struct splice_t { int xs; int xe; int ys; int ye; int score; int type; struct splice_t *next; }; struct sim4_stats_t { int internal; int icoverage; int numberOfMatches; int numberOfNs; int orientation; int percentID; bool tooManyMSPs; }; struct hash_node { int ecode; /* integer encoding of the word */ int pos; /* positions where word hits query sequence */ struct hash_node *link; /* next word with same last 7.5 letters */ }; struct hash_table { struct hash_node *table[HASH_SIZE+1]; int *nextPos; struct hash_node *nodes; int nodesused; }; private: mspManager _mspManager; exonManager _exonManager; sim4parameters *globalParams; void *_pallochandle; char *_genSeq; char *_estSeq; int _genLen; int _estLen; int encoding[256]; coords last_GT; coords last_CT; coords last_AG; 
coords last_AC; //sim4_args_t rs; int wordExtensionAllowance; int wordSize; int wordSizeInt; int wordSizeExt; char *spacedSeed; char *spacedSeedInt; char *spacedSeedExt; mss_t spacedSeedMSS; mss_t spacedSeedIntMSS; mss_t spacedSeedExtMSS; int spliceModel; int mspThreshold1; int mspThreshold2; int G_score; int C_score; Exon *exon_list; int mask; struct hash_table *hashtable; struct hash_table phashtable; struct hash_table thashtable; // For high-accuracy switch // bool _accurateSequences; #ifdef __APPLE__ bool _pad[7]; #endif void adjustBoundariesOfMarginalExons(Exon *Lblock); void findLastAGandAC(Exon *tmp_block1); void findLastGTandCT(Exon *tmp_block); // Functions from Xtend1.c // typedef struct ValNode { void *data; struct ValNode *next; } *ValNodePtr; void free_coords(coords ***val, int size) { for(int i=0; im) return(x); int y = x+k; while ((x>startx) && (y>starty) && (_genSeq[x-1] == _estSeq[y-1])) { --x; --y; } return(x); }; bool checkExonsForOverlaps(Exon *theExons); void appendExons(sim4polishBuilder &B, Exon *theExon); void maskExonsFromSeeds(sim4command *cmd, Exon *theExons); void maskExonsFromGenomic(Exon *theExons, char *f, char *r, int l); void IDISPLAY(sim4polishBuilder &builder, char *aString, char *bString, char A[], char B[], int M, int N, int S[], int AP, int BP, int est_strand, Exon *exons); void appendAlignments(sim4polishBuilder &builder, char *seq1, char *seq2, int len1, int len2, edit_script_list *Aligns, Exon *Exons, int match_ori); // Functions from sim4b1.h // int good_ratio(int); void flip_list(Exon **,Exon **); void free_align(edit_script_list *); void complement_exons(Exon **,int,int); void bld_table(char *,int,mss_t,int); int SIM4_block1(Exon* &Lblock, Exon* &tmp_block, Exon* &tmp_block1); int SIM4_block2(Exon* &tmp_Lblock, Exon* &tmp_Rblock, Exon* &tmp_block, Exon* &tmp_block1); int SIM4_block3(bool good_match, Exon* &tmp_Lblock, Exon* &tmp_Rblock, Exon* &tmp_block, Exon* &tmp_block1); int SIM4_block4(bool good_match, Exon* 
&tmp_Lblock, Exon* &tmp_Rblock, Exon* &tmp_block, Exon* &tmp_block1); struct edit_script_list *SIM4(int *, Exon* *, int *pA, int *pT, sim4_stats_t *); void merge(Exon **,Exon **); bool get_sync_flag(Exon *, Exon *, int); void slide_intron(int w, Exon *, Exon *, int, sim4_stats_t *); void sync_slide_intron(int w, Exon *, Exon *, int, sim4_stats_t *); void filter(Exon **,Exon **); void wobble(Exon *, Exon *, const char *, const char *, char *seq1); Exon *bmatch(char *,char *,int,int,int,int); Exon *fmatch(char *,char *,int,int,int,int); void compact_list(Exon **Lblock, Exon **Rblock, int SI); int resolve_overlap(Exon *,Exon *,char *); int greedy(char *,char *,int,int,int,int,Exon **, Exon **); int extend_bw(char *,char *,int,int,int,int,int *,int *); int extend_fw(char *,char *,int,int,int,int,int *,int *); void SLIDE_INTRON(int a, Exon *b, Exon *c, int d, sim4_stats_t *e, int f) { if (f == 1) { return(sync_slide_intron(a,b,c,d,e)); } else { return(slide_intron(a,b,c,d,e)); } }; void pluri_align(int *dist_ptr, Exon *lblock, struct edit_script_list **Aligns, sim4_stats_t *st); void updateStatistics(Exon *theExon, sim4_stats_t *st); void get_stats(Exon *,sim4_stats_t *); int get_edist(int,int,int,int,char *,char *); void add_word(int,int); int extend_hit(int,int,const char *const,const char * const,int,int,int,int); void search(char *,char *,int,int,mss_t); void exon_cores(char*,char*,int,int,int,int,int,mss_t,int,int); Exon *find_previous(Exon *head, Exon *target) { while (head && (head->next_exon != target)) head = head->next_exon; return(head); }; bool get_match_quality(Exon *,Exon *,sim4_stats_t *,int); #if 0 // Dead code, 05 apr 2004, bpw void check_consistency_intron_ori(Exon *,int,char *); #endif // sim4b1_s.h // public: mss_t masks_shifts(char *); int mask_shift(uint64, mss_t); private: // splice.h // #define MAX_SPAN 80 void splice_donor(char *xseq, char *yseq, int M, int N, double *gt_score, double *ct_score, double **max_Gf, double **max_Cf, int **start_Gi, 
int **start_Ci); void splice_donor_uni(char *xseq, char *yseq, int M, int N, double *It_score, double **max_IF, int **end_Ii); void splice_acceptor(char *xseq, char *yseq, int M, int N, double *ag_score, double *ac_score, double **max_Gb, double **max_Cb, int **end_Gi, int **end_Ci); void splice_acceptor_uni(char *xseq, char *yseq, int M, int N, double *aI_score, double **max_Ib, int **end_Ii); void splice_init(int spl_model); void splice_close(); void loadGeneSplicerModel (void); void loadGlimmerModel (char *train_dir); double ScoreDonor_Glimmer (char *asegment, char *train_dir); double ScoreAcceptor_Glimmer (char *asegment, char *train_dir); void splice_original(char *in_seqx, int ls, int us, int le, int ue, char *in_seqy, int ys, int ye, double *gtscore, double *agscore, double *ctscore, double *acscore, int ori, char *nsgemnetL, char *nsegmentR); void splice_GeneSplicer(char *in_seqx, int ls, int us, int le, int ue, char *in_seqy, int ys, int ye, double *gtscore, double *agscore, double *ctscore, double *acscore, int ori, char *nsgemnetL, char *nsegmentR); void splice_Glimmer(char *in_seqx, int ls, int us, int le, int ue, char *in_seqy, int ys, int ye, double *gtscore, double *agscore, double *ctscore, double *acscore, int ori, char *nsgementL, char *nsegmentR, char *asegmentL, char *asegmentR); int stepct(int n) { if (n<0) fatal("splice.c: Negative value in stepct()."); if (n<=4) return 9; // return((int)1.6*8); if (n<=8) return 10; // return((int)1.6*9); if (n<=12) return 12; // return((int)1.6*10); return 12; // return((int)1.6*11); }; splice_t *new_splice(char,int,int,int,int,double,splice_t *); void splice(char *in_seqx, int ls, int us, int le, int ue, char *in_seqy, int ys, int ye, splice_t **gcell, splice_t **ccell, int ori, int spl_model); // Functions and defines from GeneSplicer header files: sites.h // #define ALPHABET_SIZE 4 void init_GeneSPlicer (); double ScoreAcceptor_GeneSplicer (char *); double ScoreDonor_GeneSplicer (char *); void 
UnLoadSites_GeneSplicer (); // ... sites_score.h #define NUM_VALUES_SCORES 2560 double score_ex_acc[NUM_VALUES_SCORES]; double score_in_acc[NUM_VALUES_SCORES]; double score_ex_don[NUM_VALUES_SCORES]; double score_in_don[NUM_VALUES_SCORES]; // ... sites_donor.h #define NUM_MODELS_DON 25 #define NUM_VALUES_DON 928 double don[NUM_MODELS_DON][NUM_VALUES_DON]; // ... sites_acceptor.h #define NUM_MODELS_ACC 25 #define NUM_VALUES_ACC 928 double acc[NUM_MODELS_ACC][NUM_VALUES_ACC]; // Functions from misc.h // void fatal(const char *msg) { fflush(stdout); fprintf(stderr, "%s\n", msg); fflush(stderr); kill(getpid(), SIGKILL); }; // Poly-A/T masking stuff // #define T_ONLY 1 #define A_ONLY 2 #define BOTH_AT 3 void get_polyAT(char *seq, int len, int *pA, int *pT, int flag=BOTH_AT); //void remove_poly(struct edit_script_list **,Exon *,char *,char *,int,int *,int *); void remove_polyA_back(struct edit_script_list **,Exon *,char *,char*,int,int *); void remove_polyT_front(struct edit_script_list **,Exon *,char *,char*,int *); void trim_polyT_align(struct edit_script_list **,Exon **,const int,int *,char *,char *); void trim_polyA_align(struct edit_script_list **,Exon *,Exon **,const int,int *,char *,char *); /* reverse a list of edit script chains */ void script_flip_list(edit_script_list **left) { edit_script_list *ep, *ahead, *behind; ahead = *left; ep = NULL; while (ahead!=NULL) { behind = ep; ep = ahead; ahead = ahead->next_script; ep->next_script = behind; } *left = ep; } int computePercentIdentity(int numEdits, int alignLen) { if (alignLen == 0) return(0); if (numEdits == 0) return 100; int pctId = (int)(round(100.0 * (1 - 2.0 * numEdits / alignLen))); return ((pctId < 100) ? 
pctId : 99); }; public: Sim4(sim4parameters *p) { globalParams = p; _pallochandle = pallochandle(64 * 1024); _genSeq = 0L; _estSeq = 0L; _genLen = 0; _estLen = 0; for (uint32 i=256; i;) encoding[--i] = -1; encoding[(int)'A'] = encoding[(int)'a'] = 0; encoding[(int)'C'] = encoding[(int)'c'] = 1; encoding[(int)'G'] = encoding[(int)'g'] = 2; encoding[(int)'T'] = encoding[(int)'t'] = 3; last_GT.pos1 = last_GT.pos2 = 0; last_CT.pos1 = last_CT.pos2 = 0; last_AG.pos1 = last_AG.pos2 = 0; last_AC.pos1 = last_AC.pos2 = 0; wordExtensionAllowance = 12; _mspManager.setLimits(globalParams->_mspLimitAbsolute, globalParams->_mspLimitPercent); _mspManager.setParameters(globalParams->_match, globalParams->_imismatch, globalParams->_vmismatch, globalParams->_percentError, wordExtensionAllowance); _mspManager.setExonSource(&_exonManager); // wordSize -- for finding initial seeds // wordSizeInt -- for extending seeds between seeds // wordSizeExt -- for extending seeds on the ends // // If sim4parameters defined wordSizeInt or Ext use that, // otherwise, use the original method. 
// wordSize = globalParams->_wordSize; wordSizeInt = MIN(8, globalParams->_wordSize); wordSizeExt = MIN(10, globalParams->_wordSize); if (globalParams->_wordSizeInt) wordSizeInt = globalParams->_wordSizeInt; if (globalParams->_wordSizeExt) wordSizeExt = globalParams->_wordSizeExt; spacedSeed = globalParams->_spacedSeed; spacedSeedInt = globalParams->_spacedSeedInt; spacedSeedExt = globalParams->_spacedSeedExt; spacedSeedMSS = mss_t(spacedSeed); spacedSeedIntMSS = mss_t(spacedSeedInt); spacedSeedExtMSS = mss_t(spacedSeedExt); spliceModel = globalParams->_spliceModel; if (!globalParams->_dontForceCanonicalSplicing) splice_init(spliceModel); mspThreshold1 = globalParams->_mspThresh1; //K; mspThreshold2 = globalParams->_mspThresh2; //C; G_score = 0; C_score = 0; exon_list = 0L; mask = 0; hashtable = 0L; phashtable.nextPos = 0L; phashtable.nodes = 0L; phashtable.nodesused = 0; thashtable.nextPos = 0L; thashtable.nodes = 0L; thashtable.nodesused = 0; _accurateSequences = false; }; ~Sim4() { //pdumppalloc(_pallochandle); pfree2(_pallochandle); pfreehandle(_pallochandle); delete [] phashtable.nextPos; delete [] phashtable.nodes; delete [] thashtable.nextPos; delete [] thashtable.nodes; }; sim4polishList *run(sim4command *cmd); void *ckalloc(size_t size) { return(palloc2(size, _pallochandle)); }; void ckfree(void *) { }; }; #endif // SIM4_H kmer-code-2013-trunk/libsim4/sim4core/Make.include0000644000000000000000000000223611512763666020500 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../../libutil/)/ LIBBIO/ :=$(realpath $/../../libbio/)/ LIBS4P/ :=$(realpath $/../sim4polish/)/ src := $/sim4command.C \ $/sim4parameters.C \ $/sim4string.C \ $/Xtend1.C \ $/align.C \ $/exon.H \ $/exon_cores.C \ $/extend.C \ $/glimmerSplice.C \ $/glimmerSplice.H \ $/greedy.C \ $/mspManager.C \ $/mspManager.H \ $/pluri_align.C \ $/poly.C \ $/sim4.H \ $/sim4b1.C \ $/sim4b1a.C \ $/sim4b1-1.C \ $/sim4b1-2.C \ $/sim4b1-3.C \ $/sim4b1-4.C \ $/sim4b1_s.C \ $/sim4defines.H \ 
$/sim4parameters.H \ $/sites.C \ $/sites_acceptor.C \ $/sites_donor.C \ $/sites_score.C \ $/splice.C \ $/table.C \ $/util.C $/.CXX_SRCS := $(filter %.C,${src}) $/.CXX_INCS := $(filter %.H,${src}) $/.CXX_LIBS := $/libsim4.a $/.CLEAN := $/*.o $/libsim4.a: ${$/.CXX_SRCS:.C=.o} $(eval $/%.d $/%.o: CXXFLAGS += -I${LIBUTL/} -I${LIBBIO/} -I${LIBS4P/}) kmer-code-2013-trunk/libsim4/sim4core/sites.C0000644000000000000000000004454111515726327017512 0ustar rootroot//Copyright (c) 2003 by Mihaela Pertea #include "sim4.H" #include "sites_score.H" #include "sites_donor.H" #include "sites_acceptor.H" char DONOR_TREE[] = "( 0 2 4 10000 l( 1 2 9 7841 l( 3 0 8 5666 l( 5 0 3 3977 l( 7 0 7 2186 l( 9 -1 -1 995 l r ) r( 10 -1 -1 1191 l r ) ) r( 8 3 10 1791 l( 15 -1 -1 931 l r ) r( 16 -1 -1 860 l r ) ) ) r( 6 -1 -1 1689 l r ) ) r( 4 -1 -1 2175 l r ) ) r( 2 -1 -1 2159 l r ) )"; // \n5 20\n"; char ACCEPTOR_TREE[] = "( 0 1 23 10000 l( 1 3 21 6544 l( 3 3 20 3573 l( 5 3 16 2146 l( 7 -1 -1 1295 l r ) r( 8 -1 -1 851 l r ) ) r( 6 -1 -1 1427 l r ) ) r( 4 1 21 2971 l( 15 1 20 1914 l( 17 -1 -1 1009 l r ) r( 18 -1 -1 905 l r ) ) r( 16 -1 -1 1057 l r ) ) ) r( 2 -1 -1 3456 l r ) )"; // \n44 72\n"; #define TRUE 1 #define FALSE 0 #define ACCEPTOR_LEN 29 /* Positions +44,72 in a80 */ #define ACCEPTOR_SIGNAL_OFFSET 24 /* Start of AG */ #define DONOR_LEN 16 /* Positions +5,20 in d80 */ #define DONOR_SIGNAL_OFFSET 5 /* Start of GT */ #define MARKOV_DEGREE 3 #define MARKOV_LEN 64 /* ALPHABET_SIZE ^ MARKOV_DEGREE */ #define LOW_SCORE -99.0 /* Score if pattern does not have GT or AG signal */ #define SITE_LEN 162 #define CODING_LEN 80 #ifndef EXIT_FAILURE #define EXIT_FAILURE -1 #endif #ifndef EXIT_SUCCESS #define EXIT_SUCCESS 0 #endif typedef struct tree { int val; int consens; int poz; int no; struct tree *left; struct tree *right; } tree; void postorder(tree *root) { if(root) { postorder(root->left); postorder(root->right); printf("[%d %d %d %d] ", root->val, root->consens, root->poz, root->no); } } typedef 
unsigned int word; int Acc (const int *, double *,tree *t,int ind); int Don (const int *, double *, tree *t,int ind); int comp(const void *a, const void *b); int findfile(const int * S, tree *t); int readtree(Sim4 *S4, char *line, tree *t, int start); int find(char *line, int start); int Is_Cod_NonCod (const int * , double *, int ind); float ****Load4dim(Sim4 *S4, int d1, int d2, int d3, int d4); void free4dim(Sim4 *S4, float ****ptr,int d1, int d2, int d3); #define Start_PosEx 56 #define Stop_PosEx 84 #define Start_PosIn 75 #define Stop_PosIn 90 #define Start_Cod 0 #define Stop_Cod 79 #define Start_NoCod 82 #define Stop_NoCod 161 int markov_degree; int markov_len; tree *tacc = NULL; tree *tdon = NULL; int readtacc=FALSE; int readtdon=FALSE; int accmax = 0; int donmax = 0; float ****Acc_Positive_Table = NULL; float ****Acc_Negative_Table = NULL; int *Acc_Tables_Loaded = NULL; float ****Don_Positive_Table = NULL; float ****Don_Negative_Table = NULL; int *Don_Tables_Loaded = NULL; float Cod_Positive_Table [4][CODING_LEN] [ALPHABET_SIZE] [MARKOV_LEN]; float Cod_Negative_Table [4][CODING_LEN] [ALPHABET_SIZE] [MARKOV_LEN]; int Cod_Tables_Loaded[4] = {FALSE,FALSE,FALSE,FALSE}; void Sim4::loadGeneSplicerModel() { int i; markov_degree=1; markov_len=(int)pow(ALPHABET_SIZE,1); if(!readtdon) { tdon = (tree *) malloc(sizeof(tree)); if (tdon == NULL) {fprintf(stderr,"Memory allocation for tree failure.\n"); abort();} donmax=readtree(this, DONOR_TREE, tdon, 0); readtdon=TRUE; // alloc memory for the tables Don_Positive_Table=Load4dim(this,donmax,DONOR_LEN,ALPHABET_SIZE,markov_len); Don_Negative_Table=Load4dim(this,donmax,DONOR_LEN,ALPHABET_SIZE,markov_len); Don_Tables_Loaded=(int *) malloc(donmax*sizeof(int)); if(Don_Tables_Loaded == NULL) { fprintf(stderr,"Memory allocation for donor site tables failed.\n"); abort(); } for(i=0;ileft); freetree(t->right); free(t); t=NULL; } #endif void Sim4::UnLoadSites_GeneSplicer() { int i; // Garbage collected! 
(not yet - needs palloc) if(readtacc) { free4dim(Acc_Positive_Table,accmax,ACCEPTOR_LEN,ALPHABET_SIZE); free4dim(Acc_Negative_Table,accmax,ACCEPTOR_LEN,ALPHABET_SIZE); if(Acc_Tables_Loaded != NULL ) free(Acc_Tables_Loaded); } // Garbage collected! (not yet - needs palloc) if(readtdon) { free4dim(Don_Positive_Table,donmax,DONOR_LEN,ALPHABET_SIZE); free4dim(Don_Negative_Table,donmax,DONOR_LEN,ALPHABET_SIZE); if(Don_Tables_Loaded != NULL ) free(Don_Tables_Loaded); } #ifdef DEBUG printf("tacc:\n"); postorder(tacc); printf("\n"); #endif // Garbage collected! (not yet - needs palloc) if(readtacc) freetree(tacc); #ifdef DEBUG printf("tdon:\n"); postorder(tdon); printf("\n"); #endif // Garbage collected! (not yet - needs palloc) if(readtdon) freetree(tdon); readtacc=FALSE; readtdon=FALSE; for(i=0;i<4;i++) Cod_Tables_Loaded[i]=FALSE; } float ****Load4dim(Sim4 *S4, int d1, int d2, int d3, int d4) { int i,j,k; float ****ptr; ptr = (float ****) malloc(d1 * sizeof(float ***)); if(ptr==NULL) { fprintf(stderr,"Memory allocation for splice site tables failed.\n"); abort(); } for(i=0;i= SITE_LEN); for(i=0;i1) Acc(T, &S2, tacc,1); else S2=S1; score1=(S1+S2)/2; // if(score1<=THR_ACC) score1=-99; score2=0; score3=0; for(i=0;i<=Stop_NoCod-Start_NoCod;i++) T[i]=B[i+Start_NoCod]; Is_Cod_NonCod(T,&score2,0); for(i=0;i<=Stop_Cod-Start_Cod;i++) T[i]=B[i+Start_Cod]; Is_Cod_NonCod(T,&score3,1); // printf("score1 = %.5f, score2 = %.5f, score3 = %.5f\n", score1, score2, score3); Score=score1+score2+score3; return(Score); } double Sim4::ScoreDonor_GeneSplicer(char *Data) { double Score,S1,S2; int ind,i; int T[100]; double score1,score2,score3; char *B = Data; #if 0 assert( strlen(Data) >= SITE_LEN); for(i=0;i1) Don(T, &S2, tdon,1); else S2=S1; score1=(S1+S2)/2; score2=0; score3=0; for(i=0;i<=Stop_Cod-Start_Cod;i++) T[i]=B[i+Start_Cod]; Is_Cod_NonCod(T,&score2,2); for(i=0;i<=Stop_NoCod-Start_NoCod;i++) T[i]=B[i+Start_NoCod]; Is_Cod_NonCod(T,&score3,3); Score=score1+score2+score3; return Score; } 
int readtree(Sim4 *S4, char *line, tree *t, int start) { int len; int i,n; int val,valmax; char part[10]; len=strlen(line); i=start; while((line[i]=='(')||(line[i]==' ')) i++; n=i; while(line[i]!=' ') { part[i-n]=line[i]; i++; } part[i-n]='\0'; t->val=atoi(part); valmax=t->val; i++; n=i; while(line[i]!=' ') { part[i-n]=line[i]; i++; } part[i-n]='\0'; t->consens=atoi(part); i++; n=i; while(line[i]!=' ') { part[i-n]=line[i]; i++; } part[i-n]='\0'; t->poz=atoi(part); i++; n=i; while(line[i]!=' ') { part[i-n]=line[i]; i++; } part[i-n]='\0'; t->no=atoi(part); t->left=NULL; t->right=NULL; i+=2;n=i; if(line[i]=='(') { i=find(line,i+1); t->left = (tree *) malloc(sizeof(tree)); if (t->left == NULL) {fprintf(stderr,"Memory allocation for tree failure.\n"); abort();} val=readtree(S4,line,t->left,n); if(val>valmax) valmax=val; } i+=2;n=i; if(line[i]=='(') { i=find(line,i+1); t->right = (tree *) malloc(sizeof(tree)); if (t->right == NULL) { fprintf(stderr,"Memory allocation for tree failure.\n"); abort(); } val=readtree(S4,line,t->right,n); if(val>valmax) valmax=val; } valmax++; return(valmax); } int find(char *line, int start) { int stop,i; i=start; while(line[i]!=')') if(line[i]=='(') i=find(line,i+1); else i++; stop=i+1; return(stop); } int comp(const void *a, const void *b) { if(*(double *)a > *(double *)b) return(1); else if (*(double *)a==*(double *)b) return(0); else return(-1); } int findfile(const int * S, tree *t) { int val, cons, poz; val=t->val; cons=t->consens; if( cons !=-1) { poz=t->poz; if(S[poz]==cons) val=findfile(S,t->left); else val=findfile(S, t->right); } return(val); } int findleaf(tree *t, int n, int leaf, int *found) { int ret=n; if(t==NULL) { fprintf(stderr,"tree NULL\n");exit(0);} if(t->val == leaf) {*found=1; return(n+1);} if(t->left == NULL && t->right == NULL) return(n+1); if(t->left != NULL) ret=findleaf(t->left,n,leaf,found); if(!(*found) && t->right != NULL) ret=findleaf(t->right,ret,leaf,found); return(ret); } int Acc (const int * S, double * 
Return_Score, tree *t,int ind) /* Evaluate string S [0 .. (ACCEPTOR_LEN -1)] and * return TRUE or FALSE as to whether it is a likely acceptor * site. Also set Return_Score to the probability that it is an acceptor * site. */ { double Positive_Sum, Negative_Sum, Score; #if RETURN_TRUE_PROB double X, Y; #endif int i, j, k, Sub, no, idx; /* see which acceptor you should use */ if(ind) { no=findfile(S,t); k=0; } else no=0; idx = 0; if (! Acc_Tables_Loaded[no]) { for (i = markov_degree - 1; i < ACCEPTOR_LEN; i ++) for (k = 0; k < markov_len; k ++) for (j = 0; j < ALPHABET_SIZE; j ++) { Acc_Positive_Table[no][i][j][k] = acc[no][idx++]; } for (i = markov_degree - 1; i < ACCEPTOR_LEN; i ++) for (k = 0; k < markov_len; k ++) for (j = 0; j < ALPHABET_SIZE; j ++) { Acc_Negative_Table[no][i][j][k] = acc[no][idx++]; } Acc_Tables_Loaded[no] = TRUE; } /* if (S [ACCEPTOR_SIGNAL_OFFSET] != 0 || S [ACCEPTOR_SIGNAL_OFFSET + 1] != 2) // AG { * Return_Score = LOW_SCORE; return FALSE; } */ Sub = 0; for (i = 0; i < markov_degree; i ++) Sub = ALPHABET_SIZE * Sub + S [i]; Positive_Sum = Acc_Positive_Table [no][markov_degree - 1] [0] [Sub]; Negative_Sum = Acc_Negative_Table [no][markov_degree - 1] [0] [Sub]; for (i = markov_degree; i < ACCEPTOR_LEN; i ++) { j = S [i]; Positive_Sum += Acc_Positive_Table [no] [i] [j] [Sub]; Negative_Sum += Acc_Negative_Table [no] [i] [j] [Sub]; Sub = ALPHABET_SIZE * (Sub % (markov_len / ALPHABET_SIZE)) + j; } Score = Positive_Sum - Negative_Sum; * Return_Score = Score; return(1); } int Don (const int * S, double * Return_Score, tree *t,int ind) /* Evaluate string S [0 .. (DONOR_LEN -1)] and * return TRUE or FALSE as to whether it is a likely donor * site. Also set Return_Score to the probability that it is an donor * site. */ { double Positive_Sum, Negative_Sum, Score; int no; #if RETURN_TRUE_PROB double X, Y; #endif int i, j, k, Sub, idx; /* see which donor file you should use */ if(ind) { no=findfile(S,t); k=0; } else no=0; idx = 0; if (! 
Don_Tables_Loaded[no] ) { for (i = markov_degree - 1; i < DONOR_LEN; i ++) for (k = 0; k < markov_len; k ++) for (j = 0; j < ALPHABET_SIZE; j ++) { Don_Positive_Table[no][i][j][k] = don[no][idx++]; } for (i = markov_degree - 1; i < DONOR_LEN; i ++) for (k = 0; k < markov_len; k ++) for (j = 0; j < ALPHABET_SIZE; j ++) { Don_Negative_Table[no][i][j][k] = don[no][idx++]; } Don_Tables_Loaded [no] = TRUE; } /* if (S [DONOR_SIGNAL_OFFSET] != 2 || S [DONOR_SIGNAL_OFFSET + 1] != 3) // GT { * Return_Score = LOW_SCORE; return FALSE; } */ Sub = 0; for (i = 0; i < markov_degree; i ++) Sub = ALPHABET_SIZE * Sub + S [i]; Positive_Sum = Don_Positive_Table [no] [markov_degree - 1] [0] [Sub]; Negative_Sum = Don_Negative_Table [no] [markov_degree - 1] [0] [Sub]; for (i = markov_degree; i < DONOR_LEN; i ++) { j = S [i]; Positive_Sum += Don_Positive_Table [no] [i] [j] [Sub]; Negative_Sum += Don_Negative_Table [no] [i] [j] [Sub]; Sub = ALPHABET_SIZE * (Sub % (markov_len / ALPHABET_SIZE)) + j; } Score = Positive_Sum - Negative_Sum; * Return_Score = Score; return(1); } int Is_Cod_NonCod (const int * S, double * Return_Score, int ind) /* Evaluate string S [0 .. (CODING_LEN -1)] and * return TRUE or FALSE as to whether it is a likely donor * site. Also set Return_Score to the probability that it is an donor * site. */ { double Positive_Sum, Negative_Sum, Score; double *scores; int no; #if RETURN_TRUE_PROB double X, Y; #endif int i, j, k, Sub, idx; no=ind; switch (no) { case 0: // case of exon in acceptor scores = score_ex_acc; break; case 1: // case of intron in acceptor scores = score_in_acc; break; case 2: // case of exon in donor scores = score_ex_don; break; case 3: // case of intron in donor scores = score_in_don; break; } idx = 0; if (! 
Cod_Tables_Loaded[no] ) {
    /* First use of table 'no': unpack scores[] into the lookup tables.
     * The positive table comes first, then the negative table; idx runs
     * on from one to the other. */
    for (i = markov_degree - 1; i < CODING_LEN; i ++)
      for (k = 0; k < markov_len; k ++)
        for (j = 0; j < ALPHABET_SIZE; j ++) {
          Cod_Positive_Table[no][i][j][k] = scores[idx++];
        }

    for (i = markov_degree - 1; i < CODING_LEN; i ++)
      for (k = 0; k < markov_len; k ++)
        for (j = 0; j < ALPHABET_SIZE; j ++) {
          Cod_Negative_Table[no][i][j][k] = scores[idx++];
        }

    Cod_Tables_Loaded [no] = TRUE;
  }

  /* Sub encodes the preceding markov_degree symbols (the context). */
  Sub = 0;
  for (i = 0; i < markov_degree; i ++)
    Sub = ALPHABET_SIZE * Sub + S [i];

  Positive_Sum = Cod_Positive_Table [no] [markov_degree - 1] [0] [Sub];
  Negative_Sum = Cod_Negative_Table [no] [markov_degree - 1] [0] [Sub];

  for (i = markov_degree; i < CODING_LEN; i ++) {
    j = S [i];
    Positive_Sum += Cod_Positive_Table [no] [i] [j] [Sub];
    Negative_Sum += Cod_Negative_Table [no] [i] [j] [Sub];
    /* Drop the oldest symbol from the context and append the new one. */
    Sub = ALPHABET_SIZE * (Sub % (markov_len / ALPHABET_SIZE)) + j;
  }

  Score = Positive_Sum - Negative_Sum;

  * Return_Score = Score;

  return (1);
}
kmer-code-2013-trunk/libsim4/sim4core/sim4b1a.C0000644000000000000000000000614210061055347017606 0ustar rootroot#include "sim4.H"

//  Try to add one short exon in front of the first exon and one behind
//  the last exon of the list headed by Lblock.
//
//  For the first exon (Lblock->next_exon): if the two genomic bases at
//  _genSeq+frGEN-3 differ from END_SIG, or the exon does not start at EST
//  position 1, the remembered site last_AG or last_AC (chosen by comparing
//  G_score with abs(C_score)) is used.  A probe of END_SIG, the first
//  pos2-1 EST bases and START_SIG is handed to bmatch(); if bmatch()
//  returns an exon, the first exon is trimmed to start at the remembered
//  site and the new exon is linked in front of it.
//
//  The last exon (the last one in the chain with a non-zero toGEN) is
//  treated symmetrically with last_GT / last_CT and fmatch().
//
//  NOTE(review): bmatch(), fmatch(), START_SIG and END_SIG are defined
//  outside this view; the probe buffer size below assumes the signal
//  strings are two characters long -- confirm.
//
void
Sim4::adjustBoundariesOfMarginalExons(Exon *Lblock) {
  coords *sig;
  char    tmp[50];
  Exon   *newthing;
  Exon   *tmp_block = Lblock->next_exon;

  /* condition for non-signal */
  if (tmp_block && tmp_block->toGEN &&
      (strncmp((char *)(_genSeq+tmp_block->frGEN-3), END_SIG, (size_t)2) ||
       (tmp_block->frEST!=1))) {
    sig = (G_score>=abs(C_score)) ? &last_AG : &last_AC;

    //  Only sites at most 20 EST bases from the start are used, so the
    //  probe occupies at most 2 + 19 + 2 + 1 bytes of tmp[].
    if (sig->pos1 && (sig->pos2<=20)) {
      /* generated in extend_bw */
      assert(sig->pos2 > 1);
      (void)strcpy((char *)tmp,END_SIG);
      //  strncpy() does not terminate here; the strcpy() that follows
      //  writes directly after the copied bases and adds the terminator.
      (void)strncpy((char *)(tmp+2),(char *)_estSeq,(size_t)sig->pos2-1);
      (void)strcpy((char *)(tmp+sig->pos2+1), START_SIG);
      newthing = bmatch(_genSeq,tmp,tmp_block->frGEN-3,sig->pos2+3,1,1);
      if (newthing) {
        //  Trim the old first exon, then splice the new exon in before it.
        Lblock->next_exon->frGEN = sig->pos1;
        Lblock->next_exon->frEST = sig->pos2;
        Lblock->next_exon->length -= sig->pos2-1;
        newthing->next_exon = Lblock->next_exon;
        newthing->ori = (G_score>=abs(C_score)) ? 'G' : 'C';
        Lblock->next_exon = newthing;
      }
    }
  }

  //  Advance to the last exon that is followed only by an entry with
  //  toGEN == 0 (or by nothing).
  while (tmp_block && tmp_block->next_exon && tmp_block->next_exon->toGEN)
    tmp_block = tmp_block->next_exon;

  if (tmp_block && tmp_block->toGEN &&
      (strncmp((char *)(_genSeq+tmp_block->toGEN),START_SIG,(size_t)2) ||
       (tmp_block->toEST!=_estLen))) {
    sig = (G_score>=abs(C_score)) ? &last_GT : &last_CT;

    //  Only sites at most 20 EST bases from the end are used, so the
    //  probe occupies at most 2 + 20 + 2 + 1 bytes of tmp[].
    if (sig->pos1 && (_estLen-sig->pos2<=20)) {
      assert(_estLen-sig->pos2 >= 0);
      (void)strcpy((char *)tmp,END_SIG);
      (void)strncpy((char *)(tmp+2),(char *)(_estSeq+sig->pos2),
                    (size_t)_estLen-sig->pos2);
      (void)strcpy((char *)(tmp+_estLen-sig->pos2+2),START_SIG);
      newthing = fmatch(_genSeq+sig->pos1-1,tmp,
                        _genLen-sig->pos1+1,_estLen-sig->pos2+4,
                        sig->pos1-1,sig->pos2+1);
      if (newthing) {
        //  Trim the old last exon, then splice the new exon in after it.
        tmp_block->toGEN = sig->pos1;
        tmp_block->toEST = sig->pos2;
        newthing->next_exon = tmp_block->next_exon;
        tmp_block->next_exon = newthing;
        tmp_block->ori = (G_score>=abs(C_score)) ? 'G' : 'C';
      }
    }
  }
}


//  Record in last_AG and last_AC the first "AG" and the first "AC" found
//  when scanning v upward from frGEN-1 to toGEN-3, comparing the two
//  characters at _genSeq+v-2.  pos1 receives v+1; pos2 receives the EST
//  coordinate at the same offset from the exon start.  A member is left
//  unchanged when its dinucleotide is not found.
//
//  NOTE(review): _genSeq+v-2 lies before the start of _genSeq when
//  frGEN < 3 -- confirm callers never pass such an exon.
//
void
Sim4::findLastAGandAC(Exon *tmp_block1) {
  int v;

  for (v=tmp_block1->frGEN-1; v<=tmp_block1->toGEN-3; v++)
    if (!strncmp((char *)(_genSeq+v-2),"AG",(size_t)2)) {
      last_AG.pos1 = v+1;
      last_AG.pos2 = tmp_block1->frEST + (v-tmp_block1->frGEN)+1;
      break;
    }

  for (v=tmp_block1->frGEN-1; v<=tmp_block1->toGEN-3; v++)
    if (!strncmp((char *)(_genSeq+v-2),"AC",(size_t)2)) {
      last_AC.pos1 = v+1;
      last_AC.pos2 = tmp_block1->frEST + (v-tmp_block1->frGEN)+1;
      break;
    }
}


//  Record in last_GT and last_CT the first "GT" and the first "CT" found
//  when scanning v downward from toGEN to frGEN, comparing the two
//  characters at _genSeq+v.  pos1 receives v; pos2 receives the EST
//  coordinate at the same distance from the exon end.  A member is left
//  unchanged when its dinucleotide is not found.
//
void
Sim4::findLastGTandCT(Exon *tmp_block) {
  int v;

  for (v=tmp_block->toGEN; v>=tmp_block->frGEN; v--)
    if (!strncmp((char *)(_genSeq+v),"GT",(size_t)2)) {
      last_GT.pos1 = v;
      last_GT.pos2 = tmp_block->toEST-(tmp_block->toGEN-v);
      break;
    }

  for (v=tmp_block->toGEN; v>=tmp_block->frGEN; v--)
    if (!strncmp((char *)(_genSeq+v),"CT",(size_t)2)) {
      last_CT.pos1 = v;
      last_CT.pos2 = tmp_block->toEST-(tmp_block->toGEN-v);
      break;
    }
}
kmer-code-2013-trunk/libsim4/sim4core/sim4b1-4.C0000644000000000000000000000770012415066336017614 0ustar rootroot#include "sim4.H"

int
Sim4::SIM4_block4(bool good_match, Exon* &tmp_Lblock, Exon* 
&tmp_Rblock, Exon* &tmp_block, Exon* &tmp_block1) { int I, J; int rollbflag = 0; int cost; //fprintf(stderr, "Called SIM4_block4()\n"); if (_accurateSequences) findLastGTandCT(tmp_block); // These two blocks should do the same thing. The first one isn't readable. #if 0 int diff = (int)(tmp_block1->frEST - tmp_block->toEST - 1); diff = (int)(MIN(diff,(int)(MAX_GRINIT/2))); cost = EXTEND_FW(_estSeq+tmp_block->toEST, _genSeq+tmp_block->toGEN, diff, MIN(4*diff,tmp_block1->frGEN-tmp_block->toGEN-1), tmp_block->toEST,tmp_block->toGEN, &I, &J); #else int diff = MIN(tmp_block1->frEST - tmp_block->toEST - 1, MAX_GRINIT/2); int u = MIN(4*diff, tmp_block1->frGEN - tmp_block->toGEN - 1); cost = EXTEND_FW(_estSeq + tmp_block->toEST, _genSeq + tmp_block->toGEN, diff, u, tmp_block->toEST, tmp_block->toGEN, &I, &J); #endif if ((good_match==0) || tmp_block1->flag || (I==_genLen) || (J==_estLen)) { if (tmp_block->toGEN) { tmp_block->toEST = I; tmp_block->toGEN = J; tmp_block->edist += cost; tmp_block->length = tmp_block->toEST-tmp_block->frEST+1; tmp_Rblock = tmp_Lblock = NULL; } else /* special case: no initial exon */ tmp_Lblock = tmp_Rblock = NULL; } //PRINTEXONS("tmp_block after if\n", tmp_block); /* use blast if marginal gap still exists, and this is first scan */ if (!(diff=(int)(tmp_block1->frEST-tmp_block->toEST-1)) || tmp_block1->flag) { /* blast-treated region or no gap */ tmp_Rblock = tmp_Lblock = NULL; } else { //PRINTEXONS("tmp_block\n", tmp_block); //PRINTEXONS("tmp_block1\n", tmp_block); exon_cores(_genSeq+tmp_block->toGEN-1, _estSeq+tmp_block->toEST-1, tmp_block1->frGEN-tmp_block->toGEN-1, diff, tmp_block->toGEN+1, tmp_block->toEST+1, 1, spacedSeedExtMSS, mspThreshold2, TEMP); //PRINTEXONS("3\n", exon_list); tmp_Lblock = tmp_Rblock = exon_list; while (tmp_Rblock && tmp_Rblock->next_exon) tmp_Rblock = tmp_Rblock->next_exon; if ((!tmp_Lblock && tmp_block1->frGEN-tmp_block->toGEN>50000) || (tmp_Lblock && (tmp_Lblock->frEST-tmp_block->toEST>100) && 
(tmp_Lblock->frGEN-tmp_block->frGEN>50000)) || (tmp_Lblock && (tmp_block1->frEST-tmp_Rblock->toEST>100) && (tmp_block1->frGEN-tmp_Rblock->frGEN>50000))) { /* possible large intron; increase the score weight */ //freeExonList(tmp_Lblock); garbage collected exon_list = _mspManager.doLinking(globalParams->_relinkWeight, DEFAULT_DRANGE, tmp_block->toGEN + 1, tmp_block->toEST + 1, 1, true, _genSeq, _estSeq); //PRINTEXONS("3a\n", exon_list); tmp_Lblock = tmp_Rblock = exon_list; while ((tmp_Rblock!=NULL) && (tmp_Rblock->next_exon!=NULL)) tmp_Rblock = tmp_Rblock->next_exon; } _mspManager.clear(); tmp_block1->flag = 1; if (tmp_Lblock) { rollbflag = 1; } else { if (tmp_block->toGEN) { tmp_block->toEST = I; tmp_block->toGEN = J; tmp_block->edist += cost; tmp_block->length = tmp_block->toEST-tmp_block->frEST+1; tmp_Rblock = tmp_Lblock = NULL; } else /* special case: no initial exon */ tmp_Lblock = tmp_Rblock = NULL; } } return(rollbflag); } kmer-code-2013-trunk/libsim4/sim4core/sim4defines.H0000644000000000000000000000243512415066336020573 0ustar rootroot// Don't define this (unless your name starts with L or B). // // Changes to here, sim4db.H and exon_cores.H // //#define INTERSPECIES #define DIST_CUTOFF 3 #define MIN_INTRON 30 #define SHORT_INTRON 50 #define LONG_INTRON 20000 #define SHORT_EXON 40 #define MAX_GRINIT 500 #define MAX_SLIDE 15 #define MAX_INTERNAL_GAP 50 #define DEFAULT_DRANGE 10 #define DEFAULT_WEIGHT 100 #define DEFAULT_RELINK_WEIGHT 500 #define DEFAULT_K 16 #define DEFAULT_C 12 #ifndef MIN #define MIN(x,y) ((x>y) ? (y):(x)) #endif #ifndef MAX #define MAX(x,y) ((x= abs(C_score)) ? "GT" : "CT") #define END_SIG ((G_score >= abs(C_score)) ? 
"AG" : "AC") #define DELETE 1 #define INSERT 2 #define SUBSTITUTE 3 #define INTRON 4 #define O_INTRON 5 #define SPLICE_ORIGINAL 0 #define SPLICE_GENESPLICER 1 #define SPLICE_GLIMMER 2 #define DEFAULT_SPLICE_MODEL SPLICE_ORIGINAL #define DEFAULT_SPACED_SEED "1xx1011011011xx11" #define DEFAULT_SPACED_SEED_INT "10011010100011" #define DEFAULT_SPACED_SEED_EXT "1101100011010111" #define SEED_SPAN 40 /* 22 */ kmer-code-2013-trunk/libsim4/sim4core/util.C0000644000000000000000000005474412415066336017343 0ustar rootroot#include "sim4.H" // Original call was if (!strncmp(S, "GT", 2)) {} // which is if (S == "GT") // #define DAcmp(S, A, B) (((S)[0] == A) && ((S)[1] == B)) void Sim4::complement_exons(Exon **left, int M, int N) { Exon *tmp_block, *right; char prev, ch; #ifdef SPLSCORE double spl=0, prevspl=0; #endif prev = 'U'; /* unknown, should trigger error */ tmp_block = *left; while (tmp_block) { if (tmp_block->toGEN) { register int aux; if (tmp_block->next_exon && tmp_block->next_exon->toGEN) { ch = tmp_block->ori; tmp_block->ori = prev; #ifdef SPLSCORE spl = tmp_block->splScore; tmp_block->splScore = prevspl; prevspl = spl; #endif switch (ch) { case 'C': prev = 'G'; break; case 'G': prev = 'C'; break; case 'N': prev = 'N'; break; case 'E': prev = 'E'; break; default: fatal("sim4b1.c: Inconsistency. 
Check exon orientation at complementation."); } } else { tmp_block->ori = prev; #ifdef SPLSCORE tmp_block->splScore = prevspl; #endif } aux = tmp_block->frGEN; tmp_block->frGEN = M+1-tmp_block->toGEN; tmp_block->toGEN = M+1-aux; aux = tmp_block->frEST; tmp_block->frEST = N+1-tmp_block->toEST; tmp_block->toEST = N+1-aux; } tmp_block = tmp_block->next_exon; if (tmp_block && tmp_block->toGEN) right = tmp_block; } flip_list(left,&right); } void Sim4::get_stats(Exon *lblock, sim4_stats_t *st) { Exon *t, *t1; bool singleExon = true; st->icoverage = 0; st->internal = 1; if ((lblock->next_exon == NULL) || !lblock->next_exon->toGEN) st->internal = 0; for (t=lblock->next_exon; t; t = t->next_exon) st->icoverage += t->length; t = lblock; while (t) { t1 = t->next_exon; if (t->toGEN && t1 && t1->toGEN) singleExon = false; if ((t->toGEN) && (t1) && (t1->frEST - t->toEST - 1 > 0) && t1->toGEN) st->internal = 0; t = t1; } if (!globalParams->_forceStrandPrediction) { if (((st->orientation != BOTH) && (!globalParams->_interspecies && (st->percentID < 90))) || (!globalParams->_interspecies && (st->internal == 0)) || singleExon) { st->orientation = BOTH; } } } void Sim4::flip_list(Exon **left, Exon **right) { Exon *ep, *ahead, *behind; *right = *left; ahead = *left; ep = NULL; while (ahead!=NULL) { behind = ep; ep = ahead; ahead = ahead->next_exon; ep->next_exon = behind; } *left = ep; } /* operates on a list sorted in increasing order of exon coordinates */ void Sim4::compact_list(Exon **Lblock, Exon **Rblock, int SI) { Exon *tmp_block=*Lblock, *tmp_block1; int diff; while ((tmp_block!=NULL) && ((tmp_block1=tmp_block->next_exon)!=NULL) && tmp_block1->toGEN) { if ((abs((tmp_block1->frEST-tmp_block1->frGEN) - (tmp_block->toEST-tmp_block->toGEN))<=SI) && ((diff=tmp_block1->frEST-tmp_block->toEST-1)<=MAX_INTERNAL_GAP)) { /* merge blocks */ tmp_block->toGEN = tmp_block1->toGEN; tmp_block->toEST = tmp_block1->toEST; tmp_block->length = tmp_block->toEST-tmp_block->frEST+1; tmp_block->edist 
+= tmp_block1->edist; tmp_block->edist -= (int)(globalParams->_percentError * diff); tmp_block->next_exon = tmp_block1->next_exon; //freeExon(tmp_block1); garbage collected } else tmp_block = tmp_block1; } /* reset right end of the list */ *Rblock = tmp_block; } /* ------------------ memory management routines --------------- */ int Sim4::good_ratio(int length) { if (length<=wordSize/2) return 2; else if (length<2*wordSize) return DIST_CUTOFF; else return (int)(.75 * globalParams->_percentError * length + 1); } void Sim4::merge(Exon **t0, Exon **t1) { Exon *tmp0, *tmp1; int diff; if ((*t0) && !(*t0)->toGEN) tmp0 = (*t0)->next_exon; else tmp0 = *t0; while (tmp0 && (tmp0!=*t1)) { tmp1 = tmp0->next_exon; assert(tmp1!=NULL); if (tmp1 && tmp1->toGEN && tmp0->toGEN && (abs((tmp1->frEST-tmp1->frGEN)-(tmp0->toEST-tmp0->toGEN))<=wordSize) && ((tmp1->frEST - tmp0->toEST - 1 <= wordSize))) { diff = tmp1->frEST - tmp0->toEST - 1; /* merge blocks tmp0 and tmp1 */ tmp0->frGEN = MIN(tmp0->frGEN, tmp1->frGEN); tmp0->frEST = MIN(tmp0->frEST, tmp1->frEST); tmp0->toGEN = MAX(tmp1->toGEN, tmp0->toGEN); tmp0->toEST = MAX(tmp1->toEST, tmp0->toEST); tmp0->length = tmp0->toEST-tmp0->frEST+1; tmp0->flag = tmp1->flag; tmp0->edist += tmp1->edist; tmp0->edist -= (int)(globalParams->_percentError * diff); if (tmp1==*t1) { /* tmp0->flag = (*t1)->flag; */ *t1 = tmp0; } tmp0->next_exon = tmp1->next_exon; //freeExon(tmp1); garbage collected } else { tmp0 = tmp0->next_exon; } } } void Sim4::free_align(edit_script_list *aligns) { edit_script_list *head; head = aligns; while ((head=aligns)!=NULL) { aligns = aligns->next_script; Free_script(head->script); ckfree(head); } } Exon * Sim4::bmatch (char *s1, char *s2, int l1, int l2, int offset1, int offset2) { int i, j, i1, score; Exon *newthing=NULL; for (i1=i=l1-3; i>=l2-3; i--, i1=i) { for (j=l2-3; j>=2; j--, i1--) if (*(s1+i1)!=*(s2+j)) break; if (j<2) { /* exact match for CDS found; check signals */ score = 0; if (*(s1+(i1--))==*(s2+(j--))) score++; 
if (*(s1+(i1--))==*(s2+(j--))) score++; if (*(s1+i1+l2-1)==*(s2+j+l2-1)) score++; if (*(s1+i1+l2)==*(s2+j+l2)) score++; if (score>=3) { newthing = _exonManager.newExon(i1+3+offset1, offset2, i1+3+offset1+l2-5, offset2+l2-5, l2-4, 0, 0, NULL); newthing->ori = (G_score >= abs(C_score)) ? 'G' : 'C'; return newthing; } } } return NULL; } Exon * Sim4::fmatch (char *s1, char *s2, int l1, int l2, int offset1, int offset2) { int i, j, i1, score; Exon *newthing=NULL; for (i1=i=2; i=l2-2) { /* exact match found for internal part, look for signals */ score = 0; if (*(s1+(i1++))==*(s2+(j++))) score++; if (*(s1+(i1++))==*(s2+(j++))) score++; if (*(s1+i1-l2)==*s2) score++; if (*(s1+i1-l2+1)==*(s2+1)) score++; if (score>=3) { newthing = _exonManager.newExon(i+offset1,offset2,i1+offset1-2,offset2+l2-5, l2-4,0,0,NULL); newthing->ori = (G_score >= abs(C_score)) ? 'G' : 'C'; return newthing; } } } return NULL; } /* -------------------- to be added to psublast ---------------------- */ bool Sim4::get_sync_flag(Exon *lblock, Exon *rblock, int w) { int numx=0, e2; Exon *t; if (((t=lblock->next_exon)==NULL) || !t->toGEN) return 0; numx++; e2 = t->toEST; while (((t=t->next_exon)!=NULL) && t->toGEN) { ++numx; if ((t->frEST-e2>1) || (t!=rblock && ((t->toEST-t->frEST+1<2*w+2) || (t->toGEN-t->frGEN+1<2*w+2)))) return 0; e2 = t->toEST; } return ((numx<3) ? 
0:1); } void Sim4::sync_slide_intron(int in_w, Exon *first, Exon *last, int spl_model, sim4_stats_t *st) { Exon *t0=NULL, *t1=NULL, *head = first; splice_t *g=NULL, *c=NULL, *cell=NULL; splice_t **Glist, **Clist; int Gscore=0, Cscore=0; char *oris; int w1, w2, ni, i, numC, numG, model; ni = 0; numG = numC = 0; // Count the exons to allocate space for Glist, Clist and oris // t0 = head; while (t0 && (t0!=last) && (t1=t0->next_exon) && t1->toGEN) { ni++; t0 = t1; } Glist = (splice_t **)ckalloc((ni + 1) * sizeof(splice_t *)); Clist = (splice_t **)ckalloc((ni + 1) * sizeof(splice_t *)); oris = (char *) ckalloc((ni + 1) * sizeof(char)); memset(Glist, 0, (ni + 1) * sizeof(splice_t *)); memset(Clist, 0, (ni + 1) * sizeof(splice_t *)); memset(oris, 0, (ni + 1) * sizeof(char)); if ((Glist == 0L) || (Clist == 0L) || (oris == 0L)) { fprintf(stderr, "Can't allocate memory for sync_slide_intron() with %d exons.\n", ni); exit(1); } ni = 0; /* assume forward orientation */ t0 = head; while (t0 && (t0!=last) && (t1=t0->next_exon) && t1->toGEN) { g = c = NULL; if (t1->frEST-t0->toEST-1==0) { if (!strncmp((char *)(_genSeq+t0->toGEN),"GT",2) && !strncmp((char *)(_genSeq+t1->frGEN-3),"AG",2)) { g = new_splice('G',t0->toGEN,t1->frGEN,t0->toEST,t1->frEST,-1,NULL); t0->ori = 'G'; oris[ni] = 'G'; numG++; #ifdef SPLSCORE t0->splScore = 999999; #endif } else if (!strncmp((char *)(_genSeq+t0->toGEN),"CT",2) && !strncmp((char *)(_genSeq+t1->frGEN-3),"AC",2)) { c = new_splice('C',t0->toGEN,t1->frGEN,t0->toEST,t1->frEST,-1,NULL); t0->ori = 'C'; oris[ni] = 'C'; numC++; #ifdef SPLSCORE t0->splScore = 888888; #endif } else { w1 = MIN(in_w, (int)(0.5*MIN(t0->length-1, t0->toGEN-t0->frGEN))); w2 = MIN(in_w, (int)(0.5*MIN(t1->length-1, t1->toGEN-t1->frGEN))); model = ((t0->toGEN-w1<=MAX_SPAN) || (t1->frGEN+w2+MAX_SPAN+2>_genLen)) ? 
SPLICE_ORIGINAL : spl_model; splice(_genSeq, t0->toGEN-w1, t0->toGEN+w1, t1->frGEN-w2, t1->frGEN+w2, _estSeq, t0->toEST-w1, t1->frEST+w2, &g, &c, BOTH, model); Gscore += g->score; Cscore += c->score; cell = NULL; oris[ni] = '*'; if (g->score>c->score) { numG++; cell = g; oris[ni] = 'G'; } else if (c->score>g->score) { numC++; cell = c; oris[ni] = 'C'; } else if (c->score==g->score) { numG++; numC++; cell = g; oris[ni] = 'G'; } #ifdef SPLSCORE t0->splScore = (model==spl_model) ? cell->score : 777777; #endif t0->ori = oris[ni]; t0->toGEN = cell->xs; t0->toEST = cell->ys; t1->frGEN = cell->xe; t1->frEST = cell->ye; t0->length = t0->toEST-t0->frEST+1; t1->length = t1->toEST-t1->frEST+1; } Clist[ni] = c; Glist[ni] = g; } else { t0->ori = 'E'; oris[ni] = 'E'; } ni++; t0 = t1; } st->orientation = BOTH; if ((numG==1) && (numC==1) && (!Glist[0] || !Clist[0] || !Glist[1] || !Clist[1])) goto free_all; if (numG && numG>=numC) { /* revisit all previous assignments that are inconsistent */ for (i=0, t0=head; inext_exon; switch (oris[i]) { case 'G': break; case 'C': if (Glist[i]==NULL) { /* compute the values for C */ w1 = MIN(in_w, (int)(0.5*MIN(t0->length-1, t0->toGEN-t0->frGEN))); w2 = MIN(in_w, (int)(0.5*MIN(t1->length-1, t1->toGEN-t1->frGEN))); model = ((t0->toGEN-w1<=MAX_SPAN) || (t1->frGEN+w2+MAX_SPAN+2>_genLen)) ? SPLICE_ORIGINAL : spl_model; splice(_genSeq, t0->toGEN-w1, t0->toGEN+w1, t1->frGEN-w2, t1->frGEN+w2, _estSeq, t0->toEST-w1, t1->frEST+w2, &g, &c, FWD, model); } else g = Glist[i]; #ifdef SPLSCORE t0->splScore = (model==spl_model) ? 
g->score : 777777; #endif t0->ori = 'G'; t0->toGEN = g->xs; t0->toEST = g->ys; t1->frGEN = g->xe; t1->frEST = g->ye; t0->length = t0->toEST-t0->frEST+1; t1->length = t1->toEST-t1->frEST+1; break; case 'E': break; default : fatal("sim4b1.c: intron orientation not initialized."); } if (oris[i]!='E') wobble(t0,t1,"GT","AG",_genSeq); } st->orientation = FWD; } else if (numC) { /* analyze all assignments for consistency */ for (i=0, t0=head; inext_exon; switch (oris[i]) { case 'C': break; case 'G': if (Clist[i]==NULL) { /* compute the values for C */ w1 = MIN(in_w, (int)(0.5*MIN(t0->length-1, t0->toGEN-t0->frGEN))); w2 = MIN(in_w, (int)(0.5*MIN(t1->length-1, t1->toGEN-t1->frGEN))); model = ((t0->toGEN-w1<=MAX_SPAN) || (t1->frGEN+w2+MAX_SPAN+2>_genLen)) ? SPLICE_ORIGINAL : spl_model; splice(_genSeq, t0->toGEN-w1, t0->toGEN+w1, t1->frGEN-w2, t1->frGEN+w2, _estSeq, t0->toEST-w1, t1->frEST+w2, &g, &c, BWD, model); } else c = Clist[i]; #ifdef SPLSCORE t0->splScore = (spl_model==model) ? c->score : 777777; #endif t0->ori = 'C'; t0->toGEN = c->xs; t0->toEST = c->ys; t1->frGEN = c->xe; t1->frEST = c->ye; t0->length = t0->toEST-t0->frEST+1; t1->length = t1->toEST-t1->frEST+1; break; case 'E': break; default : fatal("sim4b1.c: intron orientation not initialized."); } if (oris[i]!='E') wobble(t0,t1,"CT","AC",_genSeq); } st->orientation = BWD; } /* now free all memory allocated */ free_all: for (i=0; itoGEN; // first nt of donor char *q = seq + t1->frGEN - 3; // first nt of acceptor if (DAcmp(s, donor[0], donor[1])) { /* match in place */ if (DAcmp(q, acceptor[0], acceptor[1])) { return; } else if (DAcmp(q-1, acceptor[0], acceptor[1])) { t1->frGEN--; return; } else if (DAcmp(q+1, acceptor[0], acceptor[1])) { t1->frGEN++; return; } } else if (DAcmp(s-1, donor[0], donor[1])) { /* match is 1 off to the left */ if (DAcmp(q, acceptor[0], acceptor[1])) { t0->toGEN--; return; } else if (DAcmp(q-1, acceptor[0], acceptor[1])) { t0->toGEN--; t1->frGEN--; t0->toEST--; t1->frEST--; 
t0->length++; t1->length--; return; } else if (DAcmp(q+1, acceptor[0], acceptor[1])) { t0->toGEN--; t1->frGEN++; return; } } else if (DAcmp(s+1, donor[0], donor[1])) { /* match is 1 off to the right */ if (DAcmp(q, acceptor[0], acceptor[1])) { t0->toGEN++; return; } else if (DAcmp(q-1, acceptor[0], acceptor[1])) { t0->toGEN++; t1->frGEN--; return; } else if (DAcmp(q+1, acceptor[0], acceptor[1])) { t0->toGEN++; t1->frGEN++; t0->toEST++; t1->frEST++; t0->length--; t1->length++; return; } } else if (DAcmp(q-1, acceptor[0], acceptor[1])) { /* match is 1 off to the left */ t1->frGEN--; return; } else if (DAcmp(q+1, acceptor[0], acceptor[1])) { /* match is 1 off to the right */ t1->frGEN++; return; } } void Sim4::slide_intron(int in_w, Exon *first, Exon *last, int spl_model, sim4_stats_t *st) { Exon *t0, *t1, *head = first; splice_t *g, *c, *cell; char type; int w1, w2; int numG=0, numC=0, numE=0, numN=0, model; t0 = head; while (t0 && (t0!=last) && (t1=t0->next_exon) && t1->toGEN) { g = c = NULL; if (t1->frEST-t0->toEST-1==0) { if (!strncmp((char *)(_genSeq+t0->toGEN),"GT",2) && !strncmp((char *)(_genSeq+t1->frGEN-3),"AG",2)) { t0->ori = 'G'; numG++; #ifdef SPLSCORE t0->splScore = 999999; #endif } else if (!strncmp((char *)(_genSeq+t0->toGEN),"CT",2) && !strncmp((char *)(_genSeq+t1->frGEN-3),"AC",2)) { t0->ori = 'C'; numC++; #ifdef SPLSCORE t0->splScore = 888888; #endif } else { int gtag=0, ctac=0; char *s; w1 = MIN(in_w, (int)(0.5*MIN(t0->length-2, t0->toGEN-t0->frGEN-1))); w2 = MIN(in_w, (int)(0.5*MIN(t1->length-2, t1->toGEN-t1->frGEN-1))); model = ((t0->toGEN-w1<=MAX_SPAN) || (t1->frGEN+w2+MAX_SPAN+2>_genLen)) ? SPLICE_ORIGINAL : spl_model; splice(_genSeq, t0->toGEN-w1, t0->toGEN+w1, t1->frGEN-w2, t1->frGEN+w2, _estSeq, t0->toEST-w1, t1->frEST+w2, &g, &c, BOTH, model); if (g->score>c->score) { cell = g; type = 'G'; } else if (c->score>g->score) { cell = c; type = 'C'; } else { cell = g; type = 'G'; } #ifdef SPLSCORE t0->splScore = (model==spl_model) ? 
cell->score : 777777; #endif t0->toGEN = cell->xs; t0->toEST = cell->ys; t1->frGEN = cell->xe; t1->frEST = cell->ye; t0->length = t0->toEST-t0->frEST+1; t1->length = t1->toEST-t1->frEST+1; wobble(t0,t1,(type=='G')? "GT":"CT",(type=='G')? "AG":"AC",_genSeq); ckfree(g); ckfree(c); /* determine the type, based on the # matches w/ GT-AG (CT-AC) */ s = _genSeq+t0->toGEN; if (*s=='G') gtag++; else if (*s=='C') ctac++; ++s; if (*s=='T') { gtag++; ctac++;} s = _genSeq+t1->frGEN-3; if (*s=='A') { gtag++; ctac++; } ++s; if (*s=='G') gtag++; else if (*s=='C') ctac++; if (gtag>ctac) { type = 'G'; numG++; } else if (ctac>gtag) { type = 'C'; numC++; } else { type = 'N'; numN++; } t0->ori = type; } } else { t0->ori = 'E'; numE++; } t0 = t1; } st->orientation = BOTH; if ((numG > 0) && ((numC + numE + numN) == 0)) { st->orientation = FWD; } else if ((numC > 0) && ((numG + numE + numN) == 0)) { st->orientation = BWD; } /* code not actually used - sim4cc (-interspecies) currently uses only sync_slide_intron(), but provided here in case that changes */ if ((globalParams->_interspecies) && (st->orientation == BOTH)) { if (numG > numC) st->orientation = FWD; if (numG < numC) st->orientation = BWD; } if ((globalParams->_forceStrandPrediction) && (st->orientation == BOTH)) { if (numG > numC) st->orientation = FWD; if (numG < numC) st->orientation = BWD; // otherwise, st->orientation = match orientation, but we // don't know that here. It's set in sim4string.C:run() } } /* Removes short (potentially spurious) marginal exons past a long intron or cDNA gap; remove short (potentially spurious) internal exons bordering a cDNA gap; rblock now points to the second to last element in the list */ void Sim4::filter(Exon **lblock, Exon **rblock) { Exon *t0, *t1, *last; int frontTrim = 0; /* NOTE: There must be >=1 non-cap exon on either side of the intron */ go_front: /* At the start... 
keep t1*/ last = NULL; t0 = (*lblock)->next_exon; t1 = t0->next_exon; while (t1 && t1->next_exon && (t0->toGEN-t0->frGEN+1<=SHORT_EXON || t0->toEST-t0->frEST+1<=SHORT_EXON)) { if ((t1->frGEN-t0->toGEN>LONG_INTRON) || (t1->frEST-t0->toEST>1)) last = t0; t0 = t1; t1 = t1->next_exon; } if (last) { /* reset the start of exon list */ last = last->next_exon; (*lblock)->next_exon = last; /* remove all start exons up to and including 'last' */ t0 = (*lblock)->next_exon; while (t0!=last) { t1 = t0; t0 = t0->next_exon; // freeExon(t1); LLL 6-17-10 We are doing garbage collection } /* (*rblock) remains unchanged - see NOTE */ } /* repeat if necessary */ if (last) { frontTrim = 1; goto go_front; } /* At the end... keep t0 */ last = NULL; t0 = frontTrim ? (*lblock) : (*lblock)->next_exon; t1 = t0->next_exon; /* NOTE: if all exons are short on both sides of the long intron, then the condition in the loop below will test TRUE and the rest of the exons to the right of the intron will be removed */ while (t1 && t1->next_exon) { if ((t1->frGEN-t0->toGEN>LONG_INTRON) || (t1->frEST-t0->toEST>1)) { last = t0; while (t1 && t1->next_exon && (t1->toGEN-t1->frGEN+1<=SHORT_EXON || t1->toEST-t1->frEST+1<=SHORT_EXON)) { t0 = t1; t1 = t1->next_exon; } if (t1->next_exon!=NULL) { /* long exon found */ t0 = last; t1 = t0->next_exon; last = NULL; } } t0 = t1; t1 = t1->next_exon; } if (last) { /* reset the end of exon list */ t0 = last->next_exon; last->next_exon = (*rblock)->next_exon; *rblock = last; /* remove all end exons, starting with last->next_exon */ while (t0!=(*rblock)->next_exon) { t1 = t0; t0 = t0->next_exon; // freeExon(t1); LLL 6-17-10 We are doing garbage collection } /* (*lblock) stays unchanged */ } /* now filter short internal exons nearby cDNA gaps */ last = *lblock; t0 = last->next_exon; t1 = t0->next_exon; while (t1 && t1->next_exon) { if (t1->frEST-t0->toEST>1) { if (t0->toGEN-t0->frGEN+1<=SHORT_EXON || t0->toEST-t0->frEST+1<=SHORT_EXON) { /* remove t0 */ last->next_exon = 
t1; // freeExon(t0); LLL 6-17-10 We are doing garbage collection t0 = last; /* note: for simplicity, limit removing exons back to one exon */ } while (t1->toGEN-t1->frGEN+1<=SHORT_EXON || t1->toEST-t1->frEST+1<=SHORT_EXON) { /* remove t1 */ t0->next_exon = t1->next_exon; // freeExon(t1); LLL We are doing garbage collection t1 = t0->next_exon; } t0 = t1; t1 = t1->next_exon; } else { last = t0; t0 = t1; t1 = t1->next_exon; } } } bool Sim4::get_match_quality(Exon *lblock, Exon *rblock, sim4_stats_t *st, int N) { int tcov; bool good_match; Exon *t; good_match = 1; st->icoverage = 0; t = lblock->next_exon; while (t->toGEN) { st->icoverage += t->toEST-t->frEST+1; if (100*t->edist>=5*(t->toEST-t->frEST+1)) { good_match = 0; break; } t = t->next_exon; } tcov = rblock->toEST-lblock->next_exon->frEST+1; if (lblock->next_exon->frEST>=.5*N && tcov>=.75*(N-lblock->next_exon->frEST) && st->icoverage>=MAX(.95*tcov,100)) ; else if (rblock->toEST<=.5*N && tcov>=.75*rblock->toEST && st->icoverage>=MAX(.95*tcov,100)) ; else if ((tcov<.75*N) || (st->icoverage<.9*tcov)) good_match = 0; return good_match; } kmer-code-2013-trunk/libsim4/sim4core/table.C0000644000000000000000000000757212322046702017442 0ustar rootroot#include "sim4.H" // The position of a mer (word) is the position of the last base // (base-based). Note that the sequence starts at position 1. // // 11111111112 // 12345678901234567890 // acgggctactcgaggcta // // First mer is at position 12. // void Sim4::add_word(int ecode, int pos) { struct hash_node *h; int hval; hval = ecode & HASH_SIZE; // Find the word in the hash table // for (h = hashtable->table[hval]; h; h = h->link) if (h->ecode == ecode) break; // Didn't find the word? Add a new one! // if (h == NULL) { h = hashtable->nodes + hashtable->nodesused++; h->link = hashtable->table[hval]; hashtable->table[hval] = h; h->ecode = ecode; h->pos = -1; } // Set the position -- this keeps a list of words from high // position to low position. 
// hashtable->nextPos[pos] = h->pos; h->pos = pos; } void Sim4::bld_table(char *s, int len, mss_t MSS, int type) { uint64 ecode; int i, j, masked_ecode; char *t; if (type == PERM) { mask = (1 << (2*MSS.seedLength-2)) - 1; /* LLL 6/16/10 we are setting this for continuous seeds, where 2*seedLength=matchedLength; no effect if seed is spaced */ hashtable = &phashtable; return; } /* perform initializations */ if (type == INIT) { mask = (1 << (2*MSS.seedLength-2)) - 1; /* LLL 6/16/10 we are setting this for continuous seeds, where 2*seedLength=matchedLength; no effect if seed is spaced */ hashtable = &phashtable; if (phashtable.nextPos) { delete [] phashtable.nextPos; delete [] phashtable.nodes; } phashtable.nextPos = new int [len+1]; phashtable.nodes = new struct hash_node [len+1]; phashtable.nodesused = 0; for (i=0; i> MSS.shifts[j]; add_word(masked_ecode, (int)(t-s-1)); #else add_word(masked_shift(ecode), (int)(t-s-1)); #endif } } } } kmer-code-2013-trunk/libsim4/sim4core/sim4b1_s.H0000644000000000000000000000077212322046702017774 0ustar rootroot#ifndef SIM4B1_S_H #define SIM4B1_S_H #define CONTINUOUS_SEED 10 #define SPACED_SEED 11 struct position_t { int begin; int end; int width; int result_shifts; }; class mss_t { public: mss_t() {}; mss_t(char seed[32]); ~mss_t() {}; int type; uint64 mask; int masknum; int seedLength; int matchedLength; uint64 masks[64]; // Fails assert in sim4b1_s.C if exceeded int shifts[64]; uint64 mask_shift(uint64 ecode); }; #endif kmer-code-2013-trunk/libsim4/sim4core/glimmerSplice.C0000644000000000000000000003251311475230740021145 0ustar rootroot#include "sim4.H" const char ALPHA_STRING [] = "acgt"; const int DEFAULT_PERIODICITY = 3; const int DEFAULT_MODEL_DEPTH = 7; const int DEFAULT_MODEL_LEN = 12; const int ALPHABETSIZE = 4; const int MAX_ERROR_MSG_LEN = 1000; const int ICM_VERSION_ID = 200; const unsigned int NUM_FIXED_LENGTH_PARAMS = 6; const int ID_STRING_LEN = 400; //const unsigned NUM_FIXED_LENGTH_PARAMS = 6; #define PARENT(x) 
((int) ((x) - 1) / ALPHABETSIZE) int Filter(char Ch); int Subscript(char ch); void Permute_String(char * s, int * perm, int n); void *Safe_malloc(size_t len, const char * src_fname, size_t line_num); void *Safe_realloc(void * q, size_t len, const char * src_fname, size_t line_num); void *Safe_calloc(size_t n, size_t len, const char * src_fname, size_t line_num); int Int_Power(int a, int b); void Input(struct ICM_t *p, FILE *fp, int model_len,int model_depth, int periodicity); int Get_Model_Depth (struct ICM_t p) { return p.model_depth; } int Get_Model_Len (struct ICM_t p) { return p.model_len; } int Get_Periodicity (struct ICM_t p) { return p.periodicity; } double Full_Window_Prob(struct ICM_t p, const char * string, int frame); int Get_Length(struct Fixed_Length_ICM_t fixed){ return fixed.length; } double Score_Window (struct Fixed_Length_ICM_t fixed, char * w, int left); int getModelLength(struct Fixed_Length_ICM_t fixed) { return fixed.length;} int getModelType(struct Fixed_Length_ICM_t fixed) { return fixed.model_type;} int getSpecialPosition(struct Fixed_Length_ICM_t fixed){ return fixed.special_position; } void readModel(struct Fixed_Length_ICM_t *fixed, const char *path); void readModel(struct Fixed_Length_ICM_t *fixed, const char *path) { FILE * fp; char line [ID_STRING_LEN]; int param [NUM_FIXED_LENGTH_PARAMS]; int i; if ((fp = fopen (path, "r"))==NULL) { fprintf(stderr, "Error: Could not open Glimmer model file for reading (%s).\n", path); exit(1); } fread (line, sizeof (char), ID_STRING_LEN, fp); // skip the text header line if (fread (param, sizeof (int), NUM_FIXED_LENGTH_PARAMS, fp) != NUM_FIXED_LENGTH_PARAMS) { fprintf (stderr, "ERROR reading file \"%s\"\n", path); exit (-1); } if (ICM_VERSION_ID != param [0]) { fprintf (stderr, "Bad ICM version = %d should be %d\n", param [0], ICM_VERSION_ID); exit (-1); } if (ID_STRING_LEN != param [1]) { fprintf (stderr, "Bad ID_STRING_LEN = %d should be %d\n", param [1], ID_STRING_LEN); exit (-1); } 
(*fixed).length = param [2]; (*fixed).max_depth = param [3]; (*fixed).special_position = param [4]; (*fixed).model_type = param [5]; (*fixed).permutation = (int *) Safe_malloc((*fixed).length*sizeof(int), __FILE__,__LINE__); for(i=0;i<(*fixed).length;i++) { (*fixed).permutation[i] = 0; } fread ((*fixed).permutation, sizeof (int), (*fixed).length, fp); (*fixed).sub_model = (struct ICM_t *) Safe_malloc ((*fixed).length * sizeof (struct ICM_t ), __FILE__, __LINE__); for (i = 0; i < (*fixed).length; i ++) { (*fixed).sub_model[i].score = (struct ICM_Score_Node_t * *) Safe_calloc (1, sizeof (struct ICM_Score_Node_t *), __FILE__, __LINE__); // for (j = 0; j < 1; j ++) { // (*fixed).sub_model[i].score[j] = (struct ICM_Score_Node_t *) // Safe_calloc (12, sizeof (struct ICM_Score_Node_t), // __FILE__, __LINE__); // for(k=0;k<4;k++) { // (*fixed).sub_model[i].score[j][k].prob = (float *) // Safe_calloc (4, sizeof(float), __FILE__, __LINE__); // } // } // } for (i = 0; i < (*fixed).length; i ++) { Input(&((*fixed).sub_model[i]), fp,1,0,1); } } // Input the contents of this model from fp , which has already been opened. 
void Input(struct ICM_t *p, FILE *fp, int model_len,int model_depth, int periodicity) { char line [ID_STRING_LEN]; int param [NUM_FIXED_LENGTH_PARAMS]; int node_id; int prev_node; int period; int i,j; (*p).model_len = model_len; (*p).model_depth = model_depth; (*p).periodicity = periodicity; (*p).empty = 1; // skip the text header line if (fread (line, sizeof (char), ID_STRING_LEN, fp) != (unsigned) (ID_STRING_LEN)) { fprintf (stderr, "ERROR reading ICM header\n"); exit (-1); } if (fread (param, sizeof (int), NUM_FIXED_LENGTH_PARAMS, fp) != NUM_FIXED_LENGTH_PARAMS) { fprintf (stderr, "ERROR reading parameters\n"); exit (-1); } if (ICM_VERSION_ID != param [0]) { fprintf (stderr, "Bad ICM version = %d should be %d\n", param [0], ICM_VERSION_ID); exit (-1); } if (ID_STRING_LEN != param [1]) { fprintf (stderr, "Bad ID_STRING_LEN = %d should be %d\n", param [1], ID_STRING_LEN); exit (-1); } (*p).model_len = param [2]; (*p).model_depth = param [3]; (*p).periodicity = param [4]; (*p).num_nodes = param [5]; (*p).score = (struct ICM_Score_Node_t **) Safe_malloc ((*p).periodicity * sizeof (struct ICM_Score_Node_t *), __FILE__, __LINE__); for (i = 0; i < (*p).periodicity; i ++) { (*p).score [i] = (struct ICM_Score_Node_t *) Safe_calloc ((*p).num_nodes, sizeof (struct ICM_Score_Node_t), __FILE__, __LINE__); for(j=0;j<(*p).num_nodes;j++) { (*p).score[i][j].prob = (float *) Safe_malloc(ALPHABETSIZE*sizeof(float), __FILE__, __LINE__); } } period = -1; prev_node = 0; while (fread (& node_id, sizeof (int), 1, fp) != 0) { if (node_id < 0) break; if (node_id == 0) period++; // read in the probabilities if (fread ((*p).score [period] [node_id] . prob, sizeof (float), ALPHABETSIZE, fp) != (unsigned) (ALPHABETSIZE)) { fprintf (stderr, "ERROR reading icm node = %d period = %d\n", node_id, period); exit (-1); } // read in the max mutual information position if (fread (& ((*p).score [period] [node_id] . 
mut_info_pos), sizeof (short int), 1, fp) != 1) { fprintf (stderr, "ERROR reading mut_info_pos for node = %d period = %d\n", node_id, period); exit (-1); } // check for cut nodes if (node_id != 0 && prev_node != node_id - 1) for (i = prev_node + 1; i < node_id; i ++) (*p).score [period] [i] . mut_info_pos = -2; if (node_id == 0 && period > 0) for (i = prev_node + 1; i < (*p).num_nodes; i ++) (*p).score [period - 1] [i] . mut_info_pos = -2; prev_node = node_id; } if (period != periodicity - 1) { fprintf (stderr, "ERROR: Too few nodes for periodicity = %d\n", periodicity); exit (-1); } // check for cut nodes in last period if (prev_node != (*p).num_nodes - 1) for (i = prev_node + 1; i < (*p).num_nodes; i ++) (*p).score [period] [i] . mut_info_pos = -2; (*p).empty = 0; } // Rearrange the characters in s according // to the permutation in perm . void Permute_String(char * s, int * perm, int n) { static char * buff = NULL; static int buff_len = 0; int i; if (n > buff_len) { buff = (char *) Safe_realloc (buff, n, __FILE__, __LINE__); buff_len = n; } for (i = 0; i < n; i ++) buff [i] = s [perm [i]]; strncpy (s, buff, n); return; } // Return a single a, c, g or t for Ch . int Filter(char Ch) { switch (tolower (Ch)) { case 'a' : case 'c' : case 'g' : case 't' : return Ch; case 'r' : // a or g return 'g'; case 'y' : // c or t return 'c'; case 's' : // c or g return 'c'; case 'w' : // a or t return 't'; case 'm' : // a or c return 'c'; case 'k' : // g or t return 't'; case 'b' : // c, g or t return 'c'; case 'd' : // a, g or t return 'g'; case 'h' : // a, c or t return 'c'; case 'v' : // a, c or g return 'c'; default : // anything return 'c'; } } // Return the subscript equivalent (used in offsets of the // model) for character ch . 
int Subscript(char ch) { char * p; p = strchr ((char *)ALPHA_STRING, tolower (Filter (ch))); if (p == NULL) { fprintf (stderr, "ERROR: Bad character %c in subscript conversion", ch); exit (-1); } return (int) (p - ALPHA_STRING); } // Return the log-probability of the last character in the first // model_len bases of string conditioned on the preceding characters // using the entries in score [frame] . double Full_Window_Prob (struct ICM_t icm, const char * string, int frame) { double prob; int num_node, i, pos, sub; num_node = 0; for (i = 0; i < icm.model_depth; i ++) { pos = icm.score [frame] [num_node] . mut_info_pos; if (pos == -1) break; if (pos < -1) // No information here or below in tree, go back up // Shouldn't happen { num_node = PARENT (num_node); pos = icm.score [frame] [num_node] . mut_info_pos; break; } sub = Subscript (string [pos]); num_node = (num_node * ALPHABETSIZE) + sub + 1; } pos = icm.score [frame] [num_node] . mut_info_pos; if (pos < -1) { num_node = PARENT (num_node); pos = icm.score [frame] [num_node] . mut_info_pos; } sub = Subscript (string [icm.model_len - 1]); prob = (double) icm.score [frame] [num_node] . 
prob [sub]; if (pos < -1) { fprintf (stderr, "WARNING: prob = %.4f pos = %d in Full_Window_Prob\n", prob, pos); fprintf (stderr, "num_node = %d\n", num_node); } return prob; } // Return the score of this model on string w double Score_Window (struct Fixed_Length_ICM_t fixed, char * w, int left) { static char * buff = NULL; static int buff_len = 0; double score = 0.0; int i; if (fixed.length > buff_len) { buff = (char *) Safe_realloc (buff, fixed.length+1, __FILE__, __LINE__); buff_len = fixed.length; } strncpy (buff, w, fixed.length); // strncpy (buff, w, left); // strncpy (buff+left, w+left+2, fixed.length-left); if (fixed.permutation != NULL) Permute_String (buff, fixed.permutation, fixed.length); for (i = 0; i < fixed.length; i ++) { if (buff [i] == '\0') { fprintf (stderr, "ERROR: String \"%s\" too short in Score_Window\n", buff); exit (-1); } score += Full_Window_Prob (fixed.sub_model[i], buff, 0); } return score; } void Clean_Exit (const char * msg, const char * src_fname, size_t line_num) // Write string msg to stderr and also a line indicating // the error happen in source file src_fname at line line_num // if they are not NULL and 0 respectively. // Then exit with an error condition. { fprintf (stderr, "%s\n", msg); if (src_fname != NULL) fprintf (stderr, " in file %s", src_fname); if (line_num != 0) fprintf (stderr, " at line %lu", (long unsigned) (line_num)); fprintf (stderr, " errno = %d\n", errno); exit (-1); } void * Safe_calloc (size_t n, size_t len, const char * src_fname, size_t line_num) // Allocate and return a pointer to enough memory to hold an // array with n entries of len bytes each. All memory is // cleared to 0. If fail, print a message and exit, assuming the // call came from source file src_fname at line line_num . 
{ void * p; char Clean_Exit_Msg_Line [MAX_ERROR_MSG_LEN]; p = calloc (n, len); if (p == NULL) { sprintf (Clean_Exit_Msg_Line, "ERROR: calloc failed %lu x %lu", (long unsigned) (n), (long unsigned) (len)); Clean_Exit (Clean_Exit_Msg_Line, src_fname, line_num); } return p; } void * Safe_malloc (size_t len, const char * src_fname, size_t line_num) // Allocate and return a pointer to len bytes of memory. // If fail, print a message and exit, assuming the call came from // source file src_fname at line line_num . { void * p; char Clean_Exit_Msg_Line [MAX_ERROR_MSG_LEN]; p = malloc (len); if (p == NULL) { sprintf (Clean_Exit_Msg_Line, "ERROR: malloc failed %lu bytes", (long unsigned) (len)); Clean_Exit (Clean_Exit_Msg_Line, src_fname, line_num); } return p; } void * Safe_realloc (void * q, size_t len, const char * src_fname, size_t line_num) // Reallocate memory for q to len bytes and return a // pointer to the new memory. If fail, print a message and exit, // assuming the call came from source file src_fname at line line_num . 
{ char Clean_Exit_Msg_Line [MAX_ERROR_MSG_LEN]; void * p; p = realloc (q, len); if (p == NULL) { sprintf (Clean_Exit_Msg_Line, "ERROR: realloc failed %lu bytes", (long unsigned) (len)); Clean_Exit (Clean_Exit_Msg_Line, src_fname, line_num); } return p; } int Int_Power(int a, int b) { int result = 1; int p = a; while (b > 0) { if (b & 1) result *= p; p = p * p; b >>= 1; } return result; } kmer-code-2013-trunk/libsim4/sim4core/GlimmerModels/0000755000000000000000000000000012641613357021006 5ustar rootrootkmer-code-2013-trunk/libsim4/sim4core/GlimmerModels/acceptors.162.pos.icm0000644000000000000000000012645411415365503024601 0ustar rootroot>ver=2.00 len=50 depth=2 special=-1 type=0 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49 2  !"#$%&'()*+,-./01>ver = 2.00 len = 1 depth = 0 periodicity = 1 nodes = 1 J,3glv>ver = 2.00 len = 2 depth = 1 periodicity = 1 nodes = 5 J,l`'o1ee6kZ꛿@Z!¿7²sÿk>ver = 2.00 len = 3 depth = 2 periodicity = 1 nodes = 21 Z̿)ida* E>Ŀk;eICQ3Ŀ{jhT[ƿ~Ymě$ĿLxӿ4ݣFxLwӀj翢 \ ?+51u Zݿٶ0 O `ֿoFL6S ¿V)[=) }׿hۛiVǿbK sZǿRÿ׿s#ͥ~P:RbݿԿior\ſ+0Wj-r2$)#*>ver = 2.00 len = 4 depth = 2 periodicity = 1 nodes = 21 ٿiWYҤPUe ꀿtտHw|;YS̿k./a\_"x;Ŀ\-Tا"~E((飿 -~׿9oq k͢*4ub ߿畐|GoG tտHw|; ,+ܿeAܪl- Ͽ飿&ns26Vd2Ͽտ -GDֿxVEEG/׿@ܬqA:êX('|ֿ ˕O}E2 Tǿ SG8#>ver = 2.00 len = 5 depth = 2 periodicity = 1 nodes = 21 # 뿑(ҺM5!j lݿ餖^l>IwDؿu*/NJʿ\F=*ft<_ؿte38&bӊv3˿x9^; E!5[/i %8翰bBK{T ݿ餖^l>IwD  鿴fVl ڿg*dpnῑ~"鿖,\G׿<俦;aԿbտo*33ϿJS"g{e+YVq*˿Ů|Xe uҿR>ver = 2.00 len = 6 depth = 2 periodicity = 1 nodes = 21 g򿲳qcXFT¿೿c Zۿ>Bk?'5Ek=B^2 8w+8̿CFۏ]bs X#bZ:߿󠿐-8ee߿<| 0Xڿ;>Z0 iXys*bj x'xj/] ƿ7=%U ӥ%0r* cL;[ qg N몿;zIu\߫ TZs̿S%4ٿ~KſRQoӽl]mN0oȿL*#>ver = 2.00 len = 7 depth = 2 periodicity = 1 nodes = 21 (%3>[Ϸƀ)DTUfcD9*2u~?-3ӿhAއ˿@}TDֿ3 u%%¿Nkp~a}Ϳ M' =^+TW ]N~_S5[ J<4JlB V aK )SU8QKXF Qqk.ZL Jѿu>+F^K/ÿE&I¿n>'5[4￿ !>ver = 2.00 len = 8 depth = 2 periodicity = 1 nodes = 21  vP5vk9 G>΢Jo!<(~:K)ٿYA{ JO>[PW.Ὶ; $9Mس %/SpͿz;  
%Dl&Z>( 25}ZE>J ^:i$Ab3 W \r]b B!BBz =k[*Xį(?¾wſlUtmٿuMO,גn!qfBĿ^Y$bX!S >ver = 2.00 len = 9 depth = 2 periodicity = 1 nodes = 21  p[Y!-ȿx8J._ O mT! ѿnb:ӿпx3hF$пQjXOQeϿ2FOGf*Al¿ka{/ Zb_q`07 kہM@ @ _ O K6ەwiq W .*@DJU*A#[+D76  ۿcĿߧ48,4dgƄ¿秿a[8̖>ver = 2.00 len = 10 depth = 2 periodicity = 1 nodes = 21  ɜ@'̿#ɺ>_W dvMQ^&4S^d Tc'x0ڿ.-ʿndk#ݿ5~:H27ۿV|J0Ѷ˿y Vڿ!ج]aU% $.vW41 Mh f0|" $uR ZT a Z'fcҋ;4S^d TxQ̿s%X+5̿(ݿ $9d^ÿ`fRW'%;pֲJ{$*[qE>ver = 2.00 len = 11 depth = 2 periodicity = 1 nodes = 21  }7  W׿ 1y>;xSF~ "9Dv]$\)jGKc:Ὲ ZX'6^Ͽ5vF~̿%Qؿ2% D >x) ߦw DX[? 7%aE @ѩv B#㧿[m&BQ ެFŔ$p t໿ #"Cc쿢Ό BmLӿK?%+ʗÿ¿3~ xm>ver = 2.00 len = 12 depth = 2 periodicity = 1 nodes = 21  1vaJ n?cJ V'Ŀf:I@.6$ #A)ʿ4 ݗ6ԍ>¿r#G ڿqLο2 P 7ϿmUl׾ B/iZ џ䉥}YZ 5?{M #tɿ ń!YD##1̿t4(q[ EqοǖF򒿵0ҿO:+Cſ}ſ/A*`Ur>ver = 2.00 len = 13 depth = 2 periodicity = 1 nodes = 21  - 1B俚i!5sd|~ n &E+ qݿHgMm/ 1B俚i)7y;Ɖ +?Tg Ia(v7Q.N- |u"M3V  {P 7 g !;!rU#K| -2CNȿSGyͿ9!tG@^1 ֿ%}JW'%l[տm/=ɂE#I >ver = 2.00 len = 14 depth = 2 periodicity = 1 nodes = 21 ؗBn*0  ٿLﭿYGW0  (LjL pYe ' /o jamտ%K 迒㾿nYUOؿ Nar/ۿsIP Ͽ6F O[Q `mas# Ǻ.mkeI ER B:\ &wft Ǵ } >2"k< 'DnRſP418WĿ:t;:UlA!Ѣ[vM^e5 >ver = 2.00 len = 15 depth = 2 periodicity = 1 nodes = 21 ta|5 e-*4 1|^( Pc &f {Q fݿ$M %琫~w ޿ܭP[VqX V4SbǏ2 sF]W 迺؋:8/N @&k ֐'H+8!>-N f#m¿SjXk c.׿dF WiL4 3UW?߿pfv'㓿)_DmԠq?>ver = 2.00 len = 16 depth = 2 periodicity = 1 nodes = 21  2[I=,ʿH+ $$a,l$T6پuצG -  Z8nI y82{<;ƈH/  ʿY}k/˿ Vڹ OޡFf C]`@iT $U%I ˍxj_& S"x G0 W ,\i >9J -S @/] ,Ņ#I*g}#fn#ng"` U vnJc: >ver = 2.00 len = 17 depth = 2 periodicity = 1 nodes = 21 qvH*(8鿲~8% ʁ_?yB tNLd 9@ T$ 8鿲~8% J憿D71뿥Wy~ǀſK6! ʁ_?yB sp4p_{ \ D!c3[ ) r# Q.ƿ qZ / -'^Ŀc" cL Ŀv"b"!"kHο1 }VD*yeY9 FK߶sh 2E.>ver = 2.00 len = 18 depth = 2 periodicity = 1 nodes = 21 RSS6Tx} BAqhne>?[E1c1i2gпh2!2Ӿzz$zA; bٿpʂojU :rNѿvJC]4<. \oo3 DTqgFqqŹ[ #>V:iNxL vXk}y+ >  , T T )),+ܿyпnm0 6,~W$?m)F,"Ay_)⠣2l*X~ǿ$ >ver = 2.00 len = 19 depth = 2 periodicity = 1 nodes = 21 z0ȿ/͞>e6ȿ! 
d¿VT6F ܿZп q-Trڿ%ʿ S]a@_Cǿ Oɿ &i̿J_ =qM&5ި ׭l/yO ̿ Hi   ^؅ Žup9$ ܿZп jKԿ=׿Gѿ뿟盿<>˿CڿۥmIC&}I!ҿW`CǾԻ!y>ver = 2.00 len = 20 depth = 2 periodicity = 1 nodes = 21 8Pپz-!.wɿƛ>H #E*ƿ3ABL Ny^.ׯ2_9!nѕ|ۿ:ѹꭿ ƛ>H FZRƾj+/ {SN޾Pq Π+%d <) !-Ty9WISg=kWirE1:A˾%6jb>+3UUL ߽x:*Q݊e4^ A3 ht>ver = 2.00 len = 21 depth = 2 periodicity = 1 nodes = 21 M; <ٿU'{CCL4 UV?3! cd 2m ̿0#8`TV޿迕xf <ٿU'{ ?&]- ฿޿897 xƿ=D|-: ܙ*|bQe UV?3! n0 g2 78in2{2 ~1b"!z) 812$mrzA_))> ;tBF4>ver = 2.00 len = 22 depth = 2 periodicity = 1 nodes = 21 ֿտtm'῁տp䍭F%';]5ݿ:bݿRt*`׿©:ֿ%7 v q޿AſvK}9῍ֿr XS Jf3b "Z]"id Tզlqx6Ao zǿ4*{aI ,#ڿ]~~ֿϿtzPۿ޿q c -*mp*`׿fw׿m/ Fuֿl/=i:ҿ~ǒ)s>ver = 2.00 len = 23 depth = 2 periodicity = 1 nodes = 21 IefWl׿ Կb~PNW!01s6["?0qҿYRfWl׿ Կb?gK &¿ə TLh+K.|5 1UM/);0U |#l _qƿ9 t N)O# ̧خ(Ƕw  ÿl|߿<(ޞjb ؈ؿFFĥ6͒> Կl[ؿΩQߡ+E >ver = 2.00 len = 24 depth = 2 periodicity = 1 nodes = 21 $ȰB0s¿2KsտIzlh&ܣa۷`[<[]iſLտ? mߣ3TOU "lϿ}GL،x _w`) 񏪿н@J哿 @uver = 2.00 len = 25 depth = 2 periodicity = 1 nodes = 21 Qt/O˿-\Zƿg ֿ\!Y=*Tÿ٦Ŀ8鸿ݗmqοĿο1s˿iqѿʿ4⮿f񖿩b} ~VX60 br魙 \!Y= 0ٺ ܇ 'ִ˿ϿL*rÿ^8uedſ]ɏpͿs(!¾k{ D¿ƾdmַțd1rK*>ver = 2.00 len = 26 depth = 2 periodicity = 1 nodes = 21 ׵ҵĿWrzubN}eRo2uoοLӤ:ʿ賿%RʿGɒkqMؿ øQ:˖ LyYl+ XpZE xϊL-Gyʅ D )= fQ ĿԿ5.ڿ>{0qQǿQ6&(X| @ʶBqaͿ)ۿЕ˿+?v ) U鱿-킘>ver = 2.00 len = 27 depth = 2 periodicity = 1 nodes = 21 {/׻Pҗؿ)OĿkVr} {b5rɿO羿%{_͒vNZ*οoCg̿Ey$Qu[uʿ !gü˿y ߃ޠ aT[˿ T>Vґ )Gj̸ w~$0ؿ1HȿMwV 8ns縿]̿a%ٟſtӄۿ5j|Vɿ]KٿEտWX\ver = 2.00 len = 28 depth = 2 periodicity = 1 nodes = 21 N<]=]οnE房黿۞ο ȿk>v◿T  -ҿqˡ޿X嫿Tп|lϚU̿xv"䵿Q~ĿAtl څֿƇz Q! >¼Y r+Z mo:ۿv'̿(gT fſVֿӿݺԼQֿLBU"쥿Q^{RZ¿Nw>ver = 2.00 len = 29 depth = 2 periodicity = 1 nodes = 21 IY]Waֿٔ扞$W¿Ds߹Fпn ?ĭndѿ=̣M h¿ T]ۿ87J̿S朿\Ñ D D sD D x}!k Bs]ϹͿ1J@¿E5LͿCC᛿p˿?z̚%p-̿4ldԱOIj>ver = 2.00 len = 30 depth = 2 periodicity = 1 nodes = 21 J緿0lο>M4Ϳh +Q ]ؿPIPpڿ,ߗӿ¬rĿCۿ;ҿAοh ÿ[[ ?J&@ qodJ k M猦hO= v`%7Ղ  c? 
)Cÿlпi۸pڿpڿ'O˿;ɿ|l?HÍA ߓ9q-ՕݿƿN:>ver = 2.00 len = 31 depth = 2 periodicity = 1 nodes = 21 soqBG˿ zɑ_1s ò!XǿےǿP8Dl/JڿQݿWڿˌ¿%"Ŀ6N-䝿ݩ,  C 2 r  _'>9 & ࣿA' DhjrʿӺkȿW ȑ.Jɿvt׿οǿ婿B\x\ tl!/z4 m$T⚿(>ver = 2.00 len = 32 depth = 2 periodicity = 1 nodes = 21  xuaṙοVͿcQ  eȾyÿv 뿭Ib"޿% fʿMt΀̿'zտZGӿe@ȯ:0ݿ6 {FM. D햿_x :e䘠 زW GiF؅>̿㯿IT ԃ1»>mۿ⺿c1Jt̒Eʿܿ\ ]+ТwOKhwտڿaƿB}>ver = 2.00 len = 33 depth = 2 periodicity = 1 nodes = 21 !n֑GJغLH79׿-iǑL 3s,羿6GTѿ3Dq|aj]$ؿ gzڿ$O޿~7¿Nⅿ ܿMQay4V iǑL 3 rt̒^' ኿U"= mh>J b@N̿Rg=߿ -ÿk,tMHϿGᶿid#vdĿ%᷿$Bc{ !jlɿGпS/a>ver = 2.00 len = 34 depth = 2 periodicity = 1 nodes = 21 "4lDcUI) t-ѿǥ-¿%sRP ƺ!ǿiߵYIhxwٿDU;ҿ\뻿9Qq'%&KԿfͿ, %sRP  "کB& i ޿>O7/ Capt LwK}οI~ٿ矿Kѿރ nͿT_ſ|C~w;޿@ԿaxhE3>6PsʮC"9l[>ver = 2.00 len = 35 depth = 2 periodicity = 1 nodes = 21 #޴Ѭ˷!eԌɿwn 5͏ƥ# GvʿWRQmÀ˶ Ù0+ѿԖhп^[[I䃿ֿx Ŀ(ʿ, r a\ ھ l 5H C햿 Sw  <ː% GJ.ʿ7=Ͽ ]$ʙ] ^Ѥ"žbpԋ`Ŀ88¿B]1cĿTcĿ3/+i췿m0m>ver = 2.00 len = 36 depth = 2 periodicity = 1 nodes = 21 $BG X"LOȿܙƿ!%kؚ ±!0øG|Aɿ!ִ~sֹ kĿnjI߿ $ͿUĿԿƤT*ǿӜ츿Bɿ Dayɝ ! FM, Mᢿ NT?=z =X E(ǿ (}F w7ȿA+慿BԴԿ]Ŀ'Ŀ2¿f޿#Ŀx\3;3wjͿ3OY!Y忎ۿ"]Z>ver = 2.00 len = 37 depth = 2 periodicity = 1 nodes = 21 %߾۱#gvkҿ " 7VyloDÿʿ"A ϸm"5SWvȿ"/ܿaZ]ÿ@Ől˿!ʣPۿ gA_OJ 7B㖿 3Xoi: }M~ > mڿݑ߿xÿMAwֿN̿Aԏ ɿ5Yl$V;ǿHbqtȿM( 翷F~Ȩw>ver = 2.00 len = 38 depth = 2 periodicity = 1 nodes = 21 &a!e9j $ҥп0T#$֋4/#o&{ʿ\ w~p(#࿯ ٿ j߮Ǣ tmPϿ 2ۿHިJbͿ҉ î 76T dew¿Bg! 
= lA~ 5碿:  -,޿T'ɿ!EB{榿ufο#bп3Ҹt7\Huſ" ʿPE5`׿!̿ZgɿHܛܛ>ver = 2.00 len = 39 depth = 2 periodicity = 1 nodes = 21 '5C*¿%\db˿Ydɿ$IAc_ A Xc }Sֿ$IxP쿿e;!Կ.5ڿcʿ߲X̿ۿ$"ꦿʿ> a}yP Q ver = 2.00 len = 40 depth = 2 periodicity = 1 nodes = 21 (tkag&~[ʿS調Y%f&.D)%C]'|Կ%Dx$ᓿ˿۝aԿQ3ǿcǿ/ڿ_ÿ joㇿ$ {Z Tڿ+迃 XR$jn *ޖY ➙ $r7¿RϿwbWr!B`LԿ$Oʖoɿu^Կ#Ϳ dwg UHꂿD͓%;3nƿN~#<>ver = 2.00 len = 41 depth = 2 periodicity = 1 nodes = 21 )ÃO`|Ҹ'x頿̿2˚!Dſ&^q+ ]nQ `ſ&o;"z7k5Ͽ {ܕv ÿ mSh˿ߥ{п%uɶLk| ^q+ ]n u/  3ՃQ Vܧver = 2.00 len = 42 depth = 2 periodicity = 1 nodes = 21 *9j-۽(n;ȿ/ſ'ba $~Ob'F :xOط&l ѿӿ#o ׿JK1GǿKPyȿ#K&P5խ$ ulر9 dŏ: _ro ̪  L6VyǞÞ6֧Rtɴۿ=zFǿf޿bC0Ŀ͆lêƿ 㶝'H D{T쯿kƿG濪>׏>ver = 2.00 len = 43 depth = 2 periodicity = 1 nodes = 21 +be5)QȿoeĿ()(̓Bȿ(X6F (RPҿ5[Ϳ$%mֿ ο'a@ʿ!%ο@\^ 'v/ޯ 5~ſ@# 3t iG 鏿}Wu– ?H˵ĭ¿ſ& ^3"$ſ$woÿοo Կnƿ^#b"y%..' +uDs㳺95꿟ֱp>ver = 2.00 len = 44 depth = 2 periodicity = 1 nodes = 21 ,֊aB3*\ɿ ɿ)hver = 2.00 len = 45 depth = 2 periodicity = 1 nodes = 21 -nH+&zտÿ*7uٷ%7mn}}ӿ*aZYrl)sVҿ2_!ٿ)ɉ DQwO.οڿ9q?˿@$Ჿ'HV &r R w! o2-S^o Iy X#_ȿ_K +װͿ^3ٿ)cÿ&ѿ3Կٿh:?'wȿݿƿp\ϿuQlFҿ(>ver = 2.00 len = 46 depth = 2 periodicity = 1 nodes = 21 .2*/׻.X߳,o.Iο꭭L=+$! 1h86Ϳ+))-+t|vؿп m}˿1ky!xӿH{ e 7v»V*ت F%%! $Ƞ* hޘE * N¿6?̿ѿoo?տtο*¿Կa4*J4uW\6ٿńֿ{庮)5>ver = 2.00 len = 47 depth = 2 periodicity = 1 nodes = 21 /:ôQLf-ϝKϿ`+{ɿ,󑿳7Pt!"Q~ӆɿ,-⦿ܧ{% 懿8LϿH'UB߿c~(ϿdRڿ!~˿+9!To׿^ y{n־ט oY+ 訿ĕA@I 7cݗ w拿ver = 2.00 len = 48 depth = 2 periodicity = 1 nodes = 21 0()_.O~ǿ: ÿ-#6ž2q  Oʿ-oѪ|()*¿b)ο*Yÿ߉ʿ&0v9բ/ɿ,U桿g̿cI Z~ l/ʢ- HO Ӯ* "iy" Uɔ- V+Ĺ>ӿQ춿ګ])$nuÿa߿ߵտ9ٽHS俗ǿ/T&-R-8j!eֿEϿ *YM$>ver = 2.00 len = 49 depth = 2 periodicity = 1 nodes = 21 1?a]g /[ Vοe!.Γ; >'醿J˿.6M9.ڍrʿTӿ|gMۿ *"ቿD׿Ig;p^ڑ ~Ŀ_+ 覝㒿I<$ /ҕ.b., Qњ@r, S~nտѿ Qƿd'DҿdDM׿gğ%¿gؿӽC +_ە#_"챿-Ȗݘ|  >ver = 2.00 len = 50 depth = 2 periodicity = 1 nodes = 21 2/êPL!s0D3@[ʿ˥ſ/xp馿I /(KJ#D4п/ DK]4Sk/m8DϿX2ڿ.Dݿۧp&Ϳ ;4Dп@&Ϳ+$jTHC? 
ԃ?}i(1 El'Y֭ ֛֛ e\zlJ D g˿%J տ%_'˿>{`=yrտ.Mzë7SM .k;=W`ȿ%#ҾkXͿ_Jǿ:`qo&(kmer-code-2013-trunk/libsim4/sim4core/GlimmerModels/donors.162.pos.icm0000644000000000000000000012645411415365503024122 0ustar rootroot>ver=2.00 len=50 depth=2 special=-1 type=0 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49 2  !"#$%&'()*+,-./01>ver = 2.00 len = 1 depth = 0 periodicity = 1 nodes = 1 ^/7>ver = 2.00 len = 2 depth = 1 periodicity = 1 nodes = 5 sJ]ͿU4฿ ;Z7/ aJ;AYfuп:! 9O>ver = 2.00 len = 3 depth = 2 periodicity = 1 nodes = 21 Sѯ8믿q ο>V˿Ӑ5 `;ο뭿N lbUkۿG+L޿*p翑MvӿvBϿver = 2.00 len = 4 depth = 2 periodicity = 1 nodes = 21 vX(7˿IцDٗtϘsᅯ׉淿ĿæB̿U6MМǿy>vѿUACmĿ  ʡu P鞿뿲^ N¬Ȃ 0m iCƭ x8$џsxֿ6tſ6KkaV>ǿ̄ؿ߿]ĿDYB얿Йs ƿRϿcCمNy>ver = 2.00 len = 5 depth = 2 periodicity = 1 nodes = 21 ȤQ~=7sY,8ÿޚO7M6/Ìa߿ @#MXUɿ>{TֿsϿ㞿f&fҿaSſжDU2 |yWǿ@ 5q렿 ӠRی bU믿J璿 Q{ƿ1S쟿2⤿ygӿ % ƿ,iӿE$ѿײkwB{,Rh"ؚشtd妿5d妿>ver = 2.00 len = 6 depth = 2 periodicity = 1 nodes = 21 C[b\t/꡿wȿhʿybp_)䘿WjԿhRWɁAVDgfӿ ڿiȿ`ҁֿm)Ϳ ᘿQ׿F¿Ewտo == < V=4_ 9#  щȠ ܋ = !j[~48А ޿ⁿ䴹J?{<ſVڿؿ꥿k W@%`- \ƿؿ ʆŷ>ver = 2.00 len = 7 depth = 2 periodicity = 1 nodes = 21 "᧿bM$¿jJſ_ѿs˩PRS47[ؿXjV0eſQ,ο¥rZl xݑ߿He8{ Nv)=p縿 +y82jޘ 4 d â'| nX b9Qd"<)\mՍNR=п_Xx5$㊿ֿqͿQXſ NKjy鿰)grX׿<׿>숁!>ver = 2.00 len = 8 depth = 2 periodicity = 1 nodes = 21 `\eٷ֐bÿ \~ r޿Qʨ'{gvȿ6Կ?޿ٔ?ﱿE舿Ŀ˧9տaΓ є n'  |ʕKa 퀍x9 `ʞt} e˿_ֿ w%׿-qNqOǿm5D0΢˿`=ҝGݵ`i|[µ>ver = 2.00 len = 9 depth = 2 periodicity = 1 nodes = 21  v7s>z[l"ȿBӒԿ4%gMHOF/~ҭٿue06bl* `˿Lwx{pŸSʿx)ۿğYؿܿÿ%2ؿ9k l3X ~Wѿ@ @Ě'c,t \iᯜ SSPȿjg`޿=@ݰuaʿq$111Mp+r+8H׿퍿ЭO>ver = 2.00 len = 10 depth = 2 periodicity = 1 nodes = 21  ):bMkG󝿒/Ϳ]C f]_ Zݿm7 p0=ʿ%Ʋſ)ʿLnJқſ1CQJϿ :~٬-  )Ɲ(  Y .溿稿| D2R Ry"ܩN¿Eaؿa辿 NP 麿O],eƬԿ5)X4x񓰿>ver = 2.00 len = 11 depth = 2 periodicity = 1 nodes = 21  K^UѴ ʢyĿU: L՝T 5]п-^u]~ ծ폿HOǨә`/Կ2rmu9=N4ҿ޾ b =39A E$B_ if ~ɿ] !o̿5ӿ:gV˿qdm5qϿy 1x ҿi8 FΈQ\c.76ÿ2﷿">ver = 2.00 len = 12 depth = 2 periodicity = 1 nodes = 21  vP@@ m::Ͽ 0h|)ԲʩRֿ !৿݆ 
yԙkſ%ܿ>a'ȿwnʿ?7ǿq?ٿƿ)rEG  Eh' O7ˊu [ Z㨿 eh7 ㊿RF)ԲʩRֿĭy7O#utȿ廲܍׿9$+#tX$j : 񷿟UOǿ IRի B >ver = 2.00 len = 13 depth = 2 periodicity = 1 nodes = 21  ެr7 _bſƑɿ #E ͧ氿 .׿ )^ ӷ m:ȿ医C/)v̿_l׿u=ǿՃ BJ ᝿ʉjȮ N|H  ѹ[&;v -Ɨҳ)ֿ GQ|{W˿ 5Xקkb?ȿN}Ϳ¿ƿ_ΓŮb `࿛oɿKⱿⱿ>ver = 2.00 len = 14 depth = 2 periodicity = 1 nodes = 21 7sǶr q8+Ͽsk* ~0(㓿ڮa\u ȦMٿoſ&{ԿΛ\K,1߿)3п-tɢ9J' ˿XR % ; Zkx. yӿh[} d;Nuf05ڿL&GlzA|ǿ>_R_oֿ mJ阿n,n Pe1v䱿G|A1uj N~sƫg1 >ver = 2.00 len = 15 depth = 2 periodicity = 1 nodes = 21 Zq翿 Lӽқɿ z!uiם<3 |]  T长 Ը^¿P%ͿVpw~-ο зJڿn %~cॿ `©d7  ۈ_m17U S% ?͘_ƿj  xߥq|U@F s!py¿D Bdoݿ a xxϿ^e0iѬ!>ver = 2.00 len = 16 depth = 2 periodicity = 1 nodes = 21  e"٠E(ʿ HNOmJ앿yH\ ӭyt|οy;7iؤخζZz0Ϳ%qG"ǿ\ʿ 6N޿Y Ή| ʿgU¥ Akp5 ,>l- -e}fȿɌLֿ ^ ܗ'kNE "fſM梿Rxݿ g/ǿ>0⠙25 W?tǿ q}ݿ.W5f0ܿ ~7w >ver = 2.00 len = 17 depth = 2 periodicity = 1 nodes = 21 {YQsGX z _ƿTl$ Hۿ ɜ':0^ I玿ٿ ~Ͽ  =ſdĿ넿0¿&Hz hr饕 0d¿ $ Ttە} a頿. ' CҊ &v˿Mҿ Jt@]&Կ+$$ؿתD~C뿆._lNc1Ů K/am}E/Ŀ:ɿ辿7`\ >ver = 2.00 len = 18 depth = 2 periodicity = 1 nodes = 21 Ne®Iﰿ.;|¿0ÿ f ?V$חɎd٧+gݿt1jֿdSֿƉ̿^JԿkΝĿ\*̺ÿ]:Niy wӫ$Y  lAgJ Ԓșq貯 Hnu F%K_7t ':b Ϳ n޹fMArxÿ[ F̿ -!2m7쟿 ,y8 Xn S!ݱ>ver = 2.00 len = 19 depth = 2 periodicity = 1 nodes = 21 Z]"5(9;ÿ嗿пTGlOY ,"ܿ؏Ū^ؿ/70/ʿ͵ſ^¿]¿尸o_Njſ c- a ĩ; Ѭckh %8 U 6&!uT/KYYӿn΄8οT k ÿ֛ ÿ _ ̿WߓHO֘]ʿ@E屿;5Y2;"Ŀ^b >ver = 2.00 len = 20 depth = 2 periodicity = 1 nodes = 21 y䡿`#2VĿ&ÿAUM4F ƍ ¿1ܿXc0d̿ÙB޿ NB\ȿK馿+¿ w܇k׿㯿 Ŀ W(O l50n> ujZ0#^ h=j yXᒿ ӋrU׿bп JX<¿ؿ /Jx  <V"޿*ؿ  z _2 "Д >ver = 2.00 len = 21 depth = 2 periodicity = 1 nodes = 21 gzҵĸſſcWVJw :tܿMԐxǗѿꌿ3ܿ"0n¿D[ ˔/⿁} p׷&+pM qc,∿ @@& Ȋfv  L`B~̿fտdkfunX-+sѿ㺿 y߿~y?<sM ǿ"D 5 >ver = 2.00 len = 22 depth = 2 periodicity = 1 nodes = 21 0GBTȿ\퀿┮ sG͙¯տ忊N)咴{:%ſnBh(ο¾tͿJثۿŭ jԘ%V3 &)ĠRŭ uxHU, XD'ɑ N}/ĿQ.'m*)0ֻl v㶝gƿVֿhl獿35Pq2 𢿩荿 ƿ{ˡP>ver = 2.00 len = 23 depth = 2 periodicity = 1 nodes = 21 +0ƿ 3ɿкFgmk}CI= zރ v ÿ%1p Hժ`3} L0s yv;' c;CMǿyݿ-H̭}ʿAqKQ˿H; οŰa.?pndФb⿯&?>ver = 2.00 len = 24 depth = 2 periodicity = 1 nodes = 21 S^򷳿HFsƿ8 fʬr Ơ"Gz{(ĭ*˿+:>Gϒؿ2WϿ)ޛH!lԿSsɿп؝ ˚A m WX5ȴ 탃Tſ o ƣͿ|(ʿC5Ӧ<ϿN1}g¿Sܿ}꼿YZU1Y ;lW 
v/tcԿ|]W BD/Ͽ>ver = 2.00 len = 25 depth = 2 periodicity = 1 nodes = 21 <]趵z,9ɿ%x C~u9Ochݹ`,tп|Ῠ:` ԬY h/|ƿ%I¿ ގտ]nɿGAb4 P~,) ?y嬿;2  v~f¿]2꘿ :̿Ƞ% dgȿEٿ4t鬨+˿ 'Mv췿 ɿ5i ÿf˿fͿn2>[5rKtYŒr}Ϩ˿8ÿLk7>ver = 2.00 len = 26 depth = 2 periodicity = 1 nodes = 21 ՘ˆ<˿Oը켏LҿOiAe4CҿϿhܿd¿L).2xN{4\@ٿ%o⓿fXѿX߹棿$F팿 yQӿT `p)*ޖ H~ NU1O ;r"*Oڿ„ -ܿIʿ 9]*ٿ97矿 zR]꿷4v^'K'X^EſؠՓ,Ͽraʿa>ver = 2.00 len = 27 depth = 2 periodicity = 1 nodes = 21  Es(ſ 8̿'ePT̿ wK5˩ Lyƿ0Jȿ/,껿w㿇qaܿ ,VĿЋ ]ჿ߿7a8ǿ*}* jhWĿk N=,Ѷ w,g ~pxŁQ< Uvឿ྿uc:ٿڣ@u!}{ڿ8@"Tؿ{W4alп=%̿uſI ̿ >ver = 2.00 len = 28 depth = 2 periodicity = 1 nodes = 21 sTݿyבп4 DQ K)+4r>1fѿ%)uذx^̩mn п(tt;PgĿfV7H !ա`I= z9]+i S| =d/F"%/ qՄ~ҿ/{IY߿:G炿!aV8𳴿Svu ;~(#=J~jiJʿ޳u3^ RûcX>ver = 2.00 len = 29 depth = 2 periodicity = 1 nodes = 21 _龢 B (ܿ {MI f|?ۗ޾;ch =^)NӾKhY B,z sC 뿛e4׾Uu) י&K Vw;߰ Hv#wV)Kr qn'M; . .,Hhw %D.H#1) "GΏ6п_ܤ Qn\ \glWi 3Zȿl>ver = 2.00 len = 30 depth = 2 periodicity = 1 nodes = 21 XTy%)oL3ɁFZq,259 ==Ǖ#BS;> ewv-G |0qp$^B*0rb!<1b 0bD쒾 d>(#O?zֿ |:Ϳ hP) gDϿ>Ag[Ŀ }$¿j24P1 4Rɿ JT)- #3H ==Ǖ vA닢5t:j ̆ #BS;>wA$(H7Z)8*kFP >ver = 2.00 len = 31 depth = 2 periodicity = 1 nodes = 21 p^ϒ ]is) 0H qH27QSl%RH-f lţ"u=ܾы,=|"It is) 0 QnGE~ H qH ss_4 { -Tl3ŏ C,DL)"xYQcs Dr}+1|!y3P jYd Ԕڟ#A^H-f H-f>ver = 2.00 len = 32 depth = 2 periodicity = 1 nodes = 21  $ǎ Ծ :n멾"9 M$12 𡣾c$*:p 멾"9 M r3d8c堽RsQ:hs`uAcl폽#nj\Npg85_sE3ǿ9΀G'5CʿdA8U̿쥿 >ver = 2.00 len = 33 depth = 2 periodicity = 1 nodes = 21 !:7''2%G¾L4Z ^fT`@iփ?kL)NZ6W<d~UL^zf %T0\t:H |,bXwQ^ D|w }+^Q O L4Z ^fT 64Y̓N jf2:o(`-)mP dёV `_J[>Pн]lXH0MJ; L)NZ6W<EBUO~@4O#"O.><>ver = 2.00 len = 34 depth = 2 periodicity = 1 nodes = 21 "Γѿsb2 &' 4(#< >5& fҿ"[%οX>] |f-Z!!n#X? DzZs¾g.(KVA*0Uz $u> ! 
5zז u0#t1\7' 8NG\ NP!e ug5k Of 8ɿ[g!?Ŀ.;ͧ޿կJ!ٿ:>J{6#Z#iLE0 eqee41sE7)6"h G>ver = 2.00 len = 35 depth = 2 periodicity = 1 nodes = 21 #lܿh֑!|޿J B7iԎb¿uÿ l?&uK3׷0)ſM랿04޿ iVš絿m οr Mu{ټ K | }?O 4oz/%Sh п 3*Km V-ͿϿ >0Xz=1kt`z2ǿѭ˿յ3{s}9Hſ{t/$j˿\M*￈ሿHRAoۿ>ver = 2.00 len = 36 depth = 2 periodicity = 1 nodes = 21 $.uBVǿ薿"F~fDȿ%a!BYi!Ϝϰ!ٿʤʿ`nY0 A0pͿeڿ5߿oq\ﰻZͿԾZpk߿w cY듿 BVLݠ Kɿ\wY U$P@)  Ӧ($ГĿ ĕſtZ7Ŧmy泿Xyu̻M"\(ۿkȿ^!`'ſ㲿jcsPZFȿY!x3ÿ.ʿQ¿I>ver = 2.00 len = 37 depth = 2 periodicity = 1 nodes = 21 %:W¿eY#fպÿ f"`G Dg›6ο" ͊&ӿ^ֿ.SIȿh?,l!\ͮƿWB ߷F⳿qv &yg>(I!  :~ ɿY/" `G o~ܿÿn@ӿ 36"ۿǿj9ֿƿ~")i𿾪H < }Ⱥ٪"s鿺#=\>ver = 2.00 len = 38 depth = 2 periodicity = 1 nodes = 21 &g%<$qʿTV7P#`̉"ſێÿ#ƿ#UԿ¿rwǿRŒj $^9пU4ǿ"1ȿo! wG u ;b> MA < |Ա s퉿# l;f Kgը=ӿ}Vmɿ¿b/kC^˿goxÿxڿG_!U~6!GN!ҚdͿkbÿ<տ1n>ver = 2.00 len = 39 depth = 2 periodicity = 1 nodes = 21 's֑%PVѿͰ$I@{CXXǍ"¿ onє2'ɿ$ÿ}Iu$pk%ʿ#2o¿.>u|пu@ͿϿ)5ڿ z$ƹ3#_S$ bb < ֜$ П0j# eY#qƿ ʪ Ĵֿ 0˿Xtv#ǿe\lԠ)kĿ}Ҩ-ׂQ Rᪿκݘ ƃ) 3&#οdſ8ؿh">ver = 2.00 len = 40 depth = 2 periodicity = 1 nodes = 21 (#Dc*<~&B ʿ%ķnG$"ݿ7,%2 㿰տ G%N@Eտÿ&/ƿJſ#┿ÿAǿ>˿m$$ l¿$]ͅ iж&fؤ% ?(LE$ ;׷ (az# w^nȿ$l޿+e=໿#ӿSm vĿ"#ſ0|XHiϿ{翋`ǿؙy͏Qݿ ӿտRP$>ver = 2.00 len = 41 depth = 2 periodicity = 1 nodes = 21 )e]鹿y'梿Wʿ鳿{ &uoQB"ÿp薿&֬`ÿڀ& ؿտΠҿb,,l#ਿ5^ʿiJ% |Ϳ ;'J/)D 찻N ($䎿 o#6_ ڞ4Qk& Dſ擿ſ 9濌$ߍ/ο糪i[ؿ%bҾűC! _~%%u[⊿DPԿȾi翪]>ver = 2.00 len = 42 depth = 2 periodicity = 1 nodes = 21 *G%@@8(rƿo' !' 
w絿'v'2_߿Ϳө͂''@}ѿѿ  Ͽ6鲿jZ4׿aiſdyÿk Ik)' 7ζYxۢ U3,Uѹ% *('@B9 ЭbPi&Ͽw=Jȿk hVˁO^ @Կ!*%''82ZǩC P΍Iy¿8!ҿpTѿfտr[$>ver = 2.00 len = 43 depth = 2 periodicity = 1 nodes = 21 +PLok3)Pѿ( (6SI"Cpg)*(>%ۿ[3ʿ&t~(`|5Ŀ鯿⿜0-OؿAƿ ?ؿ}oϿ& e(~( R{3rn$ 㸷dR m嚿$ “ث#· G;$ħ'ƿ'bӿSi}AU1~B!yʿ59ȿx#̿4ObnrGn.B*¿G$LHm ο-L̿:W#>ver = 2.00 len = 44 depth = 2 periodicity = 1 nodes = 21 ,f溿(6T*]aҿ[)ȑ՝i]&ӹſNȿ\巿)afݿy˿F)Hj>wk$#IؿFڕ@̸*<׿Dqǿ$@Dֿe( l/[u ݕ]' jʲP& 胙vCj( ׭N-ſޒaǿ(%pM6&L¿ο`e=߿(пѿ:jMͿw(ſ Ѐ_%4 ѿb"ҿ6Ϳտ(@_&>ver = 2.00 len = 45 depth = 2 periodicity = 1 nodes = 21 -zÿs+#QϿ]*ạ;*Dl&ۿ ֿRw*7ӿfܿڨϿړȿ[䲿"\#ۿᒿֿ/ß_Zɿm߿U pԺſ}#C' ǿӉ7v) 9謿堿|$ W顿ʫ"(6՚k# hʬҗ䡮)ÿջJ庿*>%ٿȵ olڿ%zտ3Ft>]l)m G X#_%׿KͿ? տ0׿t"ٿP(>ver = 2.00 len = 46 depth = 2 periodicity = 1 nodes = 21 .]鹿CU¿}, Pտ%+ˢ៿C3y$UĿUĿ;u+$0{Ͽʗ+x5vֿ|&1ĿW[DZ)ɔɷ߿U)',#>ο>q 9%ʿ#82m% ]ͳ更&'" g㌿CQD' c} xs|ɿy$aʿ*ܿ78ʿ'L=ӿq&ӿ>\V%޿kzFDt*#l`oPϿՃvڿͿؿ V&>ver = 2.00 len = 47 depth = 2 periodicity = 1 nodes = 21 /^͹[]-E˿!:u,ࣿ&&ɿµ 'ֿ=*ɿvBd,wdqKǭ(ٿů[M#iYſhڿ!PhͿ\ܿd( ы rſ-'|y# _΋.Td*  W+ 2?D^% v_fs]%׿*ӗпӿqoqҿ!̿Gdא& ſNݿrN(TŴ& ٿ*>$ノ!0YϿ ÿhi\">ver = 2.00 len = 48 depth = 2 periodicity = 1 nodes = 21 0<]P.񊝿 z׿@ۦ-BΞP Ǡ*οh$JM- VԿƿA -َ{㿴¿x,˿IK+ڿO{Hÿ ӿh鿯e' ,sĿ9tv %sH2( .?V2% j˞HB$Ms' ip̿3,`俁;⫿#ރѿh(|׿y̿%f+$Ĉ'j<["ٿZ̿ؑ&)̿ǿl=ѿ5o)>ver = 2.00 len = 49 depth = 2 periodicity = 1 nodes = 21 1Jغ_”/壿iͿJ.ۯܗuI"턿.b¿Ϳ>󔯿?׿ο쬿).SLſpO&ڿU%[K%:-꿴 -i/1 ;NⅿQ( rY} G- +ҲhN_+ XͿN!,b¿Ϳ>󔯿׿̿PRaϿ#:ο {@ }د ,պuҘ(}ֿ׿8&eؿpɿgҿ$a+>ver = 2.00 len = 50 depth = 2 periodicity = 1 nodes = 21 27s IH0t׿-йg7/`(E-pɿƿ ?/ŘֿRǿ+qz?ݿz'ȿޠ͆&䍿$忬^"3xMOؿ&ܿϏ 6ϴ[6!d. 
1^", @=_\W ɞk G` ʪƿݘѡKZpsѻXſƿdň*.5ڿrҿU)-{ǿӿ%tՍ(P,$|j1,ڹC9п֎:) ʿ{ҿǿo/kmer-code-2013-trunk/libsim4/sim4core/GlimmerModels/donors.162.neg.icm0000644000000000000000000012645411415365503024072 0ustar rootroot>ver=2.00 len=50 depth=2 special=-1 type=0 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49 2  !"#$%&'()*+,-./01>ver = 2.00 len = 1 depth = 0 periodicity = 1 nodes = 1 VLʿǿ>ver = 2.00 len = 2 depth = 1 periodicity = 1 nodes = 5 ̿YȿA܌|ǶU'p4[M̿H4V%ſRп@I>ver = 2.00 len = 3 depth = 2 periodicity = 1 nodes = 21  ͿC6ƿjݿ󏶿Zm|a3N:Ͽsp#Ͽ㪿h瑿:~}¿K篿걿6ͿK坿46#߿vr QݿJhҿ0' o,ƿ2 38aI.a -7R)h Zޮ A| GWοgpóHQ˿Lƿj˞r ɿZ;9տm੿2խ_ΕTdݿ0U{ȿ޿˛_]ɿ:ο|>ver = 2.00 len = 4 depth = 2 periodicity = 1 nodes = 21 8ͿzɿujwQῠ] (0 b3_,|ͿZơ-ÿ;ҿG z쿦ʿh'}OϿ%v~cm9ҿo5 n6n/X$< 6ruqver = 2.00 len = 5 depth = 2 periodicity = 1 nodes = 21 7@̿6Ŀk@dn~ڿJŶoǨ-8T+˿5P\槿ƿտ~W#It*ſL+q㥿i@4W/g'\ ۿͿ dzǿO$ >=3n 拿<5'/ n6|t veο֭僺~Jߛҿ:.޺ȿѷ>`)`"3޿t¿mj?ĿܿjC8cɿ+пο>m>ver = 2.00 len = 6 depth = 2 periodicity = 1 nodes = 21 ̀aͿȿr sdb0R3] 4Cʿ<jÿ`ҿ﮿S-ȿh [ɿ,qosZݿԿ>y j:KĿ4* ^q%/l g*Va Ў0ݴ?qbx 䩕<ο=طXtqׯǿ殿ƿ緿t t˿Bſë`r/b{ݿ¿_AFvLEٿ.Ѭ,ʿCͿ̿o>ver = 2.00 len = 7 depth = 2 periodicity = 1 nodes = 21 ʿ<1ſXۿK׷aA s1ى1NɿX{ЬƿͿd{L迨ſٱ95Ŀ_}c 꿜Υ&[Ooٿ{9ҿ5ᗿ l4tu`4㐿 dء;, l" T$| I ˿?-)ᾧɿ¿ԿX%u>ver = 2.00 len = 8 depth = 2 periodicity = 1 nodes = 21 JϿY˿6\⿜칿U9{D6$mtпmvp¿G3Ͽ C}ǿ榿 ̿[{4忞#АqֿNT GobǿH7^剿 ե(  O+ Qex7vLBs RTBFοg,Dÿ#ԝN?ο6;ڿeȿуN姿K%{­޿-O}2w˿zfcݚȿz ҿƿ;s>ver = 2.00 len = 9 depth = 2 periodicity = 1 nodes = 21  Z$ɿſ0䇑rڿ\M3_APĿ{aʮoĿ~Ͽo cy^鿰ſ9tA7.˿S>eTzm俤=ü ѿqտ^ _GɿT+  ׊jT04 Ȉ-0P d)ϩ C;o} 󂛿$Ŀ¿4Pj{ĿDN%fǿX-VuQOݿ>NJܿYF1Sg˿ۿ%;ſXɿfqֿΙo>ver = 2.00 len = 10 depth = 2 periodicity = 1 nodes = 21  wW̿}ȿ,.Ῠl#ǦJp˯3eۧP˿YaSͣjƿпnX~ƿZ/CLƿҢQԃi G*ÿHr-пZu`ٙϿCħ/NԿY?NӯbMS?+ܿߗ˿mڿ嚿_9ʿz˿ʿs>ver = 2.00 len = 11 depth = 2 periodicity = 1 nodes = 21  ٟגͿ!¿ mz 0׎[| Ϳ]⨿ĿYѿc̥2=zVɿeEɿxL 꿑!7F`οu NcɿK03 kk꧿M* I˅Gy&접 .C@=Ƃ G9ʿ嶿r/ah\2/GxԿDο Șh=!ܿزFQڿ¿׏p{ʿ[郿 $aʿHɿȿ:(x>ver = 2.00 len = 12 depth = 2 periodicity = 
1 nodes = 21  Š˿CʿP ldܿRXyΩ oƆZ55 \SοJ l9̿ZƏ !O}Vuoȿ݋(_ɿy]n("&WA޿Sҿ۳ wjXÿ9'͎ 8.E-<吿 ]سT/'` Ő\겿Gڔs ♿.Ͽ,%ދjɿ͗Y7E霿Vʿ?c踿2 ׿@ſ2Mިӿ.׿¿e(Ѭӿ9e ʿ ˿5ѿ0l>ver = 2.00 len = 13 depth = 2 periodicity = 1 nodes = 21  ]-˿ ÿu sڿK0 gm;1J倢lɿic }ǿҿՍa &-z(Ŀǽ jÿN>힫E" ޿(z/޿?˿1 %dh~9=ߔ ,x#'pV 鉿m$W {'ү8t恿 h꠿ɿ ,@ſŧ)GɿGu{ÿOkʿ6ȿИ4Cq U% t ʿMۿu/Ϳv;ѿÿdr>ver = 2.00 len = 14 depth = 2 periodicity = 1 nodes = 21 P?ɿ ʿ@, 8 ݿj Y :ү{2yÊ kyÿ 0 ?RƿIϿFH r ?TſƯ.dȿnώC ؿ5I% qѿ d1z>q 9拿 ꋿ ]' d󏪿$c c/A|| Ÿ_ r˿{t&IX 휢ǿH`GƿQɿ$ئ sNʋ ޿/TF*ֿFƜYɿ<ȿʿx>ver = 2.00 len = 15 depth = 2 periodicity = 1 nodes = 21 w̿¿~ U;M࿞g < "-, ឿlɿb% ſҿO &iĿ ޵˿Ûl}4￁`ⶖdi޿Nп‘ [eʾy&n S᩿Q2᧐ {΋,V2!su RP5Ktu ɓʿ,X  ƿnR릿}j]RMƿ浿* cEϿ>ffΜ[޿Q DۿPBƿV[x JFѿ t1ǿɿ7˿x>ver = 2.00 len = 16 depth = 2 periodicity = 1 nodes = 21 ?ͿɿTƗo=ܿ≻T 5>*2' :˿d )vȿaѿ zqjο Щzʿeѣg!a,6Oﺿ FB'ٿiֿ HzaÏǿ/fƕ ܤjv0Q` by*1 @??r p Ezſs 9ӽZſL/ !ڿܛƿR hٿf۱| tܿĿ㠿bӿJ(ֿIܵbʿvѿ`ɿAn >ver = 2.00 len = 17 depth = 2 periodicity = 1 nodes = 21 nRʿ¿l*ؿn;qԳI0E3 ~|ſ8Ŀӿ0u=꓿z%5.Ŀ4﴿BƿDZnw\׿%Q¿ WГ`NܿhԿȌ KpԾ/q ؔ⺝N09 g<~~+h; KE #l?*={ @3lXǿ8 ? vJ ݛjXÿdI ¿^ȿCXV UE PGޕ睿 XϿ%忬觿 ὿οcɿl >ver = 2.00 len = 18 depth = 2 periodicity = 1 nodes = 21 QjhͿQ~ɿhהЁۿIM+o8ㅿ -orͿ<򏞿ǿѿ_ʉ21~yĿ*ʫ &1 ĿԜ᱿ G'{鿳X ƤܿAο\ !l¿P9_m 䌿]o@0ѐ ل6p-ߌ >)8ÿFf D{ ˿an4XԿ9  Zƿ㨿iZgֿ* Ϋ*ܿA8Ґ+s˿,cr Aÿ<ӿ 6_ 'пr>˿˿0m>ver = 2.00 len = 19 depth = 2 periodicity = 1 nodes = 21 _Ѡ'ο^霿~q޿߿ ̿ޗ ^eϿj+ ƭ-y ƈ#ˉ 쪌ҨVj9 +hƿJ -ſkƮI-ʿ qտd˰ơMbb !׿;꥿ EϿG{|Kɿi3ҿmo̿" m>ver = 2.00 len = 20 depth = 2 periodicity = 1 nodes = 21 "οtɿ"t3ܿD.6330w܅FծՉͿVz`ȿRyп|;GeQſG};Zƿ__8 Wۿ筿 LtTп㌿ ZcwɿA. /㌿ٳ-+ I|"p9- l :&x ۜ}hʿ\֫ƿs Pu¿&e 숽޿h{ʿFpײ! 
߿{sD"绿њq ƎʿۿUϚ٪ ȿEпƫǿt >ver = 2.00 len = 21 depth = 2 periodicity = 1 nodes = 21 Ϳ֋qR_ݿz^ >F6%蕥fп+ɿiѿ{xELfJؿ뾜,J%إA 4ܿOBͿ 1i~̿2E !?x7 +(g̯)T ?>OD󷁿 Hl<̿Dqӿٰ ꪤǿ5S#l/ڿͯZa&珿ݯGm& uۿuۿK Hn¿yɿr˿V~ >ver = 2.00 len = 22 depth = 2 periodicity = 1 nodes = 21 HοĿWtg⿽m+Դm3^[пDKƿ4пC4un ÿ1[0~Կ Џ$} "%Ϳ͊ umH{ÿ%+V ĥ ES04G }p -E$ 6>#w xQȿcշʥrɿѕԿ}  ".dȿVTu-ۿ~4 'aozq8GNk q޿UϜNAϿ3ſǿx>ver = 2.00 len = 23 depth = 2 periodicity = 1 nodes = 21 㠿}οrEڛE_U޿yީ^)2f W'̿x8ɿN%ֿf:,6o{pGĿwFտDԠnw  ?h,f}YsSܿȿ .:fɿ'{ g!$4B Ww+ .TzC+~ xſ}.h {*Ϳᬿ̿&NvĿ ^ê5^ѿD vXwA> ?׿Roۿ嶀a>ſ<տ¿Eyx>ver = 2.00 len = 24 depth = 2 periodicity = 1 nodes = 21 ܥ#˿PVȿBϔ`ۿ:ԦҤ^-,7ypuϿrFȿvʿJ荿K@ÿB ̿ZU$[b1dXп-˿N o8^R6  Tkk;y YU #: HtEN.o p?̿. Bȿ& ܷƿ hݿɿrv `vؿ6ǿ`οr5ο5ƿͿODs>ver = 2.00 len = 25 depth = 2 periodicity = 1 nodes = 21 ivuͿ|梟veU;޿SS 7)/lءʿjȿ0ӿ ӝf7,),˿\~V•*ۿ,)4ʗ?ſq  fĿ% kX?0N g۳Q$ŏ ;"B! &; Ϳ谰Vʿ輿>t~ȿڿˈǿc*3䷳qݿ쥿u9+dXο3.n$¿#ѿÿ~>ver = 2.00 len = 26 depth = 2 periodicity = 1 nodes = 21 ֣ɿʿM֕)M]ٿ:$,d:6)UIʿ=Ŀ˿|=PɿLOoU޿`ZܿϿ⍿ kcpӓ8 3ﵤ/: +"%w_ ‰)kAx  ÿP в}wĿвA[6ֿWuοrG)n ֿsHڿz"ܝKÿoݿ% kƿcſ̿~>ver = 2.00 len = 27 depth = 2 periodicity = 1 nodes = 21 7楿Bÿ#¿gӿGb@ S2z݊A[Oȿԁ˿Q+WИ퀿3꿽f{mMʻſءኬ|п`WZKʿп9( y"ټU.p) w‰PW-NԌ p*( e֝GSQ aĿU~0׿Iq\ѕP;WnS&곻-hٿ~ua{äh$Ϳ tOA˿_qʿs>ver = 2.00 len = 28 depth = 2 periodicity = 1 nodes = 21 ٣3Ͽ&ǿ퓿t_῭{ި\χ 񷿋1ׄDп?ttƿ{ҿ玿Xod_׿u݌ )X*~俾ݿO̿ V Nljſ>O)Vy ՠ* QM5&S -ZKO0s H8οefe̿f)03mǿ籿V}85ݿqпh׭5߿ͩ¿ҟͿڿ۝ŏǿ˿mÿi>ver = 2.00 len = 29 depth = 2 periodicity = 1 nodes = 21 ZeDثʘZȿ ~}+/5'&e E̿T'IzpۿiʿU%rNG&UϿ/񎾿 <1[տcि '^c²H' y~.d  r&Y[ INzB7' #%4*ǿr8Nj ՙǿgۿ D!<  ѿ"ÛyHUÏ'PLԿCܿU^Yǿ̿Fߵ¿>ver = 2.00 len = 30 depth = 2 periodicity = 1 nodes = 21 j_b1ĿDb#0lQTf_,=%>m.&Ԍe麿9fbAz[ 0ϿZ:*rG"ӌ׃֌n5:HLg4:Q\ fUZ8!1N [ecWq#LF}C qfՉ#O8F yV2QDs) &'*xα௅hX¿7POԐx#($Pe?v/D6迭<:՛oʿU.}M)_h4l1ӽLgEC4 >ver = 2.00 len = 31 depth = 2 periodicity = 1 nodes = 21 ;a¿`IӿGver = 2.00 len = 32 depth = 2 periodicity = 1 nodes = 21  㧿-Ͽ<ѿ1k.}$Nq;> "#տly[˿ ο(ĿawGF3c֔ íѿ%H俜&'xƿ0ꏿ 7H ꖿt ۤ#7e r)Kn FϿbḿd οxe׿ʺ+ٿ˿]@x' ֿ<ɿcx(ҿHſ#ǿ^οUֿ_ ҿ`̿_>ver = 2.00 len = 33 depth = 2 periodicity = 
1 nodes = 21 ! ǿm(¿6;N~п%b;8뷇:ƿAm̿̿ZPѿҪw.ڿ6U겿؟^ Կ! qE8οhϿ PeyT>H̖ Ԋ#:} _~ver = 2.00 len = 34 depth = 2 periodicity = 1 nodes = 21 "cϦMaѿ~\ʿǎ S꿋ezt݌l5qx,5տClDɿϿ^j{ eÿƲr`WꣿJ&릿%#΢t޿wпu ?mnGjȿ0m jCAV pTg% 9U=Nj }۝+ͿدǿC픿 iYֿrNbĿۿ"Ͽfٶֿ5p^:߿Lƿ&34ǿ#ۿȊʿ˿#ſx>ver = 2.00 len = 35 depth = 2 periodicity = 1 nodes = 21 #bп{1!7ۿxt' A~j3) ]@Ϳ۫ë ߴƿ/<ڿjLC +r忿Pl{bIɿ4zWuڿQF 7bAտ(ޘ 2TԿi/m WҩQL. Xz*¿.%Ό 310ܫHj #ͿD F鍢uдuf6 ҿ7.ѿVa$5 ,ڿ|Ŀj䐿΢ {ڿ dByĿ.տb¿z>ver = 2.00 len = 36 depth = 2 periodicity = 1 nodes = 21 $οgʿ&Ɠ"sh߿>\!YႿHúc7ۅEF̿6[lĿϿ^Dɉ!|Hƿ2"ѿCO[^z翜]ݿGӿ, i uT>= ݌49! thHƿ9+9W NfK=e rLǿ߽JBĿr綿! $̿ܠnAֿMؿ6j!~ÐԎ ֿg3؝FĿ ҿw+SʿSʿҿ8m>ver = 2.00 len = 37 depth = 2 periodicity = 1 nodes = 21 %p롿_οH۶#-:<࿕" Ǩ"~~5-\P( ʿX㲿t"~ȿԿkqjW"|JSFFſ<4ɿՔ18l`ԩsbtӿO kο4j7 ޗD+n !r"I I䷿q6I .˿Z̴h[կ2WfWܧ0nȿ=ver = 2.00 len = 38 depth = 2 periodicity = 1 nodes = 21 &yϿIƿp$'I$P#on.ԌtKԿHϿ- ;!2[̒!ҿMh#ٲBտK<"HGؿ㊗9ſSnؿԚĿcLϿȿYy">ver = 2.00 len = 39 depth = 2 periodicity = 1 nodes = 21 '/&{ɿِÿ1%+3׿$ŕձ 5Q$tſT謿"ǿ?ѿ;C'm$v)ɿ㫿 ~Q?ɿŲN!ܿZ,I旿%ӿpܿB&" pd?˽<u# +D2Q ߲{1!r 4` B'! 
մ3ɿPf!sÐ7ƿ#B 捤TSֿ]ܻ w'߿Ơ."0c㿯ʺ]& Ϳ)0U|6.ǿ1ɿ˿;x#>ver = 2.00 len = 40 depth = 2 periodicity = 1 nodes = 21 (οȿO&k#߿U%r츿I7ݸ[JͿg %ȿп«%5!E¿V$NൿͿnVmx+:ݿ墿fIտES LB!1]>% KDzT S 7|+Y ΄h¿D$w% 2!0ʿ2ٳ9ٿ"m\ ꃟ8rʿk)yTZؿɿxs#WĴ>Eի$?x.ſɔKDɿ?*ʿ|ɿο8Vǿ6mu!>ver = 2.00 len = 41 depth = 2 periodicity = 1 nodes = 21 )c"˿'ȌLؿ8&M&ւ[/o"LmƿH"-&ɿFοʠÖ& "d 栿"`XZ6HRտEa˿h Ժqi;,*& Uu*-, zƿ wł0S;ֻ Z̿C ⩿{ʭɿ)a%l!ǿV彿iy&:~gڿN%7ݿţzޕ$_ٿݿfp["9ǿmɿſ ~>ver = 2.00 len = 42 depth = 2 periodicity = 1 nodes = 21 * ʿ̿v(ֿ 'GD7Ɇ' Jο_%6=¿EпHJ 'Tb'ʿ\<#ڷmƿS{pۦ!Џ,ؿ5!9̀Pտi*ۿu% xdRFÿeK;pߑ% ks|18& b 袿-$ OG=<$r οÿVRɁ9yʿ$^ٿF˿Fo!yW׿O&#῭¿]P*M!|qڿ+ &ʿkп ȿBq >ver = 2.00 len = 43 depth = 2 periodicity = 1 nodes = 21 +k̿Wÿ)擿PMܿ}õ9(B.O1惿(2ſIJ8ʧ(_ɿҿ棿(s߂l%g2ȿ6"ZkdHx$鏿׈ؿFڿ^ hrN-ſ8(c$ Ⓙf:aiw$ X켿E$A" b헿CWAEt ο򯿪z¿ μC[Rf0#Dÿ[SCՖ $6ܿd'Fߔ#ؘ;ѿgyC]nuοBϿ6Aɿ1m>ver = 2.00 len = 44 depth = 2 periodicity = 1 nodes = 21 ,s-&ʿwǿ\*yver = 2.00 len = 45 depth = 2 periodicity = 1 nodes = 21 -ܮ0<˿AX¿+AҪ*WJvr/G%*ȿ X*Sǿ6ѿ]*Ly_Rÿ$C̿ϓڿ1 g mYϿÓ" =ZecƿQj( z*Б& 콉Ȫ$( B1OꩿbD:/% 1Ɲ4¿@񳿂|ҿ𰻿d뛿2ÿ$ɫľ%oտz9t#8\h޿<ٛ&ewԿ\擿"ڿؿjx{&v˿5пTȿp>ver = 2.00 len = 46 depth = 2 periodicity = 1 nodes = 21 .L I˿kȿe,"$ݿGsCg+-1/)]Ͽ.Nכ)ǿiɿX+(z&ZͿ+)pƿ/qNNQ#Q՚0ٿJѿ% eſe5(y* +nx`C/FU' <Gְ*( u&5آy' FmϿҞt%C/Mǿc:ݥRƿ^u:ȶ&=ڸܿÿǯz(gQ_C* ^vԩǿ˿R ٜjϿy¿xʿy!>ver = 2.00 len = 47 depth = 2 periodicity = 1 nodes = 21 /} Ϳ!¿B- m޿䳿, y/(dFʿ,;ƿҿm+둿,k,˞ÿ+{CX\Կפ*v ;߿L覿_¿(&/޿doѿ՘* iMÿh7=$ pB/;& j%ay$ M?́' tҘǿ:M:%q}ÿ^^ 'ȿ֭ο\ė#b4ܿ =[r(+߿ _, PKȿ]S*A˿Q2ο!-ɿۢq+>ver = 2.00 len = 48 depth = 2 periodicity = 1 nodes = 21 0Š˿ȿ . τٿַ>-9pmU3-ɿļ䡿-¿Կ-bB俺xɿ$)򯿕ɿqϤ B“ ؿWu,&0߿̿A! }]{ƿ/ .骿7*5) ߁O2* ̒w«'@=}) -G |ÿ`Bл+ 8"0Xlɿ"~1 zѿ¿)m뿪K+1ٿFǿ_Htݿw$6*ȿ&ʿ(sѿ;p*>ver = 2.00 len = 49 depth = 2 periodicity = 1 nodes = 21 1ʿFſ-/'^޿u婿.yJQ3Պ.OĿϪ*ſZѿC*.J}Ŀۚ, aLӿo;|C&If$<At׿տA, +b)ǿ^<2 q# PǿZ :%"kh=JZ"%4տpꙿWǿoìT[uSN,J8޿h]l]*rʿݿf>R**_ɿI̿1ѿ"n'>ver = 2.00 len = 50 depth = 2 periodicity = 1 nodes = 21 24Rʿ4ȿ 08YuٿI §/;֎k=9 /ě0Ͽ!/ ƿn̿/~"ƿ..[ҷȿҠD Ԙ TܿV[.ӛ߿˿̏+ 0td"ǿz,0- ̆M;6!  
#އ1p+ ->RvVL'e i|3C̿&ٷŧҿ'kĿ(Ӱ蠠ѿ򨿩ۯ$ϴDѿ4ǿ…+\K h*$3.t׵ h?ƿ.eѿcH6)ÿ$ʿFlǿ-3%kmer-code-2013-trunk/libsim4/sim4core/GlimmerModels/acceptors.162.neg.icm0000644000000000000000000012645411415365503024551 0ustar rootroot>ver=2.00 len=50 depth=2 special=-1 type=0 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49 2  !"#$%&'()*+,-./01>ver = 2.00 len = 1 depth = 0 periodicity = 1 nodes = 1 Xοǿ >ver = 2.00 len = 2 depth = 1 periodicity = 1 nodes = 5 $xͿÿϡ26ିD'dBⲿ8>]~Q˿Wb"п%>ver = 2.00 len = 3 depth = 2 periodicity = 1 nodes = 21 H1Iѿǿ J=Vzd2 տO簿q\οp~EO}F˿uvq킿;neu˿.{ܿ̿ g]˿3 !s~:'y Y꺿#m,苿 3]D K낿\FݿȲtĿcjUȿXˢ}Ul5п>οإӿٹ*{=~!}쎿ӿĿ5瘛f ϿȘҕj|9ƿԿ>}>ver = 2.00 len = 4 depth = 2 periodicity = 1 nodes = 21 ,ͿTſ꠿S޿܉+n7⬊?.ʿلnICjvϿ_Li됿kv#;cQȿew,Ȑ;7hrV ֿSϿ# 6m> ӆ*$$ Y-Dq- 5 IL ~Oɿи ::%Ɲq3 ەͿLҒ,j˿tޭg@ԿT¿NUE˿%"dĿͿ;˿ ,w>ver = 2.00 len = 5 depth = 2 periodicity = 1 nodes = 21 k̿Gƿ84g࿿C f4촋ӕ'ȿqb+п3!szTd쿖ô{uSͿQḿh_4뎿ecʿ^ ]gTǿ9/^ x8%\0Ŕ .q8)g 䍿iH$ x?6˿ j#ɿ,k` 㬿|˿,Ҳ!l:ſ·(liۿÿ˗ֺп_(͟{˿ƿ>ver = 2.00 len = 6 depth = 2 periodicity = 1 nodes = 21 "ěsο6Ŀꞿpw(?oSy〿G4s59kzȿ:#BϿ䬿C,>|[@&ڦ/п#mЭ,֒g+XǿοI p\ﺿ/ < c7f €ZWd,ዿ Q;ۋLv SſղkVVHſ ǿEAaп!@H+=࿙Y׿X#Snz ѿŝLH8ĿY˿,5ȿ~>ver = 2.00 len = 7 depth = 2 periodicity = 1 nodes = 21 a]οǿg蠿S<M+{e27N&qпYMkS˿$: |soXſĭ1bԿ!htx¿o;޿ȿ 3i\JyR: ED/r ;<3pe ݵ== X~T`ڿ︿SƿZ ¿CKிz0LͿ 8٩rͿ)ڎԿQ8%Uտ$P7~|8{ȿver = 2.00 len = 8 depth = 2 periodicity = 1 nodes = 21 a:Ϳ>ÿb\㏿P߿a)[}eA3㐿$(˿8cfϿ]h3n v/ĿwԳHȿߘ,AJ7P9AƿU@$߿̝ɿjC [ſ/ǚ SuꪤR,k ,~f:(Ѯ ͇[K ͇ $VoпA8ⓥh^'7֗7ͿH ҿt\׿4cw3=|տ2qlccؿBĿvg٣)տZÿϿȿ־y>ver = 2.00 len = 9 depth = 2 periodicity = 1 nodes = 21  pѿӱƿ=#㿀鳿߲c{"}55Rӿ\LOXп쯿oB54¿x.ȧF׿l7ƀBldqȿ䏿rٿ;ѿ횿 DVſG;Q V,*՗ W*x kDDYw 6nֿÿ¿5v`1տ@ ɿң2[ѿѳg<:࿖﫿8͝Y׿ʇ$1տY`՗oPϿxͿps>ver = 2.00 len = 10 depth = 2 periodicity = 1 nodes = 21  隿οܣ⿂ͪmz9㋿w3ʿpRҾ^п) ݑ%xW}oÞοo76U86P[Ŀ~忑Ŀ 6{k<z,Q b \9  Vd9  DylI+ .MԿd㱿ཿPʾh`GLɿ⅝" ʿ>3Lʿ2hQ ܿ ֿLeſ2WszF6տBA5i¿x̿ʿz{>ver = 2.00 len = 11 depth = 2 periodicity = 1 nodes = 21  喿?Ϳȿy䡿 GXu⿲F! 
Nz K7tZvʿ赿vո~JϿ9ҐZt{LĿ| Ͽឿ򓉿v<ǜʿ폿܇п eWi.ſ14 鉿W66}䐿 DC,V sy涿K m~&ۿנVǿ4|C/@ÿoÿ ¿ߺ϶66応欿Cҿ˙U?e)ѿv Cޭƿɿɿ{>ver = 2.00 len = 12 depth = 2 periodicity = 1 nodes = 21  G^RϿeĿµ ɎdR 4Y4`PIQ1ʿ?RM ҿ zp¿~{k9ӿu%ǹVVk𿑓bſ ݿȿ  }k8ÿcU: Fà^3ć яݲ1( /ddT; P߾˿~ݻ =FCKsᯍ+oſi)ƿĢMϿYSȣ稿ʷ߿Nؿbȿ儐;Ϡ뻿Tڿggɿ&'ʿƑ>ver = 2.00 len = 13 depth = 2 periodicity = 1 nodes = 21  Nпÿ ڌo$9 4y\=5썿 R jͿUS?ʻR?ѿƪ#| |r(⢿v ﯿȮҿ颔pp\duĿ*u'ݿ ɿ^, \S̿2dS ۄ^3 ")|`+ VN&uC` {hWڿ^[ǿqS|ĿRYڭƿo娿Xs QͿ B㡿;q俪ߛ ӿR%ÿPڥVܿ p`^zǿ%̿쁿>ver = 2.00 len = 14 depth = 2 periodicity = 1 nodes = 21 .ӿཿR {+樿 Z, 2`E 떿;Կ eBm Yֿ[Wϔ }KU赿Uʿbgҹ 9ɿ{{߿Xwנ X&ʿ],q㚿 0.B W,Ǝ ~yEyyCa Tz׿/]-sĿe iҿӿ Hٿ[?+| sӿ&ͿJ=^I߿3ܐ;Α\ѿſn΄>ver = 2.00 len = 15 depth = 2 periodicity = 1 nodes = 21 .ӿÿJ餿 ])%t u?614 ׿[𰿙칿 $Կro lPſ| U.A̿c,E ɿ 4t޿aJȿ ~KX~ӿ45 'P&/ߏ wG/1_ /HƗ r.FؿΎ̿ CM -rꐿGտ&E~¿ Zݿdʷ߿CΜ bֿrVɿàrۿNϿ˿`{ >ver = 2.00 len = 16 depth = 2 periodicity = 1 nodes = 21 ѿNsO忋D |{p2p Hӿܯ% (ο'ʓ Qn'YgϿ K۪;Xd ̿  ؿ&ʿ xn[~<>N ?4 3 [|ʷMJ隿 b? v@ܿs73Nܮ \6Vs+ʿc-ɿz~]а5޿-wR ҿĿR \¿U˿j蠿NQ˿yοȂ >ver = 2.00 len = 17 depth = 2 periodicity = 1 nodes = 21 lͿſ>wF? Uo{41˓ )Jҿ]5ế{̿d*nƿSg9Ͽ:MҸ nN􍤿m]ſ  ߿#J !_虻?R4 `Eo-0' .nӼ,o 𗋿ŧF_ sP信`5 ſuѿUܘÿ*kο˞ Uuݿ ؿ ׻s.eͿۚ D q㺿:˿'ο=>ver = 2.00 len = 18 depth = 2 periodicity = 1 nodes = 21 ǀʿUÿڿ4է᷿9)<C 85ƿnظbͿ&+4TO3ƿ3+ͺ < ܿ9~3Ϳ HݿLÿ] uFE!熿 ˌv*92 q6%> ȅI#Lԉz 7L2տ6ȿꬿ?p b[~ɿ 1+¿X 㪿D޿ 㶿 Cӿ[̶ȿo׿тlQpɿ οu>ver = 2.00 len = 19 depth = 2 periodicity = 1 nodes = 21 ߿cbk^gse=42 Ή#꿕գ#ʰܿ" ɗkj軿 8硿Hǿ ݃V֤6߿ b툿zj+ hJ@ӿ?H ڠ1;0bM }kھE!l+ Hp쵽>F Arn'[M׿kğ ɿꨝX̿ v߿n西y<5忊N1̿Ͽ 㦿㿕lyڿXƿj>ver = 2.00 len = 20 depth = 2 periodicity = 1 nodes = 21 O\НTrֿw(ԋIpR.ȿٛ,ۤƞܤ!OпJ%ڿ7ꤿg< Ӫo ňdӿп [qN,)^ǿ b,V|)w׿ j8&տ ?畿9Vl9= &2h@&U~ U\(f9ꝓu]пh3H }ٿY= &֤mҿ +&Pʿ>ver = 2.00 len = 21 depth = 2 periodicity = 1 nodes = 21 /<̿^< WUٿqȸ䖿UFOş ̖<̿򑩿ʿ7\Ϳq8~GDԿ˿ ׿]GB϶`s#ǯѿB׿K V EzѼ뫾 󑘿S¿E Ͽ "׾ /5ڿ-꯿cy褿Y?뫿58JsӿE]XٿƼο7oп_ÿc'Sܸ8y]zcǿrN׈ؿ ƿդ>ver = 2.00 len = 22 depth = 2 periodicity = 1 nodes = 21 z:aοÿJՄl2+п>x춿 2 Eƿ`8N_ѿ,KQpRI?˻Ŀ Č]. 
qݿSlʿ!倿¿ me)$/Ӈ E~ɮ4ٓ .bc!`哿 ,%`CA ~kϿmlǿŎ4" V.]ǿ$UzԿ浿R fʬͿE GXͿh?֠ lͿ ͿB榿UY.߿`2>ver = 2.00 len = 23 depth = 2 periodicity = 1 nodes = 21 ޥ|ӿ)ƿ鷨MCDr>vFP(11(>ѿG,=\ѿrީ5f% 'H¿1'ѿJtޱ!IHͿ *$ѿ蜿 w_fɿ̐3j UuW5儿 ]m ÿ~&} wÿ;5 ]kٿfrɿ-ƿ$v¿,4&ӿEȿο,箿N٧c4zĥ˿~~ÿϧ~տƥ#l_̿gο |>ver = 2.00 len = 24 depth = 2 periodicity = 1 nodes = 21 <^Ϳx¿EѾ޿ۮ6Vx.v6ژKÿ3?0ƿͿ>;h,sg` +0ǿѝ-rbac횿0ǿUzӿ@ȿ6* e ]̿:X ﵉ J9ꃿ ymػC(O 8W!UKCq pο4鮿21*}ʿ;; տ ЖIοc>瑯pؿb|3ݿ Ƙ~pſοg_pɿп?ز~>ver = 2.00 len = 25 depth = 2 periodicity = 1 nodes = 21 /!?ӿ=^¿7"ɊkȮ+|j`4䩎R;ֿկUT(ӿXQP|3z߮k )Cؿ&  04Aſn䔿}ܿͿj| XĿR08 5)& քVtz<7  $7: Y}kw㿛 AϿػy ¿lؤ6菿ͿIÿ7ѿOX4$濶r׿fſ W׿( ~ʿſ>ver = 2.00 len = 26 depth = 2 periodicity = 1 nodes = 21 7yӿʣꏿ0俌R|8|-\i!vӿ!.;F»&ӿ# Z쿰*K ̌ؿ+0heQړI̿uܿmĿw b`dͿCy% 2b`# uWƿ,\ N+^DdS 咿){ֿK/SvΦ\?]˿ܸ̿uۿWGzݥi߿X԰=ֿۘ˿W/ :[[׿# . ͿT6ο{>ver = 2.00 len = 27 depth = 2 periodicity = 1 nodes = 21 >ҿT: ዿ#9 bzycw1,rdӿ謿fC$Կk3-tJ꿏Bƿݨ]x aѿ'i<=Ϟ*¿nƿ ^xbZÿZI0Z +Y {s7Lj#nh |n G?3 C|7w`Oʳ 촿H ӿ7)¿mԜuؿA`>9ߪ 㿀ɫ@Yӿa̿Wr#'/3Կic)Ϳ,Ŀ>ver = 2.00 len = 28 depth = 2 periodicity = 1 nodes = 21 ʘ.ѿcdrM@^xj(1&F./~P˿'/kϿXksly"'ѻف fw寿|cʿver = 2.00 len = 29 depth = 2 periodicity = 1 nodes = 21 +zҿ>ÿMcSw#W2tytSN56 ׿%DF ѿPr;4$Tпţ8K!Xm󿹹sʿߕ ۿUpͿ7 cQͿ^6 z}T)}ږ oA4嬓  ?@ U ;s˿{!mr𨍿ۿ^[T"˿ZTL=CsԔ-ѿPƿjI 3>ԿG.ÿ&˿̿ay>ver = 2.00 len = 30 depth = 2 periodicity = 1 nodes = 21 jhͿ¿⢿Viۿ끮q0ypUj70Ȍf<п)o0ʿ߬ɔ^Q:&Iʿѹ+Ή}_Ŀy׿Ͽ kĿ۵4 5~N L. vRР-Gt v;)ſ2N, ҋU߿"ܥkĿŹ찿괿vPԿL}3¿ Ϳ-44Mֿ6>ܿ/嘿ط.FϿuA'¿ǿƿ>ver = 2.00 len = 31 depth = 2 periodicity = 1 nodes = 21 Y>Ͽǿp;֏e\s9|/4dZӿ,v񫺿N̿5yU}{e%d}տ?)2ٙ޿2y¿ )Ăɿ0b `-Nǿ464 ”G/B 'uy,% ?ÿ>O' 0{r߿AMIɿX;N&%8bпq~Sny_ͿUޢ(*RݿigOҿfŜPKfϿ,} "༿ ƿ`ؿ{>ver = 2.00 len = 32 depth = 2 periodicity = 1 nodes = 21  d̿f4="߿9~Kl6'+LĿ n_|ϿTuyv!οQTضa㿧Aeɿ(ˑۿſx `i)+ >' 0܁4!S #v㭸'&? GwZ? 
N̎!}¿ULƿP׭꼿Ŷ'd&ǿB^ZɿȿOڵTؿN#M׿ĿڎwަtK׿Y_ɿ˿}¿C~>ver = 2.00 len = 33 depth = 2 periodicity = 1 nodes = 21 !rοpƿ4pY࿠UⱿ]Yηp4~aο˲·z/C~ϿE.}` ><ver = 2.00 len = 34 depth = 2 periodicity = 1 nodes = 21 "0Ͽÿ6 V俚[̳<z4"#̿B)^zMMSο5oڑoMj⹿íw'ͿG_E@ĿL d7ǿKA ]{п-˒ 3ver = 2.00 len = 35 depth = 2 periodicity = 1 nodes = 21 #ƙRʿcſz!pݿIԯ뱿 II>4 ڔ ˿ ggR/ʿw4:$ 5 C ]L04*Ͽ!>SW뿚ݞ.pؿoʿw} ؁d9*آ 9`5 G0R!,J ZwoG̳ |䂿%eտڻg.`At M-Ͽ_ȿƢ˿qSuҏܿO#㿸' K:cϿq z'B¿Ͽڄ>ver = 2.00 len = 36 depth = 2 periodicity = 1 nodes = 21 $1qοUĿߗ"yoZ俙$!jLU6J!H^tʿXm N[hUпp!Utv6S-گd|п$گk鿔ȿs*˿)Ԛ -d Ϳ/ $󑨿3s o<K2r 5oEc >qпǮvॿJI?ȗwͿ DĿ֡JʿQ*y,忀 ߿s̗'ɞ߻ ֿ板R=¿\*Ϳƿ4D>ver = 2.00 len = 37 depth = 2 periodicity = 1 nodes = 21 %ܑ̿ͅ3ȿ̠#N |]կ"aNG|4"& п>F"FȿϩNߑ"jv_忧ſM/!XܨտQ迓]¿$|޿*Ϳo- .[:{ſCU7; ʇD!x-| 8.06! Jiq@~ P(տCƿ է庿x¬6d˿vʿrӿ0'k&Nٿ|t(!ؿ0G2f`^p4ɿpX¿ɿ~Ϳlh|>ver = 2.00 len = 38 depth = 2 periodicity = 1 nodes = 21 &$w̿*ĿY$ l߿ECY.# Ozhﲿl98#ǿ0#A.KѿG{Չ#t,q¿+"U п`m! ͊#✝UſǜӿaȿΗ [Q0̿k6! m.m~n5N gEg)l tZYOWO 𽐿ƿꊳB¿"Ŀƿf< EܒTĿ}r7ɿEw>̿; Օ޿bVd "B߿ſ䍿֡sNĿ2Tٿl˿ɿ2">ver = 2.00 len = 39 depth = 2 periodicity = 1 nodes = 21 'ͿĿ(ࢿ%q-ܿS$oy6(304"0jϿ oi$%ο܇:V$Nw7}>2dɿ?|̲4ݿ#`A9Ϳx ߿5ǿԟ# #V6ȿ:h nS(=# z4u7i j<@U$ Q׿T¿!ο?N%#{y1ȿm8ȿ =KϿF詿߿弳A3#nӿ ¿E˙hF¿!Կqn۟ȿsϿ~ >ver = 2.00 len = 40 depth = 2 periodicity = 1 nodes = 21 (28ͿBĿӠ&KGo5P%Q4=č#/J̿^ﱿ%1qοzS폿%d2#F$6ҿ >8翣,Ŀۿ˿G8# Od66 pZ%S! RTy޳A0: O㶿>E?}! տeպ#`s>Y25 DS¿yɿՌϿ꥿.迓$xտ)դ!-,L׿;0@g!K¿_8ȿϿw{$>ver = 2.00 len = 41 depth = 2 periodicity = 1 nodes = 21 ).l̿ǿ֡'@޿I&&°7T#̓˿%χ&_`{Ͽc&w쿡WD¸"sͿ@"Rܿ ƿ%lhο'p# st`<ᕑ" X$fK." 
ȥ-X-_ 튂FaE~ :ѿ&ݵ$̿"꨿P ĿtſG"пγIC῱,%LܿAEs튟"%ֿ椿ؔ ˿ǿQ">ver = 2.00 len = 42 depth = 2 periodicity = 1 nodes = 21 *`P˿pÿp(PxM'` 3쌿'ȿ<Ȯ#%gÿǿq'X~oc߽-' տwݝ uuҕQ!i s^޿ѿ _ɿ/7& %B(5: u=R[,ir }jj;o& s%:˿ jh#2F¿F"۲M&6˿h@Ͽ7"ſn'H$ Nӿ(ӭ&W޿1BķLY# )οFa!ȿƿ&ſ󓀿%>ver = 2.00 len = 43 depth = 2 periodicity = 1 nodes = 21 +_l̿{ȿ')ޑ߿(6n4ۍ(1zĶ̿0(˿t(ytlǪV Zȿg օ&tKɩǿ:8߿|ɿv& $`&Ŀ0 ITm4 蕿% ҂TO)' tC# }ւXlп \Ϳ$¿Ĩ1DտϹʿ"!͏ſ=M$X"ݿ$;'Dڿ 𹡿&&kտ@]}O4ſ"п偿 >ver = 2.00 len = 44 depth = 2 periodicity = 1 nodes = 21 ,yq̿¿䢿*m࿛,̴)(1گM 4䐿οbTJ)튾ݛͿ))lzT* \#ɿ=#S NU4%7?ǿ( n|ݶ9 ) ;7=;c [ɋ:Ä ێ +}ظi:_ 7) οq5տE?%%Ŀ%οed%¼̿6.ο*}(>ver = 2.00 len = 45 depth = 2 periodicity = 1 nodes = 21 -aޘGͿ+ſ,1+"޿U9*IQ|д64r@(锿3ͿB^<*M.nϿ~쑿*kLl¿ )̿*$y>快JSÿcݿʿ) 2[>k wLW*!j' +3* 喹ŵ5,$ ѿ' "ʿ ]ﵿfv(׍ʿH5|Ͽ&.#HοñZ"^ٿ)C&6ҿKNͿl8ver = 2.00 len = 46 depth = 2 periodicity = 1 nodes = 21 .U8˿ȿN,}߿J+=P,5't˿p(Cd쿿ʿmʵM+|O`j\*ɵſᩜy)Š濖Ʒ!?4˿뱖" hHƿ`<;) $,i'% A s${ {rثEу ӿǯ+RJȹh fci˿[>Y̿Ի`ڙܿ,b-)QڿG'':¿ ¿#v'˿4ɿֿ v#>ver = 2.00 len = 47 depth = 2 periodicity = 1 nodes = 21 /윚=̿ǿpU-D7=ݿbD:د, )I4_S,SoKͿd,(kοT,-xӮ(08ȿj+#掿Cwcs^ӿ˿+ N_ǿ(? 
* Dɨ3ʖ" Z~ =1( R=⩿jDst( H׿wa׻"˿IA<{UϿYҠſ¿:,@ݿeL++ܿƿtӽ#ؿdʠ&,IʿeĿпv)>ver = 2.00 len = 48 depth = 2 periodicity = 1 nodes = 21 0PEx̿j:ſ.όΉ۴-=]1x)*ɿ'F'B{Ͽn-o𿶉ſߵ,~Xʿj*#+爿w XĿ# PlCʿ' (;ky7\<- J(:j,H}' n{W*P y,AV; 鎿Ӏ׿rE+@@Oīʲʨ`ӿ˨fN+]zſ3ÿ^G俋o,ݿڗPuI,򾿙fۿw!ƿ˿Ŀ:>ver = 2.00 len = 49 depth = 2 periodicity = 1 nodes = 21 1d#˿H[ƿJ/pJݿ5X5.*󨮿}5.˿uu,kȾ7οM.tpdƿ0*ҿ**ଋkbǿ'x{Fտ@̿ - ^82:ܛ* i;n1G+ v }-[- Oc7yCv* ~Eտ$0.iver = 2.00 len = 50 depth = 2 periodicity = 1 nodes = 21 2_9 ʿaƿ{ۢ0ܿNfȴ/^޲7/aBaǿ\/˿lL/${i[0,ӫz/οZX඿(d7׿嫥Oʿ)nvϿL% \ZĿq9] ʁK2f* 2᳿$0,)+ [6.(BG\% .\ƿA$p ƿ!ɿ\1Gx ړƖɿVHaĿ.aſ鳿F oy߿"26.pؿ ¿mP+̿㡿%/ƿ!'ĿƿEɆ+kmer-code-2013-trunk/libsim4/sim4core/glimmerSplice.H0000644000000000000000000000137211415365503021151 0ustar rootroot#ifndef GLIMMER_SPLICE_H #define GLIMMER_SPLICE_H struct ICM_Score_Node_t { short int mut_info_pos; float mut_info; float *prob; // was prob[ALPHABETSIZE]; }; struct ICM_t { int empty; int model_len; int model_depth; int periodicity; int num_nodes; struct ICM_Score_Node_t **score; }; struct Fixed_Length_ICM_t { int length; int max_depth; int special_position; int model_type; int * permutation; struct ICM_t *sub_model; }; extern void readModel(struct Fixed_Length_ICM_t *fixed, const char *path); extern double Score_Window (struct Fixed_Length_ICM_t fixed, char * w, int left); extern int getModelLength(struct Fixed_Length_ICM_t fixed); #endif /* GLIMMER_SPLICE_H */ kmer-code-2013-trunk/libsim4/sim4core/sim4b1_s.C0000644000000000000000000000445112322046702017765 0ustar rootroot#include "sim4.H" #include "sim4b1_s.H" mss_t::mss_t(char seed[32]) { position_t MP[64]; type = 0; mask = 0; masknum = 0; seedLength = strlen(seed); matchedLength = 0; int total=0; int maskSeedLength=0; char seed_mask[2*seedLength+1]; for (int i=0;i> shifts[i]; return(masked_ecode); } kmer-code-2013-trunk/libsim4/sim4core/sim4parameters.C0000644000000000000000000000302211512742750021303 0ustar rootroot#include #include "sim4parameters.H" #include "sim4defines.H" #include 
"../sim4polish/sim4polish.H" sim4parameters::sim4parameters() { _findAllExons = false; _minCoverage = 0.0; _minCoverageLength = 0; _minPercentExonIdentity = 0; _includeDefLine = true; _printAlignments = false; _alwaysReport = 0; _ignorePolyTails = true; _polyTailPercent = 0.60; _mspThresh1 = 0; _mspThresh2 = 0; _mspLimitAbsolute = 0; _mspLimitPercent = 0.0; _relinkWeight = DEFAULT_RELINK_WEIGHT; _wordSize = 12; _wordSizeInt = 8; _wordSizeExt = 10; _dontForceCanonicalSplicing = false; _forceStrandPrediction = false; _slideIntrons = true; strcpy(_spacedSeed, "111111111111"); strcpy(_spacedSeedInt, "11111111"); strcpy(_spacedSeedExt, "1111111111"); _isSetSpacedSeed = false; _spliceModel = DEFAULT_SPLICE_MODEL; _isSetSpliceModel = false; _interspecies = false; _style = sim4polishStyleDefault; _percentError = 0.20; _match = 1; _imismatch = -5; _vmismatch = -5; } sim4parameters::~sim4parameters() { pthread_mutex_destroy(&_splice_mutex); } kmer-code-2013-trunk/libsim4/sim4core/sites_score.H0000644000000000000000000000051511415365503020675 0ustar rootroot#ifndef SITES_SCORE_H #define SITES_SCORE_H /* DO NOT REMOVE or MODIFY !!!! */ #define NUM_VALUES_SCORES 2560 extern double score_ex_acc[NUM_VALUES_SCORES]; extern double score_in_acc[NUM_VALUES_SCORES]; extern double score_ex_don[NUM_VALUES_SCORES]; extern double score_in_don[NUM_VALUES_SCORES]; #endif /* SITES_SCORE_H */ kmer-code-2013-trunk/libsim4/sim4core/extend.C0000644000000000000000000001727111415365503017644 0ustar rootroot#include "sim4.H" int Sim4::extend_bw(char *s1, char *s2, int m, int n, int offset1, int offset2, int *line1, int *line2) { int col, /* column number */ row, /* row number */ max_d, /* bound on the length of the edit script */ d, /* current compressed distance */ k, /* current diagonal */ DELTA, /* n-m */ ORIGIN, lower, upper, magic_d; int *last_d, *temp_d; /* column containing the last p */ int *min_row, *min_diag; /* min (b)/ max (f) row (and diagonal) */ /* reached for cost d=0, ... m. 
*/ DELTA = n-m; max_d = m+1; ORIGIN = m; for (row=m, col=n; row>0 && col>0 && (s1[row-1]==s2[col-1]); row--,col--) /*LINTED empty loop body*/; if ((row == 0) || (col == 0)) { *line1 = row+offset1; *line2 = col+offset2; return 0; } int *allocdSpace = (int *)ckalloc((m+n+1+m+n+1+m+1+m+1) * sizeof(int)); last_d = allocdSpace; // m+n+1 temp_d = last_d + m+n+1; // m+n+1 min_row = temp_d + m+n+1; // m+1 min_diag = min_row + m+1; // m+1 for (k=0; k<=m+n; ++k) last_d[k]=m+1; last_d[ORIGIN+DELTA] = row; lower = ORIGIN + DELTA - 1; upper = ORIGIN + DELTA + 1; for (d=1; d<=m; d++) min_row[d] = m+1; min_row[0] = last_d[ORIGIN+DELTA]; min_diag[0] = ORIGIN + DELTA; d = 0; while ((++d<=max_d) && ((d-1<=good_ratio(m-min_row[d-1])) || ((d>=2) && (d-2<=good_ratio(m-min_row[d-2]))))) { /* for each relevant diagonal ... */ for (k = lower; k <= upper; k++) { /* find a d on diagonal k */ if (k==-d+DELTA+ORIGIN) { /* move down from the last d-1 on diagonal k+1 */ row = last_d[k+1]; /* op = INSERT; */ } else if (k==d+DELTA+ORIGIN) { /* move right from the last d-1 on diagonal k-1 */ row = last_d[k-1]-1; /* op = DELETE; */ } else if ((last_d[k]-1<=last_d[k+1]) && (last_d[k]-1<=last_d[k-1]-1)) { /* substitution */ row = last_d[k]-1; /* op = SUBSTITUTE; */ } else if ((last_d[k-1]-1<=last_d[k+1]) && (last_d[k-1]-1<=last_d[k]-1)) { /* move right from the last d-1 on diagonal k-1 */ row = last_d[k-1]-1; /* op = DELETE; */ } else { /* move left from the last d-1 on diagonal k+1 */ row = last_d[k+1]; /* op = INSERT; */ } /* code common to the three cases */ /* slide down the diagonal */ col = row+k-ORIGIN; while ((row > 0) && (col > 0) && (s1[row-1]==s2[col-1])) { row--; col--; } temp_d[k] = row; if ((row == 0) && (col == 0)) { /* hit southeast corner; have the answer */ *line1 = row+offset1; *line2 = col+offset2; ckfree(allocdSpace); return d; } if (row == 0) { /* hit first row; don't look further */ *line1 = row+offset1; *line2 = col+offset2; ckfree(allocdSpace); return d; } if (col == 0) { /* 
hit last column; don't look further */ *line1 = row+offset1; *line2 = col+offset2; ckfree(allocdSpace); return d; } } min_row[d] = last_d[ORIGIN+DELTA]; min_diag[d] = ORIGIN+DELTA; for (k=lower; k<=upper; ++k) if (temp_d[k]_interspecies ? 2 : 3); while ((d>0) && (min_row[d-1]-min_row[d]=2) && (d-2<=good_ratio(max_row[d-2]))))) { /* for each relevant diagonal ... */ for (k = lower; k <= upper; k++) { /* find a d on diagonal k */ if (k==-d+ORIGIN) { /* move down from the last d-1 on diagonal k+1 */ row = last_d[k+1]+1; /* op = DELETE; */ } else if (k==d+ORIGIN) { /* move right from the last d-1 on diagonal k-1 */ row = last_d[k-1]; /* op = INSERT; */ } else if ((last_d[k]>=last_d[k+1]) && (last_d[k]+1>=last_d[k-1])) { /* substitution */ row = last_d[k]+1; /* op = SUBSTITUTE; */ } else if ((last_d[k+1]+1>=last_d[k-1]) && (last_d[k+1]>=last_d[k])) { /* move down from the last d-1 on diagonal k+1 */ row = last_d[k+1]+1; /* op = DELETE; */ } else { /* move right from the last d-1 on diagonal k-1 */ row = last_d[k-1]; /* op = INSERT; */ } /* code common to the three cases */ /* slide down the diagonal */ col = row+k-ORIGIN; if (row>=0) while ((row < m) && (col < n) && (s1[row]==s2[col])) { row++; col++; } temp_d[k] = row; if ((row == m) && (col == n)) { /* hit southeast corner; have the answer */ *line1 = row+offset1; *line2 = col+offset2; ckfree(allocdSpace); return d; } if (row == m) { /* hit last row; don't look further */ *line1 = row+offset1; *line2 = col+offset2; ckfree(allocdSpace); return d; } if (col == n) { /* hit last column; don't look further */ *line1 = row+offset1; *line2 = col+offset2; ckfree(allocdSpace); return d; } } max_row[d] = last_d[ORIGIN]; max_diag[d] = ORIGIN; for (k=lower; k<=upper; ++k) if (temp_d[k]>max_row[d]) { max_row[d] = temp_d[k]; max_diag[d] = k; } for (k=lower; k<=upper; k++) { last_d[k] = temp_d[k]; } --lower; ++upper; } /* report here the previous maximal match, stored in max_diag and max_row */ magic_d = (globalParams->_interspecies 
? 2 : 3); while ((d>0) && (max_row[d]-max_row[d-1] #include #include // Define this only if Liliana is watching you. //#define SPLSCORE class Exon { private: Exon() { next_exon = 0L; frGEN = 0; frEST = 0; toGEN = 0; toEST = 0; ori = 'U'; length = 0; edist = 0; flag = 0; percentID = 0; alignmentLength = 0; numMatches = 0; numNs = 0; numInDel = 0; numEdits = 0; #ifdef SPLSCORE splScore = -999999; #endif }; ~Exon() {}; void init(int f1, int f2, int t1, int t2, int len, int edost, int flog, Exon *next) { next_exon = next; frGEN = f1; frEST = f2; toGEN = t1; toEST = t2; ori = 'U'; length = (len < 0) ? (t2-f2+1) : len; edist = edost; flag = flog; percentID = 0; alignmentLength = 0; numMatches = 0; numNs = 0; numInDel = 0; numEdits = 0; #ifdef SPLSCORE splScore = -999999; #endif }; friend class exonManager; public: void printList(char *label) { Exon *l = this; fprintf(stdout, "%s", label); while (l) { fprintf(stdout, "GEN f=%8d t=%8d EST f=%8d t=%8d flag=%d\n", l->frGEN, l->toGEN, l->frEST, l->toEST, l->flag); l = l->next_exon; } fprintf(stdout, "----------------------------------------\n"); fflush(stdout); }; Exon *next_exon; int frGEN, toGEN; // Genomic coords int frEST, toEST; // cDNA coords int ori; int length; // - 'flag' controls whether the second blast pass should be run // or not on the adjacent (unmatched) fragment. // - 'edist' is an approximation for the error rate within the exon // int flag; int edist; int percentID; int alignmentLength; int numMatches; int numNs; int numInDel; int numEdits; #ifdef SPLSCORE double splScore; #endif }; // Just for allocating exons. It takes care of deleting exons. You // are not allowed to delete them. 
// class exonManager { public: exonManager() { _listLen = 1; _listMax = 8; _list = new Exon* [_listMax]; _curLen = 0; _curMax = 256; _list[0] = new Exon [_curMax]; }; ~exonManager() { for (uint32 i=0; i<_listLen; i++) delete [] _list[i]; delete [] _list; }; Exon *newExon(int f1, int f2, int t1, int t2, int len, int edist, int flag, Exon *next) { // If the current list is full, move to the next one, or allocate // more space. // if (_curLen >= _curMax) { if (_listLen >= _listMax) { _listMax *= 2; Exon **l = new Exon* [_listMax]; memcpy(l, _list, sizeof(Exon**) * _listLen); delete [] _list; _list = l; } _list[_listLen++] = new Exon [_curMax]; _curLen = 0; } Exon *e = _list[_listLen - 1] + _curLen; _curLen++; e->init(f1, f2, t1, t2, len, edist, flag, next); //fprintf(stderr, "exonManager::newExon()-- return exon at %p\n", e); return(e);; }; private: // Exon pointers are valid throughout the lifetime of execution, // so we can't use realloc here. Instead, we keep a list of arrays // of exons. // uint32 _listLen; uint32 _listMax; Exon **_list; uint32 _curLen; uint32 _curMax; }; #endif // EXON_H kmer-code-2013-trunk/libsim4/sim4core/sites_acceptor.C0000644000000000000000000034052511415365503021365 0ustar rootroot#include "sim4.H" /* DO NOT REMOVE or MODIFY !!!! 
*/ double acc[NUM_MODELS_ACC][NUM_VALUES_ACC] = {/*, acc[0]=..., */ {-1.345152, 0.000100, 0.000100, 0.000100, -1.516403, 0.000100, 0.000100, 0.000100, -1.848330, 0.000100, 0.000100, 0.000100, -1.014731, 0.000100, 0.000100, 0.000100, -1.089822, -1.526594, -1.951685, -1.189365, -1.292020, -1.351816, -3.183077, -0.855532, -1.224284, -1.536010, -1.873073, -1.087249, -1.572873, -1.590312, -1.469376, -1.025490, -1.140688, -1.477160, -1.993420, -1.152370, -1.368373, -1.352513, -3.019521, -0.825388, -1.306903, -1.476802, -1.836329, -1.074190, -1.608595, -1.565977, -1.488318, -1.007323, -1.171335, -1.414913, -2.051619, -1.143902, -1.291423, -1.328795, -3.117248, -0.876961, -1.262329, -1.538477, -1.925760, -1.031328, -1.744068, -1.562269, -1.549265, -0.908500, -1.189067, -1.438308, -2.101403, -1.090999, -1.403285, -1.363658, -3.037634, -0.797309, -1.280513, -1.493606, -2.068970, -0.990939, -1.740625, -1.503680, -1.477796, -0.983128, -1.209863, -1.505547, -2.069425, -1.039568, -1.474199, -1.230376, -2.996159, -0.846586, -1.419817, -1.521397, -1.871802, -0.951946, -1.802024, -1.527524, -1.514342, -0.921278, -1.227731, -1.440212, -2.175919, -1.030966, -1.528092, -1.297167, -2.938102, -0.783554, -1.463417, -1.532827, -1.880613, -0.915953, -2.005152, -1.487168, -1.552308, -0.849591, -1.275353, -1.398643, -2.207587, -1.011233, -1.666028, -1.256129, -3.130884, -0.728629, -1.596293, -1.485792, -1.913030, -0.859442, -2.064598, -1.512069, -1.531959, -0.828817, -1.424035, -1.384118, -2.168237, -0.930553, -1.730066, -1.186225, -2.971241, -0.763321, -1.693668, -1.501296, -1.913030, -0.808149, -2.219761, -1.492997, -1.580384, -0.774855, -1.501896, -1.410925, -2.214846, -0.857539, -1.711810, -1.214889, -3.039708, -0.744716, -1.892247, -1.423981, -1.964220, -0.758756, -2.206199, -1.488013, -1.595464, -0.773823, -1.417809, -1.367333, -2.659731, -0.837010, -1.915395, -1.184710, -3.073095, -0.691941, -2.070218, -1.426285, -1.978510, -0.702456, -2.328366, -1.427370, -1.648702, -0.754370, 
-1.414676, -1.470765, -2.909702, -0.749180, -2.055749, -1.215381, -3.166519, -0.628754, -2.138912, -1.429557, -2.075599, -0.659092, -2.351480, -1.377296, -1.696583, -0.756719, -1.545985, -1.414307, -3.174114, -0.689209, -2.158969, -1.218231, -3.244292, -0.598188, -2.360033, -1.374866, -2.107753, -0.632619, -2.498192, -1.422895, -1.640368, -0.728065, -1.572480, -1.467562, -3.496505, -0.631713, -2.098699, -1.160430, -3.206880, -0.647123, -2.223828, -1.477001, -2.209229, -0.591134, -2.623254, -1.423354, -1.709677, -0.682000, -1.654411, -1.480458, -3.908202, -0.577704, -2.366086, -1.163774, -3.307587, -0.584754, -2.532024, -1.372169, -2.332311, -0.562339, -2.568229, -1.418198, -1.795426, -0.663351, -1.770572, -1.487618, -4.441573, -0.524119, -2.399435, -1.207826, -3.559201, -0.541417, -2.614654, -1.537096, -2.121484, -0.524335, -2.722673, -1.399220, -1.826412, -0.641463, -1.749200, -1.357721, -4.321109, -0.587787, -2.401038, -1.181602, -3.566789, -0.554529, -2.232051, -1.496704, -2.198150, -0.583725, -2.594245, -1.367348, -1.836785, -0.671019, -1.672136, -1.365517, -5.421616, -0.593327, -2.215511, -1.177523, -3.662429, -0.584847, -2.479396, -1.445079, -2.178072, -0.567010, -2.504500, -1.284897, -1.664075, -0.793545, -1.789737, -1.323529, -4.816229, -0.582135, -2.029729, -1.142265, -3.478014, -0.656498, -2.493205, -1.514879, -2.286191, -0.517737, -2.438169, -1.180850, -1.805005, -0.818284, -1.571802, -1.290899, -5.028555, -0.671860, -2.062680, -1.030025, -3.701808, -0.710883, -2.230398, -1.303450, -2.186723, -0.676020, -2.509336, -1.168315, -1.880561, -0.786836, -1.803530, -1.374661, -4.448059, -0.560967, -2.353114, -0.874837, -3.901340, -0.759751, -2.401061, -1.480130, -2.301531, -0.541861, -2.902595, -1.173849, -2.203739, -0.643300, -1.440629, -1.268511, -4.158876, -0.762827, -2.306540, -0.910387, -3.742844, -0.745832, -1.924677, -1.367867, -2.315543, -0.691718, -2.796046, -1.628159, -2.518702, -0.412347, -0.709182, -1.565863, -2.505260, -1.526056, -1.065038, 
-1.104807, -2.495496, -1.420659, -1.176059, -1.723113, -1.148356, -1.630454, -1.817762, -1.496771, -1.271445, -1.098612, -2.519205, -0.344118, -5.635175, -1.574743, -2.977455, -0.464725, -6.435324, -1.142044, -3.092476, -0.304870, -5.161439, -1.552784, -3.035229, -0.562993, -5.197109, -0.975771, -0.000001, -15.545396, -15.545396, -15.545396, 0.000000, -17.996645, -17.996645, -17.996645, -0.000007, -12.923923, -12.923923, -12.923923, 0.000000, -17.165766, -17.165766, -17.165766, -18.420681, -18.420681, 0.000000, -18.420681, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.335601, -1.971125, -0.730849, -2.152442, -1.386294, -1.386294, -1.386294, -1.386294, -1.384016, -1.765491, -1.670181, -0.941316, -1.355893, -1.392776, -2.624094, -0.864190, -1.345398, -1.730732, -1.729560, -0.954391, -2.123424, -1.682619, -0.996441, -1.123004, -1.065167, -1.683004, -1.656570, -1.277539, -1.150894, -1.393289, -2.468421, -1.047909, -1.235876, -1.477038, -1.500624, -1.354346, -1.750241, -1.494114, -1.103043, -1.309478, -1.182994, 0.000100, 0.000100, 0.000100, -1.609738, 0.000100, 0.000100, 0.000100, -1.565230, 0.000100, 0.000100, 0.000100, -1.256460, 0.000100, 0.000100, 0.000100, -1.125408, -1.759008, -1.359626, -1.400363, -0.989906, -1.446169, -2.882103, -1.087966, -1.195905, -1.576685, -1.415023, -1.394364, -1.479064, -1.637438, -1.351836, -1.142856, -1.101998, -1.756600, -1.417116, -1.375354, -1.013341, -1.409959, -2.810391, -1.100630, -1.191760, -1.602000, -1.376543, -1.417288, -1.488197, -1.623192, -1.353697, -1.143602, -1.107866, -1.754748, -1.411458, -1.374372, -1.009154, -1.441162, -2.820299, -1.081071, -1.186623, -1.611036, -1.379088, -1.413620, -1.470225, -1.619304, -1.388541, -1.131185, -1.099338, -1.791891, -1.394642, -1.376970, -0.992346, -1.443399, -2.900394, -1.084211, -1.176422, -1.587610, 
-1.414169, -1.410264, -1.467101, -1.614450, -1.374834, -1.147198, -1.127134, -1.762249, -1.353614, -1.402113, -0.993282, -1.411392, -2.980173, -1.093466, -1.191545, -1.588426, -1.380787, -1.424533, -1.487080, -1.626410, -1.341038, -1.152773, -1.082979, -1.785398, -1.407119, -1.390959, -0.975375, -1.470581, -2.791575, -1.103124, -1.156639, -1.669129, -1.382059, -1.402479, -1.472611, -1.612913, -1.404009, -1.121602, -1.108330, -1.739751, -1.381791, -1.413880, -0.999430, -1.447772, -2.865772, -1.079168, -1.196625, -1.583179, -1.386867, -1.416303, -1.479805, -1.620810, -1.377331, -1.132184, -1.093596, -1.760044, -1.405332, -1.395668, -1.016169, -1.421357, -2.823545, -1.086962, -1.170519, -1.563237, -1.402911, -1.450483, -1.476513, -1.626313, -1.360955, -1.144138, -1.115420, -1.786197, -1.388857, -1.365449, -1.003248, -1.435264, -2.831224, -1.089692, -1.245460, -1.567123, -1.366905, -1.391700, -1.509856, -1.617887, -1.350735, -1.134163, -1.094713, -1.788263, -1.394818, -1.385352, -0.978021, -1.424792, -2.860294, -1.120444, -1.157419, -1.625471, -1.416799, -1.400817, -1.486921, -1.590718, -1.382908, -1.141608, -1.119936, -1.746875, -1.372134, -1.403171, -0.989115, -1.409298, -2.807191, -1.128850, -1.192997, -1.592504, -1.378028, -1.422136, -1.513578, -1.623527, -1.331310, -1.143988, -1.079942, -1.778989, -1.407630, -1.398960, -0.979436, -1.446844, -2.832715, -1.107715, -1.157895, -1.650956, -1.369995, -1.427621, -1.455552, -1.632335, -1.374406, -1.144868, -1.133511, -1.771919, -1.336068, -1.405619, -1.010087, -1.407684, -2.901113, -1.090224, -1.196883, -1.583153, -1.356886, -1.447840, -1.490490, -1.628256, -1.352837, -1.139558, -1.093003, -1.771083, -1.409187, -1.385090, -0.976427, -1.416014, -2.877815, -1.125752, -1.148944, -1.581737, -1.421165, -1.444014, -1.496056, -1.624307, -1.361140, -1.131431, -1.112600, -1.790462, -1.360535, -1.394768, -1.015007, -1.423373, -2.828524, -1.085894, -1.182168, -1.578802, -1.408195, -1.416422, -1.481590, -1.641107, -1.348956, 
-1.141163, -1.098083, -1.767501, -1.372948, -1.417281, -0.971022, -1.461993, -2.833112, -1.106542, -1.140910, -1.607953, -1.400981, -1.453207, -1.467110, -1.634745, -1.334187, -1.167851, -1.135532, -1.768113, -1.320708, -1.422344, -0.994936, -1.451187, -2.793616, -1.094337, -1.178470, -1.664466, -1.323399, -1.440834, -1.461162, -1.677032, -1.315166, -1.162589, -1.079590, -1.764612, -1.391389, -1.425940, -0.957182, -1.451390, -2.859752, -1.125472, -1.110050, -1.684761, -1.382335, -1.452533, -1.458989, -1.660278, -1.336684, -1.156050, -1.113595, -1.796199, -1.317462, -1.435984, -0.982847, -1.447128, -2.786660, -1.112049, -1.155749, -1.650657, -1.341304, -1.462095, -1.480705, -1.615331, -1.352770, -1.154628, -1.072712, -1.752780, -1.401619, -1.433676, -0.982481, -1.385882, -2.812998, -1.153625, -1.126271, -1.646859, -1.382823, -1.459998, -1.464692, -1.599751, -1.375744, -1.157558, -1.163584, -1.710982, -1.311768, -1.437152, -1.042536, -1.434227, -2.937676, -1.032397, -1.204850, -1.548493, -1.379367, -1.444105, -1.479569, -1.601626, -1.391807, -1.132987, -1.049473, -1.928282, -1.260908, -1.509215, -0.905215, -1.496922, -2.818569, -1.164637, -1.076601, -1.832951, -1.279956, -1.508427, -1.381168, -1.719323, -1.269913, -1.242466, -1.104792, -1.399977, -1.367211, -1.787962, -1.091861, -0.939227, -2.731868, -1.568386, -1.217584, -1.262440, -1.355713, -1.811986, -1.512844, -1.240443, -1.308673, -1.512844, 0.000000, -18.809204, -18.809204, -18.809204, 0.000000, -18.796923, -18.796923, -18.796923, 0.000000, -18.544844, -18.544844, -18.544844, 0.000000, -18.354755, -18.354755, -18.354755, -20.030119, -20.030119, 0.000000, -20.030119, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.126271, -1.600280, -1.440370, -1.439358, -1.386294, -1.386294, -1.386294, -1.386294, -1.037596, -1.784752, -1.368686, 
-1.498725, -0.969139, -1.430044, -2.783348, -1.141120, -1.132712, -1.548982, -1.416992, -1.500874, -1.447700, -1.632824, -1.309515, -1.205436, -1.040898, -1.799518, -1.386480, -1.463045, -0.963590, -1.488005, -2.767785, -1.109100, -1.055648, -1.635182, -1.463108, -1.488989, -1.426120, -1.636021, -1.327101, -1.204766}, /*, acc[1][]=NULL */ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /*, acc[2][]=..., */ {-1.291405, 0.000100, 0.000100, 0.000100, -1.700561, 0.000100, 0.000100, 0.000100, -1.959603, 0.000100, 0.000100, 0.000100, -0.912248, 0.000100, 0.000100, 0.000100, -0.967584, -1.680312, -2.212070, -1.126362, -1.169191, -1.470572, -3.674711, -0.834178, -1.177629, -1.613553, -2.199279, -0.962518, -1.518591, -1.754980, -1.505519, -0.951485, -1.086813, -1.645696, -2.148010, -1.040960, -1.308940, -1.447298, -3.546280, -0.763876, -1.265289, -1.678134, -2.003556, -0.925671, -1.571073, -1.688395, -1.571073, -0.917442, -1.062553, -1.610518, -2.060189, -1.117141, -1.200552, -1.519616, -3.388064, -0.806535, -1.246441, -1.752376, -2.031960, -0.896406, -1.729768, -1.709483, -1.677860, -0.787586, -1.081087, -1.609438, -2.275335, -1.027192, -1.393842, -1.412191, -3.132488, -0.766446, -1.218572, -1.546259, -2.442345, -0.905480, -1.741168, -1.656826, -1.585875, -0.845896, -1.171108, -1.561647, -2.335763, -0.958547, -1.469615, -1.416190, -3.273641, -0.714410, -1.338974, -1.687280, -2.075045, -0.850239, -1.736700, -1.656657, -1.613640, -0.834798, -1.065460, -1.616957, -2.208524, -1.058240, -1.521840, -1.435366, -3.131275, -0.693147, -1.377484, -1.719233, -2.019337, -0.830479, -1.951956, -1.624963, -1.695415, -0.739061, -1.213263, -1.499465, -2.439708, -0.935631, -1.836858, -1.396302, -3.223149, -0.591778, -1.697051, -1.530925, -2.164647, -0.722265, -1.980191, -1.595281, -1.668161, -0.753934, -1.344525, -1.560121, -2.407418, -0.822850, -1.690918, -1.352393, -3.258094, -0.656685, -1.775868, -1.569074, -2.090948, -0.695439, -2.166453, -1.687134, -1.664220, -0.671328, -1.353219, -1.583235, -2.623252, -0.768482, -1.597032, -1.387682, -3.284799, -0.672556, -1.932635, -1.563728, -1.963887, -0.682073, -2.108326, -1.633550, -1.694919, 
-0.693729, -1.354988, -1.494407, -2.872856, -0.774074, -1.982379, -1.451752, -3.032200, -0.544917, -1.912903, -1.659907, -2.113574, -0.613621, -2.206587, -1.559456, -1.820713, -0.658237, -1.311331, -1.581877, -3.220870, -0.723545, -1.985723, -1.342838, -3.614959, -0.553912, -2.117759, -1.489152, -2.182298, -0.613684, -2.384744, -1.450649, -1.857291, -0.658988, -1.431551, -1.647774, -3.169239, -0.641412, -2.124789, -1.416297, -3.521443, -0.496965, -2.297163, -1.551374, -2.297163, -0.532805, -2.338303, -1.537525, -1.808344, -0.644984, -1.592830, -1.546845, -3.956028, -0.571650, -2.020222, -1.264100, -3.489894, -0.589911, -2.273973, -1.633471, -2.354016, -0.499458, -2.534957, -1.550822, -1.906790, -0.579631, -1.596192, -1.557478, -4.317469, -0.556289, -2.137505, -1.376090, -3.440415, -0.515109, -2.525726, -1.581267, -2.766887, -0.428588, -2.488501, -1.516974, -1.992601, -0.577588, -1.837481, -1.753398, -4.088758, -0.429495, -2.259703, -1.326276, -3.820934, -0.497174, -2.328062, -1.719000, -2.178531, -0.494194, -2.605688, -1.516126, -1.960993, -0.569394, -1.751568, -1.454837, -3.812979, -0.560405, -2.209058, -1.336571, -4.154961, -0.491408, -2.220963, -1.488077, -2.887439, -0.494345, -2.606632, -1.494775, -1.950841, -0.580240, -1.510231, -1.643762, -5.093701, -0.545151, -2.161962, -1.342704, -4.055498, -0.500157, -2.101079, -1.838716, -2.126397, -0.511846, -2.416615, -1.354232, -1.868299, -0.696662, -1.887591, -1.303644, -4.156260, -0.577398, -1.930570, -1.270760, -3.961997, -0.588263, -2.256815, -1.660296, -2.598563, -0.460682, -2.374103, -1.365642, -1.836065, -0.708785, -1.449473, -1.516914, -5.278066, -0.614676, -2.016032, -1.166316, -3.896340, -0.625510, -2.088124, -1.558865, -2.185762, -0.591830, -2.314120, -1.300012, -1.932888, -0.725901, -1.642227, -1.467874, -4.686726, -0.567714, -2.094544, -1.132366, -4.066882, -0.620877, -2.315006, -1.424035, -2.379544, -0.565808, -2.850366, -1.312024, -2.177022, -0.580669, -1.559647, -1.559647, -4.016367, -0.577035, 
-2.503954, -1.104922, -3.725166, -0.574666, -2.154163, -1.514127, -2.688243, -0.517516, -2.739277, -1.573107, -2.805969, -0.404163, -0.794456, -1.596236, -3.309207, -1.174511, -1.366235, -1.002111, -2.800384, -1.148715, -1.326396, -1.653608, -1.591088, -1.080264, -1.843199, -1.407881, -1.554784, -0.952452, -1.285303, -15.808850, -4.401274, -0.340842, -1.987766, -16.042294, -5.445635, -0.152355, -1.756041, -15.520259, -3.825004, -0.216349, -2.192417, -16.337231, -4.354296, -0.132958, -0.000001, -15.545396, -15.545396, -15.545396, -1.386294, -1.386294, -1.386294, -1.386294, -0.000007, -12.923923, -12.923923, -12.923923, 0.000000, -17.165766, -17.165766, -17.165766, -17.358208, -17.358208, -0.000000, -17.358208, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.369082, -2.109996, -0.673095, -2.168981, -1.386294, -1.386294, -1.386294, -1.386294, -1.298146, -1.822958, -1.974764, -0.851859, -1.422750, -1.310483, -2.670573, -0.867387, -1.219022, -1.825798, -1.941383, -0.916574, -2.265312, -1.674820, -1.142604, -0.941934, -1.012709, -1.623939, -1.822116, -1.280270, -1.110097, -1.455520, -2.486538, -1.038468, -1.173011, -1.572396, -1.536678, -1.317050, -1.656633, -1.537612, -1.155520, -1.274994, -1.168267, 0.000100, 0.000100, 0.000100, -1.631756, 0.000100, 0.000100, 0.000100, -1.573868, 0.000100, 0.000100, 0.000100, -1.250832, 0.000100, 0.000100, 0.000100, -1.111770, -1.773410, -1.365808, -1.402031, -0.973449, -1.482240, -2.922036, -1.075082, -1.161273, -1.627716, -1.417134, -1.393813, -1.445909, -1.669680, -1.354373, -1.145541, -1.075371, -1.771473, -1.449553, -1.370140, -0.999244, -1.427754, -2.860510, -1.094354, -1.185072, -1.635087, -1.382345, -1.392911, -1.482870, -1.641964, -1.374121, -1.119775, -1.079347, -1.812344, -1.424155, -1.361797, -1.017537, -1.450534, -2.866831, -1.057877, -1.168096, 
-1.626979, -1.394279, -1.408510, -1.444919, -1.660942, -1.419156, -1.101547, -1.087674, -1.815971, -1.415212, -1.356943, -0.970003, -1.459837, -2.943851, -1.090700, -1.154502, -1.616565, -1.421600, -1.406993, -1.446631, -1.631406, -1.389858, -1.139831, -1.113159, -1.795581, -1.369542, -1.381402, -0.990275, -1.436488, -2.993251, -1.076929, -1.183282, -1.610932, -1.389830, -1.406868, -1.452787, -1.683915, -1.354399, -1.132166, -1.044126, -1.802180, -1.432127, -1.409516, -0.970040, -1.499470, -2.851557, -1.079017, -1.137859, -1.667996, -1.409281, -1.400182, -1.456283, -1.627055, -1.435045, -1.101757, -1.073756, -1.765077, -1.402217, -1.422738, -0.985188, -1.478821, -2.870988, -1.072705, -1.172032, -1.608887, -1.414934, -1.397512, -1.443257, -1.630396, -1.377141, -1.152989, -1.078649, -1.802718, -1.432200, -1.361358, -1.010697, -1.421944, -2.911135, -1.077704, -1.147924, -1.603707, -1.401648, -1.446618, -1.451591, -1.633146, -1.398936, -1.128157, -1.102901, -1.824149, -1.402265, -1.344388, -0.994649, -1.447135, -2.884916, -1.081625, -1.208947, -1.618078, -1.375294, -1.384521, -1.471655, -1.654233, -1.370798, -1.122971, -1.074595, -1.824419, -1.411667, -1.372329, -0.967159, -1.435231, -2.854151, -1.126477, -1.115361, -1.678326, -1.427803, -1.403719, -1.469558, -1.636727, -1.378605, -1.128781, -1.091165, -1.796996, -1.391712, -1.387376, -0.969186, -1.438723, -2.862422, -1.120099, -1.181555, -1.652788, -1.343196, -1.424314, -1.488599, -1.680414, -1.352228, -1.110630, -1.056168, -1.798131, -1.428698, -1.398427, -0.987511, -1.448104, -2.857329, -1.093446, -1.121318, -1.706241, -1.363458, -1.440419, -1.443511, -1.654574, -1.390857, -1.127422, -1.100401, -1.813071, -1.355328, -1.401419, -0.986103, -1.439171, -2.909912, -1.092528, -1.172422, -1.609037, -1.365252, -1.448224, -1.449685, -1.688651, -1.353488, -1.132428, -1.074954, -1.801715, -1.418167, -1.380259, -0.962150, -1.469246, -2.912321, -1.098169, -1.111198, -1.657777, -1.414539, -1.438701, -1.477395, -1.662611, 
-1.376644, -1.109569, -1.094225, -1.846724, -1.378073, -1.364987, -0.978258, -1.476389, -2.828175, -1.089298, -1.153759, -1.627530, -1.406723, -1.413903, -1.448350, -1.696252, -1.352353, -1.129973, -1.073884, -1.808097, -1.371115, -1.424904, -0.950051, -1.464272, -2.848606, -1.126679, -1.100838, -1.642816, -1.418457, -1.461510, -1.443026, -1.673257, -1.327045, -1.168306, -1.110764, -1.808777, -1.322644, -1.425416, -0.962499, -1.501774, -2.804179, -1.094307, -1.158218, -1.711567, -1.318364, -1.436328, -1.432805, -1.716330, -1.335186, -1.144046, -1.065682, -1.805462, -1.395958, -1.412585, -0.951203, -1.458707, -2.975128, -1.108219, -1.079623, -1.707178, -1.404003, -1.455483, -1.433065, -1.684137, -1.377037, -1.128576, -1.110801, -1.830148, -1.312776, -1.421950, -0.975952, -1.456606, -2.835787, -1.104223, -1.152923, -1.698855, -1.316655, -1.455154, -1.475301, -1.645854, -1.360270, -1.133668, -1.034556, -1.830539, -1.399241, -1.437568, -0.991000, -1.386595, -2.834266, -1.139096, -1.118023, -1.705123, -1.348929, -1.461853, -1.435975, -1.648962, -1.390248, -1.136769, -1.121783, -1.757168, -1.324235, -1.444931, -1.010234, -1.441611, -2.916736, -1.063603, -1.158253, -1.598600, -1.373899, -1.466775, -1.430681, -1.670595, -1.377608, -1.137755, -1.002533, -2.027449, -1.274953, -1.505366, -0.829737, -1.672629, -2.807267, -1.152920, -0.998731, -2.009633, -1.294611, -1.497846, -1.332030, -1.862735, -1.236302, -1.236650, -0.821629, -18.641743, -1.084048, -1.504799, -0.596039, -17.786745, -2.236046, -1.072564, -0.884956, -18.252380, -1.023085, -1.479359, -1.171400, -18.337517, -0.967229, -1.171400, 0.000000, -18.809204, -18.809204, -18.809204, -1.386294, -1.386294, -1.386294, -1.386294, 0.000000, -18.544844, -18.544844, -18.544844, 0.000000, -18.354755, -18.354755, -18.354755, -19.685711, -19.685711, 0.000000, -19.685711, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, 
-1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.096132, -1.643951, -1.448112, -1.437124, -1.386294, -1.386294, -1.386294, -1.386294, -1.015896, -1.790409, -1.394095, -1.500705, -0.954781, -1.454286, -2.837458, -1.130220, -1.087743, -1.586525, -1.420871, -1.528389, -1.428880, -1.643753, -1.341125, -1.185418, -0.999478, -1.818946, -1.409813, -1.489245, -0.948067, -1.487705, -2.823988, -1.117026, -1.013357, -1.654263, -1.478194, -1.524548, -1.410640, -1.659420, -1.332923, -1.197135}, /*, acc[3][]=acc[4][]=acc[5][]=NULL, */ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /*, acc[6][]=..., */ {-1.461211, 0.000100, 0.000100, 0.000100, -1.349827, 0.000100, 0.000100, 0.000100, -1.795269, 0.000100, 0.000100, 0.000100, -1.070967, 0.000100, 0.000100, 0.000100, -1.083620, -1.371302, -1.951970, -1.324782, -1.278774, -1.349155, -2.969060, -0.889623, -1.219565, -1.390523, -1.661397, -1.324926, -1.659763, -1.473864, -1.356081, -1.129768, -1.309922, -1.402295, -1.874451, -1.106581, -1.328187, -1.265666, -3.027568, -0.904960, -1.425009, -1.357186, -1.639418, -1.177601, -1.763950, -1.397200, -1.469959, -1.045802, -1.177146, -1.359467, -1.995455, -1.206999, -1.428025, -1.177974, -3.266299, -0.881482, -1.553749, -1.250563, -1.711377, -1.134491, -1.849152, -1.465010, -1.431108, -0.987492, -1.215607, -1.340061, -1.887700, -1.237113, -1.506719, -1.228006, -2.679437, -0.874942, -1.386294, -1.501807, -1.897119, -0.974788, -1.945910, -1.245937, -1.346020, -1.173775, -1.406097, -1.475090, -1.772341, -1.032238, -1.571697, -1.121960, -3.075770, -0.866281, -1.588818, -1.473306, -1.673976, -0.969780, -1.947948, -1.542484, -1.336632, -0.965336, -1.427116, -1.382665, -1.979184, -0.991799, -1.658228, -1.195605, -2.443747, -0.867101, -1.585145, -1.354034, -1.941819, -0.932821, -2.077627, -1.491453, -1.391754, -0.913572, -1.490338, -1.229325, -2.036880, -1.044754, -1.677646, -1.143564, -3.124560, -0.797288, -1.656155, -1.459445, -2.019060, -0.811969, -2.026392, -1.531696, -1.313443, -0.959386, -1.497998, -1.394458, -2.138034, -0.890212, -1.885444, -1.050985, -3.020420, -0.798809, -1.791759, -1.466337, -2.021333, -0.754842, -2.447550, -1.355628, -1.547389, -0.814397, -2.207272, -1.123931, -1.931020, 
-0.867501, -1.799422, -1.113997, -2.882764, -0.797660, -2.125249, -1.192431, -2.212260, -0.760011, -2.260815, -1.473737, -1.381566, -0.878317, -1.565231, -1.369487, -2.611195, -0.769432, -1.771956, -1.093625, -3.047021, -0.804079, -2.045538, -1.287854, -2.079440, -0.755390, -2.401136, -1.314501, -1.558954, -0.842992, -1.563394, -1.301030, -2.613211, -0.809623, -2.185072, -1.086462, -3.180497, -0.676178, -2.074218, -1.423632, -2.033396, -0.687926, -2.503953, -1.218757, -1.611011, -0.860487, -1.686398, -1.280934, -3.295823, -0.693148, -2.360852, -1.262242, -3.159357, -0.544402, -2.644531, -1.297463, -2.265044, -0.594367, -2.878285, -1.426610, -1.677020, -0.660260, -1.696448, -1.491655, -2.708041, -0.644359, -2.114915, -1.085297, -2.925842, -0.717572, -2.547033, -1.556639, -2.167546, -0.516869, -2.772587, -1.218840, -1.606837, -0.817790, -2.143976, -1.067842, -4.158836, -0.647339, -2.809400, -1.086636, -3.463322, -0.560219, -2.595250, -1.371479, -2.212260, -0.575918, -2.812921, -1.193776, -1.628655, -0.819385, -1.619908, -1.428854, -4.564252, -0.594059, -2.427747, -1.060872, -3.200933, -0.644573, -2.369071, -1.530745, -1.922786, -0.609065, -2.726320, -1.196572, -1.678630, -0.808149, -1.851350, -1.212272, -4.102597, -0.636909, -2.480264, -1.046506, -3.019259, -0.661108, -2.542721, -1.397594, -1.814487, -0.670925, -2.437344, -1.201112, -1.769095, -0.818120, -1.923093, -1.083346, -14.077878, -0.662843, -2.425482, -1.154571, -4.143121, -0.543868, -2.743762, -1.291516, -2.576709, -0.536495, -2.840538, -1.226114, -1.701105, -0.764228, -1.746907, -1.225613, -13.901692, -0.630907, -1.907224, -0.954121, -3.427044, -0.835000, -2.779501, -1.365816, -1.980999, -0.607287, -2.583996, -0.996221, -1.789067, -0.946389, -1.819157, -1.026921, -4.304018, -0.763107, -2.340626, -0.980175, -3.667492, -0.687271, -2.354541, -1.130770, -2.290003, -0.731863, -3.015532, -1.005086, -1.997891, -0.799961, -1.033016, -0.439953, -13.981029, -13.981029, -2.003408, -0.199815, -3.073847, -15.462245, 
-1.691675, -0.487705, -1.600703, -13.946543, -2.296685, -0.398096, -1.479240, -15.726534, -14.513647, -14.513647, -14.513647, -0.000002, -16.142788, -16.142788, -16.142788, -0.000000, -14.513647, -14.513647, -14.513647, -0.000002, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.816592, -1.562886, -0.878135, -1.549597, -14.657080, -0.000001, -14.657080, -14.657080, -14.910785, -0.000001, -14.910785, -14.910785, -15.595535, -0.000001, -15.595535, -15.595535, -14.924074, -0.000001, -14.924074, -14.924074, -1.386294, -1.386294, -1.386294, -1.386294, -0.000000, -16.473671, -16.473671, -16.473671, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.473671, -16.473671, -0.000000, -16.473671, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.501278, -1.725995, -0.717980, -2.194425, -1.386294, -1.386294, -1.386294, -1.386294, -1.527945, -1.736699, -1.273415, -1.117661, -1.442990, -1.347680, -2.446289, -0.873896, -1.470176, -1.774665, -1.610876, -0.914138, -2.583992, -1.810806, -0.849398, -1.098613, -1.034282, -1.775284, -1.376377, -1.502417, -1.226788, -1.320817, -2.538971, -1.019149, -1.273415, -1.343211, -1.513556, -1.431318, -1.951460, -1.541676, -0.986998, -1.304833, -1.262810, 0.000100, 0.000100, 0.000100, -1.579093, 0.000100, 0.000100, 0.000100, -1.512966, 0.000100, 0.000100, 0.000100, -1.235380, 0.000100, 0.000100, 0.000100, -1.136397, -1.811655, -1.413307, -1.300829, -0.980300, -1.453699, -2.860610, -1.097024, -1.386294, -1.416508, -1.422662, -1.322873, -1.525155, -1.540821, -1.389682, -1.142398, -1.212399, -1.736128, -1.343735, -1.326343, -1.140020, -1.315535, -2.907679, -1.029308, -1.290840, -1.460525, -1.467397, -1.338186, 
-1.487132, -1.552647, -1.346311, -1.197014, -1.294549, -1.618973, -1.389859, -1.277382, -1.009345, -1.423108, -2.606460, -1.136990, -1.184495, -1.562424, -1.450419, -1.386294, -1.489189, -1.544899, -1.399717, -1.156948, -1.081576, -1.859280, -1.401298, -1.351401, -0.998311, -1.446155, -2.986597, -1.062572, -1.228534, -1.573692, -1.349023, -1.425009, -1.432174, -1.606080, -1.377366, -1.176695, -1.220146, -1.728559, -1.242125, -1.433375, -0.954512, -1.399717, -2.995730, -1.144656, -1.338776, -1.551495, -1.269384, -1.406914, -1.654821, -1.422199, -1.332048, -1.191536, -1.148531, -1.815799, -1.478285, -1.230448, -0.976301, -1.483119, -2.665111, -1.118476, -1.186775, -1.714642, -1.454685, -1.268355, -1.520338, -1.570769, -1.411704, -1.109423, -1.232768, -1.684218, -1.444738, -1.247862, -1.055416, -1.458445, -2.794905, -1.026568, -1.189819, -1.542640, -1.373222, -1.475701, -1.525630, -1.581060, -1.473112, -1.056853, -1.112648, -1.689190, -1.346448, -1.485095, -1.065823, -1.408767, -2.708048, -1.065823, -1.337581, -1.394378, -1.414051, -1.400893, -1.541510, -1.680029, -1.261319, -1.151083, -1.152101, -1.637609, -1.400281, -1.414465, -0.947136, -1.597054, -2.751014, -1.061911, -1.284748, -1.515859, -1.483070, -1.284748, -1.728817, -1.600200, -1.257997, -1.089374, -1.091692, -1.777966, -1.419238, -1.372501, -1.090606, -1.314946, -2.936430, -1.071648, -1.312623, -1.479161, -1.426862, -1.335613, -1.453251, -1.425471, -1.511238, -1.186922, -1.112218, -1.590475, -1.465962, -1.441864, -1.063977, -1.336177, -2.446024, -1.186024, -1.144656, -1.469676, -1.537117, -1.441103, -1.545750, -1.524697, -1.309921, -1.206243, -1.118680, -1.805093, -1.419968, -1.320102, -0.983732, -1.430746, -2.925771, -1.098612, -1.243299, -1.416868, -1.403881, -1.498546, -1.446312, -1.608225, -1.461897, -1.101043, -1.255455, -1.664701, -1.346247, -1.325796, -1.106555, -1.223143, -3.081591, -1.111351, -1.300391, -1.473004, -1.396311, -1.382978, -1.583838, -1.567489, -1.380673, -1.094113, -1.133355, 
-1.692024, -1.376586, -1.421037, -0.980343, -1.266729, -2.791747, -1.266729, -1.307089, -1.336246, -1.442264, -1.468932, -1.512418, -1.590787, -1.260545, -1.229774, -1.146732, -1.720532, -1.397543, -1.361984, -1.074515, -1.321039, -2.857904, -1.095747, -1.256575, -1.357785, -1.450731, -1.497251, -1.606950, -1.512205, -1.405151, -1.097368, -1.211665, -1.650499, -1.402517, -1.330414, -0.963637, -1.549153, -2.826012, -1.058947, -1.354137, -1.419520, -1.379780, -1.392852, -1.498392, -1.567021, -1.342785, -1.181796, -1.200558, -1.670561, -1.338428, -1.392012, -1.170933, -1.394076, -2.755677, -0.972082, -1.211314, -1.567989, -1.414913, -1.383165, -1.550597, -1.729749, -1.119021, -1.258955, -1.043235, -1.706529, -1.394633, -1.522031, -0.878070, -1.525407, -2.812327, -1.181500, -1.213784, -1.506172, -1.338634, -1.518830, -1.436686, -1.632560, -1.289427, -1.232960, -1.040770, -1.705261, -1.543873, -1.380153, -0.999919, -1.512214, -2.533047, -1.101859, -1.121085, -1.639515, -1.373049, -1.484275, -1.500898, -1.518189, -1.478298, -1.108856, -1.044218, -1.003867, -1.267362, -16.083504, -0.784893, -0.743075, -2.685576, -15.633588, -1.156255, -0.863352, -1.333355, -15.618870, -1.237794, -0.957137, -1.120901, -15.969596, -16.178249, -16.178249, -16.178249, -0.000000, -16.328358, -16.328358, -16.328358, -0.000000, -15.837059, -15.837059, -15.837059, -0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.534020, -1.470717, -1.241033, -1.326190, -15.699546, -0.000001, -15.699546, -15.699546, -15.762849, -0.000001, -15.762849, -15.762849, -15.992533, -0.000000, -15.992533, -15.992533, -15.907375, -0.000000, -15.907375, -15.907375, -1.386294, -1.386294, -1.386294, -1.386294, 0.000000, -17.233564, -17.233564, -17.233564, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -17.233564, -17.233564, -0.000000, -17.233564, 
-1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.206489, -1.518937, -1.495267, -1.356268, -1.386294, -1.386294, -1.386294, -1.386294, -1.002605, -1.826303, -1.331939, -1.569712, -1.061871, -1.181278, -3.070298, -1.200983, -1.204948, -1.558145, -1.380463, -1.434208, -1.378690, -1.604362, -1.291678, -1.300981, -1.098612, -1.788668, -1.403994, -1.370933, -1.049386, -1.541465, -2.494122, -1.040690, -1.266493, -1.680055, -1.423336, -1.234745, -1.600364, -1.479004, -1.391516, -1.134402}, /*, acc[7][]=..., */ {-1.247372, 0.000100, 0.000100, 0.000100, -1.685627, 0.000100, 0.000100, 0.000100, -1.828728, 0.000100, 0.000100, 0.000100, -1.002951, 0.000100, 0.000100, 0.000100, -1.148209, -1.562185, -1.893541, -1.131402, -1.217959, -1.473306, -3.871183, -0.789292, -1.163151, -1.576338, -2.079440, -1.033474, -1.481184, -1.720663, -1.462834, -1.015821, -1.079460, -1.522196, -1.979620, -1.191164, -1.145680, -1.521469, -2.999565, -0.882390, -1.156070, -1.672285, -2.105148, -0.979139, -1.435927, -1.845826, -1.518924, -0.953829, -1.138779, -1.598994, -2.158608, -1.015546, -1.094589, -1.524444, -2.680211, -0.970134, -1.209223, -1.614687, -2.256538, -0.921541, -1.558144, -1.652455, -1.578347, -0.937568, -1.316933, -1.348349, -2.084981, -1.055363, -1.083345, -1.897119, -3.363447, -0.740401, -1.316185, -1.690877, -2.142861, -0.843581, -1.415044, -2.012880, -1.539096, -0.894268, -1.038155, -1.723333, -1.995266, -1.104294, -1.242045, -1.386294, -2.807675, -0.914138, -1.560910, -1.866291, -1.732760, -0.779657, -1.706640, -1.626597, -1.508814, -0.914402, -1.380178, -1.628014, -2.123334, -0.838138, -1.270710, -1.526056, -3.230796, -0.771216, -1.480657, -1.590658, -1.740939, -0.933018, -1.793741, -1.551729, -1.542427, -0.896683, -1.163853, -1.781288, -2.251290, -0.881805, -1.435828, -1.420324, -3.029756, -0.750525, 
-1.483668, -1.483668, -1.866660, -0.937126, -1.960643, -1.660058, -1.533764, -0.791020, -1.336462, -1.535590, -1.949565, -0.968738, -1.405637, -1.345013, -2.999565, -0.810931, -1.563394, -1.851075, -1.956435, -0.708980, -1.901229, -1.769169, -1.573196, -0.749169, -1.453646, -1.632694, -2.232314, -0.768731, -1.260668, -1.410200, -3.234741, -0.836855, -1.594324, -1.476541, -1.950998, -0.852388, -2.064121, -1.547905, -1.614597, -0.773476, -1.350505, -1.350505, -2.736795, -0.874660, -1.531033, -1.311174, -3.444673, -0.729253, -1.945909, -1.648658, -1.871801, -0.671408, -2.399608, -1.678553, -1.582409, -0.659474, -1.446919, -1.511457, -2.920218, -0.712951, -1.717148, -1.358515, -2.704531, -0.700215, -1.785995, -1.515705, -2.588337, -0.620693, -2.095340, -1.639864, -1.631767, -0.718708, -1.521213, -1.436056, -3.248424, -0.683486, -2.197223, -1.109424, -3.066256, -0.668368, -2.409939, -1.454432, -2.073469, -0.596207, -2.217649, -1.738757, -1.435707, -0.739297, -1.443818, -1.615668, -5.081307, -0.581596, -1.654820, -1.323464, -2.789797, -0.731413, -1.977161, -1.444358, -2.014901, -0.708652, -2.343664, -1.683308, -1.377049, -0.763690, -1.571900, -1.743749, -3.823169, -0.518140, -2.253103, -1.302913, -3.222498, -0.538997, -2.293013, -1.425515, -2.341803, -0.575365, -2.165873, -1.671855, -1.706341, -0.661797, -1.897118, -1.163151, -5.075076, -0.632524, -2.215572, -1.382665, -3.824996, -0.480974, -2.701354, -1.420427, -2.701354, -0.471348, -2.596379, -1.497768, -1.848173, -0.608248, -13.972518, -13.972518, -13.972518, -0.000003, -14.959735, -14.959735, -14.959735, -0.000001, -14.070156, -14.070156, -14.070156, -0.000002, -15.810211, -15.810211, -15.810211, -0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -2.370475, -1.358124, -1.420063, -0.897170, -1.617736, -1.751267, -14.006134, -0.465059, -2.812407, -1.490654, -4.421824, -0.352822, -2.801760, -1.651858, -2.801760, -0.375566, 
-2.191558, -1.334622, -1.850255, -0.759708, -1.436725, -1.712977, -14.014364, -0.541343, -1.512588, -1.304949, -3.895200, -0.717163, -2.024378, -1.485385, -2.265538, -0.620390, -2.538110, -1.143653, -1.618546, -0.905982, -14.340241, -14.340241, -14.340241, -0.000002, -15.129235, -15.129235, -15.129235, -0.000001, -14.346141, -14.346141, -14.346141, -0.000002, -15.580242, -15.580242, -15.580242, -0.000001, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.376606, -16.376606, -16.376606, -0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.759094, -1.571555, -1.452533, -0.951658, -14.617514, -0.000001, -14.617514, -14.617514, -14.805053, -0.000001, -14.805053, -14.805053, -14.924074, -0.000001, -14.924074, -14.924074, -15.424950, -0.000001, -15.424950, -15.424950, -1.386294, -1.386294, -1.386294, -1.386294, -0.000000, -16.376606, -16.376606, -16.376606, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.376606, -16.376606, -0.000000, -16.376606, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.081767, -1.962259, -0.986249, -1.913992, -1.386294, -1.386294, -1.386294, -1.386294, -1.280478, -1.850392, -1.702472, -0.960536, -1.332806, -1.540445, -2.495952, -0.821981, -1.274742, -1.714108, -1.636722, -1.062023, -1.786536, -1.491073, -1.157929, -1.226922, -1.040960, -1.759639, -1.759639, -1.195110, -0.972462, -1.622123, -2.699677, -1.029620, -1.063106, -1.667305, -1.566209, -1.358570, -1.850793, -1.427470, -1.157646, -1.242203, -1.340886, 0.000100, 0.000100, 0.000100, -1.594874, 0.000100, 0.000100, 0.000100, -1.854831, 0.000100, 0.000100, 0.000100, -0.970290, 0.000100, 0.000100, 
0.000100, -1.207094, -1.782455, -1.728388, -1.035244, -1.051546, -1.585626, -2.809387, -0.953106, -1.067842, -1.760985, -1.386294, -1.450832, -1.379864, -1.459906, -1.609437, -1.151605, -1.370034, -1.759497, -1.585145, -0.997360, -1.074516, -1.698667, -3.020405, -0.851373, -1.004585, -2.316761, -1.266948, -1.372308, -1.463852, -2.189786, -1.342492, -0.927549, -1.064712, -1.809149, -1.352393, -1.457753, -1.015923, -1.575535, -2.450992, -1.064713, -1.339775, -1.945907, -1.134981, -1.295323, -1.721442, -1.616082, -1.551543, -0.890146, -1.116962, -1.756040, -1.564985, -1.234745, -1.154966, -1.582408, -3.191819, -0.824726, -0.987948, -1.770704, -1.598855, -1.365241, -1.448815, -1.791758, -1.481604, -0.990983, -0.999673, -1.609437, -1.737270, -1.362578, -1.018571, -1.443452, -3.583474, -0.980831, -0.985285, -1.609437, -1.609437, -1.484274, -1.518783, -1.975540, -1.336462, -0.968738, -1.083346, -1.732039, -1.823010, -1.129866, -1.286211, -1.286211, -2.721281, -0.963439, -1.181995, -1.427116, -1.832578, -1.226446, -1.418043, -1.856296, -1.296682, -1.114361, -1.141538, -1.483286, -1.601069, -1.377926, -1.074516, -1.698667, -2.797266, -0.880361, -1.400088, -1.517870, -1.400088, -1.245938, -1.471287, -2.072058, -1.439539, -0.897943, -1.218572, -1.486835, -1.412727, -1.449095, -0.985285, -1.373049, -3.624296, -1.059393, -1.037245, -1.373716, -1.661396, -1.596858, -1.683545, -1.386294, -1.278081, -1.252763, -1.227230, -1.920374, -1.534714, -1.064712, -0.941610, -1.832579, -2.813398, -0.941610, -1.386294, -1.791757, -1.386294, -1.098614, -1.225613, -1.918757, -1.555853, -1.053763, -1.147403, -1.791758, -1.402295, -1.309922, -1.052094, -1.578184, -2.756822, -0.965083, -1.272966, -1.919589, -1.484274, -1.059393, -1.510998, -1.510998, -1.609437, -1.025491, -1.007264, -2.105871, -1.343735, -1.377636, -1.510591, -1.011603, -2.734353, -1.047970, -1.303407, -1.686397, -1.450010, -1.175574, -1.580450, -1.511457, -1.357307, -1.151455, -1.107830, -2.052287, -1.472472, -1.164988, 
-0.910562, -1.575535, -2.856455, -1.098613, -1.087440, -1.716046, -1.655422, -1.230541, -1.293921, -1.562184, -1.562184, -1.182696, -1.167606, -1.909541, -1.378914, -1.241714, -1.413693, -1.008230, -2.917751, -1.085191, -1.517870, -1.517870, -1.892560, -0.889264, -1.899746, -1.666132, -1.548350, -0.801137, -1.088142, -1.988924, -1.988924, -0.942961, -1.207812, -1.632694, -2.674137, -0.828324, -1.111859, -1.497519, -1.622682, -1.386294, -1.721442, -1.685074, -1.461932, -0.906407, -1.404643, -1.481604, -1.442384, -1.234745, -1.544899, -0.820983, -4.317393, -1.098614, -1.526055, -1.189585, -1.931517, -1.098614, -2.098984, -1.379864, -1.459906, -0.932552, -13.554151, -13.554151, -13.554151, -0.000004, -13.981029, -13.981029, -13.981029, -0.000003, -13.500805, -13.500805, -13.500805, -0.000004, -14.159103, -14.159103, -14.159103, -0.000002, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.619266, -1.657006, -1.260125, -1.115876, -1.098614, -1.450010, -1.686397, -1.398717, -1.137834, -1.717649, -2.747256, -0.830350, -1.257083, -1.809149, -1.534714, -1.090029, -1.639742, -1.762344, -1.401332, -0.946597, -1.308333, -1.765090, -1.765090, -0.948331, -1.021653, -1.272966, -4.317393, -1.059393, -1.523495, -1.523495, -1.265667, -1.265667, -1.365816, -1.480226, -1.757857, -1.064711, -13.919874, -13.919874, -13.919874, -0.000003, -13.710155, -13.710155, -13.710155, -0.000003, -13.415039, -13.415039, -13.415039, -0.000004, -14.159103, -14.159103, -14.159103, -0.000002, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -15.224056, -15.224056, -15.224056, -0.000001, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.502856, -1.418595, -1.723255, -1.030109, -13.721204, -0.000003, -13.721204, -13.721204, -13.805464, -0.000003, 
-13.805464, -13.805464, -13.500805, -0.000004, -13.500805, -13.500805, -14.193950, -0.000002, -14.193950, -14.193950, -1.386294, -1.386294, -1.386294, -1.386294, -0.000001, -15.224056, -15.224056, -15.224056, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -15.224056, -15.224056, -0.000001, -15.224056, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.313235, -1.536378, -1.481116, -1.243031, -1.386294, -1.386294, -1.386294, -1.386294, -0.871840, -2.061419, -1.481604, -1.481604, -0.813777, -1.386294, -4.477242, -1.219241, -1.200396, -1.699385, -1.760009, -1.066865, -1.726161, -1.880311, -1.159767, -1.033016, -1.136640, -2.007466, -1.496642, -1.136640, -1.127187, -1.318241, -4.262586, -0.930477, -1.233954, -1.884538, -1.661396, -1.002154, -1.301953, -1.650259, -1.570217, -1.114742}, /*, acc[8][]=..., */ {-1.308333, 0.000100, 0.000100, 0.000100, -1.559026, 0.000100, 0.000100, 0.000100, -1.783567, 0.000100, 0.000100, 0.000100, -1.045969, 0.000100, 0.000100, 0.000100, -1.031361, -1.546259, -2.070782, -1.189584, -1.295566, -1.380723, -2.789484, -0.883322, -1.112698, -1.630639, -1.667007, -1.249273, -1.396379, -1.693110, -1.557309, -1.027615, -1.029620, -1.402295, -1.974080, -1.355042, -1.347508, -1.649788, -2.468095, -0.769432, -1.535329, -1.284016, -2.034318, -0.975715, -1.487099, -1.332948, -1.457246, -1.282305, -1.147620, -1.263692, -2.142241, -1.263692, -1.137642, -1.731416, -2.451959, -0.876427, -1.121086, -1.686398, -1.814231, -1.121086, -1.546480, -1.616684, -1.280212, -1.169670, -1.230382, -1.349927, -2.091862, -1.123614, -1.233715, -1.419817, -2.639052, -0.927341, -1.208614, -1.711716, -1.974079, -0.962481, -1.634573, -1.880706, -1.285198, -0.978468, -1.098613, -1.574036, -2.106839, -1.085190, -1.432814, -1.278664, -2.462429, -0.921990, -1.187560, 
-1.514772, -1.904236, -1.120119, -1.735669, -1.615868, -1.438938, -0.947213, -1.434150, -1.191589, -2.107877, -1.089310, -1.547562, -1.365241, -2.933849, -0.736633, -1.393620, -1.552685, -1.741926, -1.007959, -1.958813, -1.553348, -1.568616, -0.823023, -1.198074, -1.368699, -2.038854, -1.159607, -1.576338, -1.405712, -2.939636, -0.702810, -1.461018, -1.301953, -1.737270, -1.139435, -1.923245, -1.620965, -1.448353, -0.864640, -1.330414, -1.445483, -2.325838, -0.910561, -1.786695, -1.222761, -2.644141, -0.761416, -1.364633, -1.552685, -2.086765, -0.894630, -2.200143, -1.506997, -1.443281, -0.840519, -1.132061, -1.440361, -2.190664, -1.111858, -1.734600, -1.084014, -2.679057, -0.875470, -1.768769, -1.425825, -2.026596, -0.782276, -2.164963, -1.508184, -1.654138, -0.749342, -1.507438, -1.242747, -2.519034, -0.893073, -2.121799, -1.269024, -2.981995, -0.600775, -2.372784, -1.403389, -1.774951, -0.710243, -2.144161, -1.451015, -1.548653, -0.830189, -1.230881, -1.591893, -3.117933, -0.776146, -2.093233, -1.363720, -3.086478, -0.552791, -2.849869, -1.394593, -1.851350, -0.621405, -2.035207, -1.361479, -1.604425, -0.886586, -1.643628, -1.411828, -2.833202, -0.684781, -1.950659, -1.192975, -3.049265, -0.679030, -2.433607, -1.368903, -1.740465, -0.728867, -2.607614, -1.487025, -1.497954, -0.740956, -1.488077, -1.824547, -3.839406, -0.525269, -2.147098, -1.305533, -3.574202, -0.537663, -1.856296, -1.484734, -2.287076, -0.662377, -2.698478, -1.445718, -1.825992, -0.623514, -1.498772, -1.598855, -3.850102, -0.592053, -2.253792, -1.309333, -3.101084, -0.544728, -2.174746, -1.219241, -2.531417, -0.670677, -2.595253, -1.535482, -1.565635, -0.691018, -1.575535, -1.900956, -13.676253, -0.440559, -2.659255, -1.832580, -3.506543, -0.301106, -3.144136, -1.920374, -1.920374, -0.409787, -2.846694, -1.930405, -1.748084, -0.473582, -0.709151, -0.741940, -3.449944, -13.353481, -1.180626, -0.462162, -2.764736, -14.054530, -1.213924, -0.808460, -1.357024, -13.825465, -1.806148, -0.774978, 
-0.980829, -15.538278, -1.673976, -1.386294, -5.257398, -0.584668, -1.896313, -1.408327, -3.190229, -0.572410, -2.509596, -1.378197, -2.286453, -0.570859, -1.386294, -1.386294, -1.386294, -1.386294, -1.826244, -1.774951, -13.981029, -0.401239, -1.825497, -1.208724, -3.272406, -0.688420, -3.068012, -1.681755, -1.815285, -0.503109, -2.182715, -1.230058, -1.752860, -0.863433, -1.395511, -1.324052, -13.901692, -0.721058, -1.635755, -1.165752, -2.957505, -0.817446, -2.100057, -1.326871, -1.945907, -0.756328, -2.239257, -1.195134, -1.746781, -0.875954, -14.062374, -14.062374, -14.062374, -0.000002, -14.739771, -14.739771, -14.739771, -0.000001, -13.795312, -13.795312, -13.795312, -0.000003, -15.131920, -15.131920, -15.131920, -0.000001, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -15.956753, -15.956753, -15.956753, -0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.848572, -1.515303, -1.205149, -1.129641, -14.108183, -0.000002, -14.108183, -14.108183, -14.441451, -0.000002, -14.441451, -14.441451, -14.751606, -0.000001, -14.751606, -14.751606, -14.827113, -0.000001, -14.827113, -14.827113, -1.386294, -1.386294, -1.386294, -1.386294, -0.000000, -15.956753, -15.956753, -15.956753, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -15.956753, -15.956753, -0.000000, -15.956753, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.181892, -2.111683, -0.790575, -2.131291, -1.386294, -1.386294, -1.386294, -1.386294, -1.470176, -1.539168, -1.504077, -1.098613, -1.200742, -1.744356, -3.025275, -0.742911, -1.370870, -1.586389, -1.693157, -1.028584, -1.907068, -1.619387, -0.877452, -1.437066, 
-1.041454, -1.654558, -1.533930, -1.426300, -1.392092, -1.392092, -2.320075, -0.904797, -1.219241, -1.615135, -1.456912, -1.299283, -1.569959, -1.689760, -1.072121, -1.327646, -1.168268, 0.000100, 0.000100, 0.000100, -1.703009, 0.000100, 0.000100, 0.000100, -1.748620, 0.000100, 0.000100, 0.000100, -1.099773, 0.000100, 0.000100, 0.000100, -1.047693, -1.762345, -1.447852, -1.416600, -1.067262, -1.392684, -2.976794, -1.030895, -1.832580, -1.399717, -1.160488, -1.272966, -1.485095, -1.652149, -1.341994, -1.137694, -1.073611, -1.617912, -1.442708, -1.497768, -1.017267, -1.512588, -2.778248, -1.033016, -1.573810, -1.456028, -1.135556, -1.434049, -1.533930, -1.552279, -1.498212, -1.052628, -1.163151, -1.510347, -1.353505, -1.568616, -0.777706, -1.391714, -2.822453, -1.459156, -1.098613, -1.419084, -1.441557, -1.670398, -1.414579, -1.431108, -1.555161, -1.181648, -1.102152, -1.753626, -1.411340, -1.382767, -1.069625, -1.489478, -2.679057, -1.014055, -1.128466, -1.670062, -1.422226, -1.398129, -1.410987, -1.712091, -1.451809, -1.074515, -1.152060, -1.517174, -1.405057, -1.517174, -0.956733, -1.386294, -3.713549, -1.074515, -1.235472, -1.409825, -1.592146, -1.340832, -1.677875, -1.314970, -1.471539, -1.154627, -1.110883, -1.698668, -1.479980, -1.346449, -1.061399, -1.274321, -3.154624, -1.103363, -1.148623, -1.880989, -1.148623, -1.544518, -1.572774, -1.495813, -1.341662, -1.180732, -1.012578, -1.831604, -1.556193, -1.325670, -1.220502, -1.004794, -3.600030, -1.166435, -0.988265, -1.555370, -1.609437, -1.529395, -1.664420, -1.708871, -1.273554, -1.050411, -1.072046, -1.776491, -1.433547, -1.386294, -0.816208, -1.533452, -2.682069, -1.295781, -1.222550, -1.538402, -1.404871, -1.404871, -1.457010, -1.710790, -1.343066, -1.123004, -1.120353, -1.699386, -1.536867, -1.287406, -0.962812, -1.321756, -3.314172, -1.154702, -1.301137, -1.666596, -1.222665, -1.408767, -1.622123, -1.622123, -1.345136, -1.065552, -1.139435, -1.671313, -1.478410, -1.331806, -0.893819, -1.432814, 
-3.378710, -1.145133, -1.266672, -1.902659, -1.120960, -1.414307, -1.629240, -1.351609, -1.366876, -1.237199, -1.163942, -1.949869, -1.138941, -1.490338, -1.178655, -1.442806, -3.258084, -0.873274, -1.223776, -1.517536, -1.620190, -1.242125, -1.548813, -1.742969, -1.337504, -1.049823, -1.208131, -1.847210, -1.236302, -1.373923, -1.062245, -1.532248, -3.008145, -0.944462, -1.324419, -1.978344, -1.598855, -0.932378, -1.541676, -1.524581, -1.458984, -1.091260, -1.187166, -1.572011, -1.438480, -1.386294, -1.233954, -1.425009, -2.983144, -0.872941, -1.337238, -1.498506, -1.255560, -1.473814, -1.641075, -1.539292, -1.362362, -1.091716, -1.208508, -1.660493, -1.355111, -1.372811, -1.098613, -1.098613, -2.843845, -1.290504, -1.457557, -1.147403, -1.807759, -1.252763, -1.680333, -1.445020, -1.413271, -1.094818, -1.112218, -1.765090, -1.470852, -1.308333, -1.042924, -1.787363, -3.027047, -0.839983, -1.123459, -1.835652, -1.287088, -1.430188, -1.832581, -1.478410, -1.331806, -1.055553, -0.930621, -1.497998, -0.962037, -14.715673, -0.674456, -0.864213, -2.671002, -14.279247, -0.948600, -1.369102, -1.026158, -14.363634, -1.033854, -1.305169, -0.985536, -14.859316, -1.221466, -1.803387, -1.380531, -1.241269, -0.982202, -1.581038, -2.785006, -1.027154, -1.227445, -1.848271, -1.240024, -1.346792, -1.386294, -1.386294, -1.386294, -1.386294, -1.419817, -1.697448, -1.346792, -1.155125, -0.985284, -1.609437, -2.525723, -1.059392, -1.484925, -1.414307, -1.306677, -1.348349, -1.475422, -1.601715, -1.492516, -1.064324, -1.168571, -2.035380, -1.451434, -1.126012, -1.011602, -1.278664, -2.605529, -1.258461, -1.152680, -1.663504, -1.663504, -1.186581, -1.336462, -1.681302, -1.518783, -1.102269, -14.790071, -14.790071, -14.790071, -0.000001, -14.316288, -14.316288, -14.316288, -0.000002, -14.291747, -14.291747, -14.291747, -0.000002, -14.812461, -14.812461, -14.812461, -0.000001, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, 
-1.386294, -1.386294, -15.969596, -15.969596, -15.969596, -0.000000, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.402669, -1.407397, -1.659389, -1.142484, -14.566929, -0.000002, -14.566929, -14.566929, -14.562201, -0.000002, -14.562201, -14.562201, -14.310209, -0.000002, -14.310209, -14.310209, -14.827113, -0.000001, -14.827113, -14.827113, -1.386294, -1.386294, -1.386294, -1.386294, -0.000000, -15.969596, -15.969596, -15.969596, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -15.969596, -15.969596, -0.000000, -15.969596, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.179525, -1.566298, -1.517508, -1.329910, -1.386294, -1.386294, -1.386294, -1.386294, -1.262242, -1.773067, -1.248997, -1.345623, -1.149906, -1.479385, -2.553895, -0.973450, -1.413106, -1.067360, -1.776010, -1.413106, -1.440361, -1.873996, -1.209838, -1.166666, -1.102932, -1.662547, -1.421386, -1.439404, -0.843721, -1.419084, -2.517692, -1.397105, -0.942960, -1.396876, -1.879727, -1.558144, -1.625311, -1.625311, -1.332642, -1.071427}, /*, acc[9][]=acc[10][]=acc[11][]=acc[12][]=acc[13][]=acc[14][]=acc[15][]=NULL, */ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /*, acc[16][]=..., */ {-1.310701, 0.000100, 0.000100, 0.000100, -1.495130, 0.000100, 0.000100, 0.000100, -1.900595, 0.000100, 0.000100, 0.000100, -1.030945, 0.000100, 0.000100, 0.000100, -1.163853, -1.321756, -2.041570, -1.233649, -1.536234, -1.324926, -3.165467, -0.740673, -1.212448, -1.566087, -1.927099, -1.055263, -1.742590, -1.513404, -1.614757, -0.901808, -1.102573, -1.406255, -2.132190, -1.189584, -1.552279, -1.398129, -2.650888, -0.753773, -1.313172, -1.609437, -1.757857, -1.025491, -1.657609, -1.582574, -1.479626, -0.977535, -1.274655, -1.332642, -2.071597, -1.106518, -1.324926, -1.309177, -4.774865, -0.785930, -1.193923, -1.442384, -1.970449, -1.135654, -1.835078, -1.551310, -1.575121, -0.864063, -1.195941, -1.524444, -1.958079, -1.082612, -1.515912, -1.366876, -3.461811, -0.704982, -1.180443, -1.278081, -1.997201, -1.278081, -1.695132, -1.298478, -1.607025, -1.070039, -1.177656, -1.514127, -2.120262, -1.044125, -1.631827, -1.220092, -2.963056, -0.781838, -1.393215, -1.542746, -1.841238, -0.969401, 
-1.874362, -1.651219, -1.561988, -0.809653, -1.116470, -1.342998, -2.781472, -1.051088, -1.477464, -1.310410, -2.999884, -0.793450, -1.708108, -1.602748, -1.868450, -0.769841, -2.238896, -1.435084, -1.425606, -0.879559, -1.324222, -1.506543, -2.177709, -0.918758, -1.644805, -1.337320, -3.071915, -0.697016, -1.488077, -1.517064, -1.998901, -0.869039, -2.154948, -1.359387, -1.575915, -0.866419, -1.609437, -1.169487, -2.413807, -0.916292, -1.628856, -1.243194, -2.921619, -0.773191, -1.854937, -1.493925, -1.854937, -0.770926, -2.302584, -1.369053, -1.676879, -0.779369, -1.695298, -1.301395, -2.229378, -0.828490, -1.842311, -1.266948, -3.451740, -0.638340, -2.169050, -1.645805, -2.108426, -0.559617, -2.425644, -1.335001, -1.630715, -0.792677, -1.327454, -1.245216, -2.936881, -0.931559, -2.182782, -1.128135, -2.908716, -0.675129, -2.140063, -1.478667, -1.916921, -0.678550, -2.504552, -1.382411, -1.541743, -0.791185, -1.452986, -1.419084, -3.028509, -0.742746, -1.945909, -1.341994, -3.020419, -0.603237, -1.979356, -1.622682, -2.079439, -0.617163, -2.488962, -1.382254, -1.783695, -0.697204, -1.550597, -1.417066, -3.496485, -0.663296, -1.945909, -1.213023, -3.158925, -0.658989, -2.095968, -1.307514, -2.031429, -0.743580, -2.537747, -1.169051, -1.821612, -0.801781, -1.593308, -1.201267, -2.866263, -0.823202, -2.243284, -1.124673, -3.660340, -0.609895, -2.833202, -1.560247, -1.834683, -0.559618, -2.611656, -1.486378, -1.791759, -0.627865, -1.580450, -1.791758, -4.624876, -0.481840, -2.291811, -1.229569, -3.139104, -0.574162, -2.978912, -1.438480, -1.998093, -0.551179, -2.566735, -1.344330, -1.817077, -0.693147, -1.751752, -1.326871, -13.795312, -0.577637, -2.273596, -1.211353, -3.814027, -0.549557, -2.484900, -1.321756, -1.897118, -0.693149, -2.843849, -1.204561, -1.818570, -0.734558, -1.532897, -1.219241, -13.687682, -0.716139, -2.514463, -1.098613, -3.653888, -0.580050, -2.188637, -1.757856, -1.920374, -0.563937, -2.347704, -1.230045, -1.929501, -0.761615, -1.695614, 
-1.164988, -13.901692, -0.684017, -2.092512, -1.252763, -3.650647, -0.571045, -2.564941, -1.277095, -1.871800, -0.712567, -2.595253, -1.260254, -1.784324, -0.746800, -1.718999, -1.262242, -3.970246, -0.656108, -2.158383, -1.040904, -3.228819, -0.709787, -2.097137, -1.335001, -1.963607, -0.747216, -2.642808, -1.098612, -1.898369, -0.808125, -1.444563, -1.296144, -4.663343, -0.731615, -2.105874, -0.787299, -3.646310, -0.923564, -1.992427, -1.368276, -1.522426, -0.939282, -2.595656, -1.222969, -1.875842, -0.738516, -1.223776, -1.834683, -2.987351, -0.701588, -2.389595, -0.798508, -3.488201, -0.849152, -1.609437, -1.810107, -1.442384, -0.916292, -2.364277, -1.485029, -1.741749, -0.684637, -0.053655, -14.108183, -2.951918, -14.108183, -0.198852, -14.930654, -1.712978, -14.930654, -0.537856, -14.038657, -0.877071, -14.038657, -0.943014, -15.410851, -0.493399, -15.410851, -0.671575, -1.551933, -2.274933, -1.746089, -1.386294, -1.386294, -1.386294, -1.386294, -1.123004, -1.751612, -1.017643, -1.967835, -1.386294, -1.386294, -1.386294, -1.386294, -15.348068, -0.000001, -15.348068, -15.348068, -14.547880, -0.000002, -14.547880, -14.547880, -14.585621, -0.000001, -14.585621, -14.585621, -14.346141, -0.000002, -14.346141, -14.346141, -1.386294, -1.386294, -1.386294, -1.386294, -0.000000, -16.173531, -16.173531, -16.173531, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.173531, -16.173531, -0.000000, -16.173531, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.353718, -1.932752, -0.722914, -2.192505, -1.386294, -1.386294, -1.386294, -1.386294, -1.482337, -1.825281, -1.697448, -0.847298, -1.023106, -1.504077, -3.084516, -0.987387, -1.380463, -1.991780, -1.605547, -0.888418, -2.131624, -1.937469, -0.781702, -1.274177, -1.083345, -1.977161, -1.553348, -1.166233, 
-0.995072, -1.522426, -2.333353, -1.154702, -1.255102, -1.810627, -1.414732, -1.176322, -1.616641, -1.628762, -1.207168, -1.183451, -1.197361, 0.000100, 0.000100, 0.000100, -1.545071, 0.000100, 0.000100, 0.000100, -1.522342, 0.000100, 0.000100, 0.000100, -1.322333, 0.000100, 0.000100, 0.000100, -1.153078, -1.733328, -1.313552, -1.432223, -1.005702, -1.372195, -2.981632, -1.108806, -1.228280, -1.492904, -1.413767, -1.430205, -1.592270, -1.584323, -1.289411, -1.151958, -1.159299, -1.739230, -1.346047, -1.384811, -1.006661, -1.391774, -2.693807, -1.144742, -1.139022, -1.554850, -1.412374, -1.491288, -1.498920, -1.558532, -1.285079, -1.239409, -1.166031, -1.636650, -1.381512, -1.416467, -0.964822, -1.459445, -2.869836, -1.109040, -1.212072, -1.576627, -1.361107, -1.429736, -1.497426, -1.576872, -1.262420, -1.249299, -1.146993, -1.774085, -1.276414, -1.453568, -1.033641, -1.412233, -2.732569, -1.091716, -1.231396, -1.453979, -1.381072, -1.499833, -1.560799, -1.530946, -1.309765, -1.191328, -1.108588, -1.725052, -1.370050, -1.436674, -0.993388, -1.411804, -2.961603, -1.095893, -1.129743, -1.594228, -1.385018, -1.498347, -1.532339, -1.522559, -1.296986, -1.229418, -1.180070, -1.706871, -1.298812, -1.433578, -0.961199, -1.418623, -2.722679, -1.171763, -1.159279, -1.650399, -1.396379, -1.399085, -1.530341, -1.601983, -1.313638, -1.160909, -1.186017, -1.691607, -1.297521, -1.439133, -0.983932, -1.414567, -2.861485, -1.121020, -1.271028, -1.541954, -1.287289, -1.471955, -1.565373, -1.635441, -1.374318, -1.069700, -1.098110, -1.650972, -1.379786, -1.500568, -0.985985, -1.469615, -2.719333, -1.105650, -1.144582, -1.561198, -1.405738, -1.484690, -1.500256, -1.601304, -1.332147, -1.166730, -1.144260, -1.760774, -1.295220, -1.444808, -0.988306, -1.391897, -2.923021, -1.122642, -1.343066, -1.483212, -1.280934, -1.451279, -1.574916, -1.557081, -1.267506, -1.201669, -1.172643, -1.671634, -1.326794, -1.438911, -0.983349, -1.381502, -2.940328, -1.133532, -1.257763, -1.510959, 
-1.378841, -1.414109, -1.554170, -1.551660, -1.387889, -1.117375, -1.201495, -1.619413, -1.285809, -1.492482, -0.998625, -1.352705, -2.744194, -1.175145, -1.256489, -1.438335, -1.461391, -1.401968, -1.567692, -1.473726, -1.267770, -1.269596, -1.094084, -1.725898, -1.330874, -1.501056, -0.930295, -1.494327, -2.833845, -1.132035, -1.248562, -1.567580, -1.330865, -1.425930, -1.485888, -1.585079, -1.286759, -1.228939, -1.145399, -1.818226, -1.268511, -1.434059, -1.028481, -1.435940, -2.747270, -1.077470, -1.192485, -1.586166, -1.301684, -1.515258, -1.525303, -1.525303, -1.320142, -1.211360, -1.105949, -1.689034, -1.422012, -1.413964, -0.903154, -1.472072, -2.743703, -1.200880, -1.180370, -1.475517, -1.443343, -1.478246, -1.476506, -1.611037, -1.327363, -1.181855, -1.146319, -1.710299, -1.242210, -1.548162, -1.004690, -1.407318, -2.741186, -1.125318, -1.261695, -1.538168, -1.395916, -1.368667, -1.484401, -1.553056, -1.326871, -1.215858, -1.091816, -1.697468, -1.404481, -1.444973, -0.987966, -1.488372, -2.827038, -1.070786, -1.209278, -1.603109, -1.291273, -1.489893, -1.540625, -1.550752, -1.410879, -1.109464, -1.180564, -1.697621, -1.323904, -1.411893, -0.955229, -1.419367, -2.884729, -1.147155, -1.218771, -1.470413, -1.377519, -1.503400, -1.490059, -1.553535, -1.332335, -1.206371, -1.088005, -1.689117, -1.357154, -1.509424, -0.952132, -1.431300, -2.588238, -1.204199, -1.088162, -1.744941, -1.302572, -1.529345, -1.540267, -1.639825, -1.166041, -1.272737, -1.108760, -1.773118, -1.239316, -1.557571, -0.931445, -1.433618, -2.740106, -1.193996, -1.142260, -1.538210, -1.349753, -1.575842, -1.412300, -1.608298, -1.272640, -1.286999, -1.020594, -1.826818, -1.322209, -1.550468, -0.840881, -1.576371, -2.765460, -1.207317, -0.979403, -1.793596, -1.433251, -1.516076, -1.385705, -1.670247, -1.251289, -1.289109, -0.693147, -17.008453, -0.693147, -17.008453, -0.143413, -16.329166, -2.012881, -16.329166, -0.663294, -16.556995, -0.723919, -16.556995, -0.789977, -16.655758, -0.604871, 
-16.655758, -1.194699, -1.692786, -1.222431, -1.520122, -1.386294, -1.386294, -1.386294, -1.386294, -1.305600, -1.489847, -1.243845, -1.535679, -1.386294, -1.386294, -1.386294, -1.386294, -16.812243, -0.000000, -16.812243, -16.812243, -16.456709, -0.000000, -16.456709, -16.456709, -16.822678, -0.000000, -16.822678, -16.822678, -16.527554, -0.000000, -16.527554, -16.527554, -1.386294, -1.386294, -1.386294, -1.386294, 0.000000, -18.054533, -18.054533, -18.054533, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -18.054533, -18.054533, 0.000000, -18.054533, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.168646, -1.507906, -1.428415, -1.477747, -1.386294, -1.386294, -1.386294, -1.386294, -1.108874, -1.781143, -1.326407, -1.443098, -0.900135, -1.516203, -2.484255, -1.235967, -1.213241, -1.483532, -1.397184, -1.475606, -1.508512, -1.582620, -1.237395, -1.261652, -1.147035, -1.770419, -1.313302, -1.413744, -0.963388, -1.531922, -2.768317, -1.080237, -1.146225, -1.628731, -1.367366, -1.464428, -1.419945, -1.605520, -1.324845, -1.232224}, /*, acc[17][]=..., */ {-1.579177, 0.000100, 0.000100, 0.000100, -1.199687, 0.000100, 0.000100, 0.000100, -1.669691, 0.000100, 0.000100, 0.000100, -1.189867, 0.000100, 0.000100, 0.000100, -1.530875, -1.445718, -1.348554, -1.243194, -1.412963, -1.163151, -2.721292, -0.972096, -1.375823, -1.558144, -1.583462, -1.103890, -1.835027, -1.215988, -1.249511, -1.357400, -1.424035, -1.141172, -1.591088, -1.443453, -1.539233, -1.052423, -2.908716, -0.962811, -1.391282, -1.192431, -1.519115, -1.474663, -1.698458, -1.281565, -1.402193, -1.226303, -1.517870, -1.019624, -1.833723, -1.346021, -1.575005, -1.039863, -2.753657, -0.978709, -1.327296, -1.247254, -1.732760, -1.306677, -1.998095, -1.209639, -1.452869, -1.102008, -1.519115, -1.144422, 
-1.902106, -1.160171, -1.609438, -1.107745, -3.026499, -0.864619, -1.438938, -1.288656, -1.871801, -1.098613, -2.060978, -1.120592, -1.367832, -1.231257, -1.475242, -1.344622, -1.547562, -1.211091, -1.467376, -1.000757, -2.775705, -1.080094, -1.527373, -1.092056, -1.691675, -1.335001, -1.940179, -1.098613, -1.409551, -1.277492, -1.386294, -1.203973, -1.966111, -1.171183, -1.652258, -1.007120, -2.866698, -0.951329, -1.489730, -1.221466, -1.934414, -1.092849, -2.309228, -1.046988, -1.447747, -1.156550, -1.425296, -1.035100, -1.877280, -1.380845, -1.800648, -0.943199, -3.053406, -0.920726, -1.667007, -1.561647, -1.466337, -0.992554, -2.508435, -1.113254, -1.352005, -1.104444, -2.098984, -0.853771, -1.865370, -1.214784, -1.877701, -0.938597, -2.784420, -0.931106, -1.744691, -1.261840, -1.501070, -1.141696, -2.386465, -1.031921, -1.421386, -1.170071, -1.838278, -1.193923, -1.515506, -1.145133, -2.249940, -0.902868, -2.617663, -0.876551, -1.779782, -1.283347, -1.710789, -0.984854, -2.536043, -1.177922, -1.365974, -1.027862, -1.486835, -0.983733, -2.442340, -1.161414, -2.194295, -0.834671, -2.740837, -0.941534, -2.514229, -1.026158, -1.719303, -0.963638, -2.682072, -0.998529, -1.691676, -0.970358, -1.553348, -1.089044, -2.079438, -1.118031, -2.310246, -0.949929, -2.882764, -0.780853, -2.810898, -1.176778, -1.632252, -0.829907, -2.679437, -1.070002, -1.506719, -1.003063, -1.988924, -0.970359, -2.944423, -0.840307, -2.216970, -0.960552, -3.172477, -0.762540, -2.491822, -1.006443, -1.932209, -0.899197, -3.022857, -1.048780, -1.423473, -1.021381, -1.597602, -1.098614, -3.332176, -0.847300, -2.329982, -0.964743, -3.023127, -0.748717, -2.108426, -1.203973, -2.169050, -0.767256, -3.725684, -1.137930, -1.550942, -0.813343, -2.079437, -1.014732, -2.995712, -0.771111, -3.007424, -0.765468, -3.275686, -0.803690, -3.036541, -1.044125, -1.883873, -0.802963, -3.135490, -1.233387, -1.631417, -0.755948, -2.360842, -1.667704, -3.970199, -0.359379, -2.686483, -0.855506, -2.909626, 
-0.793374, -4.143088, -1.225364, -1.891841, -0.616776, -2.841887, -1.133781, -1.781018, -0.795393, -1.592629, -1.244325, -13.287885, -0.676344, -2.644753, -0.776035, -3.561037, -0.820207, -2.468093, -1.274177, -1.826244, -0.745335, -2.954907, -1.009000, -1.588818, -0.969780, -1.878767, -1.098614, -4.276572, -0.693150, -2.626218, -0.889521, -3.393469, -0.727317, -3.745544, -1.180626, -2.205126, -0.581509, -3.051636, -1.023492, -1.462405, -1.016935, -1.729236, -0.991642, -13.337481, -0.794933, -1.749868, -1.064444, -3.151662, -0.824390, -2.890361, -1.280934, -1.791758, -0.693149, -2.668569, -0.850286, -1.648211, -1.168085, -2.028145, -0.765908, -13.946543, -0.907558, -2.379544, -0.847298, -3.855441, -0.781603, -2.866263, -0.880360, -3.020411, -0.734648, -2.609053, -0.864363, -2.392830, -0.882601, -13.664692, -0.000003, -13.664692, -13.664692, -15.278767, -0.000001, -15.278767, -15.278767, -13.122372, -0.000006, -13.122372, -13.122372, -15.299386, -0.000001, -15.299386, -15.299386, -1.386294, -1.386294, -1.386294, -1.386294, -16.127056, -0.000000, -16.127056, -16.127056, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -0.995137, -1.108573, -2.225367, -1.648857, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -15.131920, -0.000001, -15.131920, -15.131920, -15.018484, -0.000001, -15.018484, -15.018484, -13.901692, -0.000003, -13.901692, -13.901692, -14.478201, -0.000002, -14.478201, -14.478201, -1.386294, -1.386294, -1.386294, -1.386294, -0.000000, -16.127056, -16.127056, -16.127056, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -16.127056, -16.127056, -0.000000, -16.127056, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.423654, 
-1.810769, -0.688204, -2.373420, -1.386294, -1.386294, -1.386294, -1.386294, -1.504077, -1.642913, -1.561236, -0.982202, -1.579584, -1.154702, -2.466883, -0.931559, -1.603538, -1.392229, -1.623341, -1.041125, -2.058384, -1.710080, -0.879735, -1.285199, -1.300935, -1.619388, -1.570598, -1.133881, -1.284245, -1.158205, -2.444412, -1.132229, -1.446919, -1.190986, -1.386294, -1.556920, -2.251290, -1.244487, -0.933051, -1.545072, -1.322391, 0.000100, 0.000100, 0.000100, -1.401640, 0.000100, 0.000100, 0.000100, -1.493348, 0.000100, 0.000100, 0.000100, -1.336779, 0.000100, 0.000100, 0.000100, -1.084428, -1.913119, -1.127600, -1.658227, -1.108976, -1.278874, -2.272123, -1.242507, -1.326002, -1.512588, -1.304949, -1.414950, -1.806357, -1.526056, -1.255182, -1.098613, -1.314836, -1.591088, -1.185624, -1.504077, -1.217876, -1.178655, -2.644986, -1.122566, -1.381179, -1.302708, -1.265667, -1.635413, -1.623966, -1.648658, -1.294487, -1.089043, -1.054161, -1.761906, -1.527067, -1.337023, -1.130874, -1.271455, -2.063691, -1.309922, -1.375823, -1.509354, -1.509354, -1.186581, -1.638286, -1.439835, -1.177471, -1.344525, -1.497251, -1.400088, -1.199417, -1.477049, -1.358745, -1.198402, -2.789484, -0.967879, -1.170072, -1.632694, -1.397855, -1.397855, -1.614078, -1.591088, -1.232144, -1.185624, -1.197053, -1.760987, -1.130361, -1.593933, -1.274503, -1.098613, -2.660792, -1.148210, -1.338285, -1.359339, -1.131949, -1.842189, -1.841430, -1.347413, -1.266067, -1.205443, -1.224807, -1.900561, -1.278874, -1.278874, -0.999240, -1.474663, -3.000712, -1.040626, -1.438119, -1.574695, -1.193923, -1.376244, -1.488810, -1.360977, -1.247648, -1.466337, -1.228290, -1.739115, -1.243795, -1.413693, -1.068937, -1.398416, -2.339395, -1.160745, -1.241713, -1.727220, -1.167606, -1.504077, -1.655957, -1.145133, -1.404643, -1.404643, -1.314836, -1.737692, -1.126784, -1.463255, -1.233715, -1.045124, -3.006773, -1.178655, -1.558144, -1.103890, -1.396876, -1.558144, -1.469676, -1.742968, -1.203973, 
-1.220780, -1.172330, -1.371181, -1.499014, -1.545534, -1.220780, -1.123931, -2.407942, -1.237875, -1.464546, -1.255455, -1.464546, -1.375599, -1.739490, -1.516347, -1.315677, -1.088904, -1.088613, -1.604425, -1.342061, -1.604425, -0.979266, -1.469472, -2.528075, -1.156600, -1.290984, -1.313974, -1.437587, -1.519825, -1.637608, -1.349927, -1.248144, -1.349927, -1.146974, -1.813451, -1.425687, -1.276651, -1.077559, -1.175999, -2.933849, -1.211091, -1.122566, -1.695911, -1.279751, -1.546379, -1.599387, -1.481604, -1.193923, -1.317975, -1.202543, -1.600890, -1.559218, -1.246346, -1.036621, -1.235472, -2.374902, -1.340832, -1.464412, -1.392092, -1.369102, -1.324650, -1.820746, -1.321756, -1.157453, -1.358124, -1.410987, -1.634130, -1.046344, -1.561810, -1.183770, -1.235064, -2.713160, -1.088460, -1.336145, -1.336145, -1.403586, -1.475906, -1.890849, -1.313535, -1.405343, -1.093907, -1.365241, -1.770705, -1.193391, -1.304616, -1.255266, -1.093625, -2.659255, -1.171183, -1.364316, -1.342809, -1.504077, -1.342809, -1.810108, -1.266493, -1.461802, -1.130948, -1.136765, -1.446919, -1.517536, -1.493439, -1.405712, -0.968091, -2.629483, -1.194404, -1.195776, -1.686398, -1.175574, -1.591088, -1.704747, -1.315283, -1.348073, -1.237725, -1.247825, -1.501605, -1.437067, -1.376442, -1.310195, -1.180984, -2.326113, -1.122143, -1.261840, -1.240787, -1.710789, -1.398416, -1.712091, -1.133356, -1.410987, -1.371766, -1.108663, -1.537117, -1.448170, -1.514127, -1.408767, -1.085368, -2.708045, -1.046653, -1.477586, -1.315067, -1.091924, -1.785069, -1.931988, -1.414732, -1.288439, -1.089310, -1.274503, -1.614828, -1.488077, -1.218414, -1.224724, -1.208724, -2.712795, -1.075193, -1.249844, -1.809458, -1.270463, -1.313022, -1.897119, -1.501807, -1.316090, -1.024180, -1.235064, -1.694595, -1.289131, -1.386294, -1.224906, -1.391960, -2.778248, -0.927655, -1.243603, -1.404871, -1.404871, -1.510231, -1.722766, -1.435084, -1.252763, -1.211941, -14.513647, -0.000002, -14.513647, -14.513647, 
-14.403299, -0.000002, -14.403299, -14.403299, -14.386493, -0.000002, -14.386493, -14.386493, -14.648421, -0.000001, -14.648421, -14.648421, -1.386294, -1.386294, -1.386294, -1.386294, -15.879839, -0.000000, -15.879839, -15.879839, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.067379, -1.271335, -2.737671, -1.168240, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -14.812461, -0.000001, -14.812461, -14.812461, -14.608505, -0.000001, -14.608505, -14.608505, -13.142174, -0.000006, -13.142174, -13.142174, -14.711600, -0.000001, -14.711600, -14.711600, -1.386294, -1.386294, -1.386294, -1.386294, -0.000000, -15.879839, -15.879839, -15.879839, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -15.879839, -15.879839, -0.000000, -15.879839, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.327164, -1.401640, -1.312912, -1.516206, -1.386294, -1.386294, -1.386294, -1.386294, -1.137642, -2.010128, -1.199200, -1.410509, -1.316615, -1.005179, -2.628796, -1.224807, -1.405343, -1.464766, -1.313535, -1.367602, -1.687555, -1.569772, -1.127940, -1.261472, -1.421386, -1.624326, -1.170072, -1.381380, -1.343735, -1.207603, -3.135484, -0.924477, -1.355204, -1.533452, -1.418383, -1.258040, -1.638286, -1.460038, -1.103363, -1.420032}, /*, acc[18][]=..., */ {-1.374213, 0.000100, 0.000100, 0.000100, -1.352614, 0.000100, 0.000100, 0.000100, -1.784054, 0.000100, 0.000100, 0.000100, -1.138054, 0.000100, 0.000100, 0.000100, -1.199616, -1.463430, -1.672521, -1.274839, -1.377784, -1.221215, -3.152728, -0.890974, -1.440361, -1.360319, -1.691675, -1.132061, -1.609438, -1.379421, -1.510998, -1.116004, -1.168571, -1.395344, -1.936940, -1.213023, -1.543298, -1.235814, 
-3.057418, -0.801362, -1.192545, -1.365816, -1.798679, -1.287855, -1.845826, -1.542640, -1.286211, -1.044199, -1.344525, -1.224724, -2.056020, -1.147166, -1.390749, -1.085368, -3.470178, -0.961754, -1.146079, -1.759182, -1.926236, -1.009947, -1.830225, -1.380025, -1.513556, -0.999878, -1.113001, -1.415282, -2.211611, -1.142415, -1.454117, -1.370034, -3.194575, -0.752237, -1.309922, -1.370546, -1.700786, -1.225364, -1.933677, -1.251226, -1.317917, -1.198858, -1.400893, -1.143064, -2.288193, -1.098613, -1.464937, -1.102823, -3.275037, -0.918394, -1.436484, -1.325259, -2.072470, -0.992554, -1.908753, -1.215607, -1.539394, -1.076771, -1.242125, -1.279865, -2.235373, -1.120235, -1.546480, -1.267309, -3.139104, -0.771988, -1.319603, -1.618095, -1.662547, -1.064712, -2.022624, -1.329478, -1.521145, -0.955512, -1.339288, -1.153572, -2.140064, -1.188058, -1.433412, -1.199798, -3.397011, -0.851491, -1.532476, -1.461018, -1.737270, -0.978167, -2.435308, -1.499849, -1.592631, -0.721803, -1.249719, -1.386294, -1.804028, -1.208047, -1.950102, -1.082015, -2.768408, -0.785116, -1.642227, -1.236763, -1.684786, -1.106710, -2.533695, -1.300165, -1.480547, -0.865990, -1.537978, -1.216396, -2.266213, -0.954032, -1.735878, -1.345013, -3.261927, -0.644540, -1.972755, -1.303708, -1.839224, -0.842894, -2.384023, -1.326656, -1.675840, -0.786783, -1.601715, -1.601715, -2.087221, -0.748940, -1.759498, -1.090449, -4.110850, -0.743579, -2.105871, -1.310945, -2.260020, -0.684491, -2.369523, -1.314587, -1.448119, -0.909123, -1.660295, -1.617736, -3.186336, -0.561686, -2.306575, -1.156005, -3.579530, -0.583811, -2.094943, -1.284016, -2.094943, -0.740402, -2.388017, -1.316805, -1.488077, -0.880943, -1.479626, -1.213924, -3.228805, -0.830933, -2.290510, -1.139941, -3.206795, -0.619040, -2.214170, -1.521026, -2.140062, -0.589471, -2.714921, -1.303095, -1.457246, -0.845777, -1.609437, -1.364316, -3.113495, -0.693149, -2.541599, -1.059998, -3.139431, -0.632060, -2.302580, -1.284016, -2.670302, 
-0.590870, -2.569598, -1.206296, -1.671659, -0.829666, -1.486377, -1.540444, -3.332176, -0.646630, -2.253103, -1.066524, -3.222498, -0.671642, -2.456728, -1.188225, -2.351369, -0.664978, -2.867896, -1.274590, -1.852668, -0.679603, -1.462280, -1.110884, -3.713527, -0.880361, -2.646544, -1.208961, -5.590889, -0.467024, -2.740828, -1.588160, -2.740828, -0.405468, -3.191122, -1.299283, -1.646928, -0.706220, -2.063687, -0.741940, -13.353481, -0.924261, -2.438384, -1.086778, -2.964473, -0.646628, -2.100057, -1.449473, -2.282377, -0.614678, -2.672116, -1.161526, -1.721142, -0.823201, -1.845824, -1.440361, -13.541079, -0.502095, -2.309456, -0.906070, -3.479519, -0.764100, -2.512298, -1.413693, -1.936939, -0.631995, -2.390876, -1.186905, -1.659990, -0.883956, -1.780584, -0.992130, -13.698981, -0.775066, -2.422074, -0.952785, -2.907579, -0.752920, -2.265538, -1.367602, -2.098486, -0.656108, -2.774987, -1.084327, -1.770406, -0.845701, -2.054119, -1.265667, -3.663517, -0.572522, -2.658347, -0.978709, -3.803470, -0.631400, -2.813398, -1.203973, -2.659250, -0.562121, -3.079004, -1.179261, -1.818754, -0.725131, -0.952011, -13.253399, -2.944412, -0.577319, -1.274892, -14.904074, -3.128776, -0.390428, -2.020941, -13.629186, -1.646250, -0.393492, -2.277266, -15.358809, -1.637609, -0.352411, -14.310209, -0.000002, -14.310209, -14.310209, -1.386294, -1.386294, -1.386294, -1.386294, -14.022528, -0.000002, -14.022528, -14.022528, -15.636829, -0.000001, -15.636829, -15.636829, -1.386294, -1.386294, -1.386294, -1.386294, -0.916291, -1.205816, -2.603241, -1.484925, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -15.101986, -0.000001, -15.101986, -15.101986, -14.812461, -0.000001, -14.812461, -14.812461, -13.415039, -0.000004, -13.415039, -13.415039, -14.533353, -0.000002, -14.533353, -14.533353, -1.386294, -1.386294, -1.386294, -1.386294, -0.000000, -16.018276, -16.018276, -16.018276, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, 
-1.386294, -1.386294, -16.018276, -16.018276, -0.000000, -16.018276, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.428038, -2.045761, -0.661606, -2.163543, -1.386294, -1.386294, -1.386294, -1.386294, -1.428654, -1.796378, -1.573234, -0.949081, -1.360977, -1.584119, -2.564942, -0.773191, -1.454981, -1.657693, -1.603034, -0.981544, -2.079438, -1.699950, -0.955513, -1.178656, -1.190986, -1.489478, -1.654558, -1.275069, -1.232144, -1.273817, -2.351372, -1.098613, -1.407801, -1.193391, -1.572880, -1.407801, -1.669157, -1.343735, -1.116157, -1.499739, -1.200282, 0.000100, 0.000100, 0.000100, -1.570202, 0.000100, 0.000100, 0.000100, -1.555079, 0.000100, 0.000100, 0.000100, -1.273985, 0.000100, 0.000100, 0.000100, -1.276732, -1.545722, -1.340014, -1.402210, -1.110107, -1.266186, -2.502197, -1.181994, -1.215927, -1.426491, -1.507837, -1.418709, -1.569183, -1.529177, -1.423817, -1.095784, -1.143064, -1.708378, -1.279196, -1.506254, -1.001919, -1.371181, -2.531667, -1.205666, -1.179127, -1.572396, -1.255597, -1.609438, -1.488077, -1.572269, -1.246472, -1.276177, -1.183595, -1.559459, -1.341329, -1.504735, -1.074515, -1.365867, -2.844532, -1.063586, -1.319657, -1.617877, -1.168206, -1.498212, -1.726237, -1.378451, -1.271842, -1.238689, -1.060064, -1.583574, -1.482478, -1.508454, -1.010295, -1.382816, -2.856119, -1.116188, -1.316290, -1.556880, -1.457941, -1.243531, -1.565635, -1.558518, -1.336794, -1.145986, -1.282007, -1.571300, -1.225654, -1.508780, -0.998529, -1.255489, -2.785372, -1.255489, -1.315315, -1.458416, -1.392276, -1.384308, -1.515887, -1.574156, -1.266671, -1.232933, -1.315031, -1.752757, -1.253474, -1.299283, -1.000010, -1.364090, -2.372317, -1.261436, -1.285066, -1.693641, -1.063166, -1.639082, -1.480088, -1.570742, -1.292227, -1.238450, -1.306791, -1.592748, -1.212895, -1.475731, 
-1.066074, -1.314970, -2.946383, -1.094729, -1.356903, -1.426332, -1.350824, -1.413345, -1.655958, -1.586965, -1.282754, -1.116962, -1.239967, -1.579585, -1.229550, -1.550597, -1.127033, -1.427408, -2.304337, -1.089894, -1.275158, -1.484676, -1.371078, -1.426265, -1.676813, -1.529177, -1.147969, -1.276412, -1.148966, -1.642724, -1.356967, -1.460402, -1.184486, -1.342543, -2.300797, -1.100403, -1.328601, -1.334680, -1.378301, -1.514565, -1.632080, -1.314321, -1.401332, -1.239420, -1.179759, -1.709908, -1.296346, -1.434496, -0.995998, -1.502466, -2.716146, -1.073133, -1.211869, -1.467548, -1.467548, -1.421386, -1.521313, -1.468320, -1.285999, -1.291390, -1.376992, -1.494775, -1.300619, -1.382297, -0.978990, -1.452922, -2.713588, -1.126626, -1.235323, -1.393729, -1.431757, -1.504077, -1.656784, -1.445836, -1.276896, -1.221466, -1.310094, -1.619755, -1.304674, -1.343246, -0.989879, -1.359626, -2.599315, -1.213023, -1.219514, -1.456304, -1.426227, -1.463967, -1.471194, -1.635949, -1.292042, -1.201070, -1.414151, -1.353859, -1.291984, -1.496551, -0.995428, -1.323115, -3.094669, -1.143064, -1.324925, -1.487044, -1.324925, -1.417518, -1.686109, -1.339643, -1.345183, -1.229872, -1.208046, -1.657847, -1.293204, -1.442736, -1.169725, -1.159725, -3.061829, -1.111171, -1.289668, -1.493266, -1.276595, -1.509527, -1.616873, -1.384814, -1.299777, -1.278271, -1.225245, -1.486610, -1.341779, -1.519616, -1.235149, -1.224099, -2.931595, -1.016459, -1.262850, -1.336462, -1.477961, -1.485994, -1.560366, -1.525760, -1.279751, -1.222888, -1.308185, -1.605016, -1.324714, -1.335888, -0.964483, -1.417066, -2.777383, -1.157743, -1.211636, -1.465701, -1.449307, -1.441210, -1.554917, -1.554917, -1.188763, -1.298326, -1.302968, -1.562291, -1.206274, -1.517267, -1.196804, -1.173274, -2.559566, -1.167477, -1.229568, -1.742453, -1.267309, -1.382691, -1.472646, -1.551635, -1.309495, -1.241937, -1.163443, -1.566574, -1.381924, -1.479563, -1.012483, -1.377515, -2.676796, -1.152974, -1.325084, 
-1.525060, -1.389622, -1.318853, -1.503331, -1.483397, -1.276169, -1.303271, -1.202602, -1.491807, -1.278468, -1.628829, -1.096856, -1.402238, -2.630306, -1.055614, -1.212525, -1.485994, -1.535591, -1.343431, -1.765557, -1.413202, -1.235944, -1.220830, -1.061518, -15.744130, -1.113254, -1.122143, -0.764794, -15.595535, -2.274319, -0.840017, -0.964858, -15.536490, -1.105794, -1.244745, -1.471712, -15.738298, -1.002505, -0.907557, -15.994798, -0.000000, -15.994798, -15.994798, -1.386294, -1.386294, -1.386294, -1.386294, -15.793751, -0.000001, -15.793751, -15.793751, -16.031448, -0.000000, -16.031448, -16.031448, -1.386294, -1.386294, -1.386294, -1.386294, -1.089541, -1.187210, -2.877773, -1.196342, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -15.954400, -0.000000, -15.954400, -15.954400, -15.856731, -0.000000, -15.856731, -15.856731, -14.166170, -0.000002, -14.166170, -14.166170, -15.847599, -0.000000, -15.847599, -15.847599, -1.386294, -1.386294, -1.386294, -1.386294, 0.000000, -17.043941, -17.043941, -17.043941, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -17.043941, -17.043941, -0.000000, -17.043941, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.254349, -1.468849, -1.318888, -1.527325, -1.386294, -1.386294, -1.386294, -1.386294, -1.172080, -1.582039, -1.227393, -1.651997, -1.112477, -1.423108, -2.809401, -0.994113, -1.299777, -1.439539, -1.367218, -1.445808, -1.561343, -1.544101, -1.218679, -1.269323, -1.246472, -1.664207, -1.261214, -1.427818, -0.980240, -1.477013, -2.424801, -1.177770, -1.100338, -1.465188, -1.465188, -1.583904, -1.371391, -1.657102, -1.223175, -1.342234}, /*, acc[19..24][]=...NULL, */ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, }; kmer-code-2013-trunk/libsim4/sim4core/align.C0000644000000000000000000005532312415066336017452 0ustar rootroot#include "sim4.H" // only for debugging #include #include #if defined (__SVR4) && defined (__sun) // Solaris defines SS in sys/regset.h #undef SS #endif // Define this to do bounds checking on the arrays used here //#define CHECK_BOUNDS #ifdef CHECK_BOUNDS class boundedIntArray { public: boundedIntArray(int offset, int size) { //fprintf(stderr, "boundedIntArray: 
offset=%d size=%d\n", offset, size); _o = offset; _m = size; _a = new int [_m]; bzero(_a, sizeof(int) * _m); _a -= _o; }; ~boundedIntArray() { _a += _o; delete [] _a; }; int &operator[](int i) { if (i < _o) { fprintf(stderr, "********** i=%d o=%d\n", i, _o); exit(1); } if (i >= _o + _m) { fprintf(stderr, "********** i=%d o=%d m=%d\n", i, _o, _m); exit(1); } return(_a[i]); }; int _o; int _m; int *_a; int _crud; }; #endif int Sim4::align_get_dist(int i1, int j1, int i2, int j2, int limit) { // Compute the boundary diagonals int start = j1 - i1; int lower = MAX(j1-i2, start-limit); int upper = MIN(j2-i1, start+limit); int goal_diag = j2-i2; if (goal_diag > upper || goal_diag < lower) return(-1); // Allocate space for forward vectors #ifdef CHECK_BOUNDS boundedIntArray last_d(lower, upper-lower+1); boundedIntArray temp_d(lower, upper-lower+1); #else int *last_d = (int *)ckalloc((upper-lower+1) * sizeof(int)) - lower; int *temp_d = (int *)ckalloc((upper-lower+1) * sizeof(int)) - lower; #endif // Initialization -- it's set to an easy to recognize value for // debugging. 
for (int k=lower; k<=upper; ++k) last_d[k] = -2109876543; last_d[start] = snake(start, i1, i2, j2); if (last_d[goal_diag] >= i2) { #ifndef CHECK_BOUNDS ckfree(last_d+lower); ckfree(temp_d+lower); #endif return(0); } for (int c=1; c<=limit; ++c) { int ll = MAX(lower,start-c); int uu = MIN(upper, start+c); for (int k=ll; k<=uu; ++k) { int row; if (k == ll) row = last_d[k+1]+1; // DELETE else if (k == uu) row = last_d[k-1]; // INSERT else if ((last_d[k]>=last_d[k+1]) && (last_d[k]+1>=last_d[k-1])) row = last_d[k]+1; // SUBSTITUTE else if ((last_d[k+1]+1>=last_d[k-1]) && (last_d[k+1]>=last_d[k])) row = last_d[k+1]+1; // DELETE else row = last_d[k-1]; // INSERT temp_d[k] = snake(k,row,i2,j2); } for (int k=ll; k<=uu; ++k) last_d[k] = temp_d[k]; if (last_d[goal_diag] >= i2) { // Free working vectors #ifndef CHECK_BOUNDS ckfree(last_d+lower); ckfree(temp_d+lower); #endif return(c); } } #ifndef CHECK_BOUNDS ckfree(last_d+lower); ckfree(temp_d+lower); #endif // Ran out of distance limit return(-1); } int Sim4::get_dist(int i1, int j1, int i2, int j2, int limit) { int *SS, *DD, *II; int goal_diag; int c, k, t1, t2, t; int start, lower, upper; /* Compute the boundary diagonals */ start = j1 - i1; lower = MAX(j1-i2, start-limit); upper = MIN(j2-i1, start+limit); goal_diag = j2-i2; if (goal_diag > upper || goal_diag < lower) { fprintf(stderr, "The two sequences are not really similar.(1 %d; %d %d %d %d)\n", limit, i1, j1, i2, j2); fprintf(stderr, "Please try exact phase 1 method\n."); #ifndef CHECK_BOUNDS /* Free working vectors */ ckfree(SS+lower); ckfree(DD+lower); ckfree(II+lower-1); #endif return -1; } /* Allocate space for forward vectors */ #ifdef CHECK_BOUNDS boundedIntArray SS(lower, upper-lower+1); boundedIntArray DD(lower, upper-lower+2); boundedIntArray II(lower-1, upper-lower+2); #else SS = (int *)ckalloc((upper-lower+1)*sizeof(int)) - lower; DD = (int *)ckalloc((upper-lower+2)*sizeof(int)) - lower; II = (int *)ckalloc((upper-lower+2)*sizeof(int)) - lower + 1; #endif 
/* Initialization */ for (k=lower; k<=upper; ++k) SS[k] = -99999; for (k=lower; k<=upper+1; ++k) DD[k] = -99999; for (k=lower-1; k<=upper; ++k) II[k] = -99999; SS[start] = snake(start, i1, i2, j2); if (SS[goal_diag] >= i2) { #ifdef STATS printf("get_dist = %d\n", 0); #endif #ifndef CHECK_BOUNDS /* Free working vectors */ ckfree(SS+lower); ckfree(DD+lower); ckfree(II+lower-1); #endif return 0; } for (c=1; c<=limit; ++c) { t = MAX(lower, start-c); t1 = II[t-1]; for (k=t; k<=MIN(upper, start+c); ++k) { t2 = II[k]; II[k] = MAX(t1, SS[k]); t1 = t2; DD[k] = MAX(DD[k+1]+1, SS[k]); SS[k] = snake(k, MIN(j2-k,MAX(MAX(SS[k]+1, II[k]), DD[k] )), i2, j2); } if (SS[goal_diag] >= i2) { #ifdef STATS printf("get_dist = %d\n", c); #endif #ifndef CHECK_BOUNDS /* Free working vectors */ ckfree(SS+lower); ckfree(DD+lower); ckfree(II+lower-1); #endif return c; } } /* Ran out of distance limit */ fprintf(stderr, "Two sequences are not really similar.\n"); fprintf(stderr, "Please try exact phase 1.\n"); return -1; } void Sim4::align_path(int i1, int j1, int i2, int j2, int dist, edit_script **head, edit_script **tail) { #ifndef CHECK_BOUNDS int *last_d = 0L; int *temp_d = 0L; int *rlast_d = 0L; int *rtemp_d = 0L; #endif edit_script *head1 = 0L; edit_script *tail1 = 0L; edit_script *head2 = 0L; edit_script *tail2 = 0L; //fprintf(stderr, "align_path()-- i1=%d j1=%d i2=%d j2=%d dist=%d\n", i1, j1, i2, j2, dist); int ll=0; int uu=0; *head = *tail = NULL; // Boundary cases if (i1 == i2) { if (j1 == j2) { *head = NULL; } else { head1 = (edit_script *) ckalloc(sizeof(edit_script)); head1->op_type = INSERT; head1->num = j2-j1; head1->next = NULL; *head = *tail = head1; } return; } if (j1 == j2) { head1 = (edit_script *) ckalloc(sizeof(edit_script)); head1->op_type = DELETE; head1->num = i2-i1; head1->next = NULL; *head = *tail = head1; return; } if (dist <= 1) { int start = j1-i1; if (j2-i2 == j1-i1) { head1 = (edit_script *) ckalloc(sizeof(edit_script)); head1->op_type = SUBSTITUTE; head1->num = 
i2-i1; head1->next = NULL; *head = *tail = head1; } else if (j2-j1 == i2-i1+1) { int tmp = snake(start,i1,i2,j2); if (tmp>i1) { head1 = (edit_script *) ckalloc(sizeof(edit_script)); head1->op_type = SUBSTITUTE; head1->num = tmp-i1; *head = head1; } head2 = (edit_script *) ckalloc(sizeof(edit_script)); head2->op_type = INSERT; head2->num = 1; if (*head) head1->next = head2; else *head = head2; *tail = head2; head2->next = NULL; if (i2-tmp) { head1 = head2; *tail = head2 = (edit_script *)ckalloc(sizeof(edit_script)); head2->op_type = SUBSTITUTE; head2->num = i2-tmp; head2->next = NULL; head1->next = head2; } } else if (j2-j1+1 == i2-i1) { int tmp = snake(start,i1,i2,j2); if (tmp>i1) { head1 = (edit_script *) ckalloc(sizeof(edit_script)); head1->op_type = SUBSTITUTE; head1->num = tmp-i1; *head = head1; } head2 = (edit_script *) ckalloc(sizeof(edit_script)); head2->op_type = DELETE; head2->num = 1; if (*head) head1->next = head2; else *head = head2; *tail = head2; head2->next = NULL; if (i2>tmp+1) { head1 = head2; *tail = head2 = (edit_script *)ckalloc(sizeof(edit_script)); head2->op_type = SUBSTITUTE; head2->num = i2-tmp-1; head2->next = NULL; head1->next = head2; } } else { fprintf(stderr, "Sim4::align_path()-- warning: something wrong when aligning."); *head = 0L; *tail = 0L; } return; } // Divide the problem at the middle cost int midc = dist/2; int rmidc = dist - midc; // Compute the boundary diagonals int start = j1 - i1; int lower = MAX(j1-i2, start-midc); int upper = MIN(j2-i1, start+midc); int rstart = j2-i2; int rlower = MAX(j1-i2, rstart-rmidc); int rupper = MIN(j2-i1, rstart+rmidc); #if 0 fprintf(stderr, "dist = %d\n", dist); fprintf(stderr, "midc = %d rmidc = %d\n", midc, rmidc); fprintf(stderr, "j1 = %d\n", j1); fprintf(stderr, "i1 = %d\n", i1); fprintf(stderr, "j2 = %d\n", j2); fprintf(stderr, "i2 = %d\n", i2); fprintf(stderr, "start = %d lower = %d upper = %d\n", start, lower, upper); fprintf(stderr, "rstart = %d rlower = %d rupper = %d\n", rstart, 
rlower, rupper); #endif // Allocate space for forward vectors #ifdef CHECK_BOUNDS boundedIntArray last_d(lower, upper-lower+1); boundedIntArray temp_d(lower, upper-lower+1); #else last_d = (int *)ckalloc((upper-lower+1)*sizeof(int)) - lower; temp_d = (int *)ckalloc((upper-lower+1)*sizeof(int)) - lower; #endif for (int k=lower; k<=upper; k++) last_d[k] = -1; last_d[start] = snake(start,i1,i2,j2); // Forward computation for (int c=1; c<=midc; ++c) { ll = MAX(lower,start-c); uu = MIN(upper,start+c); //fprintf(stderr, "c=%d ll=%d uu=%d\n", c, ll, uu); for (int k=ll; k<=uu; ++k) { int row; if (k == ll) { // DELETE : down from (k+1,c-1) row = last_d[k+1]+1; } else if (k == uu) { // INSERT : right from (k-1,c-1) row = last_d[k-1]; } else if ((last_d[k]>=last_d[k+1]) && (last_d[k]+1>=last_d[k-1])) { // SUBSTITUTE row = last_d[k]+1; } else if ((last_d[k+1]+1>=last_d[k-1]) && (last_d[k+1]>=last_d[k])) { // DELETE row = last_d[k+1]+1; } else { // INSERT row = last_d[k-1]; } temp_d[k] = snake(k,row,i2,j2); //fprintf(stderr, "k=%d row=%d temp_d[k]=%d\n", k, row, temp_d[k]); } for (int k=ll; k<=uu; ++k) last_d[k] = temp_d[k]; } // Allocate space for backward vectors #ifdef CHECK_BOUNDS boundedIntArray rlast_d(rlower, rupper-rlower+1); boundedIntArray rtemp_d(rlower, rupper-rlower+1); #else rlast_d = (int *)ckalloc((rupper-rlower+1)*sizeof(int)) - rlower; rtemp_d = (int *)ckalloc((rupper-rlower+1)*sizeof(int)) - rlower; #endif for (int k=rlower; k<=rupper; k++) rlast_d[k] = i2+1; rlast_d[rstart] = rsnake(rstart,i2,i1,j1,i2+1); // Backward computation for (int c=1; c<=rmidc; ++c) { ll = MAX(rlower,rstart-c); uu = MIN(rupper,rstart+c); for (int k=ll; k<=uu; ++k) { int row; if (k == ll) { // INSERT : left from (k+1,c-1) row = rlast_d[k+1]; } else if (k == uu) { // DELETE : up from (k-1,c-1) row = rlast_d[k-1]-1; } else if ((rlast_d[k]-1<=rlast_d[k+1]) && (rlast_d[k]-1<=rlast_d[k-1]-1)) { // SUBSTITUTE row = rlast_d[k]-1; } else if ((rlast_d[k-1]-1<=rlast_d[k+1]) && 
(rlast_d[k-1]-1<=rlast_d[k]-1)) { // DELETE row = rlast_d[k-1]-1; } else { // INSERT row = rlast_d[k+1]; } rtemp_d[k] = rsnake(k,row,i1,j1,i2+1); } for (int k=ll; k<=uu; ++k) rlast_d[k] = rtemp_d[k]; } // Find (mi, mj) such that the distance from (i1, j1) to (mi, mj) // is midc and the distance from (mi, mj) to (i2, j2) is rmidc. int flag = 0; int mi = 0; int mj = 0; ll = MAX(lower,rlower); uu = MIN(upper,rupper); //fprintf(stderr, "ll=%d uu=%d\n", ll, uu); for (int k=ll; k<=uu; ++k) { //fprintf(stderr, "last_d[%d] = %d rlast_d[%d] = %d\n", k, last_d[k], k, rlast_d[k]); if (last_d[k] >= rlast_d[k]) { if (last_d[k] - i1 >= i2 - rlast_d[k]) { mi = last_d[k]; mj = k+mi; } else { mi = rlast_d[k]; mj = k+mi; } flag = 1; break; } } #ifndef CHECK_BOUNDS ckfree(last_d + lower); ckfree(rlast_d + rlower); ckfree(temp_d + lower); ckfree(rtemp_d + rlower); #endif //fprintf(stderr, "flag=%d mi=%d mj=%d\n", flag, mi, mj); if (flag == 0) { //fprintf(stderr, "Sim4::align_path()-- warning: something wrong when dividing\n"); #if 0 // Pick the middle k, keep going. 
int k= ll + (uu-ll) / 2; if (last_d[k] - i1 >= i2 - rlast_d[k]) { mi = last_d[k]; mj = k+mi; } else { mi = rlast_d[k]; mj = k+mi; } #else //kill(getpid(), SIGSEGV); *head = 0L; *tail = 0L; return; #endif } // Find a path from (i1,j1) to (mi,mj) align_path(i1,j1,mi,mj,midc,&head1,&tail1); // Find a path from (mi,mj) to (i2,j2) align_path(mi,mj,i2,j2,rmidc,&head2,&tail2); // Join these two paths together if (head1) tail1->next = head2; else head1 = head2; *head = head1; if (head2) *tail = tail2; else *tail = tail1; } void Sim4::path(int i1, int j1, char type1, int i2, int j2, char type2, int dist, edit_script **head, edit_script **tail) { int *SS, *DD, *II; /* Forward vectors */ int *RS, *RD, *RI; /* Backward vectors */ edit_script *head1, *tail1, *head2, *tail2; int midc, rmidc; int start, lower, upper; int rstart, rlower, rupper; int c, k, t1, t2, t; int maxint; int mi, mj, mtype; char flag; /* printf("i1=%d,j1=%d,type1=%d,i2=%d,j2=%d,type2=%d,dist=%d\n",i1,j1,type1,i2,j2,type2,dist); */ /* Boundary cases */ if (i1 == i2) { if (j1 == j2) *head = NULL; else { head1 = (edit_script *) ckalloc(sizeof(edit_script)); head1->op_type = INSERT; head1->num = j2-j1; head1->next = NULL; *head = *tail = head1; } return; } if (j1 == j2) { head1 = (edit_script *) ckalloc(sizeof(edit_script)); head1->op_type = DELETE; head1->num = i2-i1; head1->next = NULL; *head = *tail = head1; return; } if (dist <= 1) { if (j2-i2 == j1-i1) { head1 = (edit_script *) ckalloc(sizeof(edit_script)); head1->op_type = SUBSTITUTE; head1->num = i2-i1; head1->next = NULL; *head = *tail = head1; } else if (j2-i2 > j1-i1) { if (type1 == INSERT) { head1 = (edit_script *) ckalloc(sizeof(edit_script)); head1->op_type = INSERT; head1->num = 1; head2 = (edit_script *) ckalloc(sizeof(edit_script)); head2->op_type = SUBSTITUTE; head2->num = i2-i1; } else { head1 = (edit_script *) ckalloc(sizeof(edit_script)); head1->op_type = SUBSTITUTE; head1->num = i2-i1; head2 = (edit_script *) ckalloc(sizeof(edit_script)); 
head2->op_type = INSERT; head2->num = 1; } head1->next = head2; head2->next = NULL; *head = head1; *tail = head2; } else if (j2-i2 < j1-i1) { if (type1 == DELETE) { head1 = (edit_script *) ckalloc(sizeof(edit_script)); head1->op_type = DELETE; head1->num = 1; head2 = (edit_script *) ckalloc(sizeof(edit_script)); head2->op_type = SUBSTITUTE; head2->num = j2-j1; } else { head1 = (edit_script *) ckalloc(sizeof(edit_script)); head1->op_type = SUBSTITUTE; head1->num = j2-j1; head2 = (edit_script *) ckalloc(sizeof(edit_script)); head2->op_type = DELETE; head2->num = 1; } head1->next = head2; head2->next = NULL; *head = head1; *tail = head2; } return; } /* Divide the problem at the middle cost */ midc = dist/2; rmidc = dist - midc; /* Compute the boundary diagonals */ start = j1 - i1; lower = MAX(j1-i2, start-midc); upper = MIN(j2-i1, start+midc); rstart = j2-i2; rlower = MAX(j1-i2, rstart-rmidc); rupper = MIN(j2-i1, rstart+rmidc); /* Allocate space for forward vectors */ SS = (int *)ckalloc((upper-lower+1)*sizeof(int)) - lower; DD = (int *)ckalloc((upper-lower+2)*sizeof(int)) - lower; II = (int *)ckalloc((upper-lower+2)*sizeof(int)) - lower + 1; /* Forward computation */ for (k=lower; k<=upper; ++k) SS[k] = -99999; for (k=lower; k<=upper+1; ++k) DD[k] = -99999; for (k=lower-1; k<=upper; ++k) II[k] = -99999; if (type1 == SUBSTITUTE) SS[start] = snake(start, i1, i2, j2); else if (type1 == DELETE) { DD[start] = i1; SS[start] = snake(start,i1,i2,j2); } else { II[start] = i1; SS[start] = snake(start,i1,i2,j2); } for (c=1; c<=midc; ++c) { t = MAX(lower, start-c); t1 = II[t-1]; for (k=t; k<=MIN(upper, start+c); ++k) { t2 = II[k]; II[k] = MAX(t1, SS[k]); t1 = t2; DD[k] = MAX(DD[k+1]+1, SS[k]); SS[k] = snake(k, MIN(j2-k,MAX(MAX(SS[k]+1, II[k]), DD[k])), i2, j2); } } /* Allocate space for backward vectors */ RS = (int *)ckalloc((rupper-rlower+1)*sizeof(int)) - rlower; RD = (int *)ckalloc((rupper-rlower+2)*sizeof(int)) - rlower + 1; RI = (int 
*)ckalloc((rupper-rlower+2)*sizeof(int)) - rlower; /* Backward computation */ maxint = i2 + dist + _estLen; for (k=rlower; k<=rupper; ++k) RS[k] = maxint; for (k=rlower-1; k<=rupper; ++k) RD[k] = maxint; for (k=rlower; k<=rupper+1; ++k) RI[k] = maxint; if (type2 == SUBSTITUTE) RI[rstart] = RD[rstart] = RS[rstart] = rsnake(rstart, i2, i1, j1, i2+1); else if (type2 == DELETE) RD[rstart] = i2; else RI[rstart] = i2; for (c=1; c<=rmidc; ++c) { t = MAX(rlower, rstart-c); t1 = RD[t-1]; for (k=t; k<=MIN(rupper, rstart+c); ++k) { #if 0 int x = MIN(MIN(RS[k]-1,RD[k]),RI[k]); printf("<<<%d>>>", x); assert(0<=x); assert (x<=_estLen); printf("%d", x); #endif RS[k] = rsnake(k, MAX(j1-k, MIN(MIN(RS[k]-1,RD[k]),RI[k])),i1,j1,i2+1); t2 = RD[k]; RD[k] = MIN(t1-1, RS[k]); t1 = t2; RI[k] = MIN(RI[k+1], RS[k]); } } /* Find (mi, mj, mtype) such that the distance from (i1, j1, type1) to (mi, mj, mtype) is midc and the distance from (mi, mj, mtype) to (i2, j2, type2) is rmidc. */ flag = 0; for (k=MAX(lower,rlower); k<=MIN(upper,rupper);++k) { /* printf("k=%d, SS=%d, RS=%d, DD=%d, RD=%d, II=%d, RI=%d\n",k,SS[k],RS[k],DD[k],RD[k],II[k],RI[k]); */ if (SS[k]>=RS[k] || DD[k]>=RD[k] || II[k]>=RI[k]) { if (DD[k]>=RD[k]) { mi = DD[k]; mj = k+mi; mtype = DELETE; } else if (II[k] >= RI[k]) { mi = II[k]; mj = k+mi; mtype = INSERT; } else { mi = SS[k]; mj = k+mi; mtype = SUBSTITUTE; } /* printf("mi=%d, mj=%d, mtype=%d\n", mi, mj, mtype); */ flag = 1; break; } } /* Free working vectors */ ckfree(SS+lower); ckfree(DD+lower); ckfree(II+lower-1); ckfree(RS+rlower); ckfree(RD+rlower-1); ckfree(RI+rlower); if (flag) { /* Find a path from (i1,j1,type1) to (mi,mj,mtype) */ path(i1,j1,type1,mi,mj,mtype,midc,&head1,&tail1); /* Find a path from (mi,mj,mtype) to (i2,j2,type2) */ path(mi,mj,mtype,i2,j2,type2,rmidc,&head2,&tail2); /* Join these two paths together */ if (head1) tail1->next = head2; else head1 = head2; } else { printf("Something wrong when dividing\n"); head1 = NULL; } *head = head1; if (head2) 
*tail = tail2; else *tail = tail1; } // Condense_script - merge contiguous operations of the same type together void Sim4::Condense_script(edit_script *head) { edit_script *tp, *tp1; tp = head; while (tp != NULL) { while (((tp1 = tp->next) != NULL) && (tp->op_type == tp1->op_type)) { tp->num = tp->num + tp1->num; tp->next = tp1->next; ckfree(tp1); } tp = tp->next; } } // Flip_script - reverse the script list void Sim4::Flip_script(struct edit_script **script) { struct edit_script *ep, *ahead, *behind; ahead = *script; ep = NULL; while (ahead!=NULL) { behind = ep; ep = ahead; ahead = ahead->next; ep->next = behind; } *script = ep; } void Sim4::align_reverse(int *S) { int auxi, *begi, *endi; begi = S; endi = S + *(S-1); while (begi < endi) { auxi = *begi; *begi = *--endi; *endi = auxi; begi++; } return; } void Sim4::Free_script(edit_script *head) { edit_script *tp, *tp1; tp = head; while (tp != NULL) { tp1 = tp->next; ckfree(tp); tp = tp1; } } kmer-code-2013-trunk/libsim4/sim4core/sites_score.C0000644000000000000000000033421511415365503020677 0ustar rootroot#include "sim4.H" /* DO NOT REMOVE or MODIFY !!!! 
*/ double score_ex_acc[NUM_VALUES_SCORES] = { -1.335601, 0.000100, 0.000100, 0.000100, -1.971125, 0.000100, 0.000100, 0.000100, -0.730849, 0.000100, 0.000100, 0.000100, -2.152442, 0.000100, 0.000100, 0.000100, -1.384016, -1.765491, -1.670181, -0.941316, -1.355893, -1.392776, -2.624094, -0.864190, -1.345398, -1.730732, -1.729560, -0.954391, -2.123424, -1.682619, -0.996441, -1.123004, -1.065167, -1.683004, -1.656570, -1.277539, -1.150894, -1.393289, -2.468421, -1.047909, -1.235876, -1.477038, -1.500624, -1.354346, -1.750241, -1.494114, -1.103043, -1.309478, -1.210955, -1.456643, -1.668048, -1.271341, -1.206852, -1.229877, -2.389307, -1.149356, -1.280156, -1.436382, -1.367875, -1.471539, -1.900284, -1.328006, -1.151218, -1.312237, -1.143372, -1.554471, -1.672254, -1.265377, -1.180706, -1.344646, -2.311599, -1.099003, -1.071127, -1.494756, -1.523684, -1.536342, -1.845461, -1.444636, -1.186455, -1.200931, -1.234519, -1.538041, -1.343193, -1.455803, -1.159987, -1.274190, -2.230314, -1.206116, -1.275844, -1.464866, -1.346847, -1.471256, -1.854969, -1.410742, -1.038832, -1.403594, -1.186122, -1.694813, -1.220024, -1.533681, -1.062437, -1.260450, -2.195157, -1.348922, -1.156730, -1.458416, -1.401923, -1.576199, -1.828613, -1.491434, -0.984148, -1.424800, -1.192180, -1.609438, -1.330244, -1.460895, -1.069538, -1.468074, -2.133364, -1.177571, -1.067348, -1.498477, -1.562900, -1.500212, -1.915968, -1.360152, -1.186611, -1.234690, -1.190480, -1.676417, -1.238583, -1.518284, -1.077172, -1.353662, -2.376455, -1.176777, -1.208407, -1.452132, -1.411385, -1.498562, -1.977461, -1.423799, -1.011984, -1.357570, -1.253323, -1.614677, -1.158607, -1.601630, -1.108317, -1.231266, -2.217624, -1.312688, -1.138353, -1.494677, -1.305072, -1.691845, -1.863840, -1.414671, -1.023563, -1.416317, -1.230936, -1.592661, -1.327380, -1.429510, -1.138540, -1.337023, -2.174752, -1.192535, -1.071345, -1.557398, -1.441982, -1.559172, -1.962656, -1.361470, -1.098164, -1.310265, -1.200985, -1.614838, 
-1.205770, -1.605854, -1.129803, -1.301737, -2.173714, -1.234123, -1.214022, -1.466760, -1.364150, -1.529103, -1.836928, -1.439748, -0.999938, -1.444753, -1.220956, -1.681434, -1.208440, -1.512845, -1.138898, -1.273579, -2.212360, -1.235952, -1.117383, -1.493132, -1.369361, -1.640269, -1.915643, -1.392717, -0.986107, -1.463921, -1.159009, -1.639743, -1.295528, -1.521335, -1.113767, -1.455226, -2.176386, -1.124294, -1.091340, -1.459191, -1.506578, -1.559971, -1.815721, -1.420894, -1.168590, -1.255349, -1.262891, -1.574610, -1.245838, -1.503383, -1.123450, -1.295129, -2.189671, -1.241233, -1.207579, -1.398026, -1.392795, -1.581747, -1.963610, -1.369669, -1.005964, -1.428092, -1.277933, -1.569773, -1.202048, -1.547794, -1.089207, -1.209741, -2.155384, -1.388762, -1.195776, -1.443453, -1.370980, -1.572326, -1.888331, -1.477326, -0.951149, -1.451870, -1.182852, -1.643904, -1.297299, -1.482349, -1.122438, -1.418616, -2.104960, -1.169200, -1.065810, -1.458160, -1.525526, -1.582889, -1.798883, -1.443818, -1.085828, -1.343735, -1.295488, -1.631455, -1.188992, -1.486942, -1.079254, -1.308955, -2.221961, -1.267081, -1.229971, -1.438389, -1.328760, -1.581902, -1.940308, -1.461267, -0.939430, -1.454370, -1.229625, -1.588928, -1.214000, -1.577713, -1.103814, -1.213966, -2.208966, -1.341072, -1.128032, -1.511629, -1.317179, -1.671899, -1.788370, -1.349624, -1.049337, -1.499419, -1.184555, -1.584789, -1.335215, -1.487110, -1.102734, -1.384240, -2.140066, -1.204384, -1.080200, -1.448178, -1.446532, -1.660263, -1.888446, -1.298977, -1.120866, -1.386753, -1.257125, -1.594577, -1.209326, -1.541142, -1.120647, -1.277587, -2.190125, -1.261181, -1.143122, -1.499606, -1.406558, -1.546581, -1.874724, -1.458448, -0.977085, -1.437171, -1.237747, -1.567878, -1.247977, -1.539953, -1.067333, -1.325089, -2.162207, -1.290165, -1.171568, -1.424301, -1.258353, -1.799866, -1.961282, -1.384601, -1.057113, -1.341548, -1.219381, -1.569864, -1.269042, -1.535199, -1.125630, -1.395079, -2.138194, 
-1.171600, -1.154665, -1.476627, -1.399043, -1.562569, -1.892284, -1.352070, -1.121797, -1.328500, -1.273174, -1.577905, -1.180112, -1.577905, -1.151614, -1.281105, -2.195094, -1.221474, -1.197549, -1.459232, -1.345588, -1.583452, -1.890951, -1.487270, -0.960853, -1.424966, -1.230290, -1.671687, -1.193083, -1.529807, -1.132470, -1.286841, -2.025846, -1.310298, -1.164790, -1.456458, -1.331953, -1.655532, -1.802441, -1.479312, -0.948904, -1.513471, -1.234540, -1.613452, -1.356870, -1.376871, -1.079244, -1.383257, -2.142677, -1.230841, -1.099347, -1.514228, -1.403705, -1.603214, -1.910664, -1.345006, -1.051746, -1.418188, -1.232042, -1.624815, -1.189179, -1.575595, -1.141172, -1.274703, -2.270131, -1.211524, -1.238230, -1.446198, -1.316615, -1.576981, -1.967337, -1.303777, -1.028560, -1.464708, -1.275878, -1.554886, -1.227546, -1.529750, -1.087590, -1.247032, -2.241284, -1.311952, -1.149444, -1.505604, -1.361816, -1.584215, -1.890155, -1.479980, -0.986981, -1.391873, -1.205927, -1.612003, -1.319745, -1.452885, -1.154726, -1.309772, -2.180861, -1.197227, -1.054008, -1.504077, -1.491871, -1.588222, -1.922668, -1.377356, -1.103326, -1.310173, -1.290627, -1.580910, -1.184747, -1.545100, -1.120620, -1.303989, -2.160765, -1.247442, -1.205392, -1.404328, -1.353642, -1.626606, -1.845827, -1.400752, -1.009410, -1.464213, -1.199175, -1.618591, -1.258856, -1.530622, -1.111575, -1.293897, -2.191425, -1.255259, -1.133596, -1.475954, -1.370924, -1.631207, -1.873750, -1.450602, -0.992430, -1.421509, -1.206243, -1.619514, -1.287183, -1.484401, -1.175333, -1.297996, -2.227741, -1.169785, -1.135417, -1.415790, -1.458772, -1.592304, -1.882364, -1.357552, -1.108091, -1.345953, -1.312815, -1.592356, -1.176732, -1.517733, -1.129410, -1.324668, -2.165735, -1.216655, -1.214091, -1.415233, -1.324486, -1.638808, -1.827456, -1.439690, -0.952614, -1.530419, -1.230672, -1.569762, -1.202320, -1.612880, -1.120874, -1.305796, -2.183768, -1.236386, -1.100460, -1.534065, -1.319174, -1.691360, 
-1.848282, -1.382408, -1.009656, -1.482037, -1.192138, -1.600549, -1.333431, -1.465028, -1.112761, -1.336876, -2.204917, -1.209237, -1.171048, -1.447301, -1.457089, -1.505797, -1.964450, -1.340752, -1.128628, -1.292288, -1.270576, -1.656773, -1.154380, -1.544945, -1.084854, -1.344365, -2.158925, -1.252133, -1.142498, -1.604726, -1.280848, -1.598336, -1.943552, -1.419084, -0.981092, -1.427248, -1.261372, -1.577526, -1.230282, -1.523079, -1.154926, -1.264728, -2.117872, -1.264728, -1.135020, -1.492831, -1.286715, -1.727814, -2.002417, -1.366058, -0.948974, -1.501738, -1.179219, -1.592662, -1.325051, -1.499202, -1.137299, -1.365658, -2.198894, -1.160985, -1.143767, -1.502423, -1.379694, -1.574372, -1.753819, -1.421064, -1.167477, -1.293604, -1.289000, -1.635497, -1.155149, -1.539037, -1.159237, -1.269238, -2.193310, -1.225258, -1.192264, -1.462796, -1.356516, -1.573481, -1.875383, -1.414488, -0.972655, -1.489099, -1.201797, -1.596451, -1.251610, -1.557376, -1.086788, -1.345506, -2.132659, -1.259583, -1.161896, -1.554457, -1.231563, -1.692996, -1.835473, -1.353477, -1.010639, -1.522640, -1.200165, -1.544028, -1.386294, -1.446740, -1.074299, -1.377696, -2.040487, -1.286129, -1.141172, -1.448234, -1.451434, -1.554304, -1.895922, -1.393515, -1.119513, -1.290330, -1.278099, -1.589413, -1.202082, -1.528686, -1.133991, -1.260209, -2.205311, -1.257224, -1.209663, -1.363576, -1.343806, -1.686825, -1.809300, -1.416258, -1.049302, -1.412930, -1.232036, -1.563347, -1.239828, -1.563347, -1.142506, -1.268012, -2.052407, -1.305025, -1.153453, -1.503497, -1.309649, -1.649278, -1.905347, -1.401502, -1.026262, -1.399752, -1.268921, -1.569353, -1.282114, -1.455708, -1.132240, -1.336090, -2.353680, -1.140062, -1.142072, -1.493786, -1.405637, -1.555371, -1.903114, -1.356570, -1.131052, -1.306891, -1.258505, -1.542294, -1.232830, -1.557960, -1.132543, -1.340082, -2.076069, -1.236012, -1.208791, -1.443004, -1.352110, -1.577305, -1.834336, -1.406362, -1.049210, -1.406362, -1.202462, 
-1.581855, -1.280934, -1.531665, -1.143636, -1.378359, -2.103631, -1.179809, -1.113755, -1.533405, -1.290459, -1.710739, -1.889009, -1.370414, -1.001542, -1.480762, -1.170553, -1.576018, -1.343132, -1.505163, -1.124063, -1.364014, -2.263498, -1.153836, -1.103987, -1.434931, -1.465954, -1.612121, -1.806562, -1.406374, -1.123997, -1.325111, -1.293344, -1.609088, -1.144132, -1.574661, -1.146531, -1.243449, -2.237143, -1.247883, -1.280330, -1.423670, -1.249746, -1.636561, -1.869786, -1.379057, -1.007272, -1.474848, -1.231473, -1.538100, -1.246328, -1.580898, -1.067429, -1.358306, -2.190256, -1.247839, -1.189061, -1.519773, -1.234607, -1.683179, -1.853150, -1.360364, -1.029269, -1.472192, -1.160772, -1.650320, -1.338037, -1.459265, -1.165275, -1.321014, -2.227810, -1.159847, -1.137573, -1.486277, -1.449789, -1.520645, -1.786011, -1.433047, -1.155640, -1.276774, -1.273726, -1.526127, -1.221862, -1.569467, -1.124703, -1.297393, -2.251947, -1.214651, -1.184638, -1.402477, -1.378085, -1.629154, -1.885832, -1.346835, -1.049159, -1.435472, -1.253333, -1.548590, -1.209055, -1.592472, -1.097402, -1.366318, -2.159215, -1.218061, -1.162369, -1.517260, -1.245751, -1.713583, -1.815168, -1.450890, -0.992609, -1.460073, -1.191243, -1.508802, -1.325115, -1.564419, -1.097743, -1.384122, -2.183401, -1.193444, -1.184943, -1.467336, -1.449636, -1.473850, -1.916834, -1.387616, -1.119975, -1.283844, -1.260960, -1.606178, -1.170434, -1.581176, -1.196543, -1.252552, -2.121249, -1.230637, -1.272663, -1.474848, -1.349685, -1.461968, -1.949495, -1.398697, -0.974619, -1.455003, -1.202384, -1.580450, -1.249786, -1.574676, -1.130482, -1.275179, -2.250038, -1.229816, -1.194145, -1.477760, -1.278271, -1.658784, -1.858725, -1.448801, -1.029202, -1.378386, -1.227165, -1.538118, -1.269452, -1.555390, -1.190249, -1.377870, -2.119136, -1.128235, -1.148682, -1.413005, -1.483933, -1.547446, -1.819087, -1.369744, -1.154035, -1.315677, -1.321019, -1.563689, -1.154709, -1.567216, -1.173361, -1.290303, 
-2.163656, -1.202032, -1.192784, -1.419760, -1.323339, -1.668087, -1.892492, -1.354595, -1.037684, -1.439787, -1.305802, -1.530746, -1.195561, -1.559998, -1.105983, -1.283630, -2.251292, -1.248895, -1.250128, -1.527339, -1.251445, -1.559087, -1.742785, -1.399683, -1.071944, -1.444134, -1.193063, -1.585055, -1.331716, -1.479512, -1.163570, -1.339203, -2.174835, -1.164913, -1.124019, -1.557180, -1.384367, -1.542634, -1.748455, -1.361913, -1.180146, -1.337302, -1.340001, -1.599512, -1.122041, -1.558056, -1.103363, -1.326506, -2.220324, -1.223259, -1.228985, -1.431598, -1.361062, -1.550719, -1.837148, -1.435005, -1.052543, -1.372132, -1.254306, -1.541988, -1.261086, -1.525902, -1.117554, -1.319094, -2.046308, -1.286150, -1.211171, -1.436218, -1.329430, -1.610949, -1.834968, -1.413187, -1.018570, -1.444331, -1.206837, -1.571380, -1.310536, -1.498684, -1.141625, -1.367710, -2.140900, -1.176157, -1.112502, -1.492081, -1.396306, -1.615202, -1.834206, -1.329906, -1.203973, -1.288233, -1.302269, -1.576013, -1.190351, -1.527059, -1.130388, -1.362076, -2.204436, -1.169086, -1.221474, -1.475480, -1.298146, -1.592098, -1.922147, -1.421787, -0.983639, -1.433483, -1.243504, -1.549269, -1.224861, -1.582668, -1.098612, -1.287155, -2.220755, -1.265549, -1.158097, -1.535490, -1.321383, -1.590615, -1.736951, -1.484592, -1.054449, -1.390445, -1.225500, -1.572788, -1.353111, -1.425084, -1.159183, -1.265156, -2.313011, -1.187118, -1.139970, -1.466987, -1.430498, -1.558924, -1.770974, -1.352993, -1.177049, -1.334915, -1.329297, -1.460681, -1.224210, -1.564136, -1.173926, -1.349868, -2.237171, -1.124501, -1.312813, -1.406658, -1.307939, -1.534329, -1.874969, -1.419242, -1.065185, -1.346770, -1.295130, -1.600617, -1.163070, -1.551921, -1.120564, -1.309440, -2.224214, -1.218138, -1.234378, -1.477862, -1.303851, -1.563628, -1.764318, -1.375227, -1.029143, -1.520507, -1.226343, -1.571334, -1.319214, -1.463074, -1.129765, -1.313388, -2.257403, -1.192831, -1.240312, -1.464441, -1.438507, 
-1.417921, -1.746526, -1.383727, -1.192895, -1.303290, -1.267040, -1.632153, -1.169949, -1.549073, -1.098612, -1.328065, -2.313123, -1.194929, -1.250583, -1.473345, -1.291024, -1.562958, -1.872377, -1.422208, -1.005675, -1.430189, -1.245719, -1.529425, -1.197751, -1.641273, -1.075996, -1.352767, -2.179684, -1.246739, -1.224076, -1.500329, -1.247333, -1.631459, -1.780586, -1.447647, -1.031035, -1.428366, -1.170722, -1.518832, -1.359915, -1.540811, -1.115529, -1.318535, -2.164227, -1.238245, -1.159120, -1.415898, -1.463225, -1.550971, -1.787364, -1.388941, -1.141364, -1.332273, -1.286750, -1.553734, -1.175390, -1.591217, -1.135847, -1.339364, -2.260368, -1.163069, -1.276618, -1.450602, -1.337339, -1.495893, -1.848166, -1.411771, -1.074574, -1.357436, -1.228730, -1.580892, -1.182330, -1.634855, -1.096925, -1.237889, -2.256794, -1.304078, -1.243246, -1.480356, -1.281871, -1.577555, -1.797017, -1.415353, -1.035689, -1.442291, -1.187680, -1.535886, -1.348129, -1.513757, -1.178655, -1.273560, -2.247971, -1.181361, -1.198796, -1.414411, -1.414411, -1.549730, -1.771203, -1.384087, -1.159182, -1.325852, -1.243145, -1.632309, -1.146801, -1.617494, -1.140095, -1.302539, -2.213612, -1.207093, -1.301889, -1.451267, -1.324471, -1.479438, -1.912758, -1.387901, -1.098211, -1.312072, -1.201256, -1.644238, -1.203843, -1.579951, -1.168149, -1.306349, -2.174667, -1.188740, -1.245053, -1.580450, -1.254506, -1.509705, -1.743662, -1.432584, -1.062129, -1.424159, -1.207826, -1.608322, -1.315542, -1.458429, -1.217778, -1.313388, -2.155218, -1.141537, -1.197917, -1.414791, -1.355638, -1.622430, -1.781883, -1.448575, -1.104835, -1.326108, -1.242346, -1.624130, -1.207970, -1.535223, -1.145132, -1.261081, -2.338921, -1.197199, -1.322975, -1.449561, -1.242463, -1.559160, -1.821286, -1.404393, -1.073035, -1.383508, -1.219449, -1.624259, -1.193574, -1.587610, -1.061617, -1.356589, -2.172409, -1.263499, -1.199487, -1.456113, -1.311691, -1.628894, -1.681399, -1.481933, -1.048532, -1.442921, 
-1.245178, -1.547315, -1.267568, -1.524287, -1.154946, -1.309561, -2.203378, -1.188896, -1.205380, -1.465698, -1.390523, -1.511585, -1.763734, -1.361676, -1.202662, -1.302056, -1.126271, 0.000100, 0.000100, 0.000100, -1.600280, 0.000100, 0.000100, 0.000100, -1.440370, 0.000100, 0.000100, 0.000100, -1.439358, 0.000100, 0.000100, 0.000100, -1.037596, -1.784752, -1.368686, -1.498725, -0.969139, -1.430044, -2.783348, -1.141120, -1.132712, -1.548982, -1.416992, -1.500874, -1.447700, -1.632824, -1.309515, -1.205436, -1.040898, -1.799518, -1.386480, -1.463045, -0.963590, -1.488005, -2.767785, -1.109100, -1.055648, -1.635182, -1.463108, -1.488989, -1.426120, -1.636021, -1.327101, -1.204766, -1.129552, -1.740917, -1.366506, -1.400531, -0.971197, -1.487555, -2.855071, -1.085022, -1.193866, -1.526188, -1.376560, -1.482169, -1.548347, -1.605485, -1.326118, -1.135993, -1.100949, -1.810472, -1.366225, -1.391107, -0.983656, -1.435603, -2.817303, -1.113768, -1.088187, -1.674189, -1.373706, -1.502620, -1.486649, -1.650060, -1.316997, -1.158701, -1.122221, -1.787219, -1.319179, -1.428517, -0.984662, -1.476402, -2.714434, -1.103423, -1.187851, -1.653014, -1.329156, -1.431494, -1.466866, -1.649626, -1.339846, -1.154087, -1.090029, -1.768602, -1.397145, -1.402749, -0.972428, -1.446867, -2.775311, -1.126395, -1.155890, -1.654090, -1.350847, -1.448442, -1.461769, -1.657078, -1.307973, -1.180544, -1.092893, -1.805843, -1.348418, -1.423921, -1.006053, -1.478381, -2.720757, -1.077313, -1.207902, -1.588394, -1.355462, -1.430894, -1.477705, -1.621565, -1.363536, -1.144147, -1.085392, -1.777565, -1.383597, -1.416728, -0.951430, -1.464386, -2.832186, -1.127914, -1.109729, -1.685765, -1.366911, -1.468999, -1.514412, -1.633126, -1.334034, -1.135280, -1.147771, -1.716052, -1.364878, -1.396465, -0.972880, -1.463530, -2.865694, -1.097676, -1.174990, -1.626847, -1.360646, -1.434919, -1.501165, -1.583283, -1.350575, -1.162388, -1.118144, -1.755150, -1.392228, -1.379472, -0.983027, -1.461923, 
-2.825658, -1.094331, -1.125699, -1.651206, -1.397985, -1.441125, -1.457759, -1.599563, -1.399085, -1.144337, -1.129659, -1.750671, -1.333735, -1.428334, -0.992917, -1.431679, -2.847736, -1.100747, -1.188522, -1.536829, -1.395296, -1.458834, -1.510725, -1.620993, -1.316081, -1.160392, -1.101488, -1.754472, -1.402788, -1.391449, -0.994619, -1.444694, -2.816815, -1.095055, -1.152710, -1.615064, -1.396846, -1.435628, -1.485824, -1.621041, -1.352133, -1.147896, -1.108305, -1.788701, -1.355932, -1.406506, -0.994473, -1.453574, -2.819786, -1.088475, -1.196367, -1.594841, -1.376259, -1.417797, -1.486754, -1.611917, -1.368832, -1.139478, -1.132685, -1.734931, -1.375451, -1.391528, -1.007601, -1.384571, -2.824122, -1.123579, -1.162633, -1.591122, -1.406459, -1.432832, -1.486578, -1.579927, -1.353164, -1.172981, -1.110632, -1.786596, -1.370098, -1.390152, -1.007251, -1.425332, -2.856373, -1.088050, -1.204719, -1.582583, -1.362712, -1.432069, -1.505046, -1.627606, -1.348084, -1.133650, -1.099678, -1.756700, -1.419732, -1.375831, -1.009766, -1.425926, -2.825292, -1.090292, -1.148516, -1.630048, -1.380470, -1.445992, -1.500193, -1.563789, -1.395807, -1.139627, -1.119926, -1.748937, -1.379355, -1.394341, -0.974565, -1.408163, -2.893751, -1.131085, -1.219123, -1.561759, -1.390892, -1.402868, -1.500151, -1.636566, -1.331887, -1.144831, -1.107160, -1.728876, -1.385630, -1.419421, -0.970204, -1.452216, -2.748959, -1.130262, -1.181169, -1.621320, -1.362764, -1.429242, -1.485533, -1.616611, -1.371334, -1.135444, -1.137082, -1.763959, -1.346103, -1.395824, -0.998285, -1.427183, -2.817670, -1.103343, -1.180518, -1.596952, -1.390117, -1.421625, -1.489618, -1.610295, -1.386794, -1.124430, -1.116263, -1.745481, -1.405639, -1.375443, -1.008551, -1.394440, -2.866718, -1.107465, -1.154666, -1.592375, -1.399592, -1.449457, -1.471385, -1.621312, -1.360288, -1.151486, -1.115641, -1.797624, -1.373824, -1.372527, -1.010982, -1.420947, -2.803558, -1.096436, -1.210434, -1.563439, -1.365486, 
-1.438595, -1.528921, -1.560112, -1.348835, -1.159715, -1.102655, -1.749688, -1.418201, -1.378191, -1.000249, -1.423572, -2.820570, -1.103259, -1.144354, -1.599328, -1.398657, -1.458393, -1.468434, -1.591596, -1.380171, -1.156523, -1.117323, -1.757429, -1.356246, -1.415799, -1.003464, -1.390065, -2.808274, -1.126936, -1.215629, -1.616261, -1.338513, -1.416336, -1.488300, -1.606673, -1.360643, -1.148241, -1.126311, -1.743883, -1.404856, -1.364414, -0.979704, -1.429952, -2.819989, -1.121930, -1.163389, -1.603140, -1.353596, -1.478405, -1.493600, -1.620567, -1.355430, -1.140015, -1.129339, -1.750269, -1.398605, -1.362247, -1.015107, -1.408231, -2.826934, -1.097014, -1.198853, -1.590029, -1.394071, -1.400487, -1.498766, -1.583641, -1.420240, -1.109537, -1.111094, -1.732346, -1.420046, -1.377417, -1.010322, -1.416869, -2.811313, -1.098711, -1.183085, -1.603880, -1.410708, -1.391980, -1.511164, -1.616073, -1.349504, -1.135378, -1.092299, -1.762656, -1.416958, -1.384230, -0.991101, -1.418866, -2.759871, -1.128420, -1.222521, -1.578551, -1.382878, -1.392779, -1.512551, -1.622017, -1.333280, -1.144002, -1.103343, -1.737175, -1.407523, -1.396449, -1.021297, -1.383406, -2.829530, -1.108333, -1.164309, -1.593690, -1.386855, -1.448905, -1.490179, -1.615717, -1.355478, -1.145404, -1.100528, -1.728022, -1.398047, -1.416381, -1.004054, -1.417066, -2.865305, -1.095959, -1.197338, -1.557674, -1.399188, -1.424702, -1.480965, -1.593994, -1.354883, -1.166383, -1.080517, -1.755001, -1.424867, -1.397831, -1.015071, -1.396198, -2.863323, -1.099589, -1.157040, -1.589954, -1.416250, -1.431176, -1.513473, -1.612226, -1.360713, -1.127214, -1.107955, -1.744252, -1.400473, -1.392271, -0.997921, -1.409712, -2.866910, -1.107854, -1.201282, -1.543529, -1.429959, -1.401420, -1.484209, -1.606771, -1.368944, -1.144420, -1.080199, -1.763655, -1.423002, -1.394062, -0.987412, -1.446748, -2.876851, -1.091197, -1.188613, -1.579726, -1.386390, -1.429833, -1.479287, -1.607892, -1.356032, -1.157696, 
-1.119333, -1.754193, -1.412146, -1.359318, -1.003084, -1.446069, -2.935178, -1.065261, -1.189625, -1.616130, -1.393082, -1.391549, -1.497491, -1.617884, -1.356836, -1.137819, -1.086190, -1.752897, -1.395915, -1.420379, -0.991829, -1.433753, -2.842751, -1.101342, -1.189068, -1.592662, -1.385344, -1.419338, -1.492075, -1.584301, -1.374462, -1.148766, -1.108117, -1.755378, -1.366134, -1.419286, -1.019912, -1.388676, -2.823840, -1.106874, -1.177407, -1.587164, -1.420713, -1.402924, -1.490007, -1.602995, -1.381971, -1.132418, -1.084284, -1.741287, -1.456347, -1.372488, -1.006540, -1.438776, -2.782787, -1.092327, -1.182775, -1.610384, -1.410401, -1.387429, -1.508961, -1.583511, -1.352623, -1.155036, -1.079251, -1.749611, -1.416770, -1.411356, -1.005714, -1.437290, -2.877651, -1.077695, -1.205867, -1.590660, -1.408968, -1.376814, -1.472634, -1.641902, -1.380314, -1.122183, -1.089408, -1.733487, -1.441638, -1.384787, -0.995813, -1.454324, -2.843641, -1.082329, -1.202403, -1.622215, -1.373014, -1.391405, -1.497100, -1.586819, -1.381384, -1.138135, -1.092886, -1.740050, -1.395978, -1.420264, -0.992713, -1.435504, -2.820122, -1.103115, -1.218408, -1.586061, -1.371750, -1.402804, -1.494981, -1.585067, -1.375374, -1.145490, -1.082524, -1.785688, -1.424208, -1.374842, -1.010960, -1.425555, -2.893535, -1.077707, -1.206546, -1.578786, -1.387988, -1.406624, -1.450446, -1.638883, -1.384456, -1.136696, -1.106158, -1.739669, -1.404900, -1.393511, -0.969419, -1.465799, -2.872858, -1.098814, -1.173950, -1.611365, -1.372720, -1.436221, -1.489675, -1.586248, -1.352916, -1.166753, -1.079295, -1.749216, -1.419433, -1.408934, -1.025706, -1.448621, -2.761051, -1.069042, -1.213167, -1.568222, -1.391085, -1.404355, -1.474529, -1.628472, -1.362878, -1.142686, -1.112156, -1.744808, -1.391385, -1.395369, -0.986754, -1.412570, -2.837427, -1.123553, -1.206402, -1.536012, -1.397607, -1.434215, -1.500579, -1.571887, -1.383285, -1.143842, -1.119211, -1.746448, -1.414993, -1.362020, -1.035625, 
-1.423103, -2.804184, -1.068642, -1.195478, -1.585555, -1.394862, -1.407562, -1.484095, -1.608443, -1.385087, -1.130749, -1.084609, -1.764792, -1.447404, -1.364283, -1.014454, -1.437426, -2.839277, -1.074720, -1.183079, -1.590897, -1.400600, -1.412745, -1.509046, -1.621280, -1.345687, -1.136710, -1.073018, -1.727659, -1.469052, -1.385567, -0.992790, -1.452865, -2.919631, -1.074099, -1.183375, -1.596519, -1.418848, -1.389646, -1.488610, -1.613039, -1.376595, -1.131354, -1.111813, -1.772183, -1.389889, -1.378431, -0.978005, -1.449443, -2.820122, -1.109755, -1.168341, -1.598226, -1.388257, -1.438207, -1.496772, -1.623791, -1.383255, -1.114018, -1.113110, -1.718779, -1.432014, -1.372977, -1.005968, -1.405631, -2.846268, -1.105526, -1.168550, -1.573256, -1.403542, -1.443547, -1.482778, -1.605955, -1.394000, -1.126349, -1.069216, -1.788267, -1.437410, -1.378602, -1.000628, -1.434713, -2.841763, -1.091106, -1.201371, -1.625782, -1.383885, -1.378892, -1.476222, -1.621212, -1.382028, -1.130807, -1.080710, -1.760117, -1.425596, -1.393296, -0.998660, -1.419294, -2.893939, -1.095463, -1.189364, -1.612721, -1.396954, -1.390738, -1.498562, -1.633154, -1.363871, -1.122187, -1.105207, -1.756754, -1.417132, -1.371031, -1.029781, -1.409988, -2.903988, -1.067217, -1.191096, -1.606963, -1.404178, -1.386104, -1.504216, -1.596247, -1.382963, -1.126029, -1.069367, -1.757270, -1.451256, -1.386361, -0.969500, -1.432663, -2.910444, -1.115952, -1.174583, -1.592871, -1.414729, -1.407642, -1.487322, -1.640764, -1.352461, -1.134484, -1.080790, -1.772033, -1.401940, -1.408267, -0.996846, -1.402447, -2.980034, -1.096096, -1.202589, -1.590676, -1.405514, -1.384074, -1.492060, -1.607198, -1.390227, -1.122013, -1.093764, -1.746183, -1.447642, -1.364595, -0.986490, -1.436341, -2.861504, -1.102205, -1.204614, -1.577734, -1.391693, -1.406105, -1.495258, -1.588796, -1.371615, -1.145890, -1.088841, -1.734206, -1.436496, -1.389925, -0.995666, -1.427561, -2.823377, -1.104958, -1.199838, -1.571801, 
-1.410977, -1.397633, -1.511537, -1.603247, -1.365508, -1.130300, -1.097292, -1.753322, -1.421877, -1.379249, -1.007028, -1.420077, -2.814561, -1.099401, -1.191675, -1.599676, -1.416958, -1.378821, -1.517077, -1.587958, -1.357937, -1.142204, -1.079666, -1.729765, -1.444051, -1.398388, -1.008808, -1.401458, -2.825029, -1.109263, -1.183541, -1.602676, -1.386294, -1.416959, -1.502306, -1.612852, -1.368579, -1.128257, -1.085608, -1.731580, -1.453419, -1.380196, -1.005278, -1.414121, -2.867149, -1.096443, -1.175017, -1.617817, -1.400493, -1.400882, -1.487917, -1.637930, -1.387419, -1.108492, -1.121119, -1.751412, -1.414239, -1.356946, -1.011526, -1.404681, -2.827524, -1.103429, -1.212066, -1.614423, -1.383179, -1.375819, -1.512540, -1.600635, -1.371552, -1.126489, -1.088032, -1.719191, -1.445506, -1.393185, -1.019556, -1.400402, -2.808290, -1.101298, -1.202394, -1.592986, -1.364357, -1.424356, -1.475416, -1.621276, -1.380462, -1.132559, -1.103914, -1.743120, -1.447349, -1.353778, -0.986356, -1.435104, -2.844398, -1.106220, -1.210721, -1.575776, -1.417867, -1.374601, -1.518314, -1.589986, -1.375063, -1.126497, -1.111996, -1.743615, -1.395376, -1.392429, -1.015902, -1.422867, -2.888794, -1.075114, -1.222221, -1.592663, -1.382219, -1.382219, -1.495377, -1.618006, -1.359279, -1.137261, -1.090528, -1.729093, -1.441600, -1.386429, -1.025052, -1.437336, -2.809900, -1.068710, -1.202026, -1.561214, -1.394436, -1.420632, -1.483379, -1.592273, -1.397997, -1.131416, -1.085173, -1.716288, -1.436478, -1.407881, -1.018268, -1.420209, -2.788804, -1.091770, -1.214908, -1.599707, -1.370256, -1.397295, -1.509196, -1.610272, -1.400072, -1.101116, -1.075000, -1.751899, -1.460100, -1.374188, -1.013008, -1.401503, -2.867840, -1.097134, -1.220411, -1.570351, -1.385717, -1.399280, -1.480249, -1.631263, -1.392959, -1.113531, -1.093547, -1.752635, -1.438758, -1.368709, -1.009720, -1.442346, -2.847688, -1.074911, -1.203518, -1.590309, -1.396885, -1.391772, -1.506910, -1.619840, -1.374975, 
-1.115870, -1.093045, -1.766362, -1.396785, -1.400553, -0.991178, -1.453055, -2.863656, -1.084868, -1.225684, -1.565904, -1.391993, -1.390444, -1.472658, -1.620457, -1.381920, -1.133885, -1.111509, -1.780463, -1.427281, -1.337819, -1.008894, -1.430317, -2.788068, -1.094805, -1.214274, -1.581644, -1.412639, -1.370665, -1.490178, -1.618602, -1.381363, -1.123143, -1.093618, -1.761577, -1.448472, -1.353653, -1.015050, -1.394369, -2.919128, -1.091700, -1.231212, -1.568539, -1.409596, -1.364649, -1.491935, -1.627550, -1.379019, -1.118323, -1.091818, -1.756495, -1.438218, -1.368868, -0.991826, -1.438063, -2.873564, -1.092978, -1.230992, -1.583161, -1.408599, -1.354072, -1.528802, -1.574514, -1.394996, -1.113942, -1.098813, -1.738982, -1.408348, -1.400445, -1.013380, -1.372857, -2.924011, -1.108935, -1.213430, -1.593020, -1.392742, -1.381825, -1.508419, -1.638652, -1.366635, -1.110072, -1.091017, -1.742816, -1.461844, -1.357551, -0.997190, -1.418709, -2.911225, -1.094670, -1.221318, -1.599664, -1.378222, -1.381633, -1.486327, -1.594584, -1.383942, -1.138759, -1.076303, -1.755954, -1.436422, -1.391801, -0.993697, -1.447438, -2.855403, -1.087406, -1.221016, -1.574256, -1.389901, -1.391074, -1.498641, -1.629441, -1.367970, -1.121153, -1.075410, -1.758174, -1.433135, -1.394637, -1.002481, -1.404941, -2.883767, -1.103443, -1.242030, -1.584432, -1.404295, -1.344774, -1.498074, -1.630279, -1.382150, -1.110098, -1.057530, -1.812951, -1.432606, -1.382982, -0.995229, -1.455031, -2.806057, -1.089083, -1.223332, -1.581554, -1.389968, -1.382249, -1.504249, -1.606143, -1.394442, -1.111137, -1.081093, -1.756007, -1.485396, -1.340826, -1.009673, -1.397732, -2.922146, -1.094559, -1.194712, -1.576307, -1.400837, -1.410256, -1.491029, -1.657111, -1.362780, -1.114066, -1.092304, -1.744072, -1.440955, -1.374174, -1.007079, -1.400416, -2.900640, -1.098913, -1.234793, -1.549192, -1.403799, -1.382208, -1.477759, -1.625122, -1.384035, -1.125783, -1.082985, -1.726922, -1.461630, -1.379407, 
-1.005128, -1.425216, -2.868765, -1.088335, -1.202197, -1.576070, -1.386585, -1.415710, -1.484746, -1.605507, -1.362602, -1.149931, -1.094802, -1.772336, -1.406639, -1.384322, -1.007059, -1.438318, -2.875229, -1.075936, -1.191076, -1.560781, -1.436729, -1.392595, -1.501040, -1.618783, -1.386714, -1.111494}; double score_in_acc[NUM_VALUES_SCORES] = { -1.298283, 0.000100, 0.000100, 0.000100, -1.622017, 0.000100, 0.000100, 0.000100, -1.458005, 0.000100, 0.000100, 0.000100, -1.214697, 0.000100, 0.000100, 0.000100, -1.145865, -1.703467, -1.369585, -1.403288, -1.117524, -1.324925, -2.653849, -1.088537, -1.348761, -1.632038, -1.240590, -1.363774, -1.588764, -1.711509, -1.299845, -1.071031, -1.141364, -1.769972, -1.417617, -1.316812, -1.161662, -1.357406, -2.658621, -1.022590, -1.405044, -1.571298, -1.270193, -1.323918, -1.525853, -1.719786, -1.286922, -1.116754, -1.172124, -1.799542, -1.339070, -1.336257, -1.089115, -1.356116, -2.768078, -1.069898, -1.428221, -1.591932, -1.246820, -1.311871, -1.512183, -1.698999, -1.303796, -1.123297, -1.118658, -1.866122, -1.370615, -1.329504, -1.158005, -1.318347, -2.745148, -1.038256, -1.294532, -1.622510, -1.297697, -1.364880, -1.585332, -1.682929, -1.295613, -1.091950, -1.175124, -1.762440, -1.361920, -1.333986, -1.148780, -1.361130, -2.802596, -1.005250, -1.343147, -1.612972, -1.314835, -1.305031, -1.574873, -1.677744, -1.282395, -1.112309, -1.204841, -1.785457, -1.348706, -1.298416, -1.090608, -1.367881, -2.619658, -1.089115, -1.350808, -1.562117, -1.381899, -1.272337, -1.542852, -1.632656, -1.290686, -1.152843, -1.132323, -1.808473, -1.330078, -1.388876, -1.126586, -1.356210, -2.695810, -1.047713, -1.406026, -1.599277, -1.307756, -1.264375, -1.464895, -1.776886, -1.285941, -1.129041, -1.172914, -1.800796, -1.339545, -1.334066, -1.116904, -1.383708, -2.902354, -1.001938, -1.385423, -1.602487, -1.321320, -1.267253, -1.560902, -1.669979, -1.324179, -1.091296, -1.142221, -1.740058, -1.335298, -1.417576, -1.097597, -1.407337, 
-2.857893, -1.010205, -1.368180, -1.582172, -1.316975, -1.302111, -1.529619, -1.665285, -1.314267, -1.122166, -1.173501, -1.738112, -1.422474, -1.295543, -1.121358, -1.370921, -2.659760, -1.048895, -1.331900, -1.661338, -1.257432, -1.340339, -1.585292, -1.706759, -1.297611, -1.077412, -1.141557, -1.846318, -1.398164, -1.288039, -1.080940, -1.453179, -2.684389, -1.025451, -1.385411, -1.590607, -1.216727, -1.387179, -1.575101, -1.658963, -1.290183, -1.116399, -1.129464, -1.839745, -1.368877, -1.333538, -1.202765, -1.361236, -2.575360, -1.001938, -1.391921, -1.710158, -1.233316, -1.275682, -1.567292, -1.629505, -1.347328, -1.092341, -1.141873, -1.749575, -1.412487, -1.334102, -1.118394, -1.402927, -2.750161, -1.012246, -1.376783, -1.661600, -1.269187, -1.284764, -1.534313, -1.688213, -1.356004, -1.073359, -1.112985, -1.749840, -1.426994, -1.356377, -1.135721, -1.342624, -2.804386, -1.029722, -1.425389, -1.581417, -1.266159, -1.301993, -1.582710, -1.669579, -1.322885, -1.079132, -1.174950, -1.805213, -1.353433, -1.315373, -1.149802, -1.343735, -2.737633, -1.028040, -1.417797, -1.607193, -1.252314, -1.303938, -1.500160, -1.724404, -1.306176, -1.115436, -1.127993, -1.761055, -1.385552, -1.369364, -1.142369, -1.359143, -2.584507, -1.053762, -1.415975, -1.592558, -1.262853, -1.305413, -1.565542, -1.692378, -1.338019, -1.065432, -1.188735, -1.806730, -1.337009, -1.314631, -1.157453, -1.403994, -2.715597, -0.983674, -1.296644, -1.671192, -1.264946, -1.361505, -1.588939, -1.756872, -1.258698, -1.081017, -1.159958, -1.725224, -1.407720, -1.333069, -1.113047, -1.418660, -2.643312, -1.026440, -1.391427, -1.672839, -1.259827, -1.273453, -1.481151, -1.720110, -1.286072, -1.148149, -1.106656, -1.799803, -1.419618, -1.338649, -1.116414, -1.447828, -2.688522, -0.995642, -1.404210, -1.604685, -1.262993, -1.306818, -1.610432, -1.718930, -1.278948, -1.071160, -1.153233, -1.819835, -1.377840, -1.308554, -1.149025, -1.325424, -2.743720, -1.041193, -1.336838, -1.578454, -1.288128, 
-1.365532, -1.557740, -1.753983, -1.268907, -1.093128, -1.152372, -1.830464, -1.409035, -1.275127, -1.174060, -1.333467, -2.773612, -1.008152, -1.394131, -1.579955, -1.295964, -1.300730, -1.530692, -1.763423, -1.259558, -1.113590, -1.175127, -1.771869, -1.404313, -1.288552, -1.141172, -1.478933, -2.630860, -0.965853, -1.427116, -1.589851, -1.222458, -1.341174, -1.563006, -1.747181, -1.297560, -1.069930, -1.152995, -1.805080, -1.391221, -1.305351, -1.116640, -1.492064, -2.739002, -0.959513, -1.346907, -1.594432, -1.350235, -1.281020, -1.572513, -1.724873, -1.318812, -1.058926, -1.184853, -1.739655, -1.399289, -1.302485, -1.148460, -1.297347, -2.749599, -1.062282, -1.398987, -1.613953, -1.270716, -1.296734, -1.559404, -1.733972, -1.312591, -1.067011, -1.148216, -1.886465, -1.389703, -1.265701, -1.205310, -1.360576, -2.762618, -0.965582, -1.377515, -1.615186, -1.231388, -1.358467, -1.479472, -1.735464, -1.413514, -1.042317, -1.128164, -1.778941, -1.433464, -1.312766, -1.205196, -1.391020, -2.734053, -0.950391, -1.378593, -1.558031, -1.280661, -1.348369, -1.582745, -1.720441, -1.299283, -1.070389, -1.158720, -1.753726, -1.368760, -1.352619, -1.132777, -1.392932, -2.692898, -1.016575, -1.346629, -1.729280, -1.232080, -1.306423, -1.579218, -1.722319, -1.292700, -1.076800, -1.153483, -1.770845, -1.438427, -1.283830, -1.100713, -1.381062, -2.716480, -1.049972, -1.453936, -1.639578, -1.223087, -1.280246, -1.584690, -1.697130, -1.324770, -1.061559, -1.161497, -1.726234, -1.408357, -1.329972, -1.122985, -1.352051, -2.756328, -1.042821, -1.366639, -1.634903, -1.260068, -1.322639, -1.581961, -1.657988, -1.328745, -1.081469, -1.206581, -1.722394, -1.346530, -1.339402, -1.201495, -1.350707, -2.756344, -0.976373, -1.415229, -1.649907, -1.265545, -1.262390, -1.559619, -1.710659, -1.300375, -1.088837, -1.151589, -1.757319, -1.406914, -1.322526, -1.133470, -1.392865, -2.718176, -1.011349, -1.343589, -1.630157, -1.328651, -1.278842, -1.538740, -1.782208, -1.329705, -1.042628, 
-1.158253, -1.806897, -1.366341, -1.321389, -1.179569, -1.359516, -2.602168, -1.016965, -1.442838, -1.552482, -1.210506, -1.370518, -1.626707, -1.651198, -1.304156, -1.078213, -1.216685, -1.744185, -1.481141, -1.198574, -1.160668, -1.337291, -2.852797, -1.003800, -1.365241, -1.611568, -1.286738, -1.313281, -1.586128, -1.712145, -1.378789, -1.013568, -1.177988, -1.755419, -1.395878, -1.303233, -1.200645, -1.307483, -2.563950, -1.045546, -1.443508, -1.624633, -1.253234, -1.268197, -1.638091, -1.683333, -1.310915, -1.048902, -1.160245, -1.718607, -1.506045, -1.253550, -1.220805, -1.345968, -2.682112, -0.977365, -1.391116, -1.634677, -1.253419, -1.306891, -1.631241, -1.698267, -1.323899, -1.035026, -1.189085, -1.730086, -1.447889, -1.261725, -1.187804, -1.324379, -2.675646, -1.020875, -1.478874, -1.488924, -1.323581, -1.271859, -1.607579, -1.696562, -1.308736, -1.060872, -1.139975, -1.758488, -1.457200, -1.291056, -1.198848, -1.310261, -2.876024, -0.987919, -1.445783, -1.571075, -1.322438, -1.237420, -1.671989, -1.700829, -1.310975, -1.021547, -1.146307, -1.770461, -1.496668, -1.244577, -1.121967, -1.402136, -2.635135, -1.031192, -1.460009, -1.572059, -1.361069, -1.191510, -1.555920, -1.681375, -1.316319, -1.094322, -1.177529, -1.710966, -1.473374, -1.265427, -1.190159, -1.333896, -2.675406, -1.011978, -1.492018, -1.489945, -1.329603, -1.254865, -1.494918, -1.662604, -1.391925, -1.086213, -1.149385, -1.802479, -1.488023, -1.229427, -1.269360, -1.295157, -2.385096, -1.041136, -1.474900, -1.605184, -1.348642, -1.168756, -1.656966, -1.690639, -1.280618, -1.058384, -1.152680, -1.626732, -1.490415, -1.338036, -1.164243, -1.307589, -2.748562, -1.040297, -1.360435, -1.584459, -1.367502, -1.260016, -1.688485, -1.672037, -1.353435, -0.996998, -1.193228, -1.680168, -1.476678, -1.265772, -1.212720, -1.283884, -2.831789, -1.003086, -1.396340, -1.521119, -1.404062, -1.243289, -1.633779, -1.533391, -1.383176, -1.084041, -1.131207, -1.652185, -1.575601, -1.277109, -1.206684, 
-1.311542, -2.883177, -0.979627, -1.381881, -1.599658, -1.377975, -1.221321, -1.559634, -1.663175, -1.321991, -1.097673, -1.161205, -1.697471, -1.547712, -1.234393, -1.172611, -1.307240, -2.799089, -1.024402, -1.475968, -1.552583, -1.380856, -1.176426, -1.615688, -1.678765, -1.292969, -1.078260, -1.206296, -1.690351, -1.491397, -1.233802, -1.212325, -1.350475, -2.829859, -0.956184, -1.429438, -1.512994, -1.382463, -1.240127, -1.703363, -1.591425, -1.310955, -1.064988, -1.151942, -1.739983, -1.527910, -1.232834, -1.298971, -1.297257, -2.761883, -0.939727, -1.414751, -1.483604, -1.462856, -1.208580, -1.674384, -1.634474, -1.364554, -1.016037, -1.122974, -1.690016, -1.544599, -1.284551, -1.235359, -1.303662, -2.799155, -0.975848, -1.426116, -1.434484, -1.453573, -1.245532, -1.631318, -1.620768, -1.366625, -1.045216, -1.165331, -1.633839, -1.562724, -1.260765, -1.217656, -1.301576, -2.934865, -0.970651, -1.345412, -1.506342, -1.536264, -1.195130, -1.637784, -1.551563, -1.414957, -1.047718, -1.174527, -1.624557, -1.620659, -1.216492, -1.272260, -1.329985, -2.691130, -0.947996, -1.366220, -1.474374, -1.502744, -1.225912, -1.584006, -1.633979, -1.355266, -1.073476, -1.206041, -1.572185, -1.649744, -1.200879, -1.267592, -1.336935, -2.749850, -0.936738, -1.309161, -1.514739, -1.528958, -1.226469, -1.612766, -1.582913, -1.424151, -1.036834, -1.152225, -1.645935, -1.664285, -1.197630, -1.294772, -1.359531, -2.745825, -0.903712, -1.394431, -1.500034, -1.529520, -1.163907, -1.602047, -1.636507, -1.385701, -1.039280, -1.167465, -1.657543, -1.703770, -1.151245, -1.301788, -1.324456, -2.681209, -0.932605, -1.403536, -1.473306, -1.545801, -1.164862, -1.551844, -1.565656, -1.468956, -1.052608, -1.149824, -1.605157, -1.630724, -1.249453, -1.290241, -1.264181, -2.943531, -0.942517, -1.304290, -1.505188, -1.605008, -1.184918, -1.560259, -1.639609, -1.418269, -1.039222, -1.142960, -1.591712, -1.800934, -1.163455, -1.256629, -1.382217, -2.845545, -0.900752, -1.368236, -1.468548, 
-1.696499, -1.103040, -1.599954, -1.551023, -1.520592, -1.001085, -1.123103, -1.510434, -1.846211, -1.217116, -1.332298, -1.365316, -2.867628, -0.858030, -1.350102, -1.465518, -1.732147, -1.099842, -1.576476, -1.622477, -1.491624, -0.991873, -1.098226, -1.563375, -1.902803, -1.177786, -1.364002, -1.356972, -3.182572, -0.808679, -1.283400, -1.578517, -1.747200, -1.071879, -1.594149, -1.564256, -1.478361, -1.022585, -1.089822, -1.526594, -1.951685, -1.189365, -1.292020, -1.351816, -3.183077, -0.855532, -1.224284, -1.536010, -1.873073, -1.087249, -1.572873, -1.590312, -1.469376, -1.025490, -1.140688, -1.477160, -1.993420, -1.152370, -1.368373, -1.352513, -3.019521, -0.825388, -1.306903, -1.476802, -1.836329, -1.074190, -1.608595, -1.565977, -1.488318, -1.007323, -1.171335, -1.414913, -2.051619, -1.143902, -1.291423, -1.328795, -3.117248, -0.876961, -1.262329, -1.538477, -1.925760, -1.031328, -1.744068, -1.562269, -1.549265, -0.908500, -1.189067, -1.438308, -2.101403, -1.090999, -1.403285, -1.363658, -3.037634, -0.797309, -1.280513, -1.493606, -2.068970, -0.990939, -1.740625, -1.503680, -1.477796, -0.983128, -1.209863, -1.505547, -2.069425, -1.039568, -1.474199, -1.230376, -2.996159, -0.846586, -1.419817, -1.521397, -1.871802, -0.951946, -1.802024, -1.527524, -1.514342, -0.921278, -1.227731, -1.440212, -2.175919, -1.030966, -1.528092, -1.297167, -2.938102, -0.783554, -1.463417, -1.532827, -1.880613, -0.915953, -2.005152, -1.487168, -1.552308, -0.849591, -1.275353, -1.398643, -2.207587, -1.011233, -1.666028, -1.256129, -3.130884, -0.728629, -1.596293, -1.485792, -1.913030, -0.859442, -2.064598, -1.512069, -1.531959, -0.828817, -1.424035, -1.384118, -2.168237, -0.930553, -1.730066, -1.186225, -2.971241, -0.763321, -1.693668, -1.501296, -1.913030, -0.808149, -2.219761, -1.492997, -1.580384, -0.774855, -1.501896, -1.410925, -2.214846, -0.857539, -1.711810, -1.214889, -3.039708, -0.744716, -1.892247, -1.423981, -1.964220, -0.758756, -2.206199, -1.488013, -1.595464, 
-0.773823, -1.417809, -1.367333, -2.659731, -0.837010, -1.915395, -1.184710, -3.073095, -0.691941, -2.070218, -1.426285, -1.978510, -0.702456, -2.328366, -1.427370, -1.648702, -0.754370, -1.414676, -1.470765, -2.909702, -0.749180, -2.055749, -1.215381, -3.166519, -0.628754, -2.138912, -1.429557, -2.075599, -0.659092, -2.351480, -1.377296, -1.696583, -0.756719, -1.545985, -1.414307, -3.174114, -0.689209, -2.158969, -1.218231, -3.244292, -0.598188, -2.360033, -1.374866, -2.107753, -0.632619, -2.498192, -1.422895, -1.640368, -0.728065, -1.572480, -1.467562, -3.496505, -0.631713, -2.098699, -1.160430, -3.206880, -0.647123, -2.223828, -1.477001, -2.209229, -0.591134, -2.623254, -1.423354, -1.709677, -0.682000, -1.654411, -1.480458, -3.908202, -0.577704, -2.366086, -1.163774, -3.307587, -0.584754, -2.532024, -1.372169, -2.332311, -0.562339, -2.568229, -1.418198, -1.795426, -0.663351, -1.770572, -1.487618, -4.441573, -0.524119, -2.399435, -1.207826, -3.559201, -0.541417, -2.614654, -1.537096, -2.121484, -0.524335, -2.722673, -1.399220, -1.826412, -0.641463, -1.749200, -1.357721, -4.321109, -0.587787, -2.401038, -1.181602, -3.566789, -0.554529, -2.232051, -1.496704, -2.198150, -0.583725, -2.594245, -1.367348, -1.836785, -0.671019, -1.672136, -1.365517, -5.421616, -0.593327, -2.215511, -1.177523, -3.662429, -0.584847, -2.479396, -1.445079, -2.178072, -0.567010, -2.504500, -1.284897, -1.664075, -0.793545, -1.789737, -1.323529, -4.816229, -0.582135, -2.029729, -1.142265, -3.478014, -0.656498, -2.493205, -1.514879, -2.286191, -0.517737, -2.438169, -1.180850, -1.805005, -0.818284, -1.571802, -1.290899, -5.028555, -0.671860, -2.062680, -1.030025, -3.701808, -0.710883, -2.230398, -1.303450, -2.186723, -0.676020, -2.509336, -1.168315, -1.880561, -0.786836, -1.803530, -1.374661, -4.448059, -0.560967, -2.353114, -0.874837, -3.901340, -0.759751, -2.401061, -1.480130, -2.301531, -0.541861, -2.902595, -1.173849, -2.203739, -0.643300, -1.440629, -1.268511, -4.158876, -0.762827, 
-2.306540, -0.910387, -3.742844, -0.745832, -1.924677, -1.367867, -2.315543, -0.691718, -2.796046, -1.628159, -2.518702, -0.412347, -0.709182, -1.565863, -2.505260, -1.526056, -1.065038, -1.104807, -2.495496, -1.420659, -1.176059, -1.723113, -1.148356, -1.630454, -1.817762, -1.496771, -1.271445, -1.098612, -2.519205, -0.344118, -5.635175, -1.574743, -2.977455, -0.464725, -6.435324, -1.142044, -3.092476, -0.304870, -5.161439, -1.552784, -3.035229, -0.562993, -5.197109, -0.975771, -1.185282, 0.000100, 0.000100, 0.000100, -1.629335, 0.000100, 0.000100, 0.000100, -1.586308, 0.000100, 0.000100, 0.000100, -1.225537, 0.000100, 0.000100, 0.000100, -1.068323, -1.765095, -1.447197, -1.386229, -1.010964, -1.435962, -2.836073, -1.080016, -1.227770, -1.546849, -1.422713, -1.374058, -1.471573, -1.655439, -1.386840, -1.109985, -1.087415, -1.780367, -1.427841, -1.368412, -0.984899, -1.408182, -2.870136, -1.123179, -1.220772, -1.551013, -1.393404, -1.407577, -1.490451, -1.606219, -1.383691, -1.128765, -1.086542, -1.768407, -1.447530, -1.359209, -0.997638, -1.467427, -2.826601, -1.074352, -1.231911, -1.557713, -1.407346, -1.374936, -1.491872, -1.630735, -1.368864, -1.124338, -1.083233, -1.781284, -1.455884, -1.347517, -0.999942, -1.417129, -2.893504, -1.095691, -1.210139, -1.598587, -1.394925, -1.379079, -1.502643, -1.611902, -1.385679, -1.115360, -1.069250, -1.762012, -1.462922, -1.372485, -0.999921, -1.456048, -2.920234, -1.064169, -1.196587, -1.588488, -1.394583, -1.404029, -1.474635, -1.625410, -1.400741, -1.115070, -1.095417, -1.758171, -1.433297, -1.367595, -0.985825, -1.453125, -2.785193, -1.104691, -1.172592, -1.603082, -1.416870, -1.399625, -1.486991, -1.645648, -1.367963, -1.119523, -1.086440, -1.788556, -1.442117, -1.351097, -0.997773, -1.464743, -2.764173, -1.087273, -1.191705, -1.612409, -1.389649, -1.395426, -1.530923, -1.603852, -1.368676, -1.114436, -1.088908, -1.740367, -1.446936, -1.375671, -0.962156, -1.454487, -2.965130, -1.100040, -1.202579, -1.608677, 
-1.389725, -1.385153, -1.457992, -1.614451, -1.396346, -1.136946, -1.070214, -1.740863, -1.452347, -1.395636, -1.008765, -1.397822, -2.875205, -1.103248, -1.230409, -1.577385, -1.401677, -1.365987, -1.472914, -1.619215, -1.386155, -1.131173, -1.072389, -1.777718, -1.465439, -1.355533, -1.001679, -1.439101, -2.841342, -1.086932, -1.253103, -1.547728, -1.382221, -1.383771, -1.461387, -1.622813, -1.409659, -1.119275, -1.080096, -1.805542, -1.434282, -1.355706, -0.993512, -1.443300, -2.814878, -1.097613, -1.221483, -1.574932, -1.394665, -1.385216, -1.456980, -1.613141, -1.381719, -1.149945, -1.081344, -1.751019, -1.426309, -1.398085, -0.974742, -1.432800, -2.872219, -1.116276, -1.205927, -1.605319, -1.381793, -1.391781, -1.490724, -1.637619, -1.372736, -1.117994, -1.095671, -1.751723, -1.434574, -1.370448, -1.018717, -1.460333, -2.790822, -1.063016, -1.192923, -1.581267, -1.417638, -1.391642, -1.478188, -1.630044, -1.372491, -1.131486, -1.094195, -1.764900, -1.437956, -1.360336, -0.986257, -1.443898, -2.886446, -1.092880, -1.226947, -1.580121, -1.399821, -1.369545, -1.464939, -1.632832, -1.411593, -1.109336, -1.086497, -1.732911, -1.434755, -1.395694, -0.994503, -1.449570, -2.856268, -1.084891, -1.213900, -1.591372, -1.406183, -1.369474, -1.510584, -1.617526, -1.368700, -1.119654, -1.071576, -1.783697, -1.463664, -1.354297, -1.017993, -1.410677, -2.800665, -1.096815, -1.203909, -1.579766, -1.374661, -1.422825, -1.514059, -1.598054, -1.357135, -1.138504, -1.080167, -1.761482, -1.450171, -1.369867, -0.987816, -1.435841, -2.898561, -1.094840, -1.206588, -1.553140, -1.432450, -1.384385, -1.467937, -1.630367, -1.375785, -1.136000, -1.078253, -1.783372, -1.435202, -1.371710, -1.020853, -1.432572, -2.871679, -1.065909, -1.203028, -1.592183, -1.397207, -1.390508, -1.446590, -1.616457, -1.384979, -1.152968, -1.074445, -1.808446, -1.453168, -1.344095, -0.998673, -1.413492, -2.813142, -1.113756, -1.201433, -1.582950, -1.390649, -1.406680, -1.461042, -1.610131, -1.401928, 
-1.133135, -1.079562, -1.753512, -1.474717, -1.353837, -0.996560, -1.394687, -2.849046, -1.123798, -1.207408, -1.572000, -1.436658, -1.363835, -1.496758, -1.647149, -1.364427, -1.114681, -1.096113, -1.785524, -1.405910, -1.374447, -1.012822, -1.408867, -2.820343, -1.100208, -1.239593, -1.591115, -1.346494, -1.399789, -1.488028, -1.619519, -1.375906, -1.128315, -1.062328, -1.749116, -1.470891, -1.383492, -0.996609, -1.429360, -2.864612, -1.095406, -1.192497, -1.573677, -1.383390, -1.433266, -1.489476, -1.578559, -1.389978, -1.142074, -1.095617, -1.747685, -1.441456, -1.366856, -1.017171, -1.426723, -2.874666, -1.073371, -1.197630, -1.599553, -1.398839, -1.389416, -1.493797, -1.613382, -1.392075, -1.115627, -1.077575, -1.751836, -1.432862, -1.396345, -0.969419, -1.480507, -2.856330, -1.091552, -1.184660, -1.596671, -1.388731, -1.418040, -1.479430, -1.649741, -1.371548, -1.119574, -1.088476, -1.748081, -1.439194, -1.378151, -0.989244, -1.467803, -2.817096, -1.084911, -1.209765, -1.587848, -1.384668, -1.398525, -1.484660, -1.612368, -1.406629, -1.111582, -1.069751, -1.744277, -1.462069, -1.384782, -0.997336, -1.407862, -2.914204, -1.101939, -1.198716, -1.559326, -1.412473, -1.408128, -1.472936, -1.662203, -1.368991, -1.118822, -1.125386, -1.745862, -1.405659, -1.363465, -1.003919, -1.460663, -2.845668, -1.068912, -1.227344, -1.576786, -1.416130, -1.356189, -1.494146, -1.678019, -1.357637, -1.103982, -1.097068, -1.734444, -1.466561, -1.351175, -0.992039, -1.447684, -2.908687, -1.080226, -1.206476, -1.577944, -1.404401, -1.390957, -1.462419, -1.660108, -1.385144, -1.114989, -1.096901, -1.726793, -1.433294, -1.387415, -0.985276, -1.457335, -2.849275, -1.090877, -1.195846, -1.587540, -1.410480, -1.389910, -1.498122, -1.585828, -1.361086, -1.154262, -1.100595, -1.718642, -1.442999, -1.379124, -0.987017, -1.405579, -2.920000, -1.114271, -1.180930, -1.579562, -1.411188, -1.414331, -1.498636, -1.601425, -1.373160, -1.134288, -1.085937, -1.762471, -1.417822, -1.392098, 
-0.987958, -1.430462, -2.864892, -1.104200, -1.170190, -1.591433, -1.411181, -1.417977, -1.457221, -1.651592, -1.400481, -1.112017, -1.083080, -1.755803, -1.415168, -1.403255, -0.991776, -1.405179, -2.869457, -1.117703, -1.244684, -1.574032, -1.400185, -1.353987, -1.484967, -1.641068, -1.374730, -1.118379, -1.093401, -1.765453, -1.399008, -1.398472, -1.006048, -1.391903, -2.846789, -1.115636, -1.214680, -1.591011, -1.398288, -1.376519, -1.474014, -1.611108, -1.391033, -1.131618, -1.089404, -1.749134, -1.425169, -1.389538, -0.991063, -1.417723, -2.835198, -1.115211, -1.195297, -1.591661, -1.418677, -1.379259, -1.488467, -1.610557, -1.372475, -1.136232, -1.074679, -1.744452, -1.459403, -1.380404, -0.979969, -1.409815, -2.853890, -1.130508, -1.187401, -1.572414, -1.439346, -1.384852, -1.488576, -1.623657, -1.370690, -1.129489, -1.100565, -1.720610, -1.436295, -1.384086, -0.989515, -1.405693, -2.941571, -1.107865, -1.193296, -1.624427, -1.372395, -1.401383, -1.499162, -1.611677, -1.375934, -1.125388, -1.113428, -1.740532, -1.424953, -1.364055, -1.001107, -1.444199, -2.838078, -1.084548, -1.233987, -1.542371, -1.392664, -1.400050, -1.467197, -1.624325, -1.371282, -1.143809, -1.116950, -1.756062, -1.418765, -1.354845, -0.978586, -1.452234, -2.929783, -1.088556, -1.194238, -1.585914, -1.421809, -1.382192, -1.483117, -1.618702, -1.369178, -1.137564, -1.070821, -1.745083, -1.442211, -1.401456, -1.030765, -1.412444, -2.843003, -1.074500, -1.210780, -1.594312, -1.382732, -1.393944, -1.498071, -1.601824, -1.362079, -1.143246, -1.101202, -1.782059, -1.417887, -1.358632, -1.006935, -1.443214, -2.810413, -1.083777, -1.236254, -1.587548, -1.372181, -1.380113, -1.510728, -1.629736, -1.358294, -1.120335, -1.101096, -1.741419, -1.432259, -1.372576, -0.938922, -1.466371, -2.915596, -1.126957, -1.204036, -1.561474, -1.404951, -1.407259, -1.498785, -1.578828, -1.365024, -1.155159, -1.100448, -1.760863, -1.415070, -1.376451, -1.003930, -1.435045, -2.800009, -1.094671, -1.227060, 
-1.557857, -1.395543, -1.392065, -1.457093, -1.656519, -1.384392, -1.121443, -1.090723, -1.718840, -1.432632, -1.402129, -1.000990, -1.422882, -2.849650, -1.097808, -1.169944, -1.604002, -1.369417, -1.451279, -1.468368, -1.614640, -1.373850, -1.146942, -1.086284, -1.767317, -1.426594, -1.379852, -1.008632, -1.426126, -2.756270, -1.104078, -1.193780, -1.579812, -1.417316, -1.392116, -1.449603, -1.629447, -1.367723, -1.156414, -1.094903, -1.758106, -1.415869, -1.384926, -1.019079, -1.408412, -2.806677, -1.096207, -1.212910, -1.592647, -1.378006, -1.397550, -1.519645, -1.620523, -1.346242, -1.129487, -1.093277, -1.757898, -1.409089, -1.393880, -1.020721, -1.430347, -2.799278, -1.080031, -1.187791, -1.616589, -1.398558, -1.387983, -1.477309, -1.621138, -1.356175, -1.150650, -1.079913, -1.787939, -1.405754, -1.394778, -0.978014, -1.436253, -2.820010, -1.119263, -1.205578, -1.619586, -1.409686, -1.353709, -1.514185, -1.631254, -1.367408, -1.109990, -1.091992, -1.771125, -1.416811, -1.379021, -1.005039, -1.422991, -2.801270, -1.101884, -1.209013, -1.598402, -1.405737, -1.370019, -1.496393, -1.654361, -1.332362, -1.136326, -1.111231, -1.786994, -1.391350, -1.367887, -0.985037, -1.437139, -2.789389, -1.116239, -1.227753, -1.583652, -1.365631, -1.399963, -1.497745, -1.600315, -1.381792, -1.128850, -1.107796, -1.715785, -1.434618, -1.379578, -0.987207, -1.439693, -2.846959, -1.101528, -1.176950, -1.585149, -1.410613, -1.415221, -1.477711, -1.625874, -1.369576, -1.136669, -1.105330, -1.743701, -1.411151, -1.385638, -0.999267, -1.420223, -2.828953, -1.105287, -1.194124, -1.577011, -1.397406, -1.413864, -1.488727, -1.631518, -1.379806, -1.117549, -1.114402, -1.759949, -1.409633, -1.364134, -1.005595, -1.418573, -2.807751, -1.103303, -1.196177, -1.582880, -1.395691, -1.408099, -1.483558, -1.601802, -1.367107, -1.149502, -1.086994, -1.782704, -1.387158, -1.407295, -0.998784, -1.430728, -2.930251, -1.081299, -1.194510, -1.608775, -1.379212, -1.405525, -1.486790, -1.621603, 
-1.377111, -1.126967, -1.096250, -1.773016, -1.393282, -1.395136, -0.992432, -1.485132, -2.804879, -1.071952, -1.188665, -1.608208, -1.375283, -1.417338, -1.511397, -1.626613, -1.334945, -1.140586, -1.104778, -1.779344, -1.393791, -1.378984, -1.009809, -1.427732, -2.770541, -1.098920, -1.193156, -1.567267, -1.413940, -1.406744, -1.477026, -1.619703, -1.359516, -1.149037, -1.106785, -1.753777, -1.422890, -1.365533, -0.990630, -1.423033, -2.855709, -1.108177, -1.170207, -1.600293, -1.387326, -1.434991, -1.460456, -1.618395, -1.383070, -1.143048, -1.125408, -1.759008, -1.359626, -1.400363, -0.989906, -1.446169, -2.882103, -1.087966, -1.195905, -1.576685, -1.415023, -1.394364, -1.479064, -1.637438, -1.351836, -1.142856, -1.101998, -1.756600, -1.417116, -1.375354, -1.013341, -1.409959, -2.810391, -1.100630, -1.191760, -1.602000, -1.376543, -1.417288, -1.488197, -1.623192, -1.353697, -1.143602, -1.107866, -1.754748, -1.411458, -1.374372, -1.009154, -1.441162, -2.820299, -1.081071, -1.186623, -1.611036, -1.379088, -1.413620, -1.470225, -1.619304, -1.388541, -1.131185, -1.099338, -1.791891, -1.394642, -1.376970, -0.992346, -1.443399, -2.900394, -1.084211, -1.176422, -1.587610, -1.414169, -1.410264, -1.467101, -1.614450, -1.374834, -1.147198, -1.127134, -1.762249, -1.353614, -1.402113, -0.993282, -1.411392, -2.980173, -1.093466, -1.191545, -1.588426, -1.380787, -1.424533, -1.487080, -1.626410, -1.341038, -1.152773, -1.082979, -1.785398, -1.407119, -1.390959, -0.975375, -1.470581, -2.791575, -1.103124, -1.156639, -1.669129, -1.382059, -1.402479, -1.472611, -1.612913, -1.404009, -1.121602, -1.108330, -1.739751, -1.381791, -1.413880, -0.999430, -1.447772, -2.865772, -1.079168, -1.196625, -1.583179, -1.386867, -1.416303, -1.479805, -1.620810, -1.377331, -1.132184, -1.093596, -1.760044, -1.405332, -1.395668, -1.016169, -1.421357, -2.823545, -1.086962, -1.170519, -1.563237, -1.402911, -1.450483, -1.476513, -1.626313, -1.360955, -1.144138, -1.115420, -1.786197, -1.388857, 
-1.365449, -1.003248, -1.435264, -2.831224, -1.089692, -1.245460, -1.567123, -1.366905, -1.391700, -1.509856, -1.617887, -1.350735, -1.134163, -1.094713, -1.788263, -1.394818, -1.385352, -0.978021, -1.424792, -2.860294, -1.120444, -1.157419, -1.625471, -1.416799, -1.400817, -1.486921, -1.590718, -1.382908, -1.141608, -1.119936, -1.746875, -1.372134, -1.403171, -0.989115, -1.409298, -2.807191, -1.128850, -1.192997, -1.592504, -1.378028, -1.422136, -1.513578, -1.623527, -1.331310, -1.143988, -1.079942, -1.778989, -1.407630, -1.398960, -0.979436, -1.446844, -2.832715, -1.107715, -1.157895, -1.650956, -1.369995, -1.427621, -1.455552, -1.632335, -1.374406, -1.144868, -1.133511, -1.771919, -1.336068, -1.405619, -1.010087, -1.407684, -2.901113, -1.090224, -1.196883, -1.583153, -1.356886, -1.447840, -1.490490, -1.628256, -1.352837, -1.139558, -1.093003, -1.771083, -1.409187, -1.385090, -0.976427, -1.416014, -2.877815, -1.125752, -1.148944, -1.581737, -1.421165, -1.444014, -1.496056, -1.624307, -1.361140, -1.131431, -1.112600, -1.790462, -1.360535, -1.394768, -1.015007, -1.423373, -2.828524, -1.085894, -1.182168, -1.578802, -1.408195, -1.416422, -1.481590, -1.641107, -1.348956, -1.141163, -1.098083, -1.767501, -1.372948, -1.417281, -0.971022, -1.461993, -2.833112, -1.106542, -1.140910, -1.607953, -1.400981, -1.453207, -1.467110, -1.634745, -1.334187, -1.167851, -1.135532, -1.768113, -1.320708, -1.422344, -0.994936, -1.451187, -2.793616, -1.094337, -1.178470, -1.664466, -1.323399, -1.440834, -1.461162, -1.677032, -1.315166, -1.162589, -1.079590, -1.764612, -1.391389, -1.425940, -0.957182, -1.451390, -2.859752, -1.125472, -1.110050, -1.684761, -1.382335, -1.452533, -1.458989, -1.660278, -1.336684, -1.156050, -1.113595, -1.796199, -1.317462, -1.435984, -0.982847, -1.447128, -2.786660, -1.112049, -1.155749, -1.650657, -1.341304, -1.462095, -1.480705, -1.615331, -1.352770, -1.154628, -1.072712, -1.752780, -1.401619, -1.433676, -0.982481, -1.385882, -2.812998, -1.153625, 
-1.126271, -1.646859, -1.382823, -1.459998, -1.464692, -1.599751, -1.375744, -1.157558, -1.163584, -1.710982, -1.311768, -1.437152, -1.042536, -1.434227, -2.937676, -1.032397, -1.204850, -1.548493, -1.379367, -1.444105, -1.479569, -1.601626, -1.391807, -1.132987, -1.049473, -1.928282, -1.260908, -1.509215, -0.905215, -1.496922, -2.818569, -1.164637, -1.076601, -1.832951, -1.279956, -1.508427, -1.381168, -1.719323, -1.269913, -1.242466, -1.104792, -1.399977, -1.367211, -1.787962, -1.091861, -0.939227, -2.731868, -1.568386, -1.217584, -1.262440, -1.355713, -1.811986, -1.512844, -1.240443, -1.308673, -1.512844}; double score_ex_don[NUM_VALUES_SCORES] = { -1.385894, 0.000100, 0.000100, 0.000100, -1.414282, 0.000100, 0.000100, 0.000100, -1.446894, 0.000100, 0.000100, 0.000100, -1.303793, 0.000100, 0.000100, 0.000100, -1.335241, -1.584170, -1.183264, -1.489392, -1.204384, -1.325258, -2.223904, -1.120235, -1.335341, -1.336958, -1.328900, -1.562937, -1.993044, -1.394802, -1.147292, -1.209513, -1.287013, -1.601473, -1.214976, -1.489040, -1.120482, -1.347074, -2.250884, -1.175802, -1.307538, -1.408008, -1.272852, -1.585463, -1.945910, -1.407572, -1.158650, -1.208998, -1.253559, -1.581985, -1.304998, -1.436437, -1.221762, -1.289535, -2.199747, -1.142377, -1.246984, -1.374375, -1.444723, -1.496909, -1.882237, -1.356374, -1.263781, -1.179040, -1.341174, -1.537163, -1.162126, -1.557553, -1.210456, -1.247727, -2.176045, -1.199674, -1.301487, -1.361505, -1.341096, -1.560459, -1.951814, -1.327012, -1.141460, -1.296956, -1.257347, -1.486356, -1.235654, -1.615694, -1.207496, -1.300586, -2.042237, -1.207496, -1.264663, -1.392624, -1.287263, -1.643938, -1.881247, -1.464472, -1.163471, -1.190728, -1.254729, -1.516129, -1.314321, -1.484437, -1.198137, -1.356129, -2.109703, -1.141580, -1.233054, -1.425260, -1.391083, -1.517100, -1.894316, -1.377111, -1.291823, -1.131643, -1.400390, -1.583343, -1.149251, -1.463878, -1.202461, -1.311185, -2.165140, -1.154194, -1.326569, -1.454477, 
-1.336486, -1.434125, -1.932369, -1.335296, -1.158592, -1.279461, -1.328337, -1.521580, -1.238268, -1.483553, -1.162328, -1.270617, -2.150199, -1.237449, -1.227946, -1.401725, -1.325000, -1.634081, -1.855396, -1.372851, -1.130313, -1.319409, -1.248651, -1.516914, -1.344782, -1.456185, -1.244395, -1.261202, -2.150322, -1.164352, -1.214949, -1.375174, -1.381513, -1.613039, -1.956996, -1.346665, -1.211761, -1.200847, -1.329661, -1.511983, -1.215854, -1.520951, -1.185875, -1.256712, -2.155158, -1.224012, -1.344383, -1.341072, -1.339421, -1.533695, -2.061970, -1.385528, -1.118346, -1.218121, -1.268418, -1.595679, -1.173108, -1.577217, -1.160320, -1.255128, -2.126186, -1.264844, -1.272510, -1.441249, -1.281093, -1.582276, -1.798000, -1.377006, -1.137056, -1.342181, -1.312737, -1.506586, -1.287276, -1.455728, -1.225574, -1.285764, -2.168645, -1.153156, -1.203062, -1.383565, -1.426531, -1.565886, -1.923623, -1.353451, -1.199531, -1.223358, -1.288693, -1.569247, -1.233045, -1.492707, -1.210494, -1.311561, -2.125450, -1.161004, -1.328422, -1.306021, -1.333288, -1.606442, -2.041220, -1.348073, -1.066661, -1.323381, -1.344774, -1.501587, -1.259226, -1.457784, -1.148714, -1.252962, -2.143847, -1.272625, -1.223071, -1.469081, -1.231082, -1.695205, -1.851393, -1.411185, -1.130847, -1.286022, -1.265278, -1.516002, -1.321950, -1.462587, -1.211323, -1.293873, -2.083518, -1.192378, -1.175999, -1.422399, -1.422399, -1.564370, -1.943855, -1.331470, -1.219588, -1.212649, -1.328149, -1.576760, -1.181944, -1.506398, -1.193201, -1.302095, -2.097657, -1.197177, -1.267077, -1.427619, -1.247394, -1.654360, -1.975623, -1.357439, -1.081805, -1.327938, -1.268149, -1.514720, -1.227038, -1.581731, -1.100665, -1.362783, -2.160578, -1.216908, -1.240000, -1.405749, -1.267798, -1.692918, -1.854267, -1.434190, -1.075299, -1.332077, -1.228006, -1.621955, -1.290402, -1.450506, -1.193881, -1.262186, -2.177005, -1.202168, -1.175372, -1.375904, -1.468919, -1.567765, -1.950576, -1.349322, -1.228101, 
-1.185662, -1.280631, -1.502751, -1.235290, -1.566041, -1.171693, -1.290947, -2.194797, -1.191495, -1.274402, -1.340609, -1.312623, -1.662826, -1.967650, -1.303773, -1.144584, -1.308026, -1.238674, -1.626367, -1.196804, -1.553857, -1.129809, -1.340694, -2.062479, -1.244439, -1.236807, -1.382682, -1.292338, -1.691963, -1.835764, -1.405753, -1.079123, -1.364797, -1.323445, -1.493448, -1.288839, -1.454163, -1.215349, -1.361060, -2.018891, -1.157719, -1.155703, -1.357054, -1.472780, -1.617175, -1.882063, -1.316876, -1.173670, -1.307397, -1.315221, -1.492384, -1.212882, -1.563461, -1.214255, -1.273706, -2.192850, -1.165785, -1.313993, -1.414489, -1.286171, -1.551741, -1.961482, -1.392654, -1.011403, -1.397450, -1.335823, -1.485556, -1.205618, -1.555068, -1.133775, -1.333475, -2.149755, -1.210287, -1.214825, -1.405137, -1.258111, -1.750343, -1.926924, -1.389562, -1.019072, -1.409398, -1.311909, -1.557272, -1.275130, -1.424662, -1.192664, -1.327152, -2.024904, -1.206141, -1.198954, -1.409969, -1.430266, -1.536679, -1.931305, -1.389328, -1.124030, -1.270003, -1.314019, -1.492302, -1.240221, -1.527455, -1.251524, -1.285326, -2.107530, -1.152542, -1.300381, -1.395389, -1.324847, -1.541818, -1.898156, -1.352703, -1.090108, -1.364788, -1.259778, -1.567958, -1.225586, -1.540719, -1.178854, -1.303931, -2.001480, -1.252564, -1.203059, -1.435712, -1.308999, -1.651878, -1.905552, -1.329180, -1.095817, -1.377140, -1.253340, -1.506193, -1.303005, -1.509666, -1.183445, -1.379405, -2.137629, -1.126655, -1.143417, -1.417405, -1.415807, -1.628620, -1.880313, -1.351320, -1.170636, -1.278733, -1.378113, -1.541940, -1.164197, -1.505946, -1.152680, -1.316425, -2.201758, -1.185773, -1.341049, -1.380789, -1.264509, -1.586352, -1.892528, -1.397002, -1.022309, -1.417954, -1.279542, -1.557940, -1.164217, -1.613954, -1.082378, -1.355672, -2.029950, -1.301604, -1.163300, -1.356178, -1.347533, -1.771825, -1.835875, -1.351161, -1.102318, -1.388352, -1.146963, -1.524594, -1.379366, -1.546685, 
-1.208717, -1.314682, -2.129774, -1.158375, -1.182817, -1.419290, -1.424094, -1.556022, -1.934235, -1.344714, -1.121917, -1.312315, -1.308775, -1.613864, -1.107114, -1.608334, -1.239839, -1.269777, -2.090889, -1.183851, -1.305755, -1.379864, -1.224980, -1.694691, -1.976807, -1.379114, -1.046739, -1.352446, -1.232734, -1.577741, -1.207558, -1.593870, -1.125126, -1.277180, -2.128886, -1.281588, -1.182010, -1.428445, -1.260578, -1.770978, -1.977632, -1.363765, -0.989588, -1.451677, -1.217143, -1.504825, -1.350675, -1.501464, -1.191802, -1.359712, -2.067271, -1.161357, -1.137430, -1.382941, -1.430169, -1.664688, -1.926072, -1.253446, -1.187115, -1.333090, -1.286474, -1.488182, -1.192377, -1.637451, -1.170953, -1.257732, -2.152635, -1.239763, -1.245443, -1.323486, -1.343159, -1.687610, -1.959891, -1.360895, -1.006611, -1.438684, -1.273022, -1.555102, -1.189528, -1.587271, -1.118625, -1.380870, -2.162857, -1.181183, -1.228220, -1.398602, -1.257361, -1.738175, -1.903167, -1.349486, -1.082423, -1.375327, -1.221820, -1.544594, -1.301544, -1.515052, -1.165304, -1.352944, -2.057355, -1.197652, -1.158052, -1.352031, -1.434697, -1.666004, -1.864785, -1.271503, -1.188305, -1.347369, -1.332790, -1.538602, -1.205572, -1.504815, -1.235042, -1.237779, -2.133729, -1.201460, -1.250648, -1.368246, -1.308502, -1.666522, -2.041594, -1.361308, -1.073626, -1.301636, -1.241334, -1.567631, -1.180710, -1.632695, -1.114944, -1.348686, -2.109522, -1.233782, -1.191864, -1.390971, -1.262138, -1.805067, -1.930587, -1.378604, -0.999284, -1.448749, -1.184943, -1.550223, -1.352443, -1.498665, -1.138147, -1.319696, -2.225242, -1.189643, -1.161145, -1.406201, -1.379253, -1.660555, -1.992129, -1.294027, -1.176061, -1.269616, -1.335732, -1.551955, -1.161545, -1.550232, -1.143119, -1.261060, -2.160425, -1.263909, -1.236042, -1.374689, -1.263193, -1.749849, -1.964725, -1.326528, -1.080145, -1.367032, -1.314958, -1.498754, -1.190488, -1.589472, -1.109372, -1.295064, -2.177212, -1.262321, -1.154009, 
-1.430919, -1.278129, -1.789864, -1.911490, -1.409475, -0.978958, -1.460303, -1.257805, -1.535490, -1.331208, -1.443015, -1.204381, -1.293993, -2.097141, -1.193555, -1.086273, -1.414530, -1.476800, -1.654848, -1.953186, -1.345019, -1.128507, -1.294161, -1.295965, -1.542663, -1.193443, -1.563606, -1.136555, -1.287349, -2.128798, -1.258445, -1.204402, -1.367173, -1.353795, -1.676410, -1.953570, -1.360134, -1.032193, -1.404941, -1.252570, -1.570009, -1.212777, -1.566298, -1.103838, -1.284952, -2.117861, -1.303972, -1.175205, -1.395554, -1.291124, -1.780305, -1.869861, -1.444978, -0.994159, -1.426829, -1.272390, -1.494225, -1.313812, -1.484469, -1.117812, -1.376013, -2.048106, -1.232912, -1.118877, -1.425666, -1.400782, -1.678782, -1.949080, -1.313091, -1.159180, -1.291477, -1.373569, -1.501337, -1.145812, -1.579875, -1.083641, -1.346729, -2.114707, -1.269826, -1.179178, -1.447547, -1.277759, -1.722265, -1.860929, -1.394579, -1.071749, -1.369927, -1.232469, -1.607163, -1.216965, -1.551900, -1.105273, -1.321237, -2.131390, -1.260887, -1.140045, -1.423428, -1.309988, -1.774990, -1.866370, -1.378914, -0.964264, -1.550224, -1.192733, -1.592270, -1.287449, -1.527067, -1.123608, -1.368874, -2.105725, -1.208166, -1.159472, -1.317628, -1.447954, -1.695390, -1.959774, -1.291360, -1.121356, -1.353342, -1.344762, -1.522366, -1.157585, -1.575234, -1.143925, -1.280531, -2.039570, -1.296602, -1.167426, -1.404442, -1.291783, -1.780530, -1.902910, -1.320489, -1.086018, -1.401191, -1.244550, -1.568846, -1.256606, -1.518566, -1.102203, -1.315206, -2.114703, -1.277356, -1.220229, -1.357242, -1.300165, -1.743077, -1.860241, -1.344257, -1.005582, -1.524180, -1.226580, -1.607230, -1.307670, -1.444711, -1.184415, -1.324177, -2.221705, -1.140594, -1.127270, -1.357307, -1.502724, -1.628583, -1.962710, -1.297184, -1.153205, -1.307118, -1.241912, -1.614629, -1.177454, -1.588939, -1.131039, -1.256202, -2.190382, -1.270488, -1.332897, -1.281022, -1.351370, -1.611195, -1.976682, -1.358736, 
-1.015939, -1.417066, -1.286079, -1.496800, -1.217086, -1.591053, -1.173725, -1.297439, -2.076677, -1.230281, -1.186318, -1.511356, -1.193846, -1.766136, -1.870724, -1.395831, -1.019831, -1.436725, -1.203594, -1.525526, -1.344367, -1.506544, -1.152749, -1.354442, -2.127900, -1.180732, -1.099725, -1.364654, -1.438427, -1.747163, -1.983889, -1.328545, -1.142600, -1.277922, -1.264054, -1.567240, -1.192330, -1.582989, -1.190102, -1.317033, -2.080248, -1.194084, -1.218647, -1.416745, -1.300140, -1.664029, -1.909439, -1.377665, -1.013977, -1.440132, -1.272303, -1.541736, -1.165010, -1.640488, -1.122468, -1.327667, -2.067111, -1.262738, -1.226633, -1.384418, -1.299659, -1.694846, -1.756831, -1.355510, -1.050128, -1.515491, -1.191069, -1.529253, -1.368276, -1.492273, -1.113337, -1.360246, -2.079036, -1.238253, -1.084030, -1.407228, -1.422732, -1.737871, -1.832353, -1.318674, -1.196532, -1.308470, -1.260968, -1.563898, -1.147420, -1.661537, -1.094947, -1.360120, -2.130928, -1.237579, -1.197458, -1.414129, -1.300737, -1.700617, -2.037443, -1.321966, -1.026429, -1.407545, -1.277906, -1.474202, -1.233987, -1.603014, -1.093261, -1.361431, -2.172361, -1.221879, -1.161734, -1.416973, -1.317476, -1.732738, -1.884504, -1.343809, -1.019047, -1.485865, -1.271815, -1.534963, -1.313182, -1.447102, -1.110041, -1.335651, -2.153549, -1.233072, -1.116229, -1.416630, -1.454124, -1.627845, -1.877874, -1.278426, -1.160106, -1.365876, -1.318412, -1.431471, -1.220522, -1.617435, -1.133459, -1.277234, -2.123088, -1.274348, -1.204404, -1.397124, -1.326506, -1.674407, -1.954158, -1.311541, -1.053643, -1.426090, -1.298379, -1.547059, -1.180596, -1.574761, -1.070732, -1.281244, -2.207345, -1.310970, -1.293351, -1.382035, -1.239656, -1.686865, -1.799749, -1.359254, -1.078914, -1.436146, -1.203973, -1.622633, -1.316154, -1.450483, -1.180461, -1.383891, -2.138063, -1.125844, -1.154160, -1.367559, -1.346569, -1.773024, -2.026101, -1.303002, -1.145578, -1.278709, -1.307400, -1.483397, -1.216489, 
-1.578214, -1.131774, -1.270256, -2.251085, -1.232734, -1.196219, -1.239918, -1.415920, -1.798454, -1.949115, -1.271538, -1.060161, -1.466204, -1.255666, -1.451374, -1.291582, -1.579733, -1.044565, -1.425441, -2.129500, -1.241828, -1.171273, -1.378714, -1.302224, -1.794567, -1.770930, -1.370742, -1.042873, -1.498495, -1.189383, -1.504077, -1.344499, -1.547880, -1.098204, -1.361672, -2.153644, -1.223367, -1.142846, -1.387898, -1.379905, -1.715422, -1.817285, -1.305906, -1.212409, -1.312572, -1.294710, -1.518819, -1.239627, -1.525281, -1.101437, -1.293258, -2.211018, -1.260042, -1.186253, -1.394982, -1.311322, -1.729711, -2.024442, -1.251498, -1.093562, -1.399033, -1.267367, -1.530952, -1.186700, -1.625204, -1.098612, -1.330312, -2.082973, -1.281169, -1.121176, -1.437797, -1.352605, -1.725479, -1.855953, -1.334138, -1.066742, -1.443148, -1.158898, -1.536001, -1.368044, -1.530980, -1.134798, -1.365275, -2.129703, -1.189642, -1.107623, -1.305412, -1.516044, -1.720274, -1.936491, -1.241783, -1.182719, -1.345238, -1.258447, -1.543638, -1.242367, -1.543638, -1.125164, -1.304118, -2.301412, -1.191150, -1.148520, -1.400237, -1.355942, -1.722320, -1.883632, -1.267998, -1.131486, -1.410525, -1.262612, -1.513926, -1.241912, -1.569208, -1.007244, -1.363919, -2.251887, -1.294926, -1.181868, -1.379694, -1.373136, -1.669571, -1.793031, -1.322709, -1.129583, -1.410733, -1.176794, -1.577667, -1.459502, -1.374656, -1.185036, -1.331292, -2.261440, -1.121076, -1.121170, -1.376839, -1.485661, -1.632841, -1.793512, -1.365496, -1.233897, -1.247523, -1.252226, -1.550393, -1.266089, -1.513965, -1.070613, -1.349927, -2.178875, -1.256061, -1.213108, -1.417831, -1.357549, -1.593572, -1.856071, -1.411138, -1.111148, -1.306862, -1.219528, -1.572057, -1.301152, -1.492568, -0.993210, -1.443255, -2.295896, -1.228316, -1.112406, -1.452069, -1.423229, -1.628556, -1.763656, -1.478352, -1.059877, -1.370139, -1.122953, -1.642972, -1.471964, -1.379106, -1.051087, -1.404152, -2.400110, -1.157855, 
-1.030281, -1.375599, -1.646510, -1.621085, -1.724012, -1.518701, -1.236717, -1.163732, -1.117603, -1.599586, -1.315587, -1.596323, -0.968354, -1.333230, -2.393915, -1.326530, -0.973824, -1.341693, -1.554107, -1.899903, -1.563394, -1.337397, -1.211732, -1.468084, -0.998394, -1.139395, -1.632676, -2.153211, -0.818638, -1.064793, -2.641517, -1.945501, -0.946117, -1.071815, -1.639264, -2.587007, -1.473391, -0.957669, -1.381619, -1.995872, -0.454442, -2.214835, -2.129729, -1.986628, -0.293988, -2.510470, -2.991967, -2.093170, -0.434752, -2.092543, -2.115746, -2.219543, -1.262582, -1.757518, -1.302403, -1.299283, -2.384259, -3.736880, -0.175496, -3.101671, -1.513713, -2.694443, -0.710651, -1.509572, -1.858238, -2.956849, -0.358565, -2.370948, -3.019029, -3.316280, -0.183299, -2.496840, -1.242298, 0.000100, 0.000100, 0.000100, -1.578327, 0.000100, 0.000100, 0.000100, -1.574650, 0.000100, 0.000100, 0.000100, -1.211065, 0.000100, 0.000100, 0.000100, -1.123866, -1.720387, -1.462557, -1.330522, -1.061131, -1.365952, -2.800505, -1.084654, -1.277848, -1.565415, -1.413511, -1.312766, -1.546281, -1.581237, -1.370970, -1.116633, -1.121998, -1.731069, -1.444373, -1.341770, -1.072902, -1.392056, -2.773738, -1.058449, -1.251255, -1.582163, -1.417443, -1.324290, -1.546320, -1.602778, -1.323177, -1.141483, -1.115748, -1.771768, -1.417554, -1.347106, -1.027126, -1.426483, -2.812371, -1.073672, -1.255814, -1.589083, -1.401257, -1.328946, -1.538923, -1.620794, -1.357019, -1.107999, -1.135400, -1.732635, -1.423153, -1.343483, -1.024090, -1.400305, -2.784571, -1.100817, -1.252002, -1.569625, -1.407929, -1.342103, -1.533882, -1.617371, -1.356280, -1.113930, -1.113697, -1.743936, -1.445264, -1.342689, -1.046225, -1.395464, -2.825490, -1.073702, -1.244452, -1.593912, -1.398987, -1.339697, -1.507638, -1.608896, -1.385753, -1.113755, -1.105669, -1.761565, -1.449055, -1.337722, -1.054503, -1.410613, -2.827225, -1.054220, -1.262306, -1.589100, -1.416800, -1.307848, -1.502097, -1.610379, 
-1.359177, -1.137395, -1.111209, -1.742705, -1.471572, -1.323399, -1.014036, -1.397978, -2.794367, -1.111703, -1.280201, -1.545798, -1.429274, -1.311580, -1.519826, -1.601706, -1.373000, -1.119780, -1.106236, -1.761038, -1.446936, -1.339252, -1.028121, -1.397359, -2.841623, -1.088439, -1.261246, -1.559904, -1.400853, -1.346473, -1.540602, -1.622975, -1.322681, -1.133152, -1.118192, -1.734510, -1.485008, -1.308806, -1.038139, -1.393620, -2.800799, -1.087820, -1.287914, -1.550760, -1.397762, -1.328334, -1.526613, -1.623579, -1.375097, -1.100446, -1.103879, -1.737740, -1.456957, -1.348782, -1.050339, -1.405937, -2.801846, -1.066109, -1.268099, -1.557169, -1.425492, -1.318590, -1.538100, -1.615235, -1.324717, -1.137907, -1.104633, -1.760179, -1.452246, -1.337087, -0.996166, -1.448341, -2.850608, -1.084891, -1.282216, -1.549764, -1.403366, -1.329869, -1.551146, -1.580816, -1.345634, -1.133783, -1.123336, -1.724506, -1.454437, -1.335548, -1.060020, -1.387279, -2.826546, -1.065429, -1.260617, -1.575648, -1.382175, -1.352412, -1.521082, -1.616763, -1.369807, -1.112220, -1.124388, -1.737787, -1.458988, -1.321376, -1.042676, -1.386099, -2.818764, -1.085416, -1.255847, -1.549402, -1.433181, -1.330891, -1.533449, -1.604620, -1.382278, -1.101905, -1.108604, -1.721001, -1.486421, -1.328354, -1.016683, -1.416133, -2.804428, -1.093580, -1.264356, -1.579386, -1.387669, -1.340095, -1.529165, -1.618806, -1.345768, -1.124519, -1.109669, -1.714631, -1.478223, -1.338422, -1.076822, -1.359895, -2.818472, -1.070204, -1.278541, -1.593057, -1.396585, -1.306326, -1.514716, -1.600112, -1.401508, -1.102529, -1.123227, -1.737167, -1.440377, -1.339737, -1.020192, -1.388811, -2.905761, -1.092445, -1.270186, -1.547523, -1.424512, -1.324935, -1.519133, -1.601961, -1.371069, -1.121588, -1.118991, -1.737281, -1.440285, -1.345028, -1.049255, -1.389887, -2.818301, -1.075900, -1.259663, -1.580311, -1.441641, -1.295357, -1.498808, -1.598859, -1.390409, -1.122383, -1.108109, -1.741473, -1.444166, 
-1.352425, -1.054054, -1.393443, -2.880565, -1.057981, -1.262425, -1.545494, -1.445718, -1.315837, -1.539910, -1.609304, -1.352250, -1.118054, -1.128252, -1.759558, -1.460806, -1.301103, -1.025305, -1.389807, -2.813540, -1.102027, -1.287016, -1.544535, -1.429386, -1.305489, -1.545225, -1.611647, -1.365258, -1.103034, -1.123786, -1.754212, -1.437073, -1.330719, -1.023151, -1.414845, -2.811653, -1.086277, -1.258383, -1.559998, -1.414365, -1.336855, -1.538351, -1.607586, -1.372032, -1.104721, -1.128465, -1.722574, -1.449851, -1.334615, -1.043194, -1.382463, -2.793372, -1.092138, -1.256340, -1.597638, -1.388420, -1.333844, -1.554425, -1.633045, -1.353610, -1.093478, -1.120322, -1.735018, -1.434417, -1.350254, -1.022833, -1.408785, -2.694076, -1.113583, -1.269074, -1.544047, -1.404887, -1.347081, -1.537799, -1.639127, -1.349640, -1.103634, -1.138117, -1.760881, -1.420579, -1.323843, -1.032299, -1.430335, -2.765336, -1.074030, -1.288399, -1.587817, -1.396279, -1.300482, -1.502892, -1.626594, -1.382236, -1.108967, -1.107639, -1.735777, -1.445745, -1.355462, -1.046808, -1.382197, -2.772889, -1.092324, -1.265892, -1.568253, -1.402187, -1.333531, -1.536628, -1.620842, -1.335678, -1.126448, -1.134182, -1.736283, -1.429190, -1.336978, -1.008206, -1.427035, -2.891742, -1.079904, -1.274805, -1.533251, -1.437623, -1.319795, -1.544505, -1.616730, -1.364998, -1.100651, -1.135816, -1.717987, -1.427807, -1.348695, -1.037170, -1.395939, -2.869820, -1.075187, -1.281652, -1.563178, -1.391645, -1.330722, -1.561785, -1.564969, -1.351566, -1.132243, -1.135276, -1.753692, -1.446481, -1.308830, -1.049466, -1.397293, -2.844794, -1.065767, -1.267686, -1.575997, -1.412350, -1.316223, -1.525381, -1.640799, -1.342801, -1.116196, -1.151836, -1.735897, -1.440763, -1.305803, -1.043935, -1.348271, -2.878841, -1.102485, -1.274314, -1.572029, -1.376429, -1.346015, -1.558161, -1.611757, -1.320351, -1.130417, -1.133673, -1.740466, -1.428679, -1.335269, -1.039697, -1.437934, -2.813282, -1.052647, 
-1.248541, -1.588230, -1.433751, -1.307938, -1.525019, -1.626636, -1.361401, -1.110204, -1.108592, -1.740549, -1.441143, -1.355201, -1.044865, -1.402472, -2.798538, -1.074778, -1.254115, -1.562304, -1.428485, -1.326715, -1.503085, -1.654462, -1.358160, -1.111003, -1.130063, -1.710850, -1.461723, -1.330146, -1.049879, -1.394269, -2.794267, -1.076325, -1.267423, -1.546161, -1.443419, -1.312077, -1.530486, -1.598548, -1.357101, -1.127092, -1.132145, -1.701283, -1.449039, -1.345483, -1.040908, -1.413072, -2.816953, -1.068033, -1.264140, -1.586210, -1.403335, -1.320349, -1.536345, -1.619063, -1.349468, -1.116660, -1.138421, -1.714675, -1.452341, -1.325645, -1.047573, -1.395043, -2.787677, -1.079322, -1.279992, -1.570790, -1.419219, -1.301303, -1.531744, -1.629160, -1.357032, -1.107682, -1.131961, -1.693852, -1.454590, -1.345947, -1.055820, -1.387077, -2.857468, -1.064574, -1.279489, -1.571223, -1.394330, -1.324145, -1.565953, -1.593316, -1.341349, -1.119729, -1.141971, -1.716747, -1.439104, -1.331727, -1.048025, -1.405601, -2.764073, -1.075531, -1.250373, -1.618283, -1.394657, -1.318841, -1.548085, -1.614809, -1.367391, -1.097675, -1.132050, -1.744872, -1.436953, -1.326855, -1.013179, -1.411746, -2.870563, -1.089001, -1.275670, -1.576496, -1.396809, -1.321732, -1.518169, -1.600788, -1.366269, -1.126724, -1.141804, -1.711171, -1.434892, -1.339555, -1.053648, -1.398157, -2.769449, -1.074150, -1.288803, -1.567119, -1.428879, -1.286714, -1.526594, -1.633157, -1.348661, -1.115273, -1.110227, -1.721046, -1.450828, -1.357665, -1.025323, -1.417910, -2.856881, -1.073963, -1.267067, -1.564645, -1.414618, -1.323647, -1.519019, -1.621279, -1.333260, -1.140025, -1.121248, -1.710170, -1.457343, -1.345390, -1.049402, -1.411537, -2.786622, -1.065791, -1.269033, -1.583067, -1.403392, -1.317551, -1.537491, -1.607340, -1.381091, -1.098544, -1.116858, -1.754235, -1.451849, -1.326077, -1.034520, -1.408438, -2.783995, -1.083861, -1.250146, -1.583102, -1.410356, -1.331262, -1.539492, 
-1.599903, -1.338173, -1.135475, -1.129279, -1.722656, -1.448744, -1.334547, -1.049119, -1.409215, -2.815054, -1.062713, -1.294202, -1.551007, -1.433424, -1.289683, -1.525423, -1.604666, -1.368136, -1.117999, -1.123871, -1.737322, -1.470037, -1.312774, -1.040044, -1.438515, -2.792138, -1.055578, -1.280080, -1.579120, -1.402545, -1.309859, -1.509734, -1.614094, -1.345595, -1.140762, -1.120660, -1.754441, -1.444765, -1.327535, -1.074713, -1.373294, -2.828861, -1.060586, -1.276879, -1.568597, -1.427469, -1.298866, -1.536595, -1.602742, -1.372685, -1.108304, -1.137033, -1.730855, -1.448616, -1.319747, -1.060341, -1.415689, -2.805606, -1.048583, -1.282428, -1.600270, -1.391641, -1.301483, -1.532328, -1.611555, -1.362008, -1.113996, -1.147518, -1.719185, -1.428671, -1.332803, -1.044867, -1.409073, -2.849344, -1.061289, -1.288718, -1.581987, -1.426637, -1.277638, -1.528099, -1.640736, -1.349071, -1.109469, -1.134894, -1.705976, -1.423231, -1.362521, -1.045581, -1.389165, -2.805240, -1.082532, -1.278257, -1.601620, -1.384382, -1.311448, -1.557634, -1.638720, -1.337812, -1.100459, -1.129551, -1.752663, -1.429178, -1.331791, -1.056237, -1.414732, -2.765237, -1.060489, -1.225204, -1.631949, -1.423710, -1.309573, -1.518164, -1.617299, -1.353250, -1.126848, -1.132538, -1.719471, -1.437950, -1.342429, -1.060783, -1.400616, -2.817022, -1.056741, -1.268395, -1.596352, -1.385245, -1.324934, -1.516963, -1.626723, -1.353563, -1.121681, -1.132348, -1.693098, -1.445234, -1.354467, -1.044467, -1.397903, -2.830356, -1.072902, -1.318915, -1.544277, -1.443369, -1.262387, -1.528711, -1.603032, -1.363463, -1.120471, -1.133361, -1.706959, -1.459906, -1.330383, -1.028045, -1.432302, -2.770285, -1.076191, -1.284631, -1.553543, -1.405709, -1.322169, -1.512762, -1.627493, -1.322521, -1.149437, -1.127699, -1.761013, -1.432091, -1.325979, -1.060534, -1.397629, -2.818619, -1.058838, -1.314872, -1.598540, -1.423470, -1.243327, -1.542966, -1.632017, -1.353518, -1.101449, -1.142571, -1.732257, 
-1.430842, -1.327987, -1.037607, -1.426054, -2.793440, -1.066437, -1.266382, -1.618981, -1.395934, -1.300307, -1.514827, -1.623753, -1.335079, -1.139874, -1.104185, -1.764759, -1.475759, -1.314213, -1.065700, -1.396333, -2.793175, -1.059054, -1.293279, -1.608780, -1.414928, -1.263880, -1.524227, -1.647857, -1.351876, -1.105654, -1.158217, -1.711439, -1.431770, -1.322520, -1.064138, -1.408448, -2.711574, -1.067055, -1.297836, -1.592969, -1.369635, -1.311718, -1.550784, -1.668413, -1.336807, -1.088595, -1.139800, -1.754740, -1.437111, -1.310995, -1.041886, -1.425671, -2.804109, -1.060440, -1.313742, -1.580650, -1.395545, -1.281399, -1.526701, -1.641460, -1.366684, -1.096312, -1.128676, -1.720953, -1.439682, -1.344615, -1.044461, -1.415129, -2.770190, -1.071267, -1.274958, -1.572709, -1.370952, -1.350118, -1.547719, -1.608459, -1.328143, -1.132885, -1.124899, -1.776905, -1.456387, -1.298002, -1.046609, -1.458791, -2.842561, -1.026998, -1.300217, -1.628569, -1.425220, -1.234908, -1.519043, -1.618752, -1.383119, -1.102205, -1.129110, -1.712391, -1.451658, -1.339168, -1.037982, -1.409258, -2.805602, -1.075766, -1.306689, -1.538553, -1.349563, -1.365551, -1.534368, -1.621379, -1.332399, -1.130293, -1.126573, -1.765379, -1.452495, -1.306559, -1.050783, -1.390100, -2.806970, -1.076172, -1.311387, -1.588396, -1.393596, -1.279703, -1.554273, -1.630875, -1.354944, -1.093813, -1.125414, -1.766618, -1.428970, -1.327964, -1.019173, -1.403413, -2.748562, -1.110695, -1.293193, -1.602554, -1.358945, -1.319408, -1.537744, -1.635890, -1.295300, -1.150305, -1.139669, -1.727977, -1.435710, -1.329970, -1.047735, -1.410837, -2.836927, -1.059226, -1.315044, -1.616241, -1.443033, -1.215091, -1.509565, -1.601313, -1.377765, -1.123250, -1.147084, -1.706149, -1.415484, -1.354537, -1.019558, -1.411698, -2.764523, -1.101084, -1.269213, -1.574792, -1.387169, -1.338971, -1.559372, -1.646411, -1.301203, -1.124584, -1.147703, -1.729602, -1.436708, -1.318370, -1.055289, -1.385604, -2.780210, 
-1.079663, -1.351219, -1.531050, -1.461460, -1.228028, -1.549385, -1.617898, -1.361534, -1.099496, -1.151407, -1.754948, -1.411830, -1.319776, -1.050915, -1.401599, -2.688128, -1.090135, -1.241243, -1.573631, -1.409243, -1.349553, -1.545278, -1.646472, -1.275865, -1.155725, -1.115330, -1.725386, -1.467504, -1.333365, -1.013765, -1.454776, -2.790033, -1.072132, -1.297666, -1.591738, -1.465940, -1.229635, -1.566106, -1.635796, -1.326129, -1.106112, -1.132382, -1.694966, -1.435779, -1.361797, -1.035082, -1.415765, -2.750601, -1.084196, -1.263016, -1.542865, -1.379261, -1.379645, -1.538440, -1.652647, -1.277014, -1.155586, -1.158534, -1.717264, -1.426202, -1.323216, -1.035753, -1.442324, -2.874427, -1.043514, -1.320681, -1.606422, -1.415923, -1.238768, -1.559786, -1.633414, -1.371349, -1.076480, -1.132807, -1.742767, -1.395414, -1.366075, -1.040805, -1.433209, -2.716573, -1.072399, -1.302183, -1.590220, -1.350175, -1.328075, -1.528791, -1.664106, -1.280650, -1.152057, -1.155969, -1.720721, -1.472503, -1.283883, -1.041538, -1.462711, -2.761848, -1.043255, -1.365532, -1.606216, -1.425608, -1.191558, -1.565445, -1.628702, -1.355364, -1.087768, -1.183324, -1.735393, -1.405177, -1.301636, -1.029218, -1.411087, -2.845431, -1.076675, -1.293637, -1.628141, -1.329458, -1.328729, -1.570520, -1.639295, -1.281771, -1.138183, -1.184483, -1.772223, -1.389960, -1.290782, -1.045807, -1.419826, -2.801157, -1.061039, -1.367126, -1.625682, -1.400528, -1.197602, -1.553913, -1.626613, -1.335963, -1.111449, -1.147511, -1.738507, -1.419002, -1.328644, -1.028389, -1.418593, -2.796172, -1.080812, -1.237031, -1.594943, -1.390488, -1.355255, -1.577765, -1.673013, -1.259595, -1.132848, -1.188812, -1.715508, -1.409654, -1.304466, -1.065507, -1.413570, -2.871109, -1.034124, -1.355146, -1.621385, -1.426527, -1.189662, -1.563022, -1.578627, -1.369474, -1.108665, -1.172417, -1.736817, -1.379024, -1.337588, -1.007130, -1.421078, -2.740301, -1.112520, -1.264406, -1.579098, -1.332746, -1.395617, 
-1.567721, -1.652838, -1.233003, -1.175845, -1.203550, -1.698913, -1.401067, -1.306903, -1.042003, -1.405527, -2.856929, -1.065459, -1.329379, -1.580363, -1.514550, -1.172633, -1.533140, -1.591912, -1.366855, -1.121780, -1.189564, -1.653730, -1.417380, -1.339552, -1.076494, -1.348020, -2.793821, -1.083908, -1.335665, -1.489445, -1.400451, -1.327900, -1.564922, -1.589900, -1.259134, -1.193858, -1.165974, -1.762895, -1.387537, -1.319968, -1.060081, -1.440125, -2.778800, -1.036874, -1.356823, -1.631915, -1.476295, -1.144179, -1.552356, -1.644394, -1.317984, -1.116427, -1.128674, -1.567696, -1.470539, -1.434596, -0.991671, -1.306884, -2.611551, -1.255403, -1.266786, -1.487478, -1.395204, -1.408377, -1.601340, -1.509013, -1.205371, -1.281340, -1.131920, -2.753591, -1.393460, -1.006017, -0.901514, -2.705863, -2.954560, -0.744164, -1.265302, -2.731009, -1.407433, -0.896680, -1.460255, -2.900763, -1.202876, -0.885489}; double score_in_don[NUM_VALUES_SCORES] = {-0.478520, 0.000100, 0.000100, 0.000100, -3.470547, 0.000100, 0.000100, 0.000100, -1.146963, 0.000100, 0.000100, 0.000100, -3.454598, 0.000100, 0.000100, 0.000100, -0.415031, -2.375713, -2.259474, -1.949628, -0.328148, -2.520914, -3.174837, -1.847972, -0.183708, -3.100533, -2.491223, -3.219190, -0.935461, -2.922524, -1.451677, -1.140622, -2.447088, -3.145141, -0.202256, -2.927916, -1.341329, -1.830612, -0.989187, -1.578387, -2.560945, -3.098087, -0.182121, -3.120559, -2.420583, -2.470345, -0.358279, -2.058100, -1.370294, -1.902511, -1.226400, -1.192613, -1.203973, -1.382792, -2.171248, -1.093363, -1.716096, -2.018802, -1.959097, -0.604343, -1.670320, -2.090491, -1.139263, -0.999314, -1.061786, -1.735801, -1.364941, -1.502794, -1.011423, -1.508000, -2.117894, -1.221927, -1.121546, -1.432701, -1.521532, -1.527011, -1.171592, -1.915998, -1.144701, -1.493377, -1.360640, -1.488907, -1.564893, -1.175145, -1.235830, -1.235830, -2.437877, -1.104227, -1.434473, -1.351348, -1.381335, -1.379815, -1.703031, -1.583154, -1.339306, 
-1.048351, -1.244275, -1.529555, -1.482686, -1.316055, -1.292946, -1.202040, -2.470091, -1.077688, -1.391529, -1.361451, -1.216870, -1.615149, -1.828850, -1.450746, -1.320862, -1.084409, -1.227998, -1.585702, -1.417540, -1.347041, -1.264398, -1.287256, -2.297413, -1.075741, -1.543335, -1.419175, -1.116076, -1.528520, -1.754513, -1.551330, -1.286488, -1.082335, -1.185089, -1.636288, -1.392199, -1.382098, -1.196416, -1.309063, -2.332659, -1.106804, -1.515828, -1.347139, -1.162641, -1.571509, -1.763248, -1.524356, -1.332962, -1.058267, -1.240804, -1.586059, -1.412742, -1.336961, -1.214265, -1.328283, -2.226224, -1.108042, -1.498936, -1.398395, -1.172126, -1.514440, -1.773840, -1.670736, -1.297199, -0.997189, -1.269479, -1.582050, -1.406810, -1.314735, -1.290538, -1.268065, -2.425517, -1.035443, -1.530734, -1.437208, -1.178973, -1.433643, -1.763087, -1.526391, -1.403493, -1.006682, -1.243740, -1.555666, -1.404148, -1.366212, -1.256408, -1.207976, -2.514448, -1.091786, -1.503633, -1.421132, -1.193211, -1.456749, -1.745093, -1.608793, -1.326772, -1.021902, -1.259102, -1.638934, -1.326612, -1.360513, -1.268417, -1.248940, -2.424632, -1.068847, -1.447361, -1.453016, -1.188763, -1.485680, -1.712074, -1.579692, -1.415243, -0.992622, -1.264235, -1.647912, -1.350045, -1.324767, -1.299575, -1.237346, -2.416001, -1.055779, -1.544733, -1.567819, -1.077587, -1.436679, -1.729687, -1.591537, -1.316842, -1.046777, -1.231284, -1.622855, -1.382004, -1.348331, -1.277873, -1.307432, -2.381351, -1.026019, -1.508645, -1.506774, -1.163648, -1.407224, -1.717651, -1.672881, -1.375180, -0.967550, -1.184668, -1.671036, -1.454626, -1.299704, -1.270001, -1.253687, -2.391908, -1.072175, -1.531260, -1.531260, -1.103382, -1.445082, -1.821439, -1.620947, -1.293472, -1.004632, -1.247869, -1.592098, -1.386294, -1.349439, -1.285058, -1.285058, -2.422428, -1.027153, -1.577637, -1.420008, -1.169465, -1.421745, -1.678523, -1.571602, -1.419905, -1.010872, -1.226678, -1.685121, -1.385442, -1.305269, 
-1.277200, -1.244964, -2.422109, -1.065668, -1.614304, -1.503195, -1.127262, -1.367929, -1.658876, -1.550967, -1.359779, -1.076204, -1.276047, -1.606486, -1.405050, -1.291291, -1.339533, -1.190471, -2.533962, -1.036737, -1.521923, -1.602131, -1.119097, -1.371733, -1.681636, -1.615341, -1.350955, -1.032501, -1.175866, -1.686227, -1.453386, -1.300286, -1.244745, -1.261973, -2.624847, -1.031589, -1.568703, -1.556306, -1.078886, -1.423802, -1.676537, -1.560465, -1.356065, -1.063433, -1.256683, -1.703513, -1.360277, -1.284565, -1.292962, -1.301247, -2.324564, -1.034159, -1.614676, -1.499083, -1.086496, -1.425811, -1.667153, -1.636538, -1.341347, -1.035440, -1.241680, -1.714656, -1.301285, -1.351336, -1.232077, -1.294495, -2.448871, -1.055879, -1.524567, -1.503870, -1.171130, -1.386294, -1.691123, -1.745093, -1.287411, -1.007655, -1.265136, -1.682488, -1.349867, -1.299700, -1.204612, -1.356103, -2.345644, -1.059171, -1.483668, -1.571011, -1.124089, -1.425400, -1.682649, -1.553702, -1.317474, -1.094089, -1.223528, -1.620625, -1.345522, -1.395816, -1.205999, -1.341092, -2.454092, -1.040987, -1.488605, -1.520002, -1.120181, -1.472374, -1.641671, -1.554229, -1.385961, -1.064477, -1.240167, -1.667425, -1.369139, -1.318241, -1.250047, -1.285477, -2.415732, -1.056449, -1.541859, -1.535936, -1.135734, -1.387991, -1.688436, -1.629647, -1.323538, -1.041284, -1.295957, -1.648076, -1.370182, -1.273382, -1.162504, -1.300053, -2.605001, -1.076232, -1.562899, -1.535119, -1.146096, -1.358030, -1.695322, -1.629894, -1.248039, -1.098286, -1.270045, -1.623324, -1.353815, -1.332997, -1.228781, -1.376632, -2.537588, -0.978520, -1.528723, -1.521075, -1.151155, -1.392556, -1.608123, -1.683116, -1.322907, -1.056752, -1.207401, -1.720060, -1.348813, -1.337798, -1.269988, -1.263254, -2.572090, -1.021493, -1.495005, -1.561435, -1.171580, -1.362419, -1.628337, -1.661785, -1.326772, -1.053796, -1.220525, -1.688599, -1.349927, -1.343667, -1.204761, -1.314689, -2.348542, -1.090138, -1.472608, 
-1.533930, -1.182606, -1.392280, -1.621516, -1.673760, -1.309502, -1.064484, -1.234769, -1.661025, -1.421215, -1.281093, -1.265666, -1.342627, -2.400272, -1.004857, -1.550538, -1.628446, -1.105751, -1.344502, -1.679551, -1.590604, -1.304970, -1.083057, -1.228979, -1.694131, -1.355383, -1.325005, -1.210006, -1.317491, -2.476396, -1.049958, -1.448170, -1.508462, -1.143348, -1.491655, -1.662343, -1.643839, -1.295533, -1.069100, -1.199949, -1.693521, -1.384683, -1.329853, -1.167471, -1.309646, -2.483970, -1.092073, -1.494983, -1.556070, -1.117054, -1.437413, -1.653371, -1.620581, -1.337152, -1.054920, -1.227113, -1.743366, -1.341687, -1.307381, -1.206638, -1.305565, -2.476022, -1.062178, -1.491702, -1.630339, -1.100725, -1.400322, -1.581649, -1.657893, -1.343288, -1.070497, -1.194240, -1.712510, -1.325530, -1.382337, -1.202348, -1.366014, -2.466539, -1.023007, -1.473154, -1.602804, -1.109066, -1.428782, -1.632520, -1.686399, -1.271455, -1.081888, -1.266135, -1.699409, -1.363557, -1.274610, -1.201668, -1.252269, -2.464348, -1.113560, -1.426111, -1.577752, -1.188665, -1.391539, -1.647515, -1.658060, -1.272821, -1.087899, -1.219364, -1.781363, -1.343802, -1.289816, -1.236605, -1.274721, -2.411203, -1.077588, -1.426147, -1.611935, -1.151088, -1.410696, -1.670858, -1.658479, -1.265393, -1.080701, -1.176248, -1.772666, -1.361700, -1.326284, -1.145826, -1.372794, -2.567866, -1.044290, -1.489247, -1.513124, -1.195684, -1.379360, -1.580593, -1.613437, -1.359714, -1.084086, -1.179716, -1.760252, -1.329693, -1.362289, -1.204934, -1.281510, -2.464934, -1.085727, -1.446594, -1.590971, -1.184088, -1.366976, -1.622109, -1.607121, -1.311619, -1.100602, -1.203973, -1.807508, -1.370790, -1.265737, -1.232484, -1.239041, -2.519317, -1.083951, -1.423066, -1.512298, -1.236110, -1.393880, -1.706609, -1.604045, -1.272387, -1.086853, -1.228766, -1.655022, -1.320652, -1.389108, -1.258834, -1.217094, -2.451936, -1.097217, -1.410517, -1.553348, -1.227029, -1.381179, -1.616413, -1.542544, 
-1.343845, -1.118673, -1.185551, -1.734468, -1.365009, -1.337314, -1.173233, -1.285471, -2.532632, -1.094626, -1.522534, -1.558683, -1.178763, -1.332680, -1.633058, -1.612188, -1.309982, -1.092452, -1.171691, -1.657834, -1.403769, -1.370669, -1.174450, -1.300220, -2.536791, -1.080519, -1.441655, -1.584336, -1.159452, -1.407632, -1.663487, -1.683008, -1.279935, -1.059430, -1.189139, -1.703385, -1.357474, -1.362030, -1.169648, -1.307571, -2.593446, -1.066283, -1.406878, -1.566242, -1.225135, -1.376159, -1.666063, -1.639769, -1.324782, -1.046614, -1.218194, -1.691620, -1.386683, -1.308916, -1.161347, -1.315245, -2.528106, -1.082589, -1.398978, -1.575681, -1.187166, -1.422131, -1.638318, -1.655501, -1.296748, -1.075059, -1.190472, -1.789820, -1.340375, -1.319816, -1.217771, -1.332328, -2.504212, -1.025803, -1.517203, -1.571270, -1.168552, -1.339114, -1.651736, -1.690817, -1.261485, -1.076808, -1.230710, -1.662657, -1.375505, -1.325882, -1.182748, -1.266972, -2.586972, -1.088760, -1.452929, -1.586965, -1.209498, -1.334753, -1.551249, -1.616300, -1.293979, -1.154526, -1.210366, -1.722064, -1.367357, -1.315145, -1.171574, -1.374252, -2.559373, -1.022376, -1.417919, -1.611572, -1.220455, -1.335172, -1.578029, -1.646082, -1.301095, -1.112972, -1.190627, -1.721002, -1.354930, -1.350475, -1.205775, -1.263126, -2.544059, -1.081094, -1.378114, -1.591443, -1.189522, -1.427333, -1.614376, -1.659954, -1.231159, -1.142930, -1.164000, -1.761725, -1.372115, -1.337706, -1.158192, -1.270851, -2.690048, -1.086261, -1.425400, -1.574987, -1.218511, -1.359040, -1.611861, -1.691182, -1.290104, -1.076047, -1.192362, -1.705701, -1.370427, -1.343798, -1.156881, -1.304133, -2.623218, -1.074238, -1.381117, -1.533262, -1.278170, -1.369141, -1.570153, -1.680341, -1.294176, -1.103969, -1.215735, -1.721485, -1.356197, -1.320248, -1.225687, -1.319372, -2.636673, -1.000918, -1.430189, -1.599409, -1.187458, -1.371505, -1.543365, -1.642620, -1.316136, -1.124862, -1.191911, -1.707851, -1.402695, 
-1.312408, -1.169159, -1.323569, -2.482012, -1.079947, -1.415080, -1.547348, -1.237193, -1.370089, -1.666285, -1.696509, -1.254419, -1.071517, -1.184525, -1.722064, -1.398463, -1.315145, -1.154293, -1.399927, -2.584481, -1.014264, -1.359783, -1.543435, -1.232083, -1.435682, -1.656493, -1.651308, -1.290503, -1.072179, -1.217448, -1.722690, -1.341061, -1.332328, -1.203485, -1.371278, -2.489308, -1.013066, -1.362713, -1.640277, -1.240046, -1.344241, -1.596653, -1.618053, -1.260879, -1.152845, -1.249875, -1.703592, -1.302373, -1.348825, -1.136674, -1.334117, -2.518384, -1.093222, -1.393040, -1.534580, -1.264597, -1.371282, -1.642395, -1.611780, -1.306663, -1.089960, -1.198532, -1.718877, -1.377757, -1.320685, -1.170544, -1.394829, -2.487743, -1.024909, -1.458147, -1.469177, -1.273391, -1.357368, -1.619607, -1.609438, -1.291444, -1.117336, -1.269075, -1.653270, -1.317379, -1.348424, -1.209810, -1.311310, -2.517036, -1.045315, -1.343662, -1.577716, -1.288453, -1.358801, -1.616082, -1.612755, -1.301272, -1.109265, -1.180224, -1.806762, -1.359997, -1.302161, -1.198376, -1.325505, -2.567759, -1.032993, -1.372981, -1.521742, -1.278602, -1.386727, -1.677430, -1.615126, -1.305867, -1.069024, -1.261931, -1.705137, -1.335934, -1.301097, -1.148696, -1.433362, -2.451005, -1.026559, -1.462535, -1.551791, -1.220966, -1.341174, -1.601954, -1.624906, -1.327323, -1.089509, -1.211231, -1.751174, -1.350265, -1.311454, -1.188331, -1.308918, -2.563487, -1.055200, -1.408767, -1.648659, -1.198291, -1.341174, -1.635473, -1.596188, -1.296143, -1.112040, -1.182386, -1.710843, -1.385902, -1.336963, -1.279484, -1.281224, -2.583997, -0.997907, -1.400794, -1.552656, -1.219045, -1.400794, -1.616082, -1.663873, -1.334670, -1.053614, -1.257500, -1.634968, -1.357948, -1.333742, -1.189991, -1.231398, -2.635740, -1.101940, -1.456475, -1.599431, -1.248835, -1.279798, -1.597124, -1.568551, -1.314285, -1.137846, -1.180544, -1.671728, -1.452397, -1.305793, -1.161614, -1.294320, -2.536414, -1.097256, 
-1.388047, -1.559898, -1.313689, -1.303964, -1.646252, -1.651474, -1.318999, -1.055384, -1.221098, -1.658899, -1.404459, -1.312132, -1.147145, -1.301788, -2.688082, -1.071771, -1.357766, -1.578680, -1.243163, -1.394134, -1.593334, -1.571217, -1.372110, -1.092462, -1.187524, -1.771086, -1.267879, -1.412850, -1.165925, -1.333248, -2.515851, -1.067253, -1.447831, -1.558372, -1.229349, -1.339567, -1.640178, -1.603137, -1.335068, -1.073968, -1.183615, -1.701648, -1.338188, -1.389793, -1.262881, -1.269684, -2.531016, -1.030963, -1.391844, -1.557187, -1.280168, -1.336876, -1.667305, -1.593831, -1.297945, -1.093605, -1.225439, -1.652703, -1.392248, -1.323043, -1.138797, -1.377988, -2.512151, -1.059534, -1.453866, -1.538860, -1.291811, -1.283967, -1.617459, -1.547796, -1.362609, -1.099945, -1.186863, -1.690065, -1.380473, -1.351485, -1.170482, -1.404489, -2.709438, -0.973612, -1.453283, -1.515241, -1.252084, -1.345019, -1.641302, -1.670389, -1.306277, -1.057647, -1.245380, -1.705138, -1.366216, -1.289709, -1.228141, -1.314402, -2.698895, -0.990883, -1.466782, -1.544899, -1.263487, -1.297063, -1.650327, -1.548813, -1.332632, -1.103432, -1.220773, -1.797056, -1.375383, -1.250125, -1.229243, -1.384448, -2.538050, -0.972851, -1.386294, -1.581847, -1.275443, -1.327657, -1.599243, -1.659670, -1.354466, -1.050783, -1.251980, -1.749200, -1.348249, -1.271344, -1.207467, -1.340998, -2.620789, -1.003166, -1.422861, -1.586367, -1.320298, -1.247391, -1.646544, -1.656284, -1.297003, -1.069767, -1.175943, -1.715872, -1.408255, -1.320148, -1.099103, -1.391213, -2.513778, -1.087390, -1.368057, -1.556940, -1.357783, -1.282337, -1.582868, -1.609125, -1.305144, -1.128807, -1.189052, -1.731376, -1.347905, -1.352219, -1.199483, -1.336449, -2.646801, -1.007871, -1.387193, -1.607643, -1.276631, -1.306044, -1.611761, -1.585530, -1.337457, -1.099275, -1.173668, -1.760708, -1.386294, -1.313616, -1.168154, -1.397877, -2.644832, -0.991765, -1.473749, -1.497233, -1.243282, -1.351891, -1.645721, 
-1.550411, -1.321429, -1.114096, -1.197093, -1.758327, -1.349090, -1.323810, -1.188033, -1.340118, -2.607426, -1.022650, -1.411782, -1.551544, -1.320102, -1.282790, -1.609438, -1.582039, -1.340309, -1.100575, -1.226956, -1.769545, -1.325442, -1.306669, -1.217049, -1.229973, -2.595349, -1.087740, -1.364081, -1.534176, -1.249623, -1.418246, -1.631705, -1.580080, -1.354424, -1.077782, -1.230299, -1.688045, -1.363447, -1.319995, -1.209053, -1.330314, -2.501159, -1.035255, -1.433944, -1.471472, -1.277428, -1.373211, -1.628974, -1.677764, -1.312377, -1.055844, -1.183717, -1.700598, -1.355487, -1.372528, -1.156130, -1.309038, -2.551419, -1.086938, -1.335265, -1.532412, -1.281550, -1.413498, -1.663939, -1.633480, -1.390534, -1.003983, -1.234816, -1.732189, -1.298394, -1.349981, -1.210419, -1.308098, -2.665255, -1.016077, -1.416106, -1.543009, -1.311864, -1.293576, -1.650438, -1.641899, -1.310541, -1.064941, -1.174096, -1.728264, -1.353851, -1.366235, -1.215146, -1.332528, -2.722439, -0.984009, -1.366741, -1.561881, -1.347985, -1.288952, -1.641852, -1.599500, -1.305855, -1.098290, -1.518318, 0.000100, 0.000100, 0.000100, -1.650989, 0.000100, 0.000100, 0.000100, -1.228402, 0.000100, 0.000100, 0.000100, -1.216383, 0.000100, 0.000100, 0.000100, -1.156200, -1.788296, -1.501707, -1.219676, -1.085152, -1.409175, -2.928513, -1.009700, -1.359202, -1.665857, -1.398529, -1.180433, -1.588399, -1.610113, -1.532489, -0.967886, -1.195646, -1.629226, -1.437924, -1.331844, -1.045906, -1.402171, -2.878129, -1.060378, -1.304552, -1.549151, -1.341843, -1.366829, -1.600224, -1.601107, -1.285358, -1.139672, -1.175097, -1.835931, -1.409790, -1.246354, -1.099655, -1.419533, -2.840918, -1.002897, -1.366974, -1.666650, -1.447375, -1.136135, -1.574024, -1.622230, -1.372203, -1.073574, -1.158644, -1.718377, -1.426295, -1.322253, -0.994002, -1.441001, -2.797775, -1.101803, -1.285864, -1.604010, -1.342650, -1.341906, -1.551531, -1.705969, -1.221082, -1.166133, -1.173539, -1.749182, -1.426353, 
-1.285074, -1.020789, -1.462937, -2.878790, -1.044319, -1.337747, -1.600078, -1.440283, -1.207906, -1.534570, -1.623598, -1.400524, -1.076472, -1.155850, -1.752763, -1.407940, -1.319500, -1.056948, -1.422322, -2.716046, -1.063643, -1.242005, -1.579490, -1.397563, -1.355118, -1.566043, -1.664274, -1.253461, -1.151105, -1.178903, -1.767702, -1.417641, -1.275203, -1.017385, -1.426898, -2.732214, -1.098612, -1.337898, -1.663610, -1.408090, -1.192244, -1.532042, -1.609303, -1.347952, -1.126681, -1.170419, -1.682621, -1.438948, -1.321631, -1.011322, -1.395969, -2.827050, -1.110233, -1.257048, -1.544841, -1.414122, -1.350840, -1.561129, -1.634762, -1.267677, -1.159581, -1.181508, -1.744166, -1.413459, -1.290645, -1.047779, -1.452279, -2.863266, -1.026760, -1.353832, -1.608056, -1.445722, -1.184502, -1.569288, -1.631044, -1.341875, -1.094442, -1.157210, -1.695319, -1.385427, -1.378802, -1.019484, -1.460449, -2.739130, -1.071600, -1.299219, -1.553907, -1.398654, -1.313394, -1.571478, -1.611526, -1.256183, -1.177849, -1.173777, -1.681616, -1.470781, -1.291042, -1.062049, -1.393770, -2.872693, -1.051055, -1.333589, -1.613382, -1.449718, -1.195274, -1.517493, -1.630471, -1.375772, -1.101846, -1.151812, -1.722895, -1.421518, -1.331653, -1.069317, -1.440340, -2.777490, -1.028017, -1.310492, -1.545073, -1.396636, -1.310859, -1.573239, -1.645737, -1.280487, -1.133653, -1.163424, -1.716389, -1.419252, -1.324347, -1.054260, -1.390561, -2.805094, -1.072614, -1.304788, -1.619053, -1.426564, -1.235976, -1.556064, -1.579214, -1.349738, -1.128279, -1.134084, -1.752204, -1.418147, -1.336631, -1.046328, -1.371239, -2.738557, -1.107697, -1.262807, -1.569511, -1.399226, -1.338624, -1.561983, -1.634813, -1.261913, -1.164177, -1.149102, -1.730105, -1.435239, -1.317684, -1.048281, -1.411939, -2.773963, -1.068930, -1.344283, -1.624986, -1.400823, -1.217514, -1.558488, -1.576909, -1.360161, -1.119896, -1.143615, -1.740395, -1.407704, -1.342547, -1.024624, -1.417717, -2.743840, -1.095181, 
-1.273648, -1.580273, -1.391559, -1.325821, -1.551185, -1.647419, -1.288679, -1.139989, -1.158992, -1.702199, -1.438096, -1.322245, -1.023658, -1.402571, -2.801916, -1.096422, -1.304857, -1.576896, -1.427636, -1.264792, -1.521862, -1.661267, -1.343430, -1.106098, -1.138200, -1.741679, -1.427852, -1.329704, -1.047245, -1.390151, -2.798486, -1.081292, -1.285624, -1.535460, -1.407041, -1.334466, -1.545106, -1.636580, -1.317879, -1.125890, -1.176447, -1.702330, -1.425002, -1.313561, -1.063059, -1.399984, -2.892036, -1.042596, -1.309093, -1.619154, -1.422325, -1.235408, -1.547213, -1.593763, -1.348497, -1.125871, -1.132469, -1.789614, -1.419485, -1.313414, -1.019486, -1.410987, -2.779834, -1.098807, -1.279151, -1.548954, -1.395255, -1.341371, -1.575319, -1.631627, -1.310876, -1.115158, -1.153088, -1.737641, -1.409965, -1.330849, -1.056724, -1.368546, -2.866388, -1.075795, -1.298011, -1.601275, -1.388999, -1.287493, -1.528927, -1.623465, -1.369626, -1.103176, -1.128337, -1.735637, -1.420318, -1.352845, -1.008316, -1.398550, -2.865152, -1.104919, -1.251055, -1.556816, -1.404598, -1.356637, -1.541305, -1.628064, -1.304234, -1.145047, -1.175293, -1.672557, -1.429697, -1.331338, -1.016490, -1.432441, -2.816684, -1.079964, -1.287691, -1.621387, -1.433402, -1.244891, -1.524094, -1.624874, -1.376315, -1.100400, -1.158464, -1.704643, -1.434707, -1.324223, -1.050512, -1.430522, -2.728158, -1.062132, -1.249473, -1.557418, -1.434639, -1.330052, -1.585114, -1.580638, -1.321346, -1.131790, -1.164100, -1.728435, -1.402215, -1.331113, -1.059808, -1.372555, -2.796179, -1.081825, -1.300133, -1.559798, -1.450537, -1.262727, -1.550550, -1.617361, -1.348561, -1.109167, -1.154306, -1.698963, -1.438007, -1.330101, -1.007274, -1.425422, -2.771039, -1.103274, -1.289683, -1.534921, -1.398789, -1.338369, -1.521240, -1.633265, -1.298097, -1.160858, -1.159115, -1.712770, -1.421435, -1.329901, -1.042185, -1.415047, -2.789384, -1.070190, -1.257303, -1.572847, -1.430407, -1.313361, -1.566241, 
-1.601882, -1.334236, -1.119954, -1.152726, -1.704643, -1.405726, -1.357905, -1.049265, -1.398059, -2.819276, -1.069792, -1.279642, -1.585487, -1.385914, -1.320806, -1.544011, -1.637666, -1.306899, -1.135113, -1.161998, -1.691435, -1.434283, -1.329543, -1.055501, -1.391015, -2.890764, -1.056629, -1.308657, -1.611665, -1.400201, -1.259785, -1.552974, -1.599023, -1.364183, -1.106546, -1.137087, -1.730976, -1.404839, -1.359731, -1.048713, -1.406299, -2.866579, -1.056502, -1.301594, -1.613486, -1.362064, -1.299828, -1.565280, -1.617398, -1.295468, -1.143234, -1.152845, -1.701040, -1.439481, -1.329089, -1.041800, -1.414177, -2.781930, -1.072546, -1.294359, -1.615712, -1.366353, -1.301431, -1.523239, -1.588825, -1.381001, -1.119316, -1.153405, -1.744755, -1.396023, -1.338754, -1.035950, -1.395026, -2.850257, -1.080414, -1.247154, -1.600895, -1.389541, -1.340277, -1.523609, -1.631927, -1.309119, -1.150540, -1.132256, -1.747000, -1.444114, -1.318847, -1.050389, -1.407550, -2.727585, -1.078576, -1.276440, -1.564233, -1.411729, -1.316764, -1.518904, -1.645848, -1.372724, -1.094271, -1.128775, -1.765789, -1.397534, -1.353583, -1.030513, -1.369269, -2.794565, -1.115529, -1.282375, -1.556538, -1.402843, -1.324784, -1.544759, -1.628896, -1.305972, -1.140743, -1.119645, -1.730341, -1.433179, -1.355451, -1.061603, -1.398643, -2.750161, -1.069297, -1.257066, -1.591139, -1.432208, -1.298145, -1.561296, -1.586714, -1.361100, -1.111209, -1.138677, -1.770062, -1.397005, -1.339034, -1.054093, -1.406581, -2.746608, -1.071872, -1.280665, -1.596271, -1.351932, -1.344167, -1.526404, -1.610039, -1.334293, -1.141123, -1.142144, -1.671001, -1.464105, -1.341236, -1.049070, -1.395982, -2.712133, -1.091383, -1.306748, -1.563136, -1.359548, -1.335425, -1.540016, -1.595523, -1.375589, -1.108271, -1.131113, -1.759395, -1.440197, -1.315664, -1.030385, -1.402887, -2.793080, -1.090608, -1.247253, -1.560457, -1.393368, -1.368732, -1.522937, -1.618093, -1.338765, -1.134808, -1.141393, -1.776498, 
-1.426213, -1.304974, -1.065803, -1.371063, -2.785661, -1.078740, -1.308561, -1.582943, -1.397689, -1.282821, -1.519913, -1.613985, -1.351621, -1.129001, -1.147093, -1.701519, -1.430650, -1.343688, -1.043280, -1.397598, -2.758296, -1.087337, -1.278849, -1.569831, -1.353456, -1.365644, -1.563026, -1.579930, -1.328687, -1.140420, -1.147734, -1.734766, -1.425837, -1.324650, -1.015115, -1.428704, -2.838127, -1.080326, -1.277021, -1.601112, -1.404764, -1.294495, -1.546907, -1.601277, -1.374686, -1.101005, -1.136711, -1.730971, -1.422518, -1.343584, -1.052933, -1.396624, -2.832028, -1.064898, -1.266755, -1.593073, -1.372877, -1.341021, -1.546281, -1.616671, -1.335252, -1.122974, -1.124485, -1.750968, -1.437326, -1.331760, -1.009070, -1.425936, -2.796789, -1.096155, -1.292527, -1.549514, -1.407414, -1.315660, -1.513669, -1.615169, -1.368900, -1.118849, -1.136501, -1.756182, -1.431777, -1.318737, -1.047698, -1.432997, -2.782393, -1.053334, -1.261736, -1.590033, -1.393524, -1.329083, -1.508654, -1.633979, -1.349313, -1.126325, -1.139590, -1.768739, -1.423105, -1.314766, -1.044402, -1.420913, -2.811372, -1.059924, -1.268332, -1.590288, -1.403405, -1.312769, -1.535330, -1.559744, -1.386494, -1.125545, -1.126691, -1.762346, -1.415594, -1.341424, -1.035708, -1.437582, -2.731503, -1.071768, -1.262056, -1.571996, -1.388213, -1.347929, -1.527746, -1.603740, -1.364278, -1.120038, -1.128189, -1.740604, -1.439334, -1.332256, -1.037474, -1.384311, -2.851044, -1.086572, -1.264251, -1.581786, -1.419796, -1.308673, -1.536749, -1.609101, -1.377915, -1.100364, -1.129624, -1.745882, -1.423683, -1.341211, -1.077492, -1.360869, -2.718186, -1.087318, -1.292844, -1.560837, -1.393002, -1.319675, -1.542755, -1.599767, -1.350726, -1.123275, -1.122939, -1.719909, -1.438148, -1.353912, -1.019076, -1.408987, -2.799644, -1.097066, -1.264904, -1.574877, -1.430926, -1.303377, -1.528319, -1.582163, -1.360264, -1.136386, -1.127941, -1.754431, -1.435112, -1.327246, -1.057786, -1.354909, -2.791271, 
-1.098225, -1.277865, -1.523990, -1.429649, -1.331297, -1.528063, -1.592185, -1.356935, -1.132845, -1.118198, -1.734416, -1.456629, -1.333292, -1.054306, -1.392925, -2.771918, -1.076823, -1.243939, -1.563312, -1.436878, -1.329371, -1.538914, -1.588965, -1.373533, -1.114625, -1.114174, -1.762750, -1.413050, -1.359296, -1.030937, -1.386101, -2.806270, -1.100066, -1.279643, -1.572263, -1.376749, -1.339828, -1.539308, -1.620469, -1.332956, -1.127110, -1.132293, -1.756575, -1.430088, -1.325073, -1.044530, -1.405092, -2.829869, -1.067760, -1.263073, -1.542070, -1.441361, -1.321732, -1.569515, -1.584682, -1.364866, -1.104350, -1.126733, -1.735999, -1.422784, -1.352304, -1.035509, -1.425352, -2.728281, -1.081169, -1.266783, -1.567128, -1.427648, -1.310268, -1.526242, -1.615034, -1.346828, -1.127934, -1.135978, -1.717888, -1.439778, -1.337629, -1.031355, -1.456076, -2.831441, -1.045832, -1.264621, -1.579378, -1.408499, -1.320355, -1.544289, -1.589774, -1.365707, -1.116681, -1.113647, -1.739169, -1.452574, -1.339383, -1.036056, -1.380603, -2.801541, -1.099597, -1.240759, -1.580230, -1.399468, -1.354101, -1.536242, -1.649324, -1.323536, -1.119510, -1.145446, -1.737165, -1.406948, -1.343198, -1.042674, -1.384714, -2.848554, -1.081267, -1.282963, -1.565872, -1.415031, -1.305778, -1.544226, -1.589703, -1.367956, -1.115016, -1.138113, -1.731687, -1.420056, -1.343653, -1.045275, -1.402423, -2.824428, -1.069844, -1.295123, -1.544398, -1.388588, -1.334581, -1.526150, -1.596662, -1.348254, -1.138296, -1.130550, -1.726069, -1.440548, -1.338023, -1.046438, -1.398475, -2.814218, -1.073270, -1.255229, -1.590107, -1.428286, -1.304286, -1.526205, -1.620987, -1.340161, -1.129688, -1.150000, -1.738616, -1.424460, -1.320651, -1.051213, -1.411066, -2.821005, -1.058266, -1.309351, -1.514259, -1.400509, -1.333466, -1.508507, -1.618113, -1.363914, -1.124456, -1.134824, -1.757423, -1.426076, -1.325076, -1.043255, -1.417321, -2.803214, -1.065032, -1.279713, -1.532647, -1.383815, -1.365321, 
-1.519064, -1.587143, -1.349879, -1.147900, -1.131152, -1.736176, -1.439274, -1.331632, -1.036649, -1.380280, -2.888622, -1.084123, -1.293743, -1.515937, -1.391231, -1.357177, -1.523053, -1.627413, -1.370635, -1.103910, -1.108869, -1.748558, -1.431883, -1.357962, -1.061592, -1.384073, -2.915935, -1.051578, -1.237868, -1.599547, -1.396187, -1.345264, -1.518417, -1.614892, -1.366809, -1.117458, -1.135170, -1.743547, -1.433209, -1.327303, -1.032677, -1.404282, -2.782984, -1.089006, -1.276197, -1.584373, -1.355692, -1.354570, -1.537405, -1.581161, -1.348853, -1.140098, -1.119314, -1.766928, -1.440377, -1.325008, -1.031262, -1.397101, -2.773566, -1.097538, -1.247002, -1.611318, -1.417301, -1.306953, -1.558331, -1.618170, -1.352569, -1.100584, -1.127350, -1.745691, -1.426519, -1.341543, -1.013821, -1.410808, -2.826955, -1.096522, -1.246982, -1.565019, -1.406036, -1.353121, -1.546611, -1.609172, -1.353099, -1.113097, -1.106069, -1.768911, -1.425146, -1.354210, -1.041831, -1.416955, -2.763873, -1.073840, -1.267899, -1.586226, -1.391059, -1.327764, -1.528048, -1.581214, -1.378274, -1.122999, -1.097651, -1.755814, -1.460312, -1.341643, -1.042227, -1.411974, -2.852335, -1.061431, -1.257354, -1.559004, -1.404205, -1.348278, -1.539860, -1.633473, -1.350901, -1.104635, -1.117918, -1.750674, -1.448205, -1.330325, -1.037544, -1.390667, -2.754117, -1.099307, -1.252474, -1.609727, -1.405165, -1.313288, -1.544736, -1.598525, -1.341162, -1.130426, -1.115304, -1.712051, -1.448541, -1.359550, -1.043741, -1.401096, -2.844219, -1.068980, -1.271704, -1.585130, -1.402577, -1.313925, -1.537842, -1.601314, -1.357195, -1.120420, -1.118869, -1.711025, -1.448926, -1.355384, -1.055699, -1.397169, -2.892420, -1.051787, -1.268116, -1.602063, -1.395080, -1.311761, -1.534456, -1.595880, -1.366850, -1.118442, -1.122673, -1.746898, -1.441286, -1.333118, -1.041974, -1.407369, -2.817838, -1.070839, -1.288822, -1.542899, -1.387650, -1.343299, -1.511921, -1.608170, -1.333747, -1.152735, -1.115062, 
-1.751051, -1.457413, -1.325479, -1.023172, -1.415943, -2.803849, -1.086859, -1.247188, -1.612570, -1.393999, -1.327148, -1.533530, -1.625640, -1.360522, -1.105894, -1.116745, -1.757243, -1.436452, -1.338012, -1.045930, -1.409458, -2.877161, -1.055365, -1.263699, -1.558217, -1.401522, -1.344537, -1.537866, -1.617753, -1.335388, -1.127749, -1.108063, -1.732413, -1.449533, -1.353768, -1.042162, -1.399918, -2.825833, -1.074606, -1.284279, -1.552279, -1.421440, -1.309263, -1.542621, -1.605882, -1.357213, -1.114466, -1.119742, -1.716693, -1.439762, -1.358708, -1.044607, -1.383184, -2.799425, -1.089021, -1.257743, -1.590896, -1.395404, -1.330938, -1.526378, -1.632356, -1.339031, -1.123590, -1.126907, -1.782094, -1.445095, -1.302111, -1.023461, -1.422535, -2.841483, -1.075257, -1.252573, -1.602432, -1.389148, -1.333558, -1.510365, -1.626385, -1.363120, -1.118804, -1.121348, -1.743086, -1.448594, -1.330747, -1.053977, -1.423663, -2.819205, -1.047062, -1.260617, -1.553619, -1.412063, -1.341705, -1.532467, -1.615983, -1.347069, -1.123004, -1.118920, -1.773352, -1.416841, -1.342753, -1.036011, -1.408534, -2.776839, -1.083538, -1.241985, -1.598424, -1.402658, -1.335479, -1.557092, -1.603839, -1.371762, -1.095209}; kmer-code-2013-trunk/leaff/0000755000000000000000000000000012641613360014225 5ustar rootrootkmer-code-2013-trunk/leaff/gc.C0000644000000000000000000000664012322046702014724 0ustar rootroot#include "bio++.H" #include "seqCache.H" void computeGCcontent(char *filename) { seqCache *A = new seqCache(filename); for (uint32 idx=0; idx < A->getNumberOfSequences(); idx++) { seqInCore *S = A->getSequenceInCore(idx); char *s = S->sequence(); uint32 genomeLength = S->sequenceLength(); fprintf(stdout, ">%s\n", S->header()); int gc[256] = {0}; gc['c'] = 1; gc['C'] = 1; gc['g'] = 1; gc['G'] = 1; // Replace the sequence with "g or c". We can't do this inline, // since output reports the sequence too. 
The extra 1000 at the // end is important, since we do not bother checking for the end // of the valid data, just assume that it's zero. // char *g = new char [S->sequenceLength() + 1000]; for (uint32 i=0; i 1) ? g[i-2] : 0); ave5 += g[i+2] - ((i > 2) ? g[i-3] : 0); ave11 += g[i+5] - ((i > 5) ? g[i-6] : 0); ave51 += g[i+25] - ((i > 25) ? g[i-25] : 0); ave101 += g[i+50] - ((i > 50) ? g[i-51] : 0); ave201 += g[i+100] - ((i > 100) ? g[i-101] : 0); ave501 += g[i+250] - ((i > 250) ? g[i-251] : 0); ave1001 += g[i+500] - ((i > 500) ? g[i-501] : 0); ave2001 += g[i+1000] - ((i > 1000) ? g[i-1001] : 0); fprintf(stdout, uint32FMT"\t"uint32FMT"\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", i, s[i], ave3 / (double)((i >= 1) ? 3 - ((i < genomeLength - 1) ? 0 : i + 2 - genomeLength) : i+2), ave5 / (double)((i >= 2) ? 5 - ((i < genomeLength - 2) ? 0 : i + 3 - genomeLength) : i+3), ave11 / (double)((i >= 5) ? 11 - ((i < genomeLength - 4) ? 0 : i + 5 - genomeLength) : i+6), ave51 / (double)((i >= 25) ? 51 - ((i < genomeLength - 24) ? 0 : i + 25 - genomeLength) : i+26), ave101 / (double)((i >= 50) ? 101 - ((i < genomeLength - 49) ? 0 : i + 50 - genomeLength) : i+51), ave201 / (double)((i >= 100) ? 201 - ((i < genomeLength - 99) ? 0 : i + 100 - genomeLength) : i+101), ave501 / (double)((i >= 250) ? 501 - ((i < genomeLength - 249) ? 0 : i + 250 - genomeLength) : i+251), ave1001 / (double)((i >= 500) ? 1001 - ((i < genomeLength - 499) ? 0 : i + 500 - genomeLength) : i+501), ave2001 / (double)((i >= 1000) ? 2001 - ((i < genomeLength - 999) ? 
0 : i + 1000 - genomeLength) : i+1001)); } delete [] g; delete S; } } kmer-code-2013-trunk/leaff/stats.C0000644000000000000000000000566612322046702015500 0ustar rootroot#include "bio++.H" #include "seqCache.H" #include using namespace std; void stats(char *filename, uint64 refLen) { seqCache *F = new seqCache(filename); bool V[256]; for (uint32 i=0; i<256; i++) V[i] = false; V['n'] = true; V['N'] = true; uint32 numSeq = F->getNumberOfSequences(); uint64 Ss = 0; // actual length of span uint64 Rs = 0; // reference length of span uint32 *Ls = new uint32 [numSeq]; uint64 Sb = 0; uint64 Rb = 0; uint32 *Lb = new uint32 [numSeq]; for (uint32 i=0; igetSequenceInCore(s); uint32 len = S->sequenceLength(); uint32 span = len; uint32 base = len; for (uint32 pos=1; possequence()[pos]]) base--; } Ss += span; Sb += base; Ls[S->getIID()] = span; Lb[S->getIID()] = base; delete S; } if (refLen > 0) { Rs = refLen; Rb = refLen; } else { Rs = Ss; Rb = Sb; } //qsort(Ls, numSeq, sizeof(uint32), uint32_compare); //qsort(Lb, numSeq, sizeof(uint32), uint32_compare); sort(Ls, Ls + numSeq); sort(Lb, Lb + numSeq); reverse(Ls, Ls + numSeq); reverse(Lb, Lb + numSeq); uint32 n50s[11] = {0}; uint32 l50s[11] = {0}; uint32 n50b[11] = {0}; uint32 l50b[11] = {0}; uint32 sizes[11] = {0}; uint32 sizeb[11] = {0}; for (uint32 i=0; i<11; i++) { sizes[i] = i * Rs / 10; sizeb[i] = i * Rb / 10; //fprintf(stderr, "SIZE %2d s=%d b=%d\n", i, sizes[i], sizeb[i]); } for (uint32 i=0, sum=0, n=1; (i < numSeq) && (n < 11); i++) { if ((sum < sizes[n]) && (sizes[n] <= sum + Ls[i])) { n50s[n] = Ls[i]; l50s[n] = i; n++; } sum += Ls[i]; } for (uint32 i=0, sum=0, n=1; (i < numSeq) && (n < 11); i++) { if ((sum < sizeb[n]) && (sizeb[n] <= sum + Lb[i])) { n50b[n] = Ls[i]; l50b[n] = i; n++; } sum += Lb[i]; } //for (uint32 i=0, sum=0; sum < Rb/2; i++) { //} fprintf(stdout, "%s\n", F->getSourceName()); fprintf(stdout, "\n"); fprintf(stdout, "numSeqs "uint32FMT"\n", numSeq); fprintf(stdout, "\n"); fprintf(stdout, "SPAN (smallest 
"uint32FMT" largest "uint32FMT")\n", Ls[numSeq-1], Ls[0]); for (uint32 i=1; i<10; i++) fprintf(stdout, "n"uint32FMT" "uint32FMT" at index "uint32FMT"\n", 10 * i, n50s[i], l50s[i]); fprintf(stdout, "totLen "uint64FMTW(10)"\n", Ss); fprintf(stdout, "refLen "uint64FMTW(10)"\n", Rs); fprintf(stdout, "\n"); fprintf(stdout, "BASES (smallest "uint32FMT" largest "uint32FMT")\n", Lb[numSeq-1], Lb[0]); for (uint32 i=1; i<10; i++) fprintf(stdout, "n"uint32FMT" "uint32FMT" at index "uint32FMT"\n", 10 * i, n50b[i], l50b[i]); fprintf(stdout, "totLen "uint64FMTW(10)"\n", Sb); fprintf(stdout, "refLen "uint64FMTW(10)"\n", Rb); delete [] Ls; delete [] Lb; } kmer-code-2013-trunk/leaff/blocks.C0000644000000000000000000000212612322046702015603 0ustar rootroot#include "bio++.H" #include "seqCache.H" void dumpBlocks(char *filename) { seqCache *F = 0L; seqInCore *S = 0L; bool V[256] = {0}; for (uint32 i=0; i<256; i++) V[i] = false; V['n'] = true; V['N'] = true; F = new seqCache(filename); for (uint32 s=0; sgetNumberOfSequences(); s++) { seqInCore *S = F->getSequenceInCore(s); uint32 len = S->sequenceLength(); char begseq = S->sequence()[0]; bool nnn = V[begseq]; uint32 begpos = 0; uint32 pos = 0; for (pos=0; possequence()[pos]; if (nnn != V[seq]) { fprintf(stdout, "%c "uint32FMT" "uint32FMT" "uint32FMT" "uint32FMT"\n", begseq, s, begpos, pos, pos - begpos); nnn = V[seq]; begpos = pos; begseq = seq; } } fprintf(stdout, "%c "uint32FMT" "uint32FMT" "uint32FMT" "uint32FMT"\n", begseq, s, begpos, pos, pos - begpos); fprintf(stdout, ". "uint32FMT" "uint32FMT" "uint32FMT"\n", s, pos, uint32ZERO); delete S; } delete F; } kmer-code-2013-trunk/leaff/simseq.C0000644000000000000000000001210711215036525015631 0ustar rootroot#include #include #include #include #include #include "bio.h" // This is Liliana Florea's sequencing error simulator. Bri hacked // it to use a real RNG, and to make it work from leaff. 
typedef struct edit_script { int optype; int num; struct edit_script *next; } EditScript_t; typedef struct align { int offset, len; EditScript_t *script; } Align_t; // This guy is provided by leaff extern mt_s *mtctx; // RAND returns x numbers, starting at number y. // #define RAND(x,y) (int)((y) + (mtRandom32(mtctx) % (x))) #define max(x,y) ((x)>=(y) ? (x):(y)) #define min(x,y) ((x)<=(y) ? (x):(y)) #define MOV 3 #define SUB 2 #define INS 1 #define DEL 0 EditScript_t * new_script(int optype, int num, EditScript_t *next) { EditScript_t *newtp = (EditScript_t *)malloc(sizeof(EditScript_t)); newtp->optype = optype; newtp->num = num; newtp->next = next; return newtp; } /* DEL(pos), SUB(pos) - modifY position pos; INS - insert right before pos */ void insert(Align_t *aln, int in_pos, int in_optype) { int i, num, optype; EditScript_t *t, *tp; //fprintf(stderr, "Modify script op=%d pos=%d\n", in_optype, in_pos); for (t=aln->script, i=0, tp=NULL; t; tp=t, t=t->next) { num = t->num; optype = t->optype; switch (optype) { case INS: if (in_pos==i+1) { if (tp) tp->next = new_script(in_optype, 1, tp->next); else aln->script = new_script(in_optype, 1, aln->script); return; } break; case DEL: i += num; break; case SUB: case MOV: if (inum = l; tp = t; tp->next = new_script(in_optype, 1, tp->next); tp = tp->next; tp->next = new_script(optype, r, tp->next); } else if (!l) { if (tp) tp->next = new_script(in_optype, 1, t); else aln->script = new_script(in_optype, 1, aln->script); if (in_optype!=INS) t->num -= 1; } else { tp = t; tp->next = new_script(in_optype, 1, tp->next); if (in_optype!=INS) t->num -= 1; } return; } i += num; break; default: fprintf(stderr, "Unrecognized optype (%d).\n", in_optype); break; } } //fprintf(stderr, "Failed to modify sequence (%d,%d).\n", in_optype, in_pos); } void print_simseq(char *seq, char *hdr, Align_t *aln, double P, int CUT, int COPY) { int k, e; char *s; char let_4[4] = {'A','C','G','T'}; char let_3A[3] = {'C','G','T'}; char let_3C[3] = 
{'A','G','T'}; char let_3G[3] = {'A','C','T'}; char let_3T[3] = {'A','C','G'}; EditScript_t *t; fprintf(stdout, ">"); while ((*hdr) && !isspace(*hdr)) fprintf(stdout, "%c", *hdr++); fprintf(stdout, ":seq=%d:copy=%d:loc=%d-%d:err=%1.2f\n", CUT+1, COPY+1, aln->offset, aln->offset+aln->len-1, P); s = seq + aln->offset-1; for (t=aln->script; t; t=t->next) { if (*s == 0) break; switch (t->optype) { case INS: for (k=0; knum; k++) { e = RAND(4,0); fprintf(stdout, "%c", let_4[e]); } break; case DEL: while (*s && t->num) { s++; t->num--; } break; case SUB: for (k=0; knum; k++) { e = RAND(3,0); if (*s=='A') fprintf(stdout, "%c", let_3A[e]); else if (*s=='C') fprintf(stdout, "%c", let_3C[e]); else if (*s=='G') fprintf(stdout, "%c", let_3G[e]); else if (*s=='T') fprintf(stdout, "%c", let_3T[e]); else fprintf(stdout, "%c", 'A'); s++; } break; case MOV: for (k=0; knum; k++) { if (*s == 0) { k = t->num; } else { fprintf(stdout, "%c", *s); s++; } } break; default: fprintf(stderr, "Unrecognized optype (%d).\n", t->optype); break; } } fprintf(stdout, "\n"); } void simseq(char *seq, char *hdr, int len, int N, int L, int C, double P) { Align_t align; int i, j, k; int start; EditScript_t *s; for (i=0; inext; free(s); } } } } kmer-code-2013-trunk/leaff/dups.C0000644000000000000000000001010112322046702015271 0ustar rootroot#include "bio++.H" #include "seqCache.H" md5_s * computeMD5ForEachSequence(seqCache *F) { uint32 numSeqs = F->getNumberOfSequences(); md5_s *result = new md5_s [numSeqs]; for (uint32 idx=0; idx < numSeqs; idx++) { seqInCore *s1 = F->getSequenceInCore(idx); md5_string(result+idx, s1->sequence(), s1->sequenceLength()); result[idx].i = s1->getIID(); delete s1; } return(result); } void mapDuplicates_Print(char *filea, seqInCore *sa, char *fileb, seqInCore *sb) { if (strcmp(sa->sequence(), sb->sequence()) == 0) fprintf(stdout, uint32FMT" <-> "uint32FMT"\n", sa->getIID(), sb->getIID()); else fprintf(stderr, "COLLISION DETECTED BETWEEN %s:"uint32FMT" AND 
%s:"uint32FMT"!\nPLEASE REPORT THIS TO bri@walenz.org!\n", filea, sa->getIID(), fileb, sb->getIID()); } void findDuplicates(char *filename) { seqInCore *s1 = 0L; seqInCore *s2 = 0L; seqCache *A = new seqCache(filename); uint32 numSeqs = A->getNumberOfSequences(); fprintf(stderr, "Computing MD5's for each sequence in '%s'.\n", filename); md5_s *result = computeMD5ForEachSequence(A); fprintf(stderr, "Sorting MD5's.\n"); qsort(result, numSeqs, sizeof(md5_s), md5_compare); fprintf(stderr, "Verifying identity, and output\n"); for (uint32 idx=1; idxgetSequenceInCore(result[idx-1].i); s2 = A->getSequenceInCore(result[idx].i); if (strcmp(s1->sequence(), s2->sequence()) == 0) { fprintf(stdout, uint32FMT":%s\n"uint32FMT":%s\n\n", result[idx-1].i, s1->header(), result[idx ].i, s2->header()); } else { fprintf(stderr, "COLLISION DETECTED BETWEEN IID "uint32FMT" AND "uint32FMT"!\nPLEASE REPORT THIS TO bri@walenz.org!\n", result[idx-1].i, result[idx].i); } delete s1; delete s2; } } delete [] result; delete A; } void mapDuplicates(char *filea, char *fileb) { fprintf(stderr, "Computing MD5's for each sequence in '%s'.\n", filea); seqCache *A = new seqCache(filea); md5_s *resultA = computeMD5ForEachSequence(A); fprintf(stderr, "Computing MD5's for each sequence in '%s'.\n", fileb); seqCache *B = new seqCache(fileb); md5_s *resultB = computeMD5ForEachSequence(B); uint32 numSeqsA = A->getNumberOfSequences(); uint32 numSeqsB = B->getNumberOfSequences(); uint32 idxA = 0; uint32 idxB = 0; fprintf(stderr, "Sorting MD5's.\n"); qsort(resultA, numSeqsA, sizeof(md5_s), md5_compare); qsort(resultB, numSeqsB, sizeof(md5_s), md5_compare); fprintf(stderr, "Finding duplicates.\n"); while ((idxAgetSequenceInCore(resultA[idxA].i); seqInCore *sb = B->getSequenceInCore(resultB[idxB].i); mapDuplicates_Print(filea, sa, fileb, sb); // While the B sequence matches the current A sequence, output a match // uint32 idxBb = idxB+1; int resb = md5_compare(resultA+idxA, resultB+idxBb); while (resb == 0) { 
seqInCore *sbb = B->getSequenceInCore(resultB[idxBb].i); mapDuplicates_Print(filea, sa, fileb, sbb); delete sbb; idxBb++; resb = md5_compare(resultA+idxA, resultB+idxBb); } // And likewise for A // uint32 idxAa = idxA+1; int resa = md5_compare(resultA+idxAa, resultB+idxB); while (resa == 0) { seqInCore *saa = A->getSequenceInCore(resultA[idxAa].i); mapDuplicates_Print(filea, saa, fileb, sb); delete saa; idxAa++; resa = md5_compare(resultA+idxAa, resultB+idxB); } delete sa; delete sb; idxA++; idxB++; } else { if (res < 0) idxA++; else idxB++; } } delete A; delete B; } kmer-code-2013-trunk/leaff/fragmenter.C0000644000000000000000000001216512322046702016464 0ustar rootroot#include #include #include "bio++.H" #include "seqCache.H" // Splits a sequence into itty-bitty pieces. // // By default, splits into non-overlapping pieces of length L. // Pieces will not start with nor end with N, but may have embedded N's. // // All pieces will be at least L long. Most pieces will be exactly L // long. All pieces will be less than 2L long. // // If a piece has more than (currently) 50 N's, it will be broken -- // the first piece and last piece will be saved, and the middle (with // the N's) will be discarded. 
void usage(char *name) { fprintf(stderr, "usage: %s [-overlap len] -length len -input X.fasta -output Y.fasta -log T.log\n", name); exit(1); } int main(int argc, char **argv) { uint32 desiredLength = 0; uint32 overlapLength = 0; bool beVerbose = false; seqCache *F = 0L; seqInCore *B = 0L; uint32 Bid = 0; FILE *O = 0L; FILE *L = 0L; uint32 fragmentIndex = 0; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-length") == 0) { desiredLength = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-overlap") == 0) { overlapLength = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-input") == 0) { F = new seqCache(argv[++arg]); } else if (strcmp(argv[arg], "-output") == 0) { errno = 0; O = fopen(argv[++arg], "w"); if (errno) fprintf(stderr, "ERROR: Can't open output file '%s': %s\n", argv[arg], strerror(errno)), exit(1); } else if (strcmp(argv[arg], "-log") == 0) { errno = 0; L = fopen(argv[++arg], "w"); if (errno) fprintf(stderr, "ERROR: Can't open log file '%s': %s\n", argv[arg], strerror(errno)), exit(1); } else if (strcmp(argv[arg], "-verbose") == 0) { beVerbose = true; } else { usage(argv[arg]); } arg++; } if ((F == 0L) || (O == 0L) || (L == 0L)) usage(argv[0]); B = F->getSequenceInCore(Bid); while (B) { if (beVerbose) fprintf(stderr, "working on %s\n", B->header()); char *seq = (char *)B->sequence(); uint32 pos = 0; uint32 max = 16384; uint32 *sta = new uint32 [max]; uint32 *end = new uint32 [max]; // step 1: build a list of regions to output. Scan the sequence, // making a new region if we see a significant chunk of N, or if we // hit the desiredLength. // uint32 s = 0; uint32 e = 0; while (s < B->sequenceLength()) { // Skip any N at the start while ((seq[s] == 'n') || (seq[s] == 'N') && (s < B->sequenceLength())) s++; // Construct the preliminary block. 
// e = s + desiredLength; if (e > B->sequenceLength()) e = B->sequenceLength(); fprintf(stderr, "got block1 "uint32FMT" - "uint32FMT"\n", s, e); // Scan from s to e, looking for significant N. If we find it, // reset e and stop. // uint32 numN = 0; for (uint32 i=s; i= 50) { e = i; break; } } fprintf(stderr, "got block2 "uint32FMT" - "uint32FMT"\n", s, e); // Back up e until we hit the first non-N if ((s < e) && ((seq[e] == 'n') || (seq[e] == 'N'))) { while ((s <= e) && ((seq[e] == 'n') || (seq[e] == 'N'))) e--; e++; } fprintf(stderr, "got block3 "uint32FMT" - "uint32FMT"\n", s, e); // Add this region // if (s > e) { fprintf(stderr, "ERROR! s>e! "uint32FMT" "uint32FMT"\n", s, e); } if (s != e) { fprintf(stderr, "ADD ["uint32FMTW(3)"] "uint32FMTW(9)" "uint32FMTW(9)" length "uint32FMTW(9)"\n", pos, s, e, e-s); sta[pos] = s; end[pos] = e; pos++; if (pos >= max) { fprintf(stderr, "ERROR! max exceeded!\n"); } } s = e; } // If we're supposed to be overlapping, fiddle with the begin position to make it so. 
// if (overlapLength > 0) { for (uint32 p=1; pheader(), sta[p], end[p], end[p] - sta[p]); fwrite(seq+sta[p], sizeof(char), end[p] - sta[p], O); fprintf(O, "\n"); #endif fprintf(L, uint32FMT" : "uint32FMT"["uint32FMT"-"uint32FMT"]\n", fragmentIndex++, B->getIID(), sta[p], end[p]); } delete [] sta; delete [] end; delete B; B = F->getSequenceInCore(++Bid); } fclose(L); fclose(O); } kmer-code-2013-trunk/leaff/Make.include0000644000000000000000000000115511512763666016464 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../libutil/)/ LIBBIO/ :=$(realpath $/../libbio/)/ LIBSEQ/ :=$(realpath $/../libseq/)/ $/.CXX_SRCS :=$/leaff.C $/blocks.C $/dups.C $/gc.C $/partition.C $/simseq.C $/stats.C $/.CXX_EXES :=$/leaff $/.CLEAN :=$/*.o $/leaff : $/leaff.o $/blocks.o $/dups.o $/gc.o $/partition.o $/simseq.o $/stats.o \ ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) $(eval $/%.d $/%.o: CFLAGS +=-I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) kmer-code-2013-trunk/leaff/leaff.C0000644000000000000000000006535512415073322015421 0ustar rootroot#include "bio++.H" #include "seqCache.H" #include "seqStore.H" // Analysis functions // void dumpBlocks(char *filename); void stats(char *filename, uint64 refLen); void partitionBySize(char *prefix, uint64 partitionSize, char *filename); void partitionByBucket(char *prefix, uint64 partitionSize, char *filename); void partitionBySegment(char *prefix, uint64 numSegments, char *filename); void simseq(char *,char *,int,int,int,int,double); void computeGCcontent(char *name); void findDuplicates(char *filename); void mapDuplicates(char *filea, char *fileb); void processFile(char *filename); void processArray(int argc, char **argv); bool doReverse = false; bool doComplement = false; bool withDefLine = true; char *specialDefLine = 0L; uint32 withLineBreaks = 0; bool toUppercase = false; char translate[256] = {0}; seqCache *fasta = 0L; uint32 begPos = (uint32)0; uint32 endPos 
= ~(uint32)0; uint32 endExtract = ~(uint32)0; mt_s *mtctx = 0L; static void failIfNoSource(void) { if (fasta == 0L) fprintf(stderr, "No source file specified.\n"), exit(1); } static void failIfNotRandomAccess(void) { if (fasta->randomAccessSupported() == false) fprintf(stderr, "Algorithm required random access; soruce file not supported.\n"), exit(1); } static void helpStandard(char *program) { fprintf(stderr, "usage: %s [-f fasta-file] [options]\n", program); fprintf(stderr, "\n"); fprintf(stderr, "SOURCE FILES\n"); fprintf(stderr, " -f file: use sequence in 'file' (-F is also allowed for historical reasons)\n"); fprintf(stderr, " -A file: read actions from 'file'\n"); fprintf(stderr, "\n"); fprintf(stderr, "SOURCE FILE EXAMINATION\n"); fprintf(stderr, " -d: print the number of sequences in the fasta\n"); fprintf(stderr, " -i name: print an index, labelling the source 'name'\n"); fprintf(stderr, "\n"); fprintf(stderr, "OUTPUT OPTIONS\n"); fprintf(stderr, " -6 <#>: insert a newline every 60 letters\n"); fprintf(stderr, " (if the next arg is a number, newlines are inserted every\n"); fprintf(stderr, " n letters, e.g., -6 80. Disable line breaks with -6 0,\n"); fprintf(stderr, " or just don't use -6!)\n"); fprintf(stderr, " -e beg end: Print only the bases from position 'beg' to position 'end'\n"); fprintf(stderr, " (space based, relative to the FORWARD sequence!) If\n"); fprintf(stderr, " beg == end, then the entire sequence is printed. It is an\n"); fprintf(stderr, " error to specify beg > end, or beg > len, or end > len.\n"); fprintf(stderr, " -ends n Print n bases from each end of the sequence. One input\n"); fprintf(stderr, " sequence generates two output sequences, with '_5' or '_3'\n"); fprintf(stderr, " appended to the ID. 
If 2n >= length of the sequence, the\n"); fprintf(stderr, " sequence itself is printed, no ends are extracted (they\n"); fprintf(stderr, " overlap).\n"); fprintf(stderr, " -C: complement the sequences\n"); fprintf(stderr, " -H: DON'T print the defline\n"); fprintf(stderr, " -h: Use the next word as the defline (\"-H -H\" will reset to the\n"); fprintf(stderr, " original defline\n"); fprintf(stderr, " -R: reverse the sequences\n"); fprintf(stderr, " -u: uppercase all bases\n"); fprintf(stderr, "\n"); fprintf(stderr, "SEQUENCE SELECTION\n"); fprintf(stderr, " -G n s l: print n randomly generated sequences, 0 < s <= length <= l\n"); fprintf(stderr, " -L s l: print all sequences such that s <= length < l\n"); fprintf(stderr, " -N l h: print all sequences such that l <= %% N composition < h\n"); fprintf(stderr, " (NOTE 0.0 <= l < h < 100.0)\n"); fprintf(stderr, " (NOTE that you cannot print sequences with 100%% N\n"); fprintf(stderr, " This is a useful bug).\n"); fprintf(stderr, " -q file: print sequences from the seqid list in 'file'\n"); fprintf(stderr, " -r num: print 'num' randomly picked sequences\n"); fprintf(stderr, " -s seqid: print the single sequence 'seqid'\n"); fprintf(stderr, " -S f l: print all the sequences from ID 'f' to 'l' (inclusive)\n"); fprintf(stderr, " -W: print all sequences (do the whole file)\n"); fprintf(stderr, "\n"); fprintf(stderr, "LONGER HELP\n"); fprintf(stderr, " -help analysis\n"); fprintf(stderr, " -help examples\n"); } static void helpAnalysis(char *program) { fprintf(stderr, "usage: %s [-f ] [options]\n", program); fprintf(stderr, "\n"); fprintf(stderr, " --findduplicates a.fasta\n"); fprintf(stderr, " Reports sequences that are present more than once. 
Output\n"); fprintf(stderr, " is a list of pairs of deflines, separated by a newline.\n"); fprintf(stderr, "\n"); fprintf(stderr, " --mapduplicates a.fasta b.fasta\n"); fprintf(stderr, " Builds a map of IIDs from a.fasta and b.fasta that have\n"); fprintf(stderr, " identical sequences. Format is \"IIDa <-> IIDb\"\n"); fprintf(stderr, "\n"); fprintf(stderr, " --md5 a.fasta:\n"); fprintf(stderr, " Don't print the sequence, but print the md5 checksum\n"); fprintf(stderr, " (of the entire sequence) followed by the entire defline.\n"); fprintf(stderr, "\n"); fprintf(stderr, " --partition prefix [ n[gmk]bp | n ] a.fasta\n"); fprintf(stderr, " --partitionmap [ n[gmk]bp | n ] a.fasta\n"); fprintf(stderr, " Partition the sequences into roughly equal size pieces of\n"); fprintf(stderr, " size nbp, nkbp, nmbp or ngbp; or into n roughly equal sized\n"); fprintf(stderr, " parititions. Sequences larger that the partition size are\n"); fprintf(stderr, " in a partition by themself. --partitionmap writes a\n"); fprintf(stderr, " description of the partition to stdout; --partiton creates\n"); fprintf(stderr, " a fasta file 'prefix-###.fasta' for each partition.\n"); fprintf(stderr, " Example: -F some.fasta --partition parts 130mbp\n"); fprintf(stderr, " -F some.fasta --partition parts 16\n"); fprintf(stderr, "\n"); fprintf(stderr, " --segment prefix n a.fasta\n"); fprintf(stderr, " Splits the sequences into n files, prefix-###.fasta.\n"); fprintf(stderr, " Sequences are not reordered.\n"); fprintf(stderr, "\n"); fprintf(stderr, " --gccontent a.fasta\n"); fprintf(stderr, " Reports the GC content over a sliding window of\n"); fprintf(stderr, " 3, 5, 11, 51, 101, 201, 501, 1001, 2001 bp.\n"); fprintf(stderr, "\n"); fprintf(stderr, " --testindex a.fasta\n"); fprintf(stderr, " Test the index of 'file'. If index is up-to-date, leaff\n"); fprintf(stderr, " exits successfully, else, leaff exits with code 1. 
If an\n"); fprintf(stderr, " index file is supplied, that one is tested, otherwise, the\n"); fprintf(stderr, " default index file name is used.\n"); fprintf(stderr, "\n"); fprintf(stderr, " --dumpblocks a.fasta\n"); fprintf(stderr, " Generates a list of the blocks of N and non-N. Output\n"); fprintf(stderr, " format is 'base seq# beg end len'. 'N 84 483 485 2' means\n"); fprintf(stderr, " that a block of 2 N's starts at space-based position 483\n"); fprintf(stderr, " in sequence ordinal 84. A '.' is the end of sequence\n"); fprintf(stderr, " marker.\n"); fprintf(stderr, "\n"); fprintf(stderr, " --errors L N C P a.fasta\n"); fprintf(stderr, " For every sequence in the input file, generate new\n"); fprintf(stderr, " sequences including simulated sequencing errors.\n"); fprintf(stderr, " L -- length of the new sequence. If zero, the length\n"); fprintf(stderr, " of the original sequence will be used.\n"); fprintf(stderr, " N -- number of subsequences to generate. If L=0, all\n"); fprintf(stderr, " subsequences will be the same, and you should use\n"); fprintf(stderr, " C instead.\n"); fprintf(stderr, " C -- number of copies to generate. 
Each of the N\n"); fprintf(stderr, " subsequences will have C copies, each with different\n"); fprintf(stderr, " errors.\n"); fprintf(stderr, " P -- probability of an error.\n"); fprintf(stderr, "\n"); fprintf(stderr, " HINT: to simulate ESTs from genes, use L=500, N=10, C=10\n"); fprintf(stderr, " -- make C=10 sequencer runs of N=10 EST sequences\n"); fprintf(stderr, " of length 500bp each.\n"); fprintf(stderr, " to simulate mRNA from genes, use L=0, N=10, C=10\n"); fprintf(stderr, " to simulate reads from genomes, use L=800, N=10, C=1\n"); fprintf(stderr, " -- of course, N= should be increased to give the\n"); fprintf(stderr, " appropriate depth of coverage\n"); fprintf(stderr, "\n"); fprintf(stderr, " --stats a.fasta [refLen]\n"); fprintf(stderr, " Reports size statistics; number, N50, sum, largest.\n"); fprintf(stderr, " If 'refLen' is supplied, N50 is based on this size.\n"); fprintf(stderr, "\n"); fprintf(stderr, " --seqstore out.seqStore\n"); fprintf(stderr, " Converts the input file (-f) to a seqStore file.\n"); } static void helpExamples(char *program) { fprintf(stderr, "usage: %s [-f ] [options]\n", program); fprintf(stderr, "\n"); fprintf(stderr, "Options are ORDER DEPENDENT. Sequences are printed whenever an ACTION occurs\n"); fprintf(stderr, "on the command line. SEQUENCE OPTIONS are not reset when a sequence is printed.\n"); fprintf(stderr, "\n"); fprintf(stderr, "SEQUENCES are numbered starting at ZERO, not one.\n"); fprintf(stderr, "\n"); fprintf(stderr, " Print the first 10 bases of the fourth sequence in file 'genes':\n"); fprintf(stderr, " -f genes -e 0 10 -s 3\n"); fprintf(stderr, "\n"); fprintf(stderr, " Print the first 10 bases of the fourth and fifth sequences:\n"); fprintf(stderr, " -f genes -e 0 10 -s 3 -s 4\n"); fprintf(stderr, "\n"); fprintf(stderr, " Print the fourth and fifth sequences reverse complemented, and the sixth\n"); fprintf(stderr, " sequence forward. 
The second set of -R -C toggle off reverse-complement:\n"); fprintf(stderr, " -f genes -R -C -s 3 -s 4 -R -C -s 5\n"); fprintf(stderr, "\n"); fprintf(stderr, " Convert file 'genes' to a seqStore 'genes.seqStore'. The seqStore\n"); fprintf(stderr, " provides better performance with the kmer tools.\n"); fprintf(stderr, " -f genes --seqstore genes.seqStore\n"); } static void printSequence(char *def, char *seq, uint32 beg, uint32 end) { if (beg >= end) return; if ((endExtract != ~uint32ZERO) && (endExtract + endExtract < end - beg)) { char d[1024]; uint32 l = strlen(seq); sprintf(d, "%s_5", def); printSequence(d, seq, 0, endExtract); sprintf(d, "%s_3", def); printSequence(d, seq, l-endExtract, l); return; } if (specialDefLine) def = specialDefLine; if (withDefLine == false) def = 0L; uint32 limit = end - beg; char *n = new char [end - beg + 1]; char *m; if ((doReverse == false) && (doComplement == false)) { m = n; seq += beg; while (limit--) *(m++) = translate[*(seq++)]; } else if ((doReverse == true) && (doComplement == false)) { m = n + limit - 1; seq += beg; while (limit--) *(m--) = translate[*(seq++)]; } else if ((doReverse == false) && (doComplement == true)) { m = n; seq += beg; while (limit--) *(m++) = complementSymbol[translate[*(seq++)]]; } else if ((doReverse == true) && (doComplement == true)) { m = n + limit - 1; seq += beg; while (limit--) *(m--) = complementSymbol[translate[*(seq++)]]; } n[end-beg] = 0; if (def) fprintf(stdout, ">%s\n", def); if (withLineBreaks) { char *t = n; char *a = new char [withLineBreaks+1]; while (*t) { uint32 i=0; while ((*t) && (i < withLineBreaks)) a[i++] = *(t++); a[i++] = '\n'; a[i] = 0; fprintf(stdout, "%s", a); } delete [] a; } else { fprintf(stdout, "%s\n", n); } delete [] n; } static void printSequence(seqInCore *sic) { printSequence(sic->header(), sic->sequence(), (begPos!=(uint32)0) ? begPos:0, (endPos!=~uint32(0)) ? 
endPos:sic->sequenceLength()); } static void printSequence(uint32 sid) { seqInCore *sic = fasta->getSequenceInCore(sid); if (sic == 0L) fprintf(stderr, "WARNING: Didn't find sequence with iid '"uint32FMT"'\n", sid); else printSequence(sic); delete sic; } static void printSequence(char *sid) { seqInCore *sic = fasta->getSequenceInCore(sid); if (sic == 0L) fprintf(stderr, "WARNING: Didn't find sequence with name/iid '%s'\n", sid); else printSequence(sic); delete sic; } static void printIDsFromFile(char *name) { uint32 idLen = 0; uint32 idMax = 63; char *id = new char [idMax+1]; readBuffer B(name); char x = B.read(); // For optimal performance, we should sort the list of ID's given // by their IID, but the user might have a good reason for wanting // them unsorted. while (B.eof() == false) { while (whitespaceSymbol[x] && (B.eof() == false)) x = B.read(); if (B.eof() == false) { idLen = 0; while (!whitespaceSymbol[x] && (B.eof() == false)) { id[idLen++] = x; x = B.read(); if (idLen >= idMax) { idMax *= 2; char *newid = new char [idMax+1]; memcpy(newid, id, sizeof(char) * idLen); delete [] id; id = newid; } } id[idLen] = 0; seqInCore *S = fasta->getSequenceInCore(id); if (S == 0L) fprintf(stderr, "WARNING: Didn't find sequence with name/iid '%s'\n", id); else printSequence(S); } } delete [] id; } void processArray(int argc, char **argv) { int arg = 1; while (arg < argc) { if ((strcmp(argv[arg], "-f") == 0) || (strcmp(argv[arg], "-F") == 0)) { delete fasta; fasta = new seqCache(argv[++arg]); } else if (strcmp(argv[arg], "-i") == 0) { failIfNoSource(); ++arg; if ((argv[arg] == 0L) || (argv[arg][0] == '-')) fprintf(stderr, "ERROR: next arg to -i should be 'name', I got '%s'\n", (argv[arg] == 0L) ? 
"(nullpointer)" : argv[arg]), exit(1); for (uint32 s=0; sgetNumberOfSequences(); s++) fprintf(stdout, "G\tseq\t%s:"uint32FMT"\t"uint32FMT"\t%s\n", argv[arg], s, fasta->getSequenceLength(s), ">unimplemented"); } else if (strcmp(argv[arg], "-d") == 0) { failIfNoSource(); printf(uint32FMT"\n", fasta->getNumberOfSequences()); } else if (strcmp(argv[arg], "-L") == 0) { uint32 small = strtouint32(argv[++arg], 0L); uint32 large = strtouint32(argv[++arg], 0L); failIfNoSource(); for (uint32 s=0; sgetNumberOfSequences(); s++) if ((small <= fasta->getSequenceLength(s)) && (fasta->getSequenceLength(s) < large)) printSequence(s); } else if (strcmp(argv[arg], "-N") == 0) { double small = atof(argv[++arg]); double large = atof(argv[++arg]); failIfNoSource(); for (uint32 s=0; sgetNumberOfSequences(); s++) { seqInCore *S = fasta->getSequenceInCore(s); uint32 Ns = 0; uint32 len = S->sequenceLength(); char *seq = S->sequence(); for (uint32 i=begPos; igetNumberOfSequences(); s++) printSequence(s); } else if (strcmp(argv[arg], "-G") == 0) { uint32 n = strtouint32(argv[++arg], 0L); uint32 s = strtouint32(argv[++arg], 0L); uint32 l = strtouint32(argv[++arg], 0L); char bases[4] = {'A', 'C', 'G', 'T'}; char *def = new char [1024]; char *seq = new char [l + 1]; if (s == 0) s = 1; if (s > l) fprintf(stderr, "leaff: usage: -G num-seqs min-length max-length\n"), exit(1); for (uint32 i=0; igetSequenceIID(argv[++arg]); uint32 highID = fasta->getSequenceIID(argv[++arg]); if (lowID > highID) { uint32 t = lowID; lowID = highID; highID = t; } for (uint32 s=lowID; (s <= highID) && (s <= fasta->getNumberOfSequences()); s++) printSequence(s); } else if (strcmp(argv[arg], "-r") == 0) { uint32 num = strtouint32(argv[++arg], 0L); failIfNoSource(); failIfNotRandomAccess(); // Impossible to fix, or load whole thing into memory if (num >= fasta->getNumberOfSequences()) num = fasta->getNumberOfSequences(); uint32 *seqs = new uint32 [fasta->getNumberOfSequences()]; for (uint32 i=0; igetNumberOfSequences(); 
i++) seqs[i] = i; for (uint32 i=0; igetNumberOfSequences(); i++) { uint32 j = mtRandom32(mtctx) % (fasta->getNumberOfSequences() - i) + i; uint32 t = seqs[j]; seqs[j] = seqs[i]; seqs[i] = t; } for (uint32 i=0; igetNumberOfSequences(); s++) { seqInCore *S = fasta->getSequenceInCore(s); fprintf(stdout, "%s %s\n", md5_toascii(md5_string(&md5, S->sequence(), S->sequenceLength()), sum), S->header()); delete S; } delete fasta; exit(0); } else if ((strcmp(argv[arg], "--partition") == 0) || (strcmp(argv[arg], "--partitionmap") == 0)) { char *prefix = 0L; if (strcmp(argv[arg], "--partition") == 0) prefix = argv[++arg]; // does the next arg end with gbp, mbp, kbp or bp? If so, // partition by length, else partition into buckets. // int al = strlen(argv[arg+1]); uint64 ps = strtouint64(argv[arg+1], 0L); char a3 = (al<3) ? '0' : (char)toLower[argv[arg+1][al-3]]; char a2 = (al<2) ? '0' : (char)toLower[argv[arg+1][al-2]]; char a1 = (al<1) ? '0' : (char)toLower[argv[arg+1][al-1]]; // partition! if (!isdigit(a1) || !isdigit(a2) || !isdigit(a3)) { if ((a3 == 'g') && (a2 == 'b') && (a1 == 'p')) { ps *= 1000000000; } else if ((a3 == 'm') && (a2 == 'b') && (a1 == 'p')) { ps *= 1000000; } else if ((a3 == 'k') && (a2 == 'b') && (a1 == 'p')) { ps *= 1000; } else if (isdigit(a3) && (a2 == 'b') && (a1 == 'p')) { ps *= 1; } else { fprintf(stderr, "Unknown partition size option '%s'\n", argv[arg+1]), exit(1); } if (ps == 0) fprintf(stderr, "Unknown or zero partition size '%s'\n", argv[arg+1]), exit(1); partitionBySize(prefix, ps, argv[arg+2]); } else { if (ps == 0) fprintf(stderr, "Unknown or zero partition size '%s'\n", argv[arg+1]), exit(1); partitionByBucket(prefix, ps, argv[arg+2]); } exit(0); } else if (strcmp(argv[arg], "--segment") == 0) { partitionBySegment(argv[arg+1], strtouint32(argv[arg+2], 0L), argv[arg+3]); exit(0); } else if (strcmp(argv[arg], "--gccontent") == 0) { computeGCcontent(argv[++arg]); exit(0); } else if (strcmp(argv[arg], "--dumpblocks") == 0) { 
dumpBlocks(argv[++arg]); exit(0); } else if (strcmp(argv[arg], "--stats") == 0) { stats(argv[arg+1], (argv[arg+2] != 0L) ? strtouint64(argv[arg+2], 0L) : 0); exit(0); } else if (strcmp(argv[arg], "--errors") == 0) { int L = strtouint32(argv[++arg], 0L); // Desired length int l = 0; // min of desired length, length of sequence int N = strtouint32(argv[++arg], 0L); // number of copies per sequence int C = strtouint32(argv[++arg], 0L); // number of mutations per copy double P = atof(argv[++arg]); // probability of mutation uint32 i = 0; fasta = new seqCache(argv[++arg]); seqInCore *S = fasta->getSequenceInCore(i++); while (S) { char *seq = S->sequence(); char *hdr = S->header(); int len = S->sequenceLength(); l = len; if ((L > 0) && (L < len)) l = L; simseq(seq, hdr, len, N, l, C, P); delete S; S = fasta->getSequenceInCore(i++); } delete fasta; exit(0); } else if (strcmp(argv[arg], "--seqstore") == 0) { constructSeqStore(argv[++arg], fasta); exit(0); } else if (strcmp(argv[arg], "-help") == 0) { if ((argv[arg+1]) && (strcmp(argv[arg+1], "analysis") == 0)) helpAnalysis(argv[0]); else if ((argv[arg+1]) && (strcmp(argv[arg+1], "examples") == 0)) helpExamples(argv[0]); else helpStandard(argv[0]); exit(0); } else { helpStandard(argv[0]); fprintf(stderr, "Unknown option '%s'\n", argv[arg]); exit(1); } arg++; } delete fasta; fasta = 0L; } void processFile(char *filename) { FILE *F = NULL; if (strcmp(filename, "-") == 0) { F = stdin; } else { errno = 0; F = fopen(filename, "r"); if (errno) fprintf(stderr, "Couldn't open '%s': %s\n", filename, strerror(errno)), exit(1); } uint64 max = 16 * 1024 * 1024; uint64 pos = 0; size_t len = 0; char *data = new char [max]; // Suck the file into 'data' while (!feof(F)) { errno = 0; len = fread(data+pos, 1, max - pos, F); if (errno) fprintf(stderr, "Couldn't read "uint64FMT" bytes from '%s': %s\n", (uint64)(max-pos), filename, strerror(errno)), exit(1); pos += len; if (pos >= max) { max += 16 * 1024 * 1024; char *tmpd = new char [max]; 
memcpy(tmpd, data, pos); delete [] data; data = tmpd; } } if (strcmp(filename, "-") != 0) fclose(F); len = pos; // (over)count the number of words; we start at two, since the // first arg is the name of the program, and if there is only one // word and no whitespace in the file, the below loop fails to // count the second word. int argc = 2; char **argv = 0L; for (uint32 i=0; i struct partition_s { uint32 length; uint32 index; uint32 partition; }; static int partition_s_compare(const void *A, const void *B) { const partition_s *a = (const partition_s *)A; const partition_s *b = (const partition_s *)B; if (a->length < b->length) return(1); if (a->length > b->length) return(-1); return(0); } static partition_s * loadPartition(seqCache *F) { uint32 n = F->getNumberOfSequences(); partition_s *p = new partition_s [n]; for (uint32 i=0; igetSequenceLength(i); p[i].index = i; p[i].partition = 0; } qsort(p, n, sizeof(partition_s), partition_s_compare); return(p); } static void outputPartition(seqCache *F, char *prefix, partition_s *p, uint32 openP, uint32 n) { char filename[1024]; // Check that everything has been partitioned // for (uint32 i=0; igetSequenceInCore(p[i].index); fprintf(file, ">%s\n", S->header()); fwrite(S->sequence(), sizeof(char), S->sequenceLength(), file); fprintf(file, "\n"); if (S->sequenceLength() != p[i].length) { fprintf(stderr, "Huh? '%s' "uint32FMT" != "uint32FMT"\n", S->header(), S->sequenceLength(), p[i].length); } delete S; } fclose(file); } } else { // This dumps the partition information to stdout. 
// fprintf(stdout, uint32FMT"\n", openP); for (uint32 o=1; o<=openP; o++) { uint32 sizeP = 0; for (uint32 i=0; igetNumberOfSequences(); partition_s *p = loadPartition(F); uint32 openP = 1; // Currently open partition uint32 sizeP = 0; // Size of open partition uint32 seqsP = n; // Number of sequences to partition // For any sequences larger than partitionSize, create // partitions containing just one sequence // for (uint32 i=0; i partitionSize) { p[i].partition = openP++; seqsP--; } } // For the remaining, iterate through the list, // greedily placing the longest sequence that fits // into the open partition // while (seqsP > 0) { for (uint32 i=0; igetNumberOfSequences(); partition_s *p = loadPartition(F); if (partitionSize > n) partitionSize = n; // The size, in bases, of each partition // uint32 *s = new uint32 [partitionSize]; for (uint32 i=0; igetNumberOfSequences(); partition_s *p = new partition_s [n]; uint32 numSeqPerPart = (uint32)ceil(n / (double)numSegments); for (uint32 i=0; igetSequenceLength(i); p[i].index = i; p[i].partition = i / numSeqPerPart + 1; } outputPartition(F, prefix, p, numSegments, n); delete [] p; delete F; } kmer-code-2013-trunk/meryl/0000755000000000000000000000000012641613360014300 5ustar rootrootkmer-code-2013-trunk/meryl/merge.listmerge.C0000644000000000000000000002731512322046702017501 0ustar rootroot#include #include #include #include "meryl.H" #include "libmeryl.H" using namespace std; #include struct mMer { kMer _mer; uint32 _cnt; uint32 _off; uint32 _nxt; uint32 _stp; }; class mMerList { public: mMerList(uint32 maxSize) { _posLen = 0; _posMax = 2 * maxSize; _pos = new uint32 [_posMax]; _mmmLen = 0; _mmmMax = maxSize; _mmm = new mMer [_mmmMax]; _tip = ~uint32ZERO; _fre = 0; for (uint32 i=0; i<_mmmMax; i++) { _mmm[i]._cnt = 0; _mmm[i]._off = 0; _mmm[i]._nxt = i+1; _mmm[i]._stp = 0; } _mmm[_mmmMax-1]._nxt = ~uint32ZERO; }; ~mMerList() { delete [] _pos; delete [] _mmm; }; bool loadMore(void) { return((_mmmMax < _tip) || 
(_mmm[_tip]._stp == 1)); }; uint32 length(void) { return(_mmmLen); }; kMer *pop(uint32 &cnt, uint32* &pos) { kMer *ret = 0L; //fprintf(stderr, "POP tip="uint32FMT"\n", _tip); if (_tip < _mmmMax) { uint32 f = _tip; ret = &_mmm[f]._mer; cnt = _mmm[f]._cnt; pos = (_mmm[f]._off != ~uint32ZERO) ? _pos + _mmm[f]._off : 0L; // Move tip to the next thing _tip = _mmm[f]._nxt; // And append this one to the free list. _mmm[f]._nxt = _fre; _fre = f; _mmmLen--; //fprintf(stderr, "POP f="uint32FMT" tip="uint32FMT" len="uint32FMT"\n", f, _tip, _mmmLen); } return(ret); }; // rebuild the position list, squeezes out empty items void rebuild(void) { if (_posLen > 0) { assert(0); uint32 *np = new uint32 [_posMax]; _posLen = 0; for (uint32 i=0; i<_mmmLen; i++) { mMer *m = _mmm + i; if (m->_off != ~uint32ZERO) { _mmm[_mmmLen]._off = _posLen; for (uint32 p=0; p_cnt; p++, _posLen++) np[_posLen] = _pos[p]; } } delete [] _pos; _pos = np; } }; // Read more mers from the file void read(merylStreamReader *R, uint32 num, bool loadAll) { uint32 xxx = 0; uint32 las = ~uint32ZERO; uint32 pos = _tip; bool stop = false; //fprintf(stderr, "read()- loading "uint32FMT"\n", num); assert(_mmmLen + num < _mmmMax); // Load until we hit the sentinal. if (loadAll == false) num = ~uint32ZERO; for (xxx=0; (xxx < num) && (stop == false) && (R->nextMer()); xxx++) { // Insert into a free node uint32 fre = _fre; _fre = _mmm[fre]._nxt; _mmm[fre]._mer = R->theFMer(); _mmm[fre]._cnt = R->theCount(); _mmm[fre]._off = ~uint32ZERO; _mmm[fre]._stp = 0; uint32 *ppp = R->thePositions(); if (ppp) { _mmm[fre]._off = _posLen; if (_posMax <= _posLen + _mmm[fre]._cnt) { fprintf(stderr, "Reallocate _pos\n"); _posMax *= 2; uint32 *tmp = new uint32 [_posMax]; memcpy(tmp, _pos, sizeof(uint32) * _posLen); delete [] _pos; _pos = tmp; } for (uint32 i=0; i<_mmm[fre]._cnt; i++, _posLen++) _pos[_posLen] = ppp[i]; } // Keep count _mmmLen++; // Figure out where to put it in the list. 
New duplicates must // go AFTER the existing -- that's the job of <=. while ((pos < _mmmMax) && (_mmm[pos]._mer <= R->theFMer())) { las = pos; pos = _mmm[pos]._nxt; } if (_mmmMax < _tip) { // No tip, make new list. _mmm[fre]._nxt = _tip; _tip = fre; las = ~uint32ZERO; pos = _tip; } else if (_mmmMax < las) { // Valid list, but we want to insert before the start _mmm[fre]._nxt = _tip; _tip = fre; las = ~uint32ZERO; pos = _tip; } else if (pos < _mmmMax) { // Valid pos, insert in the middle (after las, before pos) _mmm[fre]._nxt = _mmm[las]._nxt; _mmm[las]._nxt = fre; las = fre; //pos = _mmm[las]._nxt; } else { // Have a list, but we ran off the end, append (after las) _mmm[fre]._nxt = ~uint32ZERO; _mmm[las]._nxt = fre; pos = fre; if (loadAll == false) stop = true; } } // Set the sentinal. This forces us to load more mers. // if (loadAll == true) { //fprintf(stderr, "read()-- stop on tip = "uint32FMT"\n", las); _mmm[las]._stp = 1; } //fprintf(stderr, "read()-- now up to "uint32FMT" mers ("uint32FMT" pos); loaded "uint32FMT" out of "uint32FMT" requested.\n", _mmmLen, _posLen, xxx, num); }; private: uint32 _posLen; uint32 _posMax; uint32 *_pos; uint32 _mmmLen; uint32 _mmmMax; mMer *_mmm; uint32 _tip; uint32 _fre; }; void multipleOperations(merylArgs *args) { if (args->mergeFilesLen < 2) { fprintf(stderr, "ERROR - must have at least two databases (you gave "uint32FMT")!\n", args->mergeFilesLen); exit(1); } if (args->outputFile == 0L) { fprintf(stderr, "ERROR - no output file specified.\n"); exit(1); } if ((args->personality != PERSONALITY_MERGE) && (args->personality != PERSONALITY_MIN) && (args->personality != PERSONALITY_MINEXIST) && (args->personality != PERSONALITY_MAX) && (args->personality != PERSONALITY_ADD) && (args->personality != PERSONALITY_AND) && (args->personality != PERSONALITY_NAND) && (args->personality != PERSONALITY_OR) && (args->personality != PERSONALITY_XOR)) { fprintf(stderr, "ERROR - only personalities min, minexist, max, add, and, nand, or, 
xor\n"); fprintf(stderr, "ERROR - are supported in multipleOperations().\n"); fprintf(stderr, "ERROR - this is a coding error, not a user error.\n"); exit(1); } uint32 maxSize = 64 * 1024 * 1024; merylStreamReader **R = new merylStreamReader* [args->mergeFilesLen]; merylStreamWriter *W = 0L; mMerList *M = new mMerList(maxSize + maxSize / 4); for (uint32 i=0; imergeFilesLen; i++) R[i] = new merylStreamReader(args->mergeFiles[i]); // Verify that the mersizes are all the same // bool fail = false; uint32 merSize = R[0]->merSize(); uint32 merComp = R[0]->merCompression(); for (uint32 i=0; imergeFilesLen; i++) { fail |= (merSize != R[i]->merSize()); fail |= (merComp != R[i]->merCompression()); } if (fail) fprintf(stderr, "ERROR: mer size or compression level differ.\n"), exit(1); // Open the output file, using the largest prefix size found in the // input/mask files. // uint32 prefixSize = 0; for (uint32 i=0; imergeFilesLen; i++) if (prefixSize < R[i]->prefixSize()) prefixSize = R[i]->prefixSize(); W = new merylStreamWriter(args->outputFile, merSize, merComp, prefixSize); // Load mers from all files, remember the largest mer we load. // bool loadAll = true; for (uint32 i=0; imergeFilesLen; i++) { M->read(R[i], maxSize / args->mergeFilesLen, loadAll); loadAll = false; } fprintf(stderr, "Initial load: length="uint32FMT"\n", M->length()); bool moreStuff = true; kMer currentMer; // The current mer we're operating on uint32 currentCount = uint32ZERO; // The count (operation dependent) of this mer uint32 currentTimes = uint32ZERO; // Number of files it's in uint32 currentPositionsMax = 0; uint32 *currentPositions = 0L; kMer *thisMer; // The mer we just read uint32 thisCount = uint32ZERO; // The count of the mer we just read uint32 *thisPositions = 0L; speedCounter *C = new speedCounter(" %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, args->beVerbose); currentMer.setMerSize(merSize); while (moreStuff) { // Load more stuff if needed. 
// if (M->loadMore() == true) { M->rebuild(); uint32 additionalLoading = 8192; if (maxSize / args->mergeFilesLen > M->length()) additionalLoading = maxSize / args->mergeFilesLen - M->length(); loadAll = true; for (uint32 i=0; imergeFilesLen; i++) { if (R[i]->validMer()) { M->read(R[i], additionalLoading, loadAll); loadAll = false; } } } // All done? Exit. if (M->length() == 0) moreStuff = false; thisMer = M->pop(thisCount, thisPositions); // If we've hit a different mer, write out the last one if ((M->length() == 0) || (*thisMer != currentMer)) { switch (args->personality) { case PERSONALITY_MIN: if (currentTimes == args->mergeFilesLen) W->addMer(currentMer, currentCount); break; case PERSONALITY_MERGE: case PERSONALITY_MINEXIST: case PERSONALITY_MAX: case PERSONALITY_ADD: W->addMer(currentMer, currentCount, currentPositions); break; case PERSONALITY_AND: if (currentTimes == args->mergeFilesLen) W->addMer(currentMer, currentCount); break; case PERSONALITY_NAND: if (currentTimes != args->mergeFilesLen) W->addMer(currentMer, currentCount); break; case PERSONALITY_OR: W->addMer(currentMer, currentCount); break; case PERSONALITY_XOR: if ((currentTimes % 2) == 1) W->addMer(currentMer, currentCount); break; default: fprintf(stderr, "ERROR - invalid personality in multipleOperations::write\n"); fprintf(stderr, "ERROR - this is a coding error, not a user error.\n"); exit(1); break; } currentMer = *thisMer; currentCount = uint32ZERO; currentTimes = uint32ZERO; C->tick(); } if (moreStuff == false) break; // Perform the operation switch (args->personality) { case PERSONALITY_MERGE: if (thisPositions) { if (currentPositionsMax == 0) { currentPositionsMax = 1048576; currentPositions = new uint32 [currentPositionsMax]; } if (currentPositionsMax < currentCount + thisCount) { while (currentPositionsMax < currentCount + thisCount) currentPositionsMax *= 2; uint32 *t = new uint32 [currentPositionsMax]; memcpy(t, currentPositions, sizeof(uint32) * currentCount); delete [] 
currentPositions; currentPositions = t; } if (thisCount < 16) { for (uint32 i=0; i thisCount) currentCount = thisCount; } break; case PERSONALITY_MAX: if (currentCount < thisCount) currentCount = thisCount; break; case PERSONALITY_ADD: currentCount += thisCount; break; case PERSONALITY_AND: case PERSONALITY_NAND: case PERSONALITY_OR: case PERSONALITY_XOR: currentCount = 1; break; default: fprintf(stderr, "ERROR - invalid personality in multipleOperations::operate\n"); fprintf(stderr, "ERROR - this is a coding error, not a user error.\n"); exit(1); break; } currentTimes++; } for (uint32 i=0; imergeFilesLen; i++) delete R[i]; delete R; delete W; delete M; delete C; } kmer-code-2013-trunk/meryl/meryl.H0000644000000000000000000000653412532056456015557 0ustar rootroot#ifndef MERYL_H #define MERYL_H #include "bio++.H" #define PERSONALITY_MERGE 0xff #define PERSONALITY_MIN 0x01 #define PERSONALITY_MINEXIST 0x02 #define PERSONALITY_MAX 0x03 #define PERSONALITY_MAXEXIST 0x04 #define PERSONALITY_ADD 0x05 #define PERSONALITY_SUB 0x06 #define PERSONALITY_DIVIDE 0x07 #define PERSONALITY_ABS 0x08 #define PERSONALITY_AND 0x10 #define PERSONALITY_NAND 0x11 #define PERSONALITY_OR 0x12 #define PERSONALITY_XOR 0x13 #define PERSONALITY_LEQ 0x14 #define PERSONALITY_GEQ 0x15 #define PERSONALITY_EQ 0x16 class merylArgs { public: merylArgs(int argc, char **argv); merylArgs(const char *prefix); ~merylArgs(); void usage(void); void clear(void); uint64 hash(kMer const &mer) { return(mer.startOfMer(numBuckets_log2)); }; bool writeConfig(void); bool readConfig(const char *prefix); public: char *execName; char *options; bool beVerbose; bool doForward; bool doReverse; bool doCanonical; char *inputFile; char *outputFile; char *queryFile; uint32 merSize; uint32 merComp; bool positionsEnabled; uint64 numMersEstimated; uint64 numMersActual; uint64 numBasesActual; uint64 mersPerBatch; uint64 basesPerBatch; uint64 numBuckets; uint32 numBuckets_log2; uint32 merDataWidth; uint64 merDataMask; uint32 
bucketPointerWidth; uint32 numThreads; uint64 memoryLimit; uint64 segmentLimit; bool configBatch; bool countBatch; bool mergeBatch; uint32 batchNumber; char *sgeJobName; char *sgeBuildOpt; char *sgeMergeOpt; bool isOnGrid; uint32 lowCount; uint32 highCount; uint32 desiredCount; bool outputCount; bool outputAll; bool outputPosition; bool includeDefLine; bool includeMer; uint32 mergeFilesMax; uint32 mergeFilesLen; char **mergeFiles; uint32 personality; }; uint64 estimateNumMersInMemorySize(uint32 merSize, uint64 mem, bool positionsEnabled, bool beVerbose); uint64 estimateMemory(uint32 merSize, uint64 numMers, bool positionsEnabled); uint32 optimalNumberOfBuckets(uint32 merSize, uint64 numMers, bool positionsEnabled); void estimate(merylArgs *args); void build(merylArgs *args); void multipleOperations(merylArgs *args); void binaryOperations(merylArgs *args); void unaryOperations(merylArgs *args); void dump(merylArgs *args); void dumpThreshold(merylArgs *args); void dumpPositions(merylArgs *args); void countUnique(merylArgs *args); void dumpDistanceBetweenMers(merylArgs *args); void plotHistogram(merylArgs *args); #endif // MERYL_H kmer-code-2013-trunk/meryl/maskMers.C0000644000000000000000000004073612527037073016204 0ustar rootroot#include #include #include #include #include "bio++.H" #include "seqStream.H" #include "libmeryl.H" #include #define MAX_COVERAGE 51 // Wed May 20 02:39:41 EDT 2015 // // This appears to be an analysis of the repeat/unique kmer content of a genome. From 2008. // // The 'mate rescue' appears to be measuring if mates from a library of some size would // anchor a repeat to a unique. I think. 
class mateRescueData { public: mateRescueData() { _mean = 0; _stddev = 0; _coverage = 0; _normal = 0L; _normalZero = 0; }; void init(int32 mean_, int32 stddev_, uint32 coverage_) { _mean = mean_; _stddev = stddev_; _coverage = coverage_; assert(_mean > 3 * _stddev); double a = 1.0 / (_stddev * sqrt(2 * M_PI)); double c = 2 * _stddev * _stddev; int32 b1l = (int32)floor(-3 * _stddev); int32 b1h = (int32)ceil ( 3 * _stddev); _normal = new double [b1h - b1l + 1]; _normalZero = -b1l; for (int32 l=0; l= _numSeq) || (onlySeqIID_ == i)) { fprintf(stderr, "Loading sequence "uint32FMT" of length "uint32FMT"\n", i, _seqLen[i]); _masking[i] = new char [_seqLen[i]]; _repeatID[i] = new uint32 [_seqLen[i]]; //memset(_masking[i], 'g', sizeof(char) * _seqLen[i]); //memset(_repeatID[i], 0, sizeof(uint32) * _seqLen[i]); fread(_masking[i], sizeof(char), _seqLen[i], maskMersFile); fread(_repeatID[i], sizeof(uint32), _seqLen[i], maskMersFile); } else { fseek(maskMersFile, sizeof(char) * _seqLen[i], SEEK_CUR); fseek(maskMersFile, sizeof(uint32) * _seqLen[i], SEEK_CUR); _seqLen[i] = 0; } } fclose(maskMersFile); } void merMaskedSequence::saveMasking(void) { FILE *maskMersFile = fopen(_maskMersName, "w"); fwrite(&_numSeq, sizeof(uint32), 1, maskMersFile); fwrite(&_merSize, sizeof(uint32), 1, maskMersFile); fwrite( _seqLen, sizeof(uint32), _numSeq, maskMersFile); for (uint32 i=0; i<_numSeq; i++) { fwrite(_masking[i], sizeof(char), _seqLen[i], maskMersFile); fwrite(_repeatID[i], sizeof(uint32), _seqLen[i], maskMersFile); } fclose(maskMersFile); } void merMaskedSequence::buildMasking(void) { seqStream *STR = new seqStream(_fastaName); _numSeq = STR->numberOfSequences(); _seqLen = new int32 [_numSeq]; _masking = new char * [_numSeq]; _repeatID = new uint32 * [_numSeq]; _merSize = 0; fprintf(stderr, uint32FMT" sequences in '%s'\n", _numSeq, _fastaName); for (uint32 i=0; i<_numSeq; i++) { _seqLen[i] = STR->lengthOf(i); _masking[i] = new char [_seqLen[i]]; _repeatID[i] = new uint32 [_seqLen[i]]; 
memset(_masking[i], 'g', sizeof(char) * _seqLen[i]); memset(_repeatID[i], 0, sizeof(uint32) * _seqLen[i]); } // g -> gap in sequence // u -> unique mer // r -> repeat mer // // For all the r's we also need to remember the other locations // that repeat is at. We annotate the map with a repeat id, set if // another copy of the repeat is nearby. merylStreamReader *MS = new merylStreamReader(_merylName); speedCounter *CT = new speedCounter(" Masking mers in sequence: %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, true); uint32 rid = 0; _merSize = MS->merSize(); while (MS->nextMer()) { //fprintf(stderr, "mer count="uint64FMT" pos="uint32FMT"\n", MS->theCount(), MS->getPosition(0)); if (MS->theCount() == 1) { uint32 p = MS->getPosition(0); uint32 s = STR->sequenceNumberOfPosition(p); p -= STR->startOf(s); _masking[s][p] = 'u'; } else { std::sort(MS->thePositions(), MS->thePositions() + MS->theCount()); uint32 lastS = ~uint32ZERO; uint32 lastP = 0; rid++; for (uint32 i=0; itheCount(); i++) { uint32 p = MS->getPosition(i); uint32 s = STR->sequenceNumberOfPosition(p); p -= STR->startOf(s); // Always set the masking. _masking[s][p] = 'r'; // If there is a repeat close by, set the repeat ID. if ((s == lastS) && (lastP + 40000 > p)) { _repeatID[s][lastP] = rid; _repeatID[s][p] = rid; } lastS = s; lastP = p; } } CT->tick(); } delete CT; delete MS; delete STR; saveMasking(); } void computeDensity(merMaskedSequence *S, char *outputPrefix) { char outputName[FILENAME_MAX]; FILE *outputFile; uint32 windowSizeMax = 10000; for (uint32 s=0; snumSeq(); s++) { // seqLen == 0 iff that sequence is not loaded. if (S->seqLen(s) == 0) continue; sprintf(outputName, "%s.density.seq"uint32FMTW(02), outputPrefix, s); outputFile = fopen(outputName, "w"); fprintf(stderr, "Starting '%s'\n", outputName); fprintf(outputFile, "#window\tunique\trepeat\tgaps\n"); // Not the most efficient, but good enough for us right now. 
for (int32 p=0; pseqLen(s); ) { uint32 windowSize = 0; uint32 uniqueSum = 0; uint32 repeatSum = 0; uint32 gapSum = 0; while ((windowSize < windowSizeMax) && (p < S->seqLen(s))) { char m = S->masking(s, p); if (m == 'u') uniqueSum++; if (m == 'g') gapSum++; if (m == 'r') repeatSum++; windowSize++; p++; } fprintf(outputFile, uint32FMT"\t%f\t%f\t%f\n", p - windowSize, (double)uniqueSum / windowSize, (double)repeatSum / windowSize, (double)gapSum / windowSize); } fclose(outputFile); } } // For each 'r' mer, compute the number of 'u' mers // that are within some mean +- stddev range. // // We count for two blocks: // // | <- mean -> | <- mean -> | // ---[block1]---------------mer---------------[block2]--- // // Once we know that, we can compute the probability that // a repeat mer can be rescued. // // p1 = uniq/total -- for 1 X coverage // pn = 1 - (1-p1)^n -- for n X coverage void computeMateRescue(merMaskedSequence *S, char *outputPrefix, mateRescueData *lib, uint32 libLen) { char outputName[FILENAME_MAX]; FILE *outputFile; FILE *outputData; uint32 closeRepeatsLen = 0; uint32 closeRepeatsMax = 80000; int32 *closeRepeats = new int32 [closeRepeatsMax]; speedCounter *CT = new speedCounter(" Examining repeats: %7.2f Kbases -- %5.2f Kbases/second\r", 1000.0, 0x1ffff, true); uint32 totalDepth = 0; for (uint32 l=0; lnumSeq(); s++) { // seqLen == 0 iff that sequence is not loaded. 
if (S->seqLen(s) == 0) continue; fprintf(stderr, "Starting sequence "uint32FMT"\n", s); sprintf(outputName, "%s.mateRescue.seq"uint32FMTW(02)".out", outputPrefix, s); outputFile = fopen(outputName, "w"); sprintf(outputName, "%s.mateRescue.seq"uint32FMTW(02)".dat", outputPrefix, s); outputData = fopen(outputName, "w"); double numRR[MAX_COVERAGE] = {0}; // num repeats rescued (expected) for [] X coverage double numNR[MAX_COVERAGE] = {0}; // num repeats nonrescuable (expected) for [] X coverage uint32 numRT = 0; // num repeats total for (int32 p=0; pseqLen(s); p++) { CT->tick(); double pRtot = 0.0; double pFtot = 0.0; if ((S->masking(s, p) != 'g') && (S->masking(s, p) != 'u') && (S->masking(s, p) != 'r')) fprintf(stderr, "INVALID MASKING - got %d = %c\n", S->masking(s, p), S->masking(s, p)); if (S->masking(s, p) == 'r') { numRT++; // Index over x-coverage in libraries. MUST BE 1. uint32 ridx = 1; for (uint32 l=0; lrepeatID(s, p) > 0) { int32 pl = (int32)floor(p - 3 * stddev); int32 ph = (int32)ceil (p + 3 * stddev); if (pl < 0) pl = 0; if (ph > S->seqLen(s)) ph = S->seqLen(s); for (int32 pi=pl; pirepeatID(s, pi) == S->repeatID(s, p)) && (pi != p)) closeRepeats[closeRepeatsLen++] = pi; } int32 b1l = (int32)floor(p - mean - 3 * stddev); int32 b1h = (int32)ceil (p - mean + 3 * stddev); int32 b2l = (int32)floor(p + mean - 3 * stddev); int32 b2h = (int32)ceil (p + mean + 3 * stddev); if (b1l < 0) b1l = 0; if (b1h < 0) b1h = 0; if (b1h > S->seqLen(s)) b1h = S->seqLen(s); if (b2l < 0) b2l = 0; if (b2h > S->seqLen(s)) b2h = S->seqLen(s); if (b2l > S->seqLen(s)) b2l = S->seqLen(s); //fprintf(stderr, "b1: %d-%d b2:%d-%d\n", b1l, b1h, b2l, b2h); // probability we can rescue this repeat with this mate pair double pRescue = 0.0; double pFailed = 0.0; if (closeRepeatsLen == 0) { // No close repeats, use the fast method. 
for (int32 b=b1l; bmasking(s, b) == 'u') pRescue += lib[l].normal(b - p + mean); } for (int32 b=b2l; bmasking(s, b) == 'u') pRescue += lib[l].normal(b - p - mean); } } else { // Close repeats, gotta be slow. for (int32 b=b1l; bmasking(s, b) == 'u') { int32 mrl = b + mean - 3 * stddev; int32 mrh = b + mean + 3 * stddev; bool rescuable = true; for (uint32 cri=0; rescuable && crimasking(s, b) == 'u') { int32 mrl = b - mean - 3 * stddev; int32 mrh = b - mean + 3 * stddev; bool rescuable = true; for (uint32 cri=0; rescuable && crimerSize(), numRT, numRR[x], numNR[x], x, lib[l].mean(), lib[l].stddev()); n++; if (n >= lib[l].coverage()) { l++; n = 0; } } fclose(outputFile); fclose(outputData); } delete CT; } int main(int argc, char **argv) { char *merylName = 0L; char *fastaName = 0L; char *outputPrefix = 0L; uint32 onlySeqIID = ~uint32ZERO; bool doDensity = false; bool doRescue = false; mateRescueData lib[MAX_COVERAGE]; uint32 libLen = 0; int arg=1; int err=0; while (arg < argc) { if (strcmp(argv[arg], "-mers") == 0) { merylName = argv[++arg]; } else if (strcmp(argv[arg], "-seq") == 0) { fastaName = argv[++arg]; } else if (strcmp(argv[arg], "-only") == 0) { onlySeqIID = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-output") == 0) { outputPrefix = argv[++arg]; } else if (strcmp(argv[arg], "-d") == 0) { doDensity = true; } else if (strcmp(argv[arg], "-r") == 0) { if (atoi(argv[arg+3]) > 0) { doRescue = true; lib[libLen++].init(atoi(argv[arg+1]), atoi(argv[arg+2]), atoi(argv[arg+3])); } arg += 3; } else { fprintf(stderr, "unknown option '%s'\n", argv[arg]); err++; } arg++; } if ((err) || (merylName == 0L) || (fastaName == 0L) || (outputPrefix == 0L)) { fprintf(stderr, "usage: %s -mers mers -seq fasta -output prefix [-d] [-r mean stddev coverage]\n", argv[0]); exit(1); } merMaskedSequence *S = new merMaskedSequence(fastaName, merylName, onlySeqIID); if (doDensity) computeDensity(S, outputPrefix); if (doRescue) computeMateRescue(S, outputPrefix, lib, libLen); return(0); } 
kmer-code-2013-trunk/meryl/test/0000755000000000000000000000000012641613360015257 5ustar rootrootkmer-code-2013-trunk/meryl/test/test-seq1.fasta0000644000000000000000000000053207605137611020131 0ustar rootroot> 1 A 20 CG 0 T AAAAAAAAAAAAAAAAAAAAGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG > 1 A 1 CG 0 T ----not-a-mer------ -----is-a-mer------- AAAAAAAAAAAAAAAAAAAANGCGCGCGCGCGCGCGCGCGNCGCGCGCGCGCGCGCGCGCG > (zero 63 bases) ATNCGGATYCGATCGASCHJAGSVHYWERIGHWEEIRVHSDKFVHWIERVHIWRVHKSDFVKS > 20 T NNNNTTTTTTTTTTTTTTTTTTTTNNNNTTTTTTTTTTTTTTTTTTTTNNNN kmer-code-2013-trunk/meryl/test/Makefile0000644000000000000000000000254010707437430016723 0ustar rootrootPROG = stupidcount exhaustive INCLUDE = -I.. -I../../libutil -I../../libbio -I../../libmeryl LIBS = -L.. -L../../libutil -L../../libbio -L../../libmeryl -lmeryl -lbio -lutil -lm MERSIZE = 26 include ../../Make.compilers all: $(PROG) test-reduce stupidcount: stupidcount.C $(CXX) $(CXXFLAGS_COMPILE) -c -o stupidcount.o stupidcount.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o stupidcount stupidcount.o $(LIBS) exhaustive: exhaustive.C kmerlite.H $(CXX) $(CXXFLAGS_COMPILE) -c -o exhaustive.o exhaustive.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o exhaustive exhaustive.o $(LIBS) test-exhaustive: exhaustive ../meryl ../../leaff/leaff ../../leaff/leaff -G 1000 10000 40000 > g.fasta ../meryl -B -s g.fasta -o s -m $(MERSIZE) -threads 7 ./exhaustive -m s -f g.fasta test-reduce: ../meryl ../meryl -B -f -m 20 -s test-seq1.fasta -o 1 # Build the initial table ../meryl -Dt -n 0 -s 1 > 2.reduce.fasta # Dump the initial table as fasta ../meryl -B -f -m 20 -s 2.reduce.fasta -o 2 # Build a new table on the dumped fasta ../meryl -M sub -s 1 -s 2 -o 3 # Remove one copy of each mer ../meryl -Dt -n 1 -s 3 # Dump the resulting file echo 1 10 9 1 is correct touch test-reduce test: ../meryl -B -s test-seq1.fasta -o t -m 20 clean: rm -f $(PROG) *.o *.mc??? 
test-reduce *.seqStore* g.fasta 2.reduce.fasta *.fastaidx kmer-code-2013-trunk/meryl/test/stupidcount.C0000644000000000000000000000163112322046702017741 0ustar rootroot#include "bio++.H" // Reads a sequence file, outputs a list of the mers in it. You can // then pipe this to unix sort and uniq to do a mercount. You // probably don't want to count large things this way... int main(int argc, char **argv) { char *seqName = 0L; uint32 merSize = 20; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-s") == 0) { seqName = argv[++arg]; } else if (strcmp(argv[arg], "-m") == 0) { merSize = strtouint32(argv[++arg], 0L); } arg++; } if (seqName == 0L) { fprintf(stderr, "usage: %s [-m mersize] -s seqfile.fasta\n", argv[0]); exit(1); } seqStream *CS = new seqStream(seqName, true); merStream *MS = new merStream(new kMerBuilder(merSize), CS); char str[1024]; while (MS->nextMer()) fprintf(stdout, "%s\n", MS->theFMer().merToString(str)); delete MS; delete CS; exit(0); } kmer-code-2013-trunk/meryl/test/test-seq3.fasta0000644000000000000000000000007407605137611020134 0ustar rootroot> ACGCTCAGCTACTACGACTTAGAGAAAATAGCGATATAGCGATCGATCGATTAGAGA kmer-code-2013-trunk/meryl/test/kmerlite.H0000644000000000000000000000626612322046702017212 0ustar rootroot#include "bio++.H" #ifndef KMER_LITE_H #define KMER_LITE_H //////////////////////////////////////// // // This is kMerLite -- derived from kMer.H, removing // most of the accessors. // // Assumes that KMER_WORDS is already defined. class kMerLite { public: // Used by some of the test routines. void dump(void) const { for (uint32 i=0; i> 5; char *str = instr; if ((merSize & uint32MASK(6)) == 0) lastWord++; // We build the string right to left, print any partial word // first, then print whole words until we run out of words to // print. 
if (merSize & uint32MASK(5)) { ::merToString(merSize & uint32MASK(5), _wd[lastWord], str); str += merSize & uint32MASK(5); } while (lastWord > 0) { lastWord--; ::merToString(32, _wd[lastWord], str); str += 32; } return(instr); }; #if KMER_WORDS == 1 bool operator!=(kMerLite const &r) const { return(_wd[0] != r._wd[0]); }; bool operator==(kMerLite const &r) const { return(_wd[0] == r._wd[0]); }; bool operator<(kMerLite const &r) const { return(_wd[0] < r._wd[0]); }; bool operator>(kMerLite const &r) const { return(_wd[0] > r._wd[0]); }; bool operator<=(kMerLite const &r) const { return(_wd[0] <= r._wd[0]); }; bool operator>=(kMerLite const &r) const { return(_wd[0] >= r._wd[0]); }; #else bool operator!=(kMerLite const &r) const { uint64 res = uint64ZERO; for (uint32 i=KMER_WORDS; i--; ) res |= _wd[i] ^ r._wd[i]; return(res != uint64ZERO); }; bool operator==(kMerLite const &r) const { uint64 res = uint64ZERO; for (uint32 i=KMER_WORDS; i--; ) res |= _wd[i] ^ r._wd[i]; return(res == uint64ZERO); }; bool operator<(kMerLite const &r) const { for (uint32 i=KMER_WORDS; i--; ) { if (_wd[i] < r._wd[i]) return(true); if (_wd[i] > r._wd[i]) return(false); } return(false); }; bool operator>(kMerLite const &r) const { for (uint32 i=KMER_WORDS; i--; ) { if (_wd[i] > r._wd[i]) return(true); if (_wd[i] < r._wd[i]) return(false); } return(false); }; bool operator<=(kMerLite const &r) const { for (uint32 i=KMER_WORDS; i--; ) { if (_wd[i] < r._wd[i]) return(true); if (_wd[i] > r._wd[i]) return(false); } return(true); }; bool operator>=(kMerLite const &r) const { for (uint32 i=KMER_WORDS; i--; ) { if (_wd[i] > r._wd[i]) return(true); if (_wd[i] < r._wd[i]) return(false); } return(true); }; #endif private: uint64 _wd[KMER_WORDS]; }; #endif // KMER_LITE_H kmer-code-2013-trunk/meryl/test/test-seq2.fasta0000644000000000000000000000107107605137611020131 0ustar rootroot> 2 A 20 CG 0 T AAAAAAAAAAAAAAAAAAAAN AAAAAAAAAAAAAAAAAAAAN AAAAAAAAAAAAAAAAAAAAN AAAAAAAAAAAAAAAAAAAAN 
AAAAAAAAAAAAAAAAAAAAN AAAAAAAAAAAAAAAAAAACN AAAAAAAAAAAAAAAAAAACN AAAAAAAAAAAAAAAAAAACN AAAAAAAAAAAAAAAAAAACN AAAAAAAAAAAAAAAAAAACN AAAAAAAAAAAAAAAAAAAAAAAGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG > 1 A 1 CG 0 T ----not-a-mer------ -----is-a-mer------- AAAAAAAAAAAAAAAAAAAANGCGCGCGCGCGCGCGCGCGNCGCGCGCGCGCGCGCGCGCG > (zero 63 bases) ATNCGGATYCGATCGASCHJAGSVHYWERIGHWEEIRVHSDKFVHWIERVHIWRVHKSDFVKS > 20 T NNNNTTTTTTTTTTTTTTTTTTTTNNNNTTTTTTTTTTTTTTTTTTTTNNNN kmer-code-2013-trunk/meryl/test/exhaustive.C0000644000000000000000000001250012322046702017542 0ustar rootroot#include "bio++.H" #include "libmeryl.H" #include "kmerlite.H" // This tests that all the mers in an input fasta file are counted // properly. It does not test that the meryl output contains exactly // those mers, just that those mers are there. // // If you can fit into one batch, then it _will_ verift that the // meryl output is exactly correct. // // Reads a meryl-format kmer count in chunks. Each chunk is stored // in a searchable structure (we should be using, say, an extended // existDB, but we're using a balanced binary tree). The entire // source fasta file is then streamed against the kmer chunk, // decrementing the count for each mer. When the whole file is // streamed, any kmers with positive count are reported. // NB: My hacked kazlib returns a pointer to whatever we give it. // Since we gave it a pointer to an object, it gives us back a // pointer to "a pointer to an object". Hence, this ugliness. 
// int kMerLiteSort(void const *a, void const *b) { kMerLite const *A = *((kMerLite * const *)a); kMerLite const *B = *((kMerLite * const *)b); if (*A < *B) return(-1); if (*A > *B) return(1); return(0); } int main(int argc, char **argv) { char *merylCount = 0L; char *fastaName = 0L; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-m") == 0) { merylCount = argv[++arg]; } else if (strcmp(argv[arg], "-f") == 0) { fastaName = argv[++arg]; } else { fprintf(stderr, "unknown option '%s'\n", argv[arg]); } arg++; } if ((merylCount == 0L) || (fastaName == 0L)) { fprintf(stderr, "usage: %s -m -f \n", argv[0]); exit(1); } // Open the count files // merylStreamReader *MSR = new merylStreamReader(merylCount); fprintf(stderr, "Mers are "uint32FMT" bases.\n", MSR->merSize()); fprintf(stderr, "There are "uint64FMT" unique (copy = 1) mers.\n", MSR->numberOfUniqueMers()); fprintf(stderr, "There are "uint64FMT" distinct mers.\n", MSR->numberOfDistinctMers()); fprintf(stderr, "There are "uint64FMT" mers total.\n", MSR->numberOfTotalMers()); // Guess how many mers we can fit into 512MB, then report how many chunks we need to do. 
uint32 merSize = MSR->merSize(); uint64 memoryLimit = 700 * 1024 * 1024; uint64 perMer = sizeof(kMerLite) + sizeof(dnode_t); uint64 mersPerBatch = memoryLimit / perMer; uint32 numBatches = MSR->numberOfDistinctMers() / mersPerBatch; uint32 batch = 0; dnode_t *nodes = new dnode_t [mersPerBatch]; kMerLite *mers = new kMerLite [mersPerBatch]; if (MSR->numberOfDistinctMers() % mersPerBatch) numBatches++; fprintf(stderr, "perMer: "uint64FMT" bytes ("uint64FMT" for kMerLite, "uint64FMT" for dnode_t.\n", perMer, (uint64)sizeof(kMerLite), (uint64)sizeof(dnode_t)); fprintf(stderr, "We can fit "uint64FMT" mers into "uint64FMT"MB.\n", mersPerBatch, memoryLimit >> 20); fprintf(stderr, "So we need "uint32FMT" batches to verify the count.\n", numBatches); while (MSR->validMer()) { uint64 mersRemain = mersPerBatch; dict_t *merDict = dict_create(mersPerBatch, kMerLiteSort); batch++; // STEP 1: Insert mersPerBatch into the merDict // fprintf(stderr, "STEP 1 BATCH "uint32FMTW(2)": Insert into merDict\n", batch); while (MSR->nextMer() && mersRemain) { mersRemain--; mers[mersRemain] = MSR->theFMer(); // initialize the node with the value, then insert the node // into the tree using the key int32 val = (int32)MSR->theCount(); dnode_init(&nodes[mersRemain], (void *)val); dict_insert(merDict, &nodes[mersRemain], &mers[mersRemain]); } // STEP 2: Stream the original file, decrementing the count // fprintf(stderr, "STEP 2 BATCH "uint32FMTW(2)": Stream fasta\n", batch); seqStream *CS = new seqStream(fastaName, true); merStream *MS = new merStream(new kMerBuilder(merSize), CS); kMerLite mer; dnode_t *nod; while (MS->nextMer()) { mer = MS->theFMer(); nod = dict_lookup(merDict, &mer); if (nod != 0L) { int32 val = (int32)dnode_get(nod); val--; dnode_put(nod, (void *)val); } else { // Unless the whole meryl file fit into our merDict, we cannot warn if // we don't find mers. 
// if (numBatches == 1) { char str[1024]; fprintf(stderr, "Didn't find node for mer '%s'\n", mer.merToString(merSize, str)); } } } delete MS; delete CS; // STEP 3: Check every node in the tree to make sure that the counts // are exactly zero. // fprintf(stderr, "STEP 3 BATCH "uint32FMTW(2)": Check\n", batch); nod = dict_first(merDict); while (nod) { int32 val = (int32)dnode_get(nod); kMerLite const *nodmer = (kMerLite const *)dnode_getkey(nod); if (val != 0) { char str[1024]; fprintf(stderr, "Got count "int32FMT" for mer '%s'\n", val, nodmer->merToString(merSize, str)); } nod = dict_next(merDict, nod); } // STEP 4: Destroy the dictionary. // fprintf(stderr, "STEP 4 BATCH "uint32FMTW(2)": Destroy\n", batch); while ((nod = dict_first(merDict))) dict_delete(merDict, nod); dict_destroy(merDict); } } kmer-code-2013-trunk/meryl/build.C0000644000000000000000000006402112532056456015514 0ustar rootroot#include #include #include #include #include #include "bio++.H" #include "meryl.H" #include "libmeryl.H" #include "seqStream.H" #include "merStream.H" void runThreaded(merylArgs *args); // You probably want this to be the same as KMER_WORDS, but in rare // cases, it can be less. // #define SORTED_LIST_WIDTH KMER_WORDS // to make the sorted list be wider, we also need to store wide // things in the bitpackedarray buckets. probably easy (do multiple // adds of data, each at most 64 bits) but not braindead. 
#if SORTED_LIST_WIDTH == 1 class sortedList_t { public: uint64 _w; uint32 _p; bool operator<(sortedList_t &that) { return(_w < that._w); }; bool operator>=(sortedList_t &that) { return(_w >= that._w); }; sortedList_t &operator=(sortedList_t &that) { _w = that._w; _p = that._p; return(*this); }; }; #else class sortedList_t { public: uint64 _w[SORTED_LIST_WIDTH]; uint32 _p; bool operator<(sortedList_t &that) { for (uint32 i=SORTED_LIST_WIDTH; i--; ) { if (_w[i] < that._w[i]) return(true); if (_w[i] > that._w[i]) return(false); } return(false); }; bool operator>=(sortedList_t &that) { for (uint32 i=SORTED_LIST_WIDTH; i--; ) { if (_w[i] > that._w[i]) return(true); if (_w[i] < that._w[i]) return(false); } return(true); }; sortedList_t &operator=(sortedList_t &that) { for (uint32 i=SORTED_LIST_WIDTH; i--; ) _w[i] = that._w[i]; _p = that._p; return(*this); }; }; #endif void adjustHeap(sortedList_t *M, int64 i, int64 n) { sortedList_t m = M[i]; int64 j = (i << 1) + 1; // let j be the left child while (j < n) { if (j= M[j]) // a position for M[i] has been found break; M[(j-1)/2] = M[j]; // Move larger child up a level j = (j << 1) + 1; } M[(j-1)/2] = m; } void submitPrepareBatch(merylArgs *args) { FILE *F; char nam[1024]; char cmd[1024]; sprintf(nam, "%s-prepare.sh", args->outputFile); errno = 0; F = fopen(nam, "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", nam, strerror(errno)), exit(1); fprintf(F, "#!/bin/sh\n\n"); fprintf(F, ". 
$SGE_ROOT/$SGE_CELL/common/settings.sh\n"); fprintf(F, "%s -forcebuild %s\n", args->execName, args->options); fclose(F); if (args->sgeMergeOpt) sprintf(cmd, "qsub -cwd -b n -j y -o %s-prepare.err %s -N mp%s %s-prepare.sh", args->outputFile, args->sgeMergeOpt, args->sgeJobName, args->outputFile); else sprintf(cmd, "qsub -cwd -b n -j y -o %s-prepare.err -N mp%s %s-prepare.sh", args->outputFile, args->sgeJobName, args->outputFile); fprintf(stderr, "%s\n", cmd); if (system(cmd)) fprintf(stderr, "%s\nFailed to execute qsub command: %s\n", cmd, strerror(errno)), exit(1); } void submitCountBatches(merylArgs *args) { FILE *F; char nam[1024]; char cmd[1024]; sprintf(nam, "%s-count.sh", args->outputFile); errno = 0; F = fopen(nam, "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", nam, strerror(errno)), exit(1); fprintf(F, "#!/bin/sh\n\n"); fprintf(F, ". $SGE_ROOT/$SGE_CELL/common/settings.sh\n"); fprintf(F, "batchnum=`expr $SGE_TASK_ID - 1`\n"); fprintf(F, "%s -v -countbatch $batchnum -o %s\n", args->execName, args->outputFile); fclose(F); if (args->sgeBuildOpt) sprintf(cmd, "qsub -t 1-"uint64FMT" -cwd -b n -j y -o %s-count-\\$TASK_ID.err %s -N mc%s %s-count.sh", args->segmentLimit, args->outputFile, args->sgeBuildOpt, args->sgeJobName, args->outputFile); else sprintf(cmd, "qsub -t 1-"uint64FMT" -cwd -b n -j y -o %s-count-\\$TASK_ID.err -N mc%s %s-count.sh", args->segmentLimit, args->outputFile, args->sgeJobName, args->outputFile); fprintf(stderr, "%s\n", cmd); if (system(cmd)) fprintf(stderr, "%s\nFailed to execute qsub command: %s\n", cmd, strerror(errno)), exit(1); // submit the merge sprintf(nam, "%s-merge.sh", args->outputFile); errno = 0; F = fopen(nam, "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", nam, strerror(errno)), exit(1); fprintf(F, "#!/bin/sh\n\n"); fprintf(F, ". 
$SGE_ROOT/$SGE_CELL/common/settings.sh\n"); fprintf(F, "%s -mergebatch -o %s\n", args->execName, args->outputFile); fclose(F); if (args->sgeMergeOpt) sprintf(cmd, "qsub -hold_jid mc%s -cwd -b n -j y -o %s-merge.err %s -N mm%s %s-merge.sh", args->sgeJobName, args->outputFile, args->sgeMergeOpt, args->sgeJobName, args->outputFile); else sprintf(cmd, "qsub -hold_jid mc%s -cwd -b n -j y -o %s-merge.err -N mm%s %s-merge.sh", args->sgeJobName, args->outputFile, args->sgeJobName, args->outputFile); fprintf(stderr, "%s\n", cmd); if (system(cmd)) fprintf(stderr, "%s\nFailed to execute qsub command: %s\n", cmd, strerror(errno)), exit(1); } void prepareBatch(merylArgs *args) { bool fatalError = false; if (args->inputFile == 0L) fprintf(stderr, "ERROR - no input file specified.\n"), fatalError = true; if (args->outputFile == 0L) fprintf(stderr, "ERROR - no output file specified.\n"), fatalError = true; if ((args->doForward == false) && (args->doReverse == false) && (args->doCanonical == false)) fprintf(stderr, "ERROR - need to specify at least one of -f, -r, -C\n"), fatalError = true; if ((args->doForward && args->doReverse) || (args->doForward && args->doCanonical) || (args->doReverse && args->doCanonical)) fprintf(stderr, "ERROR - only one of -f, -r and -C may be specified!\n"), fatalError = true; if (args->lowCount > args->highCount) fprintf(stderr, "ERROR - lowCount > highCount??\n"), fatalError = true; if (args->segmentLimit && args->memoryLimit) fprintf(stderr, "ERROR: Only one of -memory and -segments can be specified.\n"), fatalError=true; if (fatalError) exit(1); // If we were given no segment or memory limit, but threads, we // really want to create n segments. 
// if ((args->numThreads > 0) && (args->segmentLimit == 0) && (args->memoryLimit == 0)) args->segmentLimit = args->numThreads; { seqStream *seqstr = new seqStream(args->inputFile); args->numBasesActual = 0; for (uint32 i=0; inumberOfSequences(); i++) args->numBasesActual += seqstr->lengthOf(i); merStream *merstr = new merStream(new kMerBuilder(args->merSize), seqstr, true, true); args->numMersActual = merstr->approximateNumberOfMers() + 1; delete merstr; } #warning not submitting prepareBatch to grid #if 0 if ((args->isOnGrid) || (args->sgeJobName == 0L)) { } else { // Shucks, we need to build the merstream file. Lets do it // on the grid! // submitPrepareBatch(args); exit(0); } #endif // If there is a memory limit, figure out how to divide the work into an integer multiple of // numThreads segments. // // Otherwise, if there is a segment limit, split the total number of mers into n pieces. // // Otherwise, we must be doing it all in one fell swoop. // if (args->memoryLimit) { args->mersPerBatch = estimateNumMersInMemorySize(args->merSize, args->memoryLimit, args->positionsEnabled, args->beVerbose); if (args->mersPerBatch > args->numMersActual) args->mersPerBatch = args->numMersActual; args->mersPerBatch = (uint64)ceil((double)args->mersPerBatch / (double)args->numThreads); args->segmentLimit = (uint64)ceil((double)args->numMersActual / (double)args->mersPerBatch); args->segmentLimit = args->numThreads * (uint32)ceil((double)args->segmentLimit / (double)args->numThreads); } else if (args->segmentLimit) { args->mersPerBatch = (uint64)ceil((double)args->numMersActual / (double)args->segmentLimit); } else { args->mersPerBatch = args->numMersActual; args->segmentLimit = 1; } args->basesPerBatch = (uint64)ceil((double)args->numBasesActual / (double)args->segmentLimit); // Choose the optimal number of buckets to reduce memory usage. Yes, this is already done in // estimateNumMersInMemorySize() (but not saved) and we need to do it for the other cases anyway. 
// // We use the number of mers per batch + 1 because we need to store the first position after the // last mer. That is, if there are two mers, we will store that the first mer is at position 0, // the second mer is at position 1, and the end of the second mer is at position 2. // args->bucketPointerWidth = logBaseTwo64(args->basesPerBatch + 1); args->numBuckets_log2 = optimalNumberOfBuckets(args->merSize, args->basesPerBatch, args->positionsEnabled); args->numBuckets = (uint64ONE << args->numBuckets_log2); args->merDataWidth = args->merSize * 2 - args->numBuckets_log2; if (args->merDataWidth > SORTED_LIST_WIDTH * 64) { fprintf(stderr, " numMersActual = "uint64FMT"\n", args->numMersActual); fprintf(stderr, " mersPerBatch = "uint64FMT"\n", args->mersPerBatch); fprintf(stderr, " basesPerBatch = "uint64FMT"\n", args->basesPerBatch); fprintf(stderr, " numBuckets = "uint64FMT" ("uint32FMT" bits)\n", args->numBuckets, args->numBuckets_log2); fprintf(stderr, " bucketPointerWidth = "uint32FMT"\n", args->bucketPointerWidth); fprintf(stderr, " merDataWidth = "uint32FMT"\n", args->merDataWidth); fprintf(stderr, "Sorry! merSize too big! 
Increase KMER_WORDS in libbio.kmer.H\n"); exit(1); } if (args->beVerbose) { if (args->memoryLimit) fprintf(stderr, "Computing "uint64FMT" segments using "uint32FMT" threads and "uint64FMT"MB memory ("uint64FMT"MB if in one batch).\n", args->segmentLimit, args->numThreads, estimateMemory(args->merSize, args->mersPerBatch, args->positionsEnabled) * args->numThreads, estimateMemory(args->merSize, args->numMersActual, args->positionsEnabled)); else fprintf(stderr, "Computing "uint64FMT" segments using "uint32FMT" threads and "uint64FMT"MB memory ("uint64FMT"MB if in one batch).\n", estimateMemory(args->merSize, args->mersPerBatch, args->positionsEnabled) * args->numThreads, estimateMemory(args->merSize, args->numMersActual, args->positionsEnabled)); fprintf(stderr, " numMersActual = "uint64FMT"\n", args->numMersActual); fprintf(stderr, " mersPerBatch = "uint64FMT"\n", args->mersPerBatch); fprintf(stderr, " basesPerBatch = "uint64FMT"\n", args->basesPerBatch); fprintf(stderr, " numBuckets = "uint64FMT" ("uint32FMT" bits)\n", args->numBuckets, args->numBuckets_log2); fprintf(stderr, " bucketPointerWidth = "uint32FMT"\n", args->bucketPointerWidth); fprintf(stderr, " merDataWidth = "uint32FMT"\n", args->merDataWidth); } } void runSegment(merylArgs *args, uint64 segment) { merStream *M = 0L; merylStreamWriter *W = 0L; speedCounter *C = 0L; uint32 *bucketSizes = 0L; uint64 *bucketPointers = 0L; uint64 *merDataArray[SORTED_LIST_WIDTH] = { 0L }; uint32 *merPosnArray = 0L; // If this segment exists already, skip it. // // XXX: This should be a command line option. // XXX: This should check that the files are complete meryl files. 
// char *filename = new char [strlen(args->outputFile) + 17]; sprintf(filename, "%s.batch"uint64FMT".mcdat", args->outputFile, segment); if (fileExists(filename)) { if (args->beVerbose) fprintf(stderr, "Found result for batch "uint64FMT" in %s.\n", segment, filename); delete [] filename; return; } if ((args->beVerbose) && (args->segmentLimit > 1)) fprintf(stderr, "Computing segment "uint64FMT" of "uint64FMT".\n", segment+1, args->segmentLimit); delete [] filename; // // We can do all allocations up front: // mer data storage (the buckets themselves, plus 64 for slop) // bucket pointers (plus an extra bucket at the end and a little for slop) // bucket size counting space, last because we toss it out quickly // if (args->beVerbose) fprintf(stderr, " Allocating "uint64FMT"MB for mer storage ("uint32FMT" bits wide).\n", (args->basesPerBatch * args->merDataWidth + 64) >> 23, args->merDataWidth); // Mer storage - if mers are bigger than 32, we allocate full // words. The last allocation is always a bitPacked array. 
for (uint64 mword=0, width=args->merDataWidth; width > 0; ) { if (width >= 64) { merDataArray[mword] = new uint64 [ args->basesPerBatch + 1 ]; width -= 64; mword++; } else { merDataArray[mword] = new uint64 [ (args->basesPerBatch * width + 64) >> 6 ]; width = 0; } } if (args->positionsEnabled) { if (args->beVerbose) fprintf(stderr, " Allocating "uint64FMT"MB for mer position storage.\n", (args->basesPerBatch * 32 + 32) >> 23); merPosnArray = new uint32 [ args->basesPerBatch + 1 ]; } if (args->beVerbose) fprintf(stderr, " Allocating "uint64FMT"MB for bucket pointer table ("uint32FMT" bits wide).\n", (args->numBuckets * args->bucketPointerWidth + 128) >> 23, args->bucketPointerWidth); bucketPointers = new uint64 [(args->numBuckets * args->bucketPointerWidth + 128) >> 6]; if (args->beVerbose) fprintf(stderr, " Allocating "uint64FMT"MB for counting the size of each bucket.\n", args->numBuckets >> 18); bucketSizes = new uint32 [ args->numBuckets ]; for (uint64 i=args->numBuckets; i--; ) bucketSizes[i] = uint32ZERO; // Position the mer stream at the start of this segments' mers. // The last segment goes until the stream runs out of mers, // everybody else does args->basesPerBatch mers. 
C = new speedCounter(" Counting mers in buckets: %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, args->beVerbose); M = new merStream(new kMerBuilder(args->merSize, args->merComp), new seqStream(args->inputFile), true, true); M->setBaseRange(args->basesPerBatch * segment, args->basesPerBatch * segment + args->basesPerBatch); char mstring[256]; if (args->doForward) { while (M->nextMer()) { //fprintf(stderr, "FMER %s\n", M->theFMer().merToString(mstring)); bucketSizes[ args->hash(M->theFMer()) ]++; C->tick(); } } if (args->doReverse) { while (M->nextMer()) { //fprintf(stderr, "RMER %s\n", M->theRMer().merToString(mstring)); bucketSizes[ args->hash(M->theRMer()) ]++; C->tick(); } } if (args->doCanonical) { while (M->nextMer()) { if (M->theFMer() <= M->theRMer()) { //fprintf(stderr, "FMER %s\n", M->theFMer().merToString(mstring)); bucketSizes[ args->hash(M->theFMer()) ]++; } else { //fprintf(stderr, "RMER %s\n", M->theRMer().merToString(mstring)); bucketSizes[ args->hash(M->theRMer()) ]++; } C->tick(); } } delete C; delete M; // Create the hash index using the counts. The hash points // to the end of the bucket; when we add a word, we move the // hash bucket pointer down one. // // When done, we can deallocate the counting table. // if (args->beVerbose) fprintf(stderr, " Creating bucket pointers.\n"); { uint64 mi=0; uint64 mj=0; uint64 mc=0; while (mi < args->numBuckets) { mc += bucketSizes[mi++]; setDecodedValue(bucketPointers, mj, args->bucketPointerWidth, mc); mj += args->bucketPointerWidth; } // Add the location of the end of the table. This is not // modified when adding words, but is used to determine // the size of the last bucket. // setDecodedValue(bucketPointers, mj, args->bucketPointerWidth, mc); } // All done with the counting table, get rid of it. 
// if (args->beVerbose) fprintf(stderr, " Releasing "uint64FMT"MB from counting the size of each bucket.\n", args->numBuckets >> 18); delete [] bucketSizes; C = new speedCounter(" Filling mers into list: %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, args->beVerbose); M = new merStream(new kMerBuilder(args->merSize, args->merComp), new seqStream(args->inputFile), true, true); M->setBaseRange(args->basesPerBatch * segment, args->basesPerBatch * segment + args->basesPerBatch); while (M->nextMer()) { kMer const &m = ((args->doReverse) || (args->doCanonical && (M->theFMer() > M->theRMer()))) ? M->theRMer() : M->theFMer(); uint64 element = preDecrementDecodedValue(bucketPointers, args->hash(m) * args->bucketPointerWidth, args->bucketPointerWidth); #if SORTED_LIST_WIDTH == 1 // Even though this would work in the general loop below, we // special case one word mers to avoid the loop overhead. // setDecodedValue(merDataArray[0], element * args->merDataWidth, args->merDataWidth, m.endOfMer(args->merDataWidth)); #else for (uint64 mword=0, width=args->merDataWidth; width>0; ) { if (width >= 64) { merDataArray[mword][element] = m.getWord(mword); width -= 64; mword++; } else { setDecodedValue(merDataArray[mword], element * width, width, m.getWord(mword) & uint64MASK(width)); width = 0; } } #endif if (args->positionsEnabled) merPosnArray[element] = M->thePositionInStream(); C->tick(); } delete C; delete M; char *batchOutputFile = new char [strlen(args->outputFile) + 33]; sprintf(batchOutputFile, "%s.batch"uint64FMT, args->outputFile, segment); C = new speedCounter(" Writing output: %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, args->beVerbose); W = new merylStreamWriter((args->segmentLimit == 1) ? 
args->outputFile : batchOutputFile, args->merSize, args->merComp, args->numBuckets_log2, args->positionsEnabled); // Sort each bucket into sortedList, then output the mers // sortedList_t *sortedList = 0L; uint32 sortedListMax = 0; uint32 sortedListLen = 0; for (uint64 bucket=0, bucketPos=0; bucket < args->numBuckets; bucket++) { uint64 st = getDecodedValue(bucketPointers, bucketPos, args->bucketPointerWidth); bucketPos += args->bucketPointerWidth; uint64 ed = getDecodedValue(bucketPointers, bucketPos, args->bucketPointerWidth); if (ed < st) { fprintf(stderr, "ERROR: In segment "uint64FMT"\n", segment); fprintf(stderr, "ERROR: Bucket "uint64FMT" (out of "uint64FMT") ends before it starts!\n", bucket, args->numBuckets); fprintf(stderr, "ERROR: start="uint64FMT"\n", st); fprintf(stderr, "ERROR: end ="uint64FMT"\n", ed); } assert(ed >= st); if ((ed - st) > (uint64ONE << 30)) { fprintf(stderr, "ERROR: In segment "uint64FMT"\n", segment); fprintf(stderr, "ERROR: Bucket "uint64FMT" (out of "uint64FMT") is HUGE!\n", bucket, args->numBuckets); fprintf(stderr, "ERROR: start="uint64FMT"\n", st); fprintf(stderr, "ERROR: end ="uint64FMT"\n", ed); } // Nothing here? Keep going. if (ed == st) continue; sortedListLen = (uint32)(ed - st); // Allocate more space, if we need to. // if (sortedListLen > sortedListMax) { delete [] sortedList; sortedList = new sortedList_t [2 * sortedListLen + 1]; sortedListMax = 2 * sortedListLen; } // Clear out the sortedList -- if we don't, we leave the high // bits unset which will probably make the sort random. 
// bzero(sortedList, sizeof(sortedList_t) * sortedListLen); // Unpack the mers into the sorting array // if (args->positionsEnabled) for (uint64 i=st; imerDataWidth; imerDataWidth) sortedList[i-st]._w = getDecodedValue(merDataArray[0], J, args->merDataWidth); #else for (uint64 i=st; imerDataWidth; width>0; ) { if (width >= 64) { sortedList[i-st]._w[mword] = merDataArray[mword][i]; width -= 64; mword++; } else { sortedList[i-st]._w[mword] = getDecodedValue(merDataArray[mword], i * width, width); width = 0; } } } #endif // Sort if there is more than one item // if (sortedListLen > 1) { for (int64 t=(sortedListLen-2)/2; t>=0; t--) adjustHeap(sortedList, t, sortedListLen); for (int64 t=sortedListLen-1; t>0; t--) { sortedList_t tv = sortedList[t]; sortedList[t] = sortedList[0]; sortedList[0] = tv; adjustHeap(sortedList, 0, t); } } // Dump the list of mers to the file. // kMer mer(args->merSize); for (uint32 t=0; ttick(); // Build the complete mer // #if SORTED_LIST_WIDTH == 1 mer.setWord(0, sortedList[t]._w); #else for (uint64 mword=0; mword < SORTED_LIST_WIDTH; mword++) mer.setWord(mword, sortedList[t]._w[mword]); #endif mer.setBits(args->merDataWidth, args->numBuckets_log2, bucket); // Add it if (args->positionsEnabled) W->addMer(mer, 1, &sortedList[t]._p); else W->addMer(mer, 1, 0L); } } delete [] sortedList; delete C; delete W; delete [] batchOutputFile; for (uint32 x=0; xbeVerbose) fprintf(stderr, "Segment "uint64FMT" finished.\n", segment); } void build(merylArgs *args) { if (!args->countBatch && !args->mergeBatch) prepareBatch(args); // Three choices: // // threaded -- start threads, launch pieces in each thread. This // thread waits for completion and then merges the results. // // batched -- write info file and exit. Compute and merge is done // on separate invocations. // // segmented -- write info file, then do each piece sequentially. // After all pieces finished, do a merge. 
// // bool doMerge = false; if (args->configBatch) { // Write out our configuration and exit if we are -configbatch // args->writeConfig(); if (args->sgeJobName) { fprintf(stdout, "Batch prepared. Submitting to the grid.\n"); submitCountBatches(args); } else { fprintf(stdout, "Batch prepared. Please run:\n"); for (uint64 s=0; ssegmentLimit; s++) fprintf(stdout, "%s -countbatch "uint64FMT" -o %s\n", args->execName, s, args->outputFile); fprintf(stdout, "%s -mergebatch -o %s\n", args->execName, args->outputFile); } } else if (args->countBatch) { // Read back the configuration, run the segment and exit if we // are -countbatch // merylArgs *savedArgs = new merylArgs(args->outputFile); savedArgs->beVerbose = args->beVerbose; runSegment(savedArgs, args->batchNumber); delete savedArgs; } else if (args->mergeBatch) { // Check that all the files exist if we are -mergebatch and // continue with execution // // MEMORY LEAK! We should delete this at the end of the // function, but it's a pain, and who cares? // merylArgs *savedArgs = new merylArgs(args->outputFile); savedArgs->beVerbose = args->beVerbose; args = savedArgs; doMerge = true; } else { if (args->numThreads > 1) // Run, using threads. There is a lot of baloney needed, so it's // all in a separate function. // runThreaded(args); else // No special options given, do all the work here and now // for (uint64 s=0; ssegmentLimit; s++) runSegment(args, s); // Either case, we want to merge now. // doMerge = true; } // If there is more than one segment, merge them to get the output. // // We do this by contructing a meryl command line and recursively // (effectively) calling meryl. // // The command line is // // ./meryl -M merge [-v] -s batch1 -s batch2 ... 
-s batchN -o outputFile // if ((doMerge) && (args->segmentLimit > 1)) { if (args->beVerbose) fprintf(stderr, "Merge results.\n"); int argc = 0; char **argv = new char* [7 + 2 * args->segmentLimit]; bool *arga = new bool [7 + 2 * args->segmentLimit]; arga[argc] = false; argv[argc++] = "meryl-build-merge"; arga[argc] = false; argv[argc++] = "-M"; arga[argc] = false; argv[argc++] = "merge"; if (args->beVerbose) { arga[argc] = false; argv[argc++] = "-v"; } for (uint32 i=0; isegmentLimit; i++) { arga[argc] = false; argv[argc++] = "-s"; arga[argc] = true; argv[argc] = new char [strlen(args->outputFile) + 33]; sprintf(argv[argc], "%s.batch"uint32FMT, args->outputFile, i); argc++; } arga[argc] = false; argv[argc++] = "-o"; arga[argc] = false; argv[argc++] = args->outputFile; merylArgs *addArgs = new merylArgs(argc, argv); multipleOperations(addArgs); // Cleanup the memory leak. // delete addArgs; for (int i=0; ioutputFile) + 17]; for (uint32 i=0; isegmentLimit; i++) { sprintf(filename, "%s.batch"uint32FMT".mcidx", args->outputFile, i); unlink(filename); sprintf(filename, "%s.batch"uint32FMT".mcdat", args->outputFile, i); unlink(filename); sprintf(filename, "%s.batch"uint32FMT".mcpos", args->outputFile, i); unlink(filename); } delete [] filename; } // If we just merged, delete the merstream file // if (doMerge) { char *filename = new char [strlen(args->outputFile) + 17]; sprintf(filename, "%s.merStream", args->outputFile); unlink(filename); delete [] filename; } } kmer-code-2013-trunk/meryl/mapMers-depth.C0000644000000000000000000001275512516507276017135 0ustar rootroot#include #include #include #include "bio++.H" #include "seqCache.H" #include "merStream.H" #include "libmeryl.H" #include "existDB.H" // Three outputs: // // 1) Number of kmers that span this position. Count of each kmer is ignored. // 2) Count of the kmer that begins at this position. // 3) Stats of the counts of the kmers that span this position (e.g., ave, min, max, stddev). 
int main(int argc, char **argv) { uint32 merSize = 0; char *merylFile = 0L; char *fastaFile = 0L; bool outputCount = false; bool outputDepth = false; bool outputStats = false; int arg=1; int err=0; while (arg < argc) { if (strcmp(argv[arg], "-m") == 0) merSize = strtouint32(argv[++arg], 0L); else if (strcmp(argv[arg], "-mers") == 0) merylFile = argv[++arg]; else if (strcmp(argv[arg], "-seq") == 0) fastaFile = argv[++arg]; else if (strcmp(argv[arg], "-count") == 0) outputCount = true; else if (strcmp(argv[arg], "-depth") == 0) outputDepth = true; else if (strcmp(argv[arg], "-stats") == 0) outputStats = true; else { fprintf(stderr, "unknown option '%s'\n", argv[arg]); err++; } arg++; } if (merSize == 0) err++; if (fastaFile == 0L) err++; if (merylFile == 0L) err++; if (outputCount + outputDepth + outputStats != 1) err++; if (err) { fprintf(stderr, "usage: %s -mers MERYL -m MERSIZE -seq IN.FASTA [-count | -depth | -stats] > output\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "For sequence ordinal 's' and position in that sequence 'p':\n"); fprintf(stderr, "\n"); fprintf(stderr, " -count - report the count (c) of the single kmer that starts at position (p).\n"); fprintf(stderr, " Format: 's p c'\n"); fprintf(stderr, " -depth - report the number (n) of kmers that span position (p). Format: 's p n'\n"); fprintf(stderr, " -stats - report the min (m), max (M), ave (a) count of all mers that span\n"); fprintf(stderr, " position (p). 
Format: 's p m M a t n'\n"); fprintf(stderr, " (also reports total count (t) and number of kmers (n))\n"); fprintf(stderr, "\n"); if (merSize == 0) fprintf(stderr, "ERROR: No mer size (-m) suppled.\n"); if (fastaFile == 0L) fprintf(stderr, "ERROR: No fasta input (-seq) suppled.\n"); if (merylFile == 0L) fprintf(stderr, "ERROR: No meryl database (-mers) suppled.\n"); if (outputCount + outputDepth + outputStats != 1) fprintf(stderr, "ERROR: Exactly one of -count, -depth and -stats may be supplied.\n"); exit(1); } // Open the input sequences seqCache *F = new seqCache(fastaFile); // Load kmer counts from a meryl database. existDBcompressBuckets is broken. existDB *E = new existDB(merylFile, merSize, existDBcounts | existDBcompressCounts, 0, UINT32_MAX); // For each sequence... for (uint32 Sid=0; Sid < F->getNumberOfSequences(); Sid++) { seqInCore *S = F->getSequenceInCore(Sid); merStream *MS = new merStream(new kMerBuilder(merSize), new seqStream(S->sequence(), S->sequenceLength()), true, true); // Build a lists of the min, max and total count at each position. uint32 *mincount = new uint32 [S->sequenceLength() + 1]; uint32 *maxcount = new uint32 [S->sequenceLength() + 1]; uint32 *totcount = new uint32 [S->sequenceLength() + 1]; uint32 *numcount = new uint32 [S->sequenceLength() + 1]; for (uint32 xx=0; xxsequenceLength() + 1; xx++) { mincount[xx] = UINT32_MAX; maxcount[xx] = 0; totcount[xx] = 0; numcount[xx] = 0; } // Scan the sequence, find the count. while (MS->nextMer()) { uint32 pos = MS->thePositionInSequence(); uint32 cnt = E->count(MS->theFMer()) + E->count(MS->theRMer()); if (cnt == 0) // Mer doesn't exist in the database. continue; if (outputCount) totcount[pos] = cnt; if (outputDepth) for (uint32 xx=pos; xxsequenceLength(); x++) if (numcount[x] == 0) mincount[x] = 0; // Report the single kmer count? if (outputCount) { for (uint32 x=0; x < S->sequenceLength(); x++) fprintf(stdout, "%u\t%u\t%u\n", Sid, x, totcount[x]); } // Report the depth? 
if (outputDepth) { for (uint32 x=0; x < S->sequenceLength(); x++) fprintf(stdout, "%u\t%u\t%u\n", Sid, x, numcount[x]); } // Report the min/max/ave count? if (outputStats) { for (uint32 x=0; x < S->sequenceLength(); x++) fprintf(stdout, "%u\t%u\t%u\t%u\t%u\t%u\t%u\n", Sid, x, mincount[x], maxcount[x], (numcount[x] > 0) ? totcount[x] / numcount[x] : 0, totcount[x], numcount[x]); } delete [] mincount; delete [] maxcount; delete [] totcount; delete [] numcount; delete MS; delete S; } delete F; delete E; } kmer-code-2013-trunk/meryl/mervin.C0000644000000000000000000004017412322046702015706 0ustar rootroot#include #include #include #include #include "bio++.H" #include "sweatShop.H" #include "libmeryl.H" #include using namespace std; // var, old, new -- returns true if "(var == old) and var <- new" // // CAS - #elif (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 const uint32 pileMax = 32768; const uint32 kmerSize = 22; const uint32 kmerBits = 2 * kmerSize; const uint32 pilePreSize = 6; const uint32 pilePreBits = 2 * pilePreSize; const uint32 sortPreSize = 10; const uint32 sortPreBits = 2 * sortPreSize; class kmerPile { public: kmerPile(uint32 prefix) { pileLen = 0; pilePrefix = prefix; }; ~kmerPile() { }; void initialize(uint32 prefix) { pileLen = 0; pilePrefix = prefix; }; void addMer(uint64 mer) { pileDat[pileLen++] = mer; }; void sort(void) { ::sort(pileDat, pileDat + pileLen); }; uint32 pileLen; uint32 pilePrefix; uint64 pileDat[pileMax]; }; class kmerSorter { public: kmerSorter() { sorterLocked = 0; sorterLen = 0; sorterMax = 4; sorterMer = new uint64 [sorterMax]; sorterCnt = new uint32 [sorterMax]; }; ~kmerSorter() { delete [] sorterMer; delete [] sorterCnt; }; void merge(uint64 *pileDat, uint32 pileLen) { uint32 nmax = MAX(16, sorterLen + pileLen / 4); uint64 *nmer = new uint64 [nmax]; uint32 *ncnt = new uint32 [nmax]; uint32 npos = 0; assert(nmax > 0); uint32 spos = 0; uint32 ppos = 0; bool useSorterFirst = false; if ((sorterLen > 0) && 
(pileLen > 0)) { useSorterFirst = (sorterMer[0] < pileDat[0]); } else if (spos < sorterLen) { useSorterFirst = true; } else if (ppos < pileLen) { useSorterFirst = false; } else { assert(0); } if (useSorterFirst) { nmer[0] = sorterMer[spos]; ncnt[0] = sorterCnt[spos]; spos++; } else { nmer[0] = pileDat[ppos]; ncnt[0] = 1; ppos++; } while ((spos < sorterLen) && (ppos < pileLen)) { if (nmax <= npos + 1) { nmax += (pileLen - ppos) + (sorterLen - spos) + 1; uint64 *nmermore = new uint64 [nmax]; uint32 *ncntmore = new uint32 [nmax]; memcpy(nmermore, nmer, sizeof(uint64) * (npos + 1)); memcpy(ncntmore, ncnt, sizeof(uint32) * (npos + 1)); delete [] nmer; nmer = nmermore; delete [] ncnt; ncnt = ncntmore; } if (nmer[npos] == sorterMer[spos]) { ncnt[npos] += sorterCnt[spos]; spos++; } else if (nmer[npos] == pileDat[ppos]) { ncnt[npos] += 1; ppos++; } else if (sorterMer[spos] < pileDat[ppos]) { npos++; nmer[npos] = sorterMer[spos]; ncnt[npos] = sorterCnt[spos]; spos++; } else { npos++; nmer[npos] = pileDat[ppos]; ncnt[npos] = 1; ppos++; } } uint32 remain = (sorterLen - spos) + (pileLen - ppos); if (nmax < npos + 1 + remain) { nmax = npos + 1 + remain; uint64 *nmermore = new uint64 [nmax]; uint32 *ncntmore = new uint32 [nmax]; memcpy(nmermore, nmer, sizeof(uint64) * (npos + 1)); memcpy(ncntmore, ncnt, sizeof(uint32) * (npos + 1)); delete [] nmer; nmer = nmermore; delete [] ncnt; ncnt = ncntmore; } while (spos < sorterLen) { if (nmer[npos] == sorterMer[spos]) { ncnt[npos] += sorterCnt[spos]; } else { npos++; nmer[npos] = sorterMer[spos]; ncnt[npos] = sorterCnt[spos]; } spos++; } while (ppos < pileLen) { if (nmer[npos] == pileDat[ppos]) { ncnt[npos] += 1; } else { npos++; nmer[npos] = pileDat[ppos]; ncnt[npos] = 1; } ppos++; } delete [] sorterMer; delete [] sorterCnt; sorterMer = nmer; sorterCnt = ncnt; sorterLen = npos + 1; sorterMax = nmax; #if 1 bool broken = false; for (uint32 i=1; i= sorterMer[i]) broken = true; } #endif }; void write(uint32 prefix, FILE *F, 
merylStreamWriter *W) { char km[64] = {0}; uint32 kp = pilePreSize; uint32 np = 0; { uint32 pre = prefix; for (uint32 pp=0; pp>= 2; } } np = kmerSize - pilePreSize; for (uint32 ii=0; ii>= 2; } fprintf(F, ">"uint32FMT"\n%s\n", sorterCnt[ii], km); if (W) W->addMer(prefix, pilePreBits, sorterMer[ii], kmerBits - pilePreBits, sorterCnt[ii], 0L); } }; volatile uint32 sorterLocked; uint32 sorterLen; uint32 sorterMax; uint64 *sorterMer; uint32 *sorterCnt; }; class kmerGlobal { public: kmerGlobal() { inName = NULL; inFile = NULL; #if 0 inputBufferMax = 131072; inputBufferLen = 0; inputBufferPos = 0; inputBuffer = new char [inputBufferMax]; #endif inputBufferMax = 0; inputBufferLen = 0; inputBufferPos = 0; inputBuffer = NULL; outPrefix = NULL; outFile = NULL; fkPre = 0; fkMer = 0; rkPre = 0; rkMer = 0; kLen = 0; pilesFreeLock = 0; pilesFreeLen = 2048; pilesFreeMax = 2 << pilePreBits; pilesFree = new kmerPile * [pilesFreeMax]; memset(pilesFree, 0, sizeof(kmerPile *) * pilesFreeMax); piles = new kmerPile * [1 << pilePreBits]; sorters = new kmerSorter [1 << sortPreBits]; memset(piles, 0, sizeof(kmerPile *) * (1 << pilePreBits)); for (uint32 i=0; iinitialize(prefix); return(pp); }; void releasePile(kmerPile *pile) { if (pilesFreeLen >= pilesFreeMax) { //fprintf(stderr, "DELETE PILE!\n"); delete pile; } else { while (__sync_bool_compare_and_swap(&pilesFreeLock, 0, 1) == false) nanosleep(&naptime, 0L); assert(pilesFreeLock == 1); pilesFree[pilesFreeLen++] = pile; pilesFreeLock = 0; } }; void addToPile(uint64 pre, uint64 mer) { assert(piles[pre] != NULL); //if (piles[pre] == NULL) // piles[pre] = getFreePile(pre); if (piles[pre]->pileLen < pileMax) { piles[pre]->addMer(mer); return; } if (pilesToSortMax <= pilesToSortLen) { fprintf(stderr, "realloc\n"); exit(1); } pilesToSort[pilesToSortLen++] = piles[pre]; piles[pre] = getFreePile(pre); piles[pre]->addMer(mer); }; kmerPile *getFullPile(void) { if (pilesToSortLen == 0) return(NULL); //fprintf(stderr, "return pile "uint32FMT"\n", 
pilesToSort[pilesToSortLen-1]->pilePrefix); return(pilesToSort[--pilesToSortLen]); }; kmerPile *allDataLoaded(void) { for (uint32 pp=0; pp < (1 << pilePreBits); pp++) { if ((piles[pp] != NULL) && (piles[pp]->pileLen > 0)) { //fprintf(stderr, "Add pile "uint32FMT" to list.\n", pp); pilesToSort[pilesToSortLen++] = piles[pp]; } else { delete piles[pp]; } piles[pp] = NULL; } fprintf(stderr, "allDataLoaded()-- pilesToSortLen = "uint32FMT"\n", pilesToSortLen); return(getFullPile()); }; void addBases(uint32 bgn, uint32 len) { uint32 kp2 = kmerBits - pilePreBits - 2; uint32 pp2 = pilePreBits - 2; uint64 mpp = uint64MASK(pilePreBits); uint64 mkp = uint64MASK(kmerBits - pilePreBits); for (uint32 pos=0; pos 4) { kLen = 0; continue; } uint64 tm = 0; tm = fkMer >> kp2; tm &= 0x00000003; fkPre <<= 2; fkPre |= tm; fkMer <<= 2; fkMer |= bt; tm = rkMer & 0x00000003; rkPre >>= 2; rkPre |= tm << pp2; rkMer >>= 2; rkMer |= bt << kp2; kLen++; if (kLen < kmerSize) continue; kLen = kmerSize; fkPre &= mpp; fkMer &= mkp; rkPre &= mpp; rkMer &= mkp; addToPile(fkPre, fkMer); addToPile(rkPre, rkMer); } } bool addBaseToKmer(char base) { uint64 bt = letterToBits[base]; if (bt > 4) { kLen = 0; return(false); } uint64 tm = 0; tm = fkMer >> (kmerBits - pilePreBits - 2); tm &= 0x00000003; fkPre <<= 2; fkPre |= tm; fkMer <<= 2; fkMer |= bt; tm = rkMer & 0x00000003; rkPre >>= 2; rkPre |= tm << (pilePreBits - 2); rkMer >>= 2; rkMer |= bt << (kmerBits - pilePreBits - 2); kLen++; if (kLen < kmerSize) { return(false); } kLen = kmerSize; fkPre &= uint64MASK(pilePreBits); fkMer &= uint64MASK(kmerBits - pilePreBits); rkPre &= uint64MASK(pilePreBits); rkMer &= uint64MASK(kmerBits - pilePreBits); addToPile(fkPre, fkMer); addToPile(rkPre, rkMer); return(true); }; void write(void) { char outName[FILENAME_MAX]; sprintf(outName, "%s.fasta", outPrefix); errno = 0; FILE *F = fopen(outName, "w"); if (errno) fprintf(stderr, "Failed to open '%s' for writing: %s\n", outName, strerror(errno)), exit(1); 
//merylStreamWriter *W = new merylStreamWriter(outPrefix, kmerSize, 0, sortPreBits, false); for (uint32 ss=0; ss < (1 << sortPreBits); ss++) sorters[ss].write(ss, F, NULL); fclose(F); //delete W; } char *inName; FILE *inFile; readBuffer *inBuffer; uint64 inputBufferMax; uint64 inputBufferLen; uint64 inputBufferPos; char *inputBuffer; char *outPrefix; FILE *outFile; uint64 fkPre; // Forward loaded kmer uint64 fkMer; uint64 rkPre; // Reverse loaded kmer uint64 rkMer; uint32 kLen; uint32 pilesFreeLock; uint32 pilesFreeLen; uint32 pilesFreeMax; kmerPile **pilesFree; kmerPile **piles; kmerSorter *sorters; struct timespec naptime; uint32 pilesToSortLen; uint32 pilesToSortMax; kmerPile **pilesToSort; }; uint64 bytesLoaded = 0; uint64 basesLoaded = 0; speedCounter bytes(" bytes %8.0f Mbytes (%8.5f Mbytes/sec\r", 1048576, 1048576, true); // Reads input, constructs kmers, adds kmers to piles of kmers. void* sifterThread(void *global) { kmerGlobal *glob = (kmerGlobal *)global; kmerPile *pile = glob->getFullPile(); if (pile) return(pile); //if ((glob->inFile == NULL) && (glob->inBuffer == NULL)) // return(NULL); anotherBase: //bytesLoaded++; //if ((bytesLoaded % (16 * 1048576)) == 0) // fprintf(stderr, "sifterThread()-- loaded "uint64FMT" MB.\n", bytesLoaded >> 20); #if 0 // Uses the readBuffer in char-by-char mode // char ch = glob->inBuffer->read(); bytes.tick(); if (glob->inBuffer->eof()) { delete glob->inBuffer; glob->inBuffer = NULL; return(glob->allDataLoaded()); } if (glob->addBaseToKmer(ch) == false) goto anotherBase; #endif #if 0 // Uses the readBuffer in block-copy mode // uint32 len = glob->inBuffer->read(glob->inputBuffer, glob->inputBufferMax); if (len == 0) { delete glob->inBuffer; glob->inBuffer = NULL; return(glob->allDataLoaded()); } glob->addBases(0, len); bytes.tick(len); #endif #if 1 // Uses a direct mmap'd file // uint64 len = glob->inputBufferLen - glob->inputBufferPos; if (len == 0) return(NULL); if (len > 16 * 1048576) len = 16 * 1048576; 
//fprintf(stderr, "Add "uint64FMT" bases.\n", len); glob->addBases(glob->inputBufferPos, len); bytes.tick(len); glob->inputBufferPos += len; #endif pile = glob->getFullPile(); if (pile == NULL) goto anotherBase; return(pile); } // Takes a pile of kmers, sorts it, and them merges into the appropriate kmerSorter objects. void sorterThread(void *global, void *thread, void *thing) { kmerGlobal *glob = (kmerGlobal *)global; kmerPile *pile = (kmerPile *)thing; struct timespec naptime; naptime.tv_sec = 0; naptime.tv_nsec = 166666666ULL; // 1/6 second naptime.tv_nsec = 250000ULL; if (pile->pileLen == 0) // Nothing to add. return; pile->sort(); uint32 pileBgn = 0; uint32 pileEnd = 1; uint32 pileMaskShift = sortPreBits - pilePreBits; uint32 pileDataShift = kmerBits - sortPreBits; uint64 pileToSortPreMask = uint64MASK(sortPreBits - pilePreBits) << (kmerBits - sortPreBits); uint64 pileToSortMask = uint64MASK(kmerBits - sortPreBits); uint32 sortPre = 0; uint64 pileToSort = 0; while (pileBgn < pile->pileLen) { sortPre = (pile->pilePrefix << pileMaskShift) | (pile->pileDat[pileBgn] >> pileDataShift); pileToSort = pile->pileDat[pileBgn] & pileToSortPreMask; //fprintf(stderr, "0x"uint64HEX"\n", pileToSortPreMask); //fprintf(stderr, "0x"uint64HEX"\n", pileToSortMask); while ((pileEnd < pile->pileLen) && ((pile->pileDat[pileEnd] & pileToSortPreMask) == pileToSort)) { //fprintf(stderr, "0x"uint64HEX" -> 0x"uint64HEX" "uint64FMT"\n", // pile->pileDat[pileEnd], // pile->pileDat[pileEnd] & pileToSortMask, // pile->pileDat[pileEnd] & pileToSortMask); pile->pileDat[pileEnd] &= pileToSortMask; pileEnd++; } while (__sync_bool_compare_and_swap(&glob->sorters[sortPre].sorterLocked, 0, 1) == false) nanosleep(&naptime, 0L); assert(glob->sorters[sortPre].sorterLocked == 1); glob->sorters[sortPre].merge(pile->pileDat + pileBgn, pileEnd - pileBgn); glob->sorters[sortPre].sorterLocked = 0; pileBgn = pileEnd; } } // Does nothing but delete the pile object. We don't output till the end. 
void nullThread(void *global, void *thing) { kmerGlobal *glob = (kmerGlobal *)global; kmerPile *pile = (kmerPile *)thing; glob->releasePile(pile); } int main(int argc, char **argv) { kmerGlobal *kg = new kmerGlobal; int arg=1; int err=0; while (arg < argc) { if (strcmp(argv[arg], "-i") == 0) kg->inName = argv[++arg]; else if (strcmp(argv[arg], "-o") == 0) kg->outPrefix = argv[++arg]; else err++; arg++; } if (kg->inName == NULL) err++; if (kg->outPrefix == NULL) err++; if (err) { fprintf(stderr, "usage: %s -i in.sequence -i prefix\n", argv[0]); exit(1); } kg->initialize(); sweatShop *ss = new sweatShop(sifterThread, sorterThread, nullThread); ss->setLoaderBatchSize(512); ss->setNumberOfWorkers(1); ss->setWriterQueueSize(16384); //for (uint32 i=0; isetThreadData(i, new searcherState(i)); ss->run(kg, true); delete ss; kg->write(); delete kg; exit(0); } kmer-code-2013-trunk/meryl/simple.C0000644000000000000000000000756512322046702015706 0ustar rootroot#include #include #include #include #include #include "bio++.H" #include "meryl.H" #include "libmeryl.H" #include "seqStream.H" #include "merStream.H" using namespace std; // A very simple mer counter. Allocates a gigantic 32-bit array, // populates the array with mers, sorts, writes output. 
int main(int argc, char **argv) { char *inName = 0L; char *otName = 0L; uint32 merSize = 22; uint32 merCompression = 1; bool doForward = true; bool doReverse = false; bool doCanonical = false; speedCounter *C = 0L; merStream *M = 0L; merylStreamWriter *W = 0L; uint64 numMers = 0; uint64 *theMers = 0L; uint64 theMersMax = 0; uint64 theMersLen = 0; int arg = 1; int err = 0; while (arg < argc) { if (strcmp(argv[arg], "-i") == 0) { inName = argv[++arg]; } else if (strcmp(argv[arg], "-o") == 0) { otName = argv[++arg]; } else if (strcmp(argv[arg], "-m") == 0) { merSize = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-f") == 0) { doForward = true; doReverse = false; doCanonical = false; } else if (strcmp(argv[arg], "-r") == 0) { doForward = false; doReverse = true; doCanonical = false; } else if (strcmp(argv[arg], "-C") == 0) { doForward = false; doReverse = false; doCanonical = true; } else if (strcmp(argv[arg], "-c") == 0) { merCompression = atoi(argv[++arg]); } else { fprintf(stderr, "unknown option '%s'\n", argv[arg]); err++; } arg++; } if (inName == 0L) { fprintf(stderr, "no input given with '-i'\n"); err++; } if (otName == 0L) { fprintf(stderr, "no output given with '-o'\n"); err++; } if (err) exit(1); { M = new merStream(new kMerBuilder(merSize, merCompression), new seqStream(inName), true, true); numMers = M->approximateNumberOfMers(); delete M; } fprintf(stderr, "Guessing "uint64FMT" mers in input '%s'\n", numMers, inName); fprintf(stderr, "Allocating "uint64FMT"MB for mer storage.\n", numMers * 8 >> 20); theMers = new uint64 [numMers]; theMersLen = 0; theMersMax = numMers; //////////////////////////////////////// C = new speedCounter(" Counting mers in buckets: %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, 1); M = new merStream(new kMerBuilder(merSize, merCompression), new seqStream(inName), true, true); //M->setRange(args->mersPerBatch * segment, args->mersPerBatch * segment + args->mersPerBatch); while (M->nextMer()) { if (doForward) 
theMers[theMersLen++] = M->theFMer(); if (doReverse) theMers[theMersLen++] = M->theRMer(); if (doCanonical) theMers[theMersLen++] = (M->theFMer() <= M->theRMer()) ? M->theFMer() : M->theRMer(); C->tick(); } delete C; delete M; fprintf(stderr, "Found "uint64FMT" mers in input '%s'\n", theMersLen, inName); if (theMersLen > theMersMax) fprintf(stderr, "ERROR: too many mers in input!\n"), exit(1); //////////////////////////////////////// fprintf(stderr, "sorting\n"); sort(theMers, theMers + theMersLen); //////////////////////////////////////// C = new speedCounter(" Writing output: %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, 1); W = new merylStreamWriter(otName, merSize, merCompression, 16, false); kMer mer(merSize); for (uint64 i=0; iaddMer(mer, 1, 0L); C->tick(); } delete C; delete W; //////////////////////////////////////// delete [] theMers; exit(0); } kmer-code-2013-trunk/meryl/kmer-mask.C0000644000000000000000000005104312572072362016302 0ustar rootroot#include #include #include "util++.H" #include "bio++.H" #include "libmeryl.H" #include "existDB.H" #include "seqCache.H" #include "seqStream.H" #include "merStream.H" #include "sweatShop.H" class fastqRecord { public: fastqRecord(uint32 ml) { maxLength = ml; alloc = new char [maxLength * 8]; a2 = alloc + 0 * maxLength; af = alloc + 1 * maxLength; am = alloc + 2 * maxLength; a4 = alloc + 3 * maxLength; a1[0] = 0; a2[0] = 0; af[0] = 0; am[0] = 0; a3[0] = 0; a4[0] = 0; aLength = 0; aMasked = 0.0; aLabel = 0; b2 = alloc + 4 * maxLength; bf = alloc + 5 * maxLength; bm = alloc + 6 * maxLength; b4 = alloc + 7 * maxLength; b1[0] = 0; b2[0] = 0; bf[0] = 0; bm[0] = 0; b3[0] = 0; b4[0] = 0; bLength = 0; bMasked = 0.0; bLabel = 0; }; ~fastqRecord() { delete [] alloc; }; bool load(FILE *FASTQ1, FILE *FASTQ2) { bool tooShort = false; a2[maxLength - 2] = 0; b2[maxLength - 2] = 0; if (FASTQ1) { fgets(a1, 1024, FASTQ1); chomp(a1); fgets(a2, maxLength, FASTQ1); chomp(a2); fgets(a3, 1024, FASTQ1); chomp(a3); fgets(a4, 
maxLength, FASTQ1); chomp(a4); aLength = strlen(a2); aMasked = 0.0; aLabel = 0; if (a2[maxLength - 2] != 0) tooShort = true; } if (FASTQ2) { fgets(b1, 1024, FASTQ2); chomp(b1); fgets(b2, maxLength, FASTQ2); chomp(b2); fgets(b3, 1024, FASTQ2); chomp(b3); fgets(b4, maxLength, FASTQ2); chomp(b4); bLength = strlen(b2); bMasked = 0.0; bLabel = 0; if (b2[maxLength - 2] != 0) tooShort = true; } if (tooShort) { fprintf(stderr, "ERROR: -l too small for reads:\n"); fprintf(stderr, " a = '%s'\n", a1); fprintf(stderr, " b = '%s'\n", b1); exit(1); } return(!feof(FASTQ1)); }; void write(FILE *FASTQ1, FILE *FASTQ2) { if (FASTQ1) fprintf(FASTQ1, "%s fractionMasked=%.3f\n%s\n%s\n%s\n", a1, aMasked, am, a3, a4); if (FASTQ2) fprintf(FASTQ2, "%s fractionMasked=%.3f\n%s\n%s\n%s\n", b1, bMasked, bm, b3, b4); }; public: uint32 maxLength; char *alloc; char a1[1024]; char *a2; char *af; char *am; char a3[1024]; char *a4; uint32 aLength; double aMasked; uint32 aLabel; char b1[1024]; char *b2; char *bf; char *bm; char b3[1024]; char *b4; uint32 bLength; double bMasked; uint32 bLabel; }; class maskGlobal { public: maskGlobal() { merName = NULL; seq1Name = NULL; seq2Name = NULL; outPrefix = NULL; merSize = 0; maxLength = 512; existName = NULL; minSize = 0; extend = 0; cleaner = false; dirtier = false; discard = true; unlink = false; noMasking = false; cleanThreshold = 1. / 3.; matchThreshold = 2. 
/ 3.; for (uint32 ii=0; ii<1001; ii++) scoreHistogram[ii] = 0; for (uint32 ii=0; ii<4; ii++) for (uint32 jj=0; jj<4; jj++) thresholdCounts[ii][jj] = 0.0; outputHistogram = NULL; exist = NULL; FASTQ1 = NULL; FASTQ1pipe = false; FASTQ2 = NULL; FASTQ2pipe = false; OUTPUT1[0] = NULL; OUTPUT1[1] = NULL; OUTPUT1[2] = NULL; OUTPUT2[0] = NULL; OUTPUT2[1] = NULL; OUTPUT2[2] = NULL; }; ~maskGlobal() { }; public: char *merName; char *seq1Name; char *seq2Name; char *outPrefix; uint32 merSize; uint32 maxLength; char *existName; uint32 minSize; uint32 extend; bool cleaner; bool dirtier; bool discard; bool unlink; bool noMasking; double cleanThreshold; double matchThreshold; uint32 scoreHistogram[1001]; uint64 thresholdCounts[4][4]; char *outputHistogram; existDB *exist; FILE *FASTQ1; bool FASTQ1pipe; FILE *FASTQ2; bool FASTQ2pipe; FILE *OUTPUT1[4]; FILE *OUTPUT2[4]; }; // Masks mers present in the database from the input sequences. Chains together // across small bits of missing mer. void printBits(char *S, uint32 Slen, char *found, char *display, const char *label) { for (uint32 i=0; iexists(MS.theFMer()) || exist->exists(MS.theRMer())) found[MS.thePositionInSequence()] = true; } // Searched for isolated 'true' bits, and removes them. Isolated means fewer than minSize true // bits are adjacent. // void removeIsolatedMers(char *S, uint32 Slen, char *found, uint32 minSize) { uint32 bgn = 0; uint32 end = 0; bool inRun = false; for (uint32 ii=0; ii 0)) { for (uint32 jj=ii; (jj 0) { found[ii] = true; isMasking--; } } } // Assumes the found[] array represents base-based masking. // Returns the fraction of the sequence that is masked. double maskSequence(char *S, uint32 Slen, char *found, bool noMasking, char *display) { uint32 masked = 0; // Flip the sense of the found[ii] test to mask kmers found in the database (true) or missing (false). 
for (uint32 ii=0; iimaxLength); if (s->load(g->FASTQ1, g->FASTQ2) == false) { delete s; s = NULL; } return(s); } void maskWorker(void *G, void *T, void *S) { maskGlobal *g = (maskGlobal *)G; //maskThread *t = (maskThread *)T; fastqRecord *s = (fastqRecord *)S; buildMask(s->a2, s->aLength, s->af, g->exist, g->merSize); buildMask(s->b2, s->bLength, s->bf, g->exist, g->merSize); //printBits(S, found, display , "INITIAL"); removeIsolatedMers(s->a2, s->aLength, s->af, g->minSize); removeIsolatedMers(s->b2, s->bLength, s->bf, g->minSize); //printBits(S, found, display, "ISOLATED REMOVAL"); convertToBases(s->a2, s->aLength, s->af, g->merSize, g->extend); convertToBases(s->b2, s->bLength, s->bf, g->merSize, g->extend); //printBits(S, found, display, "BASE COVERAGE"); s->aMasked = maskSequence(s->a2, s->aLength, s->af, g->noMasking, s->am); s->bMasked = maskSequence(s->b2, s->bLength, s->bf, g->noMasking, s->bm); s->aLabel = (s->aMasked < g->cleanThreshold) ? 0 : ((s->aMasked < g->matchThreshold) ? 1 : 2); s->bLabel = (s->bMasked < g->cleanThreshold) ? 0 : ((s->bMasked < g->matchThreshold) ? 1 : 2); // If both reads exist, adjust labels. if ((s->aLength > 0) && (s->bLength > 0)) { if ((s->aLabel != s->bLabel) && (g->dirtier)) { s->aLabel = MIN(s->aLabel, s->bLabel); s->bLabel = MIN(s->aLabel, s->bLabel); } if ((s->aLabel != s->bLabel) && (g->cleaner)) { s->aLabel = MAX(s->aLabel, s->bLabel); s->bLabel = MAX(s->aLabel, s->bLabel); } if ((s->aLabel != s->bLabel) && (g->discard)) { s->aLabel = 3; s->bLabel = 3; } } } void fastqWriter(void *G, void *S) { maskGlobal *g = (maskGlobal *)G; fastqRecord *s = (fastqRecord *)S; s->write(g->OUTPUT1[s->aLabel], g->OUTPUT2[s->bLabel]); g->thresholdCounts[s->aLabel][s->bLabel]++; g->scoreHistogram[(uint32)(1000 * s->aMasked)]++; g->scoreHistogram[(uint32)(1000 * s->bMasked)]++; delete s; } FILE * openInput(char *filename, bool &P) { char C[2 * FILENAME_MAX]; FILE *F = NULL; int32 L = (filename == NULL) ? 
// Opens '<prefix>.<extension>.fastq' for writing and returns the stream.
//
// Never returns NULL: if the name does not fit in FILENAME_MAX bytes, or the
// file cannot be opened, an error is printed to stderr and the process exits
// with status 1.
//
FILE *
openOutput(char *prefix, const char *extension) {
  char   N[FILENAME_MAX];

  // Bounded formatting; the original sprintf() could write past the end of N.
  int    len = snprintf(N, sizeof(N), "%s.%s.fastq", prefix, extension);

  if ((len < 0) || ((size_t)len >= sizeof(N)))
    fprintf(stderr, "ERROR: output name '%s.%s.fastq' is too long.\n", prefix, extension), exit(1);

  // Failure is reported by the return value; errno is only meaningful after
  // fopen() has failed, so it must not be used as the success test.
  errno = 0;
  FILE  *F = fopen(N, "w");

  if (F == NULL)
    fprintf(stderr, "ERROR: failed to open '%s': %s\n", N, strerror(errno)), exit(1);

  return(F);
}
(strcmp(argv[arg], "-e") == 0) { g->extend = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-t") == 0) { numWorkers = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-v") == 0) { beVerbose = true; } else if (strcmp(argv[arg], "-cleaner") == 0) { g->dirtier = false; g->cleaner = true; g->discard = false; g->unlink = false; } else if (strcmp(argv[arg], "-dirtier") == 0) { g->dirtier = true; g->cleaner = false; g->discard = false; g->unlink = false; } else if (strcmp(argv[arg], "-discard") == 0) { g->dirtier = false; g->cleaner = false; g->discard = true; g->unlink = false; } else if (strcmp(argv[arg], "-unlink") == 0) { g->dirtier = false; g->cleaner = false; g->discard = false; g->unlink = true; } else if (strcmp(argv[arg], "-nomasking") == 0) { g->noMasking = true; } else if (strncmp(argv[arg], "-clean", 3) == 0) { g->cleanThreshold = atof(argv[++arg]); } else if (strncmp(argv[arg], "-match", 3) == 0) { g->matchThreshold = atof(argv[++arg]); } else if (strcmp(argv[arg], "-h") == 0) { g->outputHistogram = argv[++arg]; } else { err++; } arg++; } if ((g->outPrefix == NULL) && (g->seq1Name != NULL)) err++; if ((g->merName == NULL) && (g->existName == NULL)) err++; if (err) { fprintf(stderr, "usage: %s -mdb mer-database -ms mer-size ...\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "INPUTS:\n"); fprintf(stderr, "\n"); fprintf(stderr, " -mdb mer-database load masking kmers from meryl 'mer-database'\n"); fprintf(stderr, " -ms mer-size the mer size used to generate the meryl database\n"); fprintf(stderr, " -edb exist-database save masking kmers to 'exist-database', and reuse on\n"); fprintf(stderr, " future runs (optional)\n"); fprintf(stderr, "\n"); fprintf(stderr, " -1 in.1.fastq input reads - fastq, fastq.gz, fastq.bz2 or fastq.xz\n"); fprintf(stderr, " -2 in.2.fastq - optional, for mated reads\n"); fprintf(stderr, "\n"); fprintf(stderr, " -l length maximum length of input read (%u)\n", g->maxLength); fprintf(stderr, " If too small, program will fail.\n"); 
fprintf(stderr, " If too large, program will use excessive memory.\n"); fprintf(stderr, "\n"); fprintf(stderr, "THRESHOLDS and OPTIONS:\n"); fprintf(stderr, "\n"); fprintf(stderr, " A read with fewer than 'c' bases masked is 'clean', while a read with more than 'd' bases\n"); fprintf(stderr, " masked is 'match'ed. Reads in between are 'murky'. See OUTPUTS.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -clean c mark reads with less than this fraction masked as 'clean' (%.4f)\n", g->cleanThreshold); fprintf(stderr, " -match d mark reads with more than this fraction masked as 'match' (%.4f)\n", g->matchThreshold); fprintf(stderr, "\n"); fprintf(stderr, " -m min-size ignore database hits below this many consecutive kmers (%d)\n", g->minSize); fprintf(stderr, " -e extend-size extend database hits across this many missing kmers (%d)\n", g->extend); fprintf(stderr, "\n"); fprintf(stderr, " For mate pairs, how to handle reads with different classifications:\n"); fprintf(stderr, "\n"); fprintf(stderr, " -cleaner use the cleaner classification of the two reads\n"); fprintf(stderr, " -dirtier use the dirtier classification of the two reads\n"); fprintf(stderr, " -discard discard pairs with conflicting classifications (DEFAULT)\n"); fprintf(stderr, " -unlink leave conflicting status alone, output reads to different files\n"); fprintf(stderr, " NOTE: mate pairing will be broken\n"); fprintf(stderr, "\n"); fprintf(stderr, " -nomasking do not trim masked sequence; output the original read\n"); fprintf(stderr, "\n"); fprintf(stderr, "OUTPUTS:\n"); fprintf(stderr, "\n"); fprintf(stderr, " -o prefix output reads:\n"); fprintf(stderr, " prefix.clean.[12].fastq - clean (unmasked) reads\n"); fprintf(stderr, " prefix.murky.[12].fastq - reads in between\n"); fprintf(stderr, " prefix.match.[12].fastq - matching (masked) reads\n"); fprintf(stderr, " prefix.mixed.[12].fastq - reads with conflicting status (for mated reads)\n"); fprintf(stderr, " -h histogram write a histogram of the amount 
of sequence RETAINED\n"); fprintf(stderr, "\n"); fprintf(stderr, "COMPUTE CONFIGURATION and LOGGING\n"); fprintf(stderr, "\n"); fprintf(stderr, " -t t use 't' compute threads\n"); fprintf(stderr, " -v show progress\n"); if ((g->outPrefix == NULL) && (g->seq1Name != NULL)) fprintf(stderr, "ERROR: an output prefix (-o) must be supplied.\n"); if ((g->merName == NULL) && (g->existName == NULL)) fprintf(stderr, "ERROR: either a mer-database (-mdb) or an exist-database (-edb) must be supplied.\n"); exit(1); } // Open inputs and outputs g->FASTQ1 = openInput(g->seq1Name, g->FASTQ1pipe); g->FASTQ2 = openInput(g->seq2Name, g->FASTQ2pipe); if (g->FASTQ1) { g->OUTPUT1[0] = openOutput(g->outPrefix, "clean.1"); // Label == 0 g->OUTPUT1[1] = openOutput(g->outPrefix, "murky.1"); // Label == 1 g->OUTPUT1[2] = openOutput(g->outPrefix, "match.1"); // Label == 2 g->OUTPUT1[3] = openOutput(g->outPrefix, "mixed.1"); // Label == 3 } if (g->FASTQ2) { g->OUTPUT2[0] = openOutput(g->outPrefix, "clean.2"); g->OUTPUT2[1] = openOutput(g->outPrefix, "murky.2"); g->OUTPUT2[2] = openOutput(g->outPrefix, "match.2"); g->OUTPUT2[3] = openOutput(g->outPrefix, "mixed.2"); } // Load data if ((g->existName != NULL) && (fileExists(g->existName))) { if (beVerbose) fprintf(stderr, "Load existDB existName='%s'.\n", g->existName); g->exist = new existDB(g->existName); } else { if (beVerbose) fprintf(stderr, "Build existDB from merName='%s'.\n", g->merName); g->exist = new existDB(g->merName, g->merSize, existDBnoFlags, 0, ~uint32ZERO); if (g->existName != NULL) { if (beVerbose) fprintf(stderr, "Save existDB into existName='%s'.\n", g->existName); g->exist->saveState(g->existName); } } // Process! 
if (g->FASTQ1) { sweatShop *ss = new sweatShop(fastqLoader, maskWorker, fastqWriter); ss->setNumberOfWorkers(numWorkers); ss->setWorkerBatchSize(1024); ss->setLoaderQueueSize(numWorkers * 81920); ss->setWriterQueueSize(numWorkers * 81920); ss->run(g, beVerbose); closeInput(g->FASTQ1, g->seq1Name, g->FASTQ1pipe); closeInput(g->FASTQ2, g->seq1Name, g->FASTQ2pipe); closeOutput(g->OUTPUT1[0], g->outPrefix, "clean.1"); closeOutput(g->OUTPUT1[1], g->outPrefix, "murky.1"); closeOutput(g->OUTPUT1[2], g->outPrefix, "match.1"); closeOutput(g->OUTPUT1[3], g->outPrefix, "mixed.1"); closeOutput(g->OUTPUT2[0], g->outPrefix, "clean.2"); closeOutput(g->OUTPUT2[1], g->outPrefix, "murky.2"); closeOutput(g->OUTPUT2[2], g->outPrefix, "match.2"); closeOutput(g->OUTPUT2[3], g->outPrefix, "mixed.2"); #define FW uint64FMTW(8) if (g->FASTQ2 == NULL) { fprintf(stderr, "aClean "FW"\n", g->thresholdCounts[0][0]); fprintf(stderr, "aMurky "FW"\n", g->thresholdCounts[1][0]); fprintf(stderr, "aMatch "FW"\n", g->thresholdCounts[2][0]); fprintf(stderr, "aMixed "FW"\n", g->thresholdCounts[3][0]); } else { fprintf(stderr, " bClean bMurky bMatch bMixed\n"); fprintf(stderr, "aClean "FW" "FW" "FW" "FW"\n", g->thresholdCounts[0][0], g->thresholdCounts[0][1], g->thresholdCounts[0][2], g->thresholdCounts[0][3]); fprintf(stderr, "aMurky "FW" "FW" "FW" "FW"\n", g->thresholdCounts[1][0], g->thresholdCounts[1][1], g->thresholdCounts[1][2], g->thresholdCounts[1][3]); fprintf(stderr, "aMatch "FW" "FW" "FW" "FW"\n", g->thresholdCounts[2][0], g->thresholdCounts[2][1], g->thresholdCounts[2][2], g->thresholdCounts[2][3]); fprintf(stderr, "aMixed "FW" "FW" "FW" "FW"\n", g->thresholdCounts[3][0], g->thresholdCounts[3][1], g->thresholdCounts[3][2], g->thresholdCounts[3][3]); } #undef FW if (g->outputHistogram != NULL) { FILE *H = fopen(g->outputHistogram, "w"); fprintf(H, "# fraction-masked number-of-sequences\n"); for (uint32 i=0; i<1001; i++) if (g->scoreHistogram[i] > 0) fprintf(H, "%.4f\t%u\n", i / 1000.0, 
g->scoreHistogram[i]); fclose(H); } } delete g->exist; delete g; exit(0); } kmer-code-2013-trunk/meryl/meryl.C0000644000000000000000000000247211237172061015537 0ustar rootroot#include #include #include #include #include "bio++.H" #include "meryl.H" int main(int argc, char **argv) { merylArgs *args = new merylArgs(argc, argv); switch (args->personality) { case 'P': estimate(args); break; case 'B': build(args); break; case 'd': dumpDistanceBetweenMers(args); break; case 't': dumpThreshold(args); break; case 'p': dumpPositions(args); break; case 'c': countUnique(args); break; case 'h': plotHistogram(args); break; case PERSONALITY_MIN: case PERSONALITY_MINEXIST: case PERSONALITY_MAX: case PERSONALITY_MAXEXIST: case PERSONALITY_ADD: case PERSONALITY_AND: case PERSONALITY_NAND: case PERSONALITY_OR: case PERSONALITY_XOR: multipleOperations(args); break; case PERSONALITY_SUB: case PERSONALITY_ABS: case PERSONALITY_DIVIDE: binaryOperations(args); break; case PERSONALITY_LEQ: case PERSONALITY_GEQ: case PERSONALITY_EQ: unaryOperations(args); break; default: args->usage(); fprintf(stderr, "%s: unknown personality. Specify -P, -B, -S or -M!\n", args->execName); exit(1); break; } delete args; return(0); } kmer-code-2013-trunk/meryl/estimate.C0000644000000000000000000001247412532056456016235 0ustar rootroot#include #include #include "bio++.H" #include "seqStream.H" #include "merStream.H" #include "libmeryl.H" #include "meryl.H" // Takes a memory limit in MB, returns the number of mers that we can fit in that memory size, // assuming optimalNumberOfBuckets() below uses the same algorithm. // // For each possible number of buckets, try all poissible pointer widths. First we compute the // number of mers that fit in a bucket pointer table of size 2^t storing N bits in the mer data // table, then we check that the number of mers in the mer data table agrees with the width of the // pointer table. 
// uint64 estimateNumMersInMemorySize(uint32 merSize, uint64 mem, bool positionsEnabled, bool beVerbose) { uint64 maxN = 0; uint64 bestT = 0; uint64 memLimt = mem * 8; // Memory limit, in bits. uint64 posPerMer = (positionsEnabled == false) ? 0 : 32; // Positions consume space, if enabled. uint64 tMax = (merSize > 25) ? 50 : 2 * merSize - 2; // Max width of bucket pointer table. // t - prefix stored in the bucket pointer table; number of entries in the table // N - width of a bucket pointer for (uint64 t=2; t < tMax; t++) { for (uint64 N=1; N<40; N++) { uint64 Nmin = uint64ONE << (N - 1); // Minimum number of mers we want to fit in the table uint64 Nmax = uint64ONE << (N); // Maximum number of mers that can fit in the table uint64 bucketsize = (uint64ONE << t) * N; // Size, in bits, of the pointer table uint64 n = (memLimt - bucketsize) / (2*merSize - t + posPerMer); // Number of mers we can fit into mer data table. if ((memLimt > bucketsize) && // pointer table small enough to fit in memory (n > 0) && // at least some space to store mers (n <= Nmax) && // enough space for the mers in the data table (Nmin <= n) && // ...but not more than enough space (maxN < n)) { // this value of t fits more mers that any other seen so far maxN = n; bestT = t; } } } if (beVerbose) fprintf(stdout, "Can fit "uint64FMT" mers into table with prefix of "uint64FMT" bits, using %.3fMB (%.3fMB for positions)\n", maxN, bestT, (((uint64ONE << bestT) * logBaseTwo64(maxN) + maxN * (2*merSize - bestT + posPerMer)) >> 3) / 1048576.0, ((maxN * posPerMer) >> 3) / 1048576.0); return(maxN); } uint64 estimateMemory(uint32 merSize, uint64 numMers, bool positionsEnabled) { uint64 posPerMer = (positionsEnabled == false) ? 0 : 32; uint64 tMax = (merSize > 25) ? 
50 : 2 * merSize - 2; uint64 tMin = tMax; uint64 memMin = UINT64_MAX; for (uint64 t=2; t < tMax; t++) { uint64 N = logBaseTwo64(numMers); // Width of the bucket pointer table uint64 memUsed = ((uint64ONE << t) * logBaseTwo64(numMers) + numMers * (2 * merSize - t + posPerMer)) >> 3; if (memUsed < memMin) { tMin = t; memMin = memUsed; } //fprintf(stderr, "t=%2lu N=%2lu memUsed=%16lu -- tMin=%2lu memMin=%16lu\n", // t, N, memUsed, tMin, memMin); } return(memMin >> 20); } uint32 optimalNumberOfBuckets(uint32 merSize, uint64 numMers, bool positionsEnabled) { uint64 opth = ~uint64ZERO; uint64 opts = ~uint64ZERO; uint64 h = 0; uint64 s = 0; uint64 hwidth = logBaseTwo64(numMers); // Positions consume space too, but only if enabled. Probably // doesn't matter here. // uint64 posPerMer = 0; if (positionsEnabled) posPerMer = 32; // Find the table size (in bits, h) that minimizes memory usage // for the given merSize and numMers // // We have two tables: // the bucket pointers num buckets * pointer width == 2 << h * hwidth // the mer data: num mers * (mersize - hwidth) // uint64 hmax = 64 - logBaseTwo64(hwidth + numMers * (2 * merSize - h)); for (h=2; h<=hmax && h<2*merSize; h++) { s = (uint64ONE << h) * hwidth + numMers * (2 * merSize - h + posPerMer); //fprintf(stderr, "optimalNumberOfBuckets()-- h="uint64FMT" s="uint64FMT"\n", h, s); if (s < opts) { opth = h; opts = s; } } return((uint32)opth); } void estimate(merylArgs *args) { if (args->inputFile) { merStream M(new kMerBuilder(args->merSize, args->merComp), new seqStream(args->inputFile), true, true); speedCounter C(" %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, args->beVerbose); if (args->beVerbose) fprintf(stderr, "Counting mers in '%s'\n", args->inputFile); args->numMersEstimated = 0; while (M.nextMer()) { C.tick(); args->numMersEstimated++; } C.finish(); } uint32 opth = optimalNumberOfBuckets(args->merSize, args->numMersEstimated, args->positionsEnabled); uint64 memu = ((uint64ONE << opth) * 
logBaseTwo64(args->numMersEstimated+1) + args->numMersEstimated * (2 * args->merSize - opth)); fprintf(stderr, uint64FMT" "uint32FMT"-mers can be computed using "uint64FMT"MB memory.\n", args->numMersEstimated, args->merSize, memu >> 23); } kmer-code-2013-trunk/meryl/mapMers.C0000644000000000000000000001351712547027354016026 0ustar rootroot#include #include #include #include "bio++.H" #include "seqCache.H" #include "merStream.H" #include "libmeryl.H" #include "existDB.H" #define OP_NONE 0 #define OP_STATS 1 #define OP_REGIONS 2 #define OP_DETAILS 3 int main(int argc, char **argv) { uint32 merSize = 16; char *merylFile = 0L; char *fastaFile = 0L; bool beVerbose = false; uint32 loCount = 0; uint32 hiCount = ~uint32ZERO; uint32 operation = OP_NONE; // For OP_STATS uint32 Clen = 0; uint32 Cmax = 4 * 1024 * 1024; uint32 *C = new uint32 [Cmax]; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-m") == 0) { merSize = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-mers") == 0) { merylFile = argv[++arg]; } else if (strcmp(argv[arg], "-seq") == 0) { fastaFile = argv[++arg]; } else if (strcmp(argv[arg], "-v") == 0) { beVerbose = true; } else if (strcmp(argv[arg], "-lo") == 0) { loCount = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-hi") == 0) { hiCount = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-stats") == 0) { operation = OP_STATS; } else if (strcmp(argv[arg], "-regions") == 0) { operation = OP_REGIONS; } else if (strcmp(argv[arg], "-details") == 0) { operation = OP_DETAILS; } else { fprintf(stderr, "unknown option '%s'\n", argv[arg]); } arg++; } if ((operation == OP_NONE) || (merylFile == 0L) || (fastaFile == 0L)) { fprintf(stderr, "usage: %s [-stats | -regions | -details] -m mersize -mers mers -seq fasta > output\n", argv[0]); exit(1); } #if 0 existDB *E = NULL; if (fileExists("junk.existDB")) { fprintf(stderr, "loading from junk.existDB\n"); E = new existDB("junk.existDB"); fprintf(stderr, "loaded\n"); } else { 
exit(1); E = new existDB(merylFile, merSize, existDBcounts, loCount, hiCount); E->saveState("junk.existDB"); } #endif existDB *E = new existDB(merylFile, merSize, existDBcounts, loCount, hiCount); seqCache *F = new seqCache(fastaFile); fprintf(stderr, "Begin.\n"); for (uint32 Sid=0; Sid < F->getNumberOfSequences(); Sid++) { seqInCore *S = F->getSequenceInCore(Sid); merStream *MS = new merStream(new kMerBuilder(merSize), new seqStream(S->sequence(), S->sequenceLength()), true, true); // with counts, report mean, mode, median, min, max for each frag. if (operation == OP_STATS) { Clen = 0; while (MS->nextMer()) C[Clen++] = E->count(MS->theFMer()) + E->count(MS->theRMer()); uint64 mean = uint64ZERO; uint64 min = ~uint64ZERO; uint64 max = uint64ZERO; uint64 hist[16] = { 0 }; // Histogram values are powers of two, e.g., <=1, <=2, <=4, <=8, <=16, <=32, <=64, <=128, <=256, <=512, <=1024, <=4096, <=8192, <=328768 for (uint32 i=0; i C[i]) min = C[i]; if (max < C[i]) max = C[i]; hist[ logBaseTwo64(C[i]) ]++; } if (Clen > 0) { mean /= Clen; } else { mean = uint64ZERO; min = uint64ZERO; max = uint64ZERO; } fprintf(stdout, "%s\t" uint64FMT"\t"uint64FMT"\t"uint64FMT"\t" uint64FMT"\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\t" uint64FMT"\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\n", S->header(), mean, min, max, hist[ 0], hist[ 1], hist[ 2], hist[ 3], hist[ 4], hist[ 5], hist[ 6], hist[ 7], hist[ 8], hist[ 9], hist[10], hist[11], hist[12], hist[13], hist[14], hist[15]); } // without counts, reports regions with mer coverage. 
// Orientation tells us nothing, since the mers are probably canonical if (operation == OP_REGIONS) { uint64 beg = ~uint64ZERO; uint64 end = ~uint64ZERO; uint64 pos = ~uint64ZERO; uint64 numCovReg = 0; uint64 lenCovReg = 0; while (MS->nextMer()) { if (E->exists(MS->theFMer()) || E->exists(MS->theRMer())) { pos = MS->thePositionInSequence(); if (beg == ~uint64ZERO) beg = end = pos; if (pos <= end + merSize) { end = pos; } else { fprintf(stdout, "%s\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\n", S->header(), beg, end+merSize, end+merSize - beg); numCovReg++; lenCovReg += end+merSize - beg; beg = end = pos; } } else { fprintf(stdout, "%s\t"uint64FMT"\tuncovered\n", S->header(), MS->thePositionInSequence()); } } if (beg != ~uint64ZERO) fprintf(stdout, "%s\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\n", S->header(), beg, end+merSize, end+merSize - beg); fprintf(stderr, "numCovReg: "uint64FMT"\n", numCovReg); fprintf(stderr, "lenCovReg: "uint64FMT"\n", lenCovReg); } if (operation == OP_DETAILS) { char merString[256]; while (MS->nextMer()) { uint64 beg = MS->thePositionInSequence(); uint64 end = beg + merSize; uint64 fnt = E->count(MS->theFMer()); uint64 rnt = E->count(MS->theRMer()); fprintf(stdout, "%s\t%s\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\t"uint64FMT"\n", S->header(), MS->theFMer().merToString(merString), beg, end, fnt, rnt, fnt + rnt); } } delete MS; delete S; } delete F; delete E; } kmer-code-2013-trunk/meryl/compare-counts.C0000644000000000000000000001176612322046702017352 0ustar rootroot#include #include #include #include #include "libmeryl.H" #if 0 void heatMap() { speedCounter *C = new speedCounter(" Examining: %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1ffffff, false); #define MAXA 150 #define MAXB 150 double heatraw[MAXA][MAXB]; double heatsca[MAXA][MAXB]; for (uint32 i=0; inextMer(); B->nextMer(); while ((A->validMer()) || (B->validMer())) { kMer &a = A->theFMer(); kMer &b = B->theFMer(); uint32 ac = A->theCount(); uint32 bc = B->theCount(); 
if (ac >= MAXA) ac = MAXA-1; if (bc >= MAXB) bc = MAXB-1; if (A->validMer() == false) { ac = 0; heatraw[ac][bc]++; B->nextMer(); continue; } if (B->validMer() == false) { bc = 0; heatraw[ac][bc]++; A->nextMer(); continue; } if (a == b) { heatraw[ac][bc]++; A->nextMer(); B->nextMer(); } else if (a < b) { heatraw[ac][0]++; A->nextMer(); } else { heatraw[0][bc]++; B->nextMer(); } C->tick(); } delete C; delete A; delete B; // Scale each row to be between 0 and 1 #if 0 for (uint32 j=0; jmerSize(); #define HMAX 64 * 1024 uint32 *Htrue = new uint32 [HMAX]; uint32 *Hnoise = new uint32 [HMAX]; for (uint32 i=0; inextMer(); S->nextMer(); while ((T->validMer()) || (S->validMer())) { kMer &t = T->theFMer(); kMer &s = S->theFMer(); uint32 tc = T->theCount(); uint32 sc = S->theCount(); if (tc >= HMAX) tc = HMAX-1; if (sc >= HMAX) sc = HMAX-1; // If we're out of truth kmers, the sample is noise. if (T->validMer() == false) { Hnoise[sc]++; S->nextMer(); continue; } // If we're out of sample kmers, do nothing but go to the next truth kmer. if (S->validMer() == false) { T->nextMer(); continue; } // If the kmers are equal, this is a true kmer if (t == s) { Htrue[sc]++; T->nextMer(); S->nextMer(); } // If the truth kmer is the lesser, get the next truth. else if (t < s) { T->nextMer(); } // Else the sample kmer is smaller, add it to the noise pile, and get the next. 
else { Hnoise[sc]++; S->nextMer(); } } delete T; delete S; char outputName[FILENAME_MAX]; sprintf(outputName, "%s.gp", outputPrefix); FILE *outputGP = fopen(outputName, "w"); sprintf(outputName, "%s.dat", outputPrefix); FILE *outputDAT = fopen(outputName, "w"); fprintf(outputGP, "set terminal png\n"); fprintf(outputGP, "set output \"%s.png\"\n", outputPrefix); fprintf(outputGP, "set title \"%s true/false %d-mers\"\n", plotTitle, kmerSize); fprintf(outputGP, "set xlabel \"k-mer count\"\n"); fprintf(outputGP, "set ylabel \"number of kmers\"\n"); fprintf(outputGP, "plot [0:100] [0:1000000] \"%s.dat\" using 1:2 with lines title \"true\", \"%s.dat\" using 1:3 with lines title \"false\"\n", outputPrefix, outputPrefix); fclose(outputGP); for (uint32 i=0; i #include #include #include #include "meryl.H" #include "libmeryl.H" void binaryOperations(merylArgs *args) { if (args->mergeFilesLen != 2) { fprintf(stderr, "ERROR - must have exactly two files!\n"); exit(1); } if (args->outputFile == 0L) { fprintf(stderr, "ERROR - no output file specified.\n"); exit(1); } if ((args->personality != PERSONALITY_SUB) && (args->personality != PERSONALITY_ABS) && (args->personality != PERSONALITY_DIVIDE)) { fprintf(stderr, "ERROR - only personalities sub and abs\n"); fprintf(stderr, "ERROR - are supported in binaryOperations().\n"); fprintf(stderr, "ERROR - this is a coding error, not a user error.\n"); exit(1); } // Open the input files, read in the first mer // merylStreamReader *A = new merylStreamReader(args->mergeFiles[0]); merylStreamReader *B = new merylStreamReader(args->mergeFiles[1]); A->nextMer(); B->nextMer(); // Make sure that the mersizes agree, and pick a prefix size for // the output // if (A->merSize() != B->merSize()) { fprintf(stderr, "ERROR - mersizes are different!\n"); fprintf(stderr, "ERROR - mersize of '%s' is "uint32FMT"\n", args->mergeFiles[0], A->merSize()); fprintf(stderr, "ERROR - mersize of '%s' is "uint32FMT"\n", args->mergeFiles[1], B->merSize()); exit(1); } 
// Open the output file, using the larger of the two prefix sizes // merylStreamWriter *W = new merylStreamWriter(args->outputFile, A->merSize(), A->merCompression(), (A->prefixSize() > B->prefixSize()) ? A->prefixSize() : B->prefixSize(), A->hasPositions()); // SUB - report A - B // ABS - report the absolute difference between the two files // // These two operations are very similar (SUB was derived from ABS), so // any bug found in one is probably in the other. // kMer Amer; uint32 Acnt = uint32ZERO; kMer Bmer; uint32 Bcnt = uint32ZERO; switch (args->personality) { case PERSONALITY_SUB: while (A->validMer() || B->validMer()) { Amer = A->theFMer(); Acnt = A->theCount(); Bmer = B->theFMer(); Bcnt = B->theCount(); // If the A stream is all out of mers, set Amer to be the // same as Bmer, and set Acnt to zero. Similar for B. // if (!A->validMer()) { Amer = Bmer; Acnt = uint32ZERO; } if (!B->validMer()) { Bmer = Amer; Bcnt = uint32ZERO; } //fprintf(stderr, "sub A="uint64HEX" B="uint64HEX"\n", Amer, Bmer); if (Amer == Bmer) { W->addMer(Amer, (Acnt > Bcnt) ? Acnt - Bcnt : 0); A->nextMer(); B->nextMer(); } else if (Amer < Bmer) { W->addMer(Amer, Acnt); A->nextMer(); } else { B->nextMer(); } } break; case PERSONALITY_ABS: while (A->validMer() || B->validMer()) { Amer = A->theFMer(); Acnt = A->theCount(); Bmer = B->theFMer(); Bcnt = B->theCount(); // If the A stream is all out of mers, set Amer to be the // same as Bmer, and set Acnt to zero. Similar for B. // if (!A->validMer()) { Amer = Bmer; Acnt = uint32ZERO; } if (!B->validMer()) { Bmer = Amer; Bcnt = uint32ZERO; } if (Amer == Bmer) { W->addMer(Amer, (Acnt > Bcnt) ? 
Acnt - Bcnt : Bcnt - Acnt); A->nextMer(); B->nextMer(); } else if (Amer < Bmer) { W->addMer(Amer, Acnt); A->nextMer(); } else { W->addMer(Bmer, Bcnt); B->nextMer(); } } break; case PERSONALITY_DIVIDE: while (A->validMer() || B->validMer()) { Amer = A->theFMer(); Acnt = A->theCount(); Bmer = B->theFMer(); Bcnt = B->theCount(); // If the A stream is all out of mers, set Amer to be the // same as Bmer, and set Acnt to zero. Similar for B. // if (!A->validMer()) { Amer = Bmer; Acnt = uint32ZERO; } if (!B->validMer()) { Bmer = Amer; Bcnt = uint32ZERO; } if (Amer == Bmer) { if ((Acnt > 0) && (Bcnt > 0)) { double d = 1000.0 * (double)Acnt / (double)Bcnt; if (d > 4096.0 * 1024.0 * 1024.0) d = 4096.0 * 1024.0 * 1024.0; W->addMer(Amer, (uint32)floor(d)); } A->nextMer(); B->nextMer(); } else if (Amer < Bmer) { A->nextMer(); } else { B->nextMer(); } } break; } delete A; delete B; delete W; } kmer-code-2013-trunk/meryl/Make.include0000644000000000000000000000320412527037073016525 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../libutil/)/ LIBBIO/ :=$(realpath $/../libbio/)/ LIBSEQ/ :=$(realpath $/../libseq/)/ LIBMERYL/ :=$(realpath $/../libmeryl/)/ LIBKMER/ :=$(realpath $/../libkmer/)/ merylsrc := $/args.C \ $/binaryOp.C \ $/build.C \ $/build-threads.C \ $/dump.C \ $/estimate.C \ $/merge.C \ $/unaryOp.C # Removed m.C from SRCS. and m from EXES -- what's it do?? # meryl.H is exported only for celera-assembler. 
$/.CXX_SRCS := ${merylsrc} $/meryl.C $/simple.C $/mapMers.C $/mapMers-depth.C $/kmer-mask.C $/.CXX_INCS := $/meryl.H $/.CXX_LIBS := $/libmerylguts.a $/.CXX_EXES := $/meryl $/simple $/mapMers $/mapMers-depth $/kmer-mask $/.CLEAN := $/*.o $/libmerylguts.a : ${merylsrc:.C=.o} $/meryl: $/meryl.o $/libmerylguts.a ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/simple: $/simple.o ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/mapMers: $/mapMers.o ${LIBKMER/}libkmer.a ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/mapMers-depth: $/mapMers-depth.o ${LIBKMER/}libkmer.a ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/kmer-mask: $/kmer-mask.o ${LIBKMER/}libkmer.a ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBMERYL/} -I${LIBKMER/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) $/%.d: ${LIBBIO/}alphabet.h kmer-code-2013-trunk/meryl/dump.C0000644000000000000000000000755112322046702015355 0ustar rootroot#include #include #include #include "meryl.H" #include "libmeryl.H" #include void dumpThreshold(merylArgs *args) { merylStreamReader *M = new merylStreamReader(args->inputFile); char str[1025]; while (M->nextMer()) { if (M->theCount() >= args->numMersEstimated) fprintf(stdout, ">"uint64FMT"\n%s\n", M->theCount(), M->theFMer().merToString(str)); } delete M; } void dumpPositions(merylArgs *args) { merylStreamReader *M = new merylStreamReader(args->inputFile); char str[1025]; if (M->hasPositions() == false) { fprintf(stderr, "File '%s' contains no position information.\n", args->inputFile); } else { while (M->nextMer()) { fprintf(stdout, ">"uint64FMT, M->theCount()); for (uint32 i=0; itheCount(); i++) fprintf(stdout, " "uint32FMT, M->getPosition(i)); fprintf(stdout, "\n%s\n", M->theFMer().merToString(str)); } } delete M; } void countUnique(merylArgs *args) { merylStreamReader *M = 
new merylStreamReader(args->inputFile); #warning make this a test #if 0 uint64 numDistinct = 0; uint64 numUnique = 0; uint64 numMers = 0; uint64 c = 0; while (M->nextMer()) { c = M->theCount(); numDistinct++; if (c == 1) numUnique++; numMers += c; } assert(numMers == M->numberOfTotalMers()); assert(numDistinct == M->numberOfDistinctMers()); assert(numUnique == M->numberOfUniqueMers()); fprintf(stderr, "OK\n"); #endif fprintf(stdout, "Found "uint64FMT" mers.\n", M->numberOfTotalMers()); fprintf(stdout, "Found "uint64FMT" distinct mers.\n", M->numberOfDistinctMers()); fprintf(stdout, "Found "uint64FMT" unique mers.\n", M->numberOfUniqueMers()); delete M; } void plotHistogram(merylArgs *args) { uint64 distinct = 0; uint64 total = 0; merylStreamReader *M = new merylStreamReader(args->inputFile); fprintf(stderr, "Found "uint64FMT" mers.\n", M->numberOfTotalMers()); fprintf(stderr, "Found "uint64FMT" distinct mers.\n", M->numberOfDistinctMers()); fprintf(stderr, "Found "uint64FMT" unique mers.\n", M->numberOfUniqueMers()); fprintf(stderr, "Largest mercount is "uint64FMT"; "uint64FMT" mers are too big for histogram.\n", M->histogramMaximumCount(), M->histogramHuge()); for (uint32 i=1; ihistogramLength(); i++) { uint64 hist = M->histogram(i); if (hist > 0) { distinct += hist; total += hist * i; fprintf(stdout, uint32FMT"\t"uint64FMT"\t%.4f\t%.4f\n", i, hist, distinct / (double)M->numberOfDistinctMers(), total / (double)M->numberOfTotalMers()); } } delete M; } void dumpDistanceBetweenMers(merylArgs *args) { merylStreamReader *M = new merylStreamReader(args->inputFile); // This is now tough because we don't know where the sequences end, // and our positions encode position in the chain. 
uint32 histMax = 64 * 1024 * 1024; uint64 *hist = new uint64 [histMax]; uint64 histHuge = 0; if (M->hasPositions() == false) { fprintf(stderr, "File '%s' contains no position information.\n", args->inputFile); } else { while (M->nextMer()) { std::sort(M->thePositions(), M->thePositions() + M->theCount()); for (uint32 i=1; itheCount(); i++) { uint32 d = M->getPosition(i) - M->getPosition(i-1); if (d < histMax) hist[d]++; else histHuge++; } } uint32 maxd = 0; for (uint32 d=0; d $asm.normalcontigs.fasta & fi if [ ! -e $asm.degeneratecontigs.fasta ] ; then bin/asmOutputContigsFasta -D < $dir/9-terminator/$asm.asm > $asm.degeneratecontigs.fasta & fi if [ ! -e $asm.allcontigs.fasta ] ; then bin/asmOutputContigsFasta -d < $dir/9-terminator/$asm.asm > $asm.allcontigs.fasta & fi # Count mers in contigs # if [ ! -e $asm-ms$ms-normal-contigs.mcidx ] ; then meryl -B -C -m $ms -s $asm.normalcontigs.fasta -o $asm-ms$ms-normal-contigs -threads 4 -segments 4 -v & fi if [ ! -e $asm-ms$ms-degenerate-contigs.mcidx ] ; then meryl -B -C -m $ms -s $asm.degeneratecontigs.fasta -o $asm-ms$ms-degenerate-contigs -threads 4 -segments 4 -v & fi if [ ! -e $asm-ms$ms-all-contigs.mcidx ] ; then meryl -B -C -m $ms -s $asm.allcontigs.fasta -o $asm-ms$ms-all-contigs -threads 4 -segments 4 -v & fi if [ ! -e $asm-ms$ms.asmMerQC ] ; then $asmMerQC -af $asm-ms$ms-all-frags \ -tf $asm-ms$ms-clr-frags \ -co $asm-ms$ms-normal-contigs \ -ac $asm-ms$ms-all-contigs \ -dc $asm-ms$ms-degenerate-contigs \ > $asm-ms$ms.asmMerQC & fi echo Finding badmers. if [ ! -e $asm-ms$ms-allfrags-normalcontigs.badmers.asmMerQC ] ; then $asmMerQC -af $asm-ms$ms-all-frags \ -co $asm-ms$ms-normal-contigs \ -dump $asm-ms$ms-allfrags-normalcontigs.badmers \ > $asm-ms$ms-allfrags-normalcontigs.badmers.asmMerQC & fi if [ ! 
-e $asm-ms$ms-allfrags-allcontigs.badmers.asmMerQC ] ; then
  $asmMerQC -af $asm-ms$ms-all-frags \
            -ac $asm-ms$ms-all-contigs \
            -dump $asm-ms$ms-allfrags-allcontigs.badmers \
  > $asm-ms$ms-allfrags-allcontigs.badmers.asmMerQC &
fi
if [ ! -e $asm-ms$ms-allfrags-degeneratecontigs.badmers.asmMerQC ] ; then
  $asmMerQC -af $asm-ms$ms-all-frags \
            -dc $asm-ms$ms-degenerate-contigs \
            -dump $asm-ms$ms-allfrags-degeneratecontigs.badmers \
  > $asm-ms$ms-allfrags-degeneratecontigs.badmers.asmMerQC &
fi

if [ ! -e $asm-ms$ms-clrfrags-normalcontigs.badmers.asmMerQC ] ; then
  $asmMerQC -tf $asm-ms$ms-clr-frags \
            -co $asm-ms$ms-normal-contigs \
            -dump $asm-ms$ms-clrfrags-normalcontigs.badmers \
  > $asm-ms$ms-clrfrags-normalcontigs.badmers.asmMerQC &
fi
if [ ! -e $asm-ms$ms-clrfrags-allcontigs.badmers.asmMerQC ] ; then
  $asmMerQC -tf $asm-ms$ms-clr-frags \
            -ac $asm-ms$ms-all-contigs \
            -dump $asm-ms$ms-clrfrags-allcontigs.badmers \
  > $asm-ms$ms-clrfrags-allcontigs.badmers.asmMerQC &
fi
if [ ! -e $asm-ms$ms-clrfrags-degeneratecontigs.badmers.asmMerQC ] ; then
  $asmMerQC -tf $asm-ms$ms-clr-frags \
            -dc $asm-ms$ms-degenerate-contigs \
            -dump $asm-ms$ms-clrfrags-degeneratecontigs.badmers \
  > $asm-ms$ms-clrfrags-degeneratecontigs.badmers.asmMerQC &
fi


echo Mapping.

#  map_badmers prefix contigs.fasta
#
#  Maps the mers in prefix.fasta onto contigs.fasta, in the background,
#  writing prefix.badmers.  Does nothing if prefix.badmers already exists.
#  The mer size is $ms, the same size the mer tables were built with (it
#  used to be a hard-coded 22 here).
#
map_badmers() {
  if [ ! -e $1.badmers ] ; then
    $mapMers -m $ms \
             -mers $1.fasta \
             -seq $2 \
    > $1.badmers &
  fi
}

for frags in allfrags clrfrags ; do
  for ctgs in normal all degenerate ; do
    map_badmers $asm-ms$ms-${frags}-${ctgs}contigs.badmers.0.singlecontig.zerofrag $asm.${ctgs}contigs.fasta
  done
done

#  All the zero-fragment bad mers at once.  The guard tests the file the
#  mapping actually writes (.5.allzero.badmers); it used to test
#  .5.all.badmers, which is never created, so this step always reran.
#
if [ ! -e $asm-ms$ms-allfrags-normalcontigs.badmers.5.allzero.badmers ] ; then
  cat $asm-ms$ms-allfrags-normalcontigs.badmers.[01].*.fasta > $asm-ms$ms-allfrags-normalcontigs.badmers.5.allzero.fasta
  map_badmers $asm-ms$ms-allfrags-normalcontigs.badmers.5.allzero $asm.normalcontigs.fasta
fi

date kmer-code-2013-trunk/meryl/args.C0000644000000000000000000005144512532056456015357 0ustar rootroot#include #include #include #include #include "bio++.H" #include "meryl.H" // Some string handling utilities.
// bool writeString(const char *str, FILE *F) { errno = 0; uint32 len = 0; if (str) { len = (uint32)strlen(str) + 1; fwrite(&len, sizeof(uint32), 1, F); fwrite( str, sizeof(char), len, F); } else { fwrite(&len, sizeof(uint32), 1, F); } if (errno) { fprintf(stderr, "writeString()-- Failed to write string of length "uint32FMT": %s\n", len, strerror(errno)); fprintf(stderr, "writeString()-- First 80 bytes of string is:\n"); fprintf(stderr, "%80.80s\n", str); return(false); } return(true); } char* readString(FILE *F) { errno = 0; uint32 len = 0; fread(&len, sizeof(uint32), 1, F); if (errno) { fprintf(stderr, "readString()-- Failed to read string: %s\n", strerror(errno)); exit(1); } char *str = 0L; if (len > 0) { str = new char [len]; fread(str, sizeof(char), len, F); if (errno) { fprintf(stderr, "readString()-- Failed to read string: %s\n", strerror(errno)); exit(1); } } return(str); } char* duplString(char *str) { char *dupstr = 0L; if (str) { uint32 len = (uint32)strlen(str); dupstr = new char [len+1]; strcpy(dupstr, str); } return(dupstr); } void merylArgs::usage(void) { fprintf(stderr, "usage: %s [personality] [global options] [options]\n", execName); fprintf(stderr, "\n"); fprintf(stderr, "where personality is:\n"); fprintf(stderr, " -P -- compute parameters\n"); fprintf(stderr, " -B -- build table\n"); fprintf(stderr, " -S -- scan table\n"); fprintf(stderr, " -M -- \"math\" operations\n"); fprintf(stderr, " -D -- dump table\n"); fprintf(stderr, "\n"); fprintf(stderr, "-P: Given a sequence file (-s) or an upper limit on the\n"); fprintf(stderr, " number of mers in the file (-n), compute the table size\n"); fprintf(stderr, " (-t in build) to minimize the memory usage.\n"); fprintf(stderr, " -m # (size of a mer; required)\n"); fprintf(stderr, " -c # (homopolymer compression; optional)\n"); fprintf(stderr, " -p (enable positions)\n"); fprintf(stderr, " -s seq.fasta (seq.fasta is scanned to determine the number of mers)\n"); fprintf(stderr, " -n # (compute params 
assuming file with this many mers in it)\n"); fprintf(stderr, "\n"); fprintf(stderr, " Only one of -s, -n need to be specified. If both are given\n"); fprintf(stderr, " -s takes priority.\n"); fprintf(stderr, "\n"); fprintf(stderr, "\n"); fprintf(stderr, "-B: Given a sequence file (-s) and lots of parameters, compute\n"); fprintf(stderr, " the mer-count tables. By default, both strands are processed.\n"); fprintf(stderr, " -f (only build for the forward strand)\n"); fprintf(stderr, " -r (only build for the reverse strand)\n"); fprintf(stderr, " -C (use canonical mers, assumes both strands)\n"); fprintf(stderr, " -L # (DON'T save mers that occur less than # times)\n"); fprintf(stderr, " -U # (DON'T save mers that occur more than # times)\n"); fprintf(stderr, " -m # (size of a mer; required)\n"); fprintf(stderr, " -c # (homopolymer compression; optional)\n"); fprintf(stderr, " -p (enable positions)\n"); fprintf(stderr, " -s seq.fasta (sequence to build the table for)\n"); fprintf(stderr, " -o tblprefix (output table prefix)\n"); fprintf(stderr, " -v (entertain the user)\n"); fprintf(stderr, "\n"); fprintf(stderr, " By default, the computation is done as one large sequential process.\n"); fprintf(stderr, " Multi-threaded operation is possible, at additional memory expense, as\n"); fprintf(stderr, " is segmented operation, at additional I/O expense.\n"); fprintf(stderr, "\n"); fprintf(stderr, " Threaded operation: Split the counting in to n almost-equally sized\n"); fprintf(stderr, " pieces. 
This uses an extra h MB (from -P) per thread.\n"); fprintf(stderr, " -threads n (use n threads to build)\n"); fprintf(stderr, "\n"); fprintf(stderr, " Segmented, sequential operation: Split the counting into pieces that\n"); fprintf(stderr, " will fit into no more than m MB of memory, or into n equal sized pieces.\n"); fprintf(stderr, " Each piece is computed sequentially, and the results are merged at the end.\n"); fprintf(stderr, " Only one of -memory and -segments is needed.\n"); fprintf(stderr, " -memory mMB (use at most m MB of memory per segment)\n"); fprintf(stderr, " -segments n (use n segments)\n"); fprintf(stderr, "\n"); fprintf(stderr, " Segmented, batched operation: Same as sequential, except this allows\n"); fprintf(stderr, " each segment to be manually executed in parallel.\n"); fprintf(stderr, " Only one of -memory and -segments is needed.\n"); fprintf(stderr, " -memory mMB (use at most m MB of memory per segment)\n"); fprintf(stderr, " -segments n (use n segments)\n"); fprintf(stderr, " -configbatch (create the batches)\n"); fprintf(stderr, " -countbatch n (run batch number n)\n"); fprintf(stderr, " -mergebatch (merge the batches)\n"); fprintf(stderr, " Initialize the compute with -configbatch, which needs all the build options.\n"); fprintf(stderr, " Execute all -countbatch jobs, then -mergebatch to complete.\n"); fprintf(stderr, " meryl -configbatch -B [options] -o file\n"); fprintf(stderr, " meryl -countbatch 0 -o file\n"); fprintf(stderr, " meryl -countbatch 1 -o file\n"); fprintf(stderr, " ...\n"); fprintf(stderr, " meryl -countbatch N -o file\n"); fprintf(stderr, " meryl -mergebatch N -o file\n"); fprintf(stderr, " Batched mode can run on the grid.\n"); fprintf(stderr, " -sge jobname unique job name for this execution. 
Meryl will submit\n"); fprintf(stderr, " jobs with name mpjobname, ncjobname, nmjobname, for\n"); fprintf(stderr, " phases prepare, count and merge.\n"); fprintf(stderr, " -sgebuild \"options\" any additional options to sge, e.g.,\n"); fprintf(stderr, " -sgemerge \"options\" \"-p -153 -pe thread 2 -A merylaccount\"\n"); fprintf(stderr, " N.B. - -N will be ignored\n"); fprintf(stderr, " N.B. - be sure to quote the options\n"); fprintf(stderr, "\n"); fprintf(stderr, "-M: Given a list of tables, perform a math, logical or threshold operation.\n"); fprintf(stderr, " Unless specified, all operations take any number of databases.\n"); fprintf(stderr, "\n"); fprintf(stderr, " Math operations are:\n"); fprintf(stderr, " min count is the minimum count for all databases. If the mer\n"); fprintf(stderr, " does NOT exist in all databases, the mer has a zero count, and\n"); fprintf(stderr, " is NOT in the output.\n"); fprintf(stderr, " minexist count is the minimum count for all databases that contain the mer\n"); fprintf(stderr, " max count is the maximum count for all databases\n"); fprintf(stderr, " add count is sum of the counts for all databases\n"); fprintf(stderr, " sub count is the first minus the second (binary only)\n"); fprintf(stderr, " abs count is the absolute value of the first minus the second (binary only)\n"); fprintf(stderr, "\n"); fprintf(stderr, " Logical operations are:\n"); fprintf(stderr, " and outputs mer iff it exists in all databases\n"); fprintf(stderr, " nand outputs mer iff it exists in at least one, but not all, databases\n"); fprintf(stderr, " or outputs mer iff it exists in at least one database\n"); fprintf(stderr, " xor outputs mer iff it exists in an odd number of databases\n"); fprintf(stderr, "\n"); fprintf(stderr, " Threshold operations are:\n"); fprintf(stderr, " lessthan x outputs mer iff it has count < x\n"); fprintf(stderr, " lessthanorequal x outputs mer iff it has count <= x\n"); fprintf(stderr, " greaterthan x outputs mer iff it has 
count > x\n"); fprintf(stderr, " greaterthanorequal x outputs mer iff it has count >= x\n"); fprintf(stderr, " equal x outputs mer iff it has count == x\n"); fprintf(stderr, " Threshold operations work on exactly one database.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -s tblprefix (use tblprefix as a database)\n"); fprintf(stderr, " -o tblprefix (create this output)\n"); fprintf(stderr, " -v (entertain the user)\n"); fprintf(stderr, "\n"); fprintf(stderr, " NOTE: Multiple tables are specified with multiple -s switches; e.g.:\n"); fprintf(stderr, " %s -M add -s 1 -s 2 -s 3 -s 4 -o all\n", execName); fprintf(stderr, " NOTE: It is NOT possible to specify more than one operation:\n"); fprintf(stderr, " %s -M add -s 1 -s 2 -sub -s 3\n", execName); fprintf(stderr, " will NOT work.\n"); fprintf(stderr, "\n"); fprintf(stderr, "\n"); fprintf(stderr, "-D: Dump the table (not all of these work).\n"); fprintf(stderr, "\n"); fprintf(stderr, " -Dd Dump a histogram of the distance between the same mers.\n"); fprintf(stderr, " -Dt Dump mers >= a threshold. 
Use -n to specify the threshold.\n"); fprintf(stderr, " -Dc Count the number of mers, distinct mers and unique mers.\n"); fprintf(stderr, " -Dh Dump (to stdout) a histogram of mer counts.\n"); fprintf(stderr, " -s Read the count table from here (leave off the .mcdat or .mcidx).\n"); fprintf(stderr, "\n"); fprintf(stderr, "\n"); } void merylArgs::clear(void) { execName = 0L; options = 0L; beVerbose = false; doForward = true; doReverse = false; doCanonical = false; inputFile = 0L; outputFile = 0L; queryFile = 0L; merSize = 20; merComp = 0; positionsEnabled = false; numMersEstimated = 0; numMersActual = 0; numBasesActual = 0; mersPerBatch = 0; basesPerBatch = 0; numBuckets = 0; numBuckets_log2 = 0; merDataWidth = 0; merDataMask = uint64ZERO; bucketPointerWidth = 0; numThreads = 0; memoryLimit = 0; segmentLimit = 0; configBatch = false; countBatch = false; mergeBatch = false; batchNumber = 0; sgeJobName = 0L; sgeBuildOpt = 0L; sgeMergeOpt = 0L; isOnGrid = false; lowCount = 0; highCount = ~lowCount; desiredCount = 0; outputCount = 0; outputAll = 0; outputPosition = 0; mergeFilesMax = 0; mergeFilesLen = 0; mergeFiles = 0L; personality = 0; } merylArgs::merylArgs(int argc, char **argv) { clear(); execName = duplString(argv[0]); if (argc == 1) { usage(); exit(1); } // Count how many '-s' switches there are, then allocate space // for them in mergeFiles. We also sum the length of all options, // so we can copy them into an 'options' string used when we // resubmit to the grid. // uint32 optionsLen = 0; for (int arg=1; arg < argc; arg++) { optionsLen += strlen(argv[arg]) + 1; if (strcmp(argv[arg], "-s") == 0) mergeFilesMax++; } mergeFiles = new char * [mergeFilesMax]; options = new char [2 * optionsLen + 1]; options[0] = 0; bool fail = false; char *optptr = options; for (int arg=1; arg < argc; arg++) { if (arg > 1) *optptr++ = ' '; // Arg! If the arg has spaces or other stuff that the shell // needs escaped we need to escape them again. 
So, we copy byte // by byte and insert escapes at the right points. for (char *op=argv[arg]; *op; op++, optptr++) { if (isspace(*op) || !isalnum(*op)) if ((*op != '-') && (*op != '_') && (*op != '.') && (*op != '/')) *optptr++ = '\\'; *optptr = *op; } //strcat(options, argv[arg]); } // Parse the options // for (int arg=1; arg < argc; arg++) { if (strncmp(argv[arg], "-V", 2) == 0) { fprintf(stdout, "meryl the Mighty Mer Counter version (no version)\n"); exit(0); } else if (strcmp(argv[arg], "-m") == 0) { arg++; merSize = strtouint32(argv[arg], 0L); } else if (strcmp(argv[arg], "-c") == 0) { arg++; merComp = strtouint32(argv[arg], 0L); } else if (strcmp(argv[arg], "-p") == 0) { positionsEnabled = true; } else if (strcmp(argv[arg], "-s") == 0) { arg++; delete [] inputFile; inputFile = duplString(argv[arg]); mergeFiles[mergeFilesLen++] = duplString(argv[arg]); } else if (strcmp(argv[arg], "-n") == 0) { arg++; numMersEstimated = strtouint64(argv[arg], 0L); } else if (strcmp(argv[arg], "-f") == 0) { doForward = true; doReverse = false; doCanonical = false; } else if (strcmp(argv[arg], "-r") == 0) { doForward = false; doReverse = true; doCanonical = false; } else if (strcmp(argv[arg], "-C") == 0) { doForward = false; doReverse = false; doCanonical = true; } else if (strcmp(argv[arg], "-L") == 0) { arg++; lowCount = strtouint32(argv[arg], 0L); } else if (strcmp(argv[arg], "-U") == 0) { arg++; highCount = strtouint32(argv[arg], 0L); } else if (strcmp(argv[arg], "-o") == 0) { arg++; delete [] outputFile; outputFile = duplString(argv[arg]); } else if (strcmp(argv[arg], "-v") == 0) { beVerbose = true; } else if (strcmp(argv[arg], "-P") == 0) { personality = 'P'; } else if (strcmp(argv[arg], "-B") == 0) { personality = 'B'; } else if (strcmp(argv[arg], "-S") == 0) { personality = 'S'; } else if (strcmp(argv[arg], "-M") == 0) { arg++; if (strcmp(argv[arg], "merge") == 0) { personality = PERSONALITY_MERGE; } else if (strcmp(argv[arg], "min") == 0) { personality = PERSONALITY_MIN; 
} else if (strcmp(argv[arg], "minexist") == 0) { personality = PERSONALITY_MINEXIST; } else if (strcmp(argv[arg], "max") == 0) { personality = PERSONALITY_MAX; } else if (strcmp(argv[arg], "maxexist") == 0) { personality = PERSONALITY_MAXEXIST; } else if (strcmp(argv[arg], "add") == 0) { personality = PERSONALITY_ADD; } else if (strcmp(argv[arg], "sub") == 0) { personality = PERSONALITY_SUB; } else if (strcmp(argv[arg], "abs") == 0) { personality = PERSONALITY_ABS; } else if (strcmp(argv[arg], "divide") == 0) { personality = PERSONALITY_DIVIDE; } else if (strcmp(argv[arg], "and") == 0) { personality = PERSONALITY_AND; } else if (strcmp(argv[arg], "nand") == 0) { personality = PERSONALITY_NAND; } else if (strcmp(argv[arg], "or") == 0) { personality = PERSONALITY_OR; } else if (strcmp(argv[arg], "xor") == 0) { personality = PERSONALITY_XOR; } else if (strcmp(argv[arg], "lessthan") == 0) { personality = PERSONALITY_LEQ; arg++; desiredCount = strtouint32(argv[arg], 0L) - 1; } else if (strcmp(argv[arg], "lessthanorequal") == 0) { personality = PERSONALITY_LEQ; arg++; desiredCount = strtouint32(argv[arg], 0L); } else if (strcmp(argv[arg], "greaterthan") == 0) { personality = PERSONALITY_GEQ; arg++; desiredCount = strtouint32(argv[arg], 0L) + 1; } else if (strcmp(argv[arg], "greaterthanorequal") == 0) { personality = PERSONALITY_GEQ; arg++; desiredCount = strtouint32(argv[arg], 0L); } else if (strcmp(argv[arg], "equal") == 0) { personality = PERSONALITY_EQ; arg++; desiredCount = strtouint32(argv[arg], 0L); } else { fprintf(stderr, "ERROR: unknown math personality %s\n", argv[arg]); exit(1); } } else if (strcmp(argv[arg], "-Dd") == 0) { personality = 'd'; } else if (strcmp(argv[arg], "-Dt") == 0) { personality = 't'; } else if (strcmp(argv[arg], "-Dp") == 0) { personality = 'p'; } else if (strcmp(argv[arg], "-Dc") == 0) { personality = 'c'; } else if (strcmp(argv[arg], "-Dh") == 0) { personality = 'h'; } else if (strcmp(argv[arg], "-memory") == 0) { arg++; memoryLimit = 
strtouint64(argv[arg], 0L) * 1024 * 1024; } else if (strcmp(argv[arg], "-segments") == 0) { arg++; segmentLimit = strtouint64(argv[arg], 0L); } else if (strcmp(argv[arg], "-threads") == 0) { arg++; numThreads = strtouint32(argv[arg], 0L); } else if (strcmp(argv[arg], "-configbatch") == 0) { personality = 'B'; configBatch = true; countBatch = false; mergeBatch = false; batchNumber = uint32ZERO; } else if (strcmp(argv[arg], "-countbatch") == 0) { arg++; personality = 'B'; configBatch = false; countBatch = true; mergeBatch = false; batchNumber = strtouint32(argv[arg], 0L); } else if (strcmp(argv[arg], "-mergebatch") == 0) { personality = 'B'; configBatch = false; countBatch = false; mergeBatch = true; batchNumber = uint32ZERO; } else if (strcmp(argv[arg], "-sge") == 0) { sgeJobName = argv[++arg]; } else if (strcmp(argv[arg], "-sgebuild") == 0) { sgeBuildOpt = argv[++arg]; } else if (strcmp(argv[arg], "-sgemerge") == 0) { sgeMergeOpt = argv[++arg]; } else if (strcmp(argv[arg], "-forcebuild") == 0) { isOnGrid = true; } else { fprintf(stderr, "Unknown option '%s'.\n", argv[arg]); fail = true; } } // Using threads is only useful if we are not a batch. // if ((numThreads > 0) && (configBatch || countBatch || mergeBatch)) { if (configBatch) fprintf(stderr, "WARNING: -threads has no effect with -configbatch, disabled.\n"); if (countBatch) fprintf(stderr, "WARNING: -threads has no effect with -countbatch, disabled.\n"); if (mergeBatch) fprintf(stderr, "WARNING: -threads has no effect with -mergebatch, disabled.\n"); numThreads = 0; } // SGE is not useful unless we are in batch mode. 
// if (sgeJobName && !configBatch && !countBatch && !mergeBatch) { fprintf(stderr, "ERROR: -sge not useful unless in batch mode (replace -B with -configbatch)\n"); exit(1); } if (fail) exit(1); } merylArgs::merylArgs(const char *prefix) { clear(); char *filename = new char [strlen(prefix) + 17]; sprintf(filename, "%s.merylArgs", prefix); errno = 0; FILE *F = fopen(filename, "rb"); if (errno) { fprintf(stderr, "merylArgs::readConfig()-- Failed to open '%s': %s\n", filename, strerror(errno)); exit(1); } char magic[17] = {0}; fread(magic, sizeof(char), 16, F); if (strncmp(magic, "merylBatcherv02", 16) != 0) { fprintf(stderr, "merylArgs::readConfig()-- '%s' doesn't appear to be a merylArgs file.\n", filename); exit(1); } // Load the config, then reset the pointers. fread(this, sizeof(merylArgs), 1, F); execName = readString(F); options = 0L; inputFile = readString(F); outputFile = readString(F); queryFile = 0L; sgeJobName = readString(F); sgeBuildOpt = readString(F); sgeMergeOpt = readString(F); mergeFiles = new char* [mergeFilesLen]; for (uint32 i=0; i #include #include #include // This reads the assembly frgctg, varctg and merQC badmers, computes // the number and location of bad-mer, bad-var regions, and their // depth, in contig space. // // File paths are hardcoded. // This code ONLY works on 64-bit hardware, but it's easy to fix. 
using namespace std; #include // // Begin code from Bri's intervalList.H, intervalList.C, splitToWords.H // typedef unsigned long uint64; typedef unsigned int uint32; #define uint64FMT "%lu" #define uint32FMT "%u" #define uint32FMTW(X) "%" #X "u" #define strtouint32(N,O) (uint32)strtoul(N, O, 10) #define strtouint64(N,O) (uint64)strtoul(N, O, 10) class splitToWords { public: splitToWords() { _argWords = 0; _maxWords = 0; _arg = 0L; _maxChars = 0; _cmd = 0L; }; splitToWords(char *cmd) { _argWords = 0; _maxWords = 0; _arg = 0L; _maxChars = 0; _cmd = 0L; split(cmd); }; ~splitToWords() { delete [] _cmd; delete [] _arg; }; void split(char *cmd) { // Step Zero: // // Count the length of the string, in words and in characters. // For simplicity, we overcount words, by just counting white-space. // // Then, allocate space for a temporary copy of the string, and a // set of pointers into the temporary copy (much like argv). // uint32 cmdChars = 1; // 1 == Space for terminating 0 uint32 cmdWords = 2; // 2 == Space for first word and terminating 0L for (char *tmp=cmd; *tmp; tmp++) { cmdWords += *tmp == ' '; cmdWords += *tmp == '\t'; cmdChars++; } if (cmdChars > _maxChars) { delete [] _cmd; _cmd = new char [cmdChars]; _maxChars = cmdChars; } if (cmdWords > _maxWords) { delete [] _arg; _arg = new char * [cmdWords]; _maxWords = cmdWords; } _argWords = 0; // Step One: // // Determine where the words are in the command string, copying the // string to _cmd and storing words in _arg. // bool isFirst = true; char *cmdI = cmd; char *cmdO = _cmd; while (*cmdI) { // If we are at a non-space character, we are in a word. If // this is the first character in the word, save the word in // the args list. // // Otherwise we are at a space and thus not in a word. Make // all spaces be string terminators, and declare that we are // at the start of a word. 
// if ((*cmdI != ' ') && (*cmdI != '\t')) { *cmdO = *cmdI; if (isFirst) { _arg[_argWords++] = cmdO; isFirst = false; } } else { *cmdO = 0; isFirst = true; } cmdI++; cmdO++; } // Finish off the list by terminating the last arg, and // terminating the list of args. // *cmdO = 0; _arg[_argWords] = 0L; }; uint32 numWords(void) { return(_argWords); }; char *getWord(uint32 i) { return(_arg[i]); }; char *operator[](uint32 i) { return(_arg[i]); }; private: uint32 _argWords; uint32 _maxWords; char **_arg; uint32 _maxChars; char *_cmd; }; typedef uint64 intervalNumber; struct _intervalPair { intervalNumber lo; intervalNumber hi; }; struct _intervalDepth { intervalNumber lo; intervalNumber hi; uint32 de; }; class intervalList { public: intervalList(); ~intervalList(); intervalList &operator=(intervalList &src); // Clear a list void clear(void) { _isSorted = true; _isMerged = true; _listLen = 0; } // Insert a new interval into the list void add(intervalNumber position, intervalNumber length); // Sort the set of intervals by the lo value void sort(void); // Merge overlapping or adjacent intervals together. void merge(void); void invert(intervalNumber lo, intervalNumber hi); // Returns the number of intervals uint32 numberOfIntervals(void) { return(_listLen); }; // Returns the sum of the length of all intervals intervalNumber sumOfLengths(void) { intervalNumber len = 0; uint32 i = numberOfIntervals(); if (i > 0) while (i--) len += _list[i].hi - _list[i].lo; return(len); }; // Populates an array with the intervals that are within the // supplied interval. Return // uint32 overlapping(intervalNumber lo, intervalNumber hi, uint32 *&intervals, uint32 &intervalsLen, uint32 &intervalsMax); // Populates this intervalList with the intersection of A and B. // This intervalList is not cleared prior to adding new intervals. // // Both A and B call merge(). 
// void intersect(intervalList &A, intervalList &B); // Populates this intervalList with regions in A that are completely // contained in a region in B. // // Both A and B call merge(). // void contained(intervalList &A, intervalList &B); intervalNumber lo(uint32 i) { return(_list[i].lo); }; intervalNumber hi(uint32 i) { return(_list[i].hi); }; private: bool _isSorted; bool _isMerged; uint32 _listLen; uint32 _listMax; _intervalPair *_list; }; // Takes as input an intervalList, computes the number of intervals // covering every position in there, stores this as a new set of // intervals, annotated with the depth. // // This is a static object, initialized once by the intervalList. // class intervalDepth { public: intervalDepth(intervalList &IL); ~intervalDepth(); // Returns the number of intervals uint32 numberOfIntervals(void) { return(_listLen); }; intervalNumber lo(uint32 i) { return(_list[i].lo); }; intervalNumber hi(uint32 i) { return(_list[i].hi); }; uint32 de(uint32 i) { return(_list[i].de); }; private: uint32 _listLen; uint32 _listMax; _intervalDepth *_list; }; intervalList::intervalList() { _isSorted = true; _isMerged = true; _listLen = 0; _listMax = 16; _list = new _intervalPair [_listMax]; } intervalList::~intervalList() { delete [] _list; } intervalList & intervalList::operator=(intervalList &src) { _isSorted = src._isSorted; _isMerged = src._isMerged; _listLen = src._listLen; if (_listMax < src._listMax) { delete [] _list; _listMax = src._listMax; _list = new _intervalPair [_listMax]; } memcpy(_list, src._list, _listLen * sizeof(_intervalPair)); return(*this); } void intervalList::add(intervalNumber position, intervalNumber length) { if (_listLen >= _listMax) { _listMax *= 2; _intervalPair *l = new _intervalPair [_listMax]; memcpy(l, _list, sizeof(_intervalPair) * _listLen); delete [] _list; _list = l; } _list[_listLen].lo = position; _list[_listLen].hi = position + length; #if 0 // Aborted attempt to add a data field here. 
Got stuck // deciding how to handle merges lightweight _list[_listLen].data = 0L; if (data != ~uint64ZERO) { _list[_listLen].dataLen = 1; _list[_listLen].dataMax = 4; _list[_listLen].data = new uint64 [_list[_listLen].dataMax]; _list[_listLen].data[0] = data; } #endif if ((_listLen > 0) && (_list[_listLen-1].lo > _list[_listLen].lo)) { _isSorted = false; _isMerged = false; } _listLen++; } static int intervalList_sort_helper(const void *a, const void *b) { _intervalPair *A = (_intervalPair *)a; _intervalPair *B = (_intervalPair *)b; if (A->lo < B->lo) return(-1); if (A->lo > B->lo) return(1); if (A->hi < B->hi) return(-1); if (A->hi > B->hi) return(1); return(0); } void intervalList::sort(void) { if (_isSorted) return; if (_listLen > 1) qsort(_list, _listLen, sizeof(_intervalPair), intervalList_sort_helper); _isSorted = true; } void intervalList::merge(void) { uint32 thisInterval = 0; uint32 nextInterval = 1; if (_listLen < 2) return; sort(); while (nextInterval < _listLen) { if ((_list[thisInterval].lo == 0) && (_list[thisInterval].hi == 0)) { // Our interval is empty. Copy in the interval we are // examining and move to the next. // XXX This is probably useless, thisInterval should always be // valid. _list[thisInterval].lo = _list[nextInterval].lo; _list[thisInterval].hi = _list[nextInterval].hi; _list[nextInterval].lo = 0; _list[nextInterval].hi = 0; nextInterval++; } else { // This interval is valid. See if it overlaps with the next // interval. if (_list[thisInterval].hi >= _list[nextInterval].lo) { // Got an intersection. // Merge nextInterval into thisInterval -- the hi range // is extended if the nextInterval range is larger. // if (_list[thisInterval].hi < _list[nextInterval].hi) _list[thisInterval].hi = _list[nextInterval].hi; // Clear the just merged nextInterval and move to the next one. // _list[nextInterval].lo = 0; _list[nextInterval].hi = 0; nextInterval++; } else { // No intersection. Move along. Nothing to see here. 
// If there is a gap between the target and the examine (we // must have merged sometime in the past), copy examine to // the next target. thisInterval++; if (thisInterval != nextInterval) { _list[thisInterval].lo = _list[nextInterval].lo; _list[thisInterval].hi = _list[nextInterval].hi; } nextInterval++; } } } if (thisInterval+1 < _listLen) _listLen = thisInterval + 1; _isMerged = true; } void intervalList::invert(intervalNumber lo, intervalNumber hi) { if (!_isSorted || !_isMerged) { fprintf(stderr, "intervalList::invert()-- ERROR! List is not sorted or not merged!\n"); exit(1); } // Create a new list to store the inversion // uint32 invLen = 0; uint32 invMax = _listLen + 2; _intervalPair *inv = new _intervalPair [invMax]; // Add the first // if (lo < _list[0].lo) { inv[invLen].lo = lo; inv[invLen].hi = _list[0].lo; invLen++; } // Add the pieces for (uint32 i=1; i<_listLen; i++) { if (_list[i-1].hi < _list[i].lo) { inv[invLen].lo = _list[i-1].hi; inv[invLen].hi = _list[i].lo; invLen++; } } // Add the last if (_list[_listLen-1].hi < hi) { inv[invLen].lo = _list[_listLen-1].hi; inv[invLen].hi = hi; invLen++; } // Nuke the old list, swap in the new one delete [] _list; _list = inv; _listLen = invLen; _listMax = invMax; } uint32 intervalList::overlapping(intervalNumber rangelo, intervalNumber rangehi, uint32 *&intervals, uint32 &intervalsLen, uint32 &intervalsMax) { // XXX: Naive implementation that is easy to verify (and that works // on an unsorted list). 
if (intervals == 0L) { intervalsMax = 256; intervals = new uint32 [intervalsMax]; } intervalsLen = 0; for (uint32 i=0; i<_listLen; i++) { if ((rangelo <= _list[i].hi) && (rangehi >= _list[i].lo)) { if (intervalsLen >= intervalsMax) { intervalsMax *= 2; uint32 *X = new uint32 [intervalsMax]; memcpy(X, intervals, sizeof(uint32) * intervalsLen); delete [] intervals; intervals = X; } intervals[intervalsLen++] = i; } } return(intervalsLen); } void intervalList::intersect(intervalList &A, intervalList &B) { A.merge(); B.merge(); uint32 ai = 0; uint32 bi = 0; while ((ai < A.numberOfIntervals()) && (bi < B.numberOfIntervals())) { uint32 al = A.lo(ai); uint32 ah = A.hi(ai); uint32 bl = B.lo(bi); uint32 bh = B.hi(bi); uint32 nl = 0; uint32 nh = 0; // If they intersect, make a new region // if ((al <= bl) && (bl < ah)) { nl = bl; nh = (ah < bh) ? ah : bh; } if ((bl <= al) && (al < bh)) { nl = al; nh = (ah < bh) ? ah : bh; } if (nl < nh) add(nl, nh - nl); // Advance the list with the earlier region. // if (ah < bh) { // A ends before B ai++; } else if (ah > bh) { // B ends before A bi++; } else { // Exactly the same ending! ai++; bi++; } } } void intervalList::contained(intervalList &A, intervalList &B) { A.merge(); B.merge(); uint32 ai = 0; uint32 bi = 0; while ((ai < A.numberOfIntervals()) && (bi < B.numberOfIntervals())) { uint32 al = A.lo(ai); uint32 ah = A.hi(ai); uint32 bl = B.lo(bi); uint32 bh = B.hi(bi); // If A is contained in B, make a new region. // if ((bl <= al) && (ah <= bh)) add(bl, bh - bl); #if 0 if ((al <= bl) && (bh <= ah)) add(al, ah - al); #endif // Advance the list with the earlier region. // if (ah < bh) { // A ends before B ai++; } else if (ah > bh) { // B ends before A bi++; } else { // Exactly the same ending! 
ai++; bi++; } } } static int intervalDepth_sort_helper(const void *a, const void *b) { _intervalDepth *A = (_intervalDepth *)a; _intervalDepth *B = (_intervalDepth *)b; if (A->lo < B->lo) return(-1); if (A->lo > B->lo) return(1); return(0); } intervalDepth::intervalDepth(intervalList &IL) { uint32 idlen = IL.numberOfIntervals() * 2; _intervalDepth *id = new _intervalDepth [idlen]; for (uint32 i=0; i &lowCoverage) { char line[1024] = {0}; map ILs; fprintf(stderr, "Reading depth from '%s'\n", depthname); errno = 0; FILE *F = fopen(depthname, "r"); if (errno) fprintf(stderr, "failed to open '%s': %s\n", depthname, strerror(errno)), exit(1); uint32 i=0; fgets(line, 1024, F); while (!feof(F)) { splitToWords W(line); uint64 uid = strtouint64(W[1], 0L); uint32 beg = strtouint32(W[2], 0L); uint32 end = strtouint32(W[3], 0L); if (beg > end) fprintf(stderr, "ERROR: l="uint32FMT" h="uint32FMT"\n", beg, end); if (ILs[uid] == 0L) ILs[uid] = new intervalList(); ILs[uid]->add(beg, end - beg); i++; fgets(line, 1024, F); } fclose(F); fprintf(stderr, " "uint32FMT" lines.\n", i); map::iterator it = ILs.begin(); map::iterator ed = ILs.end(); while (it != ed) { lowCoverage[it->first] = new intervalDepth(*it->second); delete it->second; it->second = 0L; it++; } } void readVariation(char *depthname, map &variation) { char line[1024 * 1024] = {0}; fprintf(stderr, "Reading variation from '%s'\n", depthname); errno = 0; FILE *F = fopen(depthname, "r"); if (errno) fprintf(stderr, "failed to open '%s': %s\n", depthname, strerror(errno)), exit(1); uint32 i=0; fgets(line, 1024 * 1024, F); while (!feof(F)) { splitToWords W(line); uint64 uid = strtouint64(W[1], 0L); uint32 beg = strtouint32(W[2], 0L); uint32 end = strtouint32(W[3], 0L); if (variation[uid] == 0L) variation[uid] = new intervalList(); variation[uid]->add(beg, end - beg); i++; fgets(line, 1024 * 1024, F); } fclose(F); fprintf(stderr, " "uint32FMT" lines.\n", i); } void readBadMers(char *depthname, map &badMers) { char line[1024] = 
{0}; fprintf(stderr, "Reading badMers from '%s'\n", depthname); errno = 0; FILE *F = fopen(depthname, "r"); if (errno) fprintf(stderr, "failed to open '%s': %s\n", depthname, strerror(errno)), exit(1); uint32 i=0; fgets(line, 1024, F); while (!feof(F)) { splitToWords W(line); // Change every non-digit to a space in the first word. for (uint32 z=strlen(W[0])-1; z--; ) if (!isdigit(W[0][z])) W[0][z] = ' '; uint64 uid = strtouint64(W[0], 0L); uint32 beg = strtouint32(W[3], 0L); uint32 end = strtouint32(W[4], 0L); if (badMers[uid] == 0L) badMers[uid] = new intervalList(); badMers[uid]->add(beg, end - beg); i++; fgets(line, 1024, F); } fclose(F); fprintf(stderr, " "uint32FMT" lines.\n", i); } int main(int argc, char **argv) { map badMers; map variation; map lowCoverage; bool showDepthIntersect = false; bool showVariantIntersect = false; bool showVarDepthIntersect = false; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-D") == 0) { } else if (strcmp(argv[arg], "-pd") == 0) { showDepthIntersect = true; } else if (strcmp(argv[arg], "-pv") == 0) { showVariantIntersect = true; } else if (strcmp(argv[arg], "-pvd") == 0) { showVarDepthIntersect = true; } else { fprintf(stderr, "usage: %s [-D debugfile] [-pd] [-pv] [-pvd]\n", argv[0]); fprintf(stderr, " -pd print bad mers regions isect depth\n"); fprintf(stderr, " -pv print bad mers regions isect variants\n"); fprintf(stderr, " -pvd print bad mers regions isect both variants and depth\n"); exit(1); } arg++; } #if 1 // HuRef6, in the assembly directory. // readDepth ("/project/huref6/assembly/h6/9-terminator/h6.posmap.frgctg", lowCoverage); readVariation("/project/huref6/assembly/h6/9-terminator/h6.posmap.varctg", variation); readBadMers ("/project/huref6/assembly/h6-mer-validation/h6-ms22-allfrags-normalcontigs.badmers.0.singlecontig.zerofrag.badmers", badMers); #endif #if 0 // HuRef6, ws=25, in the assembly directory. 
// readDepth ("/project/huref6/assembly/h6/9-terminator-ws25/h6.posmap.frgctg", lowCoverage); readVariation("/project/huref6/assembly/h6/9-terminator-ws25/h6.posmap.varctg", variation); readBadMers ("/project/huref6/assembly/h6-mer-validation/h6-version4-ws25/h6-ms22-allfrags-normalcontigs.badmers.0.singlecontig.zerofrag.badmers", badMers); #endif #if 0 // Our scratch huref // readDepth ("/project/huref6/redo_consensus-gennady/mer-validation/h6tmp.posmap.frgctg", lowCoverage); readVariation("/project/huref6/redo_consensus-gennady/mer-validation/h6tmp.posmap.varctg", variation); readBadMers ("/project/huref6/redo_consensus-gennady/mer-validation/h6tmp-ms22-allfrags-allcontigs.badmers.0.singlecontig.zerofrag.badmers", badMers); #endif uint32 badBegDepth[1024] = {0}; uint32 badEndDepth[1024] = {0}; uint32 badDepth[32][32]; for (uint32 i=0; i<32; i++) for (uint32 j=0; j<32; j++) badDepth[i][j] = 0; map::iterator it = badMers.begin(); map::iterator ed = badMers.end(); while (it != ed) { uint64 uid = it->first; intervalList *Iv = variation[uid]; intervalList *Ib = badMers[uid]; intervalList *Ii = 0L; intervalDepth *Id = lowCoverage[uid]; if (Iv) Iv->merge(); if (Ib) Ib->merge(); if (Iv && Ib) { Ii = new intervalList(); Ii->intersect(*Iv, *Ib); } if (Ii) { uint32 ii = 0; uint32 id = 0; while ((ii < Ii->numberOfIntervals()) && (id < Id->numberOfIntervals())) { // We want to count the number of times a badmer region // begins/ends in some depth. 
//fprintf(stderr, "testing beg "uint32FMT" "uint32FMT" -- "uint32FMT" "uint32FMT"\n", // Ii->lo(ii), Ii->hi(ii), Id->lo(id), Id->hi(id)); uint32 beg = 0; uint32 end = 0; // Low points are not allowed to be equal to high points, skip to the next while ((id < Id->numberOfIntervals()) && (Id->hi(id) <= Ii->lo(ii))) { id++; //fprintf(stderr, "testing beg (m) "uint32FMT" "uint32FMT" -- "uint32FMT" "uint32FMT"\n", // Ii->lo(ii), Ii->hi(ii), Id->lo(id), Id->hi(id)); } if (id < Id->numberOfIntervals()) { uint32 lo = Id->lo(id); uint32 hi = Id->hi(id); // Low points are not allowed to be equal to high points. if ((lo <= Ii->lo(ii)) && (Ii->lo(ii) < hi)) { beg = Id->de(id); } else { fprintf(stderr, "failed to find begin "uint32FMT" "uint32FMT" -- "uint32FMT" "uint32FMT" "uint32FMT"\n", Ii->lo(ii), Ii->hi(ii), Id->lo(id), Id->hi(id), Id->de(id)); if (id > 0) fprintf(stderr, " "uint32FMT" "uint32FMT" -- "uint32FMT" "uint32FMT" "uint32FMT"\n", Ii->lo(ii), Ii->hi(ii), Id->lo(id-1), Id->hi(id-1), Id->de(id-1)); //exit(1); } } //fprintf(stderr, "testing end "uint32FMT" "uint32FMT" -- "uint32FMT" "uint32FMT"\n", // Ii->lo(ii), Ii->hi(ii), Id->lo(id), Id->hi(id)); // High points can be equal. while ((id < Id->numberOfIntervals()) && (Id->hi(id) < Ii->hi(ii))) { id++; //fprintf(stderr, "testing end (m) "uint32FMT" "uint32FMT" -- "uint32FMT" "uint32FMT"\n", // Ii->lo(ii), Ii->hi(ii), Id->lo(id), Id->hi(id)); } if (id < Id->numberOfIntervals()) { uint32 lo = Id->lo(id); uint32 hi = Id->hi(id); // High points aren't allowed to be equal to lo, but can be equal to hi. 
if ((lo < Ii->hi(ii)) && (Ii->hi(ii) <= hi)) { end = Id->de(id); } else { fprintf(stderr, "failed to find end "uint32FMT" "uint32FMT" -- "uint32FMT" "uint32FMT" "uint32FMT"\n", Ii->lo(ii), Ii->hi(ii), Id->lo(id), Id->hi(id), Id->de(id)); if (id > 0) fprintf(stderr, " "uint32FMT" "uint32FMT" -- "uint32FMT" "uint32FMT" "uint32FMT"\n", Ii->lo(ii), Ii->hi(ii), Id->lo(id-1), Id->hi(id-1), Id->de(id-1)); //exit(1); } } badBegDepth[beg]++; badEndDepth[end]++; fprintf(stdout, uint64FMT"\t"uint32FMT"\t"uint32FMT"\tdepth="uint32FMT","uint32FMT"\n", uid, Ii->lo(ii), Ii->hi(ii), beg, end); if ((beg < 32) && (end < 32)) badDepth[beg][end]++; ii++; } } it++; } uint32 bb = 0; uint32 be = 0; for (uint32 x=0; x<32; x++) { fprintf(stdout, uint32FMT"\t"uint32FMT"\t"uint32FMT"\n", x, badBegDepth[x], badEndDepth[x]); bb += badBegDepth[x]; be += badEndDepth[x]; } fprintf(stdout, "total\t"uint32FMT"\t"uint32FMT"\n", bb, be); for (uint32 i=0; i<30; i++) { for (uint32 j=0; j<30; j++) fprintf(stdout, uint32FMTW(5), badDepth[i][j]); fprintf(stdout, "\n"); } return(0); } kmer-code-2013-trunk/meryl/merge.qsort.C0000644000000000000000000003107712322046702016656 0ustar rootroot#include #include #include #include "meryl.H" #include "libmeryl.H" using namespace std; #include struct mMer { kMer _mer; uint32 _cnt; uint32 _off; }; static int mMerGreaterThan(void const *a, void const *b) { mMer const *A = (mMer const *)a; mMer const *B = (mMer const *)b; return(B->_mer.qsort_less(A->_mer)); } class mMerList { public: mMerList(uint32 maxSize) { _posLen = 0; _posMax = 2 * maxSize; _pos = new uint32 [_posMax]; _mmmLen = 0; _mmmMax = maxSize; _mmm = new mMer [_mmmMax]; }; ~mMerList() { delete [] _pos; delete [] _mmm; }; uint32 length(void) { return(_mmmLen); }; // Until we sort, first() is the last thing loaded. // After we sort, first() is the lowest mer in the set. 
kMer &first(void) { return(_mmm[_mmmLen-1]._mer); }; //kMer &last(void) { return(_mmm[0]._mer); }; //kMer &get(uint32 i) { return(_mmm[i]._mer); }; // Return the first (sorted order) thing in the list -- it's the last on the list. kMer *pop(uint32 &cnt, uint32* &pos) { if (_mmmLen == 0) return(0L); _mmmLen--; assert(_sorted); cnt = _mmm[_mmmLen]._cnt; pos = 0L; if (_mmm[_mmmLen]._off != ~uint32ZERO) pos = _pos + _mmm[_mmmLen]._off; return(&_mmm[_mmmLen]._mer); } // rebuild the position list, squeezes out empty items void rebuild(void) { if (_posLen > 0) { uint32 *np = new uint32 [_posMax]; _posLen = 0; for (uint32 i=0; i<_mmmLen; i++) { mMer *m = _mmm + i; if (m->_off != ~uint32ZERO) { _mmm[_mmmLen]._off = _posLen; for (uint32 p=0; p_cnt; p++, _posLen++) np[_posLen] = _pos[p]; } } delete [] _pos; _pos = np; } }; // Read more mers from the file void read(merylStreamReader *R, uint32 num) { uint32 xxx = 0; if (_mmmLen + num >= _mmmMax) { fprintf(stderr, "Reallocate _mmm\n"); _mmmMax = _mmmMax + 2 * num; mMer *tmp = new mMer [_mmmMax]; memcpy(tmp, _mmm, sizeof(mMer) * _mmmLen); delete [] _mmm; _mmm = tmp; } _sorted = false; R->nextMer(); for (xxx=0; (xxx < num) && (R->validMer()); xxx++) { if (_mmmMax <= _mmmLen) { fprintf(stderr, "Reallocate _mmm\n"); _mmmMax *= 2; mMer *tmp = new mMer [_mmmMax]; memcpy(tmp, _mmm, sizeof(mMer) * _mmmLen); delete [] _mmm; _mmm = tmp; } _mmm[_mmmLen]._mer = R->theFMer(); _mmm[_mmmLen]._cnt = R->theCount(); _mmm[_mmmLen]._off = ~uint32ZERO; uint32 *pos = R->thePositions(); if (pos) { _mmm[_mmmLen]._off = _posLen; if (_posMax <= _posLen + _mmm[_mmmLen]._cnt) { fprintf(stderr, "Reallocate _pos\n"); _posMax *= 2; uint32 *tmp = new uint32 [_posMax]; memcpy(tmp, _pos, sizeof(uint32) * _posLen); delete [] _pos; _pos = tmp; } for (uint32 i=0; i<_mmm[_mmmLen]._cnt; i++, _posLen++) _pos[_posLen] = pos[i]; } _mmmLen++; R->nextMer(); } //fprintf(stderr, "read()-- now up to "uint32FMT" mers ("uint32FMT" pos); loaded "uint32FMT" out of "uint32FMT" 
requested.\n", _mmmLen, _posLen, xxx, num); }; // Sort our list of mers void sort(void) { if (_sorted == false) { //fprintf(stderr, "SORT BEG\n"); qsort_mt(_mmm, _mmmLen, sizeof(mMer), mMerGreaterThan, 8, 32 * 1024); _sorted = true; //fprintf(stderr, "SORT END\n"); } }; private: bool _sorted; uint32 _posLen; uint32 _posMax; uint32 *_pos; uint32 _mmmLen; uint32 _mmmMax; mMer *_mmm; }; void multipleOperations(merylArgs *args) { char debugstring[256]; char debugstring2[256]; if (args->mergeFilesLen < 2) { fprintf(stderr, "ERROR - must have at least two databases (you gave "uint32FMT")!\n", args->mergeFilesLen); exit(1); } if (args->outputFile == 0L) { fprintf(stderr, "ERROR - no output file specified.\n"); exit(1); } if ((args->personality != PERSONALITY_MERGE) && (args->personality != PERSONALITY_MIN) && (args->personality != PERSONALITY_MINEXIST) && (args->personality != PERSONALITY_MAX) && (args->personality != PERSONALITY_ADD) && (args->personality != PERSONALITY_AND) && (args->personality != PERSONALITY_NAND) && (args->personality != PERSONALITY_OR) && (args->personality != PERSONALITY_XOR)) { fprintf(stderr, "ERROR - only personalities min, minexist, max, add, and, nand, or, xor\n"); fprintf(stderr, "ERROR - are supported in multipleOperations().\n"); fprintf(stderr, "ERROR - this is a coding error, not a user error.\n"); exit(1); } merylStreamReader **R = new merylStreamReader* [args->mergeFilesLen]; merylStreamWriter *W = 0L; uint32 maxSize = 512 * 1024; mMerList *M = new mMerList(maxSize + maxSize / 4); // Open the input files and load some mers - we need to do this // just so we can check the mersizes/compression next. 
// for (uint32 i=0; imergeFilesLen; i++) { R[i] = new merylStreamReader(args->mergeFiles[i]); M->read(R[i], 1 + i); } // Verify that the mersizes are all the same // bool fail = false; uint32 merSize = R[0]->merSize(); uint32 merComp = R[0]->merCompression(); for (uint32 i=0; imergeFilesLen; i++) { fail |= (merSize != R[i]->merSize()); fail |= (merComp != R[i]->merCompression()); } if (fail) fprintf(stderr, "ERROR: mer sizes (or compression level) differ.\n"), exit(1); // Open the output file, using the largest prefix size found in the // input/mask files. // uint32 prefixSize = 0; for (uint32 i=0; imergeFilesLen; i++) if (prefixSize < R[i]->prefixSize()) prefixSize = R[i]->prefixSize(); W = new merylStreamWriter(args->outputFile, merSize, merComp, prefixSize); kMer lastLoaded; lastLoaded.setMerSize(merSize); lastLoaded.smallest(); // Load mers from all files, remember the largest mer we load. // for (uint32 i=0; imergeFilesLen; i++) { M->read(R[i], maxSize / args->mergeFilesLen); if (lastLoaded < M->first()) lastLoaded = M->first(); } // Make sure all files have at least that largest mer loaded. 
// for (uint32 i=0; imergeFilesLen; i++) while (R[i]->validMer() && (R[i]->theFMer() <= lastLoaded)) M->read(R[i], 2 * 1024); fprintf(stderr, "Initial load: length="uint32FMT" lastLoaded=%s\n", M->length(), lastLoaded.merToString(debugstring)); M->sort(); bool allLoaded = false; bool moreStuff = true; kMer currentMer; // The current mer we're operating on uint32 currentCount = uint32ZERO; // The count (operation dependent) of this mer uint32 currentTimes = uint32ZERO; // Number of files it's in uint32 currentPositionsMax = 0; uint32 *currentPositions = 0L; kMer *thisMer; // The mer we just read uint32 thisCount = uint32ZERO; // The count of the mer we just read uint32 *thisPositions = 0L; speedCounter *C = new speedCounter(" %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, args->beVerbose); currentMer.setMerSize(merSize); while (moreStuff) { // Load more stuff if needed. M is sorted, so first() is the // smallest mer in the set - we're good up to and including // lastLoaded. // if ((allLoaded == false) && ((M->length() == 0) || (lastLoaded < M->first()))) { #if 0 if (M->length() > 0) fprintf(stderr, "LOADMORE length="uint32FMT" lastLoaded=%s first=%s\n", M->length(), lastLoaded.merToString(debugstring2), M->first().merToString(debugstring)); else fprintf(stderr, "LOADMORE length="uint32FMT" lastLoaded=%s first=EMPTY\n", M->length(), lastLoaded.merToString(debugstring2)); #endif // We need to copy all the mers currently loaded into fresh // storage, so we can deallocate the position storage. Yucky. 
// M->rebuild(); allLoaded = true; // Load more stuff to give us a large collection of mers // uint32 additionalLoading = 8192; if (maxSize / args->mergeFilesLen > M->length()) additionalLoading = maxSize / args->mergeFilesLen - M->length(); //fprintf(stderr, "LOADMORE adding "uint32FMT" from each file\n", additionalLoading); lastLoaded.setMerSize(merSize); lastLoaded.smallest(); for (uint32 i=0; imergeFilesLen; i++) { if (R[i]->validMer()) { M->read(R[i], additionalLoading); if (lastLoaded < M->first()) lastLoaded = M->first(); allLoaded = false; } } // Make sure all files have at least that largest mer loaded. // for (uint32 i=0; imergeFilesLen; i++) while (R[i]->validMer() && (R[i]->theFMer() <= lastLoaded)) M->read(R[i], 2 * 1024); M->sort(); } // All done? Exit. if (M->length() == 0) moreStuff = false; thisMer = M->pop(thisCount, thisPositions); // If we've hit a different mer, write out the last one if ((M->length() == 0) || (*thisMer != currentMer)) { switch (args->personality) { case PERSONALITY_MIN: if (currentTimes == args->mergeFilesLen) W->addMer(currentMer, currentCount); break; case PERSONALITY_MERGE: case PERSONALITY_MINEXIST: case PERSONALITY_MAX: case PERSONALITY_ADD: W->addMer(currentMer, currentCount, currentPositions); break; case PERSONALITY_AND: if (currentTimes == args->mergeFilesLen) W->addMer(currentMer, currentCount); break; case PERSONALITY_NAND: if (currentTimes != args->mergeFilesLen) W->addMer(currentMer, currentCount); break; case PERSONALITY_OR: W->addMer(currentMer, currentCount); break; case PERSONALITY_XOR: if ((currentTimes % 2) == 1) W->addMer(currentMer, currentCount); break; default: fprintf(stderr, "ERROR - invalid personality in multipleOperations::write\n"); fprintf(stderr, "ERROR - this is a coding error, not a user error.\n"); exit(1); break; } currentMer = *thisMer; currentCount = uint32ZERO; currentTimes = uint32ZERO; C->tick(); } if (moreStuff == false) break; // Perform the operation switch (args->personality) { case 
PERSONALITY_MERGE: if (thisPositions) { if (currentPositionsMax == 0) { currentPositionsMax = 1048576; currentPositions = new uint32 [currentPositionsMax]; } if (currentPositionsMax < currentCount + thisCount) { while (currentPositionsMax < currentCount + thisCount) currentPositionsMax *= 2; uint32 *t = new uint32 [currentPositionsMax]; memcpy(t, currentPositions, sizeof(uint32) * currentCount); delete [] currentPositions; currentPositions = t; } if (thisCount < 16) { for (uint32 i=0; i thisCount) currentCount = thisCount; } break; case PERSONALITY_MAX: if (currentCount < thisCount) currentCount = thisCount; break; case PERSONALITY_ADD: currentCount += thisCount; break; case PERSONALITY_AND: case PERSONALITY_NAND: case PERSONALITY_OR: case PERSONALITY_XOR: currentCount = 1; break; default: fprintf(stderr, "ERROR - invalid personality in multipleOperations::operate\n"); fprintf(stderr, "ERROR - this is a coding error, not a user error.\n"); exit(1); break; } currentTimes++; } for (uint32 i=0; imergeFilesLen; i++) delete R[i]; delete R; delete W; delete M; delete C; } kmer-code-2013-trunk/meryl/unaryOp.C0000644000000000000000000000343111023244666016044 0ustar rootroot#include #include #include #include "meryl.H" #include "libmeryl.H" void unaryOperations(merylArgs *args) { if (args->mergeFilesLen != 1) { fprintf(stderr, "ERROR - must have exactly one file!\n"); exit(1); } if (args->outputFile == 0L) { fprintf(stderr, "ERROR - no output file specified.\n"); exit(1); } if ((args->personality != PERSONALITY_LEQ) && (args->personality != PERSONALITY_GEQ) && (args->personality != PERSONALITY_EQ)) { fprintf(stderr, "ERROR - only personalities lessthan, lessthanorequal,\n"); fprintf(stderr, "ERROR - greaterthan, greaterthanorequal, and equal\n"); fprintf(stderr, "ERROR - are supported in unaryOperations().\n"); fprintf(stderr, "ERROR - this is a coding error, not a user error.\n"); exit(1); } // Open the input and output files -- we don't know the number // unique, 
distinct, and total until after the operation, so we // leave them zero. // merylStreamReader *R = new merylStreamReader(args->mergeFiles[0]); merylStreamWriter *W = new merylStreamWriter(args->outputFile, R->merSize(), R->merCompression(), R->prefixSize(), R->hasPositions()); switch (args->personality) { case PERSONALITY_LEQ: while (R->nextMer()) if (R->theCount() <= args->desiredCount) W->addMer(R->theFMer(), R->theCount(), R->thePositions()); break; case PERSONALITY_GEQ: while (R->nextMer()) if (R->theCount() >= args->desiredCount) W->addMer(R->theFMer(), R->theCount(), R->thePositions()); break; case PERSONALITY_EQ: while (R->nextMer()) if (R->theCount() == args->desiredCount) W->addMer(R->theFMer(), R->theCount(), R->thePositions()); break; } delete R; delete W; } kmer-code-2013-trunk/meryl/build-threads.C0000644000000000000000000000367712322046702017144 0ustar rootroot#include #include #include #include #include #include "bio++.H" #include "meryl.H" #include "libmeryl.H" void runSegment(merylArgs *args, uint64 segment); pthread_mutex_t segmentMutex; uint64 segmentNext; uint64 segmentMax; uint32 *segmentDone; void* buildThread(void *U) { uint64 segment = uint32ZERO; merylArgs *args = (merylArgs *)U; while (segment < segmentMax) { pthread_mutex_lock(&segmentMutex); segment = segmentNext++; pthread_mutex_unlock(&segmentMutex); if (segment < segmentMax) { runSegment(args, segment); segmentDone[segment]++; } } if (args->beVerbose) fprintf(stderr, "Thread exits.\n"); return(0L); } void runThreaded(merylArgs *args) { // Clear stuff // segmentNext = uint64ZERO; segmentMax = args->segmentLimit; segmentDone = new uint32 [segmentMax]; for (uint64 s=0; snumThreads; i++) pthread_create(&threadID, &threadAttr, buildThread, (void *)args); // Wait for the threads to complete // struct timespec shortNap; shortNap.tv_sec = 1; shortNap.tv_nsec = 0; uint64 s=0; while (s < segmentMax) { if (segmentDone[s] == 0) nanosleep(&shortNap, 0L); else s++; } if (args->beVerbose) 
fprintf(stderr, "Threads all done, cleaning up.\n"); // Cleanup // pthread_attr_destroy(&threadAttr); pthread_mutex_destroy(&segmentMutex); delete [] segmentDone; } kmer-code-2013-trunk/meryl/merge.C0000644000000000000000000001674112322046702015510 0ustar rootroot#include #include #include #include "meryl.H" #include "libmeryl.H" void multipleOperations(merylArgs *args) { if (args->mergeFilesLen < 2) { fprintf(stderr, "ERROR - must have at least two databases (you gave "uint32FMT")!\n", args->mergeFilesLen); exit(1); } if (args->outputFile == 0L) { fprintf(stderr, "ERROR - no output file specified.\n"); exit(1); } if ((args->personality != PERSONALITY_MERGE) && (args->personality != PERSONALITY_MIN) && (args->personality != PERSONALITY_MINEXIST) && (args->personality != PERSONALITY_MAX) && (args->personality != PERSONALITY_MAXEXIST) && (args->personality != PERSONALITY_ADD) && (args->personality != PERSONALITY_AND) && (args->personality != PERSONALITY_NAND) && (args->personality != PERSONALITY_OR) && (args->personality != PERSONALITY_XOR)) { fprintf(stderr, "ERROR - only personalities min, minexist, max, maxexist, add, and, nand, or, xor\n"); fprintf(stderr, "ERROR - are supported in multipleOperations(). 
(%d)\n", args->personality); fprintf(stderr, "ERROR - this is a coding error, not a user error.\n"); exit(1); } merylStreamReader **R = new merylStreamReader* [args->mergeFilesLen]; merylStreamWriter *W = 0L; // Open the input files, read in the first mer // for (uint32 i=0; imergeFilesLen; i++) { R[i] = new merylStreamReader(args->mergeFiles[i]); R[i]->nextMer(); } // Verify that the mersizes are all the same // bool fail = false; uint32 merSize = R[0]->merSize(); uint32 merComp = R[0]->merCompression(); for (uint32 i=0; imergeFilesLen; i++) { fail |= (merSize != R[i]->merSize()); fail |= (merComp != R[i]->merCompression()); } if (fail) fprintf(stderr, "ERROR: mer sizes (or compression level) differ.\n"), exit(1); // Open the output file, using the largest prefix size found in the // input/mask files. // uint32 prefixSize = 0; for (uint32 i=0; imergeFilesLen; i++) if (prefixSize < R[i]->prefixSize()) prefixSize = R[i]->prefixSize(); W = new merylStreamWriter(args->outputFile, merSize, merComp, prefixSize, args->positionsEnabled); // We will find the smallest mer in any file, and count the number of times // it is present in the input files. bool moreInput = true; kMer currentMer; // The current mer we're operating on uint32 currentCount = uint32ZERO; // The count (operation dependent) of this mer uint32 currentTimes = uint32ZERO; // Number of files it's in uint32 currentPositionsMax = 0; uint32 *currentPositions = 0L; kMer thisMer; // The mer we just read uint32 thisFile = ~uint32ZERO; // The file we read it from uint32 thisCount = uint32ZERO; // The count of the mer we just read speedCounter *C = new speedCounter(" %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, args->beVerbose); currentMer.setMerSize(merSize); thisMer.setMerSize(merSize); while (moreInput) { // Find the smallest mer present in any input file. 
// moreInput = false; thisMer.clear(); thisFile = ~uint32ZERO; thisCount = uint32ZERO; // Load thisMer with the first valid mer for (uint32 i=0; imergeFilesLen && !moreInput; i++) if (R[i]->validMer()) { moreInput = true; thisCount = R[i]->theCount(); thisFile = i; thisMer = R[i]->theFMer(); } // Now find the smallest one if (moreInput) { for (uint32 i=thisFile+1; imergeFilesLen; i++) if ((R[i]->validMer()) && (R[i]->theFMer()) < thisMer) { moreInput = true; thisCount = R[i]->theCount(); thisFile = i; thisMer = R[i]->theFMer(); } } // If we've hit a different mer, write out the last one if ((moreInput == false) || (thisMer != currentMer)) { switch (args->personality) { case PERSONALITY_MIN: case PERSONALITY_MAX: if (currentTimes == args->mergeFilesLen) W->addMer(currentMer, currentCount); break; case PERSONALITY_MERGE: case PERSONALITY_MINEXIST: case PERSONALITY_MAXEXIST: case PERSONALITY_ADD: W->addMer(currentMer, currentCount, currentPositions); break; case PERSONALITY_AND: if (currentTimes == args->mergeFilesLen) W->addMer(currentMer, currentCount); break; case PERSONALITY_NAND: if (currentTimes != args->mergeFilesLen) W->addMer(currentMer, currentCount); break; case PERSONALITY_OR: W->addMer(currentMer, currentCount); break; case PERSONALITY_XOR: if ((currentTimes % 2) == 1) W->addMer(currentMer, currentCount); break; default: fprintf(stderr, "ERROR - invalid personality in multipleOperations::write\n"); fprintf(stderr, "ERROR - this is a coding error, not a user error.\n"); exit(1); break; } currentMer = thisMer; currentCount = uint32ZERO; currentTimes = uint32ZERO; C->tick(); } // All done? Exit. 
if (moreInput == false) continue; // Perform the operation switch (args->personality) { case PERSONALITY_MERGE: if (R[thisFile]->thePositions()) { if (currentPositionsMax == 0) { currentPositionsMax = 1048576; currentPositions = new uint32 [currentPositionsMax]; } if (currentPositionsMax < currentCount + thisCount) { while (currentPositionsMax < currentCount + thisCount) currentPositionsMax *= 2; uint32 *t = new uint32 [currentPositionsMax]; memcpy(t, currentPositions, sizeof(uint32) * currentCount); delete [] currentPositions; currentPositions = t; } if (thisCount < 16) { uint32 *p = R[thisFile]->thePositions(); for (uint32 i=0; ithePositions(), sizeof(uint32) * thisCount); } } // Otherwise, we're the same as ADD. currentCount += thisCount; break; case PERSONALITY_MIN: case PERSONALITY_MINEXIST: if (currentTimes == 0) { currentCount = thisCount; } else { if (currentCount > thisCount) currentCount = thisCount; } break; case PERSONALITY_MAX: case PERSONALITY_MAXEXIST: if (currentCount < thisCount) currentCount = thisCount; break; case PERSONALITY_ADD: currentCount += thisCount; break; case PERSONALITY_AND: case PERSONALITY_NAND: case PERSONALITY_OR: case PERSONALITY_XOR: currentCount = 1; break; default: fprintf(stderr, "ERROR - invalid personality in multipleOperations::operate\n"); fprintf(stderr, "ERROR - this is a coding error, not a user error.\n"); exit(1); break; } currentTimes++; // Move the file we just read from to the next mer R[thisFile]->nextMer(); } for (uint32 i=0; imergeFilesLen; i++) delete R[i]; delete R; delete W; delete C; } kmer-code-2013-trunk/README.leaff0000644000000000000000000002047611515726327015124 0ustar rootrootLEAFF, leaff - sequence library utilities and applications Described in the publication: B. Walenz, L. Florea (2010) Sim4db and leaff: Utilities for fast batch spliced alignment and sequence indexing, submitted. 
Copyright (C) 2002, and GNU GPL, PE Corporation (NY) through the Celera Genomics Group Copyright (C) 2003-2004, and GNU GPL, Applied Biosystems Copyright (C) 2004-2010, and GNU GPL, Brian Walenz Includes portions copyright from: kmer - Copyright (C) 2004-2010, and GNU GPL, by Brian Walenz ======================================================================= Content: I. What is leaff? II. Guide to using leaff III. Examples IV. Terms of use V. Support I. What is leaff? LEAFF (Let's Extract Anything From Fasta) is a utility program for working with multi-fasta files. In addition to providing random access to the base level, it includes several analysis functions. II. Guide to using leaff leaff [-f fasta-file] [options] SOURCE FILES -f file: use sequence in 'file' (-F is also allowed for historical reasons) -A file: read actions from 'file' SOURCE FILE EXAMINATION -d: print the number of sequences in the fasta -i name: print an index, labelling the source 'name' OUTPUT OPTIONS -6 <#>: insert a newline every 60 letters (if the next arg is a number, newlines are inserted every n letters, e.g., -6 80. Disable line breaks with -6 0, or just don't use -6!) -e beg end: Print only the bases from position 'beg' to position 'end' (space based, relative to the FORWARD sequence!) If beg == end, then the entire sequence is printed. It is an error to specify beg > end, or beg > len, or end > len. -ends n Print n bases from each end of the sequence. One input sequence generates two output sequences, with '_5' or '_3' appended to the ID. If 2n >= length of the sequence, the sequence itself is printed, no ends are extracted (they overlap). 
-C: complement the sequences -H: DON'T print the defline -h: Use the next word as the defline ("-H -H" will reset to the original defline) -R: reverse the sequences -u: uppercase all bases SEQUENCE SELECTION -G n s l: print n randomly generated sequences, 0 < s <= length <= l -L s l: print all sequences such that s <= length < l -N l h: print all sequences such that l <= % N composition < h (NOTE 0.0 <= l < h < 100.0) (NOTE that you cannot print sequences with 100% N. This is a useful bug). -q file: print sequences from the seqid list in 'file' -r num: print 'num' randomly picked sequences -s seqid: print the single sequence 'seqid' -S f l: print all the sequences from ID 'f' to 'l' (inclusive) -W: print all sequences (do the whole file) LONGER HELP -help analysis -help examples ANALYSIS FUNCTIONS --findduplicates a.fasta Reports sequences that are present more than once. Output is a list of pairs of deflines, separated by a newline. --mapduplicates a.fasta b.fasta Builds a map of IIDs from a.fasta and b.fasta that have identical sequences. Format is "IIDa <-> IIDb" --md5 a.fasta: Don't print the sequence, but print the md5 checksum (of the entire sequence) followed by the entire defline. --partition prefix [ n[gmk]bp | n ] a.fasta --partitionmap [ n[gmk]bp | n ] a.fasta Partition the sequences into roughly equal size pieces of size nbp, nkbp, nmbp or ngbp; or into n roughly equal sized partitions. Sequences larger than the partition size are in a partition by themselves. --partitionmap writes a description of the partition to stdout; --partition creates a fasta file 'prefix-###.fasta' for each partition. Example: -F some.fasta --partition parts 130mbp -F some.fasta --partition parts 16 --segment prefix n a.fasta Splits the sequences into n files, prefix-###.fasta. Sequences are not reordered; the first n sequences are in the first file, the next n in the second file, etc. 
--gccontent a.fasta Reports the GC content over a sliding window of 3, 5, 11, 51, 101, 201, 501, 1001, 2001 bp. --testindex a.fasta Test the index of 'file'. If index is up-to-date, leaff exits successfully, else, leaff exits with code 1. If an index file is supplied, that one is tested, otherwise, the default index file name is used. --dumpblocks a.fasta Generates a list of the blocks of N and non-N. Output format is 'base seq# beg end len'. 'N 84 483 485 2' means that a block of 2 N's starts at space-based position 483 in sequence ordinal 84. A '.' is the end of sequence marker. --errors L N C P a.fasta For every sequence in the input file, generate new sequences including simulated sequencing errors. L -- length of the new sequence. If zero, the length of the original sequence will be used. N -- number of subsequences to generate. If L=0, all subsequences will be the same, and you should use C instead. C -- number of copies to generate. Each of the N subsequences will have C copies, each with different errors. P -- probability of an error. HINT: to simulate ESTs from genes, use L=500, N=10, C=10 -- make C=10 sequencer runs of N=10 EST sequences of length 500bp each. to simulate mRNA from genes, use L=0, N=10, C=10 to simulate reads from genomes, use L=800, N=10, C=1 -- of course, N= should be increased to give the appropriate depth of coverage --stats a.fasta Reports size statistics; number, N50, sum, largest. --seqstore out.seqStore Converts the input file (-f) to a seqStore file (for instance, for use with the Celera assembler or sim4db). NOTES: 1. Please note that options are ORDER DEPENDENT. Sequences are printed whenever a SEQUENCE SELECTION option occurs on the command line. OUTPUT OPTIONS are not reset when a sequence is printed. 2. SEQUENCES are numbered starting at ZERO, not one! III. Examples 1. Print the first 10 bases of the fourth sequence in file 'genes': leaff -f genes -e 0 10 -s 3 2. 
Print the first 10 bases of the fourth and fifth sequences: leaff -f genes -e 0 10 -s 3 -s 4 3. Print the fourth and fifth sequences reverse complemented, and the sixth sequence forward. The second set of -R -C toggles off reverse-complement: leaff -f genes -R -C -s 3 -s 4 -R -C -s 5 4. Convert file 'genes' to a seqStore 'genes.seqStore'. leaff -f genes --seqstore genes.seqStore IV. Terms of use This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received (LICENSE.txt) a copy of the GNU General Public License along with this program; if not, you can obtain one from http://www.gnu.org/licenses/gpl.txt or by writing to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA V. Support Brian Walenz (brianwalenz@users.sourceforge.net). Please check the parent project's Sourceforge page at http://kmer.sourceforge.net for details and updates. Last updated: Jan 19, 2011 kmer-code-2013-trunk/README.atac0000644000000000000000000000615612524130027014741 0ustar rootrootatac - assembly-to-assembly comparison, comparative mapping between two genome assemblies (same species), or between two different genomes (cross species). Described in the publication: S. Istrail, et al. 
"Whole-genome shotgun assembly and comparison of human genome assemblies" PNAS, Feb 2004; 101: 1916-1921 Copyright (C) 2002, and GNU GPL, PE Corporation (NY) through the Celera Genomics Group Copyright (C) 2003-2004, and GNU GPL, Applied Biosystems Copyright (C) 2004-2015, and GNU GPL, Brian Walenz ======================================================================= Content: I. What is atac? II. Command line usage III. Input/Output IV. Affiliated tools V. Terms of use VI. Support I. What is atac? atac computes a one-to-one pairwise alignment of large DNA sequences. It first finds the unique k-mers in each sequence, chains them to larger blocks, and fills in spaces between blocks. It was written primarily to transfer annotations between different assemblies of the human genome. The output is a set of ungapped 'matches', and a set of gapped 'runs' formed from the matches. Each match or run associates one sequence with the other sequence. The association is 'unique', in that there are no other (sizeable) associations for either sequence. Thus, large repeats and duplications are not present in the output - they appear as unmapped regions. Though the output is always pairwise, atac can cache intermediate results to speed up comparisons of multiple sequences. II. Command line usage A simple invocation: atac.pl \ -dir ecoli-k-vs-o \ -meryldir atac-sequences \ -id1 K -seq1 /data/references/ecolik12.fasta \ -id2 O -seq2 /data/references/ecolio157.fasta Run with no options for a list of parameters. See http://kmer.sourceforge.net/wiki/index.php/Getting_Started_with_ATAC for more. III. Input/Output Input is two multi-FASTA files. The files must be uncompressed. Output is in two self-documenting text files, reported at the end of the run: Finished! Output is: matches and runs -- /work/ecoli-k-vs-o/KvsO.atac clumps -- /work/ecoli-k-vs-o/KvsO.*clump*.atac IV. Affiliated tools N/A V. 
Terms of use This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received (LICENSE.txt) a copy of the GNU General Public License along with this program; if not, you can obtain one from http://www.gnu.org/licenses/gpl.txt or by writing to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA VI. Support Brian Walenz (brianwalenz@users.sourceforge.net) Please check the parent project's Sourceforge page at http://kmer.sourceforge.net for details and updates. Last updated: May 11, 2015 kmer-code-2013-trunk/Make.include0000644000000000000000000000154311542345214015374 0ustar rootroot# -*- makefile -*- LIBBIO/ :=$(realpath $/libbio/)/ LIBSEQ/ :=$(realpath $/libseq/)/ LIBKMER/ :=$(realpath $/libkmer/)/ LIBMERYL/ :=$(realpath $/libmeryl/)/ LIBSIM4/ :=$(realpath $/libsim4/)/ LIBUTL/ :=$(realpath $/libutl/)/ $(eval $(call Include,$/ESTmapper/)) $(eval $(call Include,$/atac-driver/)) $(eval $(call Include,$/seatac/)) $(eval $(call Include,$/leaff/)) $(eval $(call Include,$/meryl/)) $(eval $(call Include,$/seagen/)) $(eval $(call Include,$/sim4dbutils/)) $(eval $(call Include,$/sim4db/)) $(eval $(call Include,$/snapper/)) $(eval $(call Include,$/tapper/)) $(eval $(call Include,${LIBSIM4/})) $(eval $(call Include,${LIBKMER/})) $(eval $(call Include,${LIBMERYL/})) $(eval $(call Include,${LIBBIO/})) $(eval $(call Include,${LIBSEQ/})) $(eval $(call Include,${LIBUTL/})) $/.REAL-CLEAN := $/Make.compilers kmer-code-2013-trunk/configure.sh0000755000000000000000000003201012572072316015467 0ustar rootroot#!/bin/sh # Set 
up the build system -- need some symlinks to the build # directory. # if [ ! -e Makefile ] ; then if [ -e ../build/Makefile ] ; then ln -s ../build/Make.rules . ln -s ../build/Makefile . elif [ -e build/Makefile ] ; then ln -s build/Make.rules . ln -s build/Makefile . else echo "ERROR: Couldn't find the Makefile!" exit 1 fi fi # If no target, try to figure out one based on uname. This defaults to # the optimized target below. If it works well, we can always use this # mechanism, and extend with "debug" or "profile" (e.g., "./configure.sh debug") # target=$1 if [ "x$target" = "xdebug" ] ; then opts="-debug"; target="" fi if [ "x$target" = "xprofile" ] ; then opts="-profile"; target="" fi if [ "x$target" = "x" ] ; then case `uname` in Darwin) target="Darwin-i386$opts" if [ "`uname -m`" = "Power Macintosh" ] ; then target="Darwin-ppc$opts" fi if [ `uname -m` = "x86_64" ] ; then target="Darwin-amd64$opts" fi ;; FreeBSD) target="FreeBSD-i386$opts" if [ `uname -m` = "amd64" ] ; then target="FreeBSD-amd64$opts" fi ;; AIX) target="AIX$opts" ;; OSF1) target="OSF1$opts" ;; Linux) target="Linux-i686$opts" if [ `uname -m` = "x86_64" ] ; then target="Linux-amd64$opts" fi if [ `uname -m` = "ia64" ] ; then target="Linux-ia64$opts" fi ;; SunOS) target="solaris$opts" ;; *) echo "ERROR: Unknown uname of `uname` -- try manual configuration." exit 1 ;; esac fi # # Look for the python headers. We don't need the libraries. This is # used by atac-driver/chainer only. # PYTHON=${PYTHON:-`which python`} if [ ! -x $PYTHON ] ; then echo "WARNING: Python program not found at '$PYTHON'. Try setting environment variable PYTHON to the location of the python interpreter." WITHOUT_ATAC="atac-driver/ seatac/" else echo "Python executable found in '$PYTHON'" CFLAGS_PYTHON=`$PYTHON -c "from distutils import sysconfig; print sysconfig.get_python_inc()"` if [ -z "$CFLAGS_PYTHON" -o ! -d "$CFLAGS_PYTHON" ] ; then echo "WARNING: Python development environment not found." 
WITHOUT_ATAC="atac-driver/ seatac/" else echo "Python libraries found in '$CFLAGS_PYTHON'" fi fi if [ ! -z "$WITHOUT_ATAC" ] ; then echo "WARNING: Will not build ATAC." fi # # Decide on compilers to use. Unfortunately, all the options are tuned for gcc/g++. # In particular, -m64 and -W* and -f* aren't liked by Intel compilers. # if [ x$CC = x ] ; then CC="gcc" fi if [ x$CXX = x ] ; then CXX="g++" fi # # Emit architecture specific configurations. # case $target in Darwin-i386|Darwin-amd64) rm -f Make.compilers cat < Make.compilers # -*- makefile -*- # OS-X, optimized # CC := $CC SHLIB_FLAGS := -dynamiclib -undefined dynamic_lookup CFLAGS_COMPILE := -Ofast -fPIC -m64 -fmessage-length=0 -D_REENTRANT -D_THREAD_SAFE -Wall -Wno-char-subscripts CLDFLAGS := -m64 CLIBS := CXX := $CXX CXXFLAGS_COMPILE := -Ofast -fPIC -m64 -fmessage-length=0 -D_REENTRANT -D_THREAD_SAFE -Wall -Wno-char-subscripts CXXLDFLAGS := -m64 CXXLIBS := LDFLAGS_PYTHON := -bundle -framework CoreFoundation -framework Python -dynamic ARFLAGS := ruvs INSTALL/ := $target/ EOF ;; Darwin-i386-debug|Darwin-amd64-debug) rm -f Make.compilers cat < Make.compilers # -*- makefile -*- # OS-X, debug # CC := $CC SHLIB_FLAGS := -dynamiclib -undefined dynamic_lookup CFLAGS_COMPILE := -g3 -m64 -fmessage-length=0 -D_REENTRANT -D_THREAD_SAFE -Wall -Wno-char-subscripts CLDFLAGS := -m64 CLIBS := CXX := $CXX CXXFLAGS_COMPILE := -g3 -m64 -fmessage-length=0 -D_REENTRANT -D_THREAD_SAFE -Wall -Wno-char-subscripts CXXLDFLAGS := -m64 CXXLIBS := LDFLAGS_PYTHON := -bundle -framework CoreFoundation -framework Python -dynamic ARFLAGS := ruvs INSTALL/ := $target/ EOF ;; FreeBSD-amd64) rm -f Make.compilers cat < Make.compilers # -*- makefile -*- # FreeBSD, optimized CC := $CC SHLIB_FLAGS := -shared CFLAGS_COMPILE := -O3 -fPIC -pthread -D_REENTRANT -Wall -Wno-char-subscripts -mtune=native -march=native -funroll-loops -fexpensive-optimizations -finline-functions -fomit-frame-pointer CLDFLAGS := -L/usr/local/lib CLIBS := -pthread -lthr CXX := 
$CXX CXXFLAGS_COMPILE := -O3 -fPIC -pthread -D_REENTRANT -Wall -Wno-char-subscripts -mtune=native -march=native -funroll-loops -fexpensive-optimizations -finline-functions -fomit-frame-pointer CXXLDFLAGS := -L/usr/local/lib CXXLIBS := -pthread -lthr ARFLAGS := ruvs INSTALL/ := $target/ EOF ;; FreeBSD-amd64-debug) rm -f Make.compilers cat < Make.compilers # -*- makefile -*- # FreeBSD, debug, warnings CC := $CC SHLIB_FLAGS := -shared CFLAGS_COMPILE := -g -pthread -D_REENTRANT -fPIC -Wall -Wno-char-subscripts -Wshadow -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wconversion -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wnested-externs CLDFLAGS := -L/usr/local/lib CLIBS := -pthread -lthr CXX := $CXX CXXFLAGS_COMPILE := -g -pthread -D_REENTRANT -fPIC -Wall -Wno-char-subscripts -Wshadow -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -Wconversion CXXLDFLAGS := -L/usr/local/lib CXXLIBS := -pthread -lthr ARFLAGS := ruvs INSTALL/ := $target/ EOF ;; FreeBSD-amd64-profile) rm -f Make.compilers cat < Make.compilers # -*- makefile -*- # FreeBSD, debug, warnings CC := $CC SHLIB_FLAGS := -shared CFLAGS_COMPILE := -pg -O3 -pthread -D_REENTRANT -fPIC -Wall -Wno-char-subscripts -funroll-loops -fexpensive-optimizations -finline-functions CLDFLAGS := -pg -L/usr/local/lib CLIBS := -pthread -lthr CXX := $CXX CXXFLAGS_COMPILE := -pg -O3 -pthread -D_REENTRANT -fPIC -Wall -Wno-char-subscripts -funroll-loops -fexpensive-optimizations -finline-functions CXXLDFLAGS := -pg -L/usr/local/lib CXXLIBS := -pthread -lthr ARFLAGS := ruvs INSTALL/ := $target/ EOF ;; Linux-i686) rm -f Make.compilers cat < Make.compilers # -*- makefile -*- # Linux, optimized CC := $CC SHLIB_FLAGS := -shared CFLAGS_COMPILE := -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_REENTRANT -O3 -D_THREAD_SAFE -pthread -fmessage-length=0 -Wall -Wno-char-subscripts -funroll-loops -fexpensive-optimizations -finline-functions -fomit-frame-pointer CLDFLAGS := -L/usr/local/lib CLIBS := -pthread 
-ldl CXX := $CXX CXXFLAGS_COMPILE := -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_REENTRANT -O3 -D_THREAD_SAFE -pthread -fmessage-length=0 -Wall -Wno-char-subscripts -funroll-loops -fexpensive-optimizations -finline-functions -fomit-frame-pointer CXXLDFLAGS := -L/usr/local/lib CXXLIBS := -pthread -ldl ARFLAGS := ruvs INSTALL/ := $target/ EOF ;; Linux-amd64) rm -f Make.compilers cat < Make.compilers # -*- makefile -*- # Linux64, optimized CC := $CC SHLIB_FLAGS := -shared CFLAGS_COMPILE := -m64 -fPIC -D_REENTRANT -O3 -D_THREAD_SAFE -pthread -fmessage-length=0 -Wall -Wno-char-subscripts -funroll-loops -fexpensive-optimizations -finline-functions -fomit-frame-pointer CLDFLAGS := -L/usr/local/lib CLIBS := -pthread -ldl CXX := $CXX CXXFLAGS_COMPILE := -m64 -fPIC -D_REENTRANT -O3 -D_THREAD_SAFE -pthread -fmessage-length=0 -Wall -Wno-char-subscripts -funroll-loops -fexpensive-optimizations -finline-functions -fomit-frame-pointer CXXLDFLAGS := -L/usr/local/lib CXXLIBS := -pthread -ldl ARFLAGS := ruvs INSTALL/ := $target/ EOF ;; Linux-amd64-debug) rm -f Make.compilers cat < Make.compilers # -*- makefile -*- # Linux64, optimized CC := $CC SHLIB_FLAGS := -shared CFLAGS_COMPILE := -m64 -fPIC -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_REENTRANT -g -D_THREAD_SAFE -pthread -fmessage-length=0 -Wall -Wno-char-subscripts -funroll-loops -fexpensive-optimizations -finline-functions -fomit-frame-pointer CLDFLAGS := -L/usr/local/lib CLIBS := -pthread -ldl CXX := $CXX CXXFLAGS_COMPILE := -m64 -fPIC -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_REENTRANT -g -D_THREAD_SAFE -pthread -fmessage-length=0 -Wall -Wno-char-subscripts -funroll-loops -fexpensive-optimizations -finline-functions -fomit-frame-pointer CXXLDFLAGS := -L/usr/local/lib CXXLIBS := -pthread -ldl ARFLAGS := ruvs INSTALL/ := $target/ EOF ;; Linux-amd64-profile) rm -f Make.compilers cat < Make.compilers # -*- makefile -*- # Linux64, optimized CC := $CC SHLIB_FLAGS := -shared CFLAGS_COMPILE := -pg -m64 -fPIC 
-D_REENTRANT -O3 -D_THREAD_SAFE -pthread -fmessage-length=0 -Wall -Wno-char-subscripts -funroll-loops -fexpensive-optimizations -finline-functions CLDFLAGS := -L/usr/local/lib CLIBS := -pthread -ldl CXX := $CXX CXXFLAGS_COMPILE := -pg -m64 -fPIC -D_REENTRANT -O3 -D_THREAD_SAFE -pthread -fmessage-length=0 -Wall -Wno-char-subscripts -funroll-loops -fexpensive-optimizations -finline-functions CXXLDFLAGS := -L/usr/local/lib CXXLIBS := -pthread -ldl ARFLAGS := ruvs INSTALL/ := $target/ EOF ;; Linux-ia64) rm -f Make.compilers cat < Make.compilers # -*- makefile -*- # Linux64, optimized CC := $CC SHLIB_FLAGS := -shared CFLAGS_COMPILE := -m64 -fPIC -D_REENTRANT -O3 -D_THREAD_SAFE -pthread -fmessage-length=0 -Wall -Wno-char-subscripts -funroll-loops -fexpensive-optimizations -finline-functions -fomit-frame-pointer CLDFLAGS := -L/usr/local/lib CLIBS := -pthread -ldl CXX := $CXX CXXFLAGS_COMPILE := -m64 -fPIC -D_REENTRANT -O3 -D_THREAD_SAFE -pthread -fmessage-length=0 -Wall -Wno-char-subscripts -funroll-loops -fexpensive-optimizations -finline-functions -fomit-frame-pointer CXXLDFLAGS := -L/usr/local/lib CXXLIBS := -pthread -ldl ARFLAGS := ruvs INSTALL/ := $target/ EOF ;; # SUNLF needs to be set to allow for large file support on Solaris. It # should be whatever the following getconf's say. # # getconf LFS_CFLAGS = "-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64" # getconf LFS_LDFLAGS = "" # getconf LFS_LFS_LIBS = "" # solaris) echo "Solaris is UNTESTED!" 
rm -f Make.compilers cat < Make.compilers # -*- makefile -*- # Solaris, gcc optimized # CC := $CC -m64 SHLIB_FLAGS := -G #untested CFLAGS_COMPILE := -D_REENTRANT -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -O3 -Wall -Wno-char-subscripts -funroll-loops -fexpensive-optimizations -finline-functions -fomit-frame-pointer CLDFLAGS := CLIBS := -lpthread -lrt CXX := $CXX -m64 CXXFLAGS_COMPILE := -D_REENTRANT -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -O3 -Wall -Wno-char-subscripts -funroll-loops -fexpensive-optimizations -finline-functions -fomit-frame-pointer CXXLDFLAGS := CXXLIBS := -lpthread -lrt ARFLAGS := ruv INSTALL/ := $target/ EOF ;; *) echo "usage: $0 " echo " osx OS-X, optimized" echo " osx-debug OS-X, debug" echo "" echo " freebsd FreeBSD, optimized" echo " freebsd-profile FreeBSD, optimized, profiled" echo " freebsd-debug FreeBSD, debug and warnings" echo "" echo " aix AIX, optimized" echo " aix-profile AIX, optimized, profiled (NOT TESTED)" echo " aix-debug AIX, debug" echo "" echo " tru64, compaq Tru64, optimized" echo " tru64-debug, compaq-debug Tru64, debug, warnings, trapuv" echo "" echo " linux Linux, i686, optimized" echo " linux64 Linux, Opteron, optimized" echo "" echo " solaris Solaris, gcc, optimized (STALE)" exit ;; esac cat <> Make.compilers PERL := /usr/bin/env perl .EXE := .SO := .so .A := .a .O := .o CLD := \${CC} CXXLD := \${CXX} CCDEP := \${CC} -MM -MG CXXDEP := \${CXX} -MM -MG CLIBS += -lm CXXLIBS += -lm PYTHON := $PYTHON PYTHON_H := $CFLAGS_PYTHON/Python.h CFLAGS_PYTHON := -I$CFLAGS_PYTHON WITHOUT := $WITHOUT_ATAC EOF echo "Configured." #cat Make.compilers kmer-code-2013-trunk/libbio/0000755000000000000000000000000012641613356014415 5ustar rootrootkmer-code-2013-trunk/libbio/alphabet-acgtspace.c0000644000000000000000000001064011043436604020264 0ustar rootroot#include #include #include "alphabet.h" void initCompressionTablesForACGTSpace(void) { int i, j; for (i=0; i<256; i++) { whitespaceSymbol[i] = isspace(i) ? 
1 : 0; toLower[i] = tolower(i); toUpper[i] = toupper(i); letterToBits[i] = (unsigned char)0xff; bitsToLetter[i] = (unsigned char)'?'; bitsToColor[i] = (unsigned char)'?'; complementSymbol[i] = (unsigned char)'?'; } for (i=0; i<128; i++) for (j=0; j<128; j++) IUPACidentity[i][j] = 0; letterToBits['a'] = letterToBits['A'] = (unsigned char)0x00; letterToBits['c'] = letterToBits['C'] = (unsigned char)0x01; letterToBits['g'] = letterToBits['G'] = (unsigned char)0x02; letterToBits['t'] = letterToBits['T'] = (unsigned char)0x03; letterToBits['0'] = (unsigned char)0x00; letterToBits['1'] = (unsigned char)0x01; letterToBits['2'] = (unsigned char)0x02; letterToBits['3'] = (unsigned char)0x03; bitsToLetter[0x00] = 'A'; bitsToLetter[0x01] = 'C'; bitsToLetter[0x02] = 'G'; bitsToLetter[0x03] = 'T'; bitsToColor[0x00] = '0'; bitsToColor[0x01] = '1'; bitsToColor[0x02] = '2'; bitsToColor[0x03] = '3'; complementSymbol['a'] = 't'; // a complementSymbol['t'] = 'a'; // t complementSymbol['u'] = 'a'; // u, Really, only for RNA complementSymbol['g'] = 'c'; // g complementSymbol['c'] = 'g'; // c complementSymbol['y'] = 'r'; // c t complementSymbol['r'] = 'y'; // a g complementSymbol['s'] = 'w'; // g c complementSymbol['w'] = 's'; // a t complementSymbol['k'] = 'm'; // t/u g complementSymbol['m'] = 'k'; // a c complementSymbol['b'] = 'v'; // c g t complementSymbol['d'] = 'h'; // a g t complementSymbol['h'] = 'd'; // a c t complementSymbol['v'] = 'b'; // a c g complementSymbol['n'] = 'n'; // a c g t complementSymbol['A'] = 'T'; // a complementSymbol['T'] = 'A'; // t complementSymbol['U'] = 'A'; // u, Really, only for RNA complementSymbol['G'] = 'C'; // g complementSymbol['C'] = 'G'; // c complementSymbol['Y'] = 'R'; // c t complementSymbol['R'] = 'Y'; // a g complementSymbol['S'] = 'W'; // g c complementSymbol['W'] = 'S'; // a t complementSymbol['K'] = 'M'; // t/u g complementSymbol['M'] = 'K'; // a c complementSymbol['B'] = 'V'; // c g t complementSymbol['D'] = 'H'; // a g t 
complementSymbol['H'] = 'D'; // a c t complementSymbol['V'] = 'B'; // a c g complementSymbol['N'] = 'N'; // a c g t complementSymbol['0'] = '0'; // ColorSpace is self-complementing complementSymbol['1'] = '1'; complementSymbol['2'] = '2'; complementSymbol['3'] = '3'; IUPACidentity['A']['A'] = 1; IUPACidentity['C']['C'] = 1; IUPACidentity['G']['G'] = 1; IUPACidentity['T']['T'] = 1; IUPACidentity['M']['A'] = 1; IUPACidentity['M']['C'] = 1; IUPACidentity['R']['A'] = 1; IUPACidentity['R']['G'] = 1; IUPACidentity['W']['A'] = 1; IUPACidentity['W']['T'] = 1; IUPACidentity['S']['C'] = 1; IUPACidentity['S']['G'] = 1; IUPACidentity['Y']['C'] = 1; IUPACidentity['Y']['T'] = 1; IUPACidentity['K']['G'] = 1; IUPACidentity['K']['T'] = 1; IUPACidentity['V']['A'] = 1; IUPACidentity['V']['C'] = 1; IUPACidentity['V']['G'] = 1; IUPACidentity['H']['A'] = 1; IUPACidentity['H']['C'] = 1; IUPACidentity['H']['T'] = 1; IUPACidentity['D']['A'] = 1; IUPACidentity['D']['G'] = 1; IUPACidentity['D']['T'] = 1; IUPACidentity['B']['C'] = 1; IUPACidentity['B']['G'] = 1; IUPACidentity['B']['T'] = 1; IUPACidentity['N']['A'] = 1; IUPACidentity['N']['C'] = 1; IUPACidentity['N']['G'] = 1; IUPACidentity['N']['T'] = 1; IUPACidentity['M']['M'] = 1; IUPACidentity['R']['R'] = 1; IUPACidentity['W']['W'] = 1; IUPACidentity['S']['S'] = 1; IUPACidentity['Y']['Y'] = 1; IUPACidentity['K']['K'] = 1; IUPACidentity['V']['V'] = 1; IUPACidentity['H']['W'] = 1; IUPACidentity['D']['D'] = 1; IUPACidentity['B']['B'] = 1; IUPACidentity['N']['N'] = 1; // Order isn't important // for (i='A'; i<'Z'; i++) for (j='A'; j<'Z'; j++) { if (IUPACidentity[j][i]) IUPACidentity[i][j] = 1; } // Case isn't important // for (i='A'; i<'Z'; i++) for (j='A'; j<'Z'; j++) { if (IUPACidentity[j][i]) { IUPACidentity[tolower(i)][tolower(j)] = 1; IUPACidentity[tolower(i)][j ] = 1; IUPACidentity[i ][tolower(j)] = 1; } } } kmer-code-2013-trunk/libbio/reversecomplement.c0000644000000000000000000000135412322046702020312 0ustar rootroot#include "bio.h" 
#include // Inplace reverse-complement an ACGT sequence. A pointer the the // string is returned. // char * reverseComplementSequence(char *seq, uint32 seqlen) { char *s = seq; char *e = seq + seqlen - 1; char t; uint32 c = seqlen / 2; while (c--) { t = complementSymbol[*s]; *(s++) = complementSymbol[*e]; *(e--) = t; } if (s == e) *s = complementSymbol[*s]; return(seq); } // Inplace reverse a string. A pointer the the string is returned. // char * reverseString(char *seq, uint32 seqlen) { char *s = seq; char *e = seq + seqlen - 1; char t; uint32 c = seqlen / 2; while (c--) { t = *s; *(s++) = *e; *(e--) = t; } return(seq); } kmer-code-2013-trunk/libbio/merCovering.H0000644000000000000000000001622212322046702017000 0ustar rootroot#ifndef MER_COVERING_H #define MER_COVERING_H // This is an interval list, where the intervals are built using // fixed size pieces. // // It's designed to accept pieces in roughly sorted order. // // Intervals are stored c-style. // #include #include class merCovering { private: class interval { public: uint32 _lo; uint32 _hi; interval *_next; interval(uint32 lo, uint32 hi, interval *n) { _lo = lo; _hi = hi; _next = n; } }; interval *_intervals; uint32 _width; uint32 _pieces; #ifdef TEST_MERCOVERING uint32 _test[TEST_SIZE]; #endif public: merCovering(uint32 w) { _intervals = 0L; _width = w; _pieces = 0; #ifdef TEST_MERCOVERING for (uint32 i=0; i_next; delete i; i = _intervals; } _intervals = 0L; _pieces = 0; }; uint32 sumOfLengths(void) { uint32 s=0; for (interval *i=_intervals; i; i = i->_next) s += i->_hi - i->_lo; return(s); }; uint32 numberOfPieces(void) { return(_pieces); }; void addMer(uint32 lo) { _pieces++; uint32 hi = lo + _width; interval *c; #ifdef TEST_MERCOVERING for (uint32 i=lo; i_lo)) { _intervals = new interval(lo, hi, _intervals); return; } c = _intervals; while (c) { // Case: New interval is completely contained in the current interval. 
// if ((c->_lo <= lo) && (hi <= c->_hi)) return; // Case: New interval overlaps the low end of the current interval, // or is completely contained in an existing interval. // if ((lo <= c->_lo) && (hi <= c->_hi)) { c->_lo = lo; return; } if (c->_next) { // Case: New interval overlaps the high end of the current interval... // if (lo <= c->_hi) { if (hi < c->_next->_lo) { // but does not intersect the next interval. // c->_hi = hi; return; } else { // and does intersect the next interval. // interval *p = c->_next; c->_hi = c->_next->_hi; c->_next = c->_next->_next; delete p; return; } } else { // Case: New interval is between two existing intervals // // (lo > c->_hi) is given // if (hi < c->_next->_lo) { c->_next = new interval(lo, hi, c->_next); return; } } } else { // Case: New interval overlaps the high end of the current interval // if (lo <= c->_hi) { c->_hi = hi; return; } else { // Otherwise, we just fell off the end of all intervals. // Add one at the end. // c->_next = new interval(lo, hi,0L); return; } } c = c->_next; } #ifdef TEST_MERCOVERING fprintf(stderr, "ERROR IN addInterval!\n"); #endif }; #ifdef TEST_MERCOVERING void test(void) { for (uint32 i=0; i_next) { for (uint32 i=z->_lo; i_hi; i++) { if (_test[i] == 0) { fprintf(stderr, "INTERVAL CONTAINS SOMETHING NOT IN ARRAY! (%d)\n", i); exit(1); } if (_test[i] == 1) { fprintf(stderr, "INTERVAL HIT SOMETHING TWICE! (%d)\n", i); exit(1); } _test[i] = 1; } } for (uint32 i=0; i_intervals; N = 0L; L = 0L; while (A || B) { uint32 lo = 0; uint32 hi = 0; // if either list is zero, we can just zip down the other list // and add things. 
// if (!B) { while (A) { L->_next = new interval(A->_lo, A->_hi, 0L); L = L->_next; A = A->_next; } } if (!A) { while (B) { L->_next = new interval(B->_lo, B->_hi, 0L); L = L->_next; B = B->_next; } } if (A && B) { if (A->_lo == B->_lo) { // A and B start at the same position // lo = A->_lo; hi = A->_hi; if (hi < B->_hi) hi = B->_hi; A = A->_next; B = B->_next; } else { // A and B start at different positions. Pick the first one. // if (A->_lo < B->_lo) { lo = A->_lo; hi = A->_hi; A = A->_next; } else { lo = B->_lo; hi = B->_hi; B = B->_next; } } // We have an initial interval. Add more stuff, while there // are overlaps. bool modified = true; while ((A || B) && (modified)) { modified = false; if ((A) && (hi >= A->_lo)) { if (hi < A->_hi) hi = A->_hi; A = A->_next; modified = true; } if ((B) && (hi >= B->_lo)) { if (hi < B->_hi) hi = B->_hi; B = B->_next; modified = true; } } // OK, got the new interval. Save it. // if (N) { L->_next = new interval(lo, hi, 0L); L = L->_next; } else { N = L = new interval(lo, hi, 0L); } } } // Save the number of mers in both intervals // uint32 p = _pieces + I->_pieces; clear(); _intervals = N; _pieces = p; } #ifdef TEST_MERCOVERING void dump(void) { for (interval *i=_intervals; i; i = i->_next) fprintf(stderr, "%5d-%5d ", i->_lo, i->_hi); fprintf(stderr, "\n"); }; void compare(merCovering *B) { interval *i = _intervals; interval *j = B->_intervals; if (_pieces != B->_pieces) { fprintf(stderr, "Pieces differ (this=%d that=%d).\n", _pieces, B->_pieces); exit(1); } while (i && j) { if ((i->_lo != j->_lo) || (i->_hi != j->_hi)) { fprintf(stderr, "ERROR!\n"); exit(1); } i = i->_next; j = j->_next; } if (i) { fprintf(stderr, "ERROR (i still exists)!\n"); exit(1); } if (j) { fprintf(stderr, "ERROR (i still exists)!\n"); exit(1); } }; #endif }; #endif // MERCOVERING_H kmer-code-2013-trunk/libbio/test/0000755000000000000000000000000012641613356015374 5ustar 
rootrootkmer-code-2013-trunk/libbio/test/Makefile0000644000000000000000000000672210676620645017050 0ustar rootroot

# Builds and runs the libbio self-tests.  Each test target compiles one
# program against ../libbio and ../../libutil and then runs it, usually on a
# random FastA file produced by ../../leaff/leaff.

#PROG = test-merstream-from-seqstore
PROG    = dump-merstreamfile \
          test-seqStream \
          test-chainedSequence \
          test-fasta-accessor \
          test-merstream \
          test-merstreamfile \
          test-setbits \
          halign-test

# Tests that are not part of 'all'.  (The list used to end in a dangling
# backslash, which only worked as long as a blank line followed it.)
DEAD    = test-merstream-speed \
          test-bigmer-msf

INCLUDE = -I.. -I../../libutil
LIBS    = -L.. -L../../libutil -lbio -lutil -lm
OBJS    =

include ../../Make.compilers

# 'all' and 'clean' name actions, not files.
.PHONY: all clean

all: $(PROG)
	@echo Tests passed!

dump-merstreamfile: dump-merstreamfile.C
	$(CXX) $(CXXFLAGS_COMPILE) -c -o dump-merstreamfile.o dump-merstreamfile.C $(INCLUDE)
	$(CXX) $(CXXLDFLAGS) -o dump-merstreamfile dump-merstreamfile.o $(LIBS)

test-merstream-from-seqstore: test-merstream-from-seqstore.C
	$(CXX) $(CXXFLAGS_COMPILE) -c -o test-merstream-from-seqstore.o test-merstream-from-seqstore.C $(INCLUDE)
	$(CXX) $(CXXLDFLAGS) -o test-merstream-from-seqstore test-merstream-from-seqstore.o $(LIBS)

test-seqStream: test-seqStream.C
	$(CXX) $(CXXFLAGS_COMPILE) -c -o test-seqStream.o test-seqStream.C $(INCLUDE)
	$(CXX) $(CXXLDFLAGS) -o test-seqStream test-seqStream.o $(LIBS)
	../../leaff/leaff -G 3 30 40 > junk2.fasta
	./test-seqStream junk2.fasta
	rm -f junk*

test-chainedSequence: test-chainedSequence.C
	$(CXX) $(CXXFLAGS_COMPILE) -c -o test-chainedSequence.o test-chainedSequence.C $(INCLUDE)
	$(CXX) $(CXXLDFLAGS) -o test-chainedSequence test-chainedSequence.o $(LIBS)
	./test-chainedSequence
	../../leaff/leaff -G 1000 1000 3000 > junk2.fasta
	./test-chainedSequence junk2.fasta
	rm -f junk*

test-fasta-accessor: test-fasta-accessor.C
	$(CXX) $(CXXFLAGS_COMPILE) -c -o test-fasta-accessor.o test-fasta-accessor.C $(INCLUDE)
	$(CXX) $(CXXLDFLAGS) -o test-fasta-accessor test-fasta-accessor.o $(LIBS)
	./test-fasta-accessor

# Run twice: once on a single sequence, once on a thousand.
test-merstream: test-merstream.C
	$(CXX) $(CXXFLAGS_COMPILE) -c -o test-merstream.o test-merstream.C $(INCLUDE)
	$(CXX) $(CXXLDFLAGS) -o test-merstream test-merstream.o $(LIBS)
	../../leaff/leaff -G 1 10000 30000 > junk.fasta
	./test-merstream junk.fasta
	rm -f junk*
	../../leaff/leaff -G 1000 10000 30000 > junk.fasta
	./test-merstream junk.fasta
	rm -f junk*

# NOTE: the cleanup line of this test is commented out, so junk.fasta is left
# behind until 'make clean'.
test-merstreamfile: test-merstreamfile.C
	$(CXX) $(CXXFLAGS_COMPILE) -c -o test-merstreamfile.o test-merstreamfile.C $(INCLUDE)
	$(CXX) $(CXXLDFLAGS) -o test-merstreamfile test-merstreamfile.o $(LIBS)
#../../leaff/leaff -G 2 50 50 > junk.fasta
	../../leaff/leaff -G 100000 10 600 > junk.fasta
#../../leaff/leaff -G 10000 10 10000 > junk.fasta
#../../leaff/leaff -G 30000 10000 10000 > junk.fasta
	./test-merstreamfile junk.fasta
#rm -f junk.fasta junk.fastaidx

test-merstream-speed: test-merstream-speed.C
	$(CXX) $(CXXFLAGS_COMPILE) -c -o test-merstream-speed.o test-merstream-speed.C $(INCLUDE)
	$(CXX) $(CXXLDFLAGS) -o test-merstream-speed test-merstream-speed.o $(LIBS)
	../../leaff/leaff -G 10000 1000 10000 > junk.fasta
	cat junk.fasta > /dev/null
	./test-merstream-speed junk.fasta
	rm -f junk*

test-setbits: test-setbits.C
	$(CXX) $(CXXFLAGS_COMPILE) -c -o test-setbits.o test-setbits.C $(INCLUDE)
	$(CXX) $(CXXLDFLAGS) -o test-setbits test-setbits.o $(LIBS)
	./test-setbits

test-bigmer-msf: test-bigmer-msf.C
	$(CXX) $(CXXFLAGS_COMPILE) -c -o test-bigmer-msf.o test-bigmer-msf.C $(INCLUDE)
	$(CXX) $(CXXLDFLAGS) -o test-bigmer-msf test-bigmer-msf.o $(LIBS)
	./test-bigmer-msf

# Built by 'all' but not run.
halign-test: halign-test.C
	$(CXX) $(CXXFLAGS_COMPILE) -c -o halign-test.o halign-test.C $(INCLUDE)
	$(CXX) $(CXXLDFLAGS) -o halign-test halign-test.o $(LIBS)

clean:
	rm -f $(PROG) *.o *junk*
kmer-code-2013-trunk/libbio/test/test-setbits.C0000644000000000000000000000111712322046702020121 0ustar rootroot
#include "bio++.H"

//g++ -o test-setbits test-setbits.C -I../libutil -I. -L../libutil -L.
-lbio -lutil int main(int argc, char **argv) { kMer x(96); char str[256]; if (KMER_WORDS < 3) { fprintf(stderr, "I need at least KMER_WORDS == 3; test not run.\n"); exit(0); } for (uint32 i=0; i<168; i++) { x.clear(); x.setBits(i, 24, 0x535); fprintf(stderr, uint32FMTW(3)" -- %s -- "uint64HEX"\n", i, x.merToString(str), x.getBits(i, 16)); if (x.getBits(i, 16) != 0x535) { fprintf(stderr, "decode error.\n"); exit(1); } } exit(0); } kmer-code-2013-trunk/libbio/test/halign-test.C0000644000000000000000000003624510415625305017723 0ustar rootroot#include "bio++.H" int main(int argc, char **argv) { const char *s1 = "gattcatggctgaaatcgtgtttgaccagctatgtgtgtctctcaatccgatcaagtagatgtctaaaattaaccgtcagaatatttatgcctgattcatggctgaaattgtgtttgaccagctatgtgtgtctcttaatccactcaagtagatgtctaaaattaaccatcagaatatttatgcctgattcatggctgaaatcacgtttgaccagctatgtgtgtctcttaatccagtcaagtagatgtctaaaattaaccatcagaatatttatgcctgattcatggctgaaatcgtgtttgaccagctatgtgtgtctctcaatccgatcaagtagatgtctgaaattaaccatcagaatatttatgcctgattcatggctgaaatttcaggatgaaagctatgaaatctctatttgtgtttgtgtatctattaatgtatgttatgtatatgtgatattttcttaactccagagagcattgcaaaattcatttatgaaaacctctaaaagtgctctattctaacttggcttggaaaaaaataagcatttataaataaatattcaccaaactcctagaaatataggaactgatcaaatgtttcttaagttaacatgatttggataaaacttagttaaataagattaatatagtatttttggtgtaataaaacaactatatcttcaaaattatcattattgaatataaaacaagcataaattcctattctgcttgagttctagtcaaataagctaatattatacttactagaaacgtaaaatcttaaagcttatagatttgattctaattaagttgtcattcttatgaaaaacattattttttttatgctgaaaagatacacatatatttagagttagccagctggactcagtttaggtgatcccaattttgttacaacatcgaaagcatcataatcaggagcaagtcgaacatatgccttctctttatcaggacaaatcagggtggtgaccttggccacatcactgtcatagagcttcttcacagcctgtctgatctggtgcttgttggctttaacatccacagtgaacacaagcgtgttgttttcttctatcttcttcacggccgactcagtggtcagcggaaacttgatgatagcatagtggccaagcttgtttctcctgggggtgctcttccgaggatatctgggctgcctccggagtcgcagtgtcttgggccgcctgaaggtgagtgacatgcggatcttcttttttgcgtgtggctgcggacacctttcaacactgccttcttggcctttaaggccttcgctttggcttcggctttaggaggagcaggagcttccttcgctttcggtgccgtcttgtgaaaagcgaaaaacattatt
tcaaaaataatttgtttacagtaaatctgcctaagaatagtttccaaagtacttttggtaatttttaaccttaaagttaagctaagtaaaagatttgcattaaatatctagaccatttataaataagatacaatactaaaacattaattactgaacataaataattcaagtttatatacttttggcttcctgtttttacagagagactaaagatattttggcccgttaataaacatgtttttttctgccacactgaggaattgtattatgagaaaacacatccctctagatgttgggagatggtatattcatacattttctaacctactatagaatgctaatatatgacagtttataaccgtctacttcctagttttctctggaaaataaaagattactaagtattaaaattataatcaatatatgtaaataaaactactagaaataatagaataactagaaacaactctatgcaaagcatgcaagaaaagtagggcatgtttcgcaagtaaagtaggttgcattttttataaggaaaaccatacagaagatacaaataaaaagagatacctaaccttccctgtgttatatttgtatgggtaaaatgttatgttttcagaaattatataaaattcctggaagtttgtcaatgtcctccttatccatgctatgtgccactatagagtaatgagtcataattccaattattactttaaatgttgtgccaggcacagtggctcatgcctataatcccagcactttaggaggctgaggcgggtggatcacaaggtcaggagatccagaccatcctggttaactcggtgaatctccatctctattaaaaatataaaaaattagccgggcgtgatggaaggcacctgtagtcccagctactcgggaggctgaggcaggagaatggcgtgaacccaggagacagagcttgcagtgagccgagatcgcactgctgcactccagcctgggcgacagagcaagactctgtctctaaataaataaataaataaatgttgtctgccacagaaaaaatcgaatat"; const char *s2 = 
"gattcatggctgaaatcatgtttgaccagctatgtgtgtctcttaatccagtcaagtagatgtctaaaattaaccatcagaatatttatgcctgattcatggctgaaatcgtgtttgaccagctatgtgtgtctcttactccactcaagtagatgtctaaaattaaccatcagaatatttatgcctgattcatggctgaaatcatgtttgaccagctatgtgtgtctcttaatccagtcaagtagatgtctaaaattaaccatcagaatatttatgcctgattcatggctgaaatcgtgtttgaccagctatgtgtgtctctcaatccgatcaagtagatgtctgaaattaaccatcagaatatttatgcctgattcatggctgaaatttcaggatgacagctatgaaatctctatttgtgtttgtatatctattaatgtatgttatgtatatgtgatattttcttaactccagagagcattgcaaaattcatttatgaaatcctctaaaagtgctctattctaacttggcttggaaaaaaataagcatttataaataaatattcaccaaactcctagaaatataggaactgatcaaatgtttcttaagttaacatgatttggataaaacttagttaaataagattaatatagtatttttggtgtaataaaacaactatatcttcaaaattatcattattgaatataaaacaagcataaattcctattctgcttgagttctagtcaaataagctaatattatacttactagaaacgtaaaatcttaaagcttatagatttgattctaattaagttgtcattcttatgaaaaacattatttttttatgctgaaaagatacacatatatttagagttagccagctggactcagtttaggtgatcccaattttgttacaacatcgaaagcatcataatcaggagcaagtcgaacatatgccttgttctctttatcaggacaaatcagggtggtgaccttggccacatcactgtcatagagcttcttcacagcctgtctgatctggtgcttgttggctttaacatccacagtgaacacaagcgtgttgttttcttctatcttcttcacggccgactcagtggtcagcggaaacttgatgatagcatagtggccaagcttgtttctcctgggggtgctcttccgaggatatctgggctgcctccggagtcgcagtgtcttgggccgcctgaaggtgagtgacatgcggatcttcttttttgcgtgtggctgcggacacctttcaacactgccttcttggcctttaaagccttcgctttggcttcggctttaggaggagcaggagcttccttcgctttcggtgccatcttgtgaaaagcgaaaaacattatttcaaaaataatttgtttacagtaaatctgcctatgaatagtttccaaagtacttttggtaatttttaaccttaaagttaagctaagtaaaagatttgcattaaatatctagaccatttataaataagatacaatactaaaacattaattactgaacataaataattcaagtttatatacttttggctcctatttttacagagagactaaagatattttggcccgttaataaacatgtttttttctgccacactgaggaattgtattatgaggaaacacatccctctagatgttgggagatggtatattcatacattttctaacctactatagaatgctaatatatgacagtttataactgtctacttcctagttttctctggaaaataaaagattactaagtattaaaattataatcaatatatgtaaataaaactactagaaataatagaataactagaaacaactctatgcaaagcatgcaagaaaagtagggcatgtttcgcaagtaaagtaggttgcattttttataaggaaaaccatacagaagatacaaataaaaagagatacctaaccttccctgtgttatatttgtatgggtaaaatgttatgtt
ttcagaaattatataaaattcctggaagtttgtcaatgtcctccttatccatgctatgtgccactatagagtaatgagtcataattccaattattactttaaatgttgtgccaggcacagtggctcatgcctataatcccagcactttaggaggctgaggcgggtggatcacaaggtcaggagatccagaccatcctggctaacccggtgaatctccatctctattaaaaatataaaaaattagccgggcgtgatggcaggcacctgtagtcccagctactcgggaggctgaggcaggagaatggcgtgaacccaggagacagagcttgcagtgagccgagatcgcaccgctgcactccagcctgggcgacagagcaagactctgtctctaaataaataaataaataaatgttgtctgccacagaaaaaatcgaatat"; s1 = "gATTCATGGCTgaaatcgtgtttgaccagctatgtgtgtctctcaatccgatcaagtagatgtctaaaattaaccgtcaGAATATTTATGCCTGATTCATGGCTgaaattgtgtttgaccagctatgtgtgtctcttaatccactcaagtagatgtctaaaattaaccatcaGAATATTTATGCCTGATTCATGGCTgaaatcacgtttgaccagctatgtgtgtctcttaatccagtcaagtagatgtctaaaattaaccatcaGAATATTTATGCCTGATTCATGGCTgaaatcgtgtttgaccagctatgtgtgtctctcaatccgatcaagtagatgtctgaaattaaccatcaGAAtatttatgcctgattcatggctgaaatttcaggatgaaagctatgaaatctctatttgtgtttgtgtatctattaatgtatgttatgtatatgtgatattttcttaactccagagagcattgcaaaattcatttatgaaaacctctaaaagtgctctattctaacttggcttggaaaaaaataagcatttataaataaatattcaccaaactcctagaaatataggaactgatcaaatgtttcttaagttaacatgatttggataaaacttagttaaataagattaatatagtatttttggtgtaataaaacaactatatcttcaaaattatcattattgaatataaaacaagcataaattcctattctgcttgagttctagtcaaataagctaatattatacttactagaaacgtaaaatcttaaagcttatagatttgattctaaTTAAGTTGTCATTCTTATGAAAAACATTATTTTTTTTATGCTGAAAAGATACACATATATTTAGAGTTAGCCAGCTGGACTCAGTTTAGGTGATCCCAATTTTGTTACAACATCGAAAGCATCATAATCAGGAGCAAGTCGAACATATGCCTTCTCTTTATCAGGACAAATCAGGGTGGTGACCTTGGCCACATCACTGTCATAGAGCTTCTTCACAGCCTGTCTGATCTGGTGCTTGTTGGCTTTAACATCCACAGTGAACACAAGCGTGTTGTTTTCTTCTATCTTCTTCACGGCCGACTCAGTGGTCAGCGGAAACTTGATGATAGCATAGTGGCCAAGCTTGTTTCTCCTGGGGGTGCTCTTCCGAGGATATCTGGGCTGCCTCCGGAGTCGCAGTGTCTTGGGCCGCCTGAAGGTGAGTGACATGCGGATCTTCTTTTTTGCGTGTGGCTGCGGACACCTTTCAACACTGCCTTCTTGGCCTTTAAGGCCTTCGCTTTGGCTTCGGCTTTAGGAGGAGCAGGAGCTTCCTTCGCTTTCGGTGCCGTCTTGTGAAAAGCGAAAAACATTATTTCAAAAATAATTTGTTTACAGTAAATCTgcctaagaatagtttccaaagtacttttggtaatttttaaccttaaagttaagctaagtaaaagatttgcattaaatatctagaccatttataaataagatacaatactaaaacattaattactgaacataaataattcaa
gtttatatacttttggcttcctgtttttacagagagactaaagatattttggcccgttaataaacatgtttttttctgccacactgaggaattgtattatgagaaaacacatccctctagatgttgggagatggtatattcatacattttctaacctactatagaatgctaatatatgacagtttataaccgtctacttcctagttttctctggaaaataaaagattactaagtattaaaattataatcaatatatgtaaataaaactactagaaataatagaataactagaaacaactctatgcaaagcatgcaagaaaagtagggcatgtttcgcaagtaaagtaggttgcattttttataaggaaaaCCATACAGAAGATAcaaataaaaagagatacctaaccttccctgtgttatatttgtatgggtaaaatgttatgttttcagaaattatataaaattcctggaagtttgtcaatgtcctccttatccatgctatgtgccactatagagtaatgagtcataattccaattattactttaaatgttgtgccaggcacagtggctcatgcctataatcccagcactttaggaggctgaggcgggtggatcacaaggtcaggagatccagaccatcctggttaactcggtgaatctccatctctattaaaaatataaaaaattagccgggcgtgatggaaggcacctgtagtcccagctactcgggaggctgaggcaggagaatggcgtgaacccaggagacagagcttgcagtgagccgagatcgcactgctgcactccagcctgggcgacagagcaagactctgtctctaaataaataaataaataaatgttgtctgccacagaaaaaatcgaatAT"; s2 = "gATTCATGGCTGAAATCATGTTTGACCAGCTATGTGTGTCTCTTAATCCAGTCAAGTAGATGTCTAAAATTAACCATCAGAATATTTATGCCTGATTCATGGCTGAAATCGTGTTTGACCAGCTATGTGTGTCTCTTACTCCACTCAAGTAGATGTCTAAAATTAACCATCAGAATATTTATGCCTGATTCATGGCTGAAATCATGTTTGACCAGCTATGTGTGTCTCTTAATCCAGTCAAGTAGATGTCTAAAATTAACCATCAGAATATTTATGCCTGATTCATGGCTGAAATCGTGTTTGACCAGCTATGTGTGTCTCTCAATCCGATCAAGTAGATGTCTGAAATTAACCATCAGAATATTTATGCCTGATTCATGGCTGAAATTTCAGGATGACAGCTATGAAATCTCTATTTGTGTTTGTATATCTATTAATGTATGTTATGTATATGTGATATTTTCTTAACTCCAGAGAGCATTGCAAAATTCATTTATGAAATCCTCTAAAAGTGCTCTATTCTAACTTGGCTTGGAAAAAAATAAGCATTTATAAATAAATATTCACCAAACTCCTAGAAATATAGGAACTGATCAAATGTTTCTTAAGTTAACATGATTTGGATAAAACTTAGTTAAATAAGATTAATATAGTATTTTTGGTGTAATAAAACAACTATATCTTCAAAATTATCATTATTGAATATAAAACAAGCATAAATTCCTATTCTGCTTGAGTTCTAGTCAAATAAGCTAATATTATACTTACTAGAAACGTAAAATCTTAAAGCTTATAGATTTGATTCTAATTAAGTTGTCATTCTTATGAAAAACATTATTTTTTTATGCTGAAAAGATACACATATATTTAGAGTTAGCCAGCTGGACTCAGTTTAGGTGATCCCAATTTTGTTACAACATCGAAAGCATCATAATCAGGAGCAAGTCGAACATATGCCTTGTTCTCTTTATCAGGACAAATCAGGGTGGTGACCTTGGCCACATCACTGTCATAGAGCTTCTTCACAGCCTGTCTGATCTGGTGCTTGTTGGCTTTAACATCCACAGTGAACACAAGCGTGTTGTTTTCTTCTATCTTCTTCACGGCC
GACTCAGTGGTCAGCGGAAACTTGATGATAGCATAGTGGCCAAGCTTGTTTCTCCTGGGGGTGCTCTTCCGAGGATATCTGGGCTGCCTCCGGAGTCGCAGTGTCTTGGGCCGCCTGAAGGTGAGTGACATGCGGATCTTCTTTTTTGCGTGTGGCTGCGGACACCTTTCAACACTGCCTTCTTGGCCTTTAAAGCCTTCGCTTTGGCTTCGGCTTTAGGAGGAGCAGGAGCTTCCTTCGCTTTCGGTGCCATCTTGTGAAAAGCGAAAAACATTATTTCAAAAATAATTTGTTTACAGTAAATCTGCCTATGAATAGTTTCCAAAGTACTTTTGGTAATTTTTAACCTTAAAGTTAAGCTAAGTAAAAGATTTGCATTAAATATCTAGACCATTTATAAATAAGATACAATACTAAAACATTAATTACTGAACATAAATAATTCAAGTTTATATACTTTTGGCTCCTATTTTTACAGAGAGACTAAAGATATTTTGGCCCGTTAATAAACATGTTTTTTTCTGCCACACTGAGGAATTGTATTATGAGGAAACACATCCCTCTAGATGTTGGGAGATGGTATATTCATACATTTTCTAACCTACTATAGAATGCTAATATATGACAGTTTATAACTGTCTACTTCCTAGTTTTCTCTGGAAAATAAAAGATTACTAAGTATTAAAATTATAATCAATATATGTAAATAAAACTACTAGAAATAATAGAATAACTAGAAACAACTCTATGCAAAGCATGCAAGAAAAGTAGGGCATGTTTCGCAAGTAAAGTAGGTTGCATTTTTTATAAGGAAAACCATACAGAAGATACAAATAAAAAGAGATACCTAACCTTCCCTGTGTTATATTTGTATGGGTAAAATGTTATGTTTTCAGAAATTATATAAAATTCCTGGAAGTTTGTCAATGTCCTCCTTATCCATGCTATGTGCCACTATAGAGTAATGAGTCATAATTCCAATTATTACTTTAAATGTTGTGCCAGGCACAGTGGCTCATGCCTATAATCCCAGCACTTTAGGAGGCTGAGGCGGGTGGATCACAAGGTCAGGAGATCCAGACCATCCTGGCTAACCCGGTGAATCTCCATCTCTATTAAAAATATAAAAAATTAGCCGGGCGTGATGGCAGGCACCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCAGGAGACAGAGCTTGCAGTGAGCCGAGATCGCACCGCTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCTAAATAAATAAATAAATAAATGTTGTCTGCCACAGAAAAAATCGAATAT"; s1 = 
"gattcatggctgaaatcgtgtttgaccagctatgtgtgtctctcaatccgatcaagtagatgtctaaaattaaccgtcagaatatttatgcctgattcatggctgaaattgtgtttgaccagctatgtgtgtctcttaatccactcaagtagatgtctaaaattaaccatcagaatatttatgcctgattcatggctgaaatcacgtttgaccagctatgtgtgtctcttaatccagtcaagtagatgtctaaaattaaccatcagaatatttatgcctgattcatggctgaaatcgtgtttgaccagctatgtgtgtctctcaatccgatcaagtagatgtctgaaattaaccatcagaatatttatgcctgattcatggctgaaatttcaggatgaaagctatgaaatctctatttgtgtttgtgtatctattaatgtatgttatgtatatgtgatattttcttaactccagagagcattgcaaaattcatttatgaaaacctctaaaagtgctctattctaacttggcttggaaaaaaataagcatttataaataaatattcaccaaactcctagaaatataggaactgatcaaatgtttcttaagttaacatgatttggataaaacttagttaaataagattaatatagtatttttggtgtaataaaacaactatatcttcaaaattatcattattgaatataaaacaagcataaattcctattctgcttgagttctagtcaaataagctaatattatacttactagaaacgtaaaatcttaaagcttatagatttgattctaattaagttgtcattcttatgaaaaacattattttttttatgctgaaaagatacacatatatttagagttagccagctggactcagtttaggtgatcccaattttgttacaacatcgaaagcatcataatcaggagcaagtcgaacatatgccttctctttatcaggacaaatcagggtggtgaccttggccacatcactgtcatagagcttcttcacagcctgtctgatctggtgcttgttggctttaacatccacagtgaacacaagcgtgttgttttcttctatcttcttcacggccgactcagtggtcagcggaaacttgatgatagcatagtggccaagcttgtttctcctgggggtgctcttccgaggatatctgggctgcctccggagtcgcagtgtcttgggccgcctgaaggtgagtgacatgcggatcttcttttttgcgtgtggctgcggacacctttcaacactgccttcttggcctttaaggccttcgctttggcttcggctttaggaggagcaggagcttccttcgctttcggtgccgtcttgtgaaaagcgaaaaacattatttcaaaaataatttgtttacagtaaatctgcctaagaatagtttccaaagtacttttggtaatttttaaccttaaagttaagctaagtaaaagatttgcattaaatatctagaccatttataaataagatacaatactaaaacattaattactgaacataaataattcaagtttatatacttttggcttcctgtttttacagagagactaaagatattttggcccgttaataaacatgtttttttctgccacactgaggaattgtattatgagaaaacacatccctctagatgttgggagatggtatattcatacattttctaacctactatagaatgctaatatatgacagtttataaccgtctacttcctagttttctctggaaaataaaagattactaagtattaaaattataatcaatatatgtaaataaaactactagaaataatagaataactagaaacaactctatgcaaagcatgcaagaaaagtagggcatgtttcgcaagtaaagtaggttgcattttttataaggaaaaccatacagaagatacaaataaaaagagatacctaaccttccctgtgttatatttgtatgggtaaaatgttatgttt
tcagaaattatataaaattcctggaagtttgtcaatgtcctccttatccatgctatgtgccactatagagtaatgagtcataattccaattattactttaaatgttgtgccaggcacagtggctcatgcctataatcccagcactttaggaggctgaggcgggtggatcacaaggtcaggagatccagaccatcctggttaactcggtgaatctccatctctattaaaaatataaaaaattagccgggcgtgatggaaggcacctgtagtcccagctactcgggaggctgaggcaggagaatggcgtgaacccaggagacagagcttgcagtgagccgagatcgcactgctgcactccagcctgggcgacagagcaagactctgtctctaaataaataaataaataaatgttgtctgccacagaaaaaatcgaatat"; s2 = "gattcatggctgaaatcatgtttgaccagctatgtgtgtctcttaatccagtcaagtagatgtctaaaattaaccatcagaatatttatgcctgattcatggctgaaatcgtgtttgaccagctatgtgtgtctcttactccactcaagtagatgtctaaaattaaccatcagaatatttatgcctgattcatggctgaaatcatgtttgaccagctatgtgtgtctcttaatccagtcaagtagatgtctaaaattaaccatcagaatatttatgcctgattcatggctgaaatcgtgtttgaccagctatgtgtgtctctcaatccgatcaagtagatgtctgaaattaaccatcagaatatttatgcctgattcatggctgaaatttcaggatgacagctatgaaatctctatttgtgtttgtatatctattaatgtatgttatgtatatgtgatattttcttaactccagagagcattgcaaaattcatttatgaaatcctctaaaagtgctctattctaacttggcttggaaaaaaataagcatttataaataaatattcaccaaactcctagaaatataggaactgatcaaatgtttcttaagttaacatgatttggataaaacttagttaaataagattaatatagtatttttggtgtaataaaacaactatatcttcaaaattatcattattgaatataaaacaagcataaattcctattctgcttgagttctagtcaaataagctaatattatacttactagaaacgtaaaatcttaaagcttatagatttgattctaattaagttgtcattcttatgaaaaacattatttttttatgctgaaaagatacacatatatttagagttagccagctggactcagtttaggtgatcccaattttgttacaacatcgaaagcatcataatcaggagcaagtcgaacatatgccttgttctctttatcaggacaaatcagggtggtgaccttggccacatcactgtcatagagcttcttcacagcctgtctgatctggtgcttgttggctttaacatccacagtgaacacaagcgtgttgttttcttctatcttcttcacggccgactcagtggtcagcggaaacttgatgatagcatagtggccaagcttgtttctcctgggggtgctcttccgaggatatctgggctgcctccggagtcgcagtgtcttgggccgcctgaaggtgagtgacatgcggatcttcttttttgcgtgtggctgcggacacctttcaacactgccttcttggcctttaaagccttcgctttggcttcggctttaggaggagcaggagcttccttcgctttcggtgccatcttgtgaaaagcgaaaaacattatttcaaaaataatttgtttacagtaaatctgcctatgaatagtttccaaagtacttttggtaatttttaaccttaaagttaagctaagtaaaagatttgcattaaatatctagaccatttataaataagatacaatactaaaacattaattactgaacataaataattca
agtttatatacttttggctcctatttttacagagagactaaagatattttggcccgttaataaacatgtttttttctgccacactgaggaattgtattatgaggaaacacatccctctagatgttgggagatggtatattcatacattttctaacctactatagaatgctaatatatgacagtttataactgtctacttcctagttttctctggaaaataaaagattactaagtattaaaattataatcaatatatgtaaataaaactactagaaataatagaataactagaaacaactctatgcaaagcatgcaagaaaagtagggcatgtttcgcaagtaaagtaggttgcattttttataaggaaaaccatacagaagatacaaataaaaagagatacctaaccttccctgtgttatatttgtatgggtaaaatgttatgttttcagaaattatataaaattcctggaagtttgtcaatgtcctccttatccatgctatgtgccactatagagtaatgagtcataattccaattattactttaaatgttgtgccaggcacagtggctcatgcctataatcccagcactttaggaggctgaggcgggtggatcacaaggtcaggagatccagaccatcctggctaacccggtgaatctccatctctattaaaaatataaaaaattagccgggcgtgatggcaggcacctgtagtcccagctactcgggaggctgaggcaggagaatggcgtgaacccaggagacagagcttgcagtgagccgagatcgcaccgctgcactccagcctgggcgacagagcaagactctgtctctaaataaataaataaataaatgttgtctgccacagaaaaaatcgaatat"; char *a1 = new char [10240]; char *a2 = new char [10240]; halign(s1, s2, strlen(s1), strlen(s2), a1, a2); int match = 0; int mismatch = 0; int gap = 0; for (int i=0; a1[i]; i++) { if ((a1[i] == '-') || (a2[i] == '-')) { gap++; a1[i] = toupper(a1[i]); a2[i] = toupper(a2[i]); } else if (a1[i] != a2[i]) { mismatch++; a1[i] = toupper(a1[i]); a2[i] = toupper(a2[i]); } else { match++; a1[i] = tolower(a1[i]); a2[i] = tolower(a2[i]); } } fprintf(stdout, "a1 = %s\n", a1); fprintf(stdout, "a2 = %s\n", a2); fprintf(stdout, "mismatch=%d gap=%d match=%d\n", mismatch, gap, match); } kmer-code-2013-trunk/libbio/test/test-bigmer-msf.C0000644000000000000000000001366712322046702020511 0ustar rootroot#include #include #include #include #include "bio++.H" // Build a merStreamFile using small mers, read it back using bigger mers. // // construct a fasta sequence: // small sequence // big sequence, multiple of mersize // small sequence // big sequence // etc // small sequence // // Then we reconstruct the sequences using mers. All three merstream // sources are tested (the character string source is not tested). 
//  The merStreamFile is tested both forwards (nextMer(), via the
//  merstream interface) and backwards (setIterationStart()).

#define BUILD_SIZE 88
#define TEST_SIZE 403
#define MERS_PER_SEQ 37
#define TEST_ITERATIONS 300

#define MSF_FILENAME "junk.bigmer"
#define FASTA_FILENAME "junk.bigmer.fasta"

//  construct a multi-fasta sequence, alternating short and long
//  sequences.  Short sequences are less than TEST_SIZE long (most,
//  not all, longer than BUILD_SIZE).  Long sequences are exactly
//  TEST_SIZE * MERS_PER_SEQ long -- this lets us dump the mers to
//  reconstruct the sequence.
//
//  NOTE(review): most of this function, and the head of test1() that
//  followed it, were lost when the archive was flattened (every
//  "<letter ... >" span was stripped).  The surviving fragments are kept
//  verbatim below; restore the file from upstream before compiling.
//
void buildFastA(void) {
  mt_s *mtctx = mtInit(time(0L));
  char *seq = new char [TEST_SIZE * MERS_PER_SEQ + 1];
  char dna[4] = { 'A', 'C', 'G', 'T' };
  FILE *F = fopen(FASTA_FILENAME, "w");

  //  Surviving pieces of the "short" and "long" sequence writers.
  for (uint32 i=0; i"uint32FMT"short\n", i);
  len = mtRandom32(mtctx) % (TEST_SIZE-1) + 1;
  for (uint32 s=0; s"uint32FMT"long\n", i);
  len = TEST_SIZE * MERS_PER_SEQ;

  //  From here on the text belongs to test1(): it reads the FastA back as
  //  alternating short (sseq) and long (lseq) sequences and rebuilds each
  //  long sequence into mseq from mers.
  for (uint32 s=0; sgetSequenceInCore();
  seqInCore *lseq = fasta->getSequenceInCore();
  char mseq[TEST_SIZE * MERS_PER_SEQ + 1];

  //  Construct a reader, and load the first mer.

  merStream *MS = 0L;
  merStreamFileReader *RD = 0L;
  chainedSequence *CS = 0L;

  //  style selects the mer source under test.
  //  NOTE(review): the messages for styles 1 and 2 are labelled "test1(2)"
  //  and "test1(3)"; the labels are off by one relative to the argument.
  switch (style) {
    case 0:
      fprintf(stderr, "test1(0)-- Testing merStreamFileReader -> merStream\n");
      RD = new merStreamFileReader(MSF_FILENAME, TEST_SIZE);
      MS = new merStream(RD);
      break;
    case 1:
      fprintf(stderr, "test1(2)-- Testing chainedSequence -> merStream\n");
      CS = new chainedSequence();
      CS->setSource(FASTA_FILENAME);
      CS->finish();
      MS = new merStream(TEST_SIZE, CS);
      break;
    case 2:
      fprintf(stderr, "test1(3)-- Testing merStreamFileReader (backwards)\n");
      RD = new merStreamFileReader(MSF_FILENAME, TEST_SIZE);
      break;
    default:
      break;
  }

  //  One iteration per (short, long) sequence pair.
  for (uint32 s=0; fasta->eof() == false; s++) {

    //  NOTE(review): damaged span -- the loop that skips the mers of the
    //  short sequence, the inner switch header and the forward-reading
    //  cases were partly lost here.
    for (uint32 i=0; inextMer();
    for (uint32 i=0; itheFMer().merToString(mseq + i * TEST_SIZE);
    if (i != MERS_PER_SEQ-1)
      MS->nextMer(TEST_SIZE - 1);
    }
    break;
    case 2:
      //  Same thing, but read the mers backwards -- we could read
      //  the sequences backwards, too, but that doesn't gain us
      //  anything (we still seek to every location).
      //
      for (uint32 i=MERS_PER_SEQ; i--; ) {
        char copy[TEST_SIZE + 1];

        RD->setIterationStart(s * (MERS_PER_SEQ * TEST_SIZE - TEST_SIZE + 1) + i * (TEST_SIZE));
        RD->nextMer();
        RD->theFMer().merToString(copy);

        //  Copies exactly TEST_SIZE characters and no terminator; mseq is
        //  terminated once, after the loop.
        strncpy(mseq + i * TEST_SIZE, copy, TEST_SIZE);

        //  Aww, what the hell!  Test reverse complement stuff too!
        //
        //  Reverse-complementing the forward mer must give the reverse mer,
        //  and vice versa.
        kMer f = RD->theFMer();
        kMer r = RD->theRMer();
        f.reverseComplement();
        if (f != r) {
          char str[1025];
          fprintf(stderr, "Reverse Complement mismatch:\n");
          fprintf(stderr, " reversed fwd = '%s'\n", f.merToString(str));
          fprintf(stderr, " rev = '%s'\n", r.merToString(str));
          exit(1);
        }

        f = RD->theFMer();
        r = RD->theRMer();
        r.reverseComplement();
        if (f != r) {
          char str[1025];
          fprintf(stderr, "Reverse Complement mismatch:\n");
          fprintf(stderr, " fwd = '%s'\n", f.merToString(str));
          fprintf(stderr, " reversed rev = '%s'\n", r.merToString(str));
          exit(1);
        }
      }
      mseq[MERS_PER_SEQ * TEST_SIZE] = 0;
      break;
    default:
      break;
    }

    //  Compare our mer-constructed sequence to the long sequence in
    //  the file
    //
    if (strcmp(mseq, lseq->sequence()) != 0) {
      fprintf(stderr, "FAIL: seq="uint32FMT"\nmseq=%s\nlseq=%s\n", s, mseq, lseq->sequence());
      exit(1);
    }

    //  Advance to the next (short, long) pair.
    delete sseq;
    delete lseq;
    sseq = fasta->getSequenceInCore();
    lseq = fasta->getSequenceInCore();
  }

  delete sseq;
  delete lseq;
  delete CS;
  delete RD;
  delete MS;

  fprintf(stderr, " OK!\n");
}

//  Builds the random FastA file and a merStreamFile of BUILD_SIZE-mers from
//  it, runs test1() in each of its three styles, then removes the files it
//  created.  Exits 0 without testing when KMER_WORDS is too small for
//  TEST_SIZE-mers.
int main(int argc, char **argv) {

  //  Minimum KMER_WORDS is 13 -- mersizes up to 416 bases
  if (KMER_WORDS < 13) {
    fprintf(stderr, "I need at least KMER_WORDS == 13; test not run.\n");
    exit(0);
  }

  buildFastA();

  merStreamFileBuilder *B = new merStreamFileBuilder(BUILD_SIZE, FASTA_FILENAME, MSF_FILENAME);
  B->build(true);
  delete B;

  test1(0);
  test1(1);
  test1(2);

  unlink(FASTA_FILENAME);
  unlink(FASTA_FILENAME "idx");
  unlink(MSF_FILENAME ".merStream");

  exit(0);
}
kmer-code-2013-trunk/libbio/halign.c0000644000000000000000000002604110711332076016017 0ustar rootroot
//  NOTE(review): the operands of the four bare #include lines below were
//  lost when this archive was flattened.  Restore them from upstream.
#include
#include
#include
#include
#include "bio.h"

//  Liliana Florea's halign (a sim4-derivative).

//  Edit-script operation codes.
#define DEL 0
#define INS 1
#define SUB 2

#ifdef min
#undef min
#endif
#define min(x,y) ((x)<=(y) ? (x):(y))

#ifdef max
#undef max
#endif
//  NOTE(review): this dump wraps in the middle of the macro below; its
//  replacement text continues on the following line of the dump.
#define max(x,y) ((x)>=(y) ? 
(x):(y)) typedef struct edit_script { int op_type; /* SUB, INS or DEL */ int num; /* Number of operations */ struct edit_script *next; } edit_script; typedef struct edit_script_list { int offset1, offset2; int len1, len2; int score; int first; edit_script *script; } edit_script_list; static int snake(const char *seq1, const char *seq2, int k, int x, int endx, int endy) { int y; if (x<0) return x; y = x+k; while ((x < endx) && (y < endy) && (toUpper[seq1[x]] == toUpper[seq2[y]])) { ++x; ++y; } return x; } static int rsnake(const char *seq1, const char *seq2, int k, int x, int startx, int starty, int M, int N) { int y; if (x>M) return x; if ((startx<0) || (starty<0)) fprintf(stderr, "halign::rsnake()-- TROUBLE!!! startx: %5d, starty: %5d\n",startx, starty); if ((x>M) || (x+k>N)) fprintf(stderr, "halign::rsnake()-- TROUBLE!!! x: %5d, y: %5d\n",x,x+k); y = x+k; while ((x>startx) && (y>starty) && (toUpper[seq1[x-1]] == toUpper[seq2[y-1]])) { --x; --y; } return x; } static int align_get_dist(const char *seq1, const char *seq2, int i1, int j1, int i2, int j2, int limit, void *ph) { int *last_d, *temp_d; int goal_diag, ll, uu; int c, k, row; int start; int lower, upper; /* Compute the boundary diagonals */ start = j1 - i1; lower = max(j1-i2, start-limit); upper = min(j2-i1, start+limit); goal_diag = j2-i2; if (goal_diag > upper || goal_diag < lower) { fprintf(stderr, "The two sequences are not really similar.\n"); fprintf(stderr, "Please try an exact aligning method.\n"); exit(1); } /* Allocate space for forward vectors */ last_d = (int *)palloc2((upper-lower+1)*sizeof(int), ph) - lower; temp_d = (int *)palloc2((upper-lower+1)*sizeof(int), ph) - lower; /* Initialization */ for (k=lower; k<=upper; ++k) last_d[k] = INT_MIN; last_d[start] = snake(seq1, seq2, start, i1, i2, j2); if (last_d[goal_diag] >= i2) return 0; for (c=1; c<=limit; ++c) { ll = max(lower,start-c); uu = min(upper, start+c); for (k=ll; k<=uu; ++k) { if (k == ll) row = last_d[k+1]+1; /* DELETE */ else if (k 
== uu) row = last_d[k-1]; /* INSERT */ else if ((last_d[k]>=last_d[k+1]) && (last_d[k]+1>=last_d[k-1])) row = last_d[k]+1; /*SUBSTITUTE */ else if ((last_d[k+1]+1>=last_d[k-1]) && (last_d[k+1]>=last_d[k])) row = last_d[k+1]+1; /* DELETE */ else row = last_d[k-1]; /* INSERT */ temp_d[k] = snake(seq1,seq2,k,row,i2,j2); } for (k=ll; k<=uu; ++k) last_d[k] = temp_d[k]; if (last_d[goal_diag] >= i2) return c; } /* Ran out of distance limit */ return -1; } static void align_path(const char *seq1, const char *seq2, int i1, int j1, int i2, int j2, int dist, edit_script **head, edit_script **tail, int M, int N, void *ph) { int *last_d, *temp_d; /* forward vectors */ int *rlast_d, *rtemp_d; /* backward vectors */ edit_script *head1, *tail1, *head2, *tail2; int midc, rmidc; int start; int lower, upper; int rstart, rlower, rupper; int c, k, row; int mi, mj, tmp, ll, uu; char flag; *head = *tail = NULL; /* Boundary cases */ if (i1 == i2) { if (j1 == j2) *head = NULL; else { head1 = (edit_script *)palloc2(sizeof(edit_script), ph); head1->op_type = INS; head1->num = j2-j1; head1->next = NULL; *head = *tail = head1; } return; } if (j1 == j2) { head1 = (edit_script *)palloc2(sizeof(edit_script), ph); head1->op_type = DEL; head1->num = i2-i1; head1->next = NULL; *head = *tail = head1; return; } if (dist <= 1) { start = j1-i1; if (j2-i2 == j1-i1) { head1 = (edit_script *)palloc2(sizeof(edit_script), ph); head1->op_type = SUB; head1->num = i2-i1; head1->next = NULL; *head = *tail = head1; } else if (j2-j1 == i2-i1+1) { tmp = snake(seq1,seq2,start,i1,i2,j2); if (tmp>i1) { head1 = (edit_script *)palloc2(sizeof(edit_script), ph); head1->op_type = SUB; head1->num = tmp-i1; *head = head1; } head2 = (edit_script *)palloc2(sizeof(edit_script), ph); head2->op_type = INS; head2->num = 1; if (*head) head1->next = head2; else *head = head2; *tail = head2; head2->next = NULL; if (i2-tmp) { head1 = head2; *tail = head2 = (edit_script *)palloc2(sizeof(edit_script), ph); head2->op_type = SUB; 
head2->num = i2-tmp; head2->next = NULL; head1->next = head2; } } else if (j2-j1+1 == i2-i1) { tmp = snake(seq1,seq2,start,i1,i2,j2); if (tmp>i1) { head1 = (edit_script *)palloc2(sizeof(edit_script), ph); head1->op_type = SUB; head1->num = tmp-i1; *head = head1; } head2 = (edit_script *)palloc2(sizeof(edit_script), ph); head2->op_type = DEL; head2->num = 1; if (*head) head1->next = head2; else *head = head2; *tail = head2; head2->next = NULL; if (i2>tmp+1) { head1 = head2; *tail = head2 = (edit_script *)palloc2(sizeof(edit_script), ph); head2->op_type = SUB; head2->num = i2-tmp-1; head2->next = NULL; head1->next = head2; } } else { fprintf(stderr, "halign::align_path()-- warning: something wrong when aligning."); } return; } /* Divide the problem at the middle cost */ midc = dist/2; rmidc = dist - midc; /* Compute the boundary diagonals */ start = j1 - i1; lower = max(j1-i2, start-midc); upper = min(j2-i1, start+midc); rstart = j2-i2; rlower = max(j1-i2, rstart-rmidc); rupper = min(j2-i1, rstart+rmidc); /* Allocate space for forward vectors */ last_d = (int *)palloc2((upper-lower+1)*sizeof(int), ph) - lower; temp_d = (int *)palloc2((upper-lower+1)*sizeof(int), ph) - lower; for (k=lower; k<=upper; k++) last_d[k] = -1; last_d[start] = snake(seq1,seq2,start,i1,i2,j2); /* Forward computation */ for (c=1; c<=midc; ++c) { ll = max(lower,start-c); uu = min(upper,start+c); for (k=ll; k<=uu; ++k) { if (k == ll) { /* DELETE : down from (k+1,c-1) */ row = last_d[k+1]+1; } else if (k == uu) { /* INSERT : right from (k-1,c-1) */ row = last_d[k-1]; } else if ((last_d[k]>=last_d[k+1]) && (last_d[k]+1>=last_d[k-1])) { /* SUBSTITUTE */ row = last_d[k]+1; } else if ((last_d[k+1]+1>=last_d[k-1]) && (last_d[k+1]>=last_d[k])) { /* DELETE */ row = last_d[k+1]+1; } else { /* INSERT */ row = last_d[k-1]; } temp_d[k] = snake(seq1,seq2,k,row,i2,j2); } for (k=ll; k<=uu; ++k) last_d[k] = temp_d[k]; } /* Allocate space for backward vectors */ rlast_d = (int 
*)palloc2((rupper-rlower+1)*sizeof(int), ph) - rlower; rtemp_d = (int *)palloc2((rupper-rlower+1)*sizeof(int), ph) - rlower; for (k=rlower; k<=rupper; k++) rlast_d[k] = i2+1; rlast_d[rstart] = rsnake(seq1,seq2,rstart,i2,i1,j1,M,N); /* Backward computation */ for (c=1; c<=rmidc; ++c) { ll = max(rlower,rstart-c); uu = min(rupper,rstart+c); for (k=ll; k<=uu; ++k) { if (k == ll) { /* INSERT : left from (k+1,c-1) */ row = rlast_d[k+1]; } else if (k == uu) { /* DELETE : up from (k-1,c-1) */ row = rlast_d[k-1]-1; } else if ((rlast_d[k]-1<=rlast_d[k+1]) && (rlast_d[k]-1<=rlast_d[k-1]-1)) { /* SUBSTITUTE */ row = rlast_d[k]-1; } else if ((rlast_d[k-1]-1<=rlast_d[k+1]) && (rlast_d[k-1]-1<=rlast_d[k]-1)) { /* DELETE */ row = rlast_d[k-1]-1; } else { /* INSERT */ row = rlast_d[k+1]; } rtemp_d[k] = rsnake(seq1,seq2,k,row,i1,j1,M,N); } for (k=ll; k<=uu; ++k) rlast_d[k] = rtemp_d[k]; } /* Find (mi, mj) such that the distance from (i1, j1) to (mi, mj) is midc and the distance from (mi, mj) to (i2, j2) is rmidc. 
*/ flag = 0; mi = i1; mj = j1; ll = max(lower,rlower); uu = min(upper,rupper); for (k=ll; k<=uu; ++k) { if (last_d[k]>=rlast_d[k]) { if (last_d[k]-i1>=i2-rlast_d[k]) { mi = last_d[k]; mj = k+mi; } else { mi = rlast_d[k]; mj = k+mi; } flag = 1; break; } } if (flag) { /* Find a path from (i1,j1) to (mi,mj) */ align_path(seq1,seq2,i1,j1,mi,mj,midc,&head1,&tail1,M,N,ph); /* Find a path from (mi,mj) to (i2,j2) */ align_path(seq1,seq2,mi,mj,i2,j2,rmidc,&head2,&tail2,M,N,ph); /* Join these two paths together */ if (head1) tail1->next = head2; else head1 = head2; } else { fprintf(stderr, "halign::align_path()-- warning: something wrong when dividing\n"); head1 = NULL; } *head = head1; if (head2) *tail = tail2; else *tail = tail1; } void halign(const char *seq1, const char *seq2, const int len1, const int len2, char *alnline1, char *alnline2) { edit_script *head, *tail, *tp; int i; void *ph; ph = pallochandle(0); align_path(seq1, seq2, 0, 0, len1, len2, align_get_dist(seq1, seq2, 0, 0, len1, len2, len1+len2, ph), &head, &tail, len1, len2, ph); /* generate the alignment(s) */ *alnline1 = 0; *alnline2 = 0; for (tp=head; tp; tp=tp->next) { switch (tp->op_type) { case SUB: for (i=0; inum; i++) { if (toUpper[*seq1] == toUpper[*seq2]) { *alnline1 = toLower[*seq1]; *alnline2 = toLower[*seq2]; } else { *alnline1 = toUpper[*seq1]; *alnline2 = toUpper[*seq2]; } seq1++; seq2++; alnline1++; alnline2++; } break; case INS: for (i=0; inum; i++) { *alnline1 = '-'; *alnline2 = toUpper[*seq2]; seq2++; alnline1++; alnline2++; } break; case DEL: for (i=0; inum; i++) { *alnline2 = '-'; *alnline1 = toUpper[*seq1]; seq1++; alnline1++; alnline2++; } break; default: fprintf(stderr, "halign::halign()-- unrecognized op_type in script. %d\n", tp->op_type); exit(0); } } *alnline1 = 0; *alnline2 = 0; pfree2(ph); pfreehandle(ph); } kmer-code-2013-trunk/libbio/kmer.H0000644000000000000000000001272412322046702015461 0ustar rootroot// Copyright (c) 2005 J. 
Craig Venter Institute // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // A 'simple' kMer datastructure. #ifndef BIO_KMER_H #define BIO_KMER_H // The maximum size of a mer. You get 32 bases per word, so // KMER_WORDS=4 will get you up to a 128-mer. // #define KMER_WORDS 1 #include "util.h" #include "util++.H" #include "bio.h" #include "bio++.H" #include "kmeriface.H" #if KMER_WORDS == 1 #include "kmertiny.H" typedef kMerTiny kMer; #else #include "kmerhuge.H" typedef kMerHuge kMer; #endif #undef DEBUGADDBASE #undef DEBUGCOMP #undef DEBUGSPACE class kMerBuilder { public: kMerBuilder(uint32 ms=0, uint32 cm=0, char *tm=0L); ~kMerBuilder(); // Clear all mer data, reset state to as just after construction. void clear(bool clearMer=true); // Returns true if we need another base to finish the mer. This // only occurs for compressed mers, if we are in a homopolymer run. 
// private: bool addBaseContiguous(uint64 cf, uint64 cr); bool addBaseCompressed(uint64 cf, uint64 cr); bool addBaseSpaced(uint64 cf, uint64 cr); bool addBaseCompressedSpaced(uint64 cf, uint64 cr); public: bool addBase(char ch) { uint64 cf = letterToBits[ch]; uint64 cr = letterToBits[complementSymbol[ch]]; #ifdef DEBUGADDBASE fprintf(stderr, "addBase() %c\n", ch); #endif if (_style == 0) return(addBaseContiguous(cf, cr)); if (_style == 1) return(addBaseCompressed(cf, cr)); if (_style == 2) return(addBaseSpaced(cf, cr)); if (_style == 3) return(addBaseCompressedSpaced(cf, cr)); fprintf(stderr, "kMerBuilder::addBase()-- Invalid mer type %d.\n", _style); exit(1); return(false); } void mask(void) { _fMer->mask(true); _rMer->mask(false); }; kMer const &theFMer(void) { return(*_fMer); }; kMer const &theRMer(void) { return(*_rMer); }; kMer const &theCMer(void) { return((theFMer() < theRMer()) ? theFMer() : theRMer()); }; uint32 merSize(void) { return(_merSize); }; uint32 templateSpan(void) { return(_templateSpan); }; uint32 baseSpan(uint32 b) { return(_compressionLength[(_compressionIndex + 1 + b) % _merSize]);; }; private: // Style of builder we are uint32 _style; // Amount of the mer that has valid sequence. Sigh. I really needed a signed value here -- // where negative values mean that we first have to get to the end of the template that was // invalid, then we need to build a new mer. // // And, yes, just simply making it signed leads to all sortes of compiler warnings about // comparing signed and unsigned. And I've been here before, and those warnings just propate // endlessly. Just go away, Mr. Smartypants. // // Details: when building spaced seeds, if we hit an N in the middle of the template, we need to // invalidate the mer, but not start building a new mer until we exhaust the current template. // The example is template=1101. Suppose we hit an N at the second 1. We set the merSizeValid // to 0, and proceed. 
When we push on the base for the last 1 in the template, we'd increment // the merSizeValid. The first two 1's in the template would now create a mer big enough to be // valid, and we'd return it -- but now the template we're using is 0111. // // _merSizeValid is offset by _merSize (e.g., the true valid size is _merSizeValid - _merSize). // _merSizeValidIs is the size _merSizeValid needs to be in order for it to be valid. // Similarily, _merSizeValidZero is the value of zero (currently this is equal to _merSize). // uint32 _merSize; // Desired number of bases in the mer uint32 *_merSizeValid; // Actual number of bases in the mer uint32 _merSizeValidZero; // Definition of 'zero' bases in the mer uint32 _merSizeValidIs; // Definition of 'full' bases in the mer // An array of mers, we allocate all mers in one block kMer *_merStorage; // Pointer to the currently active mer kMer *_fMer; kMer *_rMer; // For compression uint32 _compression; uint32 _compressionIndex; // index into cL[] that is the last base in the mer uint32 _compressionFirstIndex; // index into cL[] that is the first base in a run uint32 *_compressionLength; // one per base uint32 _compressionCurrentLength; // For templates uint32 _templateSpan; // # of 0's and 1's in the template uint32 _templateLength; // length of the pattern in the template char *_template; // character string template uint32 _templatePos; // position we are building in the template uint32 _templateMer; // the mer we should output next uint32 _templateFirst; // if true, we're still building the initial mer }; #endif // BIO_KMER_H kmer-code-2013-trunk/libbio/kmertiny.H0000644000000000000000000000651212322046702016363 0ustar rootroot class kMerTiny { public: kMerTiny(uint32 ms=uint32ZERO) { setMerSize(ms); clear(); }; ~kMerTiny() { }; void setMerSize(uint32 ms); uint32 getMerSize(void) const { return(_merSize); }; void setMerSpan(uint32 ms) { _merSpan = ms; }; uint32 getMerSpan(void) const { return(_merSpan); }; kMerTiny 
&reverseComplement(void) { _md = reverseComplementMer(_merSize, _md); return(*this); }; void clear(void) { _md = uint64ZERO; }; void smallest(void) { clear(); }; void largest(void) { clear(); reverseComplement(); }; private: void operator>>=(uint32 x) { _md >>= x; }; void operator<<=(uint32 x) { _md <<= x; }; public: void operator+=(uint64 x) { *this <<= 2; assert((x & 0xfc) == 0); _md |= x & uint64NUMBER(0x3); }; void operator-=(uint64 x) { *this >>= 2; assert((x & 0xfc) == 0); _md |= (x & uint64NUMBER(0x3)) << _lastShift; }; public: void mask(bool) { _md &= _mask; }; public: bool operator!=(kMerTiny const &r) const { return(_md != r._md); }; bool operator==(kMerTiny const &r) const { return(_md == r._md); }; bool operator< (kMerTiny const &r) const { return(_md < r._md); }; bool operator> (kMerTiny const &r) const { return(_md > r._md); }; bool operator<=(kMerTiny const &r) const { return(_md <= r._md); }; bool operator>=(kMerTiny const &r) const { return(_md >= r._md); }; int qsort_less(kMerTiny const &r) const { if (_md < r._md) return(-1); if (_md > r._md) return( 1); return(0); }; public: operator uint64 () const {return(_md);}; public: void writeToBitPackedFile(bitPackedFile *BPF, uint32 numBits=0) const { BPF->putBits(_md, _merSize << 1); }; void readFromBitPackedFile(bitPackedFile *BPF, uint32 numBits=0) { _md = BPF->getBits(_merSize << 1); }; public: void setBits(uint32 pos, uint32 numbits, uint64 val) { _md &= ~(uint64MASK(numbits) << pos); _md |= val << pos; }; uint64 getBits(uint32 pos, uint32 numbits) const { return((_md >> pos) & uint64MASK(numbits)); }; public: uint64 startOfMer(uint32 bits) const { return(getBits((_merSize << 1) - bits, bits)); }; uint64 endOfMer(uint32 bits) const { return(_md & uint64MASK(bits)); }; public: uint64 getWord(uint32 wrd) const { return(_md); }; void setWord(uint32 wrd, uint64 val) { _md = val; }; public: char *merToString(char *instr) const; private: uint64 _md; // The _merSize is always the number of letters in the 
mer -- if we // are a spaced seed, it is the weight. // uint32 _merSize; uint32 _merSpan; // The mask is used to make sure the mer has only _merSize bases // set -- we can get more than that if we shift to the left. The // uint64 _mask; // For operator-=() (add a base to the left end) we need to know // what the last word is, and how far to shift the bits. // uint32 _lastShift; }; inline void kMerTiny::setMerSize(uint32 ms) { _merSize = ms; _merSpan = ms; _lastShift = (2 * ms - 2) % 64; _mask = uint64MASK(_merSize << 1); } inline char * kMerTiny::merToString(char *str) const { for (uint32 i=0; i<_merSize; i++) str[_merSize-i-1] = bitsToLetter[(_md >> (2*i)) & 0x03]; str[_merSize] = 0; return(str); } kmer-code-2013-trunk/libbio/alphabet.c0000644000000000000000000025603011043436604016341 0ustar rootroot// // Automagically generated -- DO NOT EDIT! // See ../kmer/libbio/alphabet-generate.c for details. // unsigned char whitespaceSymbol[256] = { 0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; unsigned char toLower[256] = { 
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 }; unsigned char toUpper[256] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 }; unsigned char letterToBits[256] = { 
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,1,2,3,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,1,255,255,255,2,255,255,255,255,255,255,255,255,255,255,255,255,3,255,255,255,255,255,255,255,255,255,255,255,255,0,255,1,255,255,255,2,255,255,255,255,255,255,255,255,255,255,255,255,3,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 }; unsigned char bitsToLetter[256] = { 65,67,71,84,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63 }; unsigned char bitsToColor[256] = { 
48,49,50,51,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63 }; unsigned char complementSymbol[256] = { 63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,48,49,50,51,63,63,63,63,63,63,63,63,63,63,63,63,63,84,86,71,72,63,63,67,68,63,63,77,63,75,78,63,63,63,89,87,65,65,66,83,63,82,63,63,63,63,63,63,63,116,118,103,104,63,63,99,100,63,63,109,63,107,110,63,63,63,121,119,97,97,98,115,63,114,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63 }; unsigned char IUPACidentity[128][128] = { {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, }; unsigned char baseToColor[128][128] = { {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,49,50,51,52,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,49,48,51,50,52,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,50,51,48,49,52,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,51,50,49,48,52,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,97,99,103,116,110,46,46,46,46,46,46,46,46,46,46,46,46,48,52,49,52,52,52,50,52,52,52,52,52,52,52,52,52,52,52,52,51,52,52,52,52,52,52,46,46,46,46,46,46,48,52,49,52,52,52,50,52,52,52,52,52,52,52,52,52,52,52,52,51,52,52,52,52,52,52,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,99,97,116,103,110,46,46,46,46,46,46,46,46,46,46,46,46,49,52,48,52,52,52,51,52,52,52,52,52,52,52,52,52,52,52,52,50,52,52,52,52,52,52,46,46,46,46,46,46,49,52,48,52,52,52,51,52,52,52,52,52,52,52,52,52,52,52,52,50,52,52,52,52,52,52,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,103,116,97,99,110,46,46,46,46,46,46,46,46,46,46,46,46,50,52,51,52,52,52,48,52,52,52,52,52,52,52,52,52,52,52,52,49,52,52,52,52,52,52,46,46,46,46,46,46,50,52,51,52,52,52,48,52,52,52,52,52,52,52,52,52,52,52,52,49,52,52,52,52,52,52,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,97,99,103,116,110,46,46,46,46,46,46,46,46,46,46,46,46,48,52,49,52,52,52,50,52,52,52,52,52,52,52,52,52,52,52,52,51,52,52,52,52,52,52,46,46,46,46,46,46,48,52,49,52,52,52,50,52,52,52,52,52,52,52,52,52,52,52,52,51,52,52,52,52,52,52,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,116,103,99,97,110,46,46,46,46,46,46,46,46,46,46,46,46,51,52,50,52,52,52,49,52,52,52,52,52,52,52,52,52,52,52,52,48,52,52,52,52,52,52,46,46,46,46,46,46,51,52,50,52,52,52,49,52,52,52,52,52,52,52,52,52,52,52,52,48,52,52,52,52,52,52,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,97,99,103,116,110,46,46,46,46,46,46,46,46,46,46,46,46,48,52,49,52,52,52,50,52,52,52,52,52,52,52,52,52,52,52,52,51,52,52,52,52,52,52,46,46,46,46,46,46,48,52,49,52,52,52,50,52,52,52,52,52,52,52,52,52,52,52,52,51,52,52,52,52,52,52,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,99,97,116,103,110,46,46,46,46,46,46,46,46,46,46,46,46,49,52,48,52,52,52,51,52,52,52,52,52,52,52,52,52,52,52,52,50,52,52,52,52,52,52,46,46,46,46,46,46,49,52,48,52,52,52,51,52,52,52,52,52,52,52,52,52,52,52,52,50,52,52,52,52,52,52,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,103,116,97,99,110,46,46,46,46,46,46,46,46,46,46,46,46,50,52,51,52,52,52,48,52,52,52,52,52,52,52,52,52,52,52,52,49,52,52,52,52,52,52,46,46,46,46,46,46,50,52,51,52,52,52,48,52,52,52,52,52,52,52,52,52,52,52,52,49,52,52,52,52,52,52,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,97,99,103,116,110,46,46,46,46,46,46,46,46,46,46,46,46,48,52,49,52,52,52,50,52,52,52,52,52,52,52,52,52,52,52,52,51,52,52,52,52,52,52,46,46,46,46,46,46,48,52,49,52,52,52,50,52,52,52,52,52,52,52,52,52,52,52,52,51,52,52,52,52,52,52,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,116,103,99,97,110,46,46,46,46,46,46,46,46,46,46,46,46,51,52,50,52,52,52,49,52,52,52,52,52,52,52,52,52,52,52,52,48,52,52,52,52,52,52,46,46,46,46,46,46,51,52,50,52,52,52,49,52,52,52,52,52,52,52,52,52,52,52,52,48,52,52,52,52,52,52,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46,46,48,46,49,46,46,46,50,46,46,46,46,46,46,52,46,46,46,46,46,51,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, 
{46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, {46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46}, }; kmer-code-2013-trunk/libbio/mers.h0000644000000000000000000000256312322046702015531 0ustar rootroot#ifndef BIO_MERS_H #define BIO_MERS_H inline uint64 reverseComplementMer(uint32 ms, uint64 fmer) { // The interested reader shall consult bri-bits.h // Reverse the mer // uint64 rmer = fmer; rmer = ((rmer >> 2) & 0x3333333333333333llu) | ((rmer << 2) & 0xccccccccccccccccllu); rmer = ((rmer >> 4) & 0x0f0f0f0f0f0f0f0fllu) | ((rmer << 4) & 
0xf0f0f0f0f0f0f0f0llu); rmer = ((rmer >> 8) & 0x00ff00ff00ff00ffllu) | ((rmer << 8) & 0xff00ff00ff00ff00llu); rmer = ((rmer >> 16) & 0x0000ffff0000ffffllu) | ((rmer << 16) & 0xffff0000ffff0000llu); rmer = ((rmer >> 32) & 0x00000000ffffffffllu) | ((rmer << 32) & 0xffffffff00000000llu); // Complement the bases // rmer ^= 0xffffffffffffffffllu; // Shift and mask out the bases not in the mer // rmer >>= 64 - ms * 2; rmer &= uint64MASK(ms * 2); return(rmer); } // Used for in seagen/encodedQuery.C (diagnostics) and // libbio/kmerhuge.H (in its merToString method). inline char * uint64ToMerString(uint32 ms, uint64 mer, char *str) { for (uint32 i=0; i> (2*i)) & 0x03]; str[ms] = 0; return(str); } #if 0 #error this is not used anywhere inline uint64 stringToMer(uint32 ms, char *str) { uint64 mer = 0L; for (uint32 i=0; i _mersMax) { _ptrsLen++; if (_ptrsLen >= _ptrsMax) { _ptrsMax *= 2; coord **p = new coord * [_ptrsMax]; memcpy(p, _ptrs, sizeof(coord*) * _ptrsLen); delete [] _ptrs; _ptrs = p; } _ptrs[_ptrsLen] = new coord [_mersMax+1]; _mersLen = 0; } _ptrs[_ptrsLen][_mersLen]._qPos = x; _ptrs[_ptrsLen][_mersLen]._gPos = y; _mersLen++; }; bool getMer(uint32 i, uint32 &x, uint32 &y) { uint32 p = i >> _mersWid; uint32 a = i & _mersMax; if ((p > _ptrsLen) || ((p == _ptrsLen) && (a >= _mersLen))) return(false); x = _ptrs[(i >> _mersWid)][i & _mersMax]._qPos; y = _ptrs[(i >> _mersWid)][i & _mersMax]._gPos; return(true); }; void clear(void) { // Don't delete the first guy! We write into it blindly! for (uint32 x=1; x<_ptrsLen; x++) delete [] _ptrs[x]; _ptrsLen = 0; _mersLen = 0; }; void merge(merList *ML) { uint32 i, x, y; for (i=0; ML->getMer(i, x, y); i++) addMer(x, y); }; private: struct coord { uint32 _qPos; uint32 _gPos; }; // The number of mer blocks we have space for, and the current mer // block. // uint32 _ptrsMax; uint32 _ptrsLen; coord **_ptrs; // The number of mers available in each block, and the current mer // we are at in the current block (for adding new mers). 
// uint32 _mersWid; uint32 _mersMax; uint32 _mersLen; }; #endif // MER_LIST_H kmer-code-2013-trunk/libbio/bio.h0000644000000000000000000000137612322046702015335 0ustar rootroot#ifndef BIO_H #define BIO_H #include "util.h" #ifdef __cplusplus extern "C" { #endif //////////////////////////////////////// // // alphabet // #include "alphabet.h" //////////////////////////////////////// // // reversecomplement.c // char *reverseComplementSequence(char *seq, uint32 seqlen); char *reverseString(char *seq, uint32 seqlen); // halign // // N.B. align() (aka halign) was switched over to palloc() -- this // fixed any memory leaks, and gives a 30%-ish speed increase. This // is thread safe (unless someone breaks palloc2()). // void halign(const char *string1, const char *string2, const int len1, const int len2, char *alnline1, char *alnline2); #ifdef __cplusplus } #endif #endif // BIO_H kmer-code-2013-trunk/libbio/Make.include0000644000000000000000000000321311544637641016642 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../libutil/)/ LIBSEQ/ :=$(realpath $/../libseq/)/ src := $/alphabet.c \ $/alphabet.h \ $/alphabet-acgtspace.c \ $/alphabet-colorspace.c \ $/bio++.H \ $/bio.h \ $/halign.c \ $/kmer.C \ $/kmer.H \ $/kmerhuge.H \ $/kmeriface.H \ $/kmertiny.H \ $/merCovering.H \ $/merList.H \ $/mers.h \ $/reversecomplement.c old := $/fasta-accessor.H \ $/fasta-c.C \ $/fasta-c.h \ $/fasta-cache.C \ $/fasta-cache.H \ $/fasta-simple.c \ $/fasta-simple.h \ $/fasta.C \ $/fasta.H \ $/merstream.C \ $/merstream.H \ $/seq.C \ $/seq.H \ $/seqFactory.H \ $/seqFile.H \ $/seqInCore.H \ $/seqOnDisk.H \ $/seqStore.H \ $/seqStream.H \ $/sff.H \ $/sff.C $/.C_SRCS :=$(filter %.c,${src}) $/.C_INCS :=$(filter %.h,${src}) $/.CXX_SRCS :=$(filter %.C,${src}) $/.CXX_INCS :=$(filter %.H,${src}) $/.CXX_LIBS :=$/libbio.a $/.CLEAN := $/*.o $(eval $/%.d $/%.o: CFLAGS += -I${LIBUTL/}) $(eval $/%.d $/%.o: CXXFLAGS += -I${LIBUTL/}) $/reversecomplement.c.d: $/alphabet.h $/merstream.C.d: $/alphabet.h 
$/libbio.a: ${$/.C_SRCS:.c=.o} ${$/.CXX_SRCS:.C=.o} $/alphabet.o $/alphabet.c: $/alphabet.h $/alphabet.h: $/alphabet-generate.c $/alphabet-acgtspace.c $/alphabet-colorspace.c $(CC) $(CFLAGS) $(CFLAGS_COMPILE) -o `dirname $@`/a.out $< cd `dirname $@` ; ./a.out && rm -f ./a.out kmer-code-2013-trunk/libbio/kmerhuge.H0000644000000000000000000002534312322046702016333 0ustar rootroot #define MERWORD(N) _md[N] class kMerHuge { public: kMerHuge(uint32 ms=uint32ZERO) { setMerSize(ms); clear(); }; ~kMerHuge() { }; void setMerSize(uint32 ms); uint32 getMerSize(void) const { return(_merSize); }; void setMerSpan(uint32 ms) { _merSpan = ms; }; uint32 getMerSpan(void) const { return(_merSpan); }; kMerHuge &reverseComplement(void) { for (uint32 i=0, j=KMER_WORDS-1; i> 2) & 0x3333333333333333llu) | ((MERWORD(i) << 2) & 0xccccccccccccccccllu); MERWORD(i) = ((MERWORD(i) >> 4) & 0x0f0f0f0f0f0f0f0fllu) | ((MERWORD(i) << 4) & 0xf0f0f0f0f0f0f0f0llu); MERWORD(i) = ((MERWORD(i) >> 8) & 0x00ff00ff00ff00ffllu) | ((MERWORD(i) << 8) & 0xff00ff00ff00ff00llu); MERWORD(i) = ((MERWORD(i) >> 16) & 0x0000ffff0000ffffllu) | ((MERWORD(i) << 16) & 0xffff0000ffff0000llu); MERWORD(i) = ((MERWORD(i) >> 32) & 0x00000000ffffffffllu) | ((MERWORD(i) << 32) & 0xffffffff00000000llu); MERWORD(i) ^= 0xffffffffffffffffllu; } *this >>= KMER_WORDS * 64 - 2 * _merSize; return(*this); }; void clear(void) { for (uint32 i=0; i>=(uint32 x) { // thisWord, the word we shift bits into // thatWord, the word we shift bits out of // shift, the number of bits we shift // uint32 thisWord = 0; uint32 thatWord = x >> 6; uint32 shift = x & uint32MASK(6); // Do an initial word-size shift, to reduce the shift amount to // be less than wordsize. Fill any shifted-out words with zero. 
// if (thatWord) { while (thatWord < KMER_WORDS) MERWORD(thisWord++) = MERWORD(thatWord++); while (thisWord < KMER_WORDS) MERWORD(thisWord++) = 0; } // Do bit-size shift, of adjacent words // thisWord = 0; thatWord = 1; MERWORD(thisWord) >>= shift; while (thatWord < KMER_WORDS) { MERWORD(thisWord++) |= MERWORD(thatWord) << (64 - shift); MERWORD(thatWord++) >>= shift; } }; void operator<<=(uint32 x) { uint32 thisWord = KMER_WORDS; uint32 thatWord = KMER_WORDS - (x >> 6); uint32 shift = x & uint32MASK(6); if (thatWord != KMER_WORDS) { while (thatWord > 0) MERWORD(--thisWord) = MERWORD(--thatWord); while (thisWord > 0) MERWORD(--thisWord) = 0; } thisWord = KMER_WORDS; thatWord = KMER_WORDS - 1; MERWORD(thisWord-1) <<= shift; while (thatWord > 0) { --thisWord; --thatWord; MERWORD(thisWord) |= MERWORD(thatWord) >> (64 - shift); MERWORD(thatWord) <<= shift; } }; public: void operator+=(uint64 x) { *this <<= 2; assert((x & 0xfc) == 0); MERWORD(0) |= x & uint64NUMBER(0x3); }; void operator-=(uint64 x) { *this >>= 2; assert((x & 0xfc) == 0); MERWORD(_lastWord) |= (x & uint64NUMBER(0x3)) << _lastShift; }; void mask(bool full) { MERWORD(_maskWord) &= _mask; if (full) for (uint32 x=_maskWord+1; x r.MERWORD(i)) return(false); } return(false); }; bool operator>(kMerHuge const &r) const { for (uint32 i=KMER_WORDS; i--; ) { if (MERWORD(i) > r.MERWORD(i)) return(true); if (MERWORD(i) < r.MERWORD(i)) return(false); } return(false); }; bool operator<=(kMerHuge const &r) const { for (uint32 i=KMER_WORDS; i--; ) { if (MERWORD(i) < r.MERWORD(i)) return(true); if (MERWORD(i) > r.MERWORD(i)) return(false); } return(true); }; bool operator>=(kMerHuge const &r) const { for (uint32 i=KMER_WORDS; i--; ) { if (MERWORD(i) > r.MERWORD(i)) return(true); if (MERWORD(i) < r.MERWORD(i)) return(false); } return(true); }; int qsort_less(kMerHuge const &r) const { for (uint32 i=KMER_WORDS; i--; ) { if (MERWORD(i) < r.MERWORD(i)) return(-1); if (MERWORD(i) > r.MERWORD(i)) return(1); } return(0); }; 
public: operator uint64 () const {return(MERWORD(0));}; public: // these should work generically for both big and small void writeToBitPackedFile(bitPackedFile *BPF, uint32 numBits=0) const { if (numBits == 0) numBits = _merSize << 1; uint32 lastWord = numBits >> 6; if ((numBits & uint32MASK(6)) == 0) lastWord++; if (numBits & uint32MASK(6)) BPF->putBits(MERWORD(lastWord), numBits & uint32MASK(6)); while (lastWord > 0) { lastWord--; BPF->putBits(MERWORD(lastWord), 64); } }; void readFromBitPackedFile(bitPackedFile *BPF, uint32 numBits=0) { if (numBits == 0) numBits = _merSize << 1; uint32 lastWord = numBits >> 6; if ((numBits & uint32MASK(6)) == 0) lastWord++; if (numBits & uint32MASK(6)) MERWORD(lastWord) = BPF->getBits(numBits & uint32MASK(6)); while (lastWord > 0) { lastWord--; MERWORD(lastWord) = BPF->getBits(64); } }; public: // these should work generically for both big and small void setBits(uint32 pos, uint32 numbits, uint64 val) { uint32 wrd = pos >> 6; uint32 bit = pos & 0x3f; val &= uint64MASK(numbits); if (wrd >= KMER_WORDS) { fprintf(stderr, "kMer::setBits()-- ERROR: tried to set pos="uint32FMT" numbits="uint32FMT" larger than KMER_WORDS=%d\n", pos, numbits, KMER_WORDS), exit(1); } // If we have enough space in the word for the bits, replace // those bits in the word. Otherwise we need to split the value // into two pieces, and add to the end of the first word and the // start of the second. 
if (64 - bit >= numbits) { MERWORD(wrd) &= ~(uint64MASK(numbits) << bit); MERWORD(wrd) |= val << bit; } else { if (wrd+1 >= KMER_WORDS) { fprintf(stderr, "kMer::setBits()-- ERROR: tried to set pos="uint32FMT" numbits="uint32FMT" larger than KMER_WORDS=%d\n", pos, numbits, KMER_WORDS), exit(1); } uint32 b1 = 64 - bit; // bits in the first word uint32 b2 = numbits - b1; // bits in the second word MERWORD(wrd) &= ~(uint64MASK(b1) << bit); MERWORD(wrd) |= (val & uint64MASK(b1)) << bit; MERWORD(wrd+1) &= ~(uint64MASK(b2)); MERWORD(wrd+1) |= (val >> b1) & uint64MASK(b2); } }; uint64 getBits(uint32 pos, uint32 numbits) const { uint64 val = uint64ZERO; uint32 wrd = pos >> 6; uint32 bit = pos & 0x3f; if (wrd >= KMER_WORDS) { fprintf(stderr, "kMer::getBits()-- ERROR: tried to get pos="uint32FMT" numbits="uint32FMT" larger than KMER_WORDS=%d\n", pos, numbits, KMER_WORDS), exit(1); } if (64 - bit >= numbits) { val = MERWORD(wrd) >> bit; } else { if (wrd+1 >= KMER_WORDS) { fprintf(stderr, "kMer::getBits()-- ERROR: tried to get pos="uint32FMT" numbits="uint32FMT" larger than KMER_WORDS=%d\n", pos, numbits, KMER_WORDS), exit(1); } uint32 b1 = 64 - bit; // bits in the first word uint32 b2 = numbits - b1; // bits in the second word val = MERWORD(wrd) >> (64-b1); val |= (MERWORD(wrd+1) & uint64MASK(b2)) << b1; } val &= uint64MASK(numbits); return(val); }; public: // these should work generically for both big and small uint64 startOfMer(uint32 bits) const { return(getBits((_merSize << 1) - bits, bits)); }; uint64 endOfMer(uint32 bits) const { return(MERWORD(0) & uint64MASK(bits)); }; public: // these should work generically for both big and small uint64 getWord(uint32 wrd) const { return(MERWORD(wrd)); }; void setWord(uint32 wrd, uint64 val) { MERWORD(wrd) = val; }; public: char *merToString(char *instr) const; private: uint64 _md[KMER_WORDS]; // The _merSize is always the number of letters in the mer -- if we // are a spaced seed, it is the weight. 
// uint32 _merSize; uint32 _merSpan; // The mask is used to make sure the mer has only _merSize bases // set -- we can get more than that if we shift to the left. The // _maskWord is the word that we want to mask: // uint64 _mask; uint32 _maskWord; // For operator-=() (add a base to the left end) we need to know // what the last word is, and how far to shift the bits. // // _lastWord -- the last word that contains bases // _lastShift -- the amount we need to shift left to put bits 0 and 1 // into the last base uint32 _lastWord; uint32 _lastShift; }; inline void kMerHuge::setMerSize(uint32 ms) { _merSize = ms; _merSpan = ms; _lastWord = (2 * ms - 2) / 64; _lastShift = (2 * ms - 2) % 64; _mask = uint64ZERO; _maskWord = _merSize / 32; // Filled whole words with the mer, the mask is special-cased // to clear the whole next word, unless there is no whole next // word, then it does nothing on the last word. // // Otherwise, we can construct the mask as usual. // if ((_merSize % 32) == 0) { if (_maskWord >= KMER_WORDS) { _maskWord = KMER_WORDS - 1; _mask = ~uint64ZERO; } else { _maskWord = _merSize / 32; _mask = uint64ZERO; } } else { _mask = uint64MASK((_merSize % 32) << 1); } if (_maskWord >= KMER_WORDS) { fprintf(stderr, "kMer::setMerSize()-- ERROR! 
Desired merSize of "uint32FMT" larger than\n", _merSize); fprintf(stderr, " available storage space (KMER_WORDS=%d, max merSize %d).\n", KMER_WORDS, KMER_WORDS*32); exit(1); } } inline char * kMerHuge::merToString(char *instr) const { uint32 lastWord = _merSize >> 5; char *str = instr; if ((_merSize & uint32MASK(6)) == 0) lastWord++; if (_merSize & uint32MASK(5)) { uint64ToMerString(_merSize & uint32MASK(5), MERWORD(lastWord), str); str += _merSize & uint32MASK(5); } while (lastWord > 0) { lastWord--; uint64ToMerString(32, MERWORD(lastWord), str); str += 32; } return(instr); }; kmer-code-2013-trunk/libbio/alphabet-generate.c0000644000000000000000000000756611043436604020141 0ustar rootroot#include #include // Instead of forcing client applications to explicitly call // initCompressionTables(), static tables are now generated. unsigned char whitespaceSymbol[256]; unsigned char toLower[256]; unsigned char toUpper[256]; unsigned char letterToBits[256]; unsigned char bitsToLetter[256]; unsigned char bitsToColor[256]; unsigned char complementSymbol[256]; unsigned char validCompressedSymbol[256]; unsigned char IUPACidentity[128][128]; unsigned char baseToColor[128][128]; void initCompressionTablesForACGTSpace(void); void initCompressionTablesForColorSpace(void); #include "alphabet-acgtspace.c" #include "alphabet-colorspace.c" int main(int argc, char **argv) { int i, j; FILE *C = fopen("alphabet.c", "w"); FILE *H = fopen("alphabet.h", "w"); initCompressionTablesForACGTSpace(); initCompressionTablesForColorSpace(); fprintf(H, "//\n"); fprintf(H, "// Automagically generated -- DO NOT EDIT!\n"); fprintf(H, "// See libbri/alphabet-generate.c for details.\n"); fprintf(H, "//\n"); fprintf(H, "\n"); fprintf(H, "#ifdef __cplusplus\n"); fprintf(H, "extern \"C\" {\n"); fprintf(H, "#endif\n"); fprintf(H, "\n"); fprintf(C, "//\n"); fprintf(C, "// Automagically generated -- DO NOT EDIT!\n"); fprintf(C, "// See %s for details.\n", __FILE__); fprintf(C, "//\n"); fprintf(H, "extern 
unsigned char whitespaceSymbol[256];\n"); fprintf(C, "unsigned char whitespaceSymbol[256] = { %d", whitespaceSymbol[0]); for (i=1; i<256; i++) fprintf(C, ",%d", whitespaceSymbol[i]); fprintf(C, " };\n"); fprintf(H, "extern unsigned char toLower[256];\n"); fprintf(C, "unsigned char toLower[256] = { %d", toLower[0]); for (i=1; i<256; i++) fprintf(C, ",%d", toLower[i]); fprintf(C, " };\n"); fprintf(H, "extern unsigned char toUpper[256];\n"); fprintf(C, "unsigned char toUpper[256] = { %d", toUpper[0]); for (i=1; i<256; i++) fprintf(C, ",%d", toUpper[i]); fprintf(C, " };\n"); fprintf(H, "extern unsigned char letterToBits[256];\n"); fprintf(C, "unsigned char letterToBits[256] = { %d", letterToBits[0]); for (i=1; i<256; i++) fprintf(C, ",%d", letterToBits[i]); fprintf(C, " };\n"); fprintf(H, "extern unsigned char bitsToLetter[256];\n"); fprintf(C, "unsigned char bitsToLetter[256] = { %d", bitsToLetter[0]); for (i=1; i<256; i++) fprintf(C, ",%d", bitsToLetter[i]); fprintf(C, " };\n"); fprintf(H, "extern unsigned char bitsToColor[256];\n"); fprintf(C, "unsigned char bitsToColor[256] = { %d", bitsToColor[0]); for (i=1; i<256; i++) fprintf(C, ",%d", bitsToColor[i]); fprintf(C, " };\n"); fprintf(H, "extern unsigned char complementSymbol[256];\n"); fprintf(C, "unsigned char complementSymbol[256] = { %d", complementSymbol[0]); for (i=1; i<256; i++) fprintf(C, ",%d", complementSymbol[i]); fprintf(C, " };\n"); fprintf(H, "extern unsigned char IUPACidentity[128][128];\n"); fprintf(C, "unsigned char IUPACidentity[128][128] = {\n"); for (i=0; i<128; i++) { fprintf(C, " {"); if (IUPACidentity[i][0]) fprintf(C, "1"); else fprintf(C, "0"); for (j=1;j<128; j++) { if (IUPACidentity[i][j]) fprintf(C, ",1"); else fprintf(C, ",0"); } fprintf(C, "},\n"); } fprintf(C, "};\n"); fprintf(H, "extern unsigned char baseToColor[128][128];\n"); fprintf(C, "unsigned char baseToColor[128][128] = {\n"); for (i=0; i<128; i++) { fprintf(C, " {%d", baseToColor[i][0]); for (j=1;j<128; j++) fprintf(C, ",%d", 
baseToColor[i][j]); fprintf(C, "},\n"); } fprintf(C, "};\n"); fprintf(H, "\n"); fprintf(H, "void initCompressionTablesForACGTSpace(void);\n"); fprintf(H, "void initCompressionTablesForColorSpace(void);\n"); fprintf(H, "\n"); fprintf(H, "#ifdef __cplusplus\n"); fprintf(H, "}\n"); fprintf(H, "#endif\n"); return(0); } kmer-code-2013-trunk/libbio/alphabet.h0000644000000000000000000000120711043436604016340 0ustar rootroot// // Automagically generated -- DO NOT EDIT! // See libbri/alphabet-generate.c for details. // #ifdef __cplusplus extern "C" { #endif extern unsigned char whitespaceSymbol[256]; extern unsigned char toLower[256]; extern unsigned char toUpper[256]; extern unsigned char letterToBits[256]; extern unsigned char bitsToLetter[256]; extern unsigned char bitsToColor[256]; extern unsigned char complementSymbol[256]; extern unsigned char IUPACidentity[128][128]; extern unsigned char baseToColor[128][128]; void initCompressionTablesForACGTSpace(void); void initCompressionTablesForColorSpace(void); #ifdef __cplusplus } #endif kmer-code-2013-trunk/libbio/alphabet-colorspace.c0000644000000000000000000000736311043436604020474 0ustar rootroot#include #include #include "alphabet.h" void initCompressionTablesForColorSpace(void) { int i, j; for (i=0; i<128; i++) for (j=0; j<128; j++) baseToColor[i][j] = '.'; // Invalid // Supports transforming a base sequence to a color sequence. // Not sure how valid this is; treat every letter like it's a gap. // We then override ACGT to be the correct encoding. 
for (i='a'; i<='z'; i++) { baseToColor['a'][i] = '4'; baseToColor['c'][i] = '4'; baseToColor['g'][i] = '4'; baseToColor['t'][i] = '4'; baseToColor['n'][i] = '4'; } for (i='a'; i<='z'; i++) { baseToColor[i]['a'] = '0'; baseToColor[i]['c'] = '1'; baseToColor[i]['g'] = '2'; baseToColor[i]['t'] = '3'; baseToColor[i]['n'] = '4'; } baseToColor['a']['a'] = '0'; baseToColor['a']['c'] = '1'; baseToColor['a']['g'] = '2'; baseToColor['a']['t'] = '3'; baseToColor['a']['n'] = '4'; baseToColor['c']['a'] = '1'; baseToColor['c']['c'] = '0'; baseToColor['c']['g'] = '3'; baseToColor['c']['t'] = '2'; baseToColor['c']['n'] = '4'; baseToColor['g']['a'] = '2'; baseToColor['g']['c'] = '3'; baseToColor['g']['g'] = '0'; baseToColor['g']['t'] = '1'; baseToColor['g']['n'] = '4'; baseToColor['t']['a'] = '3'; baseToColor['t']['c'] = '2'; baseToColor['t']['g'] = '1'; baseToColor['t']['t'] = '0'; baseToColor['t']['n'] = '4'; for (i='a'; i<='z'; i++) for (j='a'; j<='z'; j++) { baseToColor[toupper(i)][toupper(j)] = baseToColor[i][j]; baseToColor[tolower(i)][toupper(j)] = baseToColor[i][j]; baseToColor[toupper(i)][tolower(j)] = baseToColor[i][j]; baseToColor[tolower(i)][tolower(j)] = baseToColor[i][j]; } // Supports composing colors baseToColor['0']['0'] = '0'; baseToColor['0']['1'] = '1'; baseToColor['0']['2'] = '2'; baseToColor['0']['3'] = '3'; baseToColor['0']['4'] = '4'; baseToColor['1']['0'] = '1'; baseToColor['1']['1'] = '0'; baseToColor['1']['2'] = '3'; baseToColor['1']['3'] = '2'; baseToColor['1']['4'] = '4'; baseToColor['2']['0'] = '2'; baseToColor['2']['1'] = '3'; baseToColor['2']['2'] = '0'; baseToColor['2']['3'] = '1'; baseToColor['2']['4'] = '4'; baseToColor['3']['0'] = '3'; baseToColor['3']['1'] = '2'; baseToColor['3']['2'] = '1'; baseToColor['3']['3'] = '0'; baseToColor['3']['4'] = '4'; // Supports transforming color sequence to base sequence. 
baseToColor['a']['0'] = baseToColor['A']['0'] = 'a'; baseToColor['a']['1'] = baseToColor['A']['1'] = 'c'; baseToColor['a']['2'] = baseToColor['A']['2'] = 'g'; baseToColor['a']['3'] = baseToColor['A']['3'] = 't'; baseToColor['a']['4'] = baseToColor['A']['4'] = 'n'; baseToColor['c']['0'] = baseToColor['C']['0'] = 'c'; baseToColor['c']['1'] = baseToColor['C']['1'] = 'a'; baseToColor['c']['2'] = baseToColor['C']['2'] = 't'; baseToColor['c']['3'] = baseToColor['C']['3'] = 'g'; baseToColor['c']['4'] = baseToColor['C']['4'] = 'n'; baseToColor['g']['0'] = baseToColor['G']['0'] = 'g'; baseToColor['g']['1'] = baseToColor['G']['1'] = 't'; baseToColor['g']['2'] = baseToColor['G']['2'] = 'a'; baseToColor['g']['3'] = baseToColor['G']['3'] = 'c'; baseToColor['g']['4'] = baseToColor['G']['4'] = 'n'; baseToColor['t']['0'] = baseToColor['T']['0'] = 't'; baseToColor['t']['1'] = baseToColor['T']['1'] = 'g'; baseToColor['t']['2'] = baseToColor['T']['2'] = 'c'; baseToColor['t']['3'] = baseToColor['T']['3'] = 'a'; baseToColor['t']['4'] = baseToColor['T']['4'] = 'n'; baseToColor['n']['0'] = baseToColor['N']['0'] = 'a'; baseToColor['n']['1'] = baseToColor['N']['1'] = 'c'; baseToColor['n']['2'] = baseToColor['N']['2'] = 'g'; baseToColor['n']['3'] = baseToColor['N']['3'] = 't'; baseToColor['n']['4'] = baseToColor['N']['4'] = 'n'; } kmer-code-2013-trunk/libbio/kmeriface.H0000644000000000000000000000523312322046702016446 0ustar rootroot #if 0 // Documentation, really. // Incomplete too. class kMerInterface { kMerInterface() {}; virtual ~kMerInterface() {}; // Reverse all the words, reverse and complement the bases in // each word, then shift right to align the edge. 
// virtual kMerInterface &reverseComplement(void) = 0; virtual void clear(void); // Construct a mer by shifting bases onto the end: // += shifts onto the right end // -= shifts onto the left end // virtual void operator+=(uint64 x) = 0; virtual void operator-=(uint64 x) = 0; // used by merStream at least // virtual void mask(bool) = 0; // Return the mer, as a 64-bit integer. If the mer is more than // 32-bases long, then the left-most (the earliest, the start, etc) // bases are used. // virtual operator uint64 () const = 0; // These are written/read in 5'endian, which isn't the most natural // implementation. It's done this way to keep the sequence in // order (e.g., the merStreamFile). Don't change the order. // // On the otherhand, the implementation (of write anyway) is // basically the same as merToString(). // // Takes an optional number of BITS to write, pulled from the // END of the mer. // virtual void writeToBitPackedFile(bitPackedFile *BPF, uint32 numBits=0) const = 0; virtual void readFromBitPackedFile(bitPackedFile *BPF, uint32 numBits=0) = 0; // Returns a sub-mer from either the start (left end) or the end // (right end) of the mer. The sub-mer must be at most 64 bits // long. Yes, BITS. // // The start is difficult, because it can span multiple words. The // end is always in the first word. // virtual uint64 startOfMer(uint32 bits) const = 0; virtual uint64 endOfMer(uint32 bits) const = 0; // Set 'numbits' bits from (the end of) 'val' at bit position 'pos' // in the mer. This is wildly low-level, but merylStreamReader // needs it. // // The position is measured from the right end. // (0, 8, X) would copy the bits 7 to 0 of X to bits 7 to 0 of the mer. // // Argh! Can't use set/getDecodedValue because that is doing things in the wrong order. 
// // Meryl // virtual uint64 getWord(uint32 wrd) const = 0; // { return(MERWORD(wrd)); }; virtual void setWord(uint32 wrd, uint64 val) = 0; // { MERWORD(wrd) = val; }; // Show the mer as ascii // // Doesn't print the last full word, if it's on the word boundary // // We build the string right to left, print any partial word first, // then print whole words until we run out of words to print. // virtual char *merToString(char *instr) const = 0; }; #endif kmer-code-2013-trunk/libbio/kmer.C0000644000000000000000000003325012322046702015451 0ustar rootroot#include "kmer.H" kMerBuilder::kMerBuilder(uint32 ms, uint32 cm, char *tm) { _style = 0; _merSize = 0; _merSizeValid = 0L; _merSizeValidIs = 0; _merSizeValidZero = 0; _merStorage = 0L; _fMer = 0L; _rMer = 0L; _compression = 0; _compressionIndex = 0; _compressionFirstIndex = 0; _compressionLength = 0L; _compressionCurrentLength = 0; _templateSpan = 0; _templateLength = 0; _template = 0L; _templatePos = 0; _templateMer = 0; _templateFirst = 0; if (ms) { _style = 0; _merSize = ms; _merSizeValidIs = _merSize + _merSize; _merSizeValidZero = _merSize; } if (cm) { _style = 1; _merSize = ms; _merSizeValidIs = _merSize + _merSize; _merSizeValidZero = _merSize; _compression = cm; _compressionIndex = 0; _compressionFirstIndex = 0; _compressionLength = 0L; _compressionCurrentLength = 0; assert(_compression < _merSize); } if (tm) { _style = 2; _merSize = 0; _templateSpan = strlen(tm); _templateLength = 0; _template = new char [_templateSpan + 1]; _templatePos = 0; _templateMer = 0; _templateFirst = 1; // Templates cannot begin or end in zero -- they MUST begin/end // with a letter. We silently fix these problems. Unless there // are no 1's in the string, then we bail. 
uint32 i=0, t=0; while ((i < _templateSpan) && (tm[i] == '0')) i++; if (i == _templateSpan) { fprintf(stderr, "invalid kMerBuilder template '%s' -- its empty!\n", tm); exit(1); } while (i < _templateSpan) { _template[t] = 0; if (tm[i] == '1') { _template[t] = 1; _merSize++; } i++; t++; } while (_template[--t] == 0) ; _templateSpan = t + 1; #ifdef DEBUGSPACE for (i=0; i<_templateSpan; i++) fprintf(stderr, "%d", _template[i]); fprintf(stderr, " -- %d\n", _templateSpan); #endif // Look for patterns in the template, set templateLength to be the // size of the pattern. _templateLength = _templateSpan; // Finally, we can set what valid and zero mersizes are. _merSizeValidIs = _templateLength + _merSize; _merSizeValidZero = _templateLength; } if (cm && tm) { _style = 3; assert(0); } if (_merSize > KMER_WORDS * 32) fprintf(stderr, "kMer size too large; increase KMER_WORDS in libbio/kmer.H\n"), exit(1); _compressionLength = new uint32 [_merSize]; for (uint32 z=0; z<_merSize; z++) _compressionLength[z] = (cm) ? 0 : 1; if (tm) { _merStorage = new kMer [_templateLength * 2]; _merSizeValid = new uint32 [_templateLength]; for (uint32 i=0; i<2*_templateLength; i++) { _merStorage[i].setMerSize(_merSize); _merStorage[i].setMerSpan(_templateSpan); } // VERY IMPORTANT! Offset the valid length to adjust for the // template that every mer except the first is starting in the // middle of. 
// for (uint32 i=0; i<_templateLength; i++) _merSizeValid[i] = _merSize - i; } else { _merStorage = new kMer [2]; _merSizeValid = new uint32 [1]; _merStorage[0].setMerSize(_merSize); _merStorage[1].setMerSize(_merSize); _merSizeValid[0] = _merSizeValidZero; if (cm) { _merStorage[0].setMerSpan(0); _merStorage[1].setMerSpan(0); } } _fMer = _merStorage + 0; _rMer = _merStorage + 1; } kMerBuilder::~kMerBuilder() { delete [] _merSizeValid; delete [] _merStorage; delete [] _compressionLength; delete [] _template; } void kMerBuilder::clear(bool clearMer) { // Contiguous mers _merSizeValid[0] = _merSizeValidZero; // Compressed mers if (_compression) { _compressionIndex = 0; _compressionFirstIndex = 0; _compressionCurrentLength = 0; for (uint32 z=0; z<_merSize; z++) _compressionLength[z] = 0; _merStorage[0].setMerSpan(0); _merStorage[1].setMerSpan(0); } // Spaced mers if (_template) { for (uint32 i=0; i<2*_templateLength; i++) _merStorage[i].clear(); for (uint32 i=0; i<_templateLength; i++) _merSizeValid[i] = _merSize - i; _templatePos = 0; _templateMer = 0; _templateFirst = 1; } if (clearMer) { _fMer->clear(); _rMer->clear(); } } // // The addBase methods add a single base (cf - forward, cr - complemented) to // the mer. The return true if another base is needed to finish the mer, and // false if the mer is complete. // bool kMerBuilder::addBaseContiguous(uint64 cf, uint64 cr) { // Not a valid base, reset the mer to empty, and request more bases // (this is a slightly optimized version of clear()). if (cf & (unsigned char)0xfc) { clear(false); //_merSizeValid[0] = _merSizeValidZero; return(true); } // Add the base to both mers. *_fMer += cf; *_rMer -= cr; // If there aren't enough bases, request another one. if (_merSizeValid[0] + 1 < _merSizeValidIs) { _merSizeValid[0]++; return(true); } return(false); // Good! Don't need another letter. } bool kMerBuilder::addBaseCompressed(uint64 cf, uint64 cr) { // Not a valid base, reset the mer to empty, and request more bases. 
// if (cf & (unsigned char)0xfc) { clear(); return(true); } uint64 lb = theFMer().endOfMer(2); // Last base in the mer uint32 ms = theFMer().getMerSpan(); // Span BEFORE adding the mer if (_merSizeValid[0] <= _merSizeValidZero) lb = 9; // No valid last base (should probably be ~uint64ZERO, but that screws up diagnostic output) #ifdef DEBUGCOMP fprintf(stderr, "kMerBuilder::addBaseCompressed()-- lb="uint64FMT" cf="uint64FMT" ms="uint32FMT" ccl="uint32FMT" lvl="uint32FMT"\n", lb, cf, ms, _compressionCurrentLength, _compression); #endif // Always add one to the current length. When we started, it // was 0. This represents the length AFTER adding the base. // _compressionCurrentLength++; // If the lastbase is the same as the one we want to add (and // there IS a last base), and we've seen too many of these, // remember we've seen another letter in the run, and don't add // it. Request another letter. // if ((lb == cf) && // last is the same as this (_compressionCurrentLength > _compression)) { // run is already too big _compressionLength[_compressionIndex]++; _fMer->setMerSpan(ms + 1); _rMer->setMerSpan(ms + 1); #ifdef DEBUGCOMP fprintf(stderr, "kMerBuilder::addBaseCompressed()-- COMPRESSED currentIdx=%u first=%u", _compressionIndex, _compressionFirstIndex); for (uint32 x=0, y=_compressionFirstIndex; x<_merSize; x++) { fprintf(stderr, " %u(%d)", _compressionLength[y], y); y = (y + 1) % _merSize; } fprintf(stderr, "\n"); #endif return(true); } // Else, it's a new run (a different letter) or our run isn't // big enough to compress and we need to add the duplicate // letter. *_fMer += cf; *_rMer -= cr; // If this is a new letter, propagate the current length to the first letter in this run. That // way, when that letter is popped off the mer, we automagically update our span to include only // as many letters as are here. // // 01234567890 // // E.g. 
For sequence TATTTTTTAGT (that's 6 T's) with a mersize of 3 and compression 2, we'd have // mers with position: // // TATTTTTTAGT // #1 TAT position 0 (with lengths 1, 1, 1) uncompressed mer TAT // #2 ATT position 1 (with lengths 1, 1, 1) ATT // #3 TTA position 6 (with lengths 5, 1, 1) TTTTTTA // #4 TAG position 7 TAG // #5 AGT position 8 AGT // // In #2, because the length so far (1) is not >= the compression (2) we add a new base and // return. // // In #3, the current length is >= the compression, so we keep stuffing on T's and incrementing // the last length, stopping when we get the A. We now propagate the current length to the first // letter in the run. Special case, if the first letter in the run is the first letter in the // mer, we need to immediately update the span. #ifdef DEBUGCOMP fprintf(stderr, "kMerBuilder::addBaseCompressed()-- ADDNEWBASE currentIdx=%u first=%u", _compressionIndex, _compressionFirstIndex); for (uint32 x=0, y=_compressionFirstIndex; x<_merSize; x++) { fprintf(stderr, " %u(%d)", _compressionLength[y], y); y = (y + 1) % _merSize; } fprintf(stderr, "\n"); #endif // If we added a new letter, transfer the run-length count to the first letter in the previous // run. In the above example, when we built the run, the lengths are (1, 1, 5). That is, all // compression occurred on the last letter. When we shift off that first letter, we want to // remove as much of the run as possible. 
if (lb != cf) { if (_compressionFirstIndex != _compressionIndex) { _compressionLength[_compressionFirstIndex] += _compressionLength[_compressionIndex] - 1; _compressionLength[_compressionIndex] = 1; } _compressionFirstIndex = (_compressionIndex + 1) % _merSize; _compressionCurrentLength = 1; } _compressionIndex = (_compressionIndex + 1) % _merSize; ms -= _compressionLength[_compressionIndex]; // subtract the count for the letter we just shifted out #ifdef DEBUGCOMP fprintf(stderr, "kMerBuilder::addBaseCompressed()-- ADDNEWBASE shifted out at idx="uint32FMT" with "uint32FMT" positions; final span "uint32FMT"\n", _compressionIndex, _compressionLength[_compressionIndex], ms + 1); #endif _compressionLength[_compressionIndex] = 1; // one letter at this position _fMer->setMerSpan(ms + 1); _rMer->setMerSpan(ms + 1); // If there aren't enough bases, request another one. if (_merSizeValid[0] + 1 < _merSizeValidIs) { _merSizeValid[0]++; return(true); } return(false); // Good! Don't need another letter. } bool kMerBuilder::addBaseSpaced(uint64 cf, uint64 cr) { #ifdef DEBUGSPACE fprintf(stderr, "add %c templatePos=%u templateMer=%u\n", ch, _templatePos, _templateMer); #endif // We always advance the templatePos, unfortunately, we need to // use the current value throughout this function. If there // was a single return point, we could advance immediately // before returning. // uint32 tp = _templatePos; _templatePos = (_templatePos + 1) % _templateLength; // If we get an invalid letter, set all mers that would have // had a letter added to be broken. // if (cf & (unsigned char)0xfc) { for (uint32 m=0; m<_templateLength; m++) { uint32 tppos = (tp + _templateLength - m) % _templateLength; if (_template[tppos] == 1) { // Reset to 'zero', but make it skip over any remaining // positions in the current template. 
// _merSizeValid[m] = _merSizeValidZero + tppos - _templateLength + 1; #ifdef DEBUGSPACE fprintf(stderr, "-- invalid letter, reset mer %u to valid %u (mersizevalidzero=%u ttpos=%u templatelength=%u)\n", m, _merSizeValid[m], _merSizeValidZero, tppos, _templateLength); #endif } } if (_templateFirst == 0) _templateMer = (_templateMer + 1) % _templateLength; return(true); } // We have a valid letter, and add it to all the mers that the // template allows. // for (uint32 m=0; m<_templateLength; m++) { uint32 tppos = (tp + _templateLength - m) % _templateLength; if (_template[tppos] == 1) { _merStorage[2*m+0] += cf; _merStorage[2*m+1] -= cr; if (_merSizeValid[m] < _merSizeValidIs) _merSizeValid[m]++; #ifdef DEBUGSPACE fprintf(stderr, "push %c onto %d (at template %u) length = %u %s\n", ch, m, (tp + _templateLength - m) % _templateLength, _merSizeValid[m], (_merSizeValid[m] >= _merSizeValidIs) ? "complete" : ""); #endif } else if (_merSizeValid[m] <= _merSizeValidZero) { // The template doesn't want us to add a letter to the mer, // but we're adjusting for an aborted template, and we're // counting template positions (not just non-zero template // positions) when adjusting. // _merSizeValid[m]++; } } // If the current mer isn't long enough, we move to the next mer, // and request another letter. // if (_merSizeValid[_templateMer] < _merSizeValidIs) { if (_templateFirst == 0) _templateMer = (_templateMer + 1) % _templateLength; #ifdef DEBUGSPACE fprintf(stderr, "-- too short -- need more templateMer=%u templateFirst=%u\n", _templateMer, _templateFirst); #endif return(true); } // On startup, _templateMer is always 0 (the first mer) until // it is long enough to be a valid mer. Then, we clear // _templateFirst so that we can start advancing through mers. // Update the f and r pointers to the correct mers, advance our // template to the next, and terminate. 
// _fMer = _merStorage + 2 * _templateMer + 0; _rMer = _merStorage + 2 * _templateMer + 1; #ifdef DEBUGSPACE fprintf(stderr, "-- valid! (templateMer = %u)\n", _templateMer); #endif _templateFirst = 0; _templateMer = (_templateMer + 1) % _templateLength; return(false); // Good! Don't need another letter. } bool kMerBuilder::addBaseCompressedSpaced(uint64 cf, uint64 cr) { fprintf(stderr, "kMerBuilder::addBaseCompressedSpace()-- Compressed and spaced mers not supported.\n"); exit(1); } kmer-code-2013-trunk/libbio/bio++.H0000644000000000000000000000057511061606335015426 0ustar rootroot#ifndef BIO_PLUS_PLUS_H #define BIO_PLUS_PLUS_H #include #include #include #include #include #include #include #include #include #include "bio.h" #include "util++.H" #include "mers.h" #include "kmer.H" #include "merCovering.H" #include "merList.H" #endif // BIO_PLUS_PLUS_H kmer-code-2013-trunk/developer-doc/0000755000000000000000000000000012641613360015700 5ustar rootrootkmer-code-2013-trunk/seatac/0000755000000000000000000000000012641613356014415 5ustar rootrootkmer-code-2013-trunk/seatac/thr-loader.C0000644000000000000000000000456412322046702016562 0ustar rootroot#include #include #include #include "seatac.H" // Define this to print a message whenever a sequence is loaded. // Useful for testing the loader with large sequences (scaffolds, // chromosomes). // //#define VERBOSE_LOADER #ifdef TRUE64BIT char const *loadDesc = "WARNING: Loader ran dry. Increasing limit to %u sequences, decreasing sleep to %f.\n"; #else char const *loadDesc = "WARNING: Loader ran dry. Increasing limit to %lu sequences, decreasing sleep to %f.\n"; #endif void* loaderThread(void *) { uint32 waterLevel = 0; seqInCore *B = 0L; bool slept = false; while (inputHead < numberOfQueries) { // We fill the input as fast as we can, up to the high water // mark, then we take a little snooze to let the workers catch up. 
// pthread_mutex_lock(&inputTailMutex); waterLevel = inputHead - inputTail; pthread_mutex_unlock(&inputTailMutex); // Warn if we're too small. // if ((slept) && (waterLevel <= 1)) { uint32 i = (uint32) (0.1 * config._loaderHighWaterMark); if (i == 0) i = 1; config._loaderHighWaterMark += i; config.setTime(&config._loaderSleep, 0.9 * ((double)config._loaderSleep.tv_sec + (double)config._loaderSleep.tv_nsec * 1e-9)); if (config._loaderWarnings) fprintf(stderr, loadDesc, config._loaderHighWaterMark, ((double)config._loaderSleep.tv_sec + (double)config._loaderSleep.tv_nsec * 1e-9)); } // Sleep, if we need to, otherwise, get the next sequence and // push it onto the input list at inputHead. This alloc is // deleted by the output thread. // if (waterLevel >= config._loaderHighWaterMark) { slept = true; nanosleep(&config._loaderSleep, 0L); } else { slept = false; #ifdef VERBOSE_LOADER fprintf(stderr, "Loading sequence %u (tail = %u)\n", inputHead, inputTail); #endif try { B = qsFASTA->getSequenceInCore(); } catch (std::bad_alloc) { fprintf(stderr, "loaderThread()-- Failed to load next query sequence\ncaught bad_alloc in %s at line %d\n", __FILE__, __LINE__); exit(1); } pthread_mutex_lock(&inputTailMutex); input[inputHead] = B; inputHead++; pthread_mutex_unlock(&inputTailMutex); } } return(0L); } kmer-code-2013-trunk/seatac/filter-nop.C0000644000000000000000000000755212322046702016600 0ustar rootroot#include #include #include // A very simple seatac filter. It reports the single longest match for each pair. // // Also shows how to use a C++ object as a filter. C is pretty much the same thing. 
#include "bio.h"
#include "util++.H"

//  The C entry points of this filter.
extern "C" {
  void    *construct(char *options);
  void     destruct(void *handle);
  void     addHit(void *handle,
                  char orientation,
                  uint32 id1, uint32 pos1, uint32 len1,
                  uint32 id2, uint32 pos2, uint32 len2,
                  uint32 filled);
  void     filter(void *handle);
  uint64   output(void *handle, FILE *file, uint64 matchid);

  void    *constructStats(char *options);
  void     destructStats(void *handle);
  void     addStats(void *handle, void *filterhandle);
  void     showStats(void *handle, FILE *file);
}


//  Remembers the single hit with the largest 'filled' value, formatted and
//  ready to print.  n1 and n2 are the sequence names used in the output line;
//  at most 31 characters of each are kept.
//
class filterLongest {
public:
  filterLongest(const char *n1, const char *n2) {
    fprintf(stderr, "Creating a filterLongest\n");

    //  strncpy() does not terminate the destination when the source is 31
    //  characters or longer, so terminate explicitly.
    strncpy(name1, n1, 31);   name1[31] = 0;
    strncpy(name2, n2, 31);   name2[31] = 0;

    //  No hit seen yet: nothing to print, and any hit with filled > 0 is an improvement.
    outstring[0] = 0;
    maxfilled    = 0;
  };

  ~filterLongest() {
    fprintf(stderr, "Destroyed a filterLongest\n");
  };

  //  Keep this hit if it has more filled than any hit seen so far.
  void addHit(char orientation,
              uint32 id1, uint32 pos1, uint32 len1,
              uint32 id2, uint32 pos2, uint32 len2,
              uint32 filled) {
    if (maxfilled < filled) {
      fprintf(stderr, "filterNOP-- addHit\n");
      maxfilled = filled;
#if 0
      sprintf(outstring, "-%c -e "uint32FMT" "uint32FMT" "uint32FMT" -D "uint32FMT" "uint32FMT" "uint32FMT" -F "uint32FMT"\n",
              orientation, id1, pos1, len1, id2, pos2, len2, filled);
#endif
      sprintf(outstring, "M x . . %s:"uint32FMT" "uint32FMT" "uint32FMT" 1 %s:"uint32FMT" "uint32FMT" "uint32FMT" %s "uint32FMT"\n",
              name1, id1, pos1, len1,
              name2, id2, pos2, len2,
              (orientation == 'f') ? "1" : "-1", filled);
    }
  };

  //  Nothing to do; the filtering happens as hits are added.
  void filter(void) {
    fprintf(stderr, "filterNOP-- filter\n");
  };

  //  Print the saved hit (nothing, if no hit was added).  matchid is returned unchanged.
  uint64 output(FILE *file, uint64 matchid) {
    fprintf(stderr, "filterNOP-- output (ignoring matchid)\n");
    fprintf(file, "%s", outstring);
    return(matchid);
  };

private:
  char    outstring[512];
  char    name1[32], name2[32];
  uint32  maxfilled;
};


//  Counts how many filter objects were added to it.
class statLongest {
public:
  statLongest() {
    num = 0;
  }
  ~statLongest() {
  }

  void add(filterLongest *F) {
    num++;
  }

  void show(FILE *file) {
    fprintf(file, "/statObjNum=%d\n", num);
  }

private:
  int  num;
};


//  Create a filter.  The options string may name the two sequences with
//  '-1 name' and '-2 name'; both names default to "UNK".
void*
construct(char *opts) {
  const char *seq1 = "UNK";
  const char *seq2 = "UNK";

  //  Parse the options to find the parameters
  //
  //  NOTE(review): opts is handed to splitToWords as is; whether a null
  //  options string is acceptable there is not visible here -- confirm.
  //
  splitToWords  W(opts);

  uint32 arg = 0;
  while (arg < W.numWords()) {

    //  Only consume a value if there is a word left to consume.
    if        ((strcmp(W.getWord(arg), "-1") == 0) && (arg + 1 < W.numWords())) {
      seq1 = W.getWord(++arg);
    } else if ((strcmp(W.getWord(arg), "-2") == 0) && (arg + 1 < W.numWords())) {
      seq2 = W.getWord(++arg);
    }

    arg++;
  }

  return(new filterLongest(seq1, seq2));
}

void
destruct(void *handle) {
  delete (filterLongest *)handle;
}

void
addHit(void *handle,
       char orientation,
       uint32 id1, uint32 pos1, uint32 len1,
       uint32 id2, uint32 pos2, uint32 len2,
       uint32 filled) {
  ((filterLongest *)handle)->addHit(orientation, id1, pos1, len1, id2, pos2, len2, filled);
}

void
filter(void *handle) {
  ((filterLongest *)handle)->filter();
}

uint64
output(void *handle, FILE *file, uint64 matchid) {
  return(((filterLongest *)handle)->output(file, matchid));
}


void*
constructStats(char *options) {
  return(new statLongest);
}

void
destructStats(void *handle) {
  delete (statLongest *)handle;
}

void
addStats(void *handle, void *filterhandle) {
  ((statLongest *)handle)->add((filterLongest *)filterhandle);
}

void
showStats(void *handle, FILE *file) {
  ((statLongest *)handle)->show(file);
}
kmer-code-2013-trunk/seatac/configuration.C0000644000000000000000000002361412322046702017365 0ustar rootroot#include "seatac.H" #include #include #include #include #include "sharedObj.H" configuration::configuration(void) { _beVerbose = false; _merSize = 20; _merSkip = 0; _numSearchThreads = 4;
_doReverse = true; _doForward = true; _maxDiagonal = 25; _maxGap = 0; _qsOverlap = 15; _dsOverlap = 15; _minLength = 20; _dbFileName = 0L; _qsFileName = 0L; _maskFileName = 0L; _onlyFileName = 0L; _outputFileName = 0L; _statsFileName = 0L; _tableFileName = 0L; _tableBuildOnly = false; _filtername = 0L; _filteropts = 0L; _filterObj = 0L; _startTime = 0.0; _initTime = 0.0; _buildTime = 0.0; _searchTime = 0.0; _totalTime = 0.0; _loaderHighWaterMark = 2; _loaderSleep.tv_sec = 1; _loaderSleep.tv_nsec = 0; _loaderWarnings = false; _searchSleep.tv_sec = 0; _searchSleep.tv_nsec = 10000000; _writerHighWaterMark = 256; _writerSleep.tv_sec = 1; _writerSleep.tv_nsec = 0; _writerWarnings = false; } configuration::~configuration() { } static char const *usageString = "usage: %s [options]\n" "\n" "Algorithm Options:\n" " -mersize k Use k-mers\n" " -merskip j Skip j mers between each mer inserted into table\n" " -forward Search only the normal query sequences\n" " -reverse Search only the reverse-complemented query sequences\n" " -maxdiagonal d\n" " -maxgap g\n" " -qoverlap q\n" " -doverlap d\n" " -minelength l\n" "\n" "Process Options\n" " -numthreads n Use n search threads\n" " -loaderhighwatermark h Size of the loader queue\n" " -loadersleep t Time the loader will sleep when its output queue is full\n" " -loaderwarnings Enable warning messages for the loader\n" " -searchsleep t Time the searcher will sleep when it has no input\n" " -writerhighwatermark h Size of the output queue\n" " -writersleep t Time the writer will sleep when it has nothing to write\n" " -writerwarnings Enable warning messages for the writer\n" "\n" " -usetables datfile If 'datfile' exists AND is a complete and valid file,\n" " load the tables from the file and do the compute.\n" " Otherwise, fail.\n" "\n" " -buildtables datfile If 'datfile' doesn't exist, build the tables, write\n" " them to 'datfile' and exit. 
Otherwise, quit.\n" "\n" "Filtering Options\n" " -filtername x.so Use the shared object x.so as a filter method.\n" " -filteropts opts The string 'opts' is passed to the filter on creation.\n" "\n" "Input Options:\n" " -mask f Ignore all mers listed in file f\n" " -only f Use only the mers listed in file f\n" " -stream s.fasta Query sequences (the stream)\n" " -table t.fasta Database sequences (the table)\n" " -use #,#,#,# using only those sequences specified\n" " -use file using only those sequences listed in the file\n" "\n" "Output Options\n" " -verbose Entertain the user\n" " -output f Write output to file f\n" " -stats f Write resource statistics to f\n"; void configuration::usage(char *name) { fprintf(stderr, usageString, name); } void configuration::read(int argc, char **argv) { int fail = 0; int arg = 1; while (arg < argc) { if (strcmp(argv[arg], "-mersize") == 0) { arg++; _merSize = atoi(argv[arg]); } else if (strcmp(argv[arg], "-merskip") == 0) { arg++; _merSkip = atoi(argv[arg]); } else if (strcmp(argv[arg], "-numthreads") == 0) { arg++; _numSearchThreads = atoi(argv[arg]); } else if (strcmp(argv[arg], "-mask") == 0) { arg++; _maskFileName = argv[arg]; } else if (strcmp(argv[arg], "-only") == 0) { arg++; _onlyFileName = argv[arg]; } else if (strcmp(argv[arg], "-usetables") == 0) { arg++; _tableFileName = argv[arg]; _tableBuildOnly = false; } else if (strcmp(argv[arg], "-buildtables") == 0) { arg++; _tableFileName = argv[arg]; _tableBuildOnly = true; } else if (strcmp(argv[arg], "-stream") == 0) { arg++; _qsFileName = argv[arg]; } else if (strcmp(argv[arg], "-table") == 0) { arg++; _dbFileName = argv[arg]; } else if (strcmp(argv[arg], "-use") == 0) { arg++; fprintf(stderr, "%s: -use not supported anymore.\n", argv[0]); exit(1); } else if (strcmp(argv[arg], "-forward") == 0) { _doForward = true; _doReverse = false; } else if (strcmp(argv[arg], "-reverse") == 0) { _doReverse = true; _doForward = false; } else if (strcmp(argv[arg], "-verbose") == 0) { 
_beVerbose = true; } else if (strcmp(argv[arg], "-output") == 0) { arg++; _outputFileName = argv[arg]; } else if (strcmp(argv[arg], "-stats") == 0) { arg++; _statsFileName = argv[arg]; } else if (strcmp(argv[arg], "-maxdiagonal") == 0) { arg++; _maxDiagonal = atoi(argv[arg]); } else if (strcmp(argv[arg], "-maxgap") == 0) { arg++; _maxGap = atoi(argv[arg]); } else if (strcmp(argv[arg], "-qoverlap") == 0) { arg++; _qsOverlap = atoi(argv[arg]); } else if (strcmp(argv[arg], "-doverlap") == 0) { arg++; _dsOverlap = atoi(argv[arg]); } else if (strcmp(argv[arg], "-minlength") == 0) { arg++; _minLength = atoi(argv[arg]); } else if (strncmp(argv[arg], "-loaderhighwatermark", 8) == 0) { _loaderHighWaterMark = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-loadersleep", 8) == 0) { setTime(&_loaderSleep, atof(argv[++arg])); } else if (strncmp(argv[arg], "-loaderwarnings", 8) == 0) { _loaderWarnings = true; } else if (strncmp(argv[arg], "-searchsleep", 8) == 0) { setTime(&_searchSleep, atof(argv[++arg])); } else if (strncmp(argv[arg], "-writerhighwatermark", 8) == 0) { _writerHighWaterMark = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-writersleep", 8) == 0) { setTime(&_writerSleep, atof(argv[++arg])); } else if (strncmp(argv[arg], "-writerwarnings", 8) == 0) { _writerWarnings = true; } else if (strcmp(argv[arg], "-filtername") == 0) { arg++; _filtername = argv[arg]; _filterObj = new sharedObj(argv[arg]); } else if (strcmp(argv[arg], "-filteropts") == 0) { arg++; _filteropts = argv[arg]; } else { fprintf(stderr, "ERROR: Unknown option '%s'\n", argv[arg]); fail++; } arg++; } if (fail) exit(1); // // Make sure some constraints are met // if (_numSearchThreads > MAX_THREADS) { fprintf(stderr, "ERROR: Threads are limited to %d.\n", MAX_THREADS); exit(1); } if (_maskFileName && _onlyFileName) { fprintf(stderr, "ERROR: At most one of -mask and -only may be used.\n"); exit(1); } // // Check that the mers are at least adjacent // if (_merSkip >= _merSize) { fprintf(stderr, 
"ERROR: Mers are not adjacent; make sure merskip <= mersize.\n"); exit(1); } // // Test that we can build filter and stat objects // if (_filtername) { filterObj *testf = new filterObj(_filterObj, _filteropts); delete testf; statObj *tests = new statObj(_filterObj, _filteropts); delete tests; } } void configuration::writeATACheader(FILE *out) { fprintf(out, "! format atac 1.0\n"); fprintf(out, "/seatacBeVerbose=%s\n", _beVerbose ? "enabled" : "disabled"); fprintf(out, "/seatacNumSearchThreads="uint32FMT"\n", _numSearchThreads); fprintf(out, "/seatacLoaderHighWaterMark="uint32FMT"\n", _loaderHighWaterMark); fprintf(out, "/seatacLoaderSleep=%f\n", (double)_loaderSleep.tv_sec + (double)_loaderSleep.tv_nsec * 1e-9); fprintf(out, "/seatacLoaderWarnings=%s\n", _loaderWarnings ? "true" : "false"); fprintf(out, "/seatacSearchSleep=%f\n", (double)_searchSleep.tv_sec + (double)_searchSleep.tv_nsec * 1e-9); fprintf(out, "/seatacWriterHighWaterMark="uint32FMT"\n", _writerHighWaterMark); fprintf(out, "/seatacWriterSleep=%f\n", (double)_writerSleep.tv_sec + (double)_writerSleep.tv_nsec * 1e-9); fprintf(out, "/seatacWriterWarnings=%s\n", _writerWarnings ? "true" : "false"); fprintf(out, "/seatacMaxDiagonal="uint32FMT"\n", _maxDiagonal); fprintf(out, "/seatacMaxGap="uint32FMT"\n", _maxGap); fprintf(out, "/seatacQsOverlap="uint32FMT"\n", _qsOverlap); fprintf(out, "/seatacDsOverlap="uint32FMT"\n", _dsOverlap); fprintf(out, "/seatacMinLength="uint32FMT"\n", _minLength + _merSize); fprintf(out, "/seatacMerSize="uint32FMT"\n", _merSize); fprintf(out, "/seatacMerSkip="uint32FMT"\n", _merSkip); fprintf(out, "/seatacDoReverse=%s\n", (_doReverse) ? "true" : "false"); fprintf(out, "/seatacDoForward=%s\n", (_doForward) ? "true" : "false"); fprintf(out, "/seatacFilterName=%s\n", (_filtername) ? _filtername : "None Specified."); fprintf(out, "/seatacFilterOpts=%s\n", (_filteropts) ? _filteropts : "None Specified."); fprintf(out, "/seatacDbFile=%s\n", (_dbFileName) ? 
_dbFileName : "None Specified."); fprintf(out, "/seatacQsFile=%s\n", (_qsFileName) ? _qsFileName : "None Specified."); fprintf(out, "/seatacMaskFile=%s\n", (_maskFileName) ? _maskFileName : "None Specified."); fprintf(out, "/seatacOnlyFile=%s\n", (_onlyFileName) ? _onlyFileName : "None Specified."); fprintf(out, "/seatacOutputFile=%s\n", (_outputFileName) ? _outputFileName : "None Specified."); fprintf(out, "/seatacStatsFile=%s\n", (_statsFileName) ? _statsFileName : "None Specified."); fprintf(out, "/seatacTableFile=%s\n", (_tableFileName) ? _tableFileName : "None Specified."); } kmer-code-2013-trunk/seatac/hitMatrix.C0000644000000000000000000002262512322046702016470 0ustar rootroot#include "seatac.H" hitMatrix::hitMatrix(uint32 qsLen, uint32 qsIdx) { _qsLen = qsLen; _qsIdx = qsIdx; // Because this is doing scaffolds or chromosomes against more than // 1/4 a genome, we expect a LOT of hits. Start off with a good // amount of memory. // // At 8 bytes per diagonalLine, 128M of these is 1GB. Which works // great for aligning mamalian chromosomes and stinks for microbes. // _hitsLen = 0; _hitsMax = 32 * 1024 * 1024; _hits = new diagonalLine [_hitsMax]; } hitMatrix::~hitMatrix() { delete [] _hits; } // Utility for sorting the diagonal lines in the hitMatrix // // The two comparison functions return true if the first line // is less than the second line. 
#ifdef WITHOUT_DIAGONALID inline int compareLines(diagonalLine *A, diagonalLine *B, uint32 qsLen) { uint32 a = qsLen - A->_qsPos - 1 + A->_dsPos; uint32 b = qsLen - B->_qsPos - 1 + B->_dsPos; return(((a < b)) || ((a == b) && (A->_qsPos < B->_qsPos))); } inline int compareLines(uint32 l, uint32 q, diagonalLine *B, uint32 qsLen) { uint32 b = qsLen - B->_qsPos - 1 + B->_dsPos; return(((l < b)) || ((l == b) && (q < B->_qsPos))); } inline void adjustHeap(diagonalLine *L, int32 p, int32 n, uint32 qsLen) { uint32 q = L[p]._qsPos; uint32 d = L[p]._dsPos; uint32 l = qsLen - q - 1 + d; int32 c = (p << 1) + 1; // let c be the left child of p while (c < n) { // Find the larger of the two children // if ((c+1 < n) && compareLines(L+c, L+c+1, qsLen)) c++; // Does the node in question fit here? // if (compareLines(l, q, L+c, qsLen) == false) break; // Else, swap the parent and the child // L[p]._qsPos = L[c]._qsPos; L[p]._dsPos = L[c]._dsPos; // Move down the tree // p = c; c = (p << 1) + 1; } L[p]._qsPos = q; L[p]._dsPos = d; } #else // WITH_DIAGONALID inline int compareLines(diagonalLine *A, diagonalLine *B) { return(((A->_diagonalID < B->_diagonalID)) || ((A->_diagonalID == B->_diagonalID) && (A->_qsPos < B->_qsPos))); } inline int compareLines(uint32 l, uint32 q, diagonalLine *B) { return(((l < B->_diagonalID)) || ((l == B->_diagonalID) && (q < B->_qsPos))); } inline void adjustHeap(diagonalLine *L, int32 p, int32 n) { uint32 q = L[p]._qsPos; uint32 d = L[p]._dsPos; uint32 l = L[p]._diagonalID; int32 c = (p << 1) + 1; // let c be the left child of p while (c < n) { // Find the larger of the two children // if ((c+1 < n) && compareLines(L+c, L+c+1)) c++; // Does the node in question fit here? 
// if (compareLines(l, q, L+c) == false) break; // Else, swap the parent and the child // L[p]._qsPos = L[c]._qsPos; L[p]._dsPos = L[c]._dsPos; L[p]._diagonalID = L[c]._diagonalID; // Move down the tree // p = c; c = (p << 1) + 1; } L[p]._qsPos = q; L[p]._dsPos = d; L[p]._diagonalID = l; } #endif void hitMatrix::processMatrix(char direction, filterObj *FO) { if (_hitsLen == 0) return; // First, sort by the dsPos. This is done so that we can find all the hits for // a specific scaffold. // sort_dsPos(); merCovering IL(config._merSize); uint32 ILlength = 0; // Now, while there are hits left.... // uint32 firstHit = 0; uint32 lastHit = 0; uint32 currentSeq = 0; while (firstHit < _hitsLen) { // Move the currentSeq until the firstHit is below it. // while ((currentSeq < config._genome->numberOfSequences()) && (config._genome->startOf(currentSeq) <= _hits[firstHit]._dsPos)) currentSeq++; // // currentSeq is now the sequence AFTER the one that we want hits in. // // Find the first hit that is in currentSeq. If this is the last sequence, // then, of course, all remaining hits are in it. // if (currentSeq < config._genome->numberOfSequences()) { lastHit = firstHit + 1; while ((lastHit < _hitsLen) && (_hits[lastHit]._dsPos < config._genome->startOf(currentSeq))) lastHit++; } else { lastHit = _hitsLen; } // Drop back one sequence; this is the sequence the hits are in. // currentSeq--; // Adjust the hits to be relative to the start of this sequence // for (uint32 i=firstHit; istartOf(currentSeq); // Sort them, if needed. // if (lastHit - firstHit > 1) { // We cheat; heapsort isn't too friendly to sorting the middle of // an array, so we make a new array in the middle! // diagonalLine *hitsToSort = _hits + firstHit; // Build the heap. 
I initially thought this could be done at the // same time as the scan for the last hit, but it can't (easily) // for (int32 i=(lastHit - firstHit)/2 - 1; i>=0; i--) #ifdef WITHOUT_DIAGONALID adjustHeap(hitsToSort, i, lastHit - firstHit, _qsLen); #else adjustHeap(hitsToSort, i, lastHit - firstHit); #endif // Sort the hits be diagonal. This is the second part of // heap sort -- Interchange the new maximum with the element // at the end of the tree // for (uint32 i=lastHit - firstHit - 1; i>0; i--) { uint32 q = hitsToSort[i]._qsPos; uint32 d = hitsToSort[i]._dsPos; #ifndef WITHOUT_DIAGONALID uint32 l = hitsToSort[i]._diagonalID; #endif hitsToSort[i]._qsPos = hitsToSort[0]._qsPos; hitsToSort[i]._dsPos = hitsToSort[0]._dsPos; #ifndef WITHOUT_DIAGONALID hitsToSort[i]._diagonalID = hitsToSort[0]._diagonalID; #endif hitsToSort[0]._qsPos = q; hitsToSort[0]._dsPos = d; #ifndef WITHOUT_DIAGONALID hitsToSort[0]._diagonalID = l; #endif #ifdef WITHOUT_DIAGONALID adjustHeap(hitsToSort, 0, i, _qsLen); #else adjustHeap(hitsToSort, 0, i); #endif } } // Filter them // #ifdef WITHOUT_DIAGONALID uint32 lastDiagonal = _qsLen - _hits[firstHit]._qsPos - 1 + _hits[firstHit]._dsPos; #else uint32 lastDiagonal = _hits[firstHit]._diagonalID; #endif uint32 qsLow = _hits[firstHit]._qsPos; uint32 qsHigh = _hits[firstHit]._qsPos; uint32 dsLow = _hits[firstHit]._dsPos; uint32 dsHigh = _hits[firstHit]._dsPos; IL.clear(); for (uint32 i=firstHit; i _hits[i]._qsPos) qsLow = _hits[i]._qsPos; if (qsHigh < _hits[i]._qsPos) qsHigh = _hits[i]._qsPos; if (dsLow > _hits[i]._dsPos) dsLow = _hits[i]._dsPos; if (dsHigh < _hits[i]._dsPos) dsHigh = _hits[i]._dsPos; IL.addMer(_hits[i]._qsPos); } else { // // Save the match. cut-n-paste with below. 
// ILlength = IL.sumOfLengths(); IL.clear(); if (ILlength >= config._minLength) { if (direction == 'r') { FO->addHit(direction, config._genome->IIDOf(currentSeq), dsLow, dsHigh - dsLow + config._merSize, _qsIdx, _qsLen - qsHigh - config._merSize, qsHigh - qsLow + config._merSize, ILlength); } else { FO->addHit(direction, config._genome->IIDOf(currentSeq), dsLow, dsHigh - dsLow + config._merSize, _qsIdx, qsLow, qsHigh - qsLow + config._merSize, ILlength); } } #ifdef WITHOUT_DIAGONALID lastDiagonal = _qsLen - _hits[i]._qsPos - 1 + _hits[i]._dsPos; #else lastDiagonal = _hits[i]._diagonalID; #endif qsLow = _hits[i]._qsPos; qsHigh = _hits[i]._qsPos; dsLow = _hits[i]._dsPos; dsHigh = _hits[i]._dsPos; IL.addMer(_hits[i]._qsPos); } } // Save the final cluster? (cut-n-paste from above) // ILlength = IL.sumOfLengths(); IL.clear(); if (ILlength >= config._minLength) { if (direction == 'r') { FO->addHit(direction, config._genome->IIDOf(currentSeq), dsLow, dsHigh - dsLow + config._merSize, _qsIdx, _qsLen - qsHigh - config._merSize, qsHigh - qsLow + config._merSize, ILlength); } else { FO->addHit(direction, config._genome->IIDOf(currentSeq), dsLow, dsHigh - dsLow + config._merSize, _qsIdx, qsLow, qsHigh - qsLow + config._merSize, ILlength); } } // All done with these hits. Move to the next set. // firstHit = lastHit; } } kmer-code-2013-trunk/seatac/filterObj.H0000644000000000000000000001330712322046702016441 0ustar rootroot#ifndef FILTEROBJ_H #define FILTEROBJ_H #include #include #include "sharedObj.H" #include "util++.H" // // Object that will filter and output hits. If no sharedObj is // supplied, the default behavior is output all hits. // // The default filter here inserts lots of '#'s into the output string, // then replaces those with the real match id on output. An alternative // (and probably better idea) is to build a vector of structs of things // to output. 
class filterObj { public: filterObj(sharedObj *so, char *soOpts); ~filterObj(); void addHit(char direction, uint32 id1, uint32 pos1, uint32 len1, uint32 id2, uint32 pos2, uint32 len2, uint32 filled); void filter(void); uint64 output(FILE *, uint64); private: char *soOpts; sharedObj *so; void *handle; void * (*soconstruct)(char *); void (*sodestruct)(void *); void (*soaddHit)(void *, char, uint32, uint32, uint32, uint32, uint32, uint32, uint32); void (*sofilter)(void *); uint64 (*sooutput)(void *, FILE *, uint64); uint32 theOutputPos; uint32 theOutputMax; char *theOutput; char name1[32]; char name2[32]; friend class statObj; }; inline filterObj::filterObj(sharedObj *so_, char *op_) { soOpts = 0L; so = so_; handle = 0L; soconstruct = 0L; sodestruct = 0L; soaddHit = 0L; sofilter = 0L; sooutput = 0L; theOutputPos = 0; theOutputMax = 0; theOutput = 0L; strcpy(name1, "UNK"); strcpy(name2, "UNK"); if (op_) { soOpts = new char [strlen(op_) + 1]; strcpy(soOpts, op_); splitToWords W(soOpts); for (uint32 arg = 0; arg < W.numWords(); arg++) { if (strcmp(W.getWord(arg), "-1") == 0) { strncpy(name1, W.getWord(++arg), 31); } else if (strcmp(W.getWord(arg), "-2") == 0) { strncpy(name2, W.getWord(++arg), 31); } } } if (so) { soconstruct = (void* (*)(char *))so->get("construct"); sodestruct = (void (*)(void*))so->get("destruct"); soaddHit = (void (*)(void *, char, uint32, uint32, uint32, uint32, uint32, uint32, uint32))so->get("addHit"); sofilter = (void (*)(void*))so->get("filter"); sooutput = (uint64 (*)(void*,FILE*,uint64))so->get("output"); if (!soconstruct) fprintf(stderr, "construct not found!\n"); if (!sodestruct) fprintf(stderr, "destruct not found!\n"); if (!soaddHit) fprintf(stderr, "addHit not found!\n"); if (!sofilter) fprintf(stderr, "filter not found!\n"); if (!sooutput) fprintf(stderr, "output not found!\n"); handle = (*soconstruct)(soOpts); } if (!so) { theOutputPos = 0; theOutputMax = 1048576; theOutput = new char [theOutputMax]; theOutput[0] = 0; } } inline 
filterObj::~filterObj() { if (sodestruct) (*sodestruct)(handle); delete [] soOpts; delete [] theOutput; } inline void filterObj::addHit(char orientation, uint32 id1, uint32 pos1, uint32 len1, uint32 id2, uint32 pos2, uint32 len2, uint32 filled) { if (soaddHit) { (*soaddHit)(handle, orientation, id1, pos1, len1, id2, pos2, len2, filled); } else { if (theOutputPos + 128 >= theOutputMax) { theOutputMax <<= 1; char *o = 0L; try { o = new char [theOutputMax]; } catch (std::bad_alloc) { fprintf(stderr, "hitMatrix::filter()-- caught std::bad_alloc in %s at line %d\n", __FILE__, __LINE__); fprintf(stderr, "hitMatrix::filter()-- tried to extend output string from "uint32FMT" to "uint32FMT" bytes.\n", theOutputPos, theOutputMax); exit(1); } memcpy(o, theOutput, theOutputPos); delete [] theOutput; theOutput = o; } sprintf(theOutput + theOutputPos, "M x ############ . %s:"uint32FMT" "uint32FMT" "uint32FMT" 1 %s:"uint32FMT" "uint32FMT" "uint32FMT" %s "uint32FMT"\n", name1, id1, pos1, len1, name2, id2, pos2, len2, (orientation == 'f') ? "1" : "-1", filled); while (theOutput[theOutputPos]) theOutputPos++; } } inline void filterObj::filter(void) { if (sofilter) { (*sofilter)(handle); } } inline uint64 filterObj::output(FILE *F, uint64 matchid) { if (sooutput) { matchid = (*sooutput)(handle, F, matchid); } else { char matchIDstring[32] = {0}; // Insert the match id's for all these matches. We have to // do this here (not during searches) because we're threaded. // char *pos = theOutput; while (*pos) { // Construct a string holding the text version of the match id. // matchid++; sprintf(matchIDstring, uint64FMT, matchid); // At the start of an output record. Skip the row type and // sub type, 'M x ', which should put us at the start of // the match id. // pos += 4; // Copy the number into the space, removing any extra # // marks, warning if we run out of space. 
// char *matchIDiterator = matchIDstring; while ((*pos == '#') && (*matchIDiterator != 0)) *pos++ = *matchIDiterator++; while (*pos == '#') *pos++ = ' '; if (*matchIDiterator != 0) fprintf(stderr, "WARNING: there isn't enough space in the match to insert the match id "uint64FMT" '%s'!\n", matchid, matchIDstring); // Skip to the next record // while (*pos++ != '\n') ; } fwrite(theOutput, sizeof(char), theOutputPos, F); } return(matchid); } #endif // FILTEROBJ_H kmer-code-2013-trunk/seatac/hitMatrix.H0000644000000000000000000000560112322046702016470 0ustar rootroot#ifndef HITMATRIX_H #define HITMATRIX_H #include #include #include #include "bio++.H" #include "positionDB.H" #include "filterObj.H" // Define this to cut the space required for storing hits by 1/3 -- // from 12 byyes to 8 bytes -- at a slight computational expense -- // negligible on real hardware, I hope. // // The original definition of diagonalID was // qsLen - qsPos - 1 + dsPos // but qsLen is fixed for everyone, so we could reduce it to // dsPos - qsPos // but that's not unsigned. // // Results: on a human mapping, using chromosomes as the stream and // the whole human as the table (so we need to actually store a large // number of hits), we see a savings of 2GB and a small drop in // runtime. Process size went from 20.7GB to 18.7GB, CPU time from // 20578 to 20193 seconds (833MHz EV6.8AL (21264B)). // #define WITHOUT_DIAGONALID struct diagonalLine { uint32 _qsPos; uint32 _dsPos; #ifndef WITHOUT_DIAGONALID uint32 _diagonalID; #endif }; class hitMatrix { public: hitMatrix(uint32 qsLen, uint32 qsIdx); ~hitMatrix(); void addHits(uint32 qi, uint64 *ps, uint64 cn); void sort_diagonal(void); void sort_dsPos(void); void processMatrix(char direction, filterObj *FO); private: uint32 _qsLen; // Seq Len of Q uint32 _qsIdx; // Index of Q in the FastA // Instead of building the lines during add(), we store // the information used to build lines, and then build them // in chain(). 
This was done to reduce simultaneous memory // usage, as the lineArrayMap and etc take up considerable space. // uint32 _hitsLen; uint32 _hitsMax; diagonalLine *_hits; }; inline void hitMatrix::addHits(uint32 qi, uint64 *ps, uint64 cn) { if ((_hitsLen + cn) >= _hitsMax) { _hitsMax = _hitsMax + _hitsMax + (uint32)cn; diagonalLine *h; try { h = new diagonalLine [_hitsMax]; } catch (std::bad_alloc) { fprintf(stderr, "hitMatrix::addHits()-- caught std::bad_alloc in %s at line %d.\n", __FILE__, __LINE__); fprintf(stderr, "hitMatrix::addHits()-- have "uint32FMT" hits, tried to add "uint64FMT" more\n", _hitsLen, cn); exit(1); } for (uint32 z=_hitsLen; z--; ) { h[z]._qsPos = _hits[z]._qsPos; h[z]._dsPos = _hits[z]._dsPos; #ifndef WITHOUT_DIAGONALID h[z]._diagonalID = _hits[z]._diagonalID; #endif } delete [] _hits; _hits = h; } for (uint64 i=0; i #include #include "sharedObj.H" class statObj { public: statObj(sharedObj *so, char *soOpts); ~statObj(); void add(filterObj *FO) { if (soadd) (*soadd)(handle, FO->handle); }; void show(FILE *file) { if (soshow) (*soshow)(handle, file); }; private: char *soOpts; sharedObj *so; void *handle; void * (*soconstruct)(char *); void (*sodestruct)(void *); void (*soadd)(void *, void *); void (*soshow)(void *, FILE *); }; inline statObj::statObj(sharedObj *so_, char *op_) { soOpts = 0L; so = so_; handle = 0L; soconstruct = 0L; soadd = 0L; soshow = 0L; sodestruct = 0L; if (op_) { soOpts = new char [strlen(op_) + 1]; strcpy(soOpts, op_); } if (so) { soconstruct = (void* (*)(char*))so->get("constructStats"); sodestruct = (void (*)(void*))so->get("destructStats"); soadd = (void (*)(void*, void*))so->get("addStats"); soshow = (void (*)(void*, FILE*))so->get("showStats"); handle = (*soconstruct)(soOpts); } } inline statObj::~statObj() { if (sodestruct) (*sodestruct)(handle); delete [] soOpts; } #endif // STATOBJ_H kmer-code-2013-trunk/seatac/seatac.H0000644000000000000000000000653312322046702015764 0ustar rootroot#include #include #include 
#include #include #include #include #include #include #include #include #include // At one time, this was needed for pthread.h or semaphore.h typedef unsigned short ushort; #include #include #include "bio++.H" #include "seqCache.H" #include "existDB.H" #include "positionDB.H" #include "hitMatrix.H" #include "filterObj.H" #include "statObj.H" class encodedQuery { private: char const *_seq; uint32 _seqLen; uint32 _merSize; bool _rc; uint32 _seqPos; int32 _timeUntilValid; uint64 _substring; uint64 _mermask; public: encodedQuery(char const *seq, uint32 seqLen, uint32 k, bool rc); ~encodedQuery(); bool getMer(uint64 &mer, uint32 &pos); }; // // A singleton for working with the command line parameters. // #define MAX_THREADS 64 class configuration { public: bool _beVerbose; uint32 _merSize; uint32 _merSkip; uint32 _numSearchThreads; bool _doReverse; bool _doForward; uint32 _maxDiagonal; uint32 _maxGap; uint32 _qsOverlap; uint32 _dsOverlap; uint32 _minLength; char *_dbFileName; char *_qsFileName; char *_maskFileName; char *_onlyFileName; char *_outputFileName; char *_statsFileName; char *_tableFileName; bool _tableBuildOnly; seqStream *_genome; // Filter parameters // char *_filtername; char *_filteropts; sharedObj *_filterObj; // Wall clock times // double _startTime; double _initTime; double _buildTime; double _searchTime; double _totalTime; // Loader parameters // uint32 _loaderHighWaterMark; struct timespec _loaderSleep; bool _loaderWarnings; // Search parameters // struct timespec _searchSleep; // Output parameters // uint32 _writerHighWaterMark; struct timespec _writerSleep; bool _writerWarnings; configuration(); ~configuration(); void usage(char *name); void read(int argc, char **argv); void writeATACheader(FILE *out); void setTime(struct timespec *ts, double t) { ts->tv_sec = (time_t)floor(t); ts->tv_nsec = (long)((t - ts->tv_sec) * 1e9); }; }; // Shared data // extern configuration config; extern seqCache *qsFASTA; // Used exclusively by thr-loader.C extern 
positionDB *positions; extern volatile uint32 numberOfQueries; extern filterObj **output; extern pthread_mutex_t inputTailMutex; extern seqInCore **input; extern volatile uint32 inputHead; extern volatile uint32 inputTail; extern volatile uint32 outputPos; extern char *threadStats[MAX_THREADS]; void *deadlockDetector(void *U); void *deadlockChecker(void *U); void *loaderThread(void *U); void *searchThread(void *U); kmer-code-2013-trunk/seatac/filter-heavychains.C0000644000000000000000000002137312322046702020303 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2004 Applera Corporation // Copyright (c) 2005 The J. Craig Venter Institute // Author: Clark Mobarry // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include "util++.H" #include "heavychains.H" extern "C" { void *construct(char *options); void destruct(void *handle); void addHit(void *handle, char orientation, uint32 id1, uint32 pos1, uint32 len1, uint32 id2, uint32 pos2, uint32 len2, uint32 filled); void filter(void *handle); uint64 output(void *handle, FILE *file, uint64 matchid); void *constructStats(char *options); void destructStats(void *handle); void addStats(void *handle, void *sp); void showStats(void *handle, FILE *file); } // HeavyChains is implemented in the StrandPair class. It takes all // hits for a single pair of sequences and....does something. Seatac // gives the filterObj interface (aka, the interface in this file) // all hits for a single sequence to the whole genome (or part of). // So, the StrandPairManager acts as the, uhhh, manager for a bunch // of StrandPairs, ensuring that each StrandPair is in fact a pair. // // It is interface compatible with a StrandPair. // class StrandPairManager { private: int beVerbose; char assemblyId1[32]; char assemblyId2[32]; int maxJump; // Default maximum intra-run jump allowed in a good run. double minScore; // Default minimum of bp filled in a good run. 
bool isForward; StrandPair *P; StrandPair *Proot; public: StrandPairManager(bool verbose, char *assemblyid1, char *assemblyid2, int maxjump, double minscore) { beVerbose = verbose; strncpy(assemblyId1, assemblyid1, 31); strncpy(assemblyId2, assemblyid2, 31); maxJump = maxjump; minScore = minscore; isForward = true; Proot = 0L; P = 0L; }; ~StrandPairManager(void) { P = Proot; while (Proot) { Proot = Proot->next(); delete P; P = Proot; } }; void addHit(char direction, uint32 id1, uint32 xlo, uint32 xln, uint32 id2, uint32 ylo, uint32 yln, uint32 filled) { // We're given hits for exactly one id2 and all id1, forward hits // followed by reverse hits. Which means that id1 makes two // passes through, both passes are increasing (enforced by the // seqStream used in seatac). // // A linked list of strand pairs is kept (the links are built // into StrandPair for convenience), each strand pair knows it's // pair of ids. // // No root? Make one and add the hit. // if (Proot == 0L) { P = Proot = new StrandPair(beVerbose, assemblyId1, assemblyId2, maxJump, minScore); P->addHit(direction, id1, xlo, xln, id2, ylo, yln, filled); return; } // Reset to the start if we just switched from forward to // reverse. This is also the only time that the sequence id can // decrease, and we might have to make a new root. // if (isForward && (direction == 'r')) { isForward = false; if (id1 < Proot->sequenceIID1()) { StrandPair *N = new StrandPair(beVerbose, assemblyId1, assemblyId2, maxJump, minScore); N->addHit(direction, id1, xlo, xln, id2, ylo, yln, filled); N->addNext(Proot); P = Proot = N; return; } P = Proot; } // Verify that id1 didn't decrease. // if (id1 < P->sequenceIID1()) { fprintf(stderr, "Why did the sequence id just decrease? This should not have happened.\n"); fprintf(stderr, "Crash. %s at line %d\n", __FILE__, __LINE__ - 2); exit(1); } // Move to the node just before, or exactly at, the one we want // to add to. Remember, id1 never decreases. 
// while ((P->next()) && (P->next()->sequenceIID1() <= id1)) P = P->next(); // If we're not at the correct node, insert one after the // current, and make it the correct one. // if (P->sequenceIID1() != id1) { StrandPair *NP = new StrandPair(beVerbose, assemblyId1, assemblyId2, maxJump, minScore); NP->addNext(P->next()); P->addNext(NP); P = NP; // Hooray! } // And now we can just add the hit. // P->addHit(direction, id1, xlo, xln, id2, ylo, yln, filled); }; void process(void) { for (StrandPair *SP=Proot; SP; SP=SP->next()) SP->process(); }; uint64 print(FILE *outF, uint64 matchid) { for (StrandPair *SP=Proot; SP; SP=SP->next()) matchid = SP->print(outF, matchid); return(matchid); }; void addStats(TheStats *ST) { for (StrandPair *SP=Proot; SP; SP=SP->next()) ST->add(SP); }; }; void* construct(char *options) { int beVerbose = 0; char assemblyIdD[4] = { 'U', 'N', 'K', 0 }; char *assemblyId1 = assemblyIdD; char *assemblyId2 = assemblyIdD; double minScore = 100.0; // Default minimum of bp filled in a good run. int maxJump = 100000; // Default maximum intra-run jump allowed in a good run. 
// Parse the options to find the parameters // splitToWords W(options); uint32 arg = 0; while (arg < W.numWords()) { if (strcmp(W.getWord(arg), "-v") == 0) { beVerbose++; } else if (strcmp(W.getWord(arg), "-s") == 0) { minScore = atof(W.getWord(++arg)); } else if (strcmp(W.getWord(arg), "-j") == 0) { maxJump = atoi(W.getWord(++arg)); } else if (strcmp(W.getWord(arg), "-1") == 0) { assemblyId1 = W.getWord(++arg); } else if (strcmp(W.getWord(arg), "-2") == 0) { assemblyId2 = W.getWord(++arg); } arg++; } return((void *)(new StrandPairManager(beVerbose, assemblyId1, assemblyId2, maxJump, minScore))); } void destruct(void *handle) { delete (StrandPairManager *)handle; } void addHit(void *handle, char orientation, uint32 id1, uint32 pos1, uint32 len1, uint32 id2, uint32 pos2, uint32 len2, uint32 filled) { ((StrandPairManager *)handle)->addHit(orientation, id1, pos1, len1, id2, pos2, len2, filled); } void filter(void *handle) { ((StrandPairManager *)handle)->process(); } uint64 output(void *handle, FILE *file, uint64 matchid) { return(((StrandPairManager *)handle)->print(file, matchid)); } void* constructStats(char *options) { int beVerbose = 0; char assemblyIdD[4] = { 'U', 'N', 'K', 0 }; char *assemblyId1 = assemblyIdD; char *assemblyId2 = assemblyIdD; double minScore = 100.0; // Default minimum of bp filled in a good run. int maxJump = 100000; // Default maximum intra-run jump allowed in a good run. 
// Parse the options to find the parameters // splitToWords W(options); uint32 arg = 0; while (arg < W.numWords()) { if (strcmp(W.getWord(arg), "-v") == 0) { beVerbose++; } else if (strcmp(W.getWord(arg), "-s") == 0) { minScore = atof(W.getWord(++arg)); } else if (strcmp(W.getWord(arg), "-j") == 0) { maxJump = atoi(W.getWord(++arg)); } else if (strcmp(W.getWord(arg), "-1") == 0) { assemblyId1 = W.getWord(++arg); } else if (strcmp(W.getWord(arg), "-2") == 0) { assemblyId2 = W.getWord(++arg); } arg++; } return((void *)(new TheStats(beVerbose, assemblyId1, assemblyId2, maxJump, minScore))); } void destructStats(void *handle) { delete (TheStats *)handle; } void addStats(void *handle, void *sp) { // We aren't getting a single StrandPair anymore, we're getting a StrandPairManager now. // //((TheStats *)handle)->add((StrandPair *)sp); // ((StrandPairManager *)sp)->addStats((TheStats *)handle); } void showStats(void *handle, FILE *file) { ((TheStats *)handle)->show(file); } kmer-code-2013-trunk/seatac/posix.H0000644000000000000000000000000011463747051015657 0ustar rootrootkmer-code-2013-trunk/seatac/seatac.C0000644000000000000000000001604512322046702015756 0ustar rootroot#include #include #include #include "seatac.H" // Shared data // configuration config; seqCache *qsFASTA = 0L; positionDB *positions = 0L; volatile uint32 numberOfQueries = 0; filterObj **output = 0L; pthread_mutex_t inputTailMutex; seqInCore **input = 0L; volatile uint32 inputHead = 0; volatile uint32 inputTail = 0; volatile uint32 outputPos = 0; char *threadStats[MAX_THREADS] = { 0L }; #ifdef _AIX static void aix_new_handler() { fprintf(stderr, "aix_new_handler()-- Memory allocation failed.\n"); throw std::bad_alloc(); } #endif int main(int argc, char **argv) { #ifdef _AIX // By default, AIX Visual Age C++ new() returns 0L; this turns on // exceptions. 
// std::set_new_handler(aix_new_handler); #endif // Read the configuration from the command line // if (argc < 2) { config.usage(argv[0]); exit(1); } config.read(argc, argv); config._startTime = getTime(); // Open and init the query sequence // qsFASTA = new seqCache(config._qsFileName); numberOfQueries = qsFASTA->getNumberOfSequences(); output = new filterObj * [numberOfQueries]; input = new seqInCore * [numberOfQueries]; inputHead = 0; inputTail = 0; for (uint32 i=numberOfQueries; i--; ) { output[i] = 0L; input[i] = 0L; } config._initTime = getTime(); config._genome = new seqStream(config._dbFileName); // Create the chunk, returning a positionDB. Threads will use both // chain and postions to build hitMatrices. // if ((config._tableFileName) && (fileExists(config._tableFileName))) { if (config._tableBuildOnly) { fprintf(stderr, "All done. Table '%s' already build.\n", config._tableFileName); exit(0); } else { fprintf(stderr, "Loading positionDB state from '%s'\n", config._tableFileName); positions = new positionDB(config._tableFileName, config._merSize, config._merSkip, 0); } } else { existDB *maskDB = 0L; if (config._maskFileName) { if (config._beVerbose) fprintf(stderr, "Building maskDB from '%s'\n", config._maskFileName); maskDB = new existDB(config._maskFileName, config._merSize, existDBcanonical | existDBcompressHash | existDBcompressBuckets, 0, ~uint32ZERO); } existDB *onlyDB = 0L; if (config._onlyFileName) { if (config._beVerbose) fprintf(stderr, "Building onlyDB from '%s'\n", config._onlyFileName); onlyDB = new existDB(config._onlyFileName, config._merSize, existDBcanonical | existDBcompressHash | existDBcompressBuckets, 0, ~uint32ZERO); } merStream *MS = new merStream(new kMerBuilder(config._merSize), config._genome, true, false); positions = new positionDB(MS, config._merSize, config._merSkip, maskDB, onlyDB, 0L, 0, 0, 0, 0, config._beVerbose); delete MS; delete maskDB; delete onlyDB; if (config._tableFileName) { if (config._beVerbose) fprintf(stderr, 
"Dumping positions table to '%s'\n", config._tableFileName); positions->saveState(config._tableFileName); if (config._tableBuildOnly) exit(0); } } config._buildTime = getTime(); // // Initialize threads // pthread_attr_t threadAttr; pthread_t threadID; pthread_mutex_init(&inputTailMutex, NULL); pthread_attr_init(&threadAttr); pthread_attr_setscope(&threadAttr, PTHREAD_SCOPE_SYSTEM); pthread_attr_setdetachstate(&threadAttr, PTHREAD_CREATE_DETACHED); pthread_attr_setschedpolicy(&threadAttr, SCHED_OTHER); // Start the deadlock detection threads // #ifdef __alpha fprintf(stderr, "Deadlock detection enabled!\n"); pthread_create(&threadID, &threadAttr, deadlockDetector, 0L); pthread_create(&threadID, &threadAttr, deadlockChecker, 0L); #endif // Start the loader thread // pthread_create(&threadID, &threadAttr, loaderThread, 0L); // Start the search threads // for (uint32 i=0; ioutput(resultFILE, matchID); if (errno) { fprintf(stderr, "Couldn't write to the output file '%s'.\n%d: %s\n", config._outputFileName, errno, strerror(errno)); exit(1); } // Add this set of results to the statistics collector // stats->add(output[outputPos]); //stats->show(stderr); delete input[outputPos]; delete output[outputPos]; input[outputPos] = 0L; output[outputPos] = 0L; outputPos++; } else { nanosleep(&config._writerSleep, 0L); } } if (config._beVerbose) { fprintf(stderr, "\n"uint32FMTW(7)" sequences (%5.1f%%; %8.3f/sec) %5.2f seconds.\n", numberOfQueries, 100.0 * outputPos / numberOfQueries, outputPos / (getTime() - zeroTime), getTime() - zeroTime); } // Print statistics // stats->show(resultFILE); delete stats; errno = 0; fclose(resultFILE); if (errno) fprintf(stderr, "Couldn't close to the output file '%s'.\n%s\n", config._outputFileName, strerror(errno)); config._searchTime = getTime(); // Clean up // delete positions; pthread_attr_destroy(&threadAttr); pthread_mutex_destroy(&inputTailMutex); delete [] input; delete [] output; return(0); } 
kmer-code-2013-trunk/seatac/hitMatrix-sort.C0000644000000000000000000000326612322046702017455 0ustar rootroot#include "hitMatrix.H" // Sort by dsPos inline void adjustHeap_dsPos(diagonalLine *L, uint32 p, uint32 n) { uint32 q = L[p]._qsPos; uint32 d = L[p]._dsPos; #ifndef WITHOUT_DIAGONALID uint32 l = L[p]._diagonalID; #endif uint32 c = (p << 1) + 1; // let c be the left child of p while (c < n) { // Find the larger of the two children // if ((c+1 < n) && (L[c]._dsPos < L[c+1]._dsPos)) c++; // Does the node in question fit here? // if (d >= L[c]._dsPos) break; // Else, swap the parent and the child // L[p]._qsPos = L[c]._qsPos; L[p]._dsPos = L[c]._dsPos; #ifndef WITHOUT_DIAGONALID L[p]._diagonalID = L[c]._diagonalID; #endif // Move down the tree // p = c; c = (p << 1) + 1; } L[p]._qsPos = q; L[p]._dsPos = d; #ifndef WITHOUT_DIAGONALID L[p]._diagonalID = l; #endif } void hitMatrix::sort_dsPos(void) { if (_hitsLen > 1) { // Create the heap of lines. // for (uint32 i=_hitsLen/2; i--; ) adjustHeap_dsPos(_hits, i, _hitsLen); // Interchange the new maximum with the element at the end of the tree // for (uint32 i=_hitsLen-1; i>0; i--) { uint32 q = _hits[i]._qsPos; uint32 d = _hits[i]._dsPos; #ifndef WITHOUT_DIAGONALID uint32 l = _hits[i]._diagonalID; #endif _hits[i]._qsPos = _hits[0]._qsPos; _hits[i]._dsPos = _hits[0]._dsPos; #ifndef WITHOUT_DIAGONALID _hits[i]._diagonalID = _hits[0]._diagonalID; #endif _hits[0]._qsPos = q; _hits[0]._dsPos = d; #ifndef WITHOUT_DIAGONALID _hits[0]._diagonalID = l; #endif adjustHeap_dsPos(_hits, 0, i); } } } kmer-code-2013-trunk/seatac/heavychains.H0000644000000000000000000002663512322046702017033 0ustar rootroot#ifndef STRANDPAIR_H #define STRANDPAIR_H #include #include #include #include using namespace std; #include "util.h" // // The StrandPair does the heavy chains filtering, while the TheStats // collects statistics on all StrandPairs. TheStats is also // responsible for reporting the options used by StrandPair. 
// struct Match { int xlo; int ylo; int xhi; int yhi; double selfS; // The intrinsic score of the Match. double S; // The computed score of the Match? // We need two times the number of dimensions of scores. That is // one score starting from each corner of the bounding box of the // space. // double neS; double swS; double nwS; double seS; int filled; // Is this the same as selfS ? char ori; }; class StrandPair { public: StrandPair(bool verbose, char *assemblyid1, char *assemblyid2, int maxjump, double minscore) { beVerbose = verbose; strncpy(assemblyId1, assemblyid1, 31); strncpy(assemblyId2, assemblyid2, 31); maxJump = maxjump; minScore = minscore; Plen = 0; Pmax = 1024; P = new Match [Pmax]; clear(); }; ~StrandPair(void) { if (beVerbose > 1) fprintf(stderr, "StrandPair::StrandPair()-- delete %s vs %s with %d hits\n", assemblyId1, assemblyId2, Plen); delete [] P; }; void addHit(char direction, uint32 id1, uint32 xlo, uint32 xln, uint32 id2, uint32 ylo, uint32 yln, uint32 filled); void process(void); uint64 print(FILE *outF, uint64 matchid); void clear(void) { iid1 = ~uint32ZERO; iid2 = ~uint32ZERO; _next = 0L; sumlen1 = 0.0; sumlen2 = 0.0; maxlen1 = 0.0; maxlen2 = 0.0; maxScoreFwd = 0.0; // Maximum forward chain score for the strand pair. maxScoreRev = 0.0; // Maximum reverse chain score for the strand pair. Plen = 0; }; // The StrandPairManager (in filter-strandpair.H) is lazy and makes // StrandPairs keep track of the next one. 
// StrandPair *_next; StrandPair *next(void) { return(_next); }; void addNext(StrandPair *n) { _next = n; }; uint32 sequenceIID1(void) { return(iid1); }; //uint32 sequenceIID2(void) { return(iid2); }; double getsumlen1(void) const { return(sumlen1); }; double getsumlen2(void) const { return(sumlen2); }; double getmaxlen1(void) const { return(maxlen1); }; double getmaxlen2(void) const { return(maxlen2); }; double getmaxScoreFwd(void) const { return(maxScoreFwd); }; double getmaxScoreRev(void) const { return(maxScoreRev); }; private: // This used to use a vector, but DPTree wants a pointer to the // array of matches // int Plen; int Pmax; Match *P; void Padd(Match *m) { if (Plen >= Pmax) { Pmax *= 2; Match *n = new Match [Pmax]; memcpy(n, P, sizeof(Match) * Plen); delete [] P; P = n; } memcpy(P+Plen, m, sizeof(Match)); Plen++; }; private: uint32 iid1; uint32 iid2; uint32 beVerbose; char assemblyId1[32]; char assemblyId2[32]; int maxJump; // Default maximum intra-run jump allowed in a good run. double minScore; // Default minimum of bp filled in a good run. // The following are only known after StrandPair::print(). double sumlen1; double sumlen2; double maxlen1; double maxlen2; double maxScoreFwd; // Maximum forward chain score for the strand pair. double maxScoreRev; // Maximum reverse chain score for the strand pair. 
}; class TheStats { public: TheStats(bool verbose, char *assemblyid1, char *assemblyid2, int maxjump, double minscore) { beVerbose = verbose; strncpy(assemblyId1, assemblyid1, 31); // Note the cap 'i' strncpy(assemblyId2, assemblyid2, 31); maxJump = maxjump; minScore = minscore; sumlen1 = 0.0; sumlen2 = 0.0; sumMaxLen1 = 0.0; sumMaxLen2 = 0.0; sumMaxScoreFwd = 0.0; sumMaxScoreRev = 0.0; }; void add(StrandPair *sp) { sumlen1 += sp->getsumlen1(); sumlen2 += sp->getsumlen2(); sumMaxLen1 += sp->getmaxlen1(); sumMaxLen2 += sp->getmaxlen2(); sumMaxScoreFwd += sp->getmaxScoreFwd(); sumMaxScoreRev += sp->getmaxScoreRev(); }; void show(FILE *outfile) { fprintf(outfile, "/assemblyId1=%s\n", assemblyId1); fprintf(outfile, "/assemblyId2=%s\n", assemblyId2); fprintf(outfile, "/heavyMaxJump=%d\n", maxJump); fprintf(outfile, "/heavyMinFill=%f\n", minScore); fprintf(outfile, "/heavySumLen1=%f\n", sumlen1); fprintf(outfile, "/heavySumLen2=%f\n", sumlen2); fprintf(outfile, "/heavySumMaxLen1=%f\n", sumMaxLen1); fprintf(outfile, "/heavySumMaxLen2=%f\n", sumMaxLen2); fprintf(outfile, "/heavySumMaxScoreFwd=%f\n", sumMaxScoreFwd); fprintf(outfile, "/heavySumMaxScoreRev=%f\n", sumMaxScoreRev); }; private: // Parameters to the filter int beVerbose; char assemblyId1[32]; char assemblyId2[32]; int maxJump; double minScore; double sumlen1; double sumlen2; double sumMaxLen1; double sumMaxLen2; double sumMaxScoreFwd; double sumMaxScoreRev; }; struct Interval { int lo; int hi; double S; Interval() {}; // This is an explicit redefinition of the default constructor. }; int x_compar(const void *x,const void *y); int y_compar(const void *x,const void *y); class DPTree { Interval *node; Match *match; int node_size; int match_size; // The number of matches stored in the tree. // DP parameters int MaxJump; struct kd_node { bool Xy; int start,stop; // The indices to define a segment of the vector. int intv; // some index kd_node() {}; // This is an explicit redefinition of the default constructor. 
inline int nmatches() const {return stop-start;} inline int midpoint() const {return (start+stop+1)/2;} // This is the midpoint of the match list, not the midpoint of the region. inline bool lesser_is_node() const {return nmatches() > 2;} inline bool greater_is_node() const {return nmatches() > 3;} inline bool X() const {return Xy;} inline bool Y() const {return !Xy;} kd_node lesser_node() const { kd_node ret(*this); // This is an explicit invocation of the copy constructor filled with "this". // Equivalent to: kd_node ret = (*this); ret.intv = ret.intv*2+1; ret.stop = midpoint(); ret.Xy = !ret.Xy; return ret; } kd_node greater_node() const { kd_node ret(*this); // This is an explicit invocation of the copy constructor filled with "this". // Equivalent to: kd_node ret = (*this); ret.intv = ret.intv*2+2; ret.start = midpoint(); ret.Xy = !ret.Xy; return ret; } inline kd_node child_node(int x) const { if (x < midpoint()) return lesser_node(); else return greater_node(); } // root is always real, and everyone else must have at least 2 points inline bool is_real() const {return intv==0 || nmatches() > 1;} }; kd_node root() const { kd_node ret; ret.start = 0; ret.stop = match_size; ret.intv = 0; ret.Xy = true; return ret; // Return the structure by value. 
} public: ~DPTree() { delete[] node; } DPTree(unsigned siz, Match *p): node(NULL),match(p), node_size(0),match_size(siz) { node_size = 2; for(unsigned sz = match_size; sz>1 ; sz/=2, node_size *= 2); node = new Interval[node_size]; } inline void setParams(int mj) { MaxJump = mj; } double treeScore() { init(); if (match_size > 0) privScore(root(),root()); return node[root().intv].S; } private: inline double pairScore(const Match &pl,const Match &ph) const { const int dx = ph.xlo - pl.xlo; const int dy = ph.ylo - pl.ylo; // causality difference const int ix = ph.xlo - pl.xhi; const int iy = ph.ylo - pl.yhi; const int smaller_jump = (ix < iy)?ix:iy; // will be < 0 if they intersect const int larger_jump = (ix < iy)?iy:ix; // must be < MaxJump for an interaction int intersection = smaller_jump * (smaller_jump < 0); return (dx >= 0 && dy >= 0 && larger_jump < MaxJump) * (pl.S + intersection ); } inline bool pruneScore(kd_node f, const Match &p) const { int d,jd; if (f.X()) { jd = p.xlo - node[f.intv].hi; d = p.xlo - node[f.intv].lo; } else { jd = p.ylo - node[f.intv].hi; d = p.ylo - node[f.intv].lo; } // returns true if we really need to check this score return (d >= 0 && jd < MaxJump) && (node[f.intv].S >= p.S); } double privScore(kd_node flo,kd_node fhi) { // no longer double recursive -- just iterate through fhi for(int x = fhi.start; x < fhi.stop; ++x) { match[x].S = 0; matchScore(flo,match[x]); match[x].S += match[x].selfS; for(kd_node tmp(fhi); tmp.is_real() ; tmp = tmp.child_node(x)) { if (node[tmp.intv].S < match[x].S) node[tmp.intv].S = match[x].S; } } return node[fhi.intv].S; } double matchScore(kd_node flo,Match &p) { double score = 0; if ( (flo.X() && node[flo.intv].lo <= p.xlo || flo.Y() && node[flo.intv].lo <= p.ylo) && pruneScore(flo,p) ) { if (flo.greater_is_node()) score = matchScore(flo.greater_node(),p); else score = pairScore(match[flo.stop-1],p); if (p.S < score) p.S = score; if (flo.lesser_is_node()) score = matchScore(flo.lesser_node(),p); else 
score = pairScore(match[flo.start],p); if (p.S < score) p.S = score; } return p.S; } void init() { if (match_size > 0){ sort_nodes(root());} int minx=0,miny=0,maxx=0,maxy=0; // initial values will be overwritten get_bbox(root(),minx,miny,maxx,maxy); for (int i=0; i < node_size; ++i) node[i].S = -1; for (int i=0; i < match_size; ++i) match[i].S = -1; } void sort_nodes(kd_node fs) { if (fs.intv >= node_size) { fprintf(stderr,"overflow %d %d\n",fs.intv,node_size); } qsort( match+fs.start, fs.nmatches(), sizeof(Match), (fs.X()?x_compar:y_compar) ); if (fs.greater_is_node()) sort_nodes(fs.greater_node()); if (fs.lesser_is_node()) sort_nodes(fs.lesser_node()); } void get_bbox(kd_node fs,int &minx,int &miny,int &maxx,int &maxy) { int lminx,lminy,lmaxx,lmaxy; int gminx,gminy,gmaxx,gmaxy; if (fs.lesser_is_node()) { get_bbox(fs.lesser_node(),lminx,lminy,lmaxx,lmaxy); } else { lminx = match[fs.start].xlo; lmaxx = match[fs.start].xhi; lminy = match[fs.start].ylo; lmaxy = match[fs.start].yhi; } if (fs.greater_is_node()) { get_bbox(fs.greater_node(),gminx,gminy,gmaxx,gmaxy); } else { gminx = match[fs.stop-1].xlo; gmaxx = match[fs.stop-1].xhi; gminy = match[fs.stop-1].ylo; gmaxy = match[fs.stop-1].yhi; } miny = (lminy < gminy)?lminy:gminy; minx = (lminx < gminx)?lminx:gminx; maxy = (lmaxy > gmaxy)?lmaxy:gmaxy; maxx = (lmaxx > gmaxx)?lmaxx:gmaxx; if (fs.X()) { node[fs.intv].lo = minx; node[fs.intv].hi = maxx; } else { node[fs.intv].lo = miny; node[fs.intv].hi = maxy; } } }; #endif // STRANDPAIR_H kmer-code-2013-trunk/seatac/thr-deadlock.C0000644000000000000000000000262712322046702017060 0ustar rootroot#include "seatac.H" #ifdef __alpha // Define this to kill the process with a vengance instead of // gracefully exiting. exit() tries to free memory, and is thus gets // caught in the deadlock -- but is useful for debugging. 
// #define KILL_INSTEAD_OF_EXIT #ifdef KILL_INSTEAD_OF_EXIT #include #endif uint32 deadlockTested = 0; uint32 deadlockPassed = 0; void* deadlockDetector(void *) { fprintf(stderr, "Hello! I'm a deadlockDetector!\n"); detectAgain: // Wait for the deadlock checker to reset things // while ((deadlockTested == 1) || (deadlockPassed == 1)) sleep(4); deadlockTested = 1; char *x = new char [16]; delete [] x; deadlockPassed = 1; goto detectAgain; return(0L); // Ignore the warning! } void* deadlockChecker(void *) { fprintf(stderr, "Hello! I'm a deadlockChecker!\n"); checkAgain: // Wait for the tester to test // while (deadlockTested == 0) sleep(5); // Give it another ten seconds to return // sleep(5); if (deadlockPassed == 0) { fprintf(stderr, "\n\n\nESTmapper/search-- Deadlock detected! Aborting the process!\n\n"); fflush(stderr); #ifdef KILL_INSTEAD_OF_EXIT kill(getpid(), SIGKILL); #endif exit(1); } //fprintf(stderr, "Deadlock OK\n"); // Reset the testing/checking flags // deadlockPassed = 0; deadlockTested = 0; goto checkAgain; return(0L); // Ignore the warning! 
} #endif kmer-code-2013-trunk/seatac/Make.include0000644000000000000000000000243411512763666016650 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../libutil/)/ LIBBIO/ :=$(realpath $/../libbio/)/ LIBSEQ/ :=$(realpath $/../libseq/)/ LIBMERYL/ :=$(realpath $/../libmeryl/)/ LIBKMER/ :=$(realpath $/../libkmer/)/ src := $/seatac.C \ $/configuration.C \ $/encodedQuery.C \ $/hitMatrix.C \ $/thr-search.C \ $/thr-loader.C \ $/thr-deadlock.C \ $/hitMatrix-sort.C \ $/hitMatrix.H \ $/posix.H \ $/seatac.H \ $/filterObj.H \ $/statObj.H $/.CXX_SRCS := $(filter %.C,${src}) $/.CXX_EXES := $/seatac $/heavychains $/.CXX_SHLIBS := $/filter-nop.so $/filter-heavychains.so $/filter-nop.o: $/filterObj.H $/statObj.H $/filter-nop.C $/filter-heavychains.o: $/filterObj.H $/statObj.H $/filter-heavychains.C $/heavychains.C $/heavychains.H $/filter-nop.so: $/filter-nop.o $/filter-heavychains.so: $/filter-heavychains.o $/heavychains.o $/.CLEAN :=$/*.o $/%.d $/%.o: CXXFLAGS+=-I${LIBKMER/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/} $/seatac: ${$/.CXX_SRCS:.C=.o} \ ${LIBKMER/}libkmer.a ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/heavychains: $/heavychains-driver.o $/heavychains.o kmer-code-2013-trunk/seatac/encodedQuery.C0000644000000000000000000000321012322046702017133 0ustar rootroot#include #include #include "seatac.H" #include "bio++.H" encodedQuery::encodedQuery(char const *seq, uint32 seqLen, uint32 k, bool rc) { _seq = seq; _seqLen = seqLen; _merSize = k; _rc = rc; _seqPos = 0; _substring = uint64ZERO; _mermask = uint64MASK(2 * _merSize); _timeUntilValid = _merSize; } bool encodedQuery::getMer(uint64 &mer, uint32 &pos) { bool found = false; mer = uint64ZERO; pos = uint32ZERO; if (_rc) { while (!found && (_seqPos < _seqLen)) { _substring <<= 2; _substring &= _mermask; if (letterToBits[_seq[_seqLen - 1 - _seqPos]] != 0xff) { _substring |= letterToBits[ complementSymbol[ _seq[_seqLen - 1 - _seqPos] ]]; _timeUntilValid--; } else { _timeUntilValid = 
_merSize; } _seqPos++; if (_seqPos >= _merSize) { mer = _substring; pos = _seqPos - _merSize; found = _timeUntilValid <= 0; } } } else { while (!found && (_seqPos < _seqLen)) { _substring <<= 2; _substring &= _mermask; if (letterToBits[_seq[_seqPos]] != 0xff) { _substring |= letterToBits[_seq[_seqPos]]; _timeUntilValid--; } else { _timeUntilValid = _merSize; } _seqPos++; if (_seqPos >= _merSize) { mer = _substring; pos = _seqPos - _merSize; found = _timeUntilValid <= 0; } } } return(found); } encodedQuery::~encodedQuery() { } kmer-code-2013-trunk/seatac/heavychains.C0000644000000000000000000001272512322046702017021 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2004 Applera Corporation // Copyright (c) 2005 The J. Craig Venter Institute // Author: Clark Mobarry // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include "heavychains.H" // The following would need to parameterized for a general kD tree. // Could we use one function with a static variable to remember the // sorting direction? 
// int x_compar(const void *x,const void *y) { const Match &p1=*((const Match*)x); const Match &p2=*((const Match*)y); if (p1.xhi < p2.xhi) return -1; if (p1.xhi > p2.xhi) return 1; return 0; } int y_compar(const void *x,const void *y) { const Match &p1=*((const Match*)x); const Match &p2=*((const Match*)y); if (p1.yhi < p2.yhi) return -1; if (p1.yhi > p2.yhi) return 1; return 0; } void StrandPair::addHit(char direction, uint32 id1, uint32 xlo, uint32 xln, uint32 id2, uint32 ylo, uint32 yln, uint32 filled) { Match tmp; tmp.xlo = xlo; tmp.ylo = ylo; tmp.xhi = xlo + xln; tmp.yhi = ylo + yln; // Use the match lengths to initialize the self scores. tmp.selfS = xln; if (yln < xln) tmp.selfS = yln; tmp.S = 0.0; tmp.neS = 0; tmp.nwS = 0; tmp.seS = 0; tmp.swS = 0; tmp.filled = filled; tmp.ori = direction; iid1 = id1; iid2 = id2; if (beVerbose > 1) fprintf(stderr, "heavychains: add %8d %8d %8d -- %8d %8d %8d\n", id1, tmp.xlo, tmp.xhi, id2, tmp.ylo, tmp.yhi); Padd(&tmp); } // new strand pair: begin processing data for the strand pair // void StrandPair::process(void) { int swapi; if (Plen > 0) { if (beVerbose > 0) fprintf(stderr,"HeavyChains: filtering strands "uint32FMT" "uint32FMT" "uint32FMT"\n", iid1, iid2, Plen); DPTree *dp = NULL; dp = new DPTree(Plen, P); dp->setParams(maxJump); for(int quadrant=0; quadrant < 4; ++quadrant) { if (beVerbose > 1) fprintf(stderr,"HeavyChains: arranging process quadrant %d\n", quadrant); if ((quadrant == 0) || (quadrant == 2)) { for (int i=0; i 1) fprintf(stderr,"HeavyChains: scoring quadrant\n"); dp->treeScore(); if (beVerbose>1) fprintf(stderr,"HeavyChains: recording scores\n"); switch(quadrant) { case 0: for (int i=0; i < Plen; ++i) P[i].nwS = P[i].S; break; case 1: for (int i=0; i < Plen; ++i) P[i].swS = P[i].S; break; case 2: for (int i=0; i < Plen; ++i) P[i].seS = P[i].S; break; case 3: for (int i=0; i < Plen; ++i) P[i].neS = P[i].S; break; } if (beVerbose > 1) fprintf(stderr,"HeavyChains: done quadrant\n"); } // All output 
information is now in the match records of P. delete dp; } } uint64 StrandPair::print(FILE *outF, uint64 matchid) { for (int i=0; i= minScore) || (dec >= minScore)) { int len1 = (P[i].xhi-P[i].xlo); int len2 = (P[i].yhi-P[i].ylo); matchid++; if (beVerbose > 1) fprintf(stderr, "heavychains: out "uint32FMTW(8)" %8d %8d -- "uint32FMTW(8)" %8d %8d\n", iid1, P[i].xlo, P[i].xhi, iid2, P[i].ylo, P[i].yhi); errno = 0; fprintf(outF, "M x H"uint64FMT" . %s:"uint32FMT" %d %d %d %s:"uint32FMT" %d %d %d > /hf=%.1f /hr=%.1f\n", matchid, assemblyId1, iid1, P[i].xlo, len1, 1, assemblyId2, iid2, P[i].ylo, len2, (P[i].ori == 'f'? 1 : -1), inc, dec); if (errno) fprintf(stderr, "StrandPair::print()-- write failed: %s\n", strerror(errno)); sumlen1 += len1; sumlen2 += len2; maxlen1 = (maxlen1 > len1) ? maxlen1 : len1; maxlen2 = (maxlen2 > len2) ? maxlen2 : len2; maxScoreFwd = (maxScoreFwd > inc) ? maxScoreFwd : inc; maxScoreRev = (maxScoreRev > dec) ? maxScoreRev : dec; } if (beVerbose > 0) fprintf(stderr, "HeavyChains: finished strands "uint32FMTW(8)" "uint32FMTW(8)" maxlen1=%f maxlen2=%f maxScoreFwd=%f maxScoreRef=%f\n", iid1, iid2, maxlen1, maxlen2, maxScoreFwd, maxScoreRev); } return(matchid); } kmer-code-2013-trunk/seatac/thr-search.C0000644000000000000000000001063112322046702016551 0ustar rootroot#include "seatac.H" char const *srchGbye = "[%ld] computed: "uint64FMTW(8)" blocked: "uint64FMTW(4)"/"uint64FMTW(4)" encodeTime: %7.2f searchTime: %7.2f processTime: %7.2f\n"; class searcherState { public: uint64 posnMax; uint64 posnLen; uint64 *posn; double encodeTime; double maskTime; double searchTime; double processTime; searcherState() { posnMax = 16384; posnLen = 0; posn = new uint64 [ posnMax ]; encodeTime = 0.0; maskTime = 0.0; searchTime = 0.0; processTime = 0.0; }; ~searcherState() { delete [] posn; }; }; void doSearch(searcherState *state, seqInCore *seq, uint32 idx, bool rc, filterObj *FO) { encodedQuery *query = 0L; hitMatrix *matrix = 0L; double startTime = 0.0; uint64 mer = 
uint64ZERO; uint32 pos = uint32ZERO; uint64 count = 0; // Build and mask the query // startTime = getTime(); query = new encodedQuery(seq->sequence(), seq->sequenceLength(), config._merSize, rc); state->encodeTime += getTime() - startTime; // Get the hits // startTime = getTime(); matrix = new hitMatrix(seq->sequenceLength(), idx); while (query->getMer(mer, pos) == true) if (positions->getExact(mer, state->posn, state->posnMax, state->posnLen, count)) matrix->addHits(pos, state->posn, state->posnLen); state->searchTime += getTime() - startTime; // Begin processing // startTime = getTime(); matrix->processMatrix(rc ? 'r' : 'f', FO); state->processTime += getTime() - startTime; delete matrix; delete query; } void* searchThread(void *U) { uint32 idx = 0; seqInCore *seq = 0L; uint32 blockedI = 0; uint32 blockedO = 0; uint32 computed = 0; searcherState *state = new searcherState; // Allocate and fill out the thread stats -- this ensures that we // always have stats (even if they're bogus). // threadStats[(long)U] = new char [1025]; sprintf(threadStats[(long)U], srchGbye, (long)U, (uint32)0, (uint32)0, (uint32)0, 0.0, 0.0, 0.0); while (inputTail < numberOfQueries) { // Grab the next sequence. // pthread_mutex_lock(&inputTailMutex); idx = inputTail; if (idx < numberOfQueries) { seq = input[idx]; input[idx] = 0L; if (seq) inputTail++; } pthread_mutex_unlock(&inputTailMutex); // Still need to check that the index is valid. Another thread // could (and does) steal execution between the while and the // mutex lock. // if (idx < numberOfQueries) { // If there is no sequence, oh boy, we are in bad shape. Sleep a // little bit to let the loader catch up, then try again. // if (seq == 0L) { //if (config._loaderWarnings) // fprintf(stderr, "%lu Blocked by input.\n", (uint64)U); blockedI++; nanosleep(&config._searchSleep, 0L); } else { // If our idx is too far away from the output thread, sleep // a little bit. We keep the idx and seq that we have obtained, // though. 
// while (idx > (outputPos + config._writerHighWaterMark)) { if (config._writerWarnings) fprintf(stderr, uint64FMT" Blocked by output (idx = "uint32FMT", outputPos = "uint32FMT").\n", (long)U, idx, outputPos); blockedO++; nanosleep(&config._searchSleep, 0L); } // Construct a filter object // filterObj *FO = new filterObj(config._filterObj, config._filteropts); // Do searches. // if (config._doForward) doSearch(state, seq, idx, false, FO); if (config._doReverse) doSearch(state, seq, idx, true, FO); // Do filtering. // FO->filter(); // Signal that we are done. // output[idx] = FO; computed++; delete seq; } // end of seq != 0L } // end of idx < numberOfQueries } // end of inputTail < numberOfQueries // OK, now fill out the read thread stats // sprintf(threadStats[(long)U], srchGbye, (long)U, computed, blockedI, blockedO, state->encodeTime, state->searchTime, state->processTime); delete state; return(0L); } kmer-code-2013-trunk/seatac/heavychains-driver.C0000644000000000000000000001245510214720163020310 0ustar rootroot// This file is part of A2Amapper. // Copyright (c) 2004 Applera Corporation // Copyright (c) 2005 The J. Craig Venter Institute // Author: Clark Mobarry // Author: Brian Walenz // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received (LICENSE.txt) a copy of the GNU General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include "heavychains.H" #define BUFFERSIZE 1024 int main(int argc, char *argv[]) { int beVerbose = 0; char *assemblyId1 = 0L; char *assemblyId2 = 0L; double minScore = 100.0; // Default minimum of bp filled in a good run. int maxJump = 100000; // Default maximum intra-run jump allowed in a good run. char *inFileName = 0L; char *outFileName = 0L; int arg = 1; while (arg < argc) { if (strcmp(argv[arg], "-v") == 0) { beVerbose++; } else if (strcmp(argv[arg], "-1") == 0) { assemblyId1 = argv[++arg]; } else if (strcmp(argv[arg], "-2") == 0) { assemblyId2 = argv[++arg]; } else if (strcmp(argv[arg], "-s") == 0) { minScore = atof(argv[++arg]); } else if (strcmp(argv[arg], "-j") == 0) { maxJump = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-i") == 0) { inFileName = argv[++arg]; } else if (strcmp(argv[arg], "-o") == 0) { outFileName = argv[++arg]; } else { fprintf(stderr,"%s : unknown flag '-%s'\n", argv[0], *argv); } } FILE *inpF = fopen(inFileName, "r"); FILE *outF = fopen(outFileName, "w"); fprintf(outF,"! format atac 1.0\n"); int old_stra1 = -1; int old_stra2 = -1; // True strand ordinals are non-negative. char linebuffer[BUFFERSIZE] = {0}; long matchid = 0; StrandPair *sp = new StrandPair(beVerbose, assemblyId1, assemblyId2, maxJump, minScore); TheStats *ts = new TheStats(beVerbose, assemblyId1, assemblyId2, maxJump, minScore); bool endOfInput = false; while (!endOfInput) { endOfInput = true; int new_stra1 = -1; int new_stra2 = -1; int xln = 0; int yln = 0; int tmp_xlo = 0; int tmp_ylo = 0; int tmp_filled = 0; // This is never changed! 
char tmp_ori = 0; if (fgets(linebuffer, BUFFERSIZE, inpF)) { endOfInput = false; if(linebuffer[0] == 'M') { char classCode; char subtype; char selfId[100]; char parentId[100]; char new_ass1[100]; char new_ass2[100]; int xfl; int yfl; if (12 != sscanf(linebuffer, "%c %c %s %s %s %d %d %d %s %d %d %d\n", &classCode, &subtype, selfId, parentId, new_ass1, &tmp_xlo, &xln, &xfl, new_ass2, &tmp_ylo, &yln, &yfl)) { fprintf(stderr, "WARNING: short read on '%s'\n", linebuffer); } #if 0 printf("classCode=%c\n", classCode); printf("subtype =%c\n", subtype); printf("selfId =%s\n", selfId); printf("parentId =%s\n", parentId); printf("new_ass1 =%s\n", new_ass1); printf("xfl =%d\n", xfl); printf("new_ass2 =%s\n", new_ass2); printf("yfl =%d\n", yfl); #endif if ((xfl != 1 && xfl != -1) || (yfl != 1 && yfl != -1)) { fprintf(stderr, "ERROR: orientation wrong.\n%s\n", linebuffer); exit(1); } tmp_ori = (xfl == yfl ? 'f' : 'r'); // Parse the IID out of the ID // for (char *p = new_ass1; *p; p++) if (*p == ':') new_stra1 = atoi(p+1); for (char *p = new_ass2; *p; p++) if (*p == ':') new_stra2 = atoi(p+1); } else if ((linebuffer[0] == '#') || (linebuffer[0] == '!') || (linebuffer[0] == '/')) { fprintf(stderr,"%s",linebuffer); } else { fprintf(stderr, "UNRECOGNIZED: %s", linebuffer); } } if ((new_stra1 != old_stra1) || (new_stra2 != old_stra2) || endOfInput) { sp->process(); matchid = sp->print(outF, matchid); ts->add(sp); delete sp; sp = new StrandPair(beVerbose, assemblyId1, assemblyId2, maxJump, minScore); } // Add the hit to the sp if we just read a point // if (linebuffer[0] == 'M') { sp->addHit(tmp_ori, new_stra1, tmp_xlo, xln, new_stra2, tmp_ylo, yln, tmp_filled); old_stra1 = new_stra1; old_stra2 = new_stra2; } } ts->add(sp); ts->show(outF); delete sp; delete ts; fclose(inpF); fclose(outF); } kmer-code-2013-trunk/seatac/summarizeAtacStats.pl0000644000000000000000000000572507754262443020615 0ustar rootroot#!/usr/local/bin/perl # generates summary of the meryl min phase printf("\n%-30s 
%5d %5d %8d\n", "meryl min", "user", "sys", "maxRSS"); open(F, "ls min*stats |"); while () { chomp $_; my $file = $_; my $ut = 0; my $st = 0; my $mr = 0; my $bt = 0; my $tm = 0; my $dm = 0; my $um = 0; my $b = 0; my $p = 0; open(G, "< $file"); while () { if (m/userTime:\s+(\d+)/) { $ut = $1; } if (m/systemTime:\s+(\d+)/) { $st = $1; } if (m/maxrss:\s+(\d+)/) { $mr = $1; } } close(G); printf("%-30s %5d %5d %8d\n", $file, $ut, $st, $mr); } close(F); # generates summary of the build phase printf("\n%-30s %5s %5s %5s %8s %10s %10s %10s %8s %8s\n", "seatac build", "user", "sys", "wall", "maxRSS", "totMer", "distinctMer", "uniqueMer", "bktSize", "posnSize"); open(F, "ls *build*out |"); while () { chomp $_; my $file = $_; my $ut = 0; my $st = 0; my $mr = 0; my $bt = 0; my $tm = 0; my $dm = 0; my $um = 0; my $b = 0; my $p = 0; open(G, "< $file"); while () { if (m/userTime:\s+(\d+)/) { $ut = $1; } if (m/systemTime:\s+(\d+)/) { $st = $1; } if (m/maxrss:\s+(\d+)/) { $mr = $1; } if (m/build:\s+(\d+)/) { $bt = $1; } if (m/Found\s+(\d+)\s+total/) { $tm = $1; } if (m/Found\s+(\d+)\s+distinct/) { $dm = $1; } if (m/Found\s+(\d+)\s+unique/) { $um = $1; } if (m/Allocated\s+(\d+)\s*KB\s+for\s+buckets/) { $b = $1; } if (m/Allocated\s+(\d+)\s*KB\s+for\s+positions/) { $p = $1; } } close(G); printf("%-30s %5d %5d %5d %8d %10d %10d %10d %8dKB %8dKB\n", $file, $ut, $st, $bt, $mr, $tm, $dm, $um, $b, $p); } close(F); # generates summary of the search phase printf("\n%-50s %5s %5s %5s %5s %5s %9s %9s %10s\n", "seatac search", "user", "sys", "build", "srch", "total", "usr/srch", "usr/totl", "maxRSS"); open(F, "ls *segment*stats |"); while () { chomp $_; my $file = $_; my $ut = 0; my $st = 0; my $mr = 0; my $btt = 0; my $stt = 0; my $ttt = 0; open(G, "< $file"); while () { if (m/userTime:\s+(\d+)/) { $ut = $1; } if (m/systemTime:\s+(\d+)/) { $st = $1; } if (m/maxrss:\s+(\d+)/) { $mr = $1; } if (m/build:\s+(\d+)/) { $btt = $1; } if (m/search:\s+(\d+)/) { $stt = $1; } if (m/total:\s+(\d+)/) { $ttt 
= $1; } } close(G); printf("%-50s %5d %5d %5d %5d %5d %9.6f %9.6f %10d\n", $file, $ut, $st, $btt, $stt, $ttt, $ut / $stt, $ut / $ttt, $mr); } close(F); kmer-code-2013-trunk/seatac/sharedObj.H0000644000000000000000000000266510214720163016425 0ustar rootroot#ifndef SHAREDOBJ_H #define SHAREDOBJ_H #include #include #include #include #include // A wrapper around a shared object. // // Responsible for opening, accessing and closing a shared object. // class sharedObj { public: sharedObj(char *p) { path = new char [strlen(p) + 1]; strcpy(path, p); handle = dlopen(path, RTLD_NOW); if (handle == 0L) { fprintf(stderr, "ERROR: Failed to open shared object '%s'\n%s\n", path, dlerror()); exit(1); } }; ~sharedObj() { if (dlclose(handle)) { fprintf(stderr, "WARNING: Failed to close shared object '%s'\n%s\n", path, dlerror()); } delete [] path; }; bool exists(char *name) { void *ptr = 0L; errno = 0; ptr = dlsym(handle, name); if (errno) { fprintf(stderr, "ERROR: Failed to find symbol '%s' in shared object '%s'\n%s\n", name, path, dlerror()); exit(1); } return(ptr != 0L); } void *get(char const *name) { void *ptr = 0L; errno = 0; ptr = dlsym(handle, name); if (errno) { fprintf(stderr, "ERROR: Failed to find symbol '%s' in shared object '%s'\n%s\n", name, path, dlerror()); exit(1); } if (ptr == 0L) { fprintf(stderr, "ERROR: Symbol '%s' not present in shared object '%s'\n", name, path); exit(1); } return(ptr); }; private: char *path; void *handle; }; #endif // SHAREDOBJ_H kmer-code-2013-trunk/ESTmapper GSAC.pdf0000644000000000000000000050701212431447644016215 0ustar rootroot%PDF-1.3 % 2 0 obj << /Length 1 0 R /Filter /FlateDecode >> stream xڼ[#Ǒ&"_ƌ:,";z )jvfEjVsd,X° ((Z7"ef'&!;H:"~ϕV2jZm~s36!_]q w'`߼Zg)go{}/.ܙJ*\=1_a<ğw:glx>y>Hװ?8!ç}dV1M- =K{8Q "l-+~GP)wG^WK?;?ꋯնyg>T<ǓU~zWn:n>WwWϛCfw|8_謹eûJroo؇/"`Eo"dI񋜕obo7ſ:^u$yoxY~S5Ϧi8ܯd ӆN>v9~NE `* >q"\1J࿮$jaT D"7G mN>R(#Bw O BXqfE}NQ7r@_ޟʐd,;=ZX8별aդ6cw~QkfȾ67?s:fҞƿFF ] l -H[j6(ɇxWT Wi)̆ D+qe 9Qd2.N}Ε(e ^ JXDRH MHs[H r 
D5TP.2P,R#F)1[wTb=a%QǶ7~)ڳ'WDNj"vWEGv+aw$}C,cxbUldVLUC:z$R1ƶ\s##L#2lBN}Zcwc c)q*%8}lyX[&*7owYI+RBYU Q#Ў[p' yfHb'!oc<.jČnȖ^g!? F{X〙.KcMcH;XtFԸeߜK/MfQHv }S9.4cc*Z-^4a6p"> iVs*(8?➰eZf}-tN&TvCg H$vTDFP$;Bm OYr%*.@% VbsL=2+B/e^[f2y*mʛTPLs))oKDZT1IϽ0FSXa S}_M +5q7ى=NahLBwdIh6JT%!|jIBV9xjȬ`&՘C#ENNI!AFPh@R$0&.@~Zj=#Hu{%q~"kCI{#t|4~Negf!j~f6Ҭ AC2QJ~STJp-}aVjjvn8V#ZpVV s.&MOC>Z Ha@Z~'/ESN 47^(dz5ZV:}Y휃KJ/lCiE$Hn@S^#ԊbIL@Qf( ohaߍev&Q%)h8«}O }eY80x :?XFVxjZ {VKO$@5!_\1@Rc$\8V=` see? -%YaWdBWdHMY7<9NTHc8RAι2b(/tDw?n?>\,QL\L]PϤBuPP:zBT*r@;Deljv&hVDUk5JT]UT'¨#&258r pF) B_L)дVQzbVQ7)C7E ?%dco۳HϹZ!FFt%LƠkV/Ke;C擝]W ~5K+@Jx G~X7Zhpx}AmnuPY 1DRֆR,R()ƲTXx=cf+4~ GJ0;RA/S!Hilr")xŧ/s'.u R'嬜m#fP~w#O,æ2: ePFst $۱\0%R,S( /3:^fbE^#!#;MC!F>?D@4F>u 2SgTT>Rdeu0%e--BSWv\.Scۂ+z)E<=+ȓa}<;R<As\a$dE*w2@CɬX܋f 4FP؆77Uv\W4:V@?n 4S xuXU!,eO=g=xclo^MBH{ZQ@Ҽևm0DҤ.Lh)9C?~ou=%0Z-0O"hn4rd55ɱAJ ~٭8*wS{/}!tL,rXFegc9XU$o6írQ644 WԀK$?:g+y;\n`gôpݵۃ+6BY2hCUǷTߥZ}`Lpirv.*eqJH>'w ~@.xk4.fXg䖍5~Myİ Σ_m{tDU 5a1ruFV'$ yO+gUn1oՐHvs]H]\fÖ z>OULN\b:5ޡ-JZZ/%:-P l3R:"Ua?Ai ZYx(i,l 摪X5є*/N`mWZŻcNYš1BĦ]$G2#i2_4o;I,F|MiT@Tl^w ł32P<[ N[R(N]ibڤڐf!ys% :xMhDq#狝Ѓ}";F@'R-\?X;RXMx&Dif( qHt6uf{Mue#<3R򍼑4Hit"yLq-8 Qt{, =P4èIS&-eCP=nZ0sfݑ^*925$(ӂʚ1NNTbTVRr3"-(eƘtv}3zfT$nGEŊ[gZ<#HPlMCx!hCAUOJ{wV!aP'k8^iO3YӺR2:Y[qv^yee MY( {-V{ {5Tkynz ID$ꦄ#BH6)(MH Oe|.,N"L@YH4;?J?PF95l^$/3K\LeeVRV"IhZ 9kJZ"N%ב_%ס8B@ebBUU!(Ix :0hPZaGy|1J r|CiE"T񥜦=fo/Ha'?Qw8ȹ)`Y7%}7jZ~n%Pϼ@ҙ8#3vN0&g>M >[B斲0լPz1/is\a== \#Lh;F+c/ҥWі@ڇ6hPo'me"hR} /eL!\YwPS !oZqauo~HJ G(FBo; H.7Gݑ>BKpȮI &M.х?& ۸EQ7w'EYrѵl\  U8|gV % Tp;~pxA< Pvr\8%#L"\ʞ L$aᖕ k2&t1vNgfEECz6Z3۰y6Ol?ڦFOQ]\։UpBV{@&]G+N[q-܏v2J4[{=h&61^!ț_6] V2V=p#}c\ (jab·0IV K* oԁi⣥U(f0JfG5Zyf=]#yz{2X '$UO)¯)̸${reiKfAm vٯ4~ U时Z$wSCoW_oӈ͑\1:Xf1:̎ڄO=NZ{`TDY-)M@#\-ҞËnA{/;[{L=ݨ{6]BEQ|VqBYB!-eC̿rEZ?#OFK(s" e¼PHl>9a`bhska;xnA!amr*txÊ Lx-}~Y4SxH"o"Qۨ:XUq!3#CNT/z~ rmgFF [ l8WǝlZ+AHک5 XsI +hb^UY{1QN#>mXPD@Q>ZH?^(b>?74L[Q g@ [=[ꠝ eŶ@Qi,-VHiOPKn(p--$C>}.z&Ѯ\觲$VQ"?l@vjT67\s $\#Ax烙6wwXH\(+{h)sf@JR Ok'V L 
8}@0xڐIRA1@MNpʀM2ȥО@IZ?UXԂ7?+_j΋4\~CQJW`FX|d⇐ն_B,n$U^T_I @as^Zw[UZ,N(5iU˟{x mtxj~\UEA4%y_1~TBFmpo}4ջ@em2>jH@[6֫~є[wR7 IUDuT ate` b~c}I.fS 0NqIʬyTc#bIߨq]و -2b{:.%eiY_y5S!ƹăo6t<`?a%6JY56\>,V e-fc{!×}-ӊi"BzKEl*:#%B_u@JxS"@4:5˨=+)y]<-FlɉW;>PpԜ^*XYn}HZ*HU<n)NL eX0no; XK㡟J˪=X_a<. k0W|ޅ`sܗNeEchH`=!*RJUV(k5;];K=̾x !4]~E]˩fXVA]Z7;C`.YO+4d /RaDg2lF@Y-KH@jɿ޴?acLj-" |\ ]r\UM҂MULgX9 w S5i7227Ko%YʱVc؂\9vjl${Lbo\Ʌjv t}jT):\ߣ`a=9c@i>Rk/b73Bj\Eٍ CEQD (d?qHk,E &AfX9xYnR7 x%S0wHM6C^#WþY P^2_h?2YFZﱩS^۲Sff][Wp/I3*-1*Ί=AZ0+lj^ZQД c5´qKU۾QPkKbiT5Vtmy#,,/#$O\nk5lҪa T5  v:2>meXu~~u+vS>. ;.ٺNJ;bGdtWRDZ(?TǾOK'jaR\05wz{\ ~ "*& -e5#^8U'lHi0uH~޴i&$ ~|>ԥB&ԋ<7 !NvƼJi/iJS>^‚p.:b#@wnIYB%= D9p )+k.nU 2+vs/K5ѭ35y>MBָG.!ա(T#)¢Hu41}Ws0ZYi=ĉ.`G "_) YN0Džoq { ή|lq-QB˞ϤsťT_::*%dV-_(SvX /#Japm:#̅juco<#}zHrj(BdvaA퇴+v-:. YQU[ȨEY/6eןpUߺ뜐SޭI CFЃPUb|l Fr#[ :^zL55R`V qˊm5 JM9kiUM e 3#U{m꒺q$ eJV˔ UGH˞ ]Ez Q`#R^2\1#f;Æ,caC&optY0eވ^=Q,!¬IꞪ,k[{?*O~V=PƸPJ@$ (i<(] ;jz`^W5)l6yl5P7zNbfwZl6NsgZY\l#lzOjL8!+ʌwebh!БHǮj"  ij__33[|2IEO aĎ>'֯x~C:i)yְV1'spˋ&T1e&kdtʰZ}w ITw~ڑKG?穂aIJ aBEYޫpX4{ }YK_Lfc69r5XQX%M'C]&W× j0r F wfi ˨}5:.55:[쎓|2N+_b};˅gq}-#ZbHu~03ԜJS&M MUFan'w==)Y!lfdՄjsvD"gom؀.\D^w)uCVθ1Cڳ`/;%V&Or9%w}WR;TA5{>!QӝSI#7K2nKa8vf^)"ؐj̶l$5HΚ tPdh{gg e93ye! 
SH[Ax~6֗e" oꕜ螒DCX;m-"~E{ʶXڰ' "v;)/;meup/)}>5:+cǯZʶ6i%yk+`I2ũ}vߢi=" E|]o*tBq[&JcQ ?~o+Dſ߈ fъlyiöf_y_?jw|Zmۏm߽Ʉ>絷O2?0~O_}ǧu/7?OxIq{K!D#/57È,3S(-#C/ B}`w.ƙAQ:>oU =z^~VHЏ**]W_۟׼:B;v;H.7XѦċ46$zV=~C>j"_D0`ql Z=eO(XB;UXWFCߞ,e)AJ)yDP,=Gq폄MaQ:jcIdvP|dׅAd _'Z&0 40N,aYpctcF;ԏ0BOb"{L?{ WrYL'UM2TfGD}j} |e?lDWBeF\`|zH1TlReuEs60lT|]K|%׎!wgWF*8fhWR(BbRQQ0M$oQ_Tdn]`]Sf2lAb+#qVFQʬC2M KM0_nq0$0`Vb˪"@V|Oh|J_%jY=g+{ &gybZVfe bw gJ~ l[=vn c8l3%i#~zد鉦T`y,F_J6[ ?PQi~FUVX!;R_PU;q3@,:@&$|,BTQ{Nn{pcy NBnRyinXW6aDکg*xI-;z96:BFeY17TD<It^K+DyW屚߽ +ծABIP4 ) KU:0#Rm9C#0Z%%C3ſz WJaR/Hn k*>j"MXKY^ /Ϥ/ kAEl؉tj"(ud:ǝ,\f$zI>uC$m "q'F)6[rM} zȦ$a*JSxL1bl40T:n24r3.hG >e:Dqt}}PfYJ R jx + xuȧ =MH zZYS*D5(!gx`htz Sc R+ ]nv'4;:m FWHʌX;E*H{+1JmN7"_JCuua^<@b0vlaX?ՇQzeb^d{ ŴV \or Yf欅WSgyzfKrvbg/C(}^;E5゗ Fn9jMuiW+ C&,X^Epf<>U7 #ɒn.T;`ʊ,K AF`Ī4u8XΩ†oz"At72o!/3C6w]-B8( YK{us?i}b}߈lih$llf(4sJR{楍;qUKإγ$B~9:DnoUPM" lP5qymVZj,6vsp] ]R<,=I˥.륾Ԃc5N>F;W4iYw.h: p@i_QrD0nxRz٣ +ޔN ssFI6v,:! Y<ʞqݑժ4@|LMDk`毥-c>;-'[խ7@]vU'C0jѢ S(Ʃ+ c 黡eiבSk,R[q;Jŕ^[ľQwCE!HgMJ44M9%_4b!c9iS7&7э]mv\r~'”( v2sa‰Fa8?Q9WHIŎiP3JZ/mIJKne漑J y'.$e74JO \͘q4!(*/Oi>mL,ewƸUYoP I7+;6{a|?7ڒ[5%T ҾYѥe/(!Xh-fЈYi+iw2b5}ndT3f|UUV,&CB2UXM^` \ TtU0*،6, #,hSI ?ꑐ0"5->/eme%>HRI zM, 0H^ICkj"-l NV ;S[GgvN>MUr6ƛ):X!MY' ;Yw!IPbiR$d^|RZ[A1X[C"vw 5'޷+=dTn_go3>nvZ)ϯj|\s\h)ֱ^K(C-Ri뜒εf ˡ?E۠ YAUbJjyP&(@6;)4\/vC|}(W  jS߃z#7nFn 3G뙛tgy;H_/`+UWIBBo<󦯅)bW?+b8ܟEQ"%l-quKRR8h:λ.nk)^.rc5fg4.tڅ:),| N1(K0 \2d)u3_8/ wkK J[Q,˞h,4X7μ|5w7eIRB.xLJ**xUbeK~;ΟRƕ#eAh=v}Kd!6 ZBoijIoͥ+KpZ쪱v:>Gk e)$L ]ub>@kTCf7ΔqlRn mtm3V&i( ɥ+mtߋ]Eul)LͶnTIy #\ Σ;ua ,FG/L!W֪nGj/FIP+7)@;ݰXA]ɆT@Đh G]-sGWUuUMXPu9X΅3)jVCVXVFtfit(ޑJgYC\;x9:o6@GZϹ^9/s-g#? 
dUnM1<յ*,χ 蚳\G;Kf`MF\T)m3pqKzY` a͒ J*!Y m+}*9Wo/l2SKUM2)l 'w&Fݯf :M˼tΚ#NIAU‰S3raYk"(afdѥ2ﶷ6b~>Nn6B &vj{ӃIhQJ_xf_v7iui^z~Ƚz2;~2"I2Ymlzspjl{z~\>~!>&ԼBq}}OƉɇ\E˝,É;^V=[ݫ^t(%͡ړ1bB1lХU Aeܢ!Qoȡ®PBK!XH 4iuܼ#Lι2/tH08U|\; տ,7XApdɾceu3[!L{,#W1!WizFۮ8 ~ ىǴ^쵩+^ T2tPT(hK(p_-~W2}WJELR!r4xLJ3GwO);FFbGQŽH@)qfSܞ(卹$ ri^AbK[Y +<Q%.{)84tBy{tɪ [KpGSˈcpn2U#q\%$-NeNAʼ+K z^_Yu(8NnARyP`}񴦠7:Sj҆[?H-d(zz, .Lv1 !j.fa gv1ajd'U BnSC퐚m"`kJ"DZLGJj%!ͺ̿ ]yk$zobúޚ=T{EAj9ňMFڈ#QA- [!4 , sp59}:"Yh1 1 (sdWcFHɗ RiK%}*cY{cUfZ2SEEHjnA$5)<.0/+Z^\؛ApѬM6Aܮ:!c ʒG0&%;$$L`w*G3B]o 劢X{nR5ySbEhIoS_~Ka.5D?HK2Fy[& !`*Rb]7KUyA M1=7He}D.|&h_/֖IG`k-3< &UjN0}Nˉab9zol$fa\1:1 m )F G%(5jK^%,HEϏV")Gݑ4Wr%Ʈ;/ի}Yzr㈟^:]͂@inqw[zwsL.r>=MpOXi9BTSlQal|il}2 uӠ/u=>#N Zm˚컓3ϻOwYP* _#xfXӇ 퍻V7>ДKnC^ )MMbA-7/^֪2YpvUݖ+,O oR[gse2z^֝Y״*BɊ |%wf0I)I} W6wҼ(@)\QA+G A j#/U_%`Hk潇tO).My"hN-H{@Lg㙠ŌIж=4H}UCLlTJ 8מME8QBmbrl[-5SB @t,fN#J('_ZR#al I_Pdo6P߲(Ħn;*VlɮJV LIGi,|rRCED_us҉+BkCp|2!˓  \EҌp ~7Hi 9kBroye^H].i@/6"~N(x>\+m}:8p?1-br֚z_7gea#]Z) Cwg$bOwѻQK(gAظ"\ҕ^;{teBuaT/}[L `q|W o޳ f  wz~8z_{Q =57혈~pe3/h j T?t a)Nwr a|fPEہ`)B 8{!cAh'g+ "3D =D nKn+RA=R_q86zY!{GYs1ERaBqZ) 1߸Ff*Ǒեf*@x$fv:UK9LAjPf4RrJ*Tzʆ꺩^:L$u퀙Ji|Jm2M|\}FRQhp[ʗUOsٵ+j[;rpX_)ψ Ķ``bLF߈Z3ReF M28-+B"HS H䘍%G&srΞRux\H(uR! ݑ:NlrY`19(e0,G-s1\5z!i%[FܽRْU 5JiYvX)Rrrkv 6H YI3i;-F!m|Nlr$njAJq)rBV\*dűBŞ w7y%OBSd*O'ؤ#G^!B=H!iH + i%va˥VJ5#/Q饆ʯ9Kz5|ӗԠh;E0 T3L6qS6Sslj)t1FteőS; #G*JYMGyraz刲Bf)3qGqA J!}ia]O {R+4}i K=&#qyݩkѕ;tUӐ?&~vafrxv;gх6A/x3"@6ϧ}wH q6#BW- l R#A+ x=az~t224ҫ{=[QފL ;C-v!X(/! ;2 ; 4`mMT8Yp NnG^WYa.G!-앛[aTqcg<2"Fl=,M +*~- -E#ϣ!>7[ HJLjC>ȸ_dG)5i񴘖̔PeRh:V+CۅO͉|KDIb8N\}v~\gsa2*kOt+# 2ƀ]ӓp87S wRĉ4dDMFuR W-z~sA^Pl딋r`\`R|ӁɈ,b#4ʄ4 ~GHԒ@~"mbzDtlY.pVFVl?gYvWy`0-^D=9TǧzM9g:V,~2"1lp;}~^8ձbDoy%E8^EJֿTIZ5ѿmwsmק'F{2 #OKj>IEZ>3eBP `. 
>";[ +-j{~?hVĿQߤI˻4Ap2^ KӉyOp椊'.VJ*~'"wD7NtӟmtbRf& O[2NtORy:Qw'NַqNŎ$j߉ջgI\u:q5^N'iK:I} ߥ"t^҉z{Vo{煩O5sRğ>"pѥ̝0Y^'Z Ned{Oבw&iӜ2SE6wYOIc9>* +e}jv:mwAH6Ή́FSjij V>mqNKDۜ$< ՝{}~ָZi;H'&6ד\߹|Ss=ѩu"7n!z.Nt2ʳ&I֕g)l k?r0օp:6m/<|&bȅCp梋-22c-ͰE/|_lԪn2(ƓdUONs=\}FIK$pHV}"Zpж91b}=aD3` lEK_đ iN6,ԯw:Wd#xiNR;M:^4'8ֈz}Y%T-ؾV61im ϢN"[%9Ks½hIk{yt}s~)ѨO`F,$oF0] lĚQc߲VP$kD %/ZÊ8#ZZ9aI^D 7c U\zvlH?k0-G!j+kg'aNZo\~=aWWce#ҍ\Tm_^m՝hy< VjM"\҉g©|ilDM<&raxĞnsf6Y?!U2ĵXSkq;OAD,oӈGh:u> v QRGF$R,I+}U' I[Qks?+U6QG0I^_q/2sq/rurU.nE:ganEƓ-DH' Nӝ*y* EEkNT"Eu]zK'iE:P$/E&[S](KxQD%rQ/N@\׳ Ʒg+s#T7N>[k,k ]d''yum-;ȟMTC G>$ZE{L.%iE5Y+JMVI]8gḖ0e;D+yV:/IF~#kgN$7B{m;`H~hS>]_*Rs\U9rYmu3q3WDd)f_ Krr΁{C//@WQ)먰 \5.E*DeX$F=):S+R+OK'ҜWN NꏈpbqCm'm255®aT@Ba-0T^Vsw`]JisDsrFXJNjs5'0~ֺI!T ufɯl .cWCߨox=9vmz`{j(V7`ŋpϾr$rgE|ne>geKD[4wS (A}K q=tStZ7BQh$WRI U"49!I뭓!nsR&k4h4غ(X\(:iw11 ;'5tNSZ' 7++'A+SF8.׺ͷI}Z͛8^[*`CZIJH<d/ ( NnkE^9sľr6&;ezb>i=Uk**giYhU'g?WtåW}sp({+gildurIGYWϴmdC_OZeRֶIǜR6w ɋoDkY;Բ'旇'V=M\7Lm;A&Wߴ,flwy9La[p|h"՛t  1M]W `6L4ufQy;^n]gzef3pYnʭW +Ne|nR˵|դԋ7>q 0*o7/1NWJ(U:.g<ddM-_U܉fب[H[c l :Tpϥ8lˎII,^w?vatJ>ܨտdt )j,,lz%K[zR֗K*V'NENF)]?ň~qnd<̻jP;RT߆)my Ԩ^[0E2@zOm=aJ^g*Ò‡+۷Nvyܚb›B[ޝn w"&)6ziq@mz )8kT0r(* EM5;_Z8޹>d]u~9f9~H7'"-gTO$lڐ8$kYq'Ʒqgb&EGAI5!C4\rf"CeRxF~w{9'98iw8"8p@[z>l~l5q ǰjr1]JL[P/r,U5:j~Ӌ}W$^z5e= l^3: w9 q?^t%nY -/ulY[OPSOY;"NvidkJN3}d6PL_-FP\RY tȀ@Pƪ+ЈUVz'9?ޙ~D`#渞w5J0~z^|k×IlhQ]z5_&C.R\,7KHzY&U7!OVFt2BMW[I/ˤ `{K/ˤwզ-k I/jrW,Oɑw=F7t^Hz"o9jy In A+,n"+EW^ODY/Vc?f~D(/'ԻPчl?9к}YO~OdO!6Nl+"VF>]Ğ9V.u{Ig?Y'd͟UF?Z'?MmĞ"65mYJQChb!Xٔ\q])$Ї[O{aK%++4lϨ!,㺩e| |VQ4 ۲˺w{nue8Vňcu?!ZUxmfK]g#EsӒR_ӒRavI )?Tq%`ZRq!ȝf Km)v:~/KcGHȻɤS{ȇͮIቾNۖ>s>RRZqW)7mr,ƾxd}f{9҇)8" E%qVBOqDܬ^v5 n9RՐj0R_vtp)Y't^gF9kJTX}B4$i8eqR-Bف0b9/&)b Ә9)A\Q{؇d=}xDKnG%Of(Y[}H"p~~;>1Q&$VMX^)j~gj 4vO60=iYw.OiD1Gn$K!؎"1v`t"/Bl4*|ZpaY5-,f9Նz%;Nz2Yvw;t᜻pO=mOvO;4f'?(5aWqE@^=ظi>|c9Oz/QRcN.u[P9;{SS֬ٸ5~Ñ< ɟQ=YoSEHX43>rA12#q=bY hIf U RՈ+ynN}Ks-9 z$_S{pfeɴA)[#]CsdGP浊a@qEe{Xn676~(O"f;wc ͦ74A_lykd^;HO#H{XN)Z5xZG";?(QLc2Ar%d<[^Z4ejf7R`]mW~+pu=\U4bu)z%?cK XTpa {dz 
V>)ӅS{\9;P8M7Ng!7ݾȩ^6HE*071zK.ЂC6;RBvGrVXd2ܠ"r n2IanWքlOCЇ^Ҁz%Gn#w|E8b|veH#۫$k=؍8:vu7m6"ْ_TB%yӜ7d&"a}bm`ZkxC]51`lI)S⡧ -ql,Ol|ϒ r,τJ8Y 8gg9Y:Da[Ah@h60 jP İ{L7`h?Az#՛9ʑ 10^k{. 5s= hAlrF Sz>g9:s6T\k҅$+As^0o pPcAئm F-21{7ٽX.Ai Mɪi0&Nl/R_ Θo됱F C2!޵ Jo`WElK ܅|rwY A-BVEp:.fI`V7݁;3coZ8Dsծ60[H 2tC{݌E=L51Mq]oC7u EGw ـK{i]]%Ю #iޒK+ w$͛t (-&l$y^`S~M $C2 qj) 2`ܶWJ71 ,%Mu 5 sMrΚRYAimnB* (XNK06cCF^B#=6STP]yz%dcaImYU6Ԭ l3d¼5E)Zk rV@;}L=pjoMTT8t0!B@IXmv~ȁ[B]~k ;x4OF!= *A&;4`O)T!J 綾Vpa]cL ) fB=eXUX]`,8W 9lŽ^W__C&ɝv#[9 V7t0ĶR}xmxu?CPB1жgLZnY\TT< u:դ7.DkXI[:û?b552l|I<՗#/ Ve6F ;RP6թ$&8?[bvهi8U%]7 r"I/bgaF$6)pYB\\`>/(..Co u H . -M!||X0nB1Trr) <#$5ŎZ,|is&@ic 5ti t/dn`AU"F?/N¬4BG6` V@ ߂b*%yb0դ[ ]1|g'"n IuciJ>zAf\urKpv!~H]X܃fx/8CfNS@Ɛ]ڌ K3 2 yd9sSڧxVgjZcVPm>YܳmEeP ߰'P U3:Ym}>abr ,!ڱْHk Y dLQF&sajU8j/.)- ؟O*W'`I*mҥEMS4ag=pΘL1XfO& }}' L:_$T+fA.;6ee}`<$?ÎDբc隃Y Lju>.a߽I\U0WaY.:;DJ 0X鐌)l2LfH)PN vA{K'hȘ nDn#*T pfW=Qh}ZiFe9I o43rmRhjN9ޢ]xE49+s!ЊTm7?HƜFIe]ҌU i^ YAA\cgI6`bR޻f& js))=-Vw,97,ҁ!)lynj 2('oz !|P̝ S5 %T,& P5J$XHYn!9v[vs y"zB9J ]~:1UBW`-w0A#hI݈ f fVka YZ8ܓa_Qla\MPR( >5m5 +OF+2&5kXU-/O82<4fXB [E輀 s%cXH]w1okKc5HU#3^۴I!lg/ߞȐ>x7_%m]6 ˯]KefnA d3Mq̂2S8.A&UpME4|WlM3tR$kJN3BhǴA0Гdl-3zۣ eVU3}c"mv50S8c@"RM[ nWIfrM^pbBK U8u&,; +g|XX^ſY9v\Bh)7oP-ƅr !jc{2Opzu,+GXs[hPh/ES~ vs:%BsMsOcmsh78dpL;T+?Å!cK{^hSNQ73 'ukiF)a.ٍ+ZJU=4>WOO)ι_[GiqWޯ9&ooD%o,'nI)y\0|mJcw6IF1D& ,9mNGWBjԣԍJq<8/W F*4YldyNTBW&kƺ~_]xÓ'#Hn`4U\5I+(G-:?S -@/= E6< c\5oJ| !Iŵxhnv '6Ooy!.C6ĵWZgͶ0bH0+6ށ ?^/H 8PlU a[rV7· ?2oH2+bGݨ;C2ROk"t<#bj",H^m  RI-^L&Wy5vARe^8&<0.>v _ wp;u|rSQ9NdC> "ZPd<ğXM;ɾȸZ$(:q@BPԾ Ig N++̀UVG1 ̀ 1eʵk2$WؤѴ.aG yOH-Ac?H\m yhs02MH1!kni hB6{x1V-&JKp:ÎbVT*+hԠX+ H<AHk2TpVi"\lYF sĝs F eS tE~اe`+9(H$>[J9B&%([4f2tCͰ`Uc#1 Ð *&oV#n*hIۜGsia6BnF&8ch9u[\voPv pMb+ }*N'$^kl$ izʤcjFzEz֑T% ܐiᐇmu` E&dDFS"##Q9Q2wLl d"@IFj=&j]ThM*|rMR複i߸q2 +<3X}1N#k!(+pIBnV|P]S:+{ VʛGUkNk>{UiTm`l¼CThxBQT"@_.R/ ~>.#eTG@7 +zd^5@Ԕ\7wT ،sw/5\*}5X TV%X[N >;4VbE߄r(y YA1dkƱ$@=q`[?րQ/n;is[߮#c̾KwDGUdk#a!%L&aW#2*buԇw8{@蛃9vXہ@$CZ! 
7{PÏa>W@H~#c1݁÷fQ7Z#Y(]}w_bu󇿵iK~6YZPB+㓩TCu}85G?ɇ0$KTj&6揸۝azF!xvTmBPszޤAPGH/hz̅D'z >j)%:Xd1V0Ot2&:)Y+SǾ"z^D)^>8…B4f!57=Vl@rP$a -^Үgcܰ uk5w\:>2/"Si  B2<^BSC{={; dw줻q ܞI(}13pT OIM'&1aM f g7N! 3P9A#PEU/7|׃xE^.?T@Q߇sd!i*K ȗ|`|u| tFCơCU5%8bB63Ӆ* Pw?x  A[@^.^WUбBçU5fM y1@+3&Hu/[n)=K8AH _Ҥ VPTr= T%`,kZAhnkx6O')6Ǵ`̹Cf-xnv^DLFJ(Lw͔V2WaT `;Pɋ`^TFaҵ|1 ն.L8ݝj{y2 m Ӫ \9+*WKakX b53HZA[T֘% | II\MzKj &4w +!u+D;Vr2-Π!dvGf,f*>Kpp<0,>Yìq=LqkM%*?tj2F&8d@>KU1kos(x覾'8֛$wlrPZ@`anfō _ġ`8f 2!K ,rBK@g]ͨ皾a6[H>vd&30\)k$(uԴKiUG+kULMW#\NI`(;PTإBUu ,hƔ9UXPkSPʙB~x?tlIoNaOޝ@%d@h9V ![9!8sAts̸,nCpX9k) 6d%e&yG 1ܜWq#>|Dl&\+@͛dj2Ēaޚ B:w]! qTs\ ?}5ZiO;᱉D o$ɗP㥐3dމn {89uƫ=pXl5wY7p H 2Ҁdrq;=@HuaT#r}7vcOI;(v[EAȫ; PWʼn086EͮɕTy!j8%>9@Xk|7|FQ!(Q0c 'Nf(%,Fl,,ΗSU|{ ҍ15巨ajw"Ǡm[^Baj 2h%a9pPɛve!?pՖE'Y(qIQ[}zOoN2 <۲z(1$(֤¿ȥ mt@>R^!AȇܞA n9Լ);rc\9sw6M-l{b+)%ۚw%IŹ*pz\P"@|Wlqc&m O G*,HыQ(MTR-Ё 1>yTkzɺ!A`\Rt p rzS;hYUWMF$Ƈ$ ],;ө 藔GA4 3 5 mE.'[ EK0wvc nK3"?ft :֟F)fiz`>.&q`VX,Keh&mgh$NAN oY^nuu2$`V7Rc~ͭ!%VAy2=>?||\ Ri"dm1rt3:6Y0!uҡ Cod90 N>QbQ/O{Zb z04u1J4Ai^cT <:үʐޓQO!ÐEE\4)Ҫ4C7y ܀n k/uu i_AK)yL vf1q 5(uw3.빩`L_%ܛx !צ) XLJ-WE5vVV]|7|_O"U&rKRo\H ^WϕvU[v`G†zS={M gQ1G.~-7`zg XV,xRݦBC>Ƙxj7jSo ZӢM}{g,@G8{11F@U'O QS-\0tyH>&(E!J&=э /3}L>OYi~1}Ix 3峱2~>@CM!E˄nwͺmPw}~g8&< {cmq<:/-e0ŚhbH2K# Iu$чeIŲ q=PEg&%eiRӒiLʖZZF$kOK>,-l]peɶibqӱ:)|VɤJˎVn!; G_h%%.c4Yc1K$m{ʹSݲvҶ[#TG^n ?Cvrt뗕[/J[:lzNdEi{J A勲ruuI/G[-۩d?KEi::_)9y O˛_ iVoCWTD9 C̵&{+)lSG) 7uaT҆ޤb¿S7"?o3w^2s^y$k$r؛,;l5Ra{%Qz {Ѭ:#Rx `gOt]B[qm ~>?f}9w'g'Ŕ Q?,x;R{ ^K7\e޽Y2]I{~#Kz'Iku&pmP8n-!?4-_zT)VchBrV^:D/Ayl%5lXiWD)Zb nPٞr=\廓0^*0@(ddҬ^B{9ll_= .< C(n wާN]Kg^Z'nO6bϾÈWj{׆ 7<\g%7_޽ w{E^RQc{h_sW IEGqjGMFW)V;8?G>=h^%[~ݴSs׮Nχ﵏qğTv2y?nNi6f׬^w7/C܇/CmqԮe#$[ٓnr+>ܛ=x{2<:DC؜qsnn?wmR~ )-񇼆3m!Ӷm^}~n8ޭjlhəC!LBwb3rJڞYO^Jdd}w\<ǧSiiݜ>7?'{Ϻ>ыZkq|Y(nNMkF:Qv<($N \vTw'qUYC JŅ9:yxmOaNta{r̈́Χxߌ -ra,w"˦l|)`ӊMϻ?7p{ƴ|/ ?BmrĦYAiMOBkL?Bme4{\9T2[)̚D3Fh 5Ο5CoO8F -#c~5icnt¾up!Ҁf=U^dr Oiν5kYyw֗C(b5PS]EkY?/''&`e%]VǑ)t=KFj G93d#H ps88^H7.Yv/l<򀓋:^Ie U[?j{]xe<V僭}a'{\.T:ܗ3D*;)ed;QeB/.\/J͇/:B2H-:^R/*hhWIHRrܠt| 
Oũ6UF<=^2/v4lzjtiqY w-y՟>%T9W^!of9%`ljP,cv; ~Rp|df nIn/igޢrLtrdjibgc +`&X9 tY6c.ޜP!s};&G'Bz0p"=H2Y' ey8! ntY+Π]ȅ I+^o+n8}O"IcdNA^{hx9 /3xd:2s}͠fK$@%ͅ6!C E&bz!Hrn9Y-5M5YD1F݄2Bnq<bpdPo/ mM`ݎe\Z =g9m& ^S)6b(H.8A ҆ @5A &0 F`\xIYi}o[R Lxa jŷ]<ot%r0h7T0yM@bX~,φ jRi.?QC{K .Y ;`|bר o~o{AndV+ xYd~Jad70M*ǥ.3mz@wSdc,8FX!%9: mҷzf ѩ{&m#ǢP! l mMFvc8*τ Uq[*eY|Xs}J!` c _L ]p0"֧v*s<IX;F9AUkr͏h^8$‚TQPrML KY[j`H&5M} u0曜+)]T c[QP='S F9n&HFm0D5F@zD5*wZ 1X?At\HڔBs[?Vo^f%-6hE G25Meˤb^kX2-nRf8Q{qV7 ho1ȯk?融}W54S`Wc<"".?6՞QcZt}jj% 4.UH0xS3 lT|5xanDx҅xmeU舛z(`e JZl\*b\q8)1f&/AOғ`Jt^5z24GL r~H'82ߖ1 yHN+$e<%eHƺk0{IK=7#݊ެzwA_$3V^\?.Gfp~?^N+($++$a`* }4Bnk' 8n݀*Eh5<M$ CLlGS3Bjl^ ͕gˇ,wt);C$ G`@kqWJXZ"0|Li\ e#`$]}[H-r [S̺W^`*".#'GT&=!#P QF)?()'YEh,Uǜpv[:kVC k^ M1:%{FJT5=-80^ֳQ)3" Ê, cd<=K #h_HrB'ǷR8,peu/\Ax5 17Pjҁf*]BWEd!MBpyEw0`2샯͗7~ЀZ<2@Sdԛ20\pd7rbJ" }`2N`n5,܈0@Ln E_͆BjP-WgBWVg#Cc2a u&Y\I_Y\ dZUxB*:# ,O{˘jQ!l$ԅLs9aZNnj@7SӣC4;: r7$0g8;IMR} 1`41G#hUbzE-BLv}hilP",uE8mu-PB5 iZWجc;5&z"&gYg!P9f@|lWե1 !>ܦp3ܔn<*8>w@K\V j%.+L& 9 \N]PIѮhXX}C Qȫj~,A61KYHT$SKQ^HA*{ CZnE Xڱl Tf`4&v;I4RU2'OId7Yy.(R i! 
uBf.xtu)O3^څHBB?6^xO⻴Y6sNid7P9O}_f иӛIa>x0u躻8 R]҃j zX onoOOaq0[p'̒hVT9:ny@S܊TT +)NOխ1_Nk7vp ~0݄W[;`L\|&K92=8qx1*FaZ_?5m(CJ}f^FڋS endstream endobj 1 0 obj 40786 endobj 6 0 obj << /Type /Page /Parent 7 0 R /Resources 8 0 R /Contents 2 0 R /MediaBox [ 0 0 792 612 ] >> endobj 8 0 obj << /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] /ColorSpace << /Cs1 3 0 R /Cs2 4 0 R /Cs3 5 0 R >> /Font << /F4.0 9 0 R /F2.0 10 0 R /F5.0 11 0 R /F3.0 12 0 R /F1.0 13 0 R >> /XObject << /Im2 14 0 R /Im1 15 0 R /Im3 16 0 R >> /Pattern << /P1 17 0 R /P2 18 0 R >> >> endobj 17 0 obj << /Length 19 0 R /Type /Pattern /PatternType 1 /PaintType 2 /TilingType 3 /BBox [ 0 0 8 8 ] /XStep 8 /YStep 8 /Resources 20 0 R /Filter /FlateDecode >> stream x+TT(T03Q(W5T03U@N0 XE,`"Fz\@K ȷT0+@~B ( endstream endobj 19 0 obj 69 endobj 20 0 obj << /ProcSet [ /PDF ] >> endobj 18 0 obj << /Length 21 0 R /Type /Pattern /PatternType 1 /PaintType 2 /TilingType 3 /BBox [ 0 0 8 8 ] /XStep 8 /YStep 8 /Resources 22 0 R /Filter /FlateDecode >> stream x+TT(T03Q(W5T03U@N0 XE,`"Fz\@K ȷT0+@~B ( endstream endobj 21 0 obj 69 endobj 22 0 obj << /ProcSet [ /PDF ] >> endobj 14 0 obj << /Length 23 0 R /Type /XObject /Subtype /Image /Width 640 /Height 480 /ColorSpace 3 0 R /BitsPerComponent 8 /Filter /FlateDecode >> stream xRJQ^җ%9T*Nz-Yk)0l?y>`$ǣЅ'Ѕ-b { ?`(7l=j#oMk^cM_LkZ5sƚ_kZS7kMk5iA5ִ5ִ5d A@5Ȗ [2/ȖA_l/k-@  e_-_ [ ld [@ A@2/ȖA@@ d+l/;[uW|* 6D&/d @__@/3 /D_@;8GA"hݤpo/[?O/]|Z7;!%vM٭"f / /  `W__bb N>@k / /`W__W@__@__@W__W_// /" %{ _Q}IJ XanR:ebM٪  Y D_-ߟg_ / /ܣ ex a  օYV+w-ZAWRٺX= Mu[@'507݊k|'*/pzv-"9EK_o/*T% G 9Eso owH0Ȗ/ /k@__@W__W_// /" [~ ֍K/~C[@w3]ҫFw}m~pONox^)y3{A_dK@__@D_@W__W_// / D_@W-_W@_ / l/ D_@  [ Ȗ/; ?@-_W@b @__@Z# FwRNRFmlM<Ly1tJJ8T`{踓']%a\y_;%?;}_ ?@-_H0%o\bW; h/ :_dK@p?ig^g GV qnj//`=PĹ__;_ߢ  b ; àvĠv}_4ϻ t~piRR_2vC?-Msw)k [o2>yrm/T2(ɮ-w2!K/@ Bz[n45{mu t-e-vԞzF ^s @Ç;}_3A _sS 2P»2Oq20lhG2  @%-(k7Q$k?_eu?˼xI`n*x '-)uK8 rI8!lck'?s֍K/eo2q 4޶_hSpt!/ .cq'ONI;}]_ld A@5Ȗ [2/ȖA_l/k-@  e_-_ [ ld [@ A@2/ȖA_l/k-@  e_-_ [ 
ldAJlMkjM_LkZSך_kZӚִZӚ֬8[5ךg߯)_9DY _*GorKRne?9G96.o_&7-}~u9/X?᱾J_/c\kH?zyzQf0_}~no%þy(_ľkN5Ys3ׁ;{zOZyb:Yh{ڿ͹]Gg{?ciK?p3ZEzLZyK潩y=޴}Qkߛ~ퟺM9HWk/Yk3,w(tQQ)7+C Y3޴ \3ޔ<ϨĽ)~4bwefs_dsBsγ׳}>5S=rzsUOroJs從_S /To7`y׹oQk:57gԚ)7+'~Y3ޔص̿795CMu3gb?uhs/hW ;7__{|9QkΞyMU8k^:Ju9e^轵ۏ9QU3ϳ6묙o6{i(Tysy.@ endstream endobj 23 0 obj 3725 endobj 15 0 obj << /Length 24 0 R /Type /XObject /Subtype /Image /Width 640 /Height 480 /ColorSpace 3 0 R /BitsPerComponent 8 /Filter /FlateDecode >> stream x ۸@ڤ6M2ub[8N#CI jY`$___. 4=. /i.Ь.s|/Jl{-i9ƴ3i̘c\AdO׿14/lט_cӘ_cSiL_cSiLc/Ȗ&诉l/e" [& e"诉l/k" [ l/Ȗlk" [ Ȗ [& e" [M'z9oӉ|/ / / /'" / //<Wr9]fמ*:鍶޵>$_J)u@OD_@W__W@___@W &" / // / / / //E&" / / / St;?;Yֿo% NYoDyו kv@GSG V|򗖊ծ:x6DZ66=4xԉl{QǯplO9y9Ndދ/ֿֿv^=>, χ;=؟gΓt+n|k455-_u +q˩]ΥrX"egN%7TyH<=iWom)yCϯ<{kp)2ӫٽm6#ZV_{}5GֿֿֿֿXZ/'R7#?MWМo ۨc0}?/xӯoَ 6/}O~oY?7w*"w279&*!z}HyZFhyW~A琫^iw݅2"_Pow}s}_ʿ<-c4߉W+ֿֿֿֿֿֿ+)[!}GW?}|AuR ny-VWNqESK'h͛L/!___O/@I0V /oH0~8._@OD.yL_߁}zQ;'}F&>Lmp^B &<; u&Z*k{Qd9Fq@Kow@诛?dcD_W_DA 9=x GsUCVc{QW_]7bw;_GSW+N}xc4Q~+rIܷ;F{zzpIܷ;F~][bJ]J-Ƶ?/|_|_|>7|7Ȏs Zz} zk lkk\͟W??<ݯbBs)7g?N${ouث]&s?S ʢƛ x8o8O7w]t[ߓ( _{~&__ rg34P@Zy v٨no~׍_E[B{7eq|Mٚ~"^@-H9(d.y}( {>GX+>p?iӳ4g>q)?7lo.} KWW7lL<}!bvz&>-'wst)ς>pߛ___7I^0杧Gc/&>s- ;D ~ ]<6>_K`'^{|ewx ~٪/J}r"-'_!Ȗp_dD踸^hz6诉@.ާhh__Ԁj_}O׿ t\):{?l'>U׿lkt-3Ȗ_dKR~ﯧ-'8}/@%M/@z5 >7]/Ȗ诉OpWeD:. 
6%_ _2|k"T ^WnO [& e" [&Ȗ&Ȗl/Ȗ&诉l/e" [&/ȖGM_liMޔ%-rzzPЩV_\M3[p%._D@2_-2D@5-@LdD@@L5-@MdK-2_-@MdK-u4-2D@5-@LdD@@L5-@MdK-2_-@MdK__dD@@Lg"_ _oXR <*:~E^\/]&/[c%.\/k" [& e"诉l/k" [ l/Ȗlk" [ Ȗ [& e" [&Ȗ&Ȗl/Ȗ&诉l/e" [&Ȗ&Ȗl/Ȗ&诉l/e" [&Rc4ƴ3iL_cSiLck51ט4f14%^9S敲+.mgܟsroins?9:3Vv3<ܻý;GWwg峞E~p.ss̜C)~ߗ m8]1tg={;c"[v_@%DP=]d91>e^i;#ēț˼5Jsϼ5ݜt΀=O㲟iu9ocnNv<^R@cTjēțw/6Yi,5f) ;}O&~eG$ԘK911*8fI~uo13@1oMʂcߚR1qk*r;iߜn5\ṽGkEj?3eWGNoΘ{5}z{" 9Yo?M*^h-h`Km#{wƬuˊ{m?5Q36یkj6[(xҩ[` v= endstream endobj 24 0 obj 4983 endobj 16 0 obj << /Length 25 0 R /Type /XObject /Subtype /Image /Width 2284 /Height 513 /ColorSpace 3 0 R /BitsPerComponent 8 /Filter /FlateDecode >> stream xݽjZh}}}I;q֡Μ67((A vR ‘&)=xk-cY1?sN}i_??$N; wdX2,ޑa@|bͰM~ '@' 6w5m;w5m;w5m;w5m;w5m;w5m;w5m;w5m;w5m;wy[U~ssU_&̥ 6;k |bpzA\,ʲY^}3crrR"xz| â x@жm:uusK9Bdy0"_F2pՒ+#â x@0ҩ ; kCD9x)Zw5 7o۶ưX;<ȰiqdYF5ɰ6D^ Muޓ[vwp|n* Ȱiq4MRo k>amh;ZͶX` >OeXȰiqPeR|.yzWֆb0ic!7LN.XvDEMSlu-y# FeYYdXq(]FZug3 vAEM<,_ 9Q [0g6m6 l 6; e~ffM `/?,gXȰiqRVujapZP`*f, E`\&5m;w@ʦiiEmےa 6eYVb躎3V8.nR2,jڀwd9 V!"22בaEp_ɓ'ۻ~3.l 6; YUU%[g,K2בatP,?Ȱ>|ޱOl 6; YW *qK,jMӐ0 _~Ȱ19zȰiqҔx^P ﺎ uu]u|.]\\x /.l 6; M,9 dXCYM?:saꆫEEM3眢Wa!aHw]y+?:{aQ~$L 'Ak'(|~bX">LxI#MLN.}Xy…"â xHPQg/˒ ay.xŧeXᘜ\r]ȰiqRn5xOpY 2,0y.n<X2EQ 5m;w@j4jөS!B",$ Ptp|4 ȰiqR%Cګk2g1 A@Ƈ9W6GEMm)S{Nγ  `rr7 LJ."l 6; )UUQ4~7UfON]§ē,PuZr`CdXԴI#m_쟙|.yNV* N,;V8OX3# i6ZkZI2,`( Hʷ2p,;&l" @:ȰpNX*ע,Ȱ- F#p/La qdX& 2,ޑaDVk˧b,`pE# /? 2,ޑa:otx(|6b(OMSZ C;<& GQ-&2,ޑaQ%`S>'2TU=2`w}LV8O&2,ޑaRuEWmuilf7dXւM82pޑnȰxGH|>KӶ" DY,ˏ2?peudX# sj/pP:u][;!{1d0_~pq# Ȱѣ&52V+ɏ~#xQd?:2 ?r}dX#D*o!kdXθ,2,ޑa"V%]wK/\u-ۀa=porr)~E'w 2,ޑab=\K/M0a D8lؿv\r)2,ޑab5)]ն^fEQRމ@q|Fx"eN/|_\tx wdXX).a) ڶo*C_Q-~#dX#D:ޗbB( [njP>OV;s;`RK s4ȰQNmw4͌\}jᏦ/`BwxHp?*Ն%W" >~%D&*Bs7K-;?~;P5ga e;qtt/kUU1085PnDN01ۻjm;{9+ Jÿ9.! ?99 b:snw\x~ǭ=`5;aeڡG<#EQ0b,`X鯡N@ ~KutYώ[ "~<d<|k]Nn0rC1F5^0} 8e V2,@dH;,Wʱc~ *6/>|0 Njt&Ysa<ėga]ӯ)HֹY_¥\D ,zb,˄?Ku. 
љuO{*5Z&^<|>\81_4|"!„6z-ʰ1Ěa u[^wbwY&<# &}Sj2uLjʲ i He+NN.Uj{)᳽{Q7 s0?NһpW,>n<+ ZxLkm^0,_~dXPt:QpkO~"C $էejo&kdXv2pW@ra䷝4i-22,jڏ\\(.rzem?Uô,ɨ%QԵ~6^kV $6^j)&ɰ,gl1%Y.2,P5X2,2, NU#xE %QpmKϲA a۶<@˝j ffN+NΜ6aMN.٥KEEz5#>|zIvyr1ϩ&Qp( _cg(" _x=tj[ΰ?lj6zs!ֻՍd +/!N{(<ܽW5w@4)رeiᄆfXؑxۻ^[ZKe'C<=YfXK׃دa%[ӎf}}4w O"#]M^~ġm[ʰ2lnc$$&*@@ err)  naYDSj|'82{1 ȰҬio1qċdw>oLWod-,wmww}Stby†feۻ"63^{TK?藥NKȰ,,,dXIU_ XdX״c_DWo X~/;bAUt:`aQ/cc,8"ϓ-~OM쯑26fHSϰTrvb,2tjJ[`mcq+˒rn-cZeY?υm6qa躎',S .j%,t36p1caE\N>J0~uVn9H<闗/Ss `_Qwj'$fX6ZVۢfF_M;0q}  Y++b|<i-: ©3w}wv0QQLbH6B2a&ɰipD"{eo J^}߯Vu8( R’[~F} k̂=;,Ǧ`YaEVVy%?Bt:j0{*RE %o߻o;~EAcgX\}ScaTӦKr  2MPY)s/axDT9 RWl'f^dX<-[˰(ܘ +6]:YQnyc,Da"3(cZ`J붡zZKőZ*Və:OɰȰ==i)wȰ>2jXzNc*{a XWEdXt$J_2,2,n7'% 2ł}ٌP aZobX/eʽȰXzC;VfdXdX沈 &%ј ,cup-2F*l( wɰ8M~aam.wZɰQa R,`wZ/c^nU|X.?=8>6JQTKJa'\B度Ψ+A c%pI8<#cS!b,,c1GSQ,th dX޿yO~~zϰy !ܳ/nxR,2,5m0ކ&NѫC?ü W6a `Xl*DjbO"ТRbgk2G%0$v] QkR.gsaiA>3Hu"p]얟dXTUEԯu]P0G:J95.nF=aZ{{PuՅfm cM[qfmLN.U&m޵'e< ɰCɲiLmSys, ːvO/WͰ&''7cɴsa߉ʧ{dXdXF #E+9C뉽Hp/< ɰQZ,L U7͗px4CLQ-Ux՛zC]Rpe0ci<Ҫ^@宦}p|{IUU2@@YLY@ԦmWUn:EjbìUUHCdUskF);ۘdXmK4MН\aE]H~m$1V9z]eO*@S7﩯L奦-^p {c3QDcGE:duՊ#a6_7t{xyWekbXk ;MG]B奦-Yԓ|`xԗ#ɰ"`pI Xg د cI˷W 9Ͱ{rQ}i%a_kSD墦-"E{g졫TY/ .( 2 p:2Ak_ 9Gٳt(~NϧD#EtאaiK>=)قoOzX}U,ʲ48V+бlapZo*.Gҭotfn֚nKHb0EY^&rQӖ\{$WΎHM=R,l{xҡ:CXvGUU17m[ա_r(6Pp 9W-dXtW: ~M[ӇX;a3KGX,Ejr=ٍm}ϖmAZegoZG  eG9^2,G;/CaٯiKⱑt=ἯS_Kx_lׂuzlmKj5kl"t:e,a(c>O?n;A gi]v귛XL=rV>2,7Gx_j l:*xEQw |UU\G:XCAJ4%2_n, Q?z`[8taٯiKBc9\3YR=]dX sʡ`4|,!@E *EXFYv;cS ;hJe-rc^|cZ[] `r(6QUu{1C+OԵ{9^zO1ݗM~t3,;DaiK>wxY;7LȰojۖZ(6^|i'Gn%n( g,G)D\~ۥJq KʺӝwOaiKFpbj"@wT2,|W{/ ňjc (.A2= Oie+t3,uK /Mji֬(v 8W\ ^G%ۙf}t:eU] mZ o{|U>tf~4 o^2>*ӥ*y4%[+Ȱ65MC-:Mxiq*.BuS{)qƲnlrrI]b% h3t2,5mj|e4YwOe^Ai6Q\j*||>gn=2xొn[M鞷G|ɐ~v!îXdX;1;OD}<~*Vɢ̰WM-%J*"'| I1Ê{2`G:5)BCbApk&OnR Kc kt +R@:*٨LpxP9vze٘ @]%\m[(Z SbTKFy4%yzEV`]_N;dXfoUNk\+&| AM~ ,c뺎S^ɰYF%327o܌dXUMè&"Ê#ʣ l)G)*SZ l o|`#םVY-?|ȰȰȰȰȰȰ8Ȱɰ}gJ.,ɉbH|e*v,HՒSEB nEeXឥjJEEpG)j)G,)V{w:yVD6Av*тikv"Êl0kEdVUS| + ?DdXfgcNQՒ q{דaaWU1ɏl $u}sV5YbMGgԅ<dXdXQ2,eao(2,2,ݛ 
[mۤjYq7=tbidXdXp(yBEPFEEEš{3*~c6&hZQTi$ֶ-}H7D#"Â(\aŚa).$2(dXdXdXdX~+zhKbyz 2,f$E!)hletxf42,2,d3,_Z6b PvT+2ZGoQX, 2(g$rquCE1E~ eqM( "ÊVU\ǑHlLE49w&^x0Xp,Y2,NYG"y dkk$ɰ :8>'"VUs hV>r£VnYW/@E4z@yBE2(i0-Da(  2,2,2,fc٘\ q}Sbx׵N? i;5_ߴmqke;kP4GQ- b{;)y̰"JȰȰȰOȰHa_/h^.E $1.6MS ] G:ˎx]19 PaȰɰnzdXo &:z#6&oR㫯`1t&`%ZPL٢ozaY(SR0剎)1_l&I{o㨯` Y3l4K]S dZ[Ƈ#2Q?2M2,2٘q ){uۼlg] ɰ,uP2,ES2,tJs y>qG.E m[d|1VUU̴(*-r*gOs}[ + b.0n Yoap#AE~ûBbƌ,:+ʲd5w\iBy; U\6<2,ˣZ8-ņx޳KbyUUQ.~.kF8bs5bj%ch)X^w=wgMzquxei__GEƅeV۩,n/n jeA."b,/+;ED^(Hpt+nA-Dbt_<>"ò| _8IS\Fb;- \x ue]r]t&d=R% ?:u)fX{Nǃn9N2,5mƅ|LƠ4&xvGhG;n erSIS1bUTKSCNTK8IS̰3VaY ⋙u/SIeiƻob{8b,b41``!pKljȱA{6{ni{2M2,5m,s~gݯ4 ZHnuf4+mZ>f{bl/]ͧo#LJS!ݳq qcoayl`uquY&Ne-mɻ0+-BSoeEAzbᴞb{w_w(Lh6 T&Ț"zG!Vi+t3,XGe-{r' (o^^-s\)F~Ͷh7SlHőN|ݤѷ*Ͱ np;GiaayLt3uʲ2.ܑ*} XCź?ؤې#B=E@az_ws!ò_NPMә #Y:Y&U_tʀ鮒Le-/t&с #-3qbN~ͨ'21ez/T[8Q,e) -P8,[8.oѷa0Z#iaٯi7_՘d2ʀ I.hGcU%B$かoW 3h₠.M88> + B/{.Yg! GI6_edX.jӑY:>H Ke4 t]:ʑfefsZV܉H|pA!zzIFIQ-, Z,,&hLC5TYhfkmxiIT4];ὟgX}+~R1BzXbUq,u_oVQ_If-òQ+#dXjZ"l[ύ*gGu$tyNsP#`BpGM-w"0yT"Y{;M}H*#X#AVĚ*g 2,G5mf ?lY؃Nvid=,$˞JNfQ/#2,w]aqSQ[Q*O%)Eedc,#후 <W7>O,go2,_5mt#Qz<"TUEs<KrƘ6c,DRYSu'kRۋ, />X/4[]ggo2,_5mwlza)#I K `:Ru][>UUIgSgfK ԦꌼެU7[ RXK&fX1VEɰմuoۘbW+fzvrE3x-VeYrW@XKlXvV,‚ mbs0$Ȱ,Xp,;2,T%3{aiAw0o(fX ,X,2,l6A~a;pcqEXHn[*eP C K=(K~_0p4 ;-ן]^ XVa73, Wt$7w Ȱ M3*iYdXkpQfn. N;,BLB# tqucK2sp|n/a[ 'T)6{ayi[X"}o*ai},|B[BiZ,T8Ǔإ lն-)6"6NYg{o 7ċ.BgE(W7VY%Z#-Z^w{?:2,5m#7x7怙_t34愧HW<|^ɰ D`>]ʮ̞iȰXو6P[jXgemU,-;%2 2dsd' 0l/qa9i/isW7;a`*& 稺N{t:yV:Y6xn+Y.W>:8>[~[f2& 咩XQ2,5mks7 yΗ(K^Ƨb2,HsqUUaYX}?i{m˃mYtuAup|f:pqjywqu$#2j6pcLGYLlXxRGSfS1)s㩪J6MCl mrς)I ꑑ҅wq]/zg~7_QqLk Q5|z<{|?:I%2,5ZZ7H6L XAg\r ^g.$MdVLM!<'ayi[XS?8>s]Zʇ F2_hپsTPU.ͭȰ:cm&4 OyHWv2ZR . RȰ .K彦k;au:ádXN/1#l6S68#zIeb¯ ʲ趪~)f*p(:Y;FjEaEPӶQ1dXu+K(Ʋ wYkԭ46 +c4ixjۖ^ ޤB2w,TdXdXdXմu_!o8 11hF*rQTH9wNw%t?:PhQ^pg9òн&1@eM# Η;G+2_ &~V* +˲m-L}OSnjXf'_\-\\X(\>u{׳BEzWI2XkړKŚaec'-g|yht]GsTy֦uMށڽi >~A2.etZuuG5^uZmȰ"iwRdXsj*˒ 5 t>ee$q*s Nl;3źnT +ndX`ý`2kaYxC k pu3`kpJ +]K¨ֆ5vZn>YqGw#3:udc,nҬ ȰRi`' o/{]ȰTm. 
IaVUdXluł:6wquk#ݖK$u\?dX)`Om#JdEagQ#-G]ޯdX-f3 gE>̀/ʭ,=Y,PFiGه2 Y&꺱>Oa ?cv aYfA&߬[ɰeg1N, ϲUO^cr?@$:3|#n8 Z~e<Ê?dXԴwܷddsJWK4<6|aޥ:c[,fIVhR,B Mylz*`pFcun9/?2,XwȰi?49L* C1uv}aN7YIs8u]:؊PT,łV,l\w.E<#[r$X]^w^v W6raQ~vTDS;|yKg/Q_[Onl%KB^ad7%?uk c7v[\Q1R3ſ-:ǫ35ɦFddXԴ_y~ugwrvGa"5J.oa6gg; 0z,q̲L}}t:HrwVPTLB``>xP⫮{quc*%ú/*1@EM{[u~0u]ZX?[ɰ'zxբo;Cdi۶<{֑.nx|EiNDa)dK3lO?h n~{gmisgz.5;`C[(32OMՊ%l A&Eb]-e|=r>z%q~B^YFV;4=ҥTKLMP6O1{pu:ETR, k650e_ C]_T˃wK3KbpMz'h(,KquWih" nvj!k(BS8dA ]I,BktNS$YCȰĪ{*[ .ss(%VRz;AEȰĊF2ڶ>T%IfZv>b,"CdXb&4Z<G9~,,2&{3 w}aYć @4RGQhLj)JP&̷EEf2,јLӈG&PH%(L*%6^@E|ȰDc:R PUUģ–jwP,*9lʲ`|`&ſX^wI]k2,2 f0,dp#H:Ry-,CxeދKm`SǧI6=9ZaY01TU5X<̈́oJʩ/+˒9xIeq#Z~up|>9l3MYQ0 ybD,m]!봚RNYQ-o$"`c<0i~2©~8Y*2} f.̱>O''~,2,2 f0'`EXy Yac:7 c쟋]k2,2 f0k'C yx)-4c%R-""`c<fXP~e0# VPql*yLl}'&]$YdXd`XX2tjFz.C* cQ)""`c<0eĤHpH3==}eYܣZudcP8ɚ\dXd`pZٌX_m RzXT>)1xɰ,6x2),R4Ӟen(Go)l4cQN6mZaY01a oNjۖX0_׵y.⎂1V"tdz] ̂*هϓ ê Y姾+⎂EQȟO+ pR;d٩aY01uFWdXd:RГ!mu]|ˍÈ׵tw 19pɰ,]9a0 #`:94T_qGAc1d?_k2,2 f0<"` ٌ:fvX] I2, ) 0dzљ&"`c<ϻ+2,@?.1*aV%KEo;=*߅,W=M~+^k2,2 f0dO_2a0bAN,:a`6/abBTaY01f YķTa~A2,`Y1LJ|)YNЪaY01$q| 2}S4M-s;d<.JW 00ET9>Oם&"`c<HbUUet]~Nb( G ,KU+dXd`Q*هϓXגSb( Fi.#}GsaP. 0dzǷokMEf x ڟ(zł!X.JmeY|: :2, ։ 0FZaY01D㟋Xk76Y+uiBmk򧎹\eZRxt/yɰ,ASJXP2|>p?uM` AslrLN.Ů53@4Ȱ\# |dX@Nd1  i`EP-n wey>xHCk)eYFuV)x)9>BR,2,iPFn϶mXdX@d[CdX,aNYGlw:2, >t| Cl _"aA = pLN.y?0|Ȱp$sqK󹝻u6a zwM?_/F#t]G!xXdX@.s X@uMy?5ZȰx: 1#mI8K] 멋2,`mK |>y:8  jUa=dX,2,,>kX$Eayy3hlMdYƤT1dX?:#°npq''a[/Z+_GQ-*1+V8OW334np &*7.dX p vF z?q\;$y*·=|OcD ?UWGeYfvb*a=est]G_Aƹ2godXؽT\18ހYN".}V)׵)JD\. 
#< Lx+` C$|?:u}Ԡb W*LG ,|c5o0z:ЛCϒ$ ubŋX[Qoj0e۶mUb,i3bC_@˲d^Jg &u!CT$|/w,Sq/*5[cj'[ʀa݇Hxd ,k`yi6lfp CT S![yIa~f3jkH0{m+2E>ò?Hʲ MS23 1_C&v;ڿVki ^;<#4l=dv0V Fz{?J axǚ:2ҮWd R$'*ۉ2yݙ:b½o7Fi}.!,ofK~a&a\9󒊦iȰRΰ2p?Nѕlp0땿[r?Zc512KEW{8zge~xgDPv%vJݘ` 7r=|p]źoA*, jKre[V.Fc!ફWfь- ﻜN#UkUaz 4y +Ѿwxzɿ/aN σ֩O wAu{ v[fc,=ΰ*5V|3Ʋ5vK r%b/14/{Iؠ%yW=3k[蓏fu7'HR,EQaai-(ax c7xqt N >XdX+y8&]fKRuHv1V4Z$Va qa) 2D2,?r22֎kMX?=,; b:9mKe6"^ʗgÆ wVbu]|"͓߼KD%w e^#âT_+_8 ם/1Ua̰l(+fXl&IJmvn?H͙@1 it:,#BRǚe6xAK ks;5ᔒ" KiX3,b4_Zoz Z~ \)!5IkxwUVdX)χ %~$Gng2 "i~բ(^,$+UU"+˒hY + +uXW3,L/Ƣ(9wNPMݷj"xaY˰jw9C! +yOT\l$d`XmR1S$¿j- faEa]\n"V͂Ǝםe ,g5?QS#&J!r Daqb\[}^ x01m>]4fF]* mfV[=۵< +M% O)'Q;AU>L)ňskUbopqĖ:Ͱܰm[G۾%1\b ERG^9ֻFnfşQ,_꥝|7` h +E%7sW[ 7J4,cun/P,K"$u_|Q2:0fFdXQVyIr $v;NwS9 [@z6jFOe$ݏ}Ȱ8.<佻lb,a]EQU>N-00EaͰ.nMս忍aQuT[;< Z%X>jpxy . ʇV-/N 7Hx_YYY AaYx"ha{?K,ȋXodqwpDruv=_ YΰrW˕iZqCc,udX;.ߢ T%a]\x\#q}8}IUe=<fA>{?O;wn~;A9g`,a u\naőa"0ɇ1];p~`6qF02ClཝQ[[[|oXP(%lxm[&aݿE7❃̰C4^VW7ȱ7ػcv{Z{RaZ;8>dH}/C;7fz~#φj'ɰ"Ȱ\0Q#Y 7рŗaWgʆz׉Uᓾ2߿o`OQ ?ukri64lzg.8w٦  ^,  8RŁ6p0&_?,TUW?=鮧:0f<#6mx"Ƌ Mspxa5 |RXrѕ Pѹl 4WQV/qxrd_D?^ ttlr=$z&[3pRݰhx]hN_t l|rV~4?%Q%|vx6eDJђ߲۠vX ! N]S8et@<2 ^/̋N4rIT`\<‘lNp ? <+Dm/#\NP9خPHP /M49M&/$UfjO ahd?Y-jCU=ch.o7ys_,k0"nt1N#(x8L&Ow\Vno\\3m G9D]bp(,L˪K6N@hBüZ`++Hx^6۸ 6^/dΌF`l;CdpX8,4]x%'$rZ7m(ҧ# ~&6Y`9嶕~2UꯈtR9ZN?O i,YvjmҼ.eYl6ckt/לra9|l ;,?m+iYJ&bOP{d2,YNؙatRe3X IЮjͳZlۻ8oF81Bff -]3pX<~.Ba=MI~^ RZϰ0-)?ѧ G B ^Jݰ'tVns~ {hy"H PEu]Y1LX&.J8,?Au8Ɩ|P$*|mjU*7J$,V7Ϯ,ssXm-_Wk`/WkB;F`<Ȋ`Z]Ր;( f&9$,W˕j8V<㜪q{1k}OzB \ccmnܔ$f~(ΜD'^o>9EZ%5z WrrpXIXVڈa3:ar!.NM[-G5ǧW+涢+*ɨaw1|TޙlFd [&In^˰!5}Nja?ҒQ$,ϯ޶[bYIj8w~9.VG}&,"=v䴈!͎V$PYv Н@F y֚⸲6aᰰW:,$,X #{V˰~> :}A܅CB8,c~ /XJ#o-NVr;SUa1\[f3>muM!Adr`&aL_+;en[X`G  )7foAΙG 8U/Ei$۝roF|4/ - ⰜD쳪1]Ny9A1w 4ϷrEm1~ oR,nc85eYƞ_]i2 7pX8,+٩,2qXbYf>rʕʺ+Ê┳3RIlæ+g{0d'b%I.{MjEL 1_EɄlROnma! 
!Ejϕ~Ů *pIai~RIjP.='盰<||M&O7Y-nIX!eYgsyey/tN&,ѹ,dwu~l+# {IxBuB.asXRZj_ÒɕXV&:/Ò3J,Zd22I>шY\HR<ɭR/O1N Clx\9 42T`al4grG8h:ŏaJJKaq4WUlKK8,VG*\0eK4 6kˆ`㤜+RpXvR,vhGJTscƹr7r'R](2yk\wp l5 a^n :g2X#2 >GKsXVa󤓰 S1A7SQ%O:zuy pX~8t4wQHpUsEdKtE<چlF4 ުET:k"D`gσ/0b QuDS /7!+hj&ai jjdO.y0jNaupkvst: o"b) Y!Ɋ >b JDžC\}J+;i&*k > Ca՟]]eVE3 5kSU0ؙ,cp5cw-\.Xg;ò귑.E*ƸOq!?Cx#gיT /&14V*W]v1 (Ú68~6DO?)0؇pZR .2p߻/ =YݑVVt }~+!20Y YrfYBe,tt{ZEJYz;d谼U6uV^Wl~1Yzj$Fi|5&g :1!xaS!2ܶGw{71*LIL~׺3 eXa;Ɇ~tvEFQL!d2᫷r00̝I 㰬noV;e _ӫCj $Uv%ϿO_rH[pBܘ;,ovRwX޲ aiӲ|`zn3`5`tA^@/:8:n+X%\F 쬪\-_IZ/:0$<ԃjX8,oyw4Z*{I^9lu*'~j*dJjv<*'H#yli;[_G"0r ;4@/Ⱥ?h4*"sXOtQ.O.|61ɯȶ=eD/2Lt.U]&&Mf r뺉>ov3fB339_VUĒj$rl6*P5v`0:]pXsU?|ptIPz`Z, cbB4[VNbpX8Oɏ8,V0Md2!F 0VF#'xW f|VZ@!T QkDc2,Y:ZpXV5pXY$o0F9A~̳pXava0frTtr >LKFc#X!G=,WN?@8,O_\9T2_ҚO 2d8w^NP0in:SLaRa1Yd8]x8LB 7 F]ԢUyqXB.NpX'<5ѹk2]Pc%tC JD-0aZ|>'F#x#d!A40,pxra1 NLbꨜZBnwUdXVtOG Dn;Bw*i) g<5,5шX@c$G ?{|OLV`&a1XV<Myn$f~mSٳr eIaNI~z]DLNXpxQS N/Ҷ:: ^c2ϣmس0i i)"o,K0uX|LqXFZw@8,??? deM;ԼKrL>%9-a1i ߞɒdw-IU4+f&(]9}-q Vy-npX8VV*Ogu `甴ٵڛ 0ǧW9THa1i YY=I'뿐2aBj,Rbvcw8,'W97O?;!wonmM4"#v$ra4E>Iuz^ >ш}K's[dZ1!ʼnJԞ Ԓ#ڥ<4iQ.\uinAjT붲ЈpX8,?M?m2]Gꡑx;#/?\ ؙpXC-&SpX HfUx#RU*\+K?<'\aFa1i'ɬ.DH<ݨ?ɄrI؆(pX [Hzn3PdSl*be P;[,_fE#zX4uxr`Ck"`&PH0 'ՊRa\0Nv8P [3am`*˪hQ.10rیsSoTDnqX<8,?M˛?ҟS?RI¤VPH0 eYҟ鿵r?KBIX8V[-r1~4QBnA?61oUPMP0Ikb'_"8,OSdxŢш  j?'P-H+ApX8t=VK!gh* uwB(|J?w*9 vȭ"'-8,Vx@\UѫӊB_`0p^ɐb"7  2s~97pX;7]*}?w4s7BQ'@p[ )Z\&tye+tJ̄|,$CUi{:Vce뚎pxIߏF#csz rb?1٨[zx￸n]`j9ABwRtoYȭjC86q@k[%A0($(I Cچ[lIX8,˭ÒmT{%t&'إzr>&W-ZO{5Ybh'Dn]=etapX3}0ӇBQv{z j峛F# [ьU6HaF[&y(Eowl9>r K=fX[#[5a4Ɏ$Rg (KeBq>5PכfY]c:N,0jFLxIZeSnV,^I1lka1ilqYxL $bѽ^o?ϧZVxn/aD᰼ W )jy{rMyVH0\DnIN%OSd©}Y]ׄ@o|tߚ.3rQiqX4^5XZ`%SXIcC9AN=4mȭmqX e켬"icCηO1Lb [)dHQ|hpXṋk0uĸBܡúMó1gB\#t'ȭ 8,f?Yhgos tjbĆN=@E>s1dHUU|epX[Ma f{ⶲ!RY q ^g!rk< )2~[ZۺeYo."@L&>18-1yd_r[\?7M[Ŷzk Ɯuf(fhqX;M 4EVNLy k{ٳp8o 2శ~"9,[ \%GHla=K~2s>EYgYB F΋[OYA!,XYҽiݪ#$Dף +-DrXۥ,bGaz,9:T7a/vi`B&2(䏑ƤGA`)_X aށ?Ntۖ{|ze-&#c[<b3tXo+[F7 qCSd+mQ HX̕ڳF"s+òj.Hv{ʔy|CnXNP-eKs~qX?*@* qCSa ̇rIdF 
B0{މmJ끑`\Ǥeۥ)$~=Ĵuބ9*aO;O$ )u(c$+*2)㊢(aeWw)ò-'㷴. K+\mx[O:RBS+Aò[ą,c↦-ƛxL@,*/{Euٌ~,\79[;ŒՖ׹~:<=)r0߻$P ┾UJ8/ /JcEAY8,'x믃 R9z_[S!.mϮM$RB30Yݯ_˚|5a!nh -A`K]|WN+eUUY,h WZNۆj^*aTNW籘Ie1cu8!\*Ta!nh h4",F5$Pu L&luK!lGչŪ\OH׮xEB;ߘlOb'hwOZ楋p2jL/km/n+e↦uIV!̓tJr:X> j*#갬AIPBRQ֘~Gѳ8w5sn )rp/_H!AÂH r"LQ_X5=v/Zj '%4sVqIhVQ5S{Bc8nYd*"cVPH0g1C)HEh&CW0 f?O"ayH򦱜,CBa98-j8$Sã82fY棱^W8, MA0Bk2$\1OIEe㱇ҚXv^U>]믏T,?(Vw#zlwNt^T6r%F!SGB&˶ ȶaZ*7ˊu[.e6fqBmeʒP[6k`)J-ш ,,e~ܿMhZI'io1)2o\x`ef7X8d:2_rᑊ,O+_gd>>ro]ʳ'o.&Kȧئd~ ܳFqZW Y*w1):۲.rj.9}z`IMF'|!'n듌L-#?kQ>4> ):M'9۬I9Q2+($ \H,_Y^z p +"vnjҩ| V۳ut^wu}aBQV2kNR '=Oz^[y2cOe'U0P[xB ֣ZVLNY[BSH BEQ?y>@RIzLl|ju=Ŋ[eZ i+\k~DLr׈|zYY2X\4an]QWdJK>Un4Eub74hdڑomu&PF̱'o{yV&Ee WVrX]V|v)/y)nށ?P '9*{uԽ#%oʡIb$ormn x5SYGq3 EJ<ҷ@mk/#aZ WO\C]aBVBfhq+3Ll! ³Tjo{d29 ֠<|_sxB=d7}^sZL\̴& W."òWŞ=^]VS|ϙx^GdUjEZɿG  qCSxidZwuc}Ƕ,`^A+[($.hPny?wQ|H]׈T07aEz}eS6Ӟ,|N"Oi-A ">~6irNKJO_8|z>(]uXQpؑO$_L?8, Ms a=fs4"khd7 "Y-')={~|pXDqXaBUar uօ8<6qVa[9AKIa2o mZ[G)A3H56xYsg[Q `MH֖>G-Y+XӖG)y .!)i+ `0 tfx<&Od sVRն 5-A? a N\K?a |V#7679a6apXQk̮V+BgVPHOQ ÜÊ~nLNtp`"8=B{&!h=bmDoaє/1񳄐Ja )ؒ{b&X.mzh42쇤Χ8,8,9A]UrUSZ-T^Swgg8,- aezЬ^K(W[.G]Sa!'R?gEHY 4{ȧ bS /QtrdpX8,9)1Esx<|(7t:M0W7xEsvXhHehUP@>faŸ,QωapX"^1X'Nn#e?sΑa82]W8,X"7hSED*8 |8W pX8,yS02뺦Wg!ng-MOto Ϯ QzhS$8[pbg1@ncIR-`apX8lF< ~ E<[2ĤRNZ,K+Q٦H++^?Dv? ZapX8G!5 /9,K&V('$l <zkV-#Ó 9zT#X8,ˤ)8Պt}VyH~ڑ6*@J9A؇p\. [žU8,UI'vc!ynV"2FDT?`.9apX8n$,I!A`0a) C2f1dIqal6ca~}g"Zt"!+C;b42C)@ x jq[qo& a%RBeY9L7?DXbeR{q~ڌ#FOOa=drN^Q]ځ/Ʋr_ai JC\=! 
9KlJԵAi<3u <Ҫ#`8Arm$]d/Aoc'"[w ?8,K)8p6B^5$ѿ),-VE1ϙqXU.o0>Gc#W3wWh^NlLZci=NpX8,}ZPTFFNaz=NH.l`<s6a% Y>ӯ)g:Ccv$dÊ/\ɲ͡3QZmy#Ԥ%V^u'amGΧ=+b'=a6sN4ȏ;JUmqX8xM~slbHUUMHauMQimi5t H+j,pXo6:>}?gCěYTw*۲Yy,yO>9q1W8,3$qz+!498, EFx!^8,LV0svXIŨA O+20oKZedcap`;vpX8,Vmuɜ0N١$t[SyF9,ɄoGYW8ÓlX=!L+DjجVj*|܌d99aknk\Z׾PH@XV8N!U%sxVYeYKgaŎtɆv#ړ˛?aVY>|'Nn*+tſzpal ki$ϒɤ;>N QHRd<pXfY'Pd2Y.x+VRuDǍqXo6O.cx:]oSyB$^m.|V+Iֺ1%-dpX8|B$HzqTz2D($)2qXؙcZ jʲNH|hT]|Bd(auӔpptR]=LMVX_}dxz',64?HjPW; ZZ.iJ:{ xo +ML&i-n5=xFG!u]3^ʲTn^ZidJߘFWV+P␏“U ;)dR8k\ei5[{<̃|JO0Nig4#i[/7'LIpe^a|gn6t3_ i|X'/tzĭD}4Vy):w)X+h:]&t}u)qDwϛU()~|E3|dl KHȈnV3]jye99?^o36Qu j]Vm4nѫ2hzæ#y dK⽍jNiku7|(kD0w+޿% ;4XY7z<,kd]O,,nĪi|xO_8+揉â*l@9p*PvXۧ`36ͼ]a]`348lfq[i:+6úis̗/5uptN|8Oajo48JZa}U4 .n+69 5U 6syGM`WѼ K 9>RX״9l@<`/qw_]S`}8f%ߴ<\xW?' 7>x`! sUT\9>:8:񛦺b럗]iϿɿ+؇mwKŇgs7i% endstream endobj 25 0 obj 41895 endobj 27 0 obj << /Length 26 0 R /N 3 /Alternate /DeviceRGB /Filter /FlateDecode >> stream x}OHQǿ%Be&RNW`oʶkξn%B.A1XI:b]"(73ڃ73{@](mzy(;>7PA+Xf$vlqd}䜛] UƬxiO:bM1Wg>q[ 2M'"()Y'ld4䗉2'&Sg^}8&w֚, \V:kݤ;iR;;\u?V\\C9u(JI]BSs_ QP5Fz׋G%t{3qWD0vz \}\$um+٬C;X9:Y^gB,\ACioci]g(L;z9AnI ꭰ4Iݠx#{zwAj}΅Q=8m (o{1cd5Ugҷtlaȱi"\.5汔^8tph0k!~D Thd6챖:>f&mxA4L&%kiĔ?Cqոm&/By#Ց%i'W:XlErr'=_ܗ)i7Ҭ,F|Nٮͯ6rm^ UHW5;?Ͱh endstream endobj 26 0 obj 706 endobj 3 0 obj [ /ICCBased 27 0 R ] endobj 4 0 obj [ /Pattern 3 0 R ] endobj 5 0 obj [ /Pattern 3 0 R ] endobj 28 0 obj << /Length 29 0 R /Length1 10888 /Filter /FlateDecode >> stream xڍ: xTE93o&d2yIr! I@ GD \F.((vuq]a~3AuuuuuuE M˗*[ .wèOHuXV]w4n HTl?]X Y7,il[,?67ec E<nh92F,gKLnr%khIˢ_ay#!<lQZXBѯ`5Nbu[&C ,Ʉ2;&u{E'C͘G+! 
68M{ 䣰s<΋ 5 ԁp~|{ |Ad%WoM4=RNVdG=¬qH>n·n.؏) I8}"z x-r\Z)C hAP5 wMeP0 *}Octܫup/6>!G&t]HwpKpsḿh?OM9G1#r&3⃇Qxq i'S/6Їܯgw&up FF)W$Ƚd9N&_rZK9[i!EF>Z]S֠Y0O@$BhBaS-<'"M:нy7r.}HW; $: n(E0-ފ"$&dZ, Mhɵd;yRrgru6S:1]G[bG)/N⌜Krq\=-nr!M#SwS7n>~~/w__7Dx^ KiT/mK;>xpU>i-s&RTln%$$8֯ҝM$d,IJ#ıoI4Uh.=7}4@RI}A/-?Z-&>ZG&IY G's;:9<JU1HQt>AI7P] "kI=]Ͱ +c2AKOci}N[_v6ט^0Z8aE7E=#6 Y7a(Lѱ"i}:&h^ <) $1jJG,Q?Ldd,[zl3}0K}r\Z #Mo+nVA.7C*/ )3!OvDR9ݤЃY!nb@Ln"`&mnH0!R8txQ!nТ7G2䤔Æ8RDRb֎}/_IJ*3T"p]|A؀#*0臿?K}{.ܳV9MKY,>Aj` xQZn~{H3f=3cA~ѰǏ;U8 -*iW=%2~`q8\2S;4_O@/~h*x]~t~o$No;ky1 N>+IWP- 'Q6] dj+ëB#]/36lMKe1 ʮ?kc!a?4[?#/N"قfҀ&JKijo۽!= W?\{6^um?ґx⽯={M/D3)hK?T sy^k]MM^^4'e掤;ӟ4 0M^"^0}+dXɔ{F[լdS]̶eBVOQߖ0ѕv;ƟCnٿ"cZ$OL$],N,NG!\ŗ~1J,Y3~ۊ&$ٌ oJ|Mι}KS/GmB|,i׮U6kN#C\pOYc:Si&a7kwI$`$0cq}6-?8Ҋ~9_R{&-y[oy bo>ln|NG*RMVg^ŏ w<ڕR G̈́?G,LSMvlQE׮oLH-$qcHK5<2/.]$2ŬkI,@<"pn7q6E[zۿמMDl!_a'i8i8ϩ|8K!RƧ}n*_6_[l\bZf%. 5ƵSߴh r*%ykbL!!6KlfW!=ч#7åhgzYiw J1YeĶ= FEV%رŰ\X<Էm65#{|3G>)G \TрyTk܉qd6v847LV @Ҍ ۅ'=\5p6 k-Ú9N=<#iB[ƭl۹m ۝tO 9Iv}5:``l§'L\>5S!wO]yV,Md~b0jF,!"Bز TV%MN5wrUg}??"xC;P+?Y7|7RZpǗɍ|Fwyc@䫎L ZhSܤB|첀Upgw4l<xΥK:Q'x/v* &)I))Υz-3!)\>xcׇ$n)/q/ ?<7sU`i[=.W{,Dz3&̎8t$LSU_=9.z`+5Y\:$dz.#$6kN6TQ f^iԯbS.d}wN79';8?qk=#=1ȯC]PhleuPG,jaܿaԬllcg4t;rϥOay+mC~Y?2n8z)^_cK#B玔Dti" XAKoJf 8&"4!LfD/ 3`E؁_=`81d? 
~8A|[j9tZVx\`:Zlbh05uu%󛮟8MOtXoV`9R4Cd@Yk)RB7y6#D,DXpVr]aDwih bS8^+6pȘX{Șö s.FsBp..B<@8atоn о OEC'XMdȘ!D8pH`:c1XGPЀ9~L?dS.CCe-}ާj'K {4Ÿ'q"5=NR /{{Wy>}B- B B#"SH ]!ۜ617NA>Powa7az7]Bߢq I_~ 1j Fl##a@_ޛesG˭ǍyBd"=D3mp ײvW~ ׁ*F^f;>n"|܇|k7!2-kbH7gR,l@eɵHa;^.Fr ]VZVZVZ<]3q6r^-cPwTyjAh ݵ V@qvs+M-2T:GYzfUuR[*FMj_םZ /a/W Ŭz뫘Uk:u0&P1+R1"/9(}Uz/HvcBfV5|p95Ū?īFy{xlw .k_v|t3x,oUԦա፽SȆ32cAH˂Wxz}\?Ykz遽DuBZ6UKl{hۉCS; xa88&0+n endstream endobj 29 0 obj 7836 endobj 30 0 obj << /Type /FontDescriptor /Ascent 905 /CapHeight 0 /Descent -212 /Flags 32 /FontBBox [ -223 -211 1000 913 ] /FontName /NJIPXX+ArialMS /ItalicAngle 0 /StemV 0 /Leading 33 /MaxWidth 1015 /FontFile2 28 0 R >> endobj 31 0 obj [ 889 750 750 750 750 750 750 750 750 278 750 556 556 556 556 556 750 556 556 556 750 750 750 750 750 750 750 750 667 667 722 722 667 611 ] endobj 9 0 obj << /Type /Font /Subtype /TrueType /BaseFont /NJIPXX+ArialMS /FontDescriptor 30 0 R /Widths 31 0 R /FirstChar 37 /LastChar 70 /Encoding /MacRomanEncoding >> endobj 32 0 obj << /Length 33 0 R /Length1 29620 /Filter /FlateDecode >> stream xڔ `E7^U}tg&3$h #AnI eDEwEuޔw¶K65Kg4̜5(3_6cW dEh2kiBh$xFYPĞo?kޢL@ 9SgV/Bh& ϛC7L7(C .$P~aQ#4!9 u!̣ϸ İQMjhV݁.8x}@N07΋Gc8^uJQe=+{%zjU8Ck7#jGQ# F^6<;fiJ~۲;ډgetvܨ?z-GE&BОQbouCۀwt'=Z0]kP_T;b4 doCh5]{O't;2! ۻr$WڇtxK\Zd,NL- , »0ӧo/gSOUDЃ HI\h[z }lYE%h0fRRՔ@"38W|Nzp:wSC@Xhp7_V2W!3Jm{nxG|(MCӿq(8,{IJaDb!z^o?~d2y-}_Cwgѿ #5x^{f.>#}2bث`.dorwߥ&K+#~X= =;D6a3l!cͰ݂ďxn ĐO$Y@n$%c'?7ę cC1a|cl܃msrG"*"񝋏BM=W c**hxoۅ> ·xPf2Rj ~hPY%]IZ24^J>&0cb,)f1If:YlbZw/?1癋Yeg$mk^k6oBOJV)$‡b=phz2vY `H% ~158l J|Gg5'}<FsHx $U {xR^x$gJ869v!O3/WrPy=4hx8~I# .dnCsɧ zt?Dw2}(ny'~f"%p>f8Z̃OZ2yZ<԰gQxH 5We}<1x@-gzaHWV:m?HA}  A=ZCLΌA ľ&B3 {QnI݌P$K<Hq]H39AFM/P;=؞Gѕ܋F 鏀 AnFס!4è,5Nd'\,Yt=)phׯW>*+zt/ֵKI ͏C]Nf,fU1ɒ(JDևZb-l,r]h92*\RQ_7. ]~W+̕zX U.%P˻#6Bd58ߑo[)`LW.%KH$Ҡ Z픺݀0;ttZFNȔC:w׵zzpXzLV:[XEs݂]?g:rЀ,m9߫\6b7l,0΋iaF7zZ W584E:sMm.#l3[z//|Yf Vp蘉e2/Mј P4$3 mý^oсdl |(wv)y`$4yJ[HH4 GȑӖ>xe:,vGu~ 7 3a_Uu܄F-鬥-z)&,kTmubGFSHN3la*$Z > mdnG{A+i!QA7z7,uxu-H![tzx>껒-{! 
C_=WӲx/.?:y<P3ZVU yl%t_5B;ڽo> 㹨,=wA[>TbEn֙2"s sq0J X.ɖP<U8i/Y 4ɋy kƉ~u;wů׏'޴r@Dv#| ~Cw.\] YK? 袇6,'XS#I= FM5 E["Ub1b9# ⶟~VI#ʞeS}xhղ+"C/kKT?ޯBHM-00Z̹fb~o kyA 2]QpӪCkFSC65O|BgM=% ;fs Ydn^Af9|C58xS ɚsUgUU9sKpEZf ;VSgݩ34ݞLK|2O|oõz 3U`W!0P@|/  rմZU={Vv|DOwx\2dD6xexن;iN'oP3KQ#"Kp۾}阃pXgPTܪL#w!v+{ ҖR930[] a 쀫AM.bC;zvM* 93븮!8;ؘ~AgƦeayX"P rnݒ>_zz˛fNL2>9?>9;?Wkj9[_Fn䗘n6ioz>#>|AsUսPÑR #I1׺1PB:4mc; 2Ł{qJ,J.lDIԋ0vF fר&jH%{]5m.we$&M?KR|HYx_gcqth#zUk\v%lX,!#ad6#NW-\GJ9tp^A\נ 5F2ڝ!J+ eBkn؛\&Ux о VهÿȽ̿(.+u\e&Mml_)/^r.} 0"R ʚo|@'|s/AO7|}pꧏ<6)sgߪo9y~;'V>c.1R)Wj:"ivi2[flQuY:uV@fNgP#l0Z'k'z N`<ij1d;+|ky9W8 d ;Įqȋ 67&/eMG; x9Evc_rn֝o(q9ˆ;Ͻю;>.RK&ο={NeVDjNTs;{&Ƅ2ӄi۴"qq`6*P"0nkaPWkULA :Tt']4i6Ȕ\Ajd|J7n#{YvVY5Ķ[La Ѩ *6"yȪURŎK\)|+x̬շMntO5vԁ(F{5y|+|wt&t>dk]4'TEpG'SQkz* N H?tT V*~NZu+n̥*CPwC]I* A6'cؠaƻlZ>eڞ_kw=g'-ݳcmxv V~ڠx'`C}LC>JA@ֲ1cb9 X ,x P5"MnM#ka灁8Lݻ^| П>t}SWfr<#E2*2qLk-g!Dʀ@T9?O˼ ;/BVmMM7m/FYABQG"DYB2|tƽRxeT?؋y̵$v:2>VgIzf&X;D#,S?*kfLı,9"aB+ ljAH.!Q^'BcmJMQ1$ld1ۆnU 'fZ,1"'ϫz|gOWQ ug12%:QѣL Tb=%8rVˆA(ouxAc2` 30+G&|lCNm(2J'7y& }E]ײȸȌBi-۸L|Kb<AW$m" rA+=1~L4Jr!>2 Gלp`fQ8m@Y×`W3pk2MHl g\sW6~XkЁ>ϻ6ob1mH~SefΌf _ta;pԲ8#-|ԦO%sr@F{SA49уܳ̓U}]=N#jXsrbZR9;ygqssna{l~0?A[?2ۑ4c0~+(LhY=0 +i1 a}XHĢ78uRv n`X I<ͳ|-bTFzԫ_I}.qIˎfǟ'f"O^75]X{^L}|ȐGA'XBX(3oՂ$BSjrs{ :{iezO/ȉ^CxȪNew1q cl #c"~(nD=.$g'RhywMj7Nd5_?|g3gucGNvL鱳g?Ϳy'&t习( bXQY (Q hra 3$$T"TVZsMfo ֹdɲQ&~#f5wpg3Ó5 Ջ-@SD~Ϥx ?߽פy @S}w|h6&?mnEv̻f q =JB4XHƲ76έђ 5 [Qx 9^iQg,7&FX$Qseb=$@)q\!((N;i82IqDN:l,KFݝ:Ԗֽ-n5Gn Z=T?O-Xx_xa̶ߖ64Ռ\=b7MX>g,ꁂ۝,q!&"$|I*1e 8Kڠ}a% rfY\~: -\΄^%a{,fS9FzxEԯ›+D8V%Q q,eqD0Hbˆm#uK)r-arCDZg*p@Kmdn2l`e7HSe*SB 3-UUV7.HF`4\8On:B>Bfrk? 
rRD>iRlF)V*1P`!1D yZ7 *aM23v{vNx JU&@DlH~/d{d6Y2EC6'́fS"m; Scΐ_LV+dM :܇Ȍh^`p~?j6̌Pna} xmu}{gofy c $g,הs,1JEoRȝ҅*v {|HRCe=FSzm<n|9/?>!oy#'x2uxRJؿ~umxCE4^n CkkI |"'m1ayԬ^z5&לāxApH8l|'z&E\o_So=}uv6}S1xI9Uۀтaf2c[\֖^ҳ{ ʭSPeSK6͉eZ6F&HK,k6wo-YQBVjX-d%6*G\n7vpA5b1a>oogk*3],bhAc rX0&/n.Tc$)GGt4GkYfIɲB[0i6ݮ6[Wd f]y8f|R*ch:pev?G]j`AG9".&Ɍ@1yLf|jEgh~ԁr7o<=څ 5D"\1C/tthN.2h['4֨Ԇ2Hzd]cX:Nź:];Og>Ze玖m^Ą^x?6OfFS╏Oʴ2qתkF-oc }OǼtAcNmr$BFF e^Ct3WΊ7g t͈ʨ0@9ԝ;}6qF]Y&7EEM LW9GX⾳ "V$%G$C.rպHb\]{%WN:̓Q' > 5rgԴ oeLK]_ ڹH]|.~ό}1"2F@5=Q!)JrxQ9UL!V!,q$/HXژ$Dx^57 Z;8m>ijX-^%NXYl1O&l== ` 4:6ޣ= >`RR/ӰwsJ29\MS:qIѲ  AuQiLfdXԗM"fEqm68U{9 TYiOt?r`<()t k*KH:4j,V x1N(hfZs}Z_ D! 8ҘX9X.vnWYٌF0Έ0fq$N`OϠ ܳ,Eګ/]|<;mΝtl1K/=ehG D@gW2PJvFe;'Nf #}TfVz1$v8Lg sq|؊0nM-sykq-Ctrkui#LKl e@ dx,8”oJ醤c앨U.IH-.N=Ńjx:Y][! zP¹\O<&JW3%:Mg縵܌Y[~G\4wfny'3c6wyFwysP 0) 緑kt@wXilWuO*#)/MN pG=y_$ ߽(4r)<g3.GsY1cL|Udž$w9;cxL21pS5]-bg3qـ+xn8~gj5ɝw(I=͞X,綱]3|pYG\Qic>eHץg/0tWhka Xs%p\unK.  4n{۫\UaaT'Xk}sY\7P>sG?MzC\7K7G)WmѹaZnY?_4EsYVH0ʅBWvu&P2@/Sg_ߺ~v2ff2 VŘ{706;*067%_Pm^ڽB_ny2% c';,#y;vuGYĐ{1{/þr |#q><|m(*#LN|T kfڡYyIE0*d#+ocūV8*+!C'&޳ewےazƹ_ CJd~1zgzsa^攘bH141N`eE5RRpeʂaK==K )-L O-/i*_#?X.Fv삡ô*54X:#c+\ `,Qkn]nrEEy/[YNZ6Rmz/Y st N*ƧLv:3zM> Y ꣒X4Zr.S~Vޝnzj[V,߾'FF{LXr7pf,DͼǪ œ̧ֈՠ ﴮk燶.[{g¿ߛ|dۺGߵE dUFLVFi"NJg%IR$mVҒ+XHD: @HV/ǎ]p ѸR#qo6Ya˸9N`YrvUr& e#H>NJFey 皪M#Lkw `H#VeFFT]CчDkY:M4E5ُ%GA9q :H׶f̭Z͛̓e']zfo@Fl7O%Ƣ;3cebCvc^[8 3*jYcJiP"Ood˟1慧xcBTLjuZ:i,ƿ~̟,:m1 Kh&'b4dl4x ȳtH4̶a.OD E$ 4ͫ_zczv̳;A}K#ڀ0bc^:T¹jH.j&2~TĜ*.'F |'d$ wo/ \?}OO,]Z='yMjcz;cx;ٮifeysw 2 ?=lHt.̻TY5+|aqLL=PR9! 
aaHӕ5:at\Ux$ 3f?ȶ,h]#:jY{lj5x fcPcEBy}}]@y+z&^e8gE\m Іچx&qQZҖtIӴyyiS9f 7FFMWdwt|#Ӟ-/04!ȷIjP|%8kcCV9\uu Rn7Aٯ}.)cd́=ٻfbw-I}_nA;R0uѮІjlu%DrCEJ$GU9 !{NF#UfksC8| xq:x*"l\;+@v6Qf<Weɺ\/7ɬH-us͛7ykRg!f.N\BuOwsy]O-YӋN^j9rCSֻĻG>z]͜zi(~?`"qRCe _n n rru\}TT})!{VD3A*d2&'=v}X0:|yx[6!Zf-& kYd/iN2 '}P#f]Y{;ZR|ؗR?7>~n§v-Yxj6 ]5uSwҏǯgz cE}ٱ[cG3E,/YEITUR#bH 7X ٱY6h'=%-ȴ[3:\@3krv5^yU}uضƫ{?]0~AR|BsHM[.%lF>thnOg8I$O2MR&s9yi2GmX"}~QriZlZȢ+_r_' ~s[01Q,*2 ŜkM\owwwxs'ln/H"'xW qP.W@l-Ǹ뤜sHN)\{uF#Õ/Njq+~}=Hu G|KNսo_`pCS'\<;n~Ct vg^%74fl]c<5GR\8P #~Q@ra$q֏rj62aXB(ZE}F9iA+x}=+Gl4pXnyҊo^{0mv X>uԔh_n9e͗ HBv :q;.]&y+t3 0wG|[}-۩ISdNL`V%SXͬZTZO*~֌sU\*+@nzG>>B37yWև[7b!}}zgoڙ2^ByքLY&$X"={!TkiR0\ t 07rJv-G!8X"T(Fqx'H3 2[--E72qt#kۅbz@G~=&^vo˟џ \"#N \br<6W9X4Eݹ\){d$Ɉ! !9 uS'>ڰn K!<!>o{=9~Yx(tuF z>uNGs=n`cg7ƊG{ulLXeesprW$;6E,oLfѦ!;ߔ.*qs9z}A.ԈCM,ClXF ęeM"вOThB\h)usBŵx;nzJهo?}BeAڐUDHFJTQ&kL[ǰI˭V*0D31EqZN-ϯ;#F$YM"kV+н҃z*!j9{,+ CU͢bˢn1YA 6VXj4Φ* xBUeyM&QӺ!Jmd.֕V5V7i7U8} Vyk%EG9/A1YVEL`֪.n=%wV5ȡ)a7RK֖> wЖ 1}|@?B-e⌘>[ejmِtz|%D-J^`MmgM+'*uuœp-J`S1aF:.Cn# )`ԋwTe;lbT;>i{-rCoKf\,wȝt@4| y3\z-MfaE讄KNէUަU.6LL/6 [wϘϵe3LfE2Doy&(KFŢHE:d4^ yMjX^3be6lޢɖ)t.r{Ak&S~9BF [C߁ӾΝa1],o%F0G3GH#<*;fON[1$^K[h3q]2:4TXZ] pGG֌,!kĜ1ca`c,08!Ix$β ,\-d5s@plIؽd\v7l tdqU7wOLLO7sDAqZ2bS2<2\D)/p=A`ߐq~gM߯qE&d2l6Yݴ=8}fmVڥ*Tc6 9Fؼ&MgB9QՍP7hw^n4aJ,6xv<9dm >dF ٝgI0'>Ss2v$>>2x%*=5B~ZDb1Gw̼ZCWb>DF?@گU9/O1jQqY>|TF~ zbXteтFlE葻"B>^<s|G9D9Mېcﮍ1Ƿ 9ېC'NZX&ސnG-mG-mG-m~pMGee*O_ZFOk$Dh$I4HwD$!QDO9(ю~[eFHGE$ZH^4oxLN8 鼚 cj4:OQt"ܧ7?)a46mnŖ: 0Ef2ЭEt!N! Mwp׆n9х؅ݹ%GvGy4O˕ݲ_^ C=  -;|n`SkGp1pSl+r8pq|հ 5pSs9l~rh1_r9^&^ɓaRӣ\!#(g0@+20HaCPa!qy!e".T3E(STeQ8e8L&VEn7ql.~S<%gRd/I 7eI˥ %:Tuo zݱ}gƢkޡuԢ83'.詏+Uw"EWAgC[PS?\U74TS֞ɲ*[nY ˬEtE7+Qke! ·w&5^suNyk 7̝9'Aᘹ.fA!RQ;rNRQ2ԁ? 
ntkø>+km.]1$ͪ;f25a\(,, >S?ғ#} 1OSKA[j%\bH+!~ȃvj%`@_K/E0IdB& y endstream endobj 33 0 obj 21740 endobj 34 0 obj << /Type /FontDescriptor /Ascent 905 /CapHeight 0 /Descent -212 /Flags 32 /FontBBox [ -223 -211 1000 913 ] /FontName /FLLUTI+ArialMS /ItalicAngle 0 /StemV 0 /Leading 33 /MaxWidth 1015 /FontFile2 32 0 R >> endobj 35 0 obj [ 278 750 750 750 750 889 750 750 333 333 750 584 278 333 278 278 556 556 556 556 556 556 556 556 556 556 278 750 584 584 750 750 750 667 667 722 722 667 611 778 722 278 500 667 556 833 722 778 667 750 722 667 611 722 667 944 667 750 611 278 750 278 750 556 333 556 556 500 556 556 278 556 556 222 222 500 222 833 556 556 556 556 333 500 278 556 500 722 500 500 500 750 750 750 584 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 350 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 1000 333 333 750 222 ] endobj 10 0 obj << /Type /Font /Subtype /TrueType /BaseFont /FLLUTI+ArialMS /FontDescriptor 34 0 R /Widths 35 0 R /FirstChar 32 /LastChar 213 /Encoding /MacRomanEncoding >> endobj 36 0 obj << /Length 37 0 R /Length1 16904 /Filter /FlateDecode >> stream xڭ{ |T{+wf7熕<Hx'`&)"Ix($ꛪB& hj`V[Jߙ}}ޜsf̽3s9g"Ҡ> kUG7z@ov_+k[#$eiUgR^Yv݆|>xOBnt99yذgVwuL<] cZqcw@~1;V-פP~5kqӐݻ\ݏBeЋhD }?r!Co?9) Zݥ$ R]`:r8$197~m,CG @GqzBn4B -բ'PA&(shz]z',B#lԡndAqB5'AyJ"8 -ItE==> Oa\h !Zf"ԉǛȃh7['WIY[hYtN#3?YnF;Ch25 Dzp#&3}P8' 9#vt;z.:j\‡CaM<{=\@I:H+y'6MP{<{7] 0r<|/xH `b>dK9.YO C r@wx@7q]+29E>b3{؋ܭg:MH}k'^r1o) '%& hsd<֐)̆kY W7%a'Gq.cbL00/3af a;]ncObc\Ͻ,sȊed}&(ɛMȓXa^a%d.uaexHLF21w0? feY瓓˸ f) %!1@̚\9b#"Yqn>g;TA6*ٖ)3~+$g U7 Q [}VܟnyO&;q6v?^ \CX1Bu{'tJ-. 
ƃèP+#3n&<icF1Aw.&`  bsqAf"h6Gm-TkZЭ܉hS/?'AZ`-ӡo[_Xla;oooC7`Vb)6,Sn6==˽p:Wa|?h9([f#k,sT$u+z <y3;,ka+GMZNnKFɇWehN CZ|.}x7GlE'Уm"3 /zBMeFPL2gCA'X':(!wP+VN*6"Z^VZR\TX g2ܮ n[LFi5jR!q,C0B"OAԩ94뀂_Y'!ĥj•5ES551/P,'"/j}0^8ҷZĨ!Ji-=A.8.%/﯋ժ_UN ԐTC*铱 u)ЩW[jiLcqyVK]i͉$pM3| }Xjf\jFXAGv ۆykw\ݒ`:Zi0[Hxmni]鯳h80\ŭ @shlomIР@Aǔ_-_+$jk013dǓgN$*֎Z`ꟽMlWrr"!%A~"~72OJIiqeb#4P%@OZ|0rrV w%|H(k|}B7ŕ%%%I% ҉p8MD^3 },Kr"I @@|dZxQ'd}ZRyu:nM8\QN%>ҦP/ynyE[7%)~_㬅-B]|Bsȥy)bl$57{a -?.P[ K 0մ0ҚJ#= O }IxXJPSSU4W%#Q[-/w\U]h Cg}5UV1FFQ~(+֚ b7WUb< |LX-6 #2E?9GÐ`@^ΝjH9*+A@e9,@izcd,ZGr)//6܆!.Zi27,).-MitKQ!A\h/*\bṡM45/jfw@`rNԴkJ/'כp+=r?yq+V .(qd2xg]e/||功/)ّ)Eeӆ^h,mjH~<-GJVLp:2.#3ujK5t"މth B'v(ys']l eECfx*ҲR53A.?遝tu+uўҵ|4TQ[2 7ͮxulާq{3m#v5Zmmɸ=C^BJ'#lȇJae}' I .ɴr+"9ww!+f61HFTVH \t8wnOr )NON*쑙&n;Nk3 Pr$X\RB0Ulwӥ!DxSVG z~Ӫ_Kkq]_/w~~qk>O]o??s}7u׮R ndnA.SS9gâ rlHsotyw9^f6`= 7DqrgDtYQbQ( 0=N4 #M.&_:A5~lQQ/pv,CZ;tn+a+oɏx<jpoPdI7{JZ"lIeCRɱ]9>oh2m3h~_{)ӝ'ث,.`s=v%nLK2ðt"bS;W1[nYRWUJtnNN+we(T:pjzZIcYMŴ+oQݢϵlwe{%+Pq,7SQǫi,S)xuƝ뭽~~ ./l ȸ۽[ ~!y n>y{={=< ψGYqF[Jy,(%b1Q=o5IGC4yWzxW̲b|?ò3aݞynHsMc9F9887eJqՄjpM&;7'ʳdy{r,䕩g>';& T5,$\#,U]'lЀluPaT]ZsFʨumj6bFUwG"nFFjFVh[,-۲B, 5˨-)%ܚ-lT?p]0G/G//QH @PP%84RDD4^r6rN3@ĢN|%\ry5} 64=ņQϕźZ%[o^}OCS?<cff;,fa^(yOjj,GTo-[,{,BZZ+*Hb⌂H(*dtfh8A:^':=Q u5Ɣ~71 (8=Z^j½ŮaX w Hrxmᚖ2 i'I>8CԾF̉jfdE)Bȑ9R*SrJ)'Q3oCT i~$K^'L?X=m~DZ6?9Ȥ׮1mW,멊1zs&i|"l4Wʠ~Y~Aٲ\[Yִhuٺ`=n[-=%i{7hYtzE9삹/I+u/W4V:)48ۗJ)M@k"MSI6+'L`u->6x{NzDUBMbۀD(m9 c1El_ f DAgͶ>ɐbRYxV$"N8f0ǫ3%\o.pg,όt{,ܔ՝5u:Z',_$.:BxnϽ@`r 0hW+ f431|C+kDܞ|g/j[}߼Qm(ܰyzg]+衢}gjɔv'w,-M3r=]w.(,Z.ww ٱ~սV_ZO2baeC\F^{Ѷ׏n^y?'DOX)D\rڐdڪVcӘʰ,W#7U{X*^J{8zpS VA#TvCPw %B !d8/~l (͍ʘ!)*,x؀IwbNfNt?ˬh O)=%nNBZ4ƇlNLbqzq|RZ}mwfqZ8?i| f#3o7ly<͒,sNɑ0?z[:c?n9fy|%}g,0oǷ)wwba +cn ;x yz@6Ta#M0,>Mq3gٽl_h5}yUD֜D󬅃״A7;m–ң#ĶִL!bQS9;Yߞ c@$gP 4FjE )R&-/`ȬN^RH{q:DoD:NQQxeuS:q|tL z€pSQzv8a :}ڸlsJVVqӊ);b U7|?p[}0yeGP/Q3+q{6v\L!J՗)T]H6 waݤm״kº5[3lLQf0vNeEDfƼ$6R/Ĉ"Hd 1rf3adM ӈIf;횰חN.H=;zsVH\vo>CRo Rw< ܖI/S?ɡή.cY15D(s7bQij.16z(.a.I}Jc~`ʨ* iZU6;Bb!'~l"읎Y?+7%J4^,pAB) 
{zffϗDE,{7!b$lxbU5xM6y.ߩeacʌSdMsCp!2'o#JMlJURr~R? &c$a!UWUcߨ8fyQw@q\KجS)zcr"Tt4%H9$a|}ޔ4bUD=gS(}^hjJo~ 6kw=KWY3Pjb5BU %At-Gi{UKg`U m%>{үqYת9U@2Vǜz}b2I60+e`L> nB-4sqKP qіgzu \pyR!^t!xHI#[]1ZnˈR6]N~ Rccqrjm˱%L?24GE@_8{?4~HaηIN)4E`:4z*E1:B> 6C p5YʮH\Qnit:] ݄ 1C{Soz~2dAz–Ɔ٫Wu\?cΥ7{0I_s(ZQ-GSTԀYh6O#uu0pDB)c,)1yeh93dqH'đ3U*#w|ĜGHVn*-`&uF?A"p$6dހ1 ၯ3ρL1 γGtBT`jq`ah s9<SPb ZT4f@@ .Cҁ@)G oz@>iq3L3N̚!r_` u7M?muRK@ JQ!OGm@AArArilTgSN l:&J>[CU0 0 5 wZ<pZ*^4r Av1eAɖ3KA"gosJUD uH%GZ=#E*Ӆ~@`_х,5s`fU $[f ck$EA1rpY\٭S2RP+Ee[la0c*&hT (>`h:QVs و켌Kieݲ>^rl>5ëuZT79tR ۡ\`hhQ\C? YPOPRwz .s.C럧LT= (jj0RTT|V*o@*wtnd[*8tYB0 3p=Mf"F .Ϡx'$|wIR~QנAbm&$H**mS6T#UeJx#bGG/CmG{焵%iVSpڭ}ͭ]֖UZCZ. ;(<#.Ba$C* C)@Ɔbg(v5\j]e45AC@]1@m#5K iRC=8VlV"R EA]C]@\dPvڡX@9к](@hOO8|)"Z0ɤ|PԜ{' 2G8O$zZ䕨bȷ";8tK 0#6tT\CSa8ya<=Vddg݃ %pd=.L@,o{_Ch7*|`F&b U _; xڑC0J><1w6I]Wv 9^'FkYI"y\gȝ4Q+t BP(d ؈*"I>c)f4O(&N498alDseayrv<ܘP4/jV%a涀Ңqqn~{k+nLtN!j競"Jkq!Z_=(>"zſX36iI<њ(dFkcb 8!jnJZ[㍤n6-k[/WC^ PZjȋHզK@Mu^oxTiYY~hL T#.䗞'.Z !0wAX/=LAÜ` U"Ze,eзl_ ՝V VZ0Q(NoIEe|]n @<{rkS89N0ٵҎ%}Kj}`G(W; vKj:Ď:_GmǶ4^֮mlmm==F~Hjm=&>&876 *Puk)zUOk,-IM,&4rr( V'ewU,M< _5 #k݊k֬YKaݺ0RZX9zH,KVeįEONؖ؞@p['ݻڻŻ;=Q-؀^fh^ Zu@f׮[CXj..\RE]bs P0C?mx-ar:Zbk+Sx$|h0Efh tHU/BPzXք1tЯhMx?{pQ 3@OzuD*Ikm( endstream endobj 37 0 obj 12448 endobj 38 0 obj << /Type /FontDescriptor /Ascent 891 /CapHeight 0 /Descent -216 /Flags 32 /FontBBox [ -167 -216 1009 913 ] /FontName /BVZXKJ+TimesNewRomanMS /ItalicAngle 0 /StemV 0 /Leading 42 /MaxWidth 1000 /FontFile2 36 0 R >> endobj 39 0 obj [ 250 778 778 778 778 833 778 778 778 778 778 778 778 778 778 778 500 778 778 778 778 500 778 778 778 500 778 778 778 778 778 778 778 722 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 778 444 778 444 500 444 778 500 778 278 778 778 778 778 500 500 778 778 333 778 278 778 500 778 778 500 ] endobj 11 0 obj << /Type /Font /Subtype /TrueType /BaseFont /BVZXKJ+TimesNewRomanMS /FontDescriptor 38 0 R /Widths 39 0 R /FirstChar 32 
/LastChar 121 /Encoding /MacRomanEncoding >> endobj 40 0 obj << /Length 41 0 R /Length1 12072 /Filter /FlateDecode >> stream xڥz XG[s \= (Q2hMjTrjD̽d5\kȡ}&Od$9vɚlfz~{7uUb:5ͭ0@c^]ig7}+?@9\yW,[y߻H55c9k՚ KLv,7]vimX.ZJ rX\Ӽfg:<,h]~=em[ob6#Vg7`a9׾F dFք P 6H;$Ckqb~pk0~p>9K-X[tm//H<p @/Ʒ2 }KGz0y"J8j+ {mĄ+þ^3wCA2_H-qIj '}>kExNy[qee0f8#8$G}^<y3U.!eaA1$jõs\Y'I9 ~W(Pq5k|ߑd \+/+c1s:4|gh`" tӃܞ"p`!bXdd GZi9.R w5?o?b9تX{lTψ 90y\ ͸kzvn=>w#3|$fRDƓ \MQr$oFLI|B|5?_. ѱ=1s'R "وh>`9|3샟ሏÓƷ"r2 '"#I>) d6CdLn .3 ^ͳ{99GAdfjLG(:;i}O6}O@?_qVΎo&pӸ+\;=x}>o%ۅ;OOtStfVEt8G\!nw7'$|Ѓэ+X,|Hsv8M{sj~[n4LfPwldQr I?B(I{j[ 8Lbm5dVk:5B'.S` /P:B9$ ! ҏxa)p=3_*ZN |J~I> `+wɟHV!17[AS?]O17`:2\A@`*] rϐ1r[l<e Ǹc,y{I\C 6·Q[(bFH&oP{[]I(ס!Bx?0HFy5uR^k:\IAQ*%90fj:Q UW>lLIq‚Qyܑ9#YLGHOKMqd{ͪX&A/KN]$WmMa2%}X|QES؃U҄=MR R7`28LIOTyj|>O,mվOx@_+bOkU'L<5M:k{}j>?0vZs2Y3dYS|5aM!e4/ ϞNz¤joI|–FU0a]UXԆnOw=4|˚65V[θ.㶪΋[SW;;wz.n}ivmSg-*鳥Wje$ߪΫpCR:0w'%%Rjo2\mι{AҖnGlId3ˇ۴Frus$lFaR$Åeйt,@WxnCKXjTƱz?,d+>O緀ҚD.[X10aP>ss_U8 ZyL~ަ-*L>n sH`!1'/{`Ij  aZN$/`-C-ݛ|Ⱦ4-9,,#fո0q?, yj:Ϳo;ܖȅB\*Mh*"'^1L !c?"OmXi^S$vҒ  \ZI;90uvv/iEYv6u6GbK|Gw4 h$ֿ;5\Cn0Gv]twPB&5tga[σVõa%#=TҚRZ+UhZ4TG`i|fX aZ7%OQϦe= 2zepK:SHUYW@G`L|Ō ļ#F V5#A~.c ϻ$EbZ%vLfD(d-qk ﲻ^r db0.ż-ym-[͎BD,I3WT3Of (rrV^xFbS* آ|z1PA 05sCP`S+{^Sئg|qdHޕ^$/M3|9.ɤ/J &^ordN✛~}!8"7_jd@H/J CXemteVF\Yۺ5YEWO-l\Y>}Ո$),⽳7&wZYu#ٕٓW$x g'g&$m\ZS((YeNLKd2@ZAZeڬiBZZ , % 䤤) B`L`1ƨX~D-eƟ)q:qqk\歷5;``BA|4?ϞхbbƶwFDmlcJUkLٛtYˁ5W\xɡ(gMqڶWȱ]LR{܈v ?žkLWk:zjrn+"ENެLtv}\EB4$,YyUMdѭ 'AI9#,!0+fjK[X34n@}> ac\I9ӖŖϔt.x~_<=dk,j"7&ǯ%|nqy[gvE۫q OYHy:#gɜ̧ct,:/yrW\_Ƒ"}ErsY,F.!UEYoW9d|],:U<#ٚОg gϰW``jwbަ~#Y?;Fm݁09E8̔SWmqyGyWMۺ얛M\2=k2):Zڎ}qcx\xL}wC&/ # *3J])Tܜ3\3F )وՒY Bfc'ԔDg7KE|kSvk޾(gUw?lI3w ʄb8"X%P)*;ف14ۖ\ *p$9z;ǔr}z'Z'Pp=Xl*P+ǩQMznqd;$#pN: ηb'!}LFom[4ًV_''wK. 
(בOV| |ظQa%ZLrmpo4" ϝfy)4 8:ScӲ3&%ٝ ZLǝ٣gRcmC<߮:;}!fI׭.kN`u,(?Uw^{UvuykfFtx}2ߣx{HTj2ɖZrnU\)n]yJzʪ v0RNdgR&YYIޫE LLbeZlZkMOaFKoōbBbrOؕ0hY;׸.Ѣ m+hL s087e۠qEF|I aщ3,;<7ߙ>zIhNX`w{o7*XdPd!x AE3[6`l,C~[T@/?)c#<J \EOf)V'^5hx}Ѕ)4XJ%@ h{*nrNJuxyk9|xVO RHGh2p=J,!G#e"I*HMX Z7!{åmm4I m*'SBL)%Tw3:flp ov OaQkA@V2a V//  $A,=95l"7 ݿ!7"iH!RdB0$娗p68;K#R͒JiTO/7vcS(֯KX͜Iy1)SDy?øT \0 \mã`ML"/݁n{7"7~ѼowC#oi^Unɼ0^4KJJDr%<iDfDO]+dG8CM9YqAىHE,цƢwM5WN-Hh8Đs# @ΚV0v y[7x$@5ux7 mⶸ1m L= N2jv26f.kfygJę嘛YLQDAiH@QG&!sn}IVi9C̚5~ F?W37ilugevT:ˉ5~q;~řT2~NFgo]S ooʎ9c/;6^*?cۥ/\=YmyMmĂܭkl?wȟRd޵[hvCxR`W0weee>ϒi'F[ sV=[?BMdc6Ȭ #H{3;n:y#GNۊt`Âi:k%oS=<LOv=w#cAVq& ,IMi,"QD#]7֒ЮMH ~.**[ovIDvLۜ%$劓2@EG?xuwz :߆}yG!h :vh0]kkIl2E ɐD$΢(Ţ.fY/*IB:Iy33C2w@2" 0~᫭VnKQ mYֲ\[.1ey%{f .Zrw*~gs0#I[}rsW-T|+%'<ᵡ=9:Ix}yf3JR?W&^ h9cbzX1Uư:\Tk_/0mi3[O!8|FG(m\'onэ}.n-8s}(^p3.R4!m(lU `ڤSkZΟꖥ3i0siC@-L)0 `:Z90|.4wL$SbYc H-F0Y%3ut"Ox*ŌPoM;+-/玎vLT!\Y-`\ <~79=Mn{SR,A8jS<cmnHN,#_k-^ŕZ\ŖDZ|J-YZV5z2~߂6#%(* 摠JL =%1JQ*Cgak57Z-#T Ӊ;iJydtip Xٓ;](qϳJ1ߓۦ>=ZCQȏ{5"_j`zF:M}%q~ޣFTr#EݭɝQիkrXrBejԨ LI.uX.vMƋ]bP*bWؕ.%jmd$$58H?L:%:żW(i\8)(I8u&P<_, I$l\᱁. 
ԅًB݄ހa+B`~(BܬT;>Uͷ4vm T骴MV)_tn>P^&bqX]{7X&Psmri[x6& PΝW0-5`Su 5xЈW VqÄu D4N#TRShǩ@#a=&jK endstream endobj 41 0 obj 8620 endobj 42 0 obj << /Type /FontDescriptor /Ascent 905 /CapHeight 0 /Descent -212 /Flags 96 /FontBBox [ -262 -211 1062 913 ] /FontName /JBPIEC+Arial-ItalicMS /ItalicAngle -12 /StemV 0 /Leading 33 /MaxWidth 1015 /FontFile2 40 0 R >> endobj 43 0 obj [ 278 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 750 722 750 750 750 750 722 750 750 750 750 750 750 750 750 750 750 750 611 750 750 750 750 750 750 750 750 750 750 750 750 556 556 500 556 556 750 556 556 222 750 750 222 750 556 556 556 556 333 500 278 556 500 750 750 500 ] endobj 12 0 obj << /Type /Font /Subtype /TrueType /BaseFont /JBPIEC+Arial-ItalicMS /FontDescriptor 42 0 R /Widths 43 0 R /FirstChar 32 /LastChar 121 /Encoding /MacRomanEncoding >> endobj 44 0 obj << /Length 45 0 R /Length1 23228 /Filter /FlateDecode >> stream xڥ| xTEpUv{tw:N I'! D HdSG*Jtie˸y2ϼuɛ(O ;UnZ9uꜪF!GA[fg+ sV, |Ð;o]p<>Zpm=E.7kwc~Gh P`,Yx˲U`C%sf˭Xh;e֪V?͓ Y{xm|n[sB-ݺt^kmp.иkR@Ft>tFԏqOE۰@XB(ڌOH}BDO^w- tFo,Fhԟ桯Q<PkPg8hB_.@6t7WjQmEfv+wR:DG0ZFI4a3I c^v &tځz=XOQq*:h:ZVh/z[p#w;#u8jBW'ϲԈ' =-/ױ{5'R/_2>_JJ=z FC" z';YZb>z-?$nf֢fr %#t h'ԇ>6l ;ѓ=q! =:zs~ n7%x;~|ϊ=l?N%LMH}2AנP::</Odl#ϒ_3Wrv${;n-I%$/N>uS1BaT Yt>E3 ˭x=~ D7 'ut7y< )licbL7sU0;Ndg)L)w57{{{;WsVKn^_ - dȮt;PI4;@s <_x%ށO`0"أL!$' 1Qf3\,9,cV3ed213/35';]>anq=wgs_^ 8 W27Ad/k(atՃ( |P sY _r[' Ku?>5r#nnv{ j2d?>y_7[ ĕx 8)^TzXcY#@ws *gUIS7}}G?`. 
h7,2ACT5:[ f=tn*,; 54"IT0Xe9Xw հb>)9yV Vu#;A=JvIݖZp!wj|&XWOr.b_cťp+^wq迀409k=7nMb0ޡ0&t33(Af@VhnNX`m=q= : vŒ@"t w]P2vۈeП -m c3UzOGעCjQ}h wVHXF1b RC"1)(ˋm0 ̣DT cx!vZ3êVJ.*E¹9`?ȗ]NfѠɒ( G[pK ƌ),(uYAK"EWIZj+kPskjb%P CBnq XE iTFe- .W 6~@BgdVUC4I䒨t"MPFOa#|yQnRjU|h;+> RoVl$'54ۉhزqVw}v(6=d- NMo$a 'W)3zOm$jߟϚzQRr40z'Gv)h9ie@FsIL F 8 ݤF"M2H$Fn璄9Hcׅ\Q|u\jTi"DCJ9" {QЏ(ROry?eUsPu,tA߉dy) 9&͡+l~ Wh \?6W!sUՐkny!RQQڌgTNCc۷X5{FNFzHmX%^ie-In;慎~TRa,ˏ {w놏ɟ fpWwW͵8bݢo*6 9nNNÓm;sU3magm4 )ͨ:fECȬTV8y7+GYiEeY IWmٺ&M,Kwǁdˡm`OU[yv ͟ֆJ7;&w0MnxY8xM@3LUX?`,˺x`iVT|Tk>@+r*˘pc_I)6ut*ɉ|Fyu}}uu}#~}ĵݝ%+=cQQQ1Ǖ00;]ݙӷ7kO Vm}}3=#Sʓ Kuqeb ^9~-b0`G7&o-+%u@  CB \;k[O891xΕ$r\ZP2K \Uܜa h  D\m}i*9RUp@B9bX:ڕK'W4j;f.޻s'Û}mϙE׎kRx߮y+zJ@뀮nE}eeQÓOl!}Xx M:T`3 F+T4⭰  }7rLdJnyXgsjclf6b+5DQhUe5IpbՉUa%>zaA&z@Yp X%bPUKinGw*K\8r;"V9R|svWoYJNG `vі)o\eu7ɏ8yv9%YvT KD?N]Poě7q re^ }GsD+#" ;\w==hOdw%=9WZZEzRQs5'&`kj"Y8+QHP$O#y`1dO&;xxRZ=9фMRIfeI{?mkio*G4Ghһ9P[s4XEFqPygCZd]b\cn?}cku+zfNll9aRgeڥ 1OT'668v)Wmri-' V:cfVWW3K5`MXZ ziHI9hf#lůH_)_H>60{aZy.W%q3-DQz̠c2-t%:r͠9W[nۙ3tt^Rs%"ɹeD(ˉ=*7d -n ^k 0{z|s֔>DtLZQh=0Km8zrWfh<*8*\ 56g q*ȵs}ZV,ǶsdH1B ԇ^ON@E!2*`if*5i[ ی?8?˽8tih,-$G ,\$mw|im'`}K? ǻazRu:&,u <!Ua190lxLNueL`(D72el%Y")dR,dI7+*i|P:,~ح. m#rPX>Md? +U=d3{YJ&&_?.Bq*a"ZQ#M8@5Q~"ꙣ6E`=NmwwY F$"UJҼʾ nЯ5g`[o_\Z y\v`-2H"qDɊZk}>>;#>%ҁ0=9-Ѹ+kkL-hN>R D Xe a咷Up/:y,ٓ\|_?}>Aߎc'KZd*xGJ'ž(.wWƚnݨCƓ̋rXoDE2ی6M0V6-7ަ/VI+|>^r$8Ÿxa㯌1` Io78Vņ[l6b@юD#u"ȠoO ׵p T"${ȜvvAS'?ں5|٩f5S)udk5 &\%cycWNa~xc19Ns'se ]B/x'3!iݿƥ`VOW}(dowEq >iiB|˂ࢼ+}k}ۂzKq1 ynOlR+ٯض]w=C|T.0d焪 P`0/t ٻ=粭,l>G ,vNw@l /qra2ya=U~/4Ӆ<#F܂X*lQ6N9H3P`m7oi14GиMP.F[\sT.!5C*zvZ< vN&K!``e_F=d ֪埁#_0y<80*>[~>[F7\:hX,R6tdQhxvUzeݚo6'wy¡8)tZ_w msʳbo9io˜W!^=R5T,%\*K[%e" Qr:=C"U.Ak*16VN3\ԴW4Nej`U[N-nYr<{r ?|@h:TIP@HWWKµteݼc#y^gur^0G 6z[LKoz1تw͸*&Mh5h8 b$l꺕?t-Xy%w|`,L^d[|V6QTK]E<֩a1 .e];iUMn9Ds]q/ TdQ$|%/C^Ͻɽ-}+Y;"ԿJlXgыs% ycb2-qidcEYTTʶ2fQ)`Z@DP=M?ݡ7?=w&J OEg{g[3Nj:C3Ι575ηc[J ;μ9tyݒ߼ØO/w]KSvЄVr21!<*]A0&f1uq:dt*F`4+dg8_5g "1t{Dцǐzy C7' ,S]ubW~k0FF'@r 漥b$FZ.hξ@9;? 
EiҠjA\R`3- s(st b{ZdЮA SPT <WCP<[Kdj,~ D jX`%(z.5,UtНN ]ml-ӬRi{=Zȵn qU< 7wJJդO/̌~rۢx:{oX!k C)eQ(2sOVd`` 79hȫ+e?kl+ 6p2f,2'K 8Ht 3c-+.À;2&[95EãixX![R0s-O9}BM 3O~w/onHHK٨\#Gڹ˝M_Ԭ:)1`1a?-é422w^=(/؇vޡ111抅hq.0S]{A}"D~_7ƒ'+Vv!'YG,9 Gd>-fDԫ&7˂%Y% gAف\pƩՉPD3C wFZr":XqgG# "%TZ=Lduڲ<)okVx5eo,ҍ GA;th9ri1WcfSm%O_9z̦mIBQX]lx@T`uZg E-N3}pzU)7SAɺHk0/8=> ܕ:N?sW(5HxtΤˢ*M7?6O8oh㽝I?y`{HO1q.u1ܟSsX2)|n9[ VLzlѧF?QOd/ <$)R*ge̴쳜Q̟vAς5=8+m]&Ӧ(P仪4M6ԐpNiHkokoK)6Jy32=ꦺW_5|r1~S]wk&X ȫj/oCbivvXvضG)[Xzo?]0 [dڐS^=7βζ62<׍3L4Gf 9p U[f°cn{ v'׌YZݻ;a6ґt>q)K bg PlC{PYӘK[Q]&JDLh΂U^Ϭ~7Ĕ6WѶsvk.NSv#S } ,a\;YN(X{w:䗪VbuCdqp -]֠,Pv$]q4yF厺vM opIO쫲6O vЇ@?`yŰ_!9&c,{|A&M,‹{B-ۧgb)YD$㩰J"OiKޤ(H-' GHrbl&mRLSI51&P,)b2ԮJ/J!4;v~;v~jn5G*@E{XZrLt?'ߝ mƩQrŰ %eAܤigѭ V%e]NC0hVJ^ʃv,TC,]o3|fq4?=+gݢ]=ݟ`r{J~M0"9Q")ᡈT!Pa< L!{ܛAah;-͐~ߋ ŵ8(/CHBeSB.-WKh!(@CA!lb|h<+~i#_驀btt27\gv(<)533<'0KY3o[6[o`wBG 2Y%ko%h.5Ub4 !~VÒ-ET7jȩڥf\4rsO׌RDo9 y ~ A]aգ4C 0 h"jDd49]P] !tc҃j\= &]`wthq&$L(%WP4^P>~r^xd(i29KO:tN}</utD^@ΰJ MG=L%:}~KLMT@')Dtg%mdkрl%[UWܒL 7P *`MW遠BJ6t~Awh[X!pV r+A}F+`F  Fr0Z0Z4hh0x[EhFhF 005FhF C 0TP5 0TPC0TPC0J0JD(( 0JDF00`4`#a( `(g9}}F`F`i}}d~Do @9('儆rPNʉԗi 6!n/n۫r7 HFBHF00F`tF`th.@15.$Jqנo4hvktoG\4 E4:@L0 pރ)WY0Q% >O &~"}|O^b(E@-D R5$@ϖ7FbL^>^-V"WcVtTI`IՇGOB GFfz7NgߍA vCB%REr!ߤfg< !!!@@G[$#ݝ^JP3νp_I€@=ـZl걘20n EVd.H ZnpS( ,_k*E~ Nkȏ?UT)T\?N3odw Y5 d- $6XJ^ip9 )5/<4.:T?䵟y}<$T6zjyuC!C]5MWR_M?X5mUS3kW-Uh}^D徱iF$ v 2 G:T{]YCh<L GEE3Hf.ȵzx{?y@94]'Z[5$Sf4QQI~gҏ؅F/?/|/nϲ,_V-ފPC`JCޢ tRe 헤ݩ^xAe;b&*%C UX.9;ǑŚLVveReYWqyJa'XJ*4TEؚhkeGnGQG%}n([ign-:@H. 
SY>F[w/5l![Qrat puY endstream endobj 45 0 obj 16663 endobj 46 0 obj << /Type /FontDescriptor /Ascent 905 /CapHeight 0 /Descent -212 /Flags 32 /FontBBox [ -168 -211 1000 913 ] /FontName /DCEBTF+Arial-BoldMS /ItalicAngle 0 /StemV 0 /Leading 33 /MaxWidth 1000 /FontFile2 44 0 R >> endobj 47 0 obj [ 278 750 750 750 750 750 750 750 750 750 750 750 278 750 750 750 556 556 556 556 750 556 750 750 556 750 333 750 750 750 750 750 750 722 722 722 722 667 611 750 722 278 750 750 611 833 722 778 750 750 722 667 611 722 750 944 750 750 750 750 750 750 750 750 750 556 750 556 611 556 333 611 611 278 750 556 278 889 611 611 611 750 389 556 333 611 556 750 750 556 500 ] endobj 13 0 obj << /Type /Font /Subtype /TrueType /BaseFont /DCEBTF+Arial-BoldMS /FontDescriptor 46 0 R /Widths 47 0 R /FirstChar 32 /LastChar 122 /Encoding /MacRomanEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [ 6 0 R ] >> endobj 48 0 obj << /Type /Catalog /Pages 7 0 R >> endobj 49 0 obj << /CreationDate (D:20071026025051-04'00') /Creator (PowerPoint) /ModDate (D:20071026025051-04'00') /Producer (Mac OS X 10.3.9 Quartz PDFContext) >> endobj 50 0 obj [ <84b319b6553ec814a39272025fa035d1> <84b319b6553ec814a39272025fa035d1> ] endobj xref 0 51 0000000000 65535 f 0000040882 00000 n 0000000022 00000 n 0000093924 00000 n 0000093960 00000 n 0000093994 00000 n 0000040903 00000 n 0000165957 00000 n 0000041009 00000 n 0000102353 00000 n 0000125347 00000 n 0000138690 00000 n 0000148213 00000 n 0000165779 00000 n 0000041923 00000 n 0000045837 00000 n 0000051009 00000 n 0000041293 00000 n 0000041608 00000 n 0000041549 00000 n 0000041568 00000 n 0000041864 00000 n 0000041883 00000 n 0000045816 00000 n 0000050988 00000 n 0000093073 00000 n 0000093904 00000 n 0000093095 00000 n 0000094028 00000 n 0000101955 00000 n 0000101976 00000 n 0000102197 00000 n 0000102524 00000 n 0000124355 00000 n 0000124377 00000 n 0000124598 00000 n 0000125520 00000 n 0000138059 00000 n 0000138081 00000 n 0000138310 00000 n 0000138871 00000 n 
0000147582 00000 n 0000147603 00000 n 0000147833 00000 n 0000148393 00000 n 0000165147 00000 n 0000165169 00000 n 0000165395 00000 n 0000166016 00000 n 0000166066 00000 n 0000166231 00000 n trailer << /Size 51 /Root 48 0 R /Info 49 0 R /ID 50 0 R >> startxref 166321 %%EOF kmer-code-2013-trunk/sim4dbutils/0000755000000000000000000000000012641613357015421 5ustar rootrootkmer-code-2013-trunk/sim4dbutils/pickUniquePolish.C0000644000000000000000000002451012322046702021011 0ustar rootroot#include #include #include #include #include "bio.h" #include "sim4.H" // Derived from pickBestPolish.c. We report only the single best // match, when it is obvious that there is EXACTLY one best match. // // Example: we have ten matches, but one is 3%id better than everyone // else -- that is an obviously unique match. The rest are noise. // // Example: ten matches, but they're all about the same quality -- within // a few percent id, and about the same length. We pick no match, and // silently discard all. // uint32 statOneMatch = 0; uint32 statConsistent = 0; uint32 statInconsistent = 0; uint32 statUnique = 0; uint32 statLost = 0; uint32 consistentTie = 0; uint32 consistentMatches = 0; uint32 consistentIdentity = 0; uint32 consistentTooShort = 0; uint32 consistentNot = 0; uint32 totLQ = 0; uint32 totMQ = 0; uint32 totRQ = 0; uint32 qualityDifference = 5; uint32 minQuality = 95; sim4polishWriter *W = 0L; void pickUniqueSlave(sim4polish **p, uint32 pNum) { uint32 identitym = 0, nmatchesm = 0; // Best score for the mList uint32 identityi = 0, nmatchesi = 0; // Best score the the iList uint32 matchi = 0, matchm = 0; // Difficult choice here.... // if (pNum == 1) { statOneMatch++; statUnique++; W->writeAlignment(p[0]); return; } // Find the best percentIdentity and best numberOfMatches. // // identityi is the best percent identity of all the matches for this EST, and // nmatchesi is the number of matches for the longest best identity match(es). 
// matchi is the match index // // nmatchesm is the best numMatches of all the matches for this EST, and // identitym is the highest percent identity for the best numMatches match(es). // matchm is the match index for (uint32 i=0; i_percentIdentity > identityi) || (p[i]->_percentIdentity == identityi && p[i]->_numMatches > nmatchesi)) { identityi = p[i]->_percentIdentity; nmatchesi = p[i]->_numMatches; matchi = i; } if ((p[i]->_numMatches > nmatchesm) || (p[i]->_numMatches == nmatchesm && p[i]->_percentIdentity > identitym)) { nmatchesm = p[i]->_numMatches; identitym = p[i]->_percentIdentity; matchm = i; } } bool matchIsOK = false; // If we are in agreement on what the best quality match is, // see if the best match is obviously unique. // if ((identityi == identitym) || (nmatchesi == nmatchesm)) { statConsistent++; // It's clear what the quality values of the best match is, but we // don't know if those values are shared by more than one match. // Count the number of matches with exactly those scores. If // there is more than one, then we cannot pick out a single best. // uint32 numBest = 0; for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches == nmatchesi)) numBest++; if (numBest > 1) { // Dang, we mapped this guy more than once, exactly the same! // consistentTie++; } else { // We claim to have a single best match. See if any other // matches are close to the quality of that one. // // This says if (p[i]/ii >= 1.0 - Q), then we're close. uint32 closeQuality = 0; for (uint32 i=0; i_percentIdentity * 100) >= (identityi * (100 - qualityDifference))) || ((p[i]->_numMatches * 100) >= (nmatchesi * (100 - qualityDifference)))) closeQuality++; // If only one match has close quality (the one we want to save!), // save it. Otherwise, label this query as multiple. 
uint32 length = p[matchi]->_exons[0]._estFrom - p[matchi]->_exons[0]._estTo; if (closeQuality == 1) { matchIsOK = true; consistentMatches++; } else if ((length > 100) && (length / p[matchi]->_estLen < 0.5)) { consistentTooShort++; } else { consistentNot++; } } } else { // Otherwise, we disagree on what the best match is. // // That is, the match with the highest identity is not the match // with the highest number of matches -- a longer match exists, but // at lower overall percent identity. statInconsistent++; // Estimate the identity of the extended part, assuming the piece // matched in common is matched at about the same identity. Or // just give up and say it's mapped to multiple places! } if (matchIsOK) { statUnique++; assert(matchi == matchm); W->writeAlignment(p[matchi]); } else { statLost++; } } // Delete all matches that are spanned, report everything else. // Matches that are close ties in span, but are clearly lower quality are deleted. // void pickCoveringSlave(sim4polish **p, uint32 pNum, char doCovering) { uint32 *bgn = new uint32 [pNum]; uint32 *end = new uint32 [pNum]; for (uint32 i=0; i_matchOrientation == SIM4_MATCH_FORWARD) { bgn[i] = p[i]->_exons[0]._estFrom - 1; end[i] = p[i]->_exons[0]._estTo; } else { bgn[i] = p[i]->_estLen - p[i]->_exons[0]._estTo; end[i] = p[i]->_estLen - p[i]->_exons[0]._estFrom + 1; } } if (doCovering == 'g') { bgn[i] = p[i]->_exons[0]._genFrom - 1; end[i] = p[i]->_exons[0]._genTo; } } for (uint32 i=0; i_numExons == 1); for (uint32 j=i+1; jwriteAlignment(p[i]); } delete [] bgn; delete [] end; } // Just a wrapper around the real best picker, so that we can easily // destroy polishes when we're done. 
// void pickUnique(sim4polish **p, uint32 pNum, char doCovering) { if (doCovering != 0) pickCoveringSlave(p, pNum, doCovering); else pickUniqueSlave(p, pNum); for (uint32 i=0; i file\n", argv[0]); fprintf(stderr, " -q qualDiff Only report alignments where the best is qualDiff better\n"); fprintf(stderr, " in percent identity and coverage\n"); fprintf(stderr, "\n"); fprintf(stderr, " -cq Only report alignments that are not contained in some\n"); fprintf(stderr, " other alignment in the QUERY SEQUENCE.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -cg Only report alignments that are not contained in some\n"); fprintf(stderr, " other alignment in the GENOMIC SEQUENCE.\n"); fprintf(stderr, "\n"); if (isatty(fileno(stdin))) fprintf(stderr, "error: I cannot read polishes from the terminal!\n\n"); exit(1); } // Read polishes, picking the best when we see a change in the // estID. sim4polishReader *R = new sim4polishReader("-"); sim4polish **p = new sim4polish * [pAlloc]; sim4polish *q = 0L; W = new sim4polishWriter("-", style); if (R->getsim4polishStyle() != style) fprintf(stderr, "warning: input format and output format differ.\n"); while (R->nextAlignment(q)) { bool doPick = false; if ((doCovering == 'q') && (q->_estID != lastID)) doPick = true; if ((doCovering == 'g') && (q->_genID != lastID)) doPick = true; if ((doCovering == 0) && (q->_estID != lastID)) doPick = true; if ((doPick == true) && (pNum > 0)) { pickUnique(p, pNum, doCovering); pNum = 0; } if (pNum >= pAlloc) { sim4polish **P = new sim4polish * [pAlloc * 2]; memcpy(p, P, sizeof(sim4polish *) * pAlloc); delete [] p; p = P; pAlloc *= 2; } p[pNum++] = q; lastID = (doCovering == 'g') ? q->_genID : q->_estID; q = 0L; // Otherwise we delete the alignment we just saved! 
} if (pNum > 0) pickUnique(p, pNum, doCovering); #if 0 fprintf(stderr, "Uni:"uint32FMTW(8)" Con:"uint32FMTW(8)" (T:"uint32FMTW(8)" M:"uint32FMTW(8)" I:"uint32FMTW(8)" N:"uint32FMTW(8)") Inc:"uint32FMTW(8)" -- Save:"uint32FMTW(8)" Lost:"uint32FMTW(8)"\n", statOneMatch, statConsistent, consistentTie, consistentMatches, consistentIdentity, consistentNot, statInconsistent, statUnique, statLost); #endif delete [] p; delete R; delete W; return(0); } kmer-code-2013-trunk/sim4dbutils/summarizePolishes.C0000644000000000000000000001554612322046702021251 0ustar rootroot#include #include #include "bio++.H" //#include "fasta.H" #include "sim4.H" using namespace std; #include // // Current ESTmapper generated summary is: // // GOOD: >= 95% identity, >= 80% composite, >= 0 bp // cDNA-genomic matches 28715039 matches (24921387 different cDNA and 81 genomic) // Matches per cDNA 1.1522 matches/cDNA // Matches per genomic 354506.6543 matches/genomic // // cDNA COUNTS: // cDNA: 27440540 // cDNA-good: 24921387 ( 90.8196%) // cDNA-missing: 26071 ( 0.0950%) // cDNA-zero: 2493082 ( 9.0854%) // // // New format / summary should be // // // X% identity coverage: 50 55 60 65 70 75 80 85 90 95 100 // sequence-genomic matches %8u %8u %8u %8u %8u %8u %8u %8u %8u %8u %8u // Unique sequences %8u %8u %8u %8u %8u %8u %8u %8u %8u %8u %8u // Matches per sequence %8u %8u %8u %8u %8u %8u %8u %8u %8u %8u %8u // Unique genomic %8u %8u %8u %8u %8u %8u %8u %8u %8u %8u %8u // Matches per genomic %8u %8u %8u %8u %8u %8u %8u %8u %8u %8u %8u // // usage: // // Report raw numbers at 90, 95, 99 percent identity, 50, 60, 70, 80, 90, // 100 percent coverage: // summarizePolishes -i 90 95 99 -c 50 60 70 80 90 100 -p polishes-file // // Report percentages at same // summarizePolishes -i 90 95 99 -c 50 60 70 80 90 100 -nf cdna-file -p polishes-file // summarizePolishes -i 90 95 99 -c 50 60 70 80 90 100 -n num-seqs -p polishes-file // // Read from stdin, default to 95 percent identity, 50 percent coverage: // 
summarizePolishes -p - // // Buckets? Cumulative? Both? If we do buckets with size 1, we'll // use lots of space, but be fast. Compute correct bucket sizes on // output. 101*101 entries, 6 million sequences -> 190GB. // // So keep sorted list of values, find first bucket that is <= the // match we have. 792MB for the example below (3 %i, 11 %c, 6 million // seqs). // // Read in all %i,%c. Compute each identity x coverage pair // separately. 48MB for scores + 24MB for a pair. Memory efficient, // maybe not compute efficient. // struct match { uint32 _estid; uint32 _genid; uint32 _identity; uint32 _coverage; }; void readMatches(char *filename, vector &matches) { sim4polishReader *R = 0L; if ((filename != 0L) && (strcmp(filename, "-") != 0)) { fprintf(stderr, "Reading matches from '%s'\n", filename); R = new sim4polishReader(filename); } else { fprintf(stderr, "Reading matches from 'stdin'\n"); R = new sim4polishReader("-"); } matches.clear(); sim4polish *p = 0L; while (R->nextAlignment(p)) { match m; m._estid = p->_estID; m._genid = p->_genID; m._identity = p->_percentIdentity; m._coverage = p->_querySeqIdentity; matches.push_back(m); } delete R; fprintf(stderr, "read %d matches.\n", (int)matches.size()); } int main(int argc, char **argv) { char *polishesFile = 0L; uint32 numSeqs = 0; char *sequenceFile = 0L;; uint32 idLen = 0; uint32 id[101] = { 0 }; uint32 cvLen = 0; uint32 cv[101] = { 0 }; bool formatExcel = false; if (argc == 1) { fprintf(stderr, "usage: %s [-excel] [-p polishes-file] [-n num-seqs | -nf seq-file] [-i val ...] 
[-c val ...]\n", argv[0]); exit(1); } int arg = 1; while (arg < argc) { if (strncmp(argv[arg], "-polishes", 2) == 0) { polishesFile = argv[++arg]; } else if (strncmp(argv[arg], "-n", 3) == 0) { numSeqs = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-nf", 3) == 0) { sequenceFile = argv[++arg]; } else if (strncmp(argv[arg], "-identity", 2) == 0) { arg++; while ((argv[arg]) && (argv[arg][0] != '-')) id[idLen++] = atoi(argv[arg++]); arg--; } else if (strncmp(argv[arg], "-coverage", 2) == 0) { arg++; while ((argv[arg]) && (argv[arg][0] != '-')) cv[cvLen++] = atoi(argv[arg++]); arg--; } else if (strncmp(argv[arg], "-excel", 2) == 0) { formatExcel=true; } arg++; } if (polishesFile == 0L) { fprintf(stderr, "ERROR: No polishes file specified!\n"); exit(1); } if (idLen == 0) { fprintf(stderr, "WARNING: Defaulting to 95%% identity.\n"); id[idLen++] = 95; } if (cvLen == 0) { fprintf(stderr, "WARNING: Defaulting to 50%% coverage.\n"); cv[cvLen++] = 50; } fprintf(stderr, "Polishes: %s\n", polishesFile); fprintf(stderr, "numSeqs: "uint32FMT"\n", numSeqs); fprintf(stderr, "sequenceFile: %s\n", sequenceFile); fprintf(stderr, "ids: "uint32FMT" -- ", idLen); for (uint32 i=0; i matches; readMatches(polishesFile, matches); // Find the largest cDNA and genomic idx // uint32 estmax = 0; uint32 genmax = 0; for (uint32 i=0; i #include #include #include #include "sim4.H" void pickBest(sim4polishWriter *W, sim4polish **p, int pNum, int uniq) { int i; if (pNum == 1) { if (uniq) W->writeAlignment(p[0]); } else { if (!uniq) for (i=0; iwriteAlignment(p[0]); } for (i=0; i file\n", argv[0]); if (isatty(fileno(stdin))) fprintf(stderr, "error: I cannot read polishes from the terminal!\n\n"); exit(1); } // Read polishes, picking the best when we see a change in // the estID. 
sim4polishWriter *W = new sim4polishWriter("-", style); sim4polishReader *R = new sim4polishReader("-"); sim4polish **p = new sim4polish * [pAlloc]; sim4polish *q = 0L; if (R->getsim4polishStyle() != style) fprintf(stderr, "warning: input format and output format differ.\n"); while (R->nextAlignment(q)) { if ((q->_estID != estID) && (pNum > 0)) { pickBest(W, p, pNum, uniq); pNum = 0; } if (pNum >= pAlloc) { sim4polish **P = new sim4polish * [pAlloc * 2]; memcpy(p, P, sizeof(sim4polish *) * pAlloc); delete [] p; p = P; pAlloc *= 2; } p[pNum++] = q; estID = q->_estID; q = 0L; // Else we'll delete the polish we just saved! } if (pNum > 0) pickBest(W, p, pNum, uniq); delete [] p; delete R; delete W; return(0); } kmer-code-2013-trunk/sim4dbutils/parseSNP.C0000644000000000000000000004475512322046702017225 0ustar rootroot#include #include #include #include #include #include #include #include "bio.h" #include "sim4.H" // Writes things with mappings that don't contain the snp itself to a // failure file. Otherwise, if the mapping is above the threshold, a // line describing the snp is output. 
sim4polishWriter *multiMultiFile = 0L; // multiple hits, at least one is multiple exon sim4polishWriter *multiSingleFile = 0L; // multiple hits, all are single exon sim4polishWriter *singleMultiFile = 0L; // single hit, and it has more than one exon sim4polishWriter *singleSingleFile = 0L; // single hit, single exon int smpass = 0; int sspass = 0; int mmpass = 0; int mspass = 0; int smfail = 0; int ssfail = 0; int mmfail = 0; int msfail = 0; int failedsnps = 0; int failedmatches = 0; FILE *validSNPMapFile = 0L; sim4polishWriter *failedSNPMapFile = 0L; char fieldDelimiter = 0; const char *sizeTag = "/size="; const char *posTag = "/pos="; int positionOffset = 0; int outputFormat = 1; static char * findSNPid(char *defline) { char *ret = 0L; int sta = 0; int len = 0; int i = 0; if (fieldDelimiter == 0) { for (len=1; defline[len] && !isspace(defline[len]); len++) ; } else { for (len=1; defline[len] && defline[len] != fieldDelimiter; len++) ; } #if 0 // This was used for a set of SNPs with a non-standard defline // structure. It returns the field between the first '|' and the // next '_'. // for (len=1; defline[len] && defline[len] != '_'; len++) ; for (sta=len-1; sta > 0 && defline[sta] != '|'; sta--) ; #endif ret = new char [len+1]; for (i=sta; i_estDefLine); uint32 exonWithSNP = ~uint32ZERO; uint32 i = 0; uint32 seqOffset = 0; // If the match is complement, then the alignment is printed using // the reverse complemented SNP sequence, and so we need to find // the offset at the end of the sequence (not always the same as // the offset at the start of the sequence). // // XXX: Previous version had this as "p->_estLen - pos + siz", which // seems wrong. This version does what appears to be reverse // complement - size. I don't understand if this is a "size" or // just a "1" thing. 
// seqOffset = pos; if (p->_matchOrientation == SIM4_MATCH_COMPLEMENT) seqOffset = p->_estLen - pos - 1; // Find the exon with the SNP // for (i=0; i_numExons; i++) if (((p->_exons[i]._estFrom-1) <= seqOffset) && (seqOffset <= (p->_exons[i]._estTo-1))) exonWithSNP = i; if (exonWithSNP == ~uint32ZERO) return(0); // If we are printing to a file, continue to find the location, otherwise, // just return. // if (F) { char *SNPid = findSNPid(p->_estDefLine); char *GENid = findGENid(p->_genDefLine); char SNPbase = 0; char GENbase = 0; // Now, we examine the alignment strings to decide exactly // where the SNP is located in the genomic. // // bpToExaine - the number of bases we need to skip in the // alignment (counted in the snp), +1 because we are currently at // the bp before the alignment (so we need to skip one more space). // // XXX: these used to be int! // uint32 bpToExamine = seqOffset - (p->_exons[exonWithSNP]._estFrom - 1) + 1; uint32 examinePos = 0; uint32 genPosition = p->_exons[exonWithSNP]._genFrom - 1; // Recent runs of dbSNP showed that we are off by one (too many if forward, too few if complement). This is a hack to fix it. // if (p->_matchOrientation == SIM4_MATCH_COMPLEMENT) bpToExamine++; else bpToExamine--; while (bpToExamine > 0) { // If the SNP alignment eats up a base pair, decrement // the number of bp left to examine. // if (p->_exons[exonWithSNP]._estAlignment[examinePos] != '-') bpToExamine--; // If the the genomic alignment is not a gap, increment the // position. // if (p->_exons[exonWithSNP]._genAlignment[examinePos] != '-') genPosition++; examinePos++; } // Adjust the quality values, treating the SNP as a match always. 
// SNPbase = p->_exons[exonWithSNP]._estAlignment[examinePos-1]; GENbase = p->_exons[exonWithSNP]._genAlignment[examinePos-1]; p->_exons[exonWithSNP]._estAlignment[examinePos-1] = 'A'; p->_exons[exonWithSNP]._genAlignment[examinePos-1] = 'A'; p->s4p_updateAlignmentScores(); p->_exons[exonWithSNP]._estAlignment[examinePos-1] = SNPbase; p->_exons[exonWithSNP]._genAlignment[examinePos-1] = GENbase; if (outputFormat == 1) { fprintf(F, "%s %s "uint32FMT" %c/%c %s global["uint32FMT" "uint32FMT"] exon["uint32FMT" "uint32FMT" "uint32FMT" "uint32FMT"]\n", SNPid, GENid, genPosition, SNPbase, GENbase, (p->_matchOrientation == SIM4_MATCH_FORWARD) ? "forward" : "complement", p->_percentIdentity, p->_querySeqIdentity, p->_numExons, exonWithSNP, p->_exons[exonWithSNP]._percentIdentity, (uint32)floor(100.0 * (double)p->_exons[exonWithSNP]._numMatches / (double)p->_estLen)); } else if (outputFormat == 2) { // The format is all on one line, data fields separated by tab. // No spaces -- "sa=C" instead of "sa = C" // // SNPid // GENid // genomic position of SNP // sa=c -- snp allele // ga=c -- genome allele // mo={f|r} -- mapping orientation // pi=n -- percent identity // pc=n -- percent coverage // nb=n -- number of alignment blocks // bl=n -- alignment block with the snp // bp=n -- position of the snp in the alignment block // bi=n -- percent identity of the block // bc=n -- percent coverage of the block // // The first three items are mandatory, are always in that // order, and are always the first three. The others are // optional, and can occur in any order. There might be more // present than listed here. // // The order and content should be consistent for any given // version of the software. 
// fprintf(F, "%s %s "uint32FMT" sa=%c ga=%c mo=%c pi="uint32FMT" pc="uint32FMT" nb="uint32FMT" bl="uint32FMT" bp="uint32FMT" bi="uint32FMT" bc="uint32FMT"\n", "a", //SNPid, "b", //GENid, genPosition, p->_exons[exonWithSNP]._estAlignment[examinePos-1], // sa p->_exons[exonWithSNP]._genAlignment[examinePos-1], // ga (p->_matchOrientation == SIM4_MATCH_FORWARD) ? 'f' : 'r', // mo p->_percentIdentity, // pi p->_querySeqIdentity, // pc p->_numExons, // nb exonWithSNP, // bl examinePos, // bp p->_exons[exonWithSNP]._percentIdentity, // bi (uint32)floor(100.0 * (double)p->_exons[exonWithSNP]._numMatches / (double)p->_estLen)); // bc } else { } delete [] SNPid; delete [] GENid; } return(1); } // Just a wrapper around the real best picker, so that we can easily // destroy polishes when we're done. // static void parseSNP(sim4polish **p, int pNum) { int numMulti = 0; int i; // Count the number of matches that have more than one exon // for (i=0; i_numExons > 1) numMulti++; if (pNum == 1) { // // Exactly one match for this SNP // if (numMulti == 0) { // Match has one exon if (singleSingleFile) singleSingleFile->writeAlignment(p[0]); if (printSNP(validSNPMapFile, p[0])) { sspass++; } else { ssfail++; if (failedSNPMapFile) failedSNPMapFile->writeAlignment(p[0]); } } else { // Match has more than one exon if (singleMultiFile) singleMultiFile->writeAlignment(p[0]); if (printSNP(validSNPMapFile, p[0])) { smpass++; } else { smfail++; if (failedSNPMapFile) failedSNPMapFile->writeAlignment(p[0]); } } } else { // // More than one match for this SNP // if (numMulti == 0) { int pass=0, fail=0; // All the matches are single exon if (multiSingleFile) for (i=0; iwriteAlignment(p[i]); for (i=0; iwriteAlignment(p[i]); } if (pass==1) sspass++; if (pass > 1) mspass++; if (!pass && fail) msfail++; } else { int pass=0, fail=0; // At least one match has more than one exon -- the correct one // might be a single exon, but we don't know which is which. 
if (multiMultiFile) for (i=0; iwriteAlignment(p[i]); for (i=0; iwriteAlignment(p[i]); } if (pass==1) smpass++; if (pass > 1) mmpass++; if (!pass && fail) mmfail++; } } for (i=0; inextAlignment(q)) { if (q->_estID < estID) { fprintf(stderr, "ERROR: Polishes not sorted by SNP idx! this="uint32FMT", looking for "uint32FMT"\n", q->_estID, estID); exit(1); } if ((q->_estID != estID) && (pNum > 0)) { parseSNP(p, pNum); pNum = 0; } if (pNum >= pAlloc) { sim4polish **P = new sim4polish * [pAlloc * 2]; memcpy(p, P, sizeof(sim4polish *) * pAlloc); delete [] p; p = P; pAlloc *= 2; } estID = q->_estID; if ((q->_percentIdentity >= percentID) && (q->_querySeqIdentity >= percentCO)) { p[pNum++] = q; } else { delete q; } q = 0L; // Otherwise we delete the one we just saved! } if (pNum > 0) parseSNP(p, pNum); fprintf(stdout, "SNPs with:\n"); fprintf(stdout, " single hit, single exon: %6d\n", sspass); fprintf(stdout, " single hit, multiple exons: %6d\n", smpass); fprintf(stdout, " multiple hits, single exon: %6d\n", mspass); fprintf(stdout, " multiple hits, multiple exons: %6d\n", mmpass); fprintf(stdout, "SNPs that failed:\n"); fprintf(stdout, " single hit, single exon: %6d\n", ssfail); fprintf(stdout, " single hit, multiple exons: %6d\n", smfail); fprintf(stdout, " multiple hits, single exon: %6d\n", msfail); fprintf(stdout, " multiple hits, multiple exons: %6d\n", mmfail); fclose(validSNPMapFile); delete failedSNPMapFile; delete multiMultiFile; delete multiSingleFile; delete singleMultiFile; delete singleSingleFile; return(0); } kmer-code-2013-trunk/sim4dbutils/coveragehack.C0000644000000000000000000001411212322046702020134 0ustar rootroot#include #include #include "util++.H" #include "bio++.H" #include "sim4.H" // g++ -o coveragehack coveragehack.C -I../libutil -I../libbio -I../libsim4 -L../libutil -L../libbio -L../libsim4 -lsim4 -lbio -lutil // Flag that tells which side of the alignment our contaminated assembly is on. 
// 1 (R) -- if atac the contaminant is on the left, the assembly is on the right // uint32 orientation = 1; // // WARNING! This is stale code. It does not compile. The fasta interface has changed. // void readATAC(intervalList **coverage, char *path) { char line[1024] = {0}; splitToWords S(line); errno = 0; FILE *F = fopen(path, "r"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", path, strerror(errno)), exit(1); while (!feof(F)) { fgets(line, 1024, F); if ((line[0] == 'M') && (line[2] == 'u')) { S.split(line); uint32 taglength = 0; while (S[8][taglength] != ':') taglength++; uint32 idx = atoi(S[8] + taglength + 1); uint32 beg = atoi(S[9]); uint32 len = atoi(S[10]); if (orientation == 2) { while (S[4][taglength] != ':') taglength++; idx = atoi(S[4] + taglength + 1); beg = atoi(S[5]); len = atoi(S[6]); } if (coverage[idx] == 0L) coverage[idx] = new intervalList(); coverage[idx]->add(beg, len); } } fclose(F); } void readSIM4(intervalList **coverage, int which, char *path) { errno = 0; FILE *F = fopen(path, "r"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", path, strerror(errno)), exit(1); while (!feof(F)) { sim4polish *p = new sim4polish(F); if (p) { switch (which) { case 1: // The query are contaminant reads, the genomic is the assembly if ((p->_percentIdentity >= 94) && (p->_querySeqIdentity >= 80)) { uint32 idx = p->_genID; if (coverage[idx] == 0L) coverage[idx] = new intervalList(); coverage[idx]->add(p->_exons[0]._genFrom, p->_exons[0]._genTo - p->_exons[0]._genFrom + 1); } break; case 2: // The query are assembly scaffolds, the genomic is the contaminant assembly (one or a few contigs) // uint32 idx = p->_estID; if (coverage[idx] == 0L) coverage[idx] = new intervalList(); if (p->_matchOrientation == SIM4_MATCH_FORWARD) { coverage[idx]->add(p->_exons[0]._estFrom, p->_exons[0]._estTo - p->_exons[0]._estFrom + 1); } else { coverage[idx]->add(p->_estLen - p->_exons[0]._estTo + 1, p->_exons[0]._estTo - p->_exons[0]._estFrom + 1); } break; } 
delete p; } } fclose(F); } #define MAXSCAFFOLD 200000 int main(int argc, char **argv) { intervalList **coverage = new intervalList* [MAXSCAFFOLD]; intervalList **gaps = new intervalList* [MAXSCAFFOLD]; FastAWrapper *W = 0L; uint32 minCov = 80; bool includeGapsAsContamination = true; for (uint32 i=0; ifind(i); FastASequenceInCore *S = W->getSequence(); intervalList gaps; // Compute how much of the scaffold is gap. uint32 gapBeg = W->sequenceLength(i); char *seq = S->sequence(); for (uint32 beg=0, len=W->sequenceLength(i); beg beg) gapBeg = beg; } else { if (gapBeg < beg) { gaps.add(gapBeg, beg-gapBeg); gapBeg = W->sequenceLength(i); } } } // Geez! I suppose we could have just directly counted ACGT above! gaps.merge(); coverage[i]->merge(); uint32 coveredLength = coverage[i]->sumOfLengths(); uint32 gapLength = gaps.sumOfLengths(); uint32 totalLength = W->sequenceLength(i) - gapLength; if (100 * coveredLength > minCov * totalLength) { sumOfLengths += coveredLength; sequences++; double cov = 100.0 * coveredLength / (double)totalLength; fprintf(stderr, "sequence ["uint32FMT"] %s covered "uint32FMT" out of "uint32FMT" (%7.3f)\n", i, S->header(), coveredLength, totalLength, cov); delete S; } // Dump a special scaffold if (i == 4796) { for (uint32 z=0; znumberOfIntervals(); z++) { fprintf(stderr, "interval[%3d] %6d - %6d\n", z, coverage[i]->lo(z), coverage[i]->hi(z)); } } } } fprintf(stderr, "Found "uint32FMT" bases in "uint32FMT" scaffolds.\n", sumOfLengths, sequences); } kmer-code-2013-trunk/sim4dbutils/pickUniquePolish-nhgri.C0000644000000000000000000004472412322046702022127 0ustar rootroot#include #include #include #include #include #include "bio.h" #include "sim4.H" // Kaz Kylheku library. #include "kazlib/dict.h" #include "kazlib/except.h" #include "kazlib/hash.h" #include "kazlib/list.h" #include "kazlib/sfx.h" this is now dead code. It was unused and needed too much effort to update to sim4polishReader // Derived from pickBestPolish.c. 
We report only the single best // match, when it is obvious that there is EXACTLY one best match. // // Example: we have ten matches, but one is 3%id better than everyone // else -- that is an obviously unique match. The rest are noise. // // Example: ten matches, but they're all about the same quality -- within // a few percent id, and about the same length. We pick no match, and // silently discard all. // // Modified to: // a) not print out unique matches // b) print hangs // c) print q20 bases inside mapped regions, outside, etc. // // It needs two args -f seq.fasta -q qlt.fasta, both must have an // index -- build it for the seq.fasta, and COPY the index to // qlt.fastaidx. Be sure to 'touch -r seq.fasta qlt.fasta' to get // the same timestamp on the files. // // // Further modified to behave like pickUniquePolish (print unique matches // to a specific file). // // so: pickUniquePolish-nhgri needs to read polishes on stdin // -f qry.fasta -- query sequences for quality comparison // -q qlt.fasta -- // -scores X.scores -- write stats to file X // -unique X.bz2 -- write uniquely mapped stuff to bzip2 file X.bz2 // -filter X -- filter out polishes less than X% of the longest // -output X.bz2 -- write filtered polishes to bzip2 file X.bz2 // // It has two modes: // -f -q -- just compute stats on the input. // all options -- filter, and compute stats. 
// // bzip2 -dc pass?/map-gen*-qlt$id.sim4db.bz2 | // $bin/fixPolishesIID -c $qry -g $gen | // $bin/filterPolishes -node -D | // $bin/sortPolishes -c -m 768 -t /scratch -v | // $bin/pickUniquePolish-nhgri > all-$id.scores // -o all-$id.sim4db.bz2 // -F X // -f $qry // -q $qlt // -stats all-$id.scores | // -uniq all-$id.sim4db.bz2 uint32 statOneMatch = 0; uint32 statConsistent = 0; uint32 statInconsistent = 0; uint32 statUnique = 0; uint32 statLost = 0; uint32 consistentTie = 0; uint32 consistentMatches = 0; uint32 consistentIdentity = 0; uint32 consistentTooShort = 0; uint32 consistentNot = 0; uint32 totLQ = 0; uint32 totMQ = 0; uint32 totRQ = 0; seqCache *SEQ = 0L; seqCache *QLT = 0L; double filter = 0.0; FILE *oFile = 0L; int oFileIsPipe = 0; FILE *sFile = 0L; FILE *uFile = 0L; bool doFiltering = false; void analyze(uint32 iid, uint32 clrl, uint32 clrr, uint32 len, bool isForward, char type) { seqInCore *Q = QLT->getSequenceInCore(iid);; char *q = Q->sequence(); uint32 i = 0; uint32 lq = 0; uint32 mq = 0; uint32 rq = 0; for ( ;i= '0' + 20) lq++; for ( ;i= '0' + 20) mq++; for ( ; i= '0' + 20) rq++; delete Q; if (isForward) { totLQ += lq; totMQ += mq; totRQ += rq; } else { totLQ += rq; totMQ += mq; totRQ += lq; } fprintf(sFile, uint32FMT"\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t%c\n", iid, clrl, clrr, len, lq, mq, rq, type); } void analyze(sim4polish *p, char type) { uint32 clrl = p->_exons[0]._estFrom - 1; uint32 clrr = p->_exons[0]._estTo - 1; if (p->_matchOrientation == SIM4_MATCH_COMPLEMENT) { clrl = p->_estLen - (p->_exons[0]._estTo - 1); clrr = p->_estLen - (p->_exons[0]._estFrom - 1); } analyze(p->_estID, clrl, clrr, p->_estLen, p->_matchOrientation != SIM4_MATCH_COMPLEMENT, type); } void pickBestSlave(sim4polish **p, uint32 pNum) { uint32 identitym = 0, nmatchesm = 0; // Best score for the mList uint32 identityi = 0, nmatchesi = 0; // Best score the the iList uint32 matchi = 0, matchm = 0; // Difficult choice here.... 
// if (pNum == 1) { statOneMatch++; statUnique++; if (uFile) p[0]->s4p_printPolish(uFile); if (oFile) p[0]->s4p_printPolish(oFile); analyze(p[0], 'U'); return; } // Find the best percentIdentity and best numberOfMatches. // // identityi is the best percent identity of all the matches for this EST, and // nmatchesi is the number of matches for the longest best identity match(es). // matchi is the match index // // nmatchesm is the best numMatches of all the matches for this EST, and // identitym is the highest percent identity for the best numMatches match(es). // matchm is the match index for (uint32 i=0; i_percentIdentity > identityi) || (p[i]->_percentIdentity == identityi && p[i]->_numMatches > nmatchesi)) { identityi = p[i]->_percentIdentity; nmatchesi = p[i]->_numMatches; matchi = i; } if ((p[i]->_numMatches > nmatchesm) || (p[i]->_numMatches == nmatchesm && p[i]->_percentIdentity > identitym)) { nmatchesm = p[i]->_numMatches; identitym = p[i]->_percentIdentity; matchm = i; } } bool matchIsOK = false; // If we are in agreement on what the best quality match is, // see if the best match is obviously unique. // if ((identityi == identitym) || (nmatchesi == nmatchesm)) { statConsistent++; // It's clear what the quality values of the best match is, but we // don't know if those values are shared by more than one match. // Count the number of matches with exactly those scores. If // there is more than one, then we cannot pick out a single best. // uint32 numBest = 0; for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches == nmatchesi)) numBest++; if (numBest > 1) { // Dang, we mapped this guy more than once, exactly the same! // consistentTie++; } else { // We claim to have a single best match. See if any other // matches are close to the quality of that one. 
uint32 closeQuality = 0; for (uint32 i=0; i_percentIdentity * 102) >= (identityi * 100)) || ((p[i]->_numMatches * 102) >= (nmatchesi * 100))) closeQuality++; // If only one match has close quality (the one we want to save!), // save it. Otherwise, label this query as multiple. uint32 length = p[matchi]->_exons[0]._estFrom - p[matchi]->_exons[0]._estTo; if (closeQuality == 1) { matchIsOK = true; consistentMatches++; } else if ((length > 100) && (length / p[matchi]->_estLen < 0.5)) { consistentTooShort++; } else { consistentNot++; } } } else { // Otherwise, we disagree on what the best match is. // // That is, the match with the highest identity is not the match // with the highest number of matches -- a longer match exists, but // at lower overall percent identity. statInconsistent++; // Estimate the identity of the extended part, assuming the piece // matched in common is matched at about the same identity. Or // just give up and say it's mapped to multiple places! } uint32 best = 0; uint32 besti = 0; if (matchIsOK) { statUnique++; if (uFile) p[matchi]->s4p_printPolish(uFile); assert(matchi == matchm); besti = matchi; analyze(p[besti], 'G'); } else { statLost++; // Just pick the longest match, analyze that. for (uint32 i=0; i_exons[0]._estFrom - p[i]->_exons[0]._estTo; if ((len > best) || ((len == best) && (p[i]->_numMatches > p[besti]->_numMatches))) { best = len; besti = i; } } analyze(p[besti], 'N'); } #if 0 uint32 nm = (uint32)(p[besti]->_numMatches * 0.75); uint32 sv = 0; for (uint32 i=0; i_numMatches >= nm) sv++; fprintf(stderr, "Saved "uint32FMT" matches more than nmatches "uint32FMT" (from best of "uint32FMT")\n", sv, nm, p[besti]->_numMatches); #endif // besti is the best/longest match we have. Decide on a threshold // to throw out the obvious junk. 
// if ((oFile) && (doFiltering)) { uint32 nm = (uint32)(p[besti]->_numMatches * filter); for (uint32 i=0; i_numMatches >= nm) p[i]->s4p_printPolish(oFile); } #if 0 fprintf(stderr, "Uni:"uint32FMTW(8)" Con:"uint32FMTW(8)" (T:"uint32FMTW(8)" M:"uint32FMTW(8)" I:"uint32FMTW(8)" N:"uint32FMTW(8)") Inc:"uint32FMTW(8)" -- Save:"uint32FMTW(8)" Lost:"uint32FMTW(8)"\r", statOneMatch, statConsistent, consistentTie, consistentMatches, consistentIdentity, consistentNot, statInconsistent, statUnique, statLost); #endif } // Just a wrapper around the real best picker, so that we can easily // destroy polishes when we're done. // void pickBest(sim4polish **p, uint32 pNum) { pickBestSlave(p, pNum); for (uint32 i=0; i_estDefLine); dnode_t *gid = dict_lookup(GENdict, q->_genDefLine); if ((cid == 0L) || (gid == 0L)) { const char *msg = "both deflines"; if (cid) msg = "genomic defline"; if (gid) msg = "est defline"; q->s4p_printPolish(stdout); fprintf(stderr, "ERROR: Couldn't find %s (%p %p) in the dictionary!\n", msg, cid, gid); exit(1); } q->_estID = (uint32)(unsigned long)dnode_get(cid); q->_genID = (uint32)(unsigned long)dnode_get(gid); } // // Stolen from sortPolishes // int mergeFilesLen; int mergeFilesMax; FILE **mergeFiles; char **mergeNames; sim4polish **mergePolishes; sim4polish * nextPolish(void) { int smallestPolish = 0; int nextPolish = 1; // If no merge files, read from stdin // if (mergeFilesLen == 0) { return(new sim4polish(stdin)); } // Find the smallest polish. // for (nextPolish = smallestPolish+1; nextPolish < mergeFilesLen; nextPolish++) { if (s4p_estIDcompare(mergePolishes+smallestPolish, mergePolishes+nextPolish) > 0) smallestPolish = nextPolish; } // If the smallestPolish is 0L, we're all done. Otherwise, dump // the current smallest and fill it with a new polish. 
// if (mergePolishes[smallestPolish] == 0L) { return(0L); } else { sim4polish *ret = mergePolishes[smallestPolish]; mergePolishes[smallestPolish] = new sim4polish(mergeFiles[smallestPolish]); // fix the iid's to be consistent in our partition, so we can have the input files // sorted by est iid. if (mergePolishes[smallestPolish]) fixIID(mergePolishes[smallestPolish], IIDdict); // fix the iid's to be consistent globally fixIID(ret, SEQdict); return(ret); } } // // Stolen from fixPolishesIID // void addToDict(dict_t *d, char *n) { dnode_t *node = 0L; char *dcpy = 0L; if (n == 0L) return; seqCache *F = new seqCache(n); seqInCore *S = F->getSequenceInCore(); while (S) { node = (dnode_t *)palloc(sizeof(dnode_t)); dcpy = (char *)palloc(sizeof(char) * S->headerLength() + 1); strcpy(dcpy, S->header()); dnode_init(node, (void *)(unsigned long)S->getIID()); dict_insert(d, node, dcpy); delete S; S = F->getSequenceInCore(); } delete F; } int headerCompare(const void *a, const void *b) { char *A = *((char **)a); char *B = *((char **)b); //fprintf(stderr, "%s -- %s\n", A, B); return(strcmp(A, B)); } int main(int argc, char **argv) { uint32 pNum = 0; uint32 pAlloc = 8388608; uint32 estID = ~uint32ZERO; bool *found = 0L; // From fixPolishesIID.c IIDdict = 0L; SEQdict = 0L; GENdict = 0L; // Incorporated from sortPolishes mergeFilesLen = 0; mergeFilesMax = sysconf(_SC_OPEN_MAX); mergeFiles = new FILE * [mergeFilesMax]; mergeNames = new char * [mergeFilesMax]; mergePolishes = new sim4polish * [mergeFilesMax]; // Default to printing stats on stdout. 
sFile = stdout; int arg = 1; while (arg < argc) { if (strcmp(argv[arg], "-n") == 0) { pAlloc = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-fpart") == 0) { arg++; fprintf(stderr, "reading query deflines from '%s'\n", argv[arg]); IIDdict = dict_create(DICTCOUNT_T_MAX, headerCompare); addToDict(IIDdict, argv[arg]); } else if (strcmp(argv[arg], "-g") == 0) { ++arg; fprintf(stderr, "reading genomic deflines from '%s'\n", argv[arg]); GENdict = dict_create(DICTCOUNT_T_MAX, headerCompare); addToDict(GENdict, argv[arg]); } else if (strcmp(argv[arg], "-F") == 0) { ++arg; fprintf(stderr, "reading query deflines from '%s'\n", argv[arg]); SEQdict = dict_create(DICTCOUNT_T_MAX, headerCompare); addToDict(SEQdict, argv[arg]); } else if (strcmp(argv[arg], "-f") == 0) { ++arg; SEQ = new seqCache(argv[arg]); } else if (strcmp(argv[arg], "-q") == 0) { ++arg; QLT = new seqCache(argv[arg]); } else if (strcmp(argv[arg], "-filter") == 0) { filter = atof(argv[++arg]); doFiltering = true; } else if (strcmp(argv[arg], "-output") == 0) { char cmd[1024] = {0}; errno = 0; ++arg; if (strcmp(argv[arg] + strlen(argv[arg]) - 4, ".bz2") == 0) { sprintf(cmd, "bzip2 -1c > %s", argv[arg]); oFile = popen(cmd, "w"); oFileIsPipe = 1; } else if (strcmp(argv[arg] + strlen(argv[arg]) - 3, ".gz") == 0) { sprintf(cmd, "gzip -1c > %s", argv[arg]); oFile = popen(cmd, "w"); oFileIsPipe = 1; } else { fprintf(stderr, "Got %s, not .bz2 not .gz!\n", argv[arg]); exit(1); } if (errno) fprintf(stderr, "Failed to open '%s': %s\n", cmd, strerror(errno)); doFiltering = true; } else if (strcmp(argv[arg], "-scores") == 0) { errno = 0; sFile = fopen(argv[++arg], "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", argv[arg-1], strerror(errno)); doFiltering = true; } else if (strcmp(argv[arg], "-unique") == 0) { char cmd[1024] = {0}; errno = 0; arg++; if (strcmp(argv[arg] + strlen(argv[arg]) - 4, ".bz2") == 0) sprintf(cmd, "bzip2 -1c > %s", argv[arg]); else if (strcmp(argv[arg] + strlen(argv[arg]) - 
3, ".gz") == 0) sprintf(cmd, "gzip -1c > %s", argv[arg]); else sprintf(cmd, "cat > %s", argv[arg]); uFile = popen(cmd, "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", cmd, strerror(errno)); doFiltering = true; } else if (strncmp(argv[arg], "-M", 2) == 0) { arg++; while ((arg < argc) && (fileExists(argv[arg]))) { if (mergeFilesLen >= mergeFilesMax) { fprintf(stderr, "%s: ERROR! Too many input files! Should be less than %d\n", argv[0], mergeFilesMax); exit(1); } mergeNames[mergeFilesLen] = argv[arg]; mergeFiles[mergeFilesLen++] = openFile(argv[arg], "r"); arg++; } arg--; } else { fprintf(stderr, "unknown option: %s\n", argv[arg]); } arg++; } if (doFiltering) { if (uFile == 0L) fprintf(stderr, "ERROR: -unique is required\n"), exit(1); if (sFile == 0L) fprintf(stderr, "ERROR: -scores is required\n"), exit(1); if ((filter < 0.0) || (filter > 1.0)) fprintf(stderr, "ERROR: -filter value of %f invalid. 0 <= F <= 100.\n", filter), exit(1); } if ((IIDdict == 0L) || (SEQdict == 0L) || (GENdict == 0L)) { fprintf(stderr, "WARNING! No sequence dictionaries, NOT FIXING IIDs! (supply -fpart, -f and -g)\n"); } if ((SEQ == 0L) || (QLT == 0L)) { fprintf(stderr, "I need -f and -q\n"); exit(1); } // We no longer require that input polishes be sorted increasingly; // now they only must be grouped. This remembers if we've seen a // match or not. At the end, we'll analyze() those we haven't done // already. // found = new bool [ SEQ->getNumberOfSequences() ]; for (uint32 i=0; igetNumberOfSequences(); i++) found[i] = false; // Initialize the merge -- if no merge files, nothing done! 
// for (int i=0; i_estID != estID) && (pNum > 0)) { //fprintf(stderr, "PickBest for estID "uint32FMT"\n", estID); found[estID] = true; pickBest(p, pNum); pNum = 0; } if (pNum >= pAlloc) { sim4polish **P = new sim4polish * [pAlloc * 2]; memcpy(p, P, sizeof(sim4polish *) * pAlloc); delete [] p; p = P; pAlloc *= 2; } p[pNum++] = q; estID = q->_estID; } if (pNum > 0) { found[estID] = true; pickBest(p, pNum); } // Attempt cleanup // for (int i=0; igetNumberOfSequences(); estID++) if (found[estID] == false) analyze(estID, 0, SEQ->getSequenceLength(estID), SEQ->getSequenceLength(estID), true, 'M'); delete [] mergeFiles; delete [] mergeNames; delete [] mergePolishes; if (oFile) pclose(oFile); if (uFile) pclose(uFile); if (sFile) fclose(sFile); fprintf(stderr, "Uni:"uint32FMTW(8)" Con:"uint32FMTW(8)" (T:"uint32FMTW(8)" M:"uint32FMTW(8)" I:"uint32FMTW(8)" S:"uint32FMTW(8)" N:"uint32FMTW(8)") Inc:"uint32FMTW(8)" -- Save:"uint32FMTW(8)" Lost:"uint32FMTW(8)"\n", statOneMatch, statConsistent, consistentTie, consistentMatches, consistentIdentity, consistentTooShort, consistentNot, statInconsistent, statUnique, statLost); fprintf(stderr, "total: LQ:"uint32FMT" MQ:"uint32FMT" RQ:"uint32FMT"\n", totLQ, totMQ, totRQ); return(0); } kmer-code-2013-trunk/sim4dbutils/test/0000755000000000000000000000000012641613357016400 5ustar rootrootkmer-code-2013-trunk/sim4dbutils/test/parsesnp-snp.fasta0000644000000000000000000000242607662357004022056 0ustar rootroot>rev_comp_end_of_first_exon /pos=50 /size=1 /genotype=G_T CGAATTTACAGGGGACATCCTACGAACTCCGGTGTCAGAGGACATGCTGG G TCGGGTTTTCAATGGCTCCGGCAAGCCCATTGACAAGGGGCCAGTGGTCA >forward_end_of_first_exon /pos=50 /size=1 /genotype=G_T TGACCACTGGCCCCTTGTCAATGGGCTTGCCGGAGCCATTGAAAACCCGA C CCAGCATGTCCTCTGACACCGGAGTTCGTAGGATGTCCCCTGTAAATTCG >rev_comp_start_of_second_exon /pos=49 /size=1 /genotype=G_T CGAATTTACAGGGGACATCCTACGAACTCCGGTGTCAGAGGACATGCTGG G TCGGGTTTTCAATGGCTCCGGCAAGCCCATTGACAAGGGGCCAGTGGTCA >forward_start_of_second_exon /pos=51 /size=1 
/genotype=G_T TGACCACTGGCCCCTTGTCAATGGGCTTGCCGGAGCCATTGAAAACCCGA C CCAGCATGTCCTCTGACACCGGAGTTCGTAGGATGTCCCCTGTAAATTCG >reverse_end_of_second_exon /pos=0 /size=1 /genotype=G_T CGAATTTACAGGGGACATCCTACGAACTCCGGTGTCAGAGGACATGCTGG G TCGGGTTTTCAATGGCTCCGGCAAGCCCATTGACAAGGGGCCAGTGGTCA >forward_end_of_second_exon /pos=100 /size=1 /genotype=G_T TGACCACTGGCCCCTTGTCAATGGGCTTGCCGGAGCCATTGAAAACCCGA C CCAGCATGTCCTCTGACACCGGAGTTCGTAGGATGTCCCCTGTAAATTCG >reverse_start_of_first_exon /pos=100 /size=1 /genotype=G_T CGAATTTACAGGGGACATCCTACGAACTCCGGTGTCAGAGGACATGCTGG G TCGGGTTTTCAATGGCTCCGGCAAGCCCATTGACAAGGGGCCAGTGGTCA >forward_start_of_first_exon /pos=0 /size=1 /genotype=G_T TGACCACTGGCCCCTTGTCAATGGGCTTGCCGGAGCCATTGAAAACCCGA C CCAGCATGTCCTCTGACACCGGAGTTCGTAGGATGTCCCCTGTAAATTCG kmer-code-2013-trunk/sim4dbutils/test/parsesnp-correct-parsed0000644000000000000000000000133407662357004023073 0ustar rootrootrev_comp_end_of_first_exon genomicTestChunk 51 C/X complement global[98 98] exon[2 0 98 49] forward_end_of_first_exon genomicTestChunk 51 C/X forward global[98 98] exon[2 0 98 49] rev_comp_start_of_second_exon genomicTestChunk 643 C/Y complement global[98 98] exon[2 1 98 48] forward_start_of_second_exon genomicTestChunk 643 C/Y forward global[98 98] exon[2 1 98 48] reverse_end_of_second_exon genomicTestChunk 692 g/g complement global[98 98] exon[2 1 98 48] forward_end_of_second_exon genomicTestChunk 692 g/g forward global[98 98] exon[2 1 98 48] reverse_start_of_first_exon genomicTestChunk 1 t/t complement global[98 98] exon[2 0 98 49] forward_start_of_first_exon genomicTestChunk 1 t/t forward global[98 98] exon[2 0 98 49] kmer-code-2013-trunk/sim4dbutils/test/parsesnp-gen.fasta0000644000000000000000000000131707662357004022025 0ustar rootroot>genomicTestChunk TGACCACTGGCCCCTTGTCAATGGGCTTGCCGGAGCCATTGAAAACCCGAXCTGAGGGTGGACGAGGAGTGTTGCAGGGT GCTCAGGCTAGCCCTGTGTCCCTCACTACTGTCTACCCTCCACACCACCACCAGCTCCCACCCACTCCCCACAGGAGTGC 
CCTGTATCCCCCTCCCCGGCAGCCCACAGGTAACCCAGAGAGCCAGCTACAAGGACTGTCCTGTGAGAGTCTTCCTTCCT CTCCTGGAAACCTTTACAGGCAAGGCCTTGGCCCAGGACCATGACTCTAATGGGGGATTCCAGGACTAGAGAGAGGAGAG GGCCAGGCCAGGTCTTGGGAGAGAATTAGGGGATATTCAAGGCTTAGCAGTCCCTTTCAGCCTGGCTCCAAATAGGTGGC TACCTACTGTGTCTCTAGTTTATTGAACCCACCTTCCTCATTTATTGAACACCTGTATGTCAGACCCTGAGCTGGGGTCA GGAATCAGAGAGAAGACATACCCCAGACCTCAAAGAGCCCACCAGAGAGACAGGCAGGAAGTAAACAGGAAGTGACAGTG TAGTGTGCTGAGGGTTTGGTCAGAGAAGCTGGGCTGGGAGGGCAGAGGAGCACCCCCACCCCTGCCCCTCCCTGTCCCTC ACYCAGCATGTCCTCTGACACCGGAGTTCGTAGGATGTCCCCTGTAAATTCG kmer-code-2013-trunk/sim4dbutils/depthOfPolishes.C0000644000000000000000000000757512417326046020640 0ustar rootroot#include #include #include #include #include "sim4.H" // ./genomics/sim4dbutils/depthOfPolishes -v < runA.1.ms12.filtered.sim4db > depth-out // plot [112000:113000][] "depth-out" using 2 with lines int main(int argc, char **argv) { uint32 genomeLength = 0; uint32 seqIdx = 0; int arg = 1; while (arg < argc) { if (strncmp(argv[arg], "-l", 2) == 0) { genomeLength = strtouint32(argv[++arg], 0L); } else if (strncmp(argv[arg], "-s", 2) == 0) { seqIdx = strtouint32(argv[++arg], 0L); } else { fprintf(stderr, "Unknown arg '%s'\n", argv[arg]); } arg++; } intervalList IL; sim4polishReader *R = new sim4polishReader("-"); sim4polish *p = 0L; while (R->nextAlignment(p)) { uint32 beg = p->_exons[0]._genFrom - 1; uint32 end = p->_exons[p->_numExons-1]._genTo; if (p->_genID != seqIdx) continue; if (end > genomeLength) genomeLength = end; IL.add(beg, end-beg); } intervalList ID(IL); // The extra 1000 here is so we can be lazy in the // output section when computing averages. // uint32 *DD = new uint32 [genomeLength + 1000]; for (uint32 i=0; i 1) ? DD[i-2] : 0); ave5 += DD[i+2] - ((i > 2) ? DD[i-3] : 0); ave11 += DD[i+5] - ((i > 5) ? DD[i-6] : 0); ave51 += DD[i+25] - ((i > 25) ? DD[i-25] : 0); ave101 += DD[i+50] - ((i > 50) ? DD[i-51] : 0); ave201 += DD[i+100] - ((i > 100) ? DD[i-101] : 0); ave501 += DD[i+250] - ((i > 250) ? 
DD[i-251] : 0); ave1001 += DD[i+500] - ((i > 500) ? DD[i-501] : 0); ave2001 += DD[i+1000] - ((i > 1000) ? DD[i-1001] : 0); fprintf(stdout, uint32FMT"\t"uint32FMT"\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", i, DD[i], ave3 / (double)((i >= 1) ? 3 - ((i < genomeLength - 1) ? 0 : i + 2 - genomeLength) : i+2), ave5 / (double)((i >= 2) ? 5 - ((i < genomeLength - 2) ? 0 : i + 3 - genomeLength) : i+3), ave11 / (double)((i >= 5) ? 11 - ((i < genomeLength - 4) ? 0 : i + 5 - genomeLength) : i+6), ave51 / (double)((i >= 25) ? 51 - ((i < genomeLength - 24) ? 0 : i + 25 - genomeLength) : i+26), ave101 / (double)((i >= 50) ? 101 - ((i < genomeLength - 49) ? 0 : i + 50 - genomeLength) : i+51), ave201 / (double)((i >= 100) ? 201 - ((i < genomeLength - 99) ? 0 : i + 100 - genomeLength) : i+101), ave501 / (double)((i >= 250) ? 501 - ((i < genomeLength - 249) ? 0 : i + 250 - genomeLength) : i+251), ave1001 / (double)((i >= 500) ? 1001 - ((i < genomeLength - 499) ? 0 : i + 500 - genomeLength) : i+501), ave2001 / (double)((i >= 1000) ? 2001 - ((i < genomeLength - 999) ? 0 : i + 1000 - genomeLength) : i+1001)); } return(0); } kmer-code-2013-trunk/sim4dbutils/removeRedundant.C0000644000000000000000000001560212322046702020661 0ustar rootroot#include #include #include #include "bio++.H" #include "sim4.H" #include "s4p_overlap.H" // Remove redundant polishes from an input set. // // Redundancy is defined as two polishes that overlap on the genome. // Any amount of overlap is redundant. // // The longest of the overlapping matches is saved. //#define DEBUGOUT int main(int argc, char **argv) { if (argc < 2) { fprintf(stderr, "usage: %s [-gff3] \n", argv[0]); fprintf(stderr, "(yes, you _must_ give it a file. 
stdin is not possible.)\n"); fprintf(stderr, "WARNING THIS IS PROTOTYPE BROKEN CODE!\n"); exit(1); } sim4polishStyle wstyle = sim4polishStyleDefault; sim4polishStyle rstyle = sim4polishStyleDefault; int arg = 1; while (arg < argc) { if (strcmp(argv[arg], "-gff3") == 0) { wstyle = sim4polishGFF3; } arg++; } uint32 matchesWithNoOverlap = 0; uint32 matchesWithOverlap = 0; uint32 notPerfectClique = 0; // Open a polishFile and force the index to build // First find the input file type, with a hack sim4polishReader *reader = new sim4polishReader(argv[argc-1]); rstyle = reader->getsim4polishStyle(); delete reader; sim4polishFile *Afile = new sim4polishFile(argv[argc-1], rstyle); Afile->setPosition(0); sim4polishWriter *writer = new sim4polishWriter("-", wstyle); if (rstyle != wstyle) fprintf(stderr, "warning: input format and output format differ.\n"); // Ask both for the largest EST iid seen, then iterate over those. // uint32 largestIID = Afile->maxIID(); for (uint32 iid=0; iidgetEST(iid); if (A->length() > 0) { // fill out the overlap matrix olap_t **overlap = new olap_t* [A->length()]; overlap[0] = new olap_t [A->length() * A->length()]; for (uint32 i=1; ilength(); i++) overlap[i] = overlap[i-1] + A->length(); for (uint32 a=0; alength(); a++) for (uint32 b=0; blength(); b++) if (a == b) overlap[a][b] = 0; else overlap[a][b] = findOverlap((*A)[a], (*A)[b]); // look for guys with no overlaps, print and remove them sim4polishList *W = new sim4polishList; for (uint32 a=0; alength(); a++) { bool nooverlaps = true; for (uint32 b=0; blength(); b++) if (overlap[a][b]) nooverlaps = false; if (nooverlaps) { matchesWithNoOverlap++; writer->writeAlignment((*A)[a]); } else { matchesWithOverlap++; W->push(new sim4polish((*A)[a])); } } #if 1 fprintf(stderr, "IID="uint32FMTW(8)" -- overlap:"uint32FMT" noOverlap:"uint32FMT"\r", iid, matchesWithOverlap, matchesWithNoOverlap); fflush(stderr); #endif // A is junk, W contains the matches that overlap. 
delete A; A = 0L; // Report all the overlaps #ifdef DEBUGOUT for (uint32 a=0; alength(); a++) { sim4polish *p = (*W)[a]; fprintf(stderr, uint32FMTW(3)": "uint32FMTW(3)"--"uint32FMTW(3)"\n", iid, p->exons[0].genFrom, p->exons[p->numExons-1].genTo); } #endif // while we have matches in the set of overlapping matches, // find a connected component, check that it is/is not a // clique, and decide which match to keep. uint32 *clique = new uint32 [W->length()]; uint32 cliqueSize = 0; bool inserted = false; uint32 *length = new uint32 [W->length()]; while (W->length() > 0) { #ifdef DEBUGOUT fprintf(stderr, "IID="uint32FMTW(8)" -- examine "uint32FMT" matches\n", iid, W->length()); #endif // Find the length of all the matches in this set for (uint32 a=0; alength(); a++) { length[a] = 0; for (uint32 i=0; i<(*W)[a]->_numExons; i++) length[a] += (*W)[a]->_exons[i]._genTo - (*W)[a]->_exons[i]._genFrom + 1; } // reconstruct the overlap matrix -- hey, if you want to be // efficient and recover this from the existing one, nobody is // stopping you. 
for (uint32 a=0; alength(); a++) for (uint32 b=0; blength(); b++) if (a == b) overlap[a][b] = 0; else overlap[a][b] = findOverlap((*W)[a], (*W)[b]); // OK, now find the clique/connected component for (uint32 i=0; ilength(); i++) clique[i] = 0; clique[0] = 1; cliqueSize = 1; inserted = true; while (inserted) { inserted = false; // If a is in the clique, add all it's overlaps for (uint32 a=0; alength(); a++) { if (clique[a]) { for (uint32 b=0; blength(); b++) { if ((overlap[a][b]) && (!clique[b])) { clique[b] = 1; cliqueSize++; inserted = true; } } } } } #ifdef DEBUGOUT fprintf(stderr, "IID="uint32FMTW(8)" -- examine "uint32FMT" matches, found "uint32FMT" overlapping\n", iid, W->length(), cliqueSize); #endif // Check that it is a clique if (cliqueSize > 2) { uint32 num = 0; for (uint32 a=0; alength(); a++) for (uint32 b=0; blength(); b++) if (clique[a] && clique[b] && overlap[a][b]) num++; if (num != cliqueSize * (cliqueSize-1)) { notPerfectClique++; fprintf(stderr, "\nNOT A PERFECT CLIQUE! 
Found "uint32FMT" overlaps, wanted "uint32FMT" in the clique.\n", num, cliqueSize * (cliqueSize-1)); //for (uint32 a=0; alength(); a++) // if (clique[a]) // writer->writeAlignment((*W)[a]); } } // Find the longest member, output it uint32 longest = 0; while (clique[longest] == 0) longest++; for (uint32 i=0; ilength(); i++) if ((clique[i]) && (length[longest] < length[i])) longest = i; writer->writeAlignment((*W)[longest]); // Remove the clique from the set of overlaps A = new sim4polishList; for (uint32 i=0; ilength(); i++) { if (clique[i] == 0) A->push(new sim4polish((*W)[i])); } delete W; W = A; A = 0L; } delete [] clique; delete W; delete [] overlap[0]; delete [] overlap; } delete A; } delete writer; delete Afile; fprintf(stderr, "\nmatches withOvl:"uint32FMT" withoutOvl:"uint32FMT"\n", matchesWithOverlap, matchesWithNoOverlap); fprintf(stderr, "not perfect clique:"uint32FMT"\n", notPerfectClique); } kmer-code-2013-trunk/sim4dbutils/sortPolishes.C0000644000000000000000000002523412322046702020217 0ustar rootroot#include #include #include #include #include #include "sim4.H" #include "bio.h" #include "util.h" // Sorts a file of polishes by cDNA or genomic idx. sim4polishReader * writeTemporary(char *filePrefix, sim4polish **p, uint32 pLen, sim4polishStyle style, int (*fcn)(const void *, const void *)) { sim4polishWriter *W = new sim4polishWriter(0L, style, true); sim4polishReader *R; qsort(p, pLen, sizeof(sim4polish *), fcn); for (uint32 i=0; iwriteAlignment(p[i]); R = new sim4polishReader(0L, W); delete W; return(R); } // Save the polish using palloc; // sim4polish * savePolish(sim4polish *q, uint64 *alloc) { int l; // Copy the base polish structure. // sim4polish *r = (sim4polish *)palloc(sizeof(sim4polish)); memcpy(r, q, sizeof(sim4polish)); *alloc += sizeof(sim4polish); // Copy the deflines. 
// if (q->_estDefLine && q->_genDefLine) { l = strlen(q->_estDefLine) + 1; r->_estDefLine = (char *)palloc(sizeof(char) * l); memcpy(r->_estDefLine, q->_estDefLine, sizeof(char) * l); *alloc += l * sizeof(char); l = strlen(q->_genDefLine) + 1; r->_genDefLine = (char *)palloc(sizeof(char) * l); memcpy(r->_genDefLine, q->_genDefLine, sizeof(char) * l); *alloc += l * sizeof(char); } // Copy the base exon structure. // r->_exons = (sim4polishExon *)palloc(sizeof(sim4polishExon) * q->_numExons); memcpy(r->_exons, q->_exons, sizeof(sim4polishExon) * q->_numExons); *alloc += sizeof(sim4polishExon) * q->_numExons; // Copy the exon alignments. // for (uint32 i=0; i_numExons; i++) { if (q->_exons[i]._estAlignment) { l = strlen(q->_exons[i]._estAlignment) + 1; r->_exons[i]._estAlignment = (char *)palloc(sizeof(char) * l); memcpy(r->_exons[i]._estAlignment, q->_exons[i]._estAlignment, sizeof(char) * l); *alloc += l * sizeof(char); } if (q->_exons[i]._genAlignment) { l = strlen(q->_exons[i]._genAlignment) + 1; r->_exons[i]._genAlignment = (char *)palloc(sizeof(char) * l); memcpy(r->_exons[i]._genAlignment, q->_exons[i]._genAlignment, sizeof(char) * l); *alloc += l * sizeof(char); } } return(r); } void statusReport(uint32 pLen, uint32 mergeFilesLen, uint64 arrayAlloc, uint64 matchAlloc, uint64 upperAlloc) { if (pLen > 0) { fprintf(stderr, "Read: "uint32FMTW(8)" polishes -- "uint32FMTW(5)" temporary files -- "uint64FMTW(5)"MB / "uint64FMTW(5)"MB -- "uint64FMTW(5)" bytes/polish\r", pLen, mergeFilesLen, (arrayAlloc + matchAlloc) >> 20, upperAlloc >> 20, matchAlloc / pLen); fflush(stderr); } } // The OS limit is usually hit before this, but this is // the maximum number of files we can have open at once. 
// //#define MERGE_FILES_MAX OPEN_MAX int main(int argc, char **argv) { bool beVerbose = false; char *filePrefix = NULL; uint32 pLen = 0; uint32 pMax = 1 * 1024 * 1024; uint64 upperAlloc = getProcessSizeLimit(); // Maximum allowed memory usage uint64 arrayAlloc = 0; // Static stuff: the process, arrays uint64 matchAlloc = 0; // palloc size, matches int (*fcn)(const void *, const void *) = 0L; bool moreInput = true; uint32 mergeFilesLen = 0; uint32 mergeFilesMax = sysconf(_SC_OPEN_MAX); sim4polishReader **mergeFiles = new sim4polishReader * [mergeFilesMax]; char **mergeNames = new char * [mergeFilesMax]; sim4polishStyle style = sim4polishStyleDefault; if ((mergeFiles == 0L) || (mergeNames == 0L)) { fprintf(stderr, "sortPolishes: Failed to initialize.\n"); exit(1); } for (uint32 i=0; i= mergeFilesMax) { fprintf(stderr, "%s: ERROR! Too many input files! Should be less than %d\n", argv[0], mergeFilesMax); exit(1); } mergeNames[mergeFilesLen] = argv[arg]; mergeFiles[mergeFilesLen++] = new sim4polishReader(argv[arg]); arg++; } arg--; } else { fprintf(stderr, "unknown option: %s\n", argv[arg]); err++; } arg++; } if ((err) || (fcn == 0L) || ((mergeFilesLen == 0) && (isatty(fileno(stdin))))) { fprintf(stderr, "usage: %s [-c | -g] [-m M] [-t T] [-gff3] [-M [file ...]]\n", argv[0]); fprintf(stderr, " -c (-C) Sort by the cDNA index (defline).\n"); fprintf(stderr, " -g (-G) Sort by the genomic index (defline).\n"); fprintf(stderr, " -M Skip the sort, just do a merge.\n"); fprintf(stderr, " -m M Use at most M MB of core, using a disk-based merge if memory\n"); fprintf(stderr, " is exhausted. Default: 4096.\n"); fprintf(stderr, " -t T Use directory 'T' for temporary files. Default is the current\n"); fprintf(stderr, " working directory. 
The sort unlinks files immediately after\n"); fprintf(stderr, " creation: no files will exist, but space will be used.\n"); fprintf(stderr, " -gff3 Format output as GFF3.\n"); fprintf(stderr, " -v Be verbose.\n"); fprintf(stderr, "\n"); fprintf(stderr, " Both sort methods use the OTHER index as a secondary key.\n"); if (fcn == 0L) fprintf(stderr, "\nERROR: what key do you want to sort on (-c, -g, -C, -G)\n"); if ((mergeFilesLen == 0) && (isatty(fileno(stdin)))) fprintf(stderr, "\nERROR: no files to merge\n"); exit(1); } if (mergeFilesLen > 0) fprintf(stderr, "Found %d files to merge!\n", mergeFilesLen); // XXX: Experimental method to automagically determine the amount of memory available (or, to at // least, determine if this process can get to be as big as the user said it can. // arrayAlloc = getProcessSizeCurrent(); sim4polish **p = new sim4polish * [pMax]; memset(p, 0, sizeof(sim4polish *) * pMax); arrayAlloc += sizeof(sim4polish *) * pMax; // With small memory sizes, we occasionally run out of data space. This looks like an artifact // of not having palloc() use a blocksize that divides our upperAlloc size. This attempts to // sync them up. // psetblocksize(upperAlloc / 16); // This produced a crash in readBuffer //psetdebug(2); sim4polishReader *R = new sim4polishReader("-"); sim4polish *q = 0L; if (R->getsim4polishStyle() != style) fprintf(stderr, "warning: input format and output format differ.\n"); while (R->nextAlignment(q)) { // Allocate more pointer space, if we need to // if ((pLen >= pMax) || (arrayAlloc + matchAlloc >= upperAlloc)) { // Either realloc space (if we're still small enough to do so) or // write an intermediate file. 
if (arrayAlloc + matchAlloc + sizeof(sim4polish*) * pMax * 2 < upperAlloc) { sim4polish **P = new sim4polish * [pMax * 2]; memcpy(P, p, sizeof(sim4polish *) * pMax); delete [] p; pMax *= 2; p = P; arrayAlloc += sizeof(sim4polish *) * pMax; } else { if (beVerbose) { statusReport(pLen, mergeFilesLen+1, arrayAlloc, matchAlloc, upperAlloc); fprintf(stderr, "\n"); } if (mergeFilesLen >= mergeFilesMax) { fprintf(stderr, "Too many open files. Try increasing memory size.\n"); exit(1); } mergeFiles[mergeFilesLen++] = writeTemporary(filePrefix, p, pLen, style, fcn); pfree(); matchAlloc = 0; pLen = 0; } } p[pLen++] = savePolish(q, &matchAlloc); // COPY the polish. if (beVerbose && ((pLen % 2000) == 0)) statusReport(pLen, mergeFilesLen+1, arrayAlloc, matchAlloc, upperAlloc); } if (beVerbose) { statusReport(pLen, mergeFilesLen+1, arrayAlloc, matchAlloc, upperAlloc); fprintf(stderr, "\n"); } sim4polishWriter *W = new sim4polishWriter("-", style); if (mergeFilesLen == 0) { // No temporary files. Sort the polishes, and dump. qsort(p, pLen, sizeof(sim4polish *), fcn); for (uint32 i=0; iwriteAlignment(p[i]); } else { // Crud. Temporary files. Sort the last batch, dump it, then do // a merge. // if (mergeFilesLen >= mergeFilesMax) { fprintf(stderr, "Too many open files. Try increasing memory size.\n"); exit(1); } mergeFiles[mergeFilesLen++] = writeTemporary(filePrefix, p, pLen, style, fcn); pfree(); matchAlloc = 0; pLen = 0; delete [] p; } // // The merge // if (mergeFilesLen > 0) { if (beVerbose) fprintf(stderr, "Merging temporary files....\n"); sim4polish **p = new sim4polish * [mergeFilesLen]; memset(p, 0, sizeof(sim4polish *) * mergeFilesLen); for (uint32 i=0; inextAlignment(p[i]); while (moreInput) { uint32 smallestPolish = 0; // Find the smallest polish. // for (uint32 nextPolish = smallestPolish+1; nextPolish < mergeFilesLen; nextPolish++) { if ((*fcn)(p+smallestPolish, p+nextPolish) > 0) smallestPolish = nextPolish; } // If the smallestPolish is 0L, we're all done. 
Otherwise, dump // the current smallest and fill it with a new polish. // if (p[smallestPolish] == 0L) { moreInput = false; } else { W->writeAlignment(p[smallestPolish]); mergeFiles[smallestPolish]->nextAlignment(p[smallestPolish]); } } // Attempt cleanup // for (uint32 i=0; i #include #include #include #include #include "sim4reader.h" #define SHOWTRIMMING char const *usage = "usage: %s [-save trimmedFile]\n" " -savetrimming Saves a before/after of each trimmed match.\n" " All matches are printed to stdout (untrimmed and trimmed).\n" "\n"; int main(int argc, char ** argv) { int arg = 1; FILE *trimmedFile = 0L; int beVerbose = 0; sim4polish *p; int polishesProcessed = 0; int polishesTrimmed = 0; if (isatty(fileno(stdin)) || isatty(fileno(stdout))) { fprintf(stderr, usage, argv[0]); if (isatty(fileno(stdin))) fprintf(stderr, "error: I cannot read polishes from the terminal!\n\n"); if (isatty(fileno(stdout))) fprintf(stderr, "error: Please redirect the polishes to a file.\n (They are on stdout)\n\n"); exit(1); } arg = 1; while (arg < argc) { if (strncmp(argv[arg], "-savetrimming", 2) == 0) { arg++; errno=0; trimmedFile = fopen(argv[arg], "w"); if (errno) { fprintf(stderr, "Can't open '%s' for writing\n%s\n", argv[arg], strerror(errno)); exit(1); } } else if (strncmp(argv[arg], "-verbose", 2) == 0) { beVerbose = 1; } arg++; } while ((p = readPolish(stdin)) != 0L) { int trimFirst = 0; int trimLast = 0; /* Decide if we need to trim anything */ if (p->numExons > 1) { int exA; int exB; int dist; int qual; int size; exA = 0; // First exon exB = 1; // Second exon dist = p->exons[exB].genFrom - p->exons[exA].genTo + 1; qual = p->exons[exA].percentIdentity; size = p->exons[exA].estTo - p->exons[exA].estFrom + 1; trimFirst = 1; if (dist < 100000) trimFirst = 0; if (size >= 50) trimFirst = 0; if (size >= 25 + (int)((dist - 100000) * 25.0 / 900000.0)) trimFirst = 0; if ((qual >= 98) && (size >= 25 + (int)((dist - 100000) * 25.0 / 1400000.0))) trimFirst = 0; // Reverse our 
decision if the first exon is of low quality. // if ((qual < 85) && (dist >= 10000)) { if (trimFirst == 0) fprintf(trimmedFile, "Trimming frist exon based only on percent ID\n"); trimFirst = 1; } exA = p->numExons - 1; // Last exon exB = p->numExons - 2; // Second to last dist = p->exons[exA].genFrom - p->exons[exB].genTo + 1; qual = p->exons[exA].percentIdentity; size = p->exons[exA].estTo - p->exons[exA].estFrom + 1; trimLast = 1; if (dist < 100000) trimLast = 0; if (size >= 50) trimLast = 0; if (size >= 25 + (int)((dist - 100000) * 25.0 / 900000.0)) trimLast = 0; if ((qual >= 98) && (size >= 25 + (int)((dist - 100000) * 25.0 / 1400000.0))) trimLast = 0; // Reverse our decision if the first exon is of low quality. // if ((qual < 85) && (dist >= 10000)) { if (trimLast == 0) fprintf(trimmedFile, "Trimming last exon based only on percent ID\n"); trimLast = 1; } } if (trimmedFile && (trimFirst || trimLast)) { fprintf(trimmedFile, "------------------------------------------------------------BEFORE\n"); printPolish(trimmedFile, p); } if (beVerbose) { polishesProcessed++; if (trimFirst || trimLast) polishesTrimmed++; if ((polishesProcessed % 10000) == 0) { fprintf(stderr, " %d processed, %d trimmed (%8.5f%%)\r", polishesProcessed, polishesTrimmed, 100.0 * (double)polishesTrimmed / (double)polishesProcessed); fflush(stderr); } } // If there is one intron, and we've been asked to remove // either the first or the last (it should say to remove // both), then remove the shorter of the two. // if ((trimFirst || trimLast) && (p->numExons == 2)) { trimFirst = 0; trimLast = 0; if ((p->exons[0].estTo - p->exons[0].estFrom) > (p->exons[1].estTo - p->exons[1].estFrom)) trimLast = 1; else trimFirst = 1; } // Remove the first exon, by circularly shifting the list of // exons. The exon trimmed from the start is moved to the end of // the exon list. 
// if (trimFirst) { int i; sim4polishExon save; memcpy(&save, p->exons+0, sizeof(sim4polishExon)); for (i=1; inumExons; i++) memcpy(p->exons+i-1, p->exons+i, sizeof(sim4polishExon)); memcpy(p->exons+p->numExons-1, &save, sizeof(sim4polishExon)); p->numExons--; } // Trimming the last exon is easy; just decrement the size of the // list. // if (trimLast) { p->numExons--; // We also need to clear the intron orientation flag in the new // last exon // p->exons[p->numExons-1].intronOrientation = INTRON_NONE; } if (trimmedFile && (trimFirst || trimLast)) { fprintf(trimmedFile, "------------------------------------------------------------AFTER\n"); printPolish(trimmedFile, p); fprintf(trimmedFile, "============================================================EOP\n"); } printPolish(stdout, p); // Insert the exons back in, so they will be destroyed properly. // if (trimFirst) p->numExons++; if (trimLast) p->numExons++; destroyPolish(p); } return(0); } kmer-code-2013-trunk/sim4dbutils/comparePolishes.C0000644000000000000000000003605012322046702020654 0ustar rootroot#include #include #include #include #include "bio++.H" #include "sim4.H" #include "s4p_overlap.H" // Matches two sets of polishes to each other using a simple overlap // heuristic. // // Arguments (are horrible, whatcha gonna do about it?): // // -i min percent id (default 95) // -c min percent coverage (default 50) // -a polishes input file 1 // -b polishes input file 2 // -gff3 write output as GFF3 // // Output is on standard out, and is tab-delimited. It reports // stuff about the 'same' matches: // // ESTiid ESTlen overlap A%id A%cov #cdnagaps #exons B%id B%cov #cdnagaps #exons // Try to analyze cDNA gaps. // // For cDNA gaps larger than GAP_MINIMUM, count it as a gap only if // the genomic gap is within GAP_DIFFERENCE of the cDNA gap. // // XXX This needs some tweaking! 
// #define GAP_MINIMUM 10 #define GAP_DIFFERENCE 4 sim4polishWriter * openOutput(const char *prefix, const char *suffix, sim4polishStyle style) { char name[FILENAME_MAX]; sprintf(name, "%s.%s", prefix, suffix); return(new sim4polishWriter(name, style)); } int main(int argc, char **argv) { uint32 minI = 95; uint32 minC = 50; const char *prefix = "comparePolishes"; sim4polishFile *Afile = 0L; sim4polishFile *Bfile = 0L; // goodOverlap -- match in A maps uniquely to B and likewise. // // novelInA -- a match in A has no counterpart in B. // novelInB -- similar for B. // // multipleInA -- a match in B maps to multiple things in A. // multipleInB -- similar for A. // // multipleInA requires that the matches in A map only to the single // match in B. // // hairyOverlap -- multiple matches in both. // uint32 goodOverlap = 0; // the number of lines in the output uint32 novelInA = 0; uint32 novelInB = 0; uint32 multipleInA = 0; uint32 multipleInB = 0; uint32 hairyOverlap = 0; bool doGFF3; sim4polishStyle Astyle = sim4polishStyleDefault; sim4polishStyle Bstyle = sim4polishStyleDefault; sim4polishStyle style = sim4polishStyleDefault; int arg=1; while(arg < argc) { if (strcmp(argv[arg], "-i") == 0) { minI = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-c") == 0) { minC = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-a") == 0) { // Ugly hack to obtain the style of the input files, but can be fixed later sim4polishReader *AR = new sim4polishReader(argv[++arg]); Astyle = AR->getsim4polishStyle(); delete AR; Afile = new sim4polishFile(argv[arg], Astyle); } else if (strcmp(argv[arg], "-b") == 0) { // Ugly hack to obtain the style of the input files, but can be fixed later sim4polishReader *BR = new sim4polishReader(argv[++arg]); Bstyle = BR->getsim4polishStyle(); delete BR; Bfile = new sim4polishFile(argv[arg], Bstyle); } else if (strcmp(argv[arg], "-p") == 0) { prefix = argv[++arg]; } else if (strcmp(argv[arg], "-gff3") == 0) { doGFF3 = true; style = sim4polishGFF3; } 
arg++; } if ((Afile == 0L) || (Bfile == 0L)) { fprintf(stderr, "usage: %s [-i percent-identity] [-c percent-coverage] -a input-set-a -b input-set-b [-p output-prefix] [-gff3]\n", argv[0]); fprintf(stderr, "only -a and -b are mandatory, but you should give all anyway\n"); exit(1); } // Open the output files // sim4polishWriter *fasame = openOutput(prefix, "a-same", style); sim4polishWriter *fbsame = openOutput(prefix, "b-same", style); sim4polishWriter *fanovel = openOutput(prefix, "a-novel", style); sim4polishWriter *fbnovel = openOutput(prefix, "b-novel", style); sim4polishWriter *famulti = openOutput(prefix, "a-multi", style); sim4polishWriter *fbmulti = openOutput(prefix, "b-multi", style); sim4polishWriter *fhairy = openOutput(prefix, "hairy", style); // Force index builds // Afile->setPosition(0); Bfile->setPosition(0); // Find the largest IID // uint32 largestIID = Afile->maxIID(); if (largestIID < Bfile->maxIID()) largestIID = Bfile->maxIID(); // Iterate over all the ESTs. for (uint32 iid=0; iidgetEST(iid); sim4polishList *B = Bfile->getEST(iid); sim4polishList *Ta = 0L; sim4polishList *Tb = 0L; // Filter by quality. A->filterByQuality(minI, minC); B->filterByQuality(minI, minC); // fill out the overlap matrix olap_t **overlap = new olap_t* [A->length()]; overlap[0] = new olap_t [A->length() * B->length()]; for (uint32 i=1; ilength(); i++) overlap[i] = overlap[i-1] + B->length(); for (uint32 a=0; alength(); a++) for (uint32 b=0; blength(); b++) overlap[a][b] = findOverlap((*A)[a], (*B)[b]); // Find and remove those matches that are unique to either set. // Removing is a big pain, because we either have to know // something about the removal process, or we need to rebuild the // overlap matrix after each removal. Instead, we build a new set. 
bool *removeA = new bool [A->length()]; bool *removeB = new bool [B->length()]; for (uint32 a=0; alength(); a++) removeA[a] = false; for (uint32 b=0; blength(); b++) removeB[b] = false; for (uint32 a=0; alength(); a++) { uint32 ovl = 0; for (uint32 b=0; blength(); b++) if (overlap[a][b]) ovl++; if (ovl == 0) { removeA[a] = true; novelInA++; if (fanovel) fanovel->writeAlignment((*A)[a]); } } for (uint32 b=0; blength(); b++) { uint32 ovl = 0; for (uint32 a=0; alength(); a++) if (overlap[a][b]) ovl++; if (ovl == 0) { removeB[b] = true; novelInB++; if (fbnovel) fbnovel->writeAlignment((*B)[b]); } } // // Now find all those that are perfect matches. Yeah, yeah, we // could ignore those that we already marked for removal. // for (uint32 a=0; alength(); a++) { uint32 Boverlaps = 0; uint32 theBovl = 0; // Count the number of things we overlap in B. for (uint32 b=0; blength(); b++) { if (overlap[a][b]) { Boverlaps++; theBovl = b; } } // If exactly one overlap, we just need to check if the guy in B // also has one overlap with anybody in A. if (Boverlaps == 1) { // Count the number of overlaps the guy in B has with A. If // 1, it's a goodOverlap, else it's a multipleInA. 
uint32 b = theBovl; uint32 Aoverlaps = 0; for (uint32 x=0; xlength(); x++) if (overlap[x][b]) Aoverlaps++; if (Aoverlaps == 1) { removeA[a] = true; removeB[b] = true; goodOverlap++; // ESTiid ESTlen overlap A%id A%cov AgenLen #exons #cdnagaps B%id B%cov BgenLen #exons #cdnagaps uint32 AgenLen = 0, BgenLen = 0; uint32 Agaps = 0, Bgaps = 0; for (uint32 x=0; x < (*A)[a]->_numExons; x++) AgenLen += (*A)[a]->_exons[x]._genTo - (*A)[a]->_exons[x]._genFrom + 1; for (uint32 x=0; x < (*B)[b]->_numExons; x++) BgenLen += (*B)[b]->_exons[x]._genTo - (*B)[b]->_exons[x]._genFrom + 1; #ifdef GAP_MINIMUM for (uint32 x=1; x < (*A)[a]->_numExons; x++) { int egap = (*A)[a]->_exons[x]._estFrom - (*A)[a]->_exons[x-1]._estTo; int ggap = (*A)[a]->_exons[x]._genFrom - (*A)[a]->_exons[x-1]._genTo; int dgap = 0; if (egap > ggap) dgap = egap - ggap; else dgap = ggap - egap; if ((egap > GAP_MINIMUM) && (dgap < GAP_DIFFERENCE)) Agaps++; } for (uint32 x=1; x < (*B)[b]->_numExons; x++) { int egap = (*B)[b]->_exons[x]._estFrom - (*B)[b]->_exons[x-1]._estTo; int ggap = (*B)[b]->_exons[x]._genFrom - (*B)[b]->_exons[x-1]._genTo; int dgap = 0; if (egap > ggap) dgap = egap - ggap; else dgap = ggap - egap; if ((egap > GAP_MINIMUM) && (dgap < GAP_DIFFERENCE)) Bgaps++; } #else for (uint32 x=1; x < (*A)[a]->_numExons; x++) if ( (*A)[a]->_exons[x]._estFrom - (*A)[a]->_exons[x-1]._estTo != 1 ) Agaps++; for (uint32 x=1; x < (*B)[b]->_numExons; x++) if ( (*B)[b]->_exons[x]._estFrom - (*B)[b]->_exons[x-1]._estTo != 1 ) Bgaps++; #endif double score = 0; if (AgenLen > BgenLen) score = (double)overlap[a][b] / (double)BgenLen; else score = (double)overlap[a][b] / (double)AgenLen; fprintf(stdout, uint32FMT"\t"uint32FMT"\t"OLAPTFMT"\t%f\t%8.3f\t%8.3f\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t%8.3f\t%8.3f\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\n", iid, (*A)[a]->_estLen, overlap[a][b], score, (*A)[a]->s4p_percentIdentityExact(), (*A)[a]->s4p_percentCoverageExact(), AgenLen, (*A)[a]->_numExons, Agaps, 
(*B)[b]->s4p_percentIdentityExact(), (*B)[b]->s4p_percentCoverageExact(), BgenLen, (*B)[b]->_numExons, Bgaps); if (fasame) fasame->writeAlignment((*A)[a]); if (fbsame) fbsame->writeAlignment((*B)[b]); } } } // // Rebuild // Ta = new sim4polishList; Tb = new sim4polishList; for (uint32 a=0; alength(); a++) if (removeA[a] == false) Ta->push(new sim4polish((*A)[a])); for (uint32 b=0; blength(); b++) if (removeB[b] == false) Tb->push(new sim4polish((*B)[b])); delete A; delete B; A = Ta; B = Tb; Ta = Tb = 0L; // Rebuild overlaps // for (uint32 a=0; alength(); a++) for (uint32 b=0; blength(); b++) overlap[a][b] = findOverlap((*A)[a], (*B)[b]); // // And now all we're left with is a bunch of intersecting crud. // // Grab the first match in A. Find all the overlaps with things // in B. For each of those, find the overlaps in A. Repeat // until nothing changes. Generate a report. Remove all those // matches. Do it all again until there are no more matches. while (A->length()) { for (uint32 a=0; alength(); a++) removeA[a] = false; for (uint32 b=0; blength(); b++) removeB[b] = false; removeA[0] = true; bool keepGoing = true; while (keepGoing) { keepGoing = false; // For all of A, if we have something marked for removal, see if we // overlap with anything in B. If that b is not marked for removal, // mark it, and keep going. // for (uint32 a=0; alength(); a++) { if (removeA[a]) { for (uint32 b=0; blength(); b++) { if ((overlap[a][b]) && (removeB[b] == false)) { removeB[b] = true; keepGoing = true; } } } } // Same thing, but for B. // for (uint32 b=0; blength(); b++) { if (removeB[b]) { for (uint32 a=0; alength(); a++) { if ((overlap[a][b]) && (removeA[a] == false)) { removeA[a] = true; keepGoing = true; } } } } } // Found a component. Output it. 
uint32 inA = 0; uint32 inB = 0; for (uint32 a=0; alength(); a++) if (removeA[a]) inA++; for (uint32 b=0; blength(); b++) if (removeB[b]) inB++; if ((inA > 1) && (inB > 1)) { hairyOverlap++; //fprintf(fhairy, "EST="uint32FMT" "uint32FMT" "uint32FMT"\n", (*A)[0]->_estID, inA, inB); for (uint32 a=0; alength(); a++) if (removeA[a]) fhairy->writeAlignment((*A)[a]); for (uint32 b=0; blength(); b++) if (removeB[b]) fhairy->writeAlignment((*B)[b]); } else if ((inA == 1) && (inB > 1)) { multipleInB++; //fprintf(fbmulti, "EST="uint32FMT" "uint32FMT" "uint32FMT"\n", (*A)[0]->_estID, inA, inB); for (uint32 a=0; alength(); a++) if (removeA[a]) fbmulti->writeAlignment((*A)[a]); for (uint32 b=0; blength(); b++) if (removeB[b]) fbmulti->writeAlignment((*B)[b]); } else if ((inA > 1) && (inB == 1)) { multipleInA++; //fprintf(famulti, "EST="uint32FMT" "uint32FMT" "uint32FMT"\n", (*A)[0]->_estID, inA, inB); for (uint32 a=0; alength(); a++) if (removeA[a]) famulti->writeAlignment((*A)[a]); for (uint32 b=0; blength(); b++) if (removeB[b]) famulti->writeAlignment((*B)[b]); } else { fprintf(stderr, "ERROR! 
inA="uint32FMT" inB="uint32FMT"\n", inA, inB); } // // Rebuild // Ta = new sim4polishList; Tb = new sim4polishList; for (uint32 a=0; alength(); a++) if (removeA[a] == false) Ta->push(new sim4polish((*A)[a])); for (uint32 b=0; blength(); b++) if (removeB[b] == false) Tb->push(new sim4polish((*B)[b])); delete A; delete B; A = Ta; B = Tb; Ta = Tb = 0L; // Rebuild overlaps // for (uint32 a=0; alength(); a++) for (uint32 b=0; blength(); b++) overlap[a][b] = findOverlap((*A)[a], (*B)[b]); } if ((iid % 100) == 0) { fprintf(stderr, "IID:"uint32FMTW(8)" good:"uint32FMTW(4)" Anovel:"uint32FMTW(4)" Amulti:"uint32FMTW(4)" Bnovel:"uint32FMTW(4)" Bmulti:"uint32FMTW(4)" hairy:"uint32FMTW(4)"\r", iid, goodOverlap, novelInA, multipleInA, novelInB, multipleInB, hairyOverlap); fflush(stderr); } #if 0 if ((iid % 1234) == 0) { fprintf(stderr, "IID:"uint32FMTW(8)" good:"uint32FMTW(4)" Anovel:"uint32FMTW(4)" Amulti:"uint32FMTW(4)" Bnovel:"uint32FMTW(4)" Bmulti:"uint32FMTW(4)" hairy:"uint32FMTW(4)"\r", iid, goodOverlap, novelInA, multipleInA, novelInB, multipleInB, hairyOverlap); fflush(stderr); } #endif delete [] overlap[0]; delete [] overlap; delete [] removeA; delete [] removeB; delete A; delete B; } delete fasame; delete fbsame; delete fanovel; delete fbnovel; delete famulti; delete fbmulti; delete fhairy; delete Afile; delete Bfile; fprintf(stderr, "\ngood:"uint32FMTW(4)" Anovel:"uint32FMTW(4)" Amulti:"uint32FMTW(4)" Bnovel:"uint32FMTW(4)" Bmulti:"uint32FMTW(4)" hairy:"uint32FMTW(4)"\n", goodOverlap, novelInA, multipleInA, novelInB, multipleInB, hairyOverlap); exit(0); } kmer-code-2013-trunk/sim4dbutils/vennPolishes.C0000644000000000000000000001226412322046702020175 0ustar rootroot#include #include #include #include "bio++.H" #include "sim4.H" const char *usage = "usage: %s [options] ...\n" "\n" " Given n sets of sim4 polishes (of the same set of cDNA to the same\n" " set of genomic, but this isn't enforced) this code will generate a\n" " Venn diagram of how the sequences map.\n" 
"\n" " -n there are in the input set\n" " -i filter matches to be >= identity\n" " default = 95\n" " -c filter matches to be >= coverage\n" " default = 50\n" " -d dump the sequence IIDs in to stdout\n" "\n" " -plot write a plot-able datafile of the venn diagram\n" " for percent identity to 100 (inclusive)\n" " and coverage.\n"; // Yes, yes. Tell me all about how bad globals are. uint32 minI = 95; uint32 minC = 50; uint32 foundMax = 100000; uint32 dumpIID = ~uint32ZERO; int numArgs = 0; bool plot = false; uint32 numFiles = 0; uint32 **found = 0L; uint32 indexMax = 0; uint32 *counts = 0L; uint32 *sizes = 0L; void doVenn(uint32 minI, uint32 minC) { // Count how many elements are in each set for (uint32 i=0; i= minI) sizes[i]++; } for (uint32 i=0; i= minI) membership |= 1 << dataset; } if (membership == dumpIID) fprintf(stdout, uint32FMT"\n", thisguy); counts[membership]++; } } int main(int argc, char **argv) { if ((argc < 5)) { fprintf(stderr, usage, argv[0]); exit(1); } int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-n") == 0) { foundMax = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-i") == 0) { minI = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-c") == 0) { minC = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-d") == 0) { dumpIID = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-plot") == 0) { plot = true; } else { // Assume we got all the options, and we are at a file. // numArgs = arg; arg = argc; } arg++; } numFiles = argc - numArgs; found = new uint32 * [numFiles]; if (numFiles > 16) { fprintf(stderr, "WARNING: You gave me "uint32FMT" files! That's pretty big. I don't know\n", numFiles); fprintf(stderr, " if I'm up to it. 
Fasten seat belts and hang on!\n"); } for (int arg=numArgs; argnextAlignment(p)) { if ((p->_percentIdentity >= minI) && (p->_querySeqIdentity >= minC)) { if (p->_estID >= foundMax) { fprintf(stderr, "Please increase foundMax, or make me reallocate storage.\n"); exit(1); } if (found[arg-numArgs][p->_estID] < p->_percentIdentity) found[arg-numArgs][p->_estID] = p->_percentIdentity; } } } // There are 2^n categories for n files. // // If A and B, then there is // // A B // 0 0 - neither (we can't compute this) // 0 1 - only B // 1 0 - only A // 1 1 - both A and B // // So, we make an array of size 2^n that holds the ocunts of each // class. It's indexed by a bit vector. // indexMax = 1 << numFiles; counts = new uint32 [indexMax]; sizes = new uint32 [numFiles]; if (dumpIID != ~uint32ZERO) { doVenn(minI, minC); } else if (plot) { for (uint32 id=minI; id <= 100; id++) { doVenn(id, minC); fprintf(stdout, uint32FMTW(3)" ", id); for (uint32 i=0; i #include #include #include #include "sim4.H" // Input matches should be sorted by cDNA, and ran through pickBest. // This code will remove all matches that have the same genomic span, // and warn when two matches have nearly the same genomic span. 
sim4polishWriter *W = 0L; void pickBest(sim4polish **p, int pNum) { int i, j; for (i=0; i_numExons == p[j]->_numExons) && (p[i]->_genID == p[j]->_genID)) { int a, b; int sd = 666; int ed = 666; a = p[i]->_exons[0]._genFrom; b = p[j]->_exons[0]._genFrom; if (a < b) sd = b - a; else sd = a - b; a = p[i]->_exons[p[i]->_numExons-1]._genTo; b = p[j]->_exons[p[j]->_numExons-1]._genTo; if (a < b) ed = b - a; else ed = a - b; if ((sd == 0) && (ed == 0)) { //fprintf(stderr, "%d and %d are exact; %d removed.\n", i, j, j); delete p[j]; p[j] = 0L; } else if ((sd < 10) && (ed < 10)) { char *alignI = p[i]->s4p_polishToString(sim4polishS4DB); char *alignJ = p[j]->s4p_polishToString(sim4polishS4DB); fprintf(stderr, "----------------------------------------\n"); fprintf(stderr, "Warning: %d and %d are similar.\n", i, j); fprintf(stderr, "%s\n", alignI); fprintf(stderr, "%s\n", alignJ); fprintf(stderr, "----------------------------------------\n"); delete [] alignI; delete [] alignJ; } } } } for (i=0; iwriteAlignment(p[i]); delete p[i]; } } } int main(int argc, char **argv) { uint32 pNum = 0; uint32 pAlloc = 8388608; uint32 estID = ~uint32ZERO; sim4polishStyle style = sim4polishStyleDefault; int arg = 1; while (arg < argc) { if (strcmp(argv[1], "-gff3") == 0) style = sim4polishGFF3; else fprintf(stderr, "usage: %s [-gff3] < file > file\n", argv[0]); arg++; } if (isatty(fileno(stdin))) { fprintf(stderr, "usage: %s [-gff3] < file > file\n", argv[0]); if (isatty(fileno(stdin))) fprintf(stderr, "error: I cannot read polishes from the terminal!\n\n"); exit(1); } // Read polishes, picking the best when we see a change in // the estID. 
sim4polishReader *R = new sim4polishReader("-"); sim4polish **p = new sim4polish * [pAlloc]; sim4polish *q = 0L; W = new sim4polishWriter("-", style); if (R->getsim4polishStyle() != style) fprintf(stderr, "warning: input format and output format differ.\n"); while (R->nextAlignment(q)) { if ((q->_estID != estID) && (pNum > 0)) { pickBest(p, pNum); pNum = 0; } if (pNum >= pAlloc) { sim4polish **P = new sim4polish * [pAlloc * 2]; memcpy(p, P, sizeof(sim4polish *) * pAlloc); delete [] p; p = P; pAlloc *= 2; } p[pNum++] = q; estID = q->_estID; q = 0L; // Else we will delete the polish we just saved! } if (pNum > 0) pickBest(p, pNum); delete [] p; delete R; delete W; return(0); } kmer-code-2013-trunk/sim4dbutils/mergePolishes.C0000644000000000000000000000710012322046702020317 0ustar rootroot#include #include #include #include "bio++.H" //#include "fasta.H" #include "sim4.H" // usage: mergeInput -m match1 cdna1 -m match2 cdna2 -m ... -o match cdna [-gff3] // // Merges the results from two ESTmapper runs. The runs MUST be on // the same genomic sequence using DIFFERENT cDNA inputs. 
static void loadNext(uint32 idx, sim4polish **polishes, sim4polishReader **inMatch, uint32 *numSeqs) { if (inMatch[idx]->nextAlignment(polishes[idx])) polishes[idx]->_estID += numSeqs[idx]; } int main(int argc, char **argv) { char **inMatchName = new char * [argc]; char **inSeqName = new char * [argc]; char *otMatchName = 0L; char *otSeqName = 0L; sim4polishReader **inMatch = new sim4polishReader * [argc]; sim4polish **polishes = new sim4polish * [argc]; sim4polishWriter *otMatch = 0L; uint32 *numSeqs = new uint32 [argc]; uint32 numIn = 0; sim4polishStyle style = sim4polishStyleDefault; int arg = 1; while (arg < argc) { if (strcmp(argv[arg], "-m") == 0) { arg++; inMatchName[numIn] = (char *)argv[arg++]; inSeqName[numIn] = (char *)argv[arg++]; inMatch[numIn] = new sim4polishReader(inMatchName[numIn]); numIn++; } else if (strcmp(argv[arg], "-o") == 0) { arg++; otMatchName = (char *)argv[arg++]; otSeqName = (char *)argv[arg++]; } else if (strcmp(argv[arg], "-gff3") == 0) { style = sim4polishGFF3; } } if ((numIn < 1) || (otMatchName == 0L)) { fprintf(stderr, "usage: %s -o match cdna -m match1 cdna1 -m match2 cdna2 -m ... [-gff3]\n", argv[0]); exit(1); } otMatch = new sim4polishWriter(otMatchName, style); for (uint32 i=0; igetsim4polishStyle() != style) { fprintf(stderr, "warning: input format and output format may differ.\n"); break; } // Merge the input sequences into the output sequence. We also count the number of sequences // here, so we don't need random-access of the input. // fprintf(stderr, "Merging sequences.\n"); FILE *O = fopen(otSeqName, "w"); for (uint32 i=0; igetSequenceInCore(); numSeqs[i] = 0; while (B) { fprintf(O, ">%s\n%s\n", B->header(), B->sequence()); numSeqs[i]++; delete B; B = I->getSequenceInCore(); } delete I; } fclose(O); // Make numSeqs[] be the offset needed to convert a polish in each inMatch[] file into a polish // in the merged file. 
// uint32 o = 0; uint32 s = 0; for (uint32 i=0; i 0)) first = i; otMatch->writeAlignment(polishes[first]); loadNext(first, polishes, inMatch, numSeqs); } delete [] polishes; delete inMatch; delete otMatch; } kmer-code-2013-trunk/sim4dbutils/s4p_overlap.C0000644000000000000000000000222112415073322017747 0ustar rootroot#include "util++.H" #include "sim4.H" // Build an interval list with all exons (from both guys), merge // overlapping regions, compute the length, subtract from the total. // Result: the number of bp that the two matches overlap in the // genomic. // uint32 findOverlap(sim4polish *A, sim4polish *B) { if ((A->_genID != B->_genID) || (A->_matchOrientation != B->_matchOrientation)) return(0); uint32 length = 0; uint32 total = 0; intervalList IL; for (uint32 i=0; i_numExons; i++) { length = A->_exons[i]._genTo - A->_exons[i]._genFrom + 1; total += length; IL.add(A->_exons[i]._genFrom, length); } for (uint32 i=0; i_numExons; i++) { length = B->_exons[i]._genTo - B->_exons[i]._genFrom + 1; total += length; IL.add(B->_exons[i]._genFrom, length); } IL.merge(); #ifdef OLAP_IS_SHORT if (total - IL.sumOfLengths() > 65536) { fprintf(stderr, "findOverlap()-- ERROR! 
The overlap is larger than the return type!\n"); fprintf(stderr, "findOverlap()-- Switch to 32-bit ints in s4p_overlap.H.\n"); } #endif return(total - IL.sumOfLengths()); } kmer-code-2013-trunk/sim4dbutils/cleanPolishes-experiments/0000755000000000000000000000000012641613357022553 5ustar rootrootkmer-code-2013-trunk/sim4dbutils/cleanPolishes-experiments/intronstats.pl0000644000000000000000000000707607605137611025510 0ustar rootroot#!/usr/local/bin/perl $| = 1; use strict; use FindBin; use lib "/home/walenzbp/projects/scripts"; use libBri; my $tot = 0; my $sma = 0; my $big = 0; my $smafirst = 0; my $smalast = 0; my $bigfirst = 0; my $biglast = 0; my $smaoneintronF = 0; my $smaoneintronL = 0; my $smaoneintronB = 0; my $bigoneintron = 0; my $interiorintron = 0; my $ff=0; my $fc=0; my $lf=0; my $lc=0; my @bigA; my @smaA; open(SMA, "> sma-exon-after-big-intron"); open(BIG, "> big-exon-after-big-intron"); open(SMAO, "> sma-exon-after-big-oneintron"); open(BIGO, "> big-exon-after-big-oneintron"); while (!eof(STDIN)) { $tot++; my %p = &libBri::readPolish(*STDIN); my $exonsLen = scalar(@{$p{'exons'}}); my $firstintron = 1; if ($exonsLen > 1) { my @exons = @{$p{'exons'}}; my $lastC = shift @exons; loop: my $thisC = shift @exons; my $gap = $thisC->{'GENOMICstart'} - $lastC->{'GENOMICend'}; if ($gap > 499999) { if (($firstintron) && (scalar(@exons) == 0)) { # Exactly one intron # if ((($lastC->{'cDNAend'} - $lastC->{'cDNAstart'}) < 50) && (($thisC->{'cDNAend'} - $thisC->{'cDNAstart'}) < 50)) { $sma++; $smaoneintronB++; print SMAO $p{'raw'}; } elsif (($lastC->{'cDNAend'} - $lastC->{'cDNAstart'}) < 50) { $sma++; $smaoneintronF++; print SMAO $p{'raw'}; } elsif (($thisC->{'cDNAend'} - $thisC->{'cDNAstart'}) < 50) { $sma++; $smaoneintronL++; print SMAO $p{'raw'}; } else { $big++; $bigoneintron++; print BIGO $p{'raw'}; } } elsif ($firstintron) { # First intron # if (($lastC->{'cDNAend'} - $lastC->{'cDNAstart'}) < 50) { $sma++; $smafirst++; print SMA $p{'raw'}; if 
($p{'matchOrientation'} eq "forward") { $ff++; } else { $fc++; } } else { $big++; $bigfirst++; print BIG $p{'raw'}; } } elsif (scalar(@exons) == 0) { # Last intron # if (($thisC->{'cDNAend'} - $thisC->{'cDNAstart'}) < 50) { $sma++; $smalast++; print SMA $p{'raw'}; if ($p{'matchOrientation'} eq "forward") { $lf++; } else { $lc++; } } else { $big++; $biglast++; print BIG $p{'raw'}; } } else { # Interior intron # $interiorintron++; } print "int: $interiorintron sma: $sma(First:$smafirst,Last:$smalast,"; print "oneF:$smaoneintronF,oneL:$smaoneintronL,oneB:$smaoneintronB) -- big "; print "$big(First:$bigfirst,Last:$biglast,One:$bigoneintron) -- tot $tot -- "; print "ff=$ff,fc=$fc lf=$lf,lc=$lc\n"; } $firstintron = 0; $lastC = $thisC; goto loop if (scalar(@exons) > 0); } } close(SMA); close(BIG); close(SMAO); close(BIGO); kmer-code-2013-trunk/sim4dbutils/cleanPolishes-experiments/evalThresh.pl.out0000644000000000000000000004040207605137611026021 0ustar rootrootREALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 10000bp long. oneExon: 2656968 allSmallExons: 1524188 good: 125574 probably good: 36280 junkExonsLeft: 19490 junkExonsRight: 18815 junkExonsBoth: 28 intronOnGap: 15100 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 20000bp long. oneExon: 2656968 allSmallExons: 1634856 good: 55684 probably good: 15887 junkExonsLeft: 11615 junkExonsRight: 10569 junkExonsBoth: 21 intronOnGap: 10843 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 30000bp long. oneExon: 2656968 allSmallExons: 1669430 good: 34761 probably good: 9604 junkExonsLeft: 9384 junkExonsRight: 8675 junkExonsBoth: 19 intronOnGap: 7602 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 40000bp long. 
oneExon: 2656968 allSmallExons: 1690898 good: 22957 probably good: 6061 junkExonsLeft: 7761 junkExonsRight: 7483 junkExonsBoth: 19 intronOnGap: 4296 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 50000bp long. oneExon: 2656968 allSmallExons: 1700693 good: 16879 probably good: 4490 junkExonsLeft: 7095 junkExonsRight: 7000 junkExonsBoth: 19 intronOnGap: 3299 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 60000bp long. oneExon: 2656968 allSmallExons: 1706978 good: 12632 probably good: 3424 junkExonsLeft: 6726 junkExonsRight: 6544 junkExonsBoth: 19 intronOnGap: 3152 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 70000bp long. oneExon: 2656968 allSmallExons: 1711730 good: 9407 probably good: 2600 junkExonsLeft: 6440 junkExonsRight: 6268 junkExonsBoth: 19 intronOnGap: 3011 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 80000bp long. oneExon: 2656968 allSmallExons: 1714912 good: 7388 probably good: 2165 junkExonsLeft: 6173 junkExonsRight: 6005 junkExonsBoth: 19 intronOnGap: 2813 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 90000bp long. oneExon: 2656968 allSmallExons: 1717745 good: 5703 probably good: 1682 junkExonsLeft: 5981 junkExonsRight: 5792 junkExonsBoth: 17 intronOnGap: 2555 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 100000bp long. oneExon: 2656968 allSmallExons: 1719978 good: 4589 probably good: 1405 junkExonsLeft: 5785 junkExonsRight: 5653 junkExonsBoth: 17 intronOnGap: 2048 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 110000bp long. 
oneExon: 2656968 allSmallExons: 1721713 good: 3601 probably good: 1147 junkExonsLeft: 5655 junkExonsRight: 5492 junkExonsBoth: 17 intronOnGap: 1850 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 120000bp long. oneExon: 2656968 allSmallExons: 1722934 good: 2983 probably good: 867 junkExonsLeft: 5566 junkExonsRight: 5353 junkExonsBoth: 17 intronOnGap: 1755 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 130000bp long. oneExon: 2656968 allSmallExons: 1723896 good: 2464 probably good: 751 junkExonsLeft: 5463 junkExonsRight: 5268 junkExonsBoth: 17 intronOnGap: 1616 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 140000bp long. oneExon: 2656968 allSmallExons: 1724711 good: 2136 probably good: 691 junkExonsLeft: 5202 junkExonsRight: 5192 junkExonsBoth: 17 intronOnGap: 1526 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 150000bp long. oneExon: 2656968 allSmallExons: 1725290 good: 1914 probably good: 621 junkExonsLeft: 5135 junkExonsRight: 5123 junkExonsBoth: 17 intronOnGap: 1375 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 160000bp long. oneExon: 2656968 allSmallExons: 1726033 good: 1550 probably good: 512 junkExonsLeft: 5064 junkExonsRight: 5062 junkExonsBoth: 17 intronOnGap: 1237 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 170000bp long. oneExon: 2656968 allSmallExons: 1726502 good: 1373 probably good: 449 junkExonsLeft: 5009 junkExonsRight: 4997 junkExonsBoth: 17 intronOnGap: 1128 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 180000bp long. 
oneExon: 2656968 allSmallExons: 1726957 good: 1195 probably good: 410 junkExonsLeft: 4957 junkExonsRight: 4918 junkExonsBoth: 17 intronOnGap: 1021 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 190000bp long. oneExon: 2656968 allSmallExons: 1727281 good: 1087 probably good: 375 junkExonsLeft: 4912 junkExonsRight: 4854 junkExonsBoth: 17 intronOnGap: 949 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 200000bp long. oneExon: 2656968 allSmallExons: 1727565 good: 990 probably good: 363 junkExonsLeft: 4860 junkExonsRight: 4792 junkExonsBoth: 17 intronOnGap: 888 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 210000bp long. oneExon: 2656968 allSmallExons: 1727799 good: 952 probably good: 340 junkExonsLeft: 4822 junkExonsRight: 4748 junkExonsBoth: 17 intronOnGap: 797 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 220000bp long. oneExon: 2656968 allSmallExons: 1727999 good: 890 probably good: 329 junkExonsLeft: 4777 junkExonsRight: 4706 junkExonsBoth: 17 intronOnGap: 757 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 230000bp long. oneExon: 2656968 allSmallExons: 1728222 good: 807 probably good: 316 junkExonsLeft: 4733 junkExonsRight: 4673 junkExonsBoth: 17 intronOnGap: 707 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 240000bp long. oneExon: 2656968 allSmallExons: 1728381 good: 751 probably good: 309 junkExonsLeft: 4695 junkExonsRight: 4640 junkExonsBoth: 17 intronOnGap: 682 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 250000bp long. 
oneExon: 2656968 allSmallExons: 1728559 good: 713 probably good: 296 junkExonsLeft: 4653 junkExonsRight: 4608 junkExonsBoth: 17 intronOnGap: 629 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 260000bp long. oneExon: 2656968 allSmallExons: 1728687 good: 681 probably good: 289 junkExonsLeft: 4624 junkExonsRight: 4577 junkExonsBoth: 17 intronOnGap: 600 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 270000bp long. oneExon: 2656968 allSmallExons: 1728816 good: 654 probably good: 277 junkExonsLeft: 4588 junkExonsRight: 4547 junkExonsBoth: 17 intronOnGap: 576 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 280000bp long. oneExon: 2656968 allSmallExons: 1728958 good: 627 probably good: 266 junkExonsLeft: 4547 junkExonsRight: 4509 junkExonsBoth: 17 intronOnGap: 551 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 290000bp long. oneExon: 2656968 allSmallExons: 1729115 good: 569 probably good: 253 junkExonsLeft: 4504 junkExonsRight: 4483 junkExonsBoth: 17 intronOnGap: 534 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 300000bp long. oneExon: 2656968 allSmallExons: 1729229 good: 547 probably good: 248 junkExonsLeft: 4469 junkExonsRight: 4452 junkExonsBoth: 17 intronOnGap: 513 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 310000bp long. oneExon: 2656968 allSmallExons: 1729412 good: 511 probably good: 222 junkExonsLeft: 4433 junkExonsRight: 4392 junkExonsBoth: 17 intronOnGap: 488 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 320000bp long. 
oneExon: 2656968 allSmallExons: 1729498 good: 497 probably good: 218 junkExonsLeft: 4393 junkExonsRight: 4371 junkExonsBoth: 16 intronOnGap: 482 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 330000bp long. oneExon: 2656968 allSmallExons: 1729639 good: 458 probably good: 206 junkExonsLeft: 4368 junkExonsRight: 4351 junkExonsBoth: 16 intronOnGap: 437 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 340000bp long. oneExon: 2656968 allSmallExons: 1729757 good: 449 probably good: 205 junkExonsLeft: 4329 junkExonsRight: 4330 junkExonsBoth: 16 intronOnGap: 389 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 350000bp long. oneExon: 2656968 allSmallExons: 1729832 good: 442 probably good: 203 junkExonsLeft: 4302 junkExonsRight: 4300 junkExonsBoth: 16 intronOnGap: 380 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 360000bp long. oneExon: 2656968 allSmallExons: 1729923 good: 431 probably good: 199 junkExonsLeft: 4273 junkExonsRight: 4270 junkExonsBoth: 16 intronOnGap: 363 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 370000bp long. oneExon: 2656968 allSmallExons: 1730017 good: 412 probably good: 198 junkExonsLeft: 4247 junkExonsRight: 4239 junkExonsBoth: 15 intronOnGap: 347 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 380000bp long. oneExon: 2656968 allSmallExons: 1730084 good: 406 probably good: 194 junkExonsLeft: 4218 junkExonsRight: 4217 junkExonsBoth: 15 intronOnGap: 341 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 390000bp long. 
oneExon: 2656968 allSmallExons: 1730168 good: 401 probably good: 191 junkExonsLeft: 4187 junkExonsRight: 4190 junkExonsBoth: 15 intronOnGap: 323 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 400000bp long. oneExon: 2656968 allSmallExons: 1730268 good: 379 probably good: 172 junkExonsLeft: 4158 junkExonsRight: 4162 junkExonsBoth: 15 intronOnGap: 321 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 410000bp long. oneExon: 2656968 allSmallExons: 1730331 good: 366 probably good: 171 junkExonsLeft: 4142 junkExonsRight: 4137 junkExonsBoth: 15 intronOnGap: 313 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 420000bp long. oneExon: 2656968 allSmallExons: 1730389 good: 364 probably good: 168 junkExonsLeft: 4116 junkExonsRight: 4116 junkExonsBoth: 15 intronOnGap: 307 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 430000bp long. oneExon: 2656968 allSmallExons: 1730448 good: 361 probably good: 168 junkExonsLeft: 4094 junkExonsRight: 4089 junkExonsBoth: 15 intronOnGap: 300 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 440000bp long. oneExon: 2656968 allSmallExons: 1730503 good: 358 probably good: 167 junkExonsLeft: 4076 junkExonsRight: 4064 junkExonsBoth: 15 intronOnGap: 292 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 450000bp long. oneExon: 2656968 allSmallExons: 1730551 good: 355 probably good: 167 junkExonsLeft: 4055 junkExonsRight: 4043 junkExonsBoth: 15 intronOnGap: 289 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 460000bp long. 
oneExon: 2656968 allSmallExons: 1730605 good: 351 probably good: 166 junkExonsLeft: 4038 junkExonsRight: 4014 junkExonsBoth: 15 intronOnGap: 286 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 470000bp long. oneExon: 2656968 allSmallExons: 1730666 good: 349 probably good: 161 junkExonsLeft: 4015 junkExonsRight: 3992 junkExonsBoth: 15 intronOnGap: 277 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 480000bp long. oneExon: 2656968 allSmallExons: 1730721 good: 348 probably good: 159 junkExonsLeft: 3987 junkExonsRight: 3973 junkExonsBoth: 15 intronOnGap: 272 total: 4396443 REALLY QUIET MODE ENABLED -- NO matches are output! A big intron is one that is at least 490000bp long. oneExon: 2656968 allSmallExons: 1730792 good: 343 probably good: 156 junkExonsLeft: 3959 junkExonsRight: 3943 junkExonsBoth: 15 intronOnGap: 267 total: 4396443 THRESHOLD = 10000 THRESHOLD = 20000 THRESHOLD = 30000 THRESHOLD = 40000 THRESHOLD = 50000 THRESHOLD = 60000 THRESHOLD = 70000 THRESHOLD = 80000 THRESHOLD = 90000 THRESHOLD = 100000 THRESHOLD = 110000 THRESHOLD = 120000 THRESHOLD = 130000 THRESHOLD = 140000 THRESHOLD = 150000 THRESHOLD = 160000 THRESHOLD = 170000 THRESHOLD = 180000 THRESHOLD = 190000 THRESHOLD = 200000 THRESHOLD = 210000 THRESHOLD = 220000 THRESHOLD = 230000 THRESHOLD = 240000 THRESHOLD = 250000 THRESHOLD = 260000 THRESHOLD = 270000 THRESHOLD = 280000 THRESHOLD = 290000 THRESHOLD = 300000 THRESHOLD = 310000 THRESHOLD = 320000 THRESHOLD = 330000 THRESHOLD = 340000 THRESHOLD = 350000 THRESHOLD = 360000 THRESHOLD = 370000 THRESHOLD = 380000 THRESHOLD = 390000 THRESHOLD = 400000 THRESHOLD = 410000 THRESHOLD = 420000 THRESHOLD = 430000 THRESHOLD = 440000 THRESHOLD = 450000 THRESHOLD = 460000 THRESHOLD = 470000 THRESHOLD = 480000 THRESHOLD = 490000 kmer-code-2013-trunk/sim4dbutils/cleanPolishes-experiments/evalThresh.pl0000644000000000000000000000032307605137611025211 
0ustar rootroot#!/bin/perl $threshold = 10000; while ($threshold < 500000) { print "THRESHOLD = $threshold\n"; system("./splitMatches -qquiet -threshold $threshold < /part3/polishes-good"); $threshold += 10000 } kmer-code-2013-trunk/sim4dbutils/cleanPolishes-experiments/evalThresh.ps0000644000000000000000000004560707605137611025236 0ustar rootroot%!PS-Adobe-2.0 %%Title: evalThresh.ps %%Creator: gnuplot 3.7 patchlevel 0 %%CreationDate: Fri Jun 28 11:19:16 2002 %%DocumentFonts: (atend) %%BoundingBox: 50 50 554 770 %%Orientation: Landscape %%Pages: (atend) %%EndComments /gnudict 256 dict def gnudict begin /Color true def /Solid false def /gnulinewidth 5.000 def /userlinewidth gnulinewidth def /vshift -46 def /dl {10 mul} def /hpt_ 31.5 def /vpt_ 31.5 def /hpt hpt_ def /vpt vpt_ def /M {moveto} bind def /L {lineto} bind def /R {rmoveto} bind def /V {rlineto} bind def /vpt2 vpt 2 mul def /hpt2 hpt 2 mul def /Lshow { currentpoint stroke M 0 vshift R show } def /Rshow { currentpoint stroke M dup stringwidth pop neg vshift R show } def /Cshow { currentpoint stroke M dup stringwidth pop -2 div vshift R show } def /UP { dup vpt_ mul /vpt exch def hpt_ mul /hpt exch def /hpt2 hpt 2 mul def /vpt2 vpt 2 mul def } def /DL { Color {setrgbcolor Solid {pop []} if 0 setdash } {pop pop pop Solid {pop []} if 0 setdash} ifelse } def /BL { stroke gnulinewidth 2 mul setlinewidth } def /AL { stroke gnulinewidth 2 div setlinewidth } def /UL { gnulinewidth mul /userlinewidth exch def } def /PL { stroke userlinewidth setlinewidth } def /LTb { BL [] 0 0 0 DL } def /LTa { AL [1 dl 2 dl] 0 setdash 0 0 0 setrgbcolor } def /LT0 { PL [] 1 0 0 DL } def /LT1 { PL [4 dl 2 dl] 0 1 0 DL } def /LT2 { PL [2 dl 3 dl] 0 0 1 DL } def /LT3 { PL [1 dl 1.5 dl] 1 0 1 DL } def /LT4 { PL [5 dl 2 dl 1 dl 2 dl] 0 1 1 DL } def /LT5 { PL [4 dl 3 dl 1 dl 3 dl] 1 1 0 DL } def /LT6 { PL [2 dl 2 dl 2 dl 4 dl] 0 0 0 DL } def /LT7 { PL [2 dl 2 dl 2 dl 2 dl 2 dl 4 dl] 1 0.3 0 DL } def /LT8 { PL [2 dl 2 dl 2 dl 2 dl 2 dl 2 
dl 2 dl 4 dl] 0.5 0.5 0.5 DL } def /Pnt { stroke [] 0 setdash gsave 1 setlinecap M 0 0 V stroke grestore } def /Dia { stroke [] 0 setdash 2 copy vpt add M hpt neg vpt neg V hpt vpt neg V hpt vpt V hpt neg vpt V closepath stroke Pnt } def /Pls { stroke [] 0 setdash vpt sub M 0 vpt2 V currentpoint stroke M hpt neg vpt neg R hpt2 0 V stroke } def /Box { stroke [] 0 setdash 2 copy exch hpt sub exch vpt add M 0 vpt2 neg V hpt2 0 V 0 vpt2 V hpt2 neg 0 V closepath stroke Pnt } def /Crs { stroke [] 0 setdash exch hpt sub exch vpt add M hpt2 vpt2 neg V currentpoint stroke M hpt2 neg 0 R hpt2 vpt2 V stroke } def /TriU { stroke [] 0 setdash 2 copy vpt 1.12 mul add M hpt neg vpt -1.62 mul V hpt 2 mul 0 V hpt neg vpt 1.62 mul V closepath stroke Pnt } def /Star { 2 copy Pls Crs } def /BoxF { stroke [] 0 setdash exch hpt sub exch vpt add M 0 vpt2 neg V hpt2 0 V 0 vpt2 V hpt2 neg 0 V closepath fill } def /TriUF { stroke [] 0 setdash vpt 1.12 mul add M hpt neg vpt -1.62 mul V hpt 2 mul 0 V hpt neg vpt 1.62 mul V closepath fill } def /TriD { stroke [] 0 setdash 2 copy vpt 1.12 mul sub M hpt neg vpt 1.62 mul V hpt 2 mul 0 V hpt neg vpt -1.62 mul V closepath stroke Pnt } def /TriDF { stroke [] 0 setdash vpt 1.12 mul sub M hpt neg vpt 1.62 mul V hpt 2 mul 0 V hpt neg vpt -1.62 mul V closepath fill} def /DiaF { stroke [] 0 setdash vpt add M hpt neg vpt neg V hpt vpt neg V hpt vpt V hpt neg vpt V closepath fill } def /Pent { stroke [] 0 setdash 2 copy gsave translate 0 hpt M 4 {72 rotate 0 hpt L} repeat closepath stroke grestore Pnt } def /PentF { stroke [] 0 setdash gsave translate 0 hpt M 4 {72 rotate 0 hpt L} repeat closepath fill grestore } def /Circle { stroke [] 0 setdash 2 copy hpt 0 360 arc stroke Pnt } def /CircleF { stroke [] 0 setdash hpt 0 360 arc fill } def /C0 { BL [] 0 setdash 2 copy moveto vpt 90 450 arc } bind def /C1 { BL [] 0 setdash 2 copy moveto 2 copy vpt 0 90 arc closepath fill vpt 0 360 arc closepath } bind def /C2 { BL [] 0 setdash 2 copy moveto 2 copy vpt 90 180 
arc closepath fill vpt 0 360 arc closepath } bind def /C3 { BL [] 0 setdash 2 copy moveto 2 copy vpt 0 180 arc closepath fill vpt 0 360 arc closepath } bind def /C4 { BL [] 0 setdash 2 copy moveto 2 copy vpt 180 270 arc closepath fill vpt 0 360 arc closepath } bind def /C5 { BL [] 0 setdash 2 copy moveto 2 copy vpt 0 90 arc 2 copy moveto 2 copy vpt 180 270 arc closepath fill vpt 0 360 arc } bind def /C6 { BL [] 0 setdash 2 copy moveto 2 copy vpt 90 270 arc closepath fill vpt 0 360 arc closepath } bind def /C7 { BL [] 0 setdash 2 copy moveto 2 copy vpt 0 270 arc closepath fill vpt 0 360 arc closepath } bind def /C8 { BL [] 0 setdash 2 copy moveto 2 copy vpt 270 360 arc closepath fill vpt 0 360 arc closepath } bind def /C9 { BL [] 0 setdash 2 copy moveto 2 copy vpt 270 450 arc closepath fill vpt 0 360 arc closepath } bind def /C10 { BL [] 0 setdash 2 copy 2 copy moveto vpt 270 360 arc closepath fill 2 copy moveto 2 copy vpt 90 180 arc closepath fill vpt 0 360 arc closepath } bind def /C11 { BL [] 0 setdash 2 copy moveto 2 copy vpt 0 180 arc closepath fill 2 copy moveto 2 copy vpt 270 360 arc closepath fill vpt 0 360 arc closepath } bind def /C12 { BL [] 0 setdash 2 copy moveto 2 copy vpt 180 360 arc closepath fill vpt 0 360 arc closepath } bind def /C13 { BL [] 0 setdash 2 copy moveto 2 copy vpt 0 90 arc closepath fill 2 copy moveto 2 copy vpt 180 360 arc closepath fill vpt 0 360 arc closepath } bind def /C14 { BL [] 0 setdash 2 copy moveto 2 copy vpt 90 360 arc closepath fill vpt 0 360 arc } bind def /C15 { BL [] 0 setdash 2 copy vpt 0 360 arc closepath fill vpt 0 360 arc closepath } bind def /Rec { newpath 4 2 roll moveto 1 index 0 rlineto 0 exch rlineto neg 0 rlineto closepath } bind def /Square { dup Rec } bind def /Bsquare { vpt sub exch vpt sub exch vpt2 Square } bind def /S0 { BL [] 0 setdash 2 copy moveto 0 vpt rlineto BL Bsquare } bind def /S1 { BL [] 0 setdash 2 copy vpt Square fill Bsquare } bind def /S2 { BL [] 0 setdash 2 copy exch vpt sub exch vpt 
Square fill Bsquare } bind def /S3 { BL [] 0 setdash 2 copy exch vpt sub exch vpt2 vpt Rec fill Bsquare } bind def /S4 { BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt Square fill Bsquare } bind def /S5 { BL [] 0 setdash 2 copy 2 copy vpt Square fill exch vpt sub exch vpt sub vpt Square fill Bsquare } bind def /S6 { BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill Bsquare } bind def /S7 { BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill 2 copy vpt Square fill Bsquare } bind def /S8 { BL [] 0 setdash 2 copy vpt sub vpt Square fill Bsquare } bind def /S9 { BL [] 0 setdash 2 copy vpt sub vpt vpt2 Rec fill Bsquare } bind def /S10 { BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt Square fill Bsquare } bind def /S11 { BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt2 vpt Rec fill Bsquare } bind def /S12 { BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill Bsquare } bind def /S13 { BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill 2 copy vpt Square fill Bsquare } bind def /S14 { BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill 2 copy exch vpt sub exch vpt Square fill Bsquare } bind def /S15 { BL [] 0 setdash 2 copy Bsquare fill Bsquare } bind def /D0 { gsave translate 45 rotate 0 0 S0 stroke grestore } bind def /D1 { gsave translate 45 rotate 0 0 S1 stroke grestore } bind def /D2 { gsave translate 45 rotate 0 0 S2 stroke grestore } bind def /D3 { gsave translate 45 rotate 0 0 S3 stroke grestore } bind def /D4 { gsave translate 45 rotate 0 0 S4 stroke grestore } bind def /D5 { gsave translate 45 rotate 0 0 S5 stroke grestore } bind def /D6 { gsave translate 45 rotate 0 0 S6 stroke grestore } bind def /D7 { gsave translate 45 rotate 0 0 S7 stroke grestore } bind def /D8 { gsave translate 45 rotate 0 0 S8 stroke grestore } bind def /D9 { gsave translate 45 rotate 0 0 S9 stroke grestore } bind def /D10 { gsave translate 45 rotate 
0 0 S10 stroke grestore } bind def /D11 { gsave translate 45 rotate 0 0 S11 stroke grestore } bind def /D12 { gsave translate 45 rotate 0 0 S12 stroke grestore } bind def /D13 { gsave translate 45 rotate 0 0 S13 stroke grestore } bind def /D14 { gsave translate 45 rotate 0 0 S14 stroke grestore } bind def /D15 { gsave translate 45 rotate 0 0 S15 stroke grestore } bind def /DiaE { stroke [] 0 setdash vpt add M hpt neg vpt neg V hpt vpt neg V hpt vpt V hpt neg vpt V closepath stroke } def /BoxE { stroke [] 0 setdash exch hpt sub exch vpt add M 0 vpt2 neg V hpt2 0 V 0 vpt2 V hpt2 neg 0 V closepath stroke } def /TriUE { stroke [] 0 setdash vpt 1.12 mul add M hpt neg vpt -1.62 mul V hpt 2 mul 0 V hpt neg vpt 1.62 mul V closepath stroke } def /TriDE { stroke [] 0 setdash vpt 1.12 mul sub M hpt neg vpt 1.62 mul V hpt 2 mul 0 V hpt neg vpt -1.62 mul V closepath stroke } def /PentE { stroke [] 0 setdash gsave translate 0 hpt M 4 {72 rotate 0 hpt L} repeat closepath stroke grestore } def /CircE { stroke [] 0 setdash hpt 0 360 arc stroke } def /Opaque { gsave closepath 1 setgray fill grestore 0 setgray closepath } def /DiaW { stroke [] 0 setdash vpt add M hpt neg vpt neg V hpt vpt neg V hpt vpt V hpt neg vpt V Opaque stroke } def /BoxW { stroke [] 0 setdash exch hpt sub exch vpt add M 0 vpt2 neg V hpt2 0 V 0 vpt2 V hpt2 neg 0 V Opaque stroke } def /TriUW { stroke [] 0 setdash vpt 1.12 mul add M hpt neg vpt -1.62 mul V hpt 2 mul 0 V hpt neg vpt 1.62 mul V Opaque stroke } def /TriDW { stroke [] 0 setdash vpt 1.12 mul sub M hpt neg vpt 1.62 mul V hpt 2 mul 0 V hpt neg vpt -1.62 mul V Opaque stroke } def /PentW { stroke [] 0 setdash gsave translate 0 hpt M 4 {72 rotate 0 hpt L} repeat Opaque stroke grestore } def /CircW { stroke [] 0 setdash hpt 0 360 arc Opaque stroke } def /BoxFill { gsave Rec 1 setgray fill grestore } def end %%EndProlog %%Page: 1 1 gnudict begin gsave 50 50 translate 0.100 0.100 scale 90 rotate 0 -5040 translate 0 setgray newpath (Helvetica) findfont 140 
scalefont setfont 1.000 UL LTb 742 280 M 63 0 V 6157 0 R -63 0 V 658 280 M (0) Rshow 742 936 M 63 0 V 6157 0 R -63 0 V 658 936 M (20000) Rshow 742 1592 M 63 0 V 6157 0 R -63 0 V -6241 0 R (40000) Rshow 742 2248 M 63 0 V 6157 0 R -63 0 V -6241 0 R (60000) Rshow 742 2904 M 63 0 V 6157 0 R -63 0 V -6241 0 R (80000) Rshow 742 3560 M 63 0 V 6157 0 R -63 0 V -6241 0 R (100000) Rshow 742 4216 M 63 0 V 6157 0 R -63 0 V -6241 0 R (120000) Rshow 742 4872 M 63 0 V 6157 0 R -63 0 V -6241 0 R (140000) Rshow 742 280 M 0 63 V 0 4529 R 0 -63 V 742 140 M (0) Cshow 1364 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (5) Cshow 1986 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (10) Cshow 2608 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (15) Cshow 3230 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (20) Cshow 3852 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (25) Cshow 4474 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (30) Cshow 5096 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (35) Cshow 5718 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (40) Cshow 6340 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (45) Cshow 6962 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (50) Cshow 1.000 UL LTb 742 280 M 6220 0 V 0 4592 V -6220 0 V 742 280 L 1.000 UL LT0 6311 4739 M (allSmallExons/100) Rshow 6395 4739 M 399 0 V 742 780 M 124 36 V 125 12 V 124 7 V 125 3 V 124 2 V 124 1 V 125 1 V 124 1 V 125 1 V 124 1 V 124 0 V 125 0 V 124 1 V 125 0 V 124 0 V 124 0 V 125 0 V 124 1 V 125 0 V 124 0 V 124 0 V 125 0 V 124 0 V 125 0 V 124 0 V 124 0 V 125 0 V 124 0 V 125 0 V 124 0 V 124 0 V 125 0 V 124 0 V 125 0 V 124 0 V 124 0 V 125 0 V 124 0 V 125 1 V 124 0 V 124 0 V 125 0 V 124 0 V 125 0 V 124 0 V 124 0 V 125 0 V 124 0 V 1.000 UL LT1 6311 4599 M (good) Rshow 6395 4599 M 399 0 V 742 4399 M 866 2106 L 991 1420 L 124 -387 V 1240 834 L 1364 694 L 1488 589 L 125 -67 V 124 -55 V 125 -36 V 124 -33 V 124 -20 V 125 -17 V 124 -11 V 125 -7 V 124 -12 V 124 -6 V 125 -6 V 124 -3 V 125 -4 V 124 -1 V 124 -2 V 125 -3 V 124 -1 V 125 -2 V 124 -1 V 124 -1 V 125 0 V 124 -2 V 125 -1 V 124 -1 V 
124 -1 V 125 -1 V 124 0 V 125 -1 V 124 0 V 124 0 V 125 -1 V 124 0 V 125 -1 V 124 0 V 124 0 V 125 0 V 124 0 V 125 0 V 124 0 V 124 -1 V 125 0 V 124 0 V 1.000 UL LT2 6311 4459 M (probably good) Rshow 6395 4459 M 399 0 V 742 1470 M 866 801 L 991 595 L 1115 479 L 125 -52 V 124 -35 V 124 -27 V 125 -14 V 124 -16 V 125 -9 V 124 -8 V 124 -10 V 125 -3 V 124 -2 V 125 -3 V 124 -3 V 124 -2 V 125 -2 V 124 -1 V 125 0 V 124 -1 V 124 0 V 125 -1 V 124 0 V 125 0 V 124 -1 V 124 0 V 125 0 V 124 -1 V 125 0 V 124 -1 V 124 0 V 125 0 V 124 0 V 125 0 V 124 0 V 124 -1 V 125 0 V 124 0 V 125 0 V 124 0 V 124 0 V 125 0 V 124 -1 V 125 0 V 124 0 V 124 0 V 125 0 V 124 0 V 1.000 UL LT3 6311 4319 M (junkExonsLeft) Rshow 6395 4319 M 399 0 V 742 919 M 866 661 L 991 588 L 124 -53 V 125 -22 V 124 -12 V 124 -10 V 125 -9 V 124 -6 V 125 -6 V 124 -5 V 124 -2 V 125 -4 V 124 -8 V 125 -3 V 124 -2 V 124 -2 V 125 -1 V 124 -2 V 125 -2 V 124 -1 V 124 -1 V 125 -2 V 124 -1 V 125 -1 V 124 -1 V 124 -2 V 125 -1 V 124 -1 V 125 -1 V 124 -2 V 124 -1 V 125 -1 V 124 -1 V 125 -1 V 124 -1 V 124 -1 V 125 -1 V 124 -1 V 125 -1 V 124 0 V 124 -1 V 125 -1 V 124 0 V 125 -1 V 124 -1 V 124 0 V 125 -1 V 124 -1 V 1.000 UL LT4 6311 4179 M (junkExonsRight) Rshow 6395 4179 M 399 0 V 742 897 M 866 627 L 991 565 L 124 -40 V 125 -15 V 124 -15 V 124 -9 V 125 -9 V 124 -7 V 125 -5 V 124 -5 V 124 -4 V 125 -3 V 124 -3 V 125 -2 V 124 -2 V 124 -2 V 125 -3 V 124 -2 V 125 -2 V 124 -1 V 124 -2 V 125 -1 V 124 -1 V 125 -1 V 124 -1 V 124 -1 V 125 -1 V 124 -1 V 125 -1 V 124 -2 V 124 -1 V 125 0 V 124 -1 V 125 -1 V 124 -1 V 124 -1 V 125 -1 V 124 -1 V 125 0 V 124 -1 V 124 -1 V 125 -1 V 124 -1 V 125 0 V 124 -1 V 124 -1 V 125 -1 V 124 -1 V 1.000 UL LT5 6311 4039 M (junkExonsBoth) Rshow 6395 4039 M 399 0 V 742 281 M 124 0 V 125 0 V 124 0 V 125 0 V 124 0 V 124 0 V 125 0 V 124 0 V 125 0 V 124 0 V 124 0 V 125 0 V 124 0 V 125 0 V 124 0 V 124 0 V 125 0 V 124 0 V 125 0 V 124 0 V 124 0 V 125 0 V 124 0 V 125 0 V 124 0 V 124 0 V 125 0 V 124 0 V 125 0 V 124 0 V 124 0 V 125 
0 V 124 0 V 125 0 V 124 0 V 124 -1 V 125 0 V 124 0 V 125 0 V 124 0 V 124 0 V 125 0 V 124 0 V 125 0 V 124 0 V 124 0 V 125 0 V 124 0 V 1.000 UL LT6 6311 3899 M (intronOnGap) Rshow 6395 3899 M 399 0 V 742 775 M 866 636 L 991 529 L 1115 421 L 125 -33 V 124 -5 V 124 -4 V 125 -7 V 124 -8 V 125 -17 V 124 -6 V 124 -3 V 125 -5 V 124 -3 V 125 -5 V 124 -4 V 124 -4 V 125 -4 V 124 -2 V 125 -2 V 124 -3 V 124 -1 V 125 -2 V 124 -1 V 125 -1 V 124 -1 V 124 -1 V 125 -1 V 124 0 V 125 -1 V 124 -1 V 124 0 V 125 -2 V 124 -1 V 125 -1 V 124 0 V 124 -1 V 125 0 V 124 0 V 125 0 V 124 -1 V 124 0 V 125 0 V 124 0 V 125 -1 V 124 0 V 124 0 V 125 0 V 124 0 V stroke grestore end showpage %%Page: 2 2 gnudict begin gsave 50 50 translate 0.100 0.100 scale 90 rotate 0 -5040 translate 0 setgray newpath (Helvetica) findfont 140 scalefont setfont 1.000 UL LTb 658 280 M 63 0 V 6241 0 R -63 0 V 574 280 M (0) Rshow 658 1198 M 63 0 V 6241 0 R -63 0 V -6325 0 R (2000) Rshow 658 2117 M 63 0 V 6241 0 R -63 0 V -6325 0 R (4000) Rshow 658 3035 M 63 0 V 6241 0 R -63 0 V -6325 0 R (6000) Rshow 658 3954 M 63 0 V 6241 0 R -63 0 V -6325 0 R (8000) Rshow 658 4872 M 63 0 V 6241 0 R -63 0 V -6325 0 R (10000) Rshow 658 280 M 0 63 V 0 4529 R 0 -63 V 658 140 M (0) Cshow 1288 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (5) Cshow 1919 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (10) Cshow 2549 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (15) Cshow 3180 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (20) Cshow 3810 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (25) Cshow 4440 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (30) Cshow 5071 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (35) Cshow 5701 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (40) Cshow 6332 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (45) Cshow 6962 280 M 0 63 V 0 4529 R 0 -63 V 0 -4669 R (50) Cshow 1.000 UL LTb 658 280 M 6304 0 V 0 4592 V -6304 0 V 658 280 L 1.000 UL LT0 6311 4739 M (allSmallExons/100) Rshow 6395 4739 M 399 0 V 1.000 UL LT1 6311 4599 M (good) Rshow 6395 4599 M 399 0 V 1391 4872 M 23 -272 V 
127 -927 V 126 -774 V 126 -512 V 126 -453 V 126 -284 V 126 -239 V 126 -150 V 126 -102 V 2549 992 L 126 -82 V 126 -81 V 126 -50 V 127 -44 V 126 -18 V 126 -28 V 126 -38 V 126 -26 V 126 -18 V 126 -14 V 126 -13 V 126 -12 V 126 -27 V 126 -10 V 126 -16 V 126 -7 V 127 -18 V 126 -4 V 126 -3 V 126 -5 V 126 -9 V 126 -3 V 126 -2 V 126 -10 V 126 -6 V 126 -1 V 126 -1 V 126 -2 V 127 -1 V 126 -2 V 126 -1 V 126 0 V 126 -2 V 1.000 UL LT2 6311 4459 M (probably good) Rshow 6395 4459 M 399 0 V 902 4872 M 8 -182 V 1036 3063 L 126 -721 V 126 -490 V 126 -378 V 127 -200 V 126 -222 V 1793 925 L 1919 807 L 2045 678 L 126 -53 V 126 -28 V 126 -32 V 126 -50 V 126 -29 V 126 -18 V 126 -16 V 127 -5 V 126 -11 V 126 -5 V 126 -6 V 126 -3 V 126 -6 V 126 -3 V 126 -6 V 126 -5 V 126 -6 V 126 -2 V 126 -12 V 126 -2 V 127 -5 V 126 -1 V 126 -1 V 126 -2 V 126 0 V 126 -2 V 126 -1 V 126 -9 V 126 0 V 126 -2 V 126 0 V 126 0 V 127 0 V 126 -1 V 126 -2 V 126 -1 V 126 -1 V 1.000 UL LT3 6311 4319 M (junkExonsLeft) Rshow 6395 4319 M 399 0 V 875 4872 M 35 -283 V 126 -745 V 126 -306 V 126 -169 V 126 -132 V 127 -122 V 126 -89 V 126 -90 V 126 -59 V 126 -41 V 126 -47 V 126 -120 V 126 -31 V 126 -33 V 126 -25 V 126 -24 V 126 -20 V 127 -24 V 126 -18 V 126 -20 V 126 -21 V 126 -17 V 126 -19 V 126 -14 V 126 -16 V 126 -19 V 126 -20 V 126 -16 V 126 -16 V 126 -19 V 127 -11 V 126 -18 V 126 -13 V 126 -13 V 126 -12 V 126 -13 V 126 -14 V 126 -14 V 126 -7 V 126 -12 V 126 -10 V 126 -8 V 127 -10 V 126 -8 V 126 -10 V 126 -13 V 126 -13 V 1.000 UL LT4 6311 4179 M (junkExonsRight) Rshow 6395 4179 M 399 0 V 822 4872 M 88 -608 V 126 -548 V 126 -222 V 126 -209 V 126 -127 V 127 -121 V 126 -97 V 126 -64 V 126 -74 V 126 -64 V 126 -39 V 126 -35 V 126 -32 V 126 -28 V 126 -29 V 126 -37 V 126 -29 V 127 -29 V 126 -20 V 126 -19 V 126 -15 V 126 -15 V 126 -15 V 126 -14 V 126 -14 V 126 -17 V 126 -12 V 126 -15 V 126 -27 V 126 -10 V 127 -9 V 126 -10 V 126 -13 V 126 -14 V 126 -14 V 126 -11 V 126 -12 V 126 -13 V 126 -11 V 126 -10 V 126 -12 V 126 -12 V 127 -9 V 
126 -14 V 126 -10 V 126 -9 V 126 -13 V 1.000 UL LT5 6311 4039 M (junkExonsBoth) Rshow 6395 4039 M 399 0 V 658 293 M 126 -3 V 126 -1 V 126 0 V 126 0 V 126 0 V 126 0 V 127 0 V 126 -1 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 127 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 -1 V 127 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 126 0 V 127 0 V 126 0 V 126 0 V 126 0 V 126 0 V 1.000 UL LT6 6311 3899 M (intronOnGap) Rshow 6395 3899 M 399 0 V 817 4872 M 910 3771 L 1036 2253 L 126 -458 V 126 -68 V 126 -64 V 127 -91 V 126 -119 V 126 -233 V 126 -90 V 126 -44 V 126 -64 V 126 -41 V 126 -70 V 126 -63 V 126 -50 V 126 -49 V 126 -33 V 127 -28 V 126 -42 V 126 -18 V 126 -23 V 126 -12 V 126 -24 V 126 -13 V 126 -12 V 126 -11 V 126 -8 V 126 -9 V 126 -12 V 126 -3 V 127 -20 V 126 -22 V 126 -5 V 126 -7 V 126 -8 V 126 -2 V 126 -9 V 126 -1 V 126 -3 V 126 -3 V 126 -3 V 126 -4 V 127 -1 V 126 -2 V 126 -4 V 126 -2 V 126 -2 V stroke grestore end showpage %%Trailer %%DocumentFonts: Helvetica %%Pages: 2 kmer-code-2013-trunk/sim4dbutils/cleanPolishes-experiments/dbEST-intronSize-histogram0000644000000000000000000026250607605137611027605 0ustar rootroot3326570 1192472 569706 381713 233305 156017 130697 91773 65684 54975 48528 36169 34014 30591 21371 22180 17177 18572 14307 11074 9758 9740 10932 7420 9510 6956 7878 7106 5811 4556 6366 5035 4344 4265 5352 5675 4935 4286 3514 3827 2544 2666 2852 2739 1453 1793 2416 1649 1588 2023 1183 1777 1338 1416 1517 1087 1330 1482 1153 1325 966 825 1003 810 972 988 677 1127 1394 1335 884 897 439 407 772 1037 746 405 425 620 919 305 458 610 692 764 563 328 454 695 327 367 342 671 415 590 1137 263 175 329 459 307 203 125 533 153 246 290 136 100 246 150 194 132 113 115 325 253 186 140 185 103 110 59 157 72 131 166 201 148 76 66 46 134 208 38 38 34 72 98 37 69 53 30 81 90 80 44 108 40 65 239 24 101 41 122 136 123 63 60 58 50 20 64 38 100 52 
46 20 50 36 113 30 46 54 20 3 80 15 44 32 11 9 40 18 52 26 64 16 24 28 12 6 10 14 13 26 12 94 7 30 12 8 14 10 15 18 10 6 4 40 6 20 10 6 18 26 4 2 16 14 9 20 58 14 51 18 14 4 12 26 32 18 22 8 6 4 8 2 8 26 14 10 4 12 6 16 6 4 6 2 0 8 4 16 20 2 20 9 2 2 4 4 2 32 16 4 2 4 10 8 16 36 2 0 2 8 0 2 2 0 6 52 34 24 9 1 0 16 4 6 2 0 2 10 0 2 8 12 12 92 0 2 8 2 12 2 6 0 0 4 0 2 10 2 0 2 2 12 2 0 0 2 0 0 7 0 4 80 10 0 0 2 6 2 2 4 2 2 0 2 0 2 0 0 2 2 10 0 0 2 4 2 4 0 6 0 0 14 0 2 2 4 4 0 2 16 2 6 2 2 4 4 2 0 2 4 0 0 4 4 4 0 2 2 2 2 0 0 0 0 2 72 0 4 2 2 0 0 0 0 0 2 6 0 2 6 2 0 10 2 0 0 2 2 0 0 2 0 4 0 0 0 0 0 0 0 4 2 0 2 0 0 0 2 0 0 0 0 4 4 0 0 0 0 2 0 0 0 0 6 2 0 0 0 0 0 0 2 0 4 2 0 0 8 0 0 0 0 0 0 0 2 0 2 0 0 2 0 0 2 6 2 0 2 2 0 0 4 0 0 2 0 0 0 2 2 6 1 0 4 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 6 0 2 2 0 14 0 8 8 0 4 2 0 0 0 2 0 2 0 0 0 0 0 12 0 0 4 0 2 0 0 2 0 0 0 2 4 0 0 0 0 4 6 0 0 0 0 2 0 2 6 0 2 0 2 2 2 2 0 2 0 0 2 2 0 4 0 0 2 0 0 0 0 4 0 0 4 2 2 0 0 6 0 0 0 0 0 0 2 2 0 2 0 0 0 0 2 6 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 14 0 0 2 0 0 4 0 2 2 0 0 0 0 0 0 0 12 0 0 2 2 4 0 0 0 2 0 0 0 2 0 6 0 0 0 0 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 8 2 0 2 2 0 0 4 0 0 2 2 0 2 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 2 0 0 2 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 6 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 4 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 4 0 0 0 0 0 4 0 0 0 2 0 0 0 4 0 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 2 0 0 0 2 0 4 0 0 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 6 0 0 0 0 0 0 2 2 0 0 0 0 0 0 2 0 2 4 0 0 2 0 0 0 0 0 0 2 0 4 0 0 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 
0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18 0 0 4 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 2 2 0 4 0 0 0 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 2 0 0 2 0 2 0 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 2 0 4 0 0 0 0 2 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 0 0 0 2 0 0 2 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 2 4 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 6 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 6 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 kmer-code-2013-trunk/sim4dbutils/cleanPolishes-experiments/evalThresh-gnuplot0000644000000000000000000000205607605137611026272 0ustar rootroot set terminal postscript color set output "evalThresh.ps" plot \ "evalThresh.dat" using 3 title "allSmallIntron/100" with lines, \ "evalThresh.dat" using 4 title "good" with lines, \ "evalThresh.dat" using 5 title "probably good" with lines, \ "evalThresh.dat" using 6 title "junkExonsLeft" with lines, \ "evalThresh.dat" using 7 title "junkExonsRight" with lines, \ "evalThresh.dat" using 8 title "junkExonsBoth" with lines, \ "evalThresh.dat" using 9 title "intronOnGap" with lines plot [][0:10000] \ "evalThresh.dat" using 3 title "allSmallIntrons/100" with lines, \ "evalThresh.dat" using 4 title "good" with lines, \ "evalThresh.dat" using 5 title "probably good" with lines, \ "evalThresh.dat" using 6 title "junkExonsLeft" with lines, \ "evalThresh.dat" using 7 title "junkExonsRight" with lines, \ "evalThresh.dat" using 8 title "junkExonsBoth" with lines, \ "evalThresh.dat" using 9 title "intronOnGap" with lines "evalThresh.dat" using 2 title "oneExon/100" with lines, \ "evalThresh.dat" using 10 title "total/100" with lines 
kmer-code-2013-trunk/sim4dbutils/cleanPolishes-experiments/evalThresh.dat0000644000000000000000000000543407605137611025356 0ustar rootroot100 26569.68 15241.88 125574 36280 19490 18815 28 15100 43964.43 200 26569.68 16348.56 55684 15887 11615 10569 21 10843 43964.43 300 26569.68 16694.3 34761 9604 9384 8675 19 7602 43964.43 400 26569.68 16908.98 22957 6061 7761 7483 19 4296 43964.43 500 26569.68 17006.93 16879 4490 7095 7000 19 3299 43964.43 600 26569.68 17069.78 12632 3424 6726 6544 19 3152 43964.43 700 26569.68 17117.3 9407 2600 6440 6268 19 3011 43964.43 800 26569.68 17149.12 7388 2165 6173 6005 19 2813 43964.43 900 26569.68 17177.45 5703 1682 5981 5792 17 2555 43964.43 1000 26569.68 17199.78 4589 1405 5785 5653 17 2048 43964.43 1100 26569.68 17217.13 3601 1147 5655 5492 17 1850 43964.43 1200 26569.68 17229.34 2983 867 5566 5353 17 1755 43964.43 1300 26569.68 17238.96 2464 751 5463 5268 17 1616 43964.43 1400 26569.68 17247.11 2136 691 5202 5192 17 1526 43964.43 1500 26569.68 17252.9 1914 621 5135 5123 17 1375 43964.43 1600 26569.68 17260.33 1550 512 5064 5062 17 1237 43964.43 1700 26569.68 17265.02 1373 449 5009 4997 17 1128 43964.43 1800 26569.68 17269.57 1195 410 4957 4918 17 1021 43964.43 1900 26569.68 17272.81 1087 375 4912 4854 17 949 43964.43 2000 26569.68 17275.65 990 363 4860 4792 17 888 43964.43 2100 26569.68 17277.99 952 340 4822 4748 17 797 43964.43 2200 26569.68 17279.99 890 329 4777 4706 17 757 43964.43 2300 26569.68 17282.22 807 316 4733 4673 17 707 43964.43 2400 26569.68 17283.81 751 309 4695 4640 17 682 43964.43 2500 26569.68 17285.59 713 296 4653 4608 17 629 43964.43 2600 26569.68 17286.87 681 289 4624 4577 17 600 43964.43 2700 26569.68 17288.16 654 277 4588 4547 17 576 43964.43 2800 26569.68 17289.58 627 266 4547 4509 17 551 43964.43 2900 26569.68 17291.15 569 253 4504 4483 17 534 43964.43 3000 26569.68 17292.29 547 248 4469 4452 17 513 43964.43 3100 26569.68 17294.12 511 222 4433 4392 17 488 43964.43 3200 26569.68 17294.98 497 218 4393 4371 16 
482 43964.43 3300 26569.68 17296.39 458 206 4368 4351 16 437 43964.43 3400 26569.68 17297.57 449 205 4329 4330 16 389 43964.43 3500 26569.68 17298.32 442 203 4302 4300 16 380 43964.43 3600 26569.68 17299.23 431 199 4273 4270 16 363 43964.43 3700 26569.68 17300.17 412 198 4247 4239 15 347 43964.43 3800 26569.68 17300.84 406 194 4218 4217 15 341 43964.43 3900 26569.68 17301.68 401 191 4187 4190 15 323 43964.43 4000 26569.68 17302.68 379 172 4158 4162 15 321 43964.43 4100 26569.68 17303.31 366 171 4142 4137 15 313 43964.43 4200 26569.68 17303.89 364 168 4116 4116 15 307 43964.43 4300 26569.68 17304.48 361 168 4094 4089 15 300 43964.43 4400 26569.68 17305.03 358 167 4076 4064 15 292 43964.43 4500 26569.68 17305.51 355 167 4055 4043 15 289 43964.43 4600 26569.68 17306.05 351 166 4038 4014 15 286 43964.43 4700 26569.68 17306.66 349 161 4015 3992 15 277 43964.43 4800 26569.68 17307.21 348 159 3987 3973 15 272 43964.43 4900 26569.68 17307.92 343 156 3959 3943 15 267 43964.43 kmer-code-2013-trunk/sim4dbutils/cleanPolishes-experiments/evalThresh-plot.pl0000644000000000000000000000201107605137611026161 0ustar rootrootopen(F, "< evalThresh.pl.out"); while (!eof(F)) { $_ = ; if (m/at least (\d+)bp/) { print "$1\t"; $_ = ; $_ = ;if (m/oneExon:\s+(\d+)/) { print "$1\t"; } else { print STDERR "no 1\n"; } $_ = ;if (m/allSmall.*:\s+(\d+)/) { print "$1\t"; } else { print STDERR "no 2\n"; } $_ = ;if (m/good:\s+(\d+)/) { print "$1\t"; } else { print STDERR "no 3\n"; } $_ = ;if (m/probably\sgood:\s+(\d+)/) { print "$1\t"; } else { print STDERR "no 4\n"; } $_ = ;if (m/junkExonsLeft:\s+(\d+)/) { print "$1\t"; } else { print STDERR "no 5\n"; } $_ = ;if (m/junkExonsRight:\s+(\d+)/) { print "$1\t"; } else { print STDERR "no 6\n"; } $_ = ;if (m/junkExonsBoth:\s+(\d+)/) { print "$1\t"; } else { print STDERR "no 7\n"; } $_ = ;if (m/intronOnGap:\s+(\d+)/) { print "$1\t"; } else { print STDERR "no 8\n"; } $_ = ;if (m/total:\s+(\d+)/) { print "$1"; } else { print STDERR "no 9\n"; } print "\n"; } } 
close(F); kmer-code-2013-trunk/sim4dbutils/filterPolishes.C0000644000000000000000000002441012322046702020510 0ustar rootroot#include #include #include #include #include #include "bio.h" #include "sim4.H" int main(int argc, char ** argv) { uint32 minC = 0; uint32 minI = 0; uint32 minL = 0; uint32 cdna = ~uint32ZERO; uint32 geno = ~uint32ZERO; uint32 minExons = 0; uint32 maxExons = ~uint32ZERO; uint32 beVerbose = 0; int GOODsilent = 0; sim4polishWriter *GOOD = 0L; int CRAPsilent = 0; sim4polishWriter *CRAP = 0L; sim4polishWriter *JUNK = 0L; uint64 pmod = 1; uint64 good = 0; uint64 crap = 0; uint64 junk = 0; int doSelfFilter = 0; int doSegregation = 0; uint32 doSegregationLo = 0; uint32 doSegregationHi = 0; char *filePrefixGOOD = 0L; char *filePrefixCRAP = 0L; char *filePrefixJUNK = 0L; sim4polishWriter **SEGREGATE = 0L; bool noDefLines = false; bool noAlignments = false; bool doGFF3 = false; sim4polishStyle style = sim4polishStyleDefault; // We limit scaffolds to be below the number of open files per // process. 
// uint32 maxScaffold = sysconf(_SC_OPEN_MAX); int arg = 1; while (arg < argc) { if (strncmp(argv[arg], "-verbose", 2) == 0) { beVerbose = 1; } else if (strncmp(argv[arg], "-c", 2) == 0) { minC = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-i", 2) == 0) { minI = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-l", 2) == 0) { minL = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-minexons", 3) == 0) { minExons = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-maxexons", 3) == 0) { maxExons = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-o", 2) == 0) { filePrefixGOOD = argv[++arg]; GOODsilent = 0; } else if (strncmp(argv[arg], "-O", 2) == 0) { GOODsilent = 1; } else if (strncmp(argv[arg], "-d", 2) == 0) { filePrefixCRAP = argv[++arg]; CRAPsilent = 0; } else if (strncmp(argv[arg], "-q", 2) == 0) { CRAPsilent = 1; } else if (strncmp(argv[arg], "-D", 2) == 0) { CRAPsilent = 1; } else if (strncmp(argv[arg], "-j", 2) == 0) { filePrefixJUNK = argv[++arg]; } else if (strncmp(argv[arg], "-C", 2) == 0) { cdna = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-G", 2) == 0) { geno = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-selfhits", 4) == 0) { doSelfFilter = 1; } else if (strncmp(argv[arg], "-segregate", 4) == 0) { doSegregation = 1; doSegregationLo = atoi(argv[++arg]); doSegregationHi = atoi(argv[++arg]); if (doSegregationHi - doSegregationLo + 1 > maxScaffold) fprintf(stderr, "error: -segregate range too big; must be less than %u.\n", maxScaffold), exit(1); SEGREGATE = new sim4polishWriter * [maxScaffold]; memset(SEGREGATE, 0, sizeof(sim4polishWriter *) * maxScaffold); } else if (strncmp(argv[arg], "-nodeflines", 4) == 0) { noDefLines = true; } else if (strncmp(argv[arg], "-noalignments", 4) == 0) { noAlignments = true; } else if (strncmp(argv[arg], "-gff3", 4) == 0) { doGFF3 = true; style = sim4polishGFF3; } else { fprintf(stderr, "UNKNOWN option '%s'\n", argv[arg]); exit(1); } arg++; } if (isatty(fileno(stdin))) { fprintf(stderr, "usage: %s 
[-c c] [-i i] [-o o]\n", argv[0]); fprintf(stderr, " -verbose Report progress\n"); fprintf(stderr, "\n"); fprintf(stderr, " -c c Discard polishes below c%% composite (default: 0).\n"); fprintf(stderr, " -i i Discard polishes below i%% identity (default: 0).\n"); fprintf(stderr, " -l l Discard polishes below l identities (default: 0).\n"); fprintf(stderr, "\n"); fprintf(stderr, " -minexons e Discard polishes below e exons (default: 0).\n"); fprintf(stderr, " -maxexons e Discard polishes above e exons (default: infinity).\n"); fprintf(stderr, "\n"); fprintf(stderr, " -C c Discard polishes that are not from cDNA idx 'c'\n"); fprintf(stderr, " -G g Discard polishes that are not from genomic idx 'g'\n"); fprintf(stderr, "\n"); fprintf(stderr, " -o o Write saved polishes to the 'o' file (default == stdout).\n"); fprintf(stderr, " -O Don't write saved polishes.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -d o Write discarded polishes to the 'o' file (default == stdout).\n"); fprintf(stderr, " -D Don't write discarded polishes.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -j o Write intractable and aborted polishes to the 'o' file. By\n"); fprintf(stderr, " default these are silently discarded.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -selfhits Filter out alignments to ourself -- if you did an all-to-all\n"); fprintf(stderr, " mapping of a set onto itself. 
Deflines needed!\n"); fprintf(stderr, "\n"); fprintf(stderr, " -segregate a b Segregate polishes by genomic idx, for idx's between a and b inclusive.\n"); fprintf(stderr, " b-a must be less than %u.\n", maxScaffold); fprintf(stderr, " Must be used with -o.\n"); fprintf(stderr, " Will create numerous files 'o.%%05d'.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -nodeflines Strip out deflines.\n"); fprintf(stderr, " -noalignments Strip out alignments.\n"); fprintf(stderr, " -gff3 Write output in GFF3 format.\n"); fprintf(stderr, "\n"); fprintf(stderr, " All conditions must be met.\n"); exit(1); } if ((CRAPsilent == 0) && (GOODsilent == 0) && (filePrefixGOOD == 0L) && (filePrefixCRAP == 0L)) { fprintf(stderr, "error: filter has no effect; saved and discarded polishes\n"); fprintf(stderr, " both printed to the same place!\n"); fprintf(stderr, " (try using one of -o, -O, -d, -D)\n"); exit(1); } if (doSegregation && (filePrefixGOOD == 0L)) { fprintf(stderr, "error: you must specify a file prefix when segregating (-s requires -o)\n"); exit(1); } if (noDefLines && doGFF3) fprintf(stderr, "warning: No deflines option inactive with GFF3.\n"); if (beVerbose) { fprintf(stderr, "Filtering at "uint32FMT"%% coverage and "uint32FMT"%% identity and "uint32FMT"bp.\n", minC, minI, minL); if ((cdna != ~uint32ZERO) && (cdna != ~uint32ZERO)) fprintf(stderr, "Filtering for cDNA idx "uint32FMT" and genomic idx "uint32FMT"\n", cdna, geno); else if (cdna != ~uint32ZERO) fprintf(stderr, "Filtering for cDNA idx "uint32FMT".\n", cdna); else if (geno != ~uint32ZERO) fprintf(stderr, "Filtering for genomic idx "uint32FMT".\n", geno); } // Prepare input files sim4polishReader *R = new sim4polishReader("-"); sim4polish *p = 0L; if (R->getsim4polishStyle() != style) fprintf(stderr, "warning: input format and output format differ.\n"); // Prepare output files if (filePrefixGOOD != 0L) GOOD = new sim4polishWriter(filePrefixGOOD, style); if (filePrefixCRAP != 0L) CRAP = new 
sim4polishWriter(filePrefixCRAP, style); if (filePrefixJUNK != 0L) JUNK = new sim4polishWriter(filePrefixJUNK, style); if ((CRAPsilent == 0) && (CRAP == 0L)) CRAP = new sim4polishWriter("-", sim4polishS4DB); if ((GOODsilent == 0) && (GOOD == 0L)) GOOD = new sim4polishWriter("-", sim4polishS4DB); // Start processing while (R->nextAlignment(p)) { if (noDefLines && (doGFF3 == false)) p->s4p_removeDefLines(); if (noAlignments) p->s4p_removeAlignments(); if (JUNK && ((p->_strandOrientation == SIM4_STRAND_INTRACTABLE) || (p->_strandOrientation == SIM4_STRAND_FAILED))) { junk++; JUNK->writeAlignment(p); } else { if ((p->_percentIdentity >= minI) && (p->_querySeqIdentity >= minC) && (p->_numCovered >= minL) && ((cdna == ~uint32ZERO) || (cdna == p->_estID)) && ((geno == ~uint32ZERO) || (geno == p->_genID)) && (minExons <= p->_numExons) && (p->_numExons <= maxExons) && ((doSelfFilter == 0) || (strcmp(p->_estDefLine, p->_genDefLine) != 0))) { good++; if (doSegregation) { if ((doSegregationLo <= p->_genID) && (p->_genID <= doSegregationHi)) { if (SEGREGATE[p->_genID - doSegregationLo] == 0L) { char filename[1024]; /* bounded write: filePrefixGOOD comes from the command line and may be arbitrarily long */ snprintf(filename, sizeof(filename), "%s.%04d", filePrefixGOOD, (int)p->_genID); SEGREGATE[p->_genID - doSegregationLo] = new sim4polishWriter(filename, sim4polishS4DB); } SEGREGATE[p->_genID - doSegregationLo]->writeAlignment(p); } } else { if (!GOODsilent) GOOD->writeAlignment(p); } } else { crap++; if (!CRAPsilent) CRAP->writeAlignment(p); } } if ((beVerbose) && ((good+crap) == pmod)) { pmod += 8888 + (random() % 1000); if (junk > 0) fprintf(stderr, " Filter: %6.2f%% ("uint64FMT" matches processed) ("uint64FMT" failed/intractable)\r", 100.0 * good / (good+crap), good+crap, junk); else fprintf(stderr, " Filter: %6.2f%% ("uint64FMT" matches processed)\r", 100.0 * good / (good+crap), good+crap); fflush(stderr); } } if (beVerbose) { if (junk > 0) fprintf(stderr, " Filter: %6.2f%% ("uint64FMT" matches processed) ("uint64FMT" failed/intractable)\n", 100.0 * good / (good+crap), good+crap, 
junk); else fprintf(stderr, " Filter: %6.2f%% ("uint64FMT" matches processed)\n", 100.0 * good / (good+crap), good+crap); } delete R; if (doSegregation) { for (uint32 i=0; i #include #include #include #include #include "bio++.H" #include "sim4.H" //#define MIN_EXON_LENGTH 50 //#define MIN_PERCENT_IDENTITY 88 #define MIN_EXON_LENGTH 20 #define MIN_PERCENT_IDENTITY 90 /* lowComplexityExon: counts adjacent base pairs of s (A/C/G/T in either case map to 1..4, anything else to 0) and returns true when the three most frequent pairs account for more than 75% of all pairs. Returns false for a null, empty or one-base s, and for any s with more than MIN_EXON_LENGTH pairs. */ bool lowComplexityExon(char *s) { int cnt[5][5] = { {0,0,0,0,0}, {0,0,0,0,0}, {0,0,0,0,0}, {0,0,0,0,0}, {0,0,0,0,0}}; int map[256] = {0}; int i, j, l = 0; int a=0, b=0, c=0; double qual = 0.0; /* an empty string must be rejected here: the pair loop starts by reading s[1] */ if ((s == 0L) || (s[0] == 0)) return(false); map['A'] = map['a'] = 1; map['C'] = map['c'] = 2; map['G'] = map['g'] = 3; map['T'] = map['t'] = 4; /* index map[] through unsigned char so bytes >= 0x80 cannot produce a negative subscript */ for (i=0, j=1, l=0; s[j]; i++, j++, l++) cnt[map[(unsigned char)s[i]]][map[(unsigned char)s[j]]]++; /* l == 0 (one base, no pairs) would divide by zero below */ if ((l == 0) || (l > MIN_EXON_LENGTH)) return(false); /* a >= b >= c track the three largest pair counts */ for (i=0; i<5; i++) { for (j=0; j<5; j++) { if (a < cnt[i][j]) { c = b; b = a; a = cnt[i][j]; } else if (b < cnt[i][j]) { c = b; b = cnt[i][j]; } else if (c < cnt[i][j]) { c = cnt[i][j]; } } } qual = (double)(a+b+c) / (double)(l); return(qual > 0.75); } // Delete exons before/after a specific intron. 
// void trimExonsBefore(int intronSplit, sim4polish *p) { for (int i=0; is4p_deleteExon(0); } void trimExonsAfter(int intronSplit, sim4polish *p) { for (int i=p->_numExons-1; i>=intronSplit; i--) p->s4p_deleteExon(i); } int main(int argc, char ** argv) { int totMatches = 0; int oneExon = 0; int smaIntron = 0; int junkFirst = 0; int junkLast = 0; int junkBoth = 0; int splitOnGap = 0; int goodQual = 0; int probGood = 0; bool filter = true; bool saveJunk = false; uint32 intronLimit = 100000; // Before / after files // bool beforeafter = false; #if 0 sim4polishWriter *splGood = 0L; sim4polishWriter *splProbGood = 0L; #endif sim4polishWriter *splJunkLeft = 0L; sim4polishWriter *splJunkRight = 0L; sim4polishWriter *splJunkBoth = 0L; sim4polishWriter *splIntronGap = 0L; // Segregation files // bool segregate = false; #if 0 sim4polishWriter *filtOne = 0L; sim4polishWriter *filtAllSmall = 0L; #endif sim4polishWriter *filtGood = 0L; sim4polishWriter *filtProbGood = 0L; sim4polishWriter *filtJunkLeft = 0L; sim4polishWriter *filtJunkRight = 0L; sim4polishWriter *filtJunkBoth = 0L; sim4polishWriter *filtIntronGap = 0L; sim4polishStyle style = sim4polishStyleDefault; bool hasBeenWarned = false; bool beVerbose = false; int arg = 1; int err = 0; while (arg < argc) { if (strncmp(argv[arg], "-threshold", 2) == 0) { intronLimit = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-quiet", 2) == 0) { fprintf(stderr, "QUIET MODE ENABLED -- non-modified matches not output!\n"); filter = false; } else if (strncmp(argv[arg], "-beforeafter", 2) == 0) { fprintf(stderr, "DEBUG MODE ENABLED -- many 'spl.*' files created!\n"); beforeafter = true; } else if (strncmp(argv[arg], "-segregate", 3) == 0) { fprintf(stderr, "SEGREGATION MODE ENABLED -- many 'filt.*' files created!\n"); segregate = true; } else if (strncmp(argv[arg], "-gff3", 5) == 0) { style = sim4polishGFF3; } else if (strncmp(argv[arg], "-savejunk", 3) == 0) { saveJunk = true; } else if (strncmp(argv[arg], "-verbose", 2) == 0) { 
beVerbose = true; } else { err++; } arg++; } if ((err) || (isatty(fileno(stdin))) || (isatty(fileno(stdout)) && filter)) { fprintf(stderr, "usage: %s [-threshold t] [-savejunk] [-gff3] [-quiet] [-debug]\n", argv[0]); fprintf(stderr, " -threshold Introns bigger than this are candidates for trimming (default = 100000).\n"); fprintf(stderr, " -quiet Don't print unmodified matches\n"); fprintf(stderr, " -beforeafter Save (in separate files) the before/after of each modified match\n"); fprintf(stderr, " -segregate Save (in separate files) the after of each modified match\n"); fprintf(stderr, " -gff3 Write output in GFF3 format\n"); fprintf(stderr, " -savejunk Also print the trimmed pieces (as separate matches)\n"); if (isatty(fileno(stdin))) fprintf(stderr, "error: I cannot read polishes from the terminal!\n"); if (isatty(fileno(stdout)) && filter) fprintf(stderr, "error: Please redirect the polishes (stdout) to a file.\n"); exit(1); } if (beVerbose) fprintf(stderr, "A big intron is one that is at least "uint32FMT"bp long.\n", intronLimit); if (beforeafter) { #if 0 splGood = new sim4polishWriter("spl.good", style); splProbGood = new sim4polishWriter("spl.probGood", style); #endif splJunkLeft = new sim4polishWriter("spl.junkLeft", style); splJunkRight = new sim4polishWriter("spl.junkRight", style); splJunkBoth = new sim4polishWriter("spl.junkBoth", style); splIntronGap = new sim4polishWriter("spl.intronGap", style); } if (segregate) { #if 0 filtOne = new sim4polishWriter("filt.filtOne", style); filtAllSmall = new sim4polishWriter("filt.allSmall", style); #endif filtGood = new sim4polishWriter("filt.good", style); filtProbGood = new sim4polishWriter("filt.probGood", style); filtJunkLeft = new sim4polishWriter("filt.junkLeft", style); filtJunkRight = new sim4polishWriter("filt.junkRight", style); filtJunkBoth = new sim4polishWriter("filt.junkBoth", style); filtIntronGap = new sim4polishWriter("filt.intronGap", style); } sim4polishWriter *W = new sim4polishWriter("-", 
style); sim4polishReader *R = new sim4polishReader("-"); sim4polish *p = 0L; if (R->getsim4polishStyle() != style) fprintf(stderr, "warning: input format and output format differ.\n"); while (R->nextAlignment(p)) { uint32 exA; uint32 exB; if (p->_numExons == 1) { oneExon++; if (filter) W->writeAlignment(p); #if 0 if (segregate) filtOneExon->writeAlignment(p); #endif } else { // Find the big intron. We assume there is only one big intron. // uint32 biggestIntron = 0; uint32 intronSplit = 0; uint32 intronOri = 0; for (exA=0, exB=1; exB < p->_numExons; exA++, exB++) { uint32 dist = p->_exons[exB]._genFrom - p->_exons[exA]._genTo + 1; if (dist > biggestIntron) { biggestIntron = dist; intronSplit = exB; intronOri = p->_exons[exA]._intronOrientation; } } if (intronOri == 0) { fprintf(stderr, "didn't find the largest intron? (got zero)?\n"); exit(1); } if (intronOri == SIM4_INTRON_NONE) { fprintf(stderr, "biggest intron isn't an intron? (got none)?\n"); exit(1); } if (biggestIntron < intronLimit) { smaIntron++; if (filter) W->writeAlignment(p); #if 0 if (segregate) filtAllSmall->writeAlignment(p); #endif } else { // Declare the split obvious if all exons on either side are // below MIN_EXON_LENGTH, difficult otherwise. // bool killFirst = true; bool killLast = true; for (uint32 i=0; i_exons[i]._estTo - p->_exons[i]._estFrom + 1 >= MIN_EXON_LENGTH) && (p->_exons[i]._percentIdentity >= MIN_PERCENT_IDENTITY) && (lowComplexityExon(p->_exons[i]._estAlignment) == false)) killFirst = false; for (uint32 i=intronSplit; i_numExons; i++) if ((p->_exons[i]._estTo - p->_exons[i]._estFrom + 1 >= MIN_EXON_LENGTH) && (p->_exons[i]._percentIdentity >= MIN_PERCENT_IDENTITY) && (lowComplexityExon(p->_exons[i]._estAlignment) == false)) killLast = false; // Sometimes, all exons look crappy. If they have a large // intron too, just kill the match. 
// if ((killFirst == true) && (killLast == true)) { junkBoth++; if ((hasBeenWarned == false) && ((p->_exons[0]._estAlignment == 0L) || (p->_exons[0]._genAlignment == 0L))) { hasBeenWarned = true; fprintf(stderr, "cleanPolishes: Need alignments to recompute scores correctly!\n"); } sim4polish *a = new sim4polish(p); sim4polish *b = new sim4polish(p); trimExonsAfter(intronSplit, a); trimExonsBefore(intronSplit, b); if (filter && saveJunk) { W->writeAlignment(a); W->writeAlignment(b); } if (beforeafter) { //fprintf(splJunkBoth, "====================\n"); splJunkBoth->writeAlignment(p); splJunkBoth->writeAlignment(a); splJunkBoth->writeAlignment(b); } if (segregate) { filtJunkBoth->writeAlignment(a); filtJunkBoth->writeAlignment(b); } delete a; delete b; } // If the first half (before the big intron) is crappy, delete // those exons. // if ((killFirst == true) && (killLast == false)) { junkFirst++; sim4polish *a = new sim4polish(p); sim4polish *b = new sim4polish(p); trimExonsAfter(intronSplit, a); trimExonsBefore(intronSplit, b); if (filter) { if (saveJunk) W->writeAlignment(a); W->writeAlignment(b); } if (beforeafter) { //fprintf(splJunkLeft, "====================\n"); splJunkLeft->writeAlignment(p); splJunkLeft->writeAlignment(a); splJunkLeft->writeAlignment(b); } if (segregate) { filtJunkLeft->writeAlignment(a); filtJunkLeft->writeAlignment(b); } delete a; delete b; } if ((killFirst == false) && (killLast == true)) { junkLast++; sim4polish *a = new sim4polish(p); sim4polish *b = new sim4polish(p); trimExonsAfter(intronSplit, a); trimExonsBefore(intronSplit, b); if (filter) { W->writeAlignment(a); if (saveJunk) W->writeAlignment(b); } if (beforeafter) { //fprintf(splJunkRight, "====================\n"); splJunkRight->writeAlignment(p); splJunkRight->writeAlignment(a); splJunkRight->writeAlignment(b); } if (segregate) { filtJunkRight->writeAlignment(a); filtJunkRight->writeAlignment(b); } delete a; delete b; } if ((killFirst == false) && (killLast == false)) { if 
(intronOri == SIM4_INTRON_GAP) { splitOnGap++; // Break the polish into two pieces, one before and one // after the large intron. This is done by copying the // entire polish, then deleting one half from each. // // XXX If we want to update the strand prediction of the // split pieces, we should // // a) make sure that all the intron signals agree // b) make sure that the percent identites of each exon are > 90% // // For now, we don't. sim4polish *a = new sim4polish(p); sim4polish *b = new sim4polish(p); trimExonsBefore(intronSplit, a); trimExonsAfter(intronSplit, b); if (filter) { W->writeAlignment(a); W->writeAlignment(b); } if (beforeafter) { //fprintf(splIntronGap, "====================\n"); splIntronGap->writeAlignment(p); splIntronGap->writeAlignment(a); splIntronGap->writeAlignment(b); } if (segregate) { filtIntronGap->writeAlignment(a); filtIntronGap->writeAlignment(b); } delete a; delete b; } else { // If there is a valid strand prediction and // a) all exons >= 90% // b) all exons >= 95% // c) all exons >= 95%, except first and last, which can be >= 90% // save the match as is. // bool qualIsC = ((p->_exons[0]._percentIdentity >= 90) && (p->_exons[p->_numExons-1]._percentIdentity >= 90)); for (exA=1; exA < p->_numExons-1; exA++) if (p->_exons[exA]._percentIdentity < 95) qualIsC = false; // If the match looks good, but just has a large intron, keep it. 
// if (qualIsC && ((p->_strandOrientation == SIM4_STRAND_POSITIVE) || (p->_strandOrientation == SIM4_STRAND_NEGATIVE))) { goodQual++; if (filter) W->writeAlignment(p); if (segregate) filtGood->writeAlignment(p); } else { probGood++; if (filter) W->writeAlignment(p); if (segregate) filtProbGood->writeAlignment(p); } } } } // Has a big intron } // More than one exon totMatches++; } delete R; delete W; if (beforeafter) { #if 0 delete splGood; delete splProbGood; #endif delete splJunkLeft; delete splJunkRight; delete splJunkBoth; delete splIntronGap; } if (segregate) { #if 0 delete filtOne; delete filtAllSmall; #endif delete filtGood; delete filtProbGood; delete filtJunkLeft; delete filtJunkRight; delete filtJunkBoth; delete filtIntronGap; } if (beVerbose) { fprintf(stderr, "\n"); fprintf(stderr, "oneExon: %7d\n", oneExon); fprintf(stderr, "allSmallIntrons: %7d\n", smaIntron); fprintf(stderr, "good: %7d\n", goodQual); fprintf(stderr, "probably good: %7d\n", probGood); fprintf(stderr, "junkExonsLeft: %7d\n", junkFirst); fprintf(stderr, "junkExonsRight: %7d\n", junkLast); fprintf(stderr, "junkExonsBoth: %7d\n", junkBoth); fprintf(stderr, "intronOnGap: %7d\n", splitOnGap); fprintf(stderr, "total: %7d\n", totMatches); } return(0); } kmer-code-2013-trunk/sim4dbutils/pickBestPolish.C0000644000000000000000000003240312322046702020440 0ustar rootroot#include #include #include #include "bio.h" #include "sim4.H" // Picks the best polish (or set of polishes that are all of the same // best quality) for each cDNA. // // Validate mode will print out ALL input matches, in the following // format // // estid gaid nummatches percentid (genFr genTo %) () () // // With a * somewhere to denote the best ones. Separate ESTs with // a dashed line. 
#define EPS_X 1 #define EPS_N_ESTS 10 #define EPS_N_MRNA 15 #define EPS_I 3 uint32 EPS_N = EPS_N_ESTS; uint32 doValidate = 0; sim4polishWriter *W = 0L; static void printPolishValidate(FILE *O, sim4polish *p, uint32 isBest) { fprintf(O, uint32FMTW(8)" "uint32FMTW(8)" "uint32FMTW(4)" "uint32FMTW(4), p->_estID, p->_genID, p->_percentIdentity, p->_numMatches); for (uint32 i=0; i_numExons; i++) fprintf(O, " ("uint32FMTW(6)"/"uint32FMTW(6)" "uint32FMTW(6)"/"uint32FMTW(6)" "uint32FMTW(3)")", p->_exons[i]._estFrom, p->_exons[i]._genFrom, p->_exons[i]._estTo, p->_exons[i]._genTo, p->_exons[i]._percentIdentity); if (isBest) fprintf(O, " *"); fprintf(O, "\n"); } static void pickBestSlave(sim4polish **p, uint32 pNum) { uint32 identitym = 0, nmatchesm = 0; // Best score for the mList uint32 identityi = 0, nmatchesi = 0; // Best score the the iList uint32 numExons = 0, numExonsi = 0, numExonsm = 0; uint32 tmp_nmatches = 0; double alpha; // Difficult choice here.... // if (pNum == 1) { if (doValidate == 0) W->writeAlignment(p[0]); return; } if ((p[0]->_estID % 1287) == 0) { fprintf(stderr, "Picking Best for estID="uint32FMT" with %5d choices.\r", p[0]->_estID, pNum); fflush(stderr); } // Find the best percentIdentity and best numberOfMatches. // // identityi is the best percent identity of all the matches for this EST, and // nmatchesi is the number of matches for the longest best identity match(es). // // nmatchesm is the best numMatches of all the matches for this EST, and // identitym is the highest percent identity for the best numMatches match(es). 
for (uint32 i=0; i_percentIdentity > identityi) || (p[i]->_percentIdentity == identityi && p[i]->_numMatches > nmatchesi)) { identityi = p[i]->_percentIdentity; nmatchesi = p[i]->_numMatches; } if ((p[i]->_numMatches > nmatchesm) || (p[i]->_numMatches == nmatchesm && p[i]->_percentIdentity > identitym)) { nmatchesm = p[i]->_numMatches; identitym = p[i]->_percentIdentity; } } // Otherwise, if the best scores on both lists are the same, pick // the matches with the largest number of exons // if ((identityi == identitym) && (nmatchesi == nmatchesm)) { // Find the largest number of exons, allowing some margin in numMatches // numExonsi = 0; for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches >= nmatchesi) && (numExonsi < p[i]->_numExons)) numExonsi = p[i]->_numExons; numExons = numExonsi; tmp_nmatches = nmatchesi; for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches >= nmatchesi - EPS_N) && (numExons < p[i]->_numExons - EPS_X)) { tmp_nmatches = p[i]->_numMatches; numExons = p[i]->_numExons; } // Scan the entire list, printing the best stuff. We cannot just // scan both the mList and iList, as those probably contain // duplicates. if (doValidate) { if (tmp_nmatches == nmatchesi) fprintf(stdout, "--------------------1 (Clear Winner)\n"); else fprintf(stdout, "--------------------2 (Exon Clear Winner)\n"); for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches == tmp_nmatches) && (p[i]->_numExons == numExons))); } else { for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches == tmp_nmatches) && (p[i]->_numExons == numExons)) W->writeAlignment(p[i]); } return; } // Start over. Find the best two percentIdentities. Break ties // with numMatches. 
// // i will be the best, // m will be the second best // identityi = identitym = 0; nmatchesi = nmatchesm = 0; for (uint32 i=0; i_percentIdentity > identityi) { identitym = identityi; nmatchesm = nmatchesi; identityi = p[i]->_percentIdentity; nmatchesi = p[i]->_numMatches; } else if ((p[i]->_percentIdentity == identityi) && (p[i]->_numMatches > nmatchesi)) { nmatchesi = p[i]->_numMatches; } else if ((p[i]->_percentIdentity < identityi) && ((p[i]->_percentIdentity > identitym) || ((p[i]->_percentIdentity == identitym) && (p[i]->_numMatches > nmatchesm)))) { nmatchesm = p[i]->_numMatches; identitym = p[i]->_percentIdentity; } } // Now, 'i' is the highest percent identity, 'm' is the second // highest. By definition, numMatches for 'i' is less than // numMatches for 'm'. // If the number of matches is different, output everything with the // top score. // // We are guaranteed that the identities are the same. (I think) if (nmatchesi >= nmatchesm) { // Find the match(es) with the largest number of exons numExonsi = 0; for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches >= nmatchesi) && (numExonsi < p[i]->_numExons)) numExonsi = p[i]->_numExons; numExons = numExonsi; tmp_nmatches = nmatchesi; for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches >= nmatchesi - EPS_N) && (numExons < p[i]->_numExons - EPS_X)) { numExons = p[i]->_numExons; tmp_nmatches = p[i]->_numMatches; } if (doValidate) { if (tmp_nmatches == nmatchesi) fprintf(stdout, "--------------------3 (?)\n"); else fprintf(stdout, "--------------------4 (Exon ?)\n"); for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches == tmp_nmatches) && (p[i]->_numExons == numExons))); } else { for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches == tmp_nmatches) && (p[i]->_numExons == numExons)) W->writeAlignment(p[i]); } return; } // Otherwise, compute alpha alpha = ((nmatchesm - nmatchesi) / ((nmatchesm / (double)identitym) - (nmatchesi / 
(double)identityi)))/100; // If alpha below a magic threshold, pick the shorter match. // if (alpha < 0.8) { // Find the match(es) with the largest number of exons numExons = tmp_nmatches = 0; for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches >= nmatchesi) && (numExons < p[i]->_numExons)) numExons = p[i]->_numExons; if (doValidate) { fprintf(stdout, "--------------------5 (alpha < 0.8)\n"); for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches == nmatchesi) && (p[i]->_numExons == numExons))); } else { for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches == nmatchesi) && (p[i]->_numExons == numExons)) W->writeAlignment(p[i]); } return; } // Otherwise, pick the longer one. // XXX: We can still check: // if an internal gap is in N's // the number of exons // etc, etc. // See if the smaller one has an internal gap that corresponds to // N's in the genome. If so, assume that the exon mapped to the // N's and pick the smaller. // // Need code to process genome, finding N's larger than some threshold. 
// Output as 'genID beg end' // Find the largest number of exons for each of the contenders numExonsi = numExonsm = 0; for (uint32 i=0; i_percentIdentity == identitym) && (p[i]->_numMatches == nmatchesm) && (numExonsm < p[i]->_numExons)) numExonsm = p[i]->_numExons; else if ((p[i]->_percentIdentity == identityi) && (p[i]->_numMatches == nmatchesi) && (numExonsi < p[i]->_numExons)) numExonsi = p[i]->_numExons; } if ((numExonsi > numExonsm + EPS_X) || (identityi > identitym + EPS_I)) { if (doValidate) { if (numExonsi > numExonsm + EPS_X) fprintf(stdout, "--------------------6 (Exon Plus alpha > 0.8)\n"); else fprintf(stdout, "--------------------7 (Pctid Plus alpha > 0.8)\n"); for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches == nmatchesi) && (p[i]->_numExons == numExonsi))); } else { for (uint32 i=0; i_percentIdentity == identityi) && (p[i]->_numMatches == nmatchesi) && (p[i]->_numExons == numExonsi)) W->writeAlignment(p[i]); } } else { numExons = numExonsm; tmp_nmatches = nmatchesm; for (uint32 i=0; i_percentIdentity == identitym) && (p[i]->_numMatches >= nmatchesm - EPS_N) && (numExons < p[i]->_numExons - EPS_X)) { tmp_nmatches = p[i]->_numMatches; numExons = p[i]->_numExons; } if (doValidate) { if (numExons == numExonsm) fprintf(stdout, "--------------------8 (alpha > 0.8)\n"); else fprintf(stdout, "--------------------9 (Exon alpha > 0.8)\n"); for (uint32 i=0; i_percentIdentity == identitym) && (p[i]->_numMatches == tmp_nmatches) && (p[i]->_numExons == numExons))); } else { for (uint32 i=0; i_percentIdentity == identitym) && (p[i]->_numMatches == tmp_nmatches) && (p[i]->_numExons == numExons)) W->writeAlignment(p[i]); } } } // Just a wrapper around the real best picker, so that we can easily // destroy polishes when we're done. 
// static void pickBest(sim4polish **p, uint32 pNum) { pickBestSlave(p, pNum); for (uint32 i=0; i file\n", argv[0]); if (isatty(fileno(stdin))) fprintf(stderr, "error: I cannot read polishes from the terminal!\n\n"); exit(1); } // Read polishes, picking the best when we see a change in the // estID. sim4polishReader *R = new sim4polishReader("-"); sim4polish **p = new sim4polish * [pAlloc]; sim4polish *q = 0L; W = new sim4polishWriter("-", style); if (R->getsim4polishStyle() != style) fprintf(stderr, "warning: input format and output format differ.\n"); while (R->nextAlignment(q)) { if ((q->_estID != estID) && (pNum > 0)) { pickBest(p, pNum); pNum = 0; } if (pNum >= pAlloc) { /* Grow the batch: copy the saved polish pointers from the old array (p) into the new, larger array (P) before the old one is freed. memcpy takes the destination first. */ sim4polish **P = new sim4polish * [pAlloc * 2]; memcpy(P, p, sizeof(sim4polish *) * pAlloc); delete [] p; p = P; pAlloc *= 2; } p[pNum++] = q; estID = q->_estID; q = 0L; // Otherwise we delete the alignment we just saved! } if (pNum > 0) pickBest(p, pNum); delete [] p; delete R; delete W; return(0); } kmer-code-2013-trunk/sim4dbutils/mappedCoverage.C0000644000000000000000000001624212415073322020443 0ustar rootroot#include #include #include #include #include "bio.h" #include "sim4.H" // Reports the amount of sequence covered by ALL matches for that // sequence. Example: if sequence iid 4 has two matches, one // covering the first 30% and the second covering the last 30%, this // will report that sequence iid 4 is covered 60%. // // Reads from stdin, writes to stdout. Options (-mask, -cov, -raw, // -blast) are described in the usage message in main. 
int main(int argc, char **argv) { uint32 covMax = 0; intervalList **cov = 0L; uint32 *len = 0L; uint32 lastIID = 0; bool isRaw = false; bool isBlast = false; char *fastaname = 0L; char *covname = 0L; seqCache *F = 0L; FILE *C = stdout; int arg=1; int err=0; while (arg < argc) { if (strcmp(argv[arg], "-mask") == 0) { fastaname = argv[++arg]; } else if (strcmp(argv[arg], "-cov") == 0) { covname = argv[++arg]; } else if (strcmp(argv[arg], "-raw") == 0) { isRaw = true; } else if (strcmp(argv[arg], "-blast") == 0) { isBlast = true; } else { fprintf(stderr, "unknown arg: '%s'\n", argv[arg]); err++; } arg++; } if ((err) || (isatty(fileno(stdin)))) { fprintf(stderr, "usage: %s [-mask in.fasta] [-cov dat] [-raw | -blast] < sim4db-results\n", argv[0]); fprintf(stderr, " -mask Read sequences from in.fasta, lower-case mask\n"); fprintf(stderr, " any base with an alignment, write to out.fasta\n"); fprintf(stderr, " -cov Write coverage statistics to 'dat' instead of stdout\n"); fprintf(stderr, " -raw If present, assume the 'sim4db-results' are\n"); fprintf(stderr, " a space-separated list of 'iid begin end', one per line\n"); fprintf(stderr, " -blast Same idea as raw, expects 'UID.IID' for query id,\n"); fprintf(stderr, " blast format (-m) 9.\n"); fprintf(stderr, "\n"); fprintf(stderr, "Output on stdout is the masked sequence if -mask is specified,\n"); fprintf(stderr, "otherwise, it is the coverage statistics.\n"); fprintf(stderr, "\n"); fprintf(stderr, "-mask is almost a required option - we need it to get the length.\n"); fprintf(stderr, "of sequences with no mapping (100%% uncovered) and to get the\n"); fprintf(stderr, "number of sequences.\n"); fprintf(stderr, "\n"); if (isatty(fileno(stdin))) fprintf(stderr, "error: I cannot read polishes from the terminal!\n\n"); } if (fastaname) { C = 0L; F = new seqCache(fastaname); } if (covname) { errno = 0; C = fopen(covname, "w"); if (errno) fprintf(stderr, "Failed to open '%s' for write: %s\n", covname, strerror(errno)), exit(1); } 
covMax = 1024 * 1024; if (F) covMax = F->getNumberOfSequences(); cov = new intervalList * [covMax]; len = new uint32 [covMax]; fprintf(stderr, "Found "uint32FMT" sequences in the input file.\n", covMax); for (uint32 i=0; i= covMax) { fprintf(stderr, "ERROR: Found iid "uint32FMT", but only allocated "uint32FMT" places!\n", iid, covMax); exit(1); } if (cov[iid] == 0L) { cov[iid] = new intervalList; len[iid] = 0; } if (iid >= lastIID) { lastIID = iid + 1; } cov[iid]->add(beg, end-beg); } } else { sim4polishReader *R = new sim4polishReader("-"); sim4polish *p = 0L; while (R->nextAlignment(p)) { if (p->_estID > covMax) fprintf(stderr, "DIE! You have more sequences in your polishes than in your source!\n"), exit(1); if (p->_estID >= covMax) { fprintf(stderr, "ERROR: Found iid "uint32FMT", but only allocated "uint32FMT" places!\n", p->_estID, covMax); exit(1); } if (cov[p->_estID] == 0L) { cov[p->_estID] = new intervalList; len[p->_estID] = p->_estLen; } if (p->_estID >= lastIID) { lastIID = p->_estID + 1; } for (uint32 e=0; e_numExons; e++) { p->_exons[e]._estFrom--; // Convert to space-based if (p->_matchOrientation == SIM4_MATCH_FORWARD) cov[p->_estID]->add(p->_exons[e]._estFrom, p->_exons[e]._estTo - p->_exons[e]._estFrom); else cov[p->_estID]->add(p->_estLen - p->_exons[e]._estTo, p->_exons[e]._estTo - p->_exons[e]._estFrom); } } } // Scan the list of intervalLists, compute the amount covered, print. 
// for (uint32 iid=0; iidnumberOfIntervals(); sumLengths = cov[iid]->sumOfLengths(); cov[iid]->merge(); } if (F) { seqInCore *S = F->getSequenceInCore(iid); if (len[iid] == 0) len[iid] = S->sequenceLength(); assert(len[iid] == S->sequenceLength()); char *seq = new char [len[iid] + 1]; strcpy(seq, S->sequence()); for (uint32 p=0; pnumberOfIntervals(); c++) { l = cov[iid]->lo(c); h = cov[iid]->hi(c); if (h > len[iid]) { fprintf(stderr, "ERROR: range "uint32FMT"-"uint32FMT" out of bounds (seqLen = "uint32FMT")\n", l, h, len[iid]); assert(h <= len[iid]); } for (uint32 p=l; pheader(), seq); delete [] seq; delete S; } if (C) { double percentCovered = 0.00; if (cov[iid]) percentCovered = cov[iid]->sumOfLengths() / (double)len[iid]; fprintf(C, uint32FMT"\t"uint32FMT"\t%5.3f\t"uint32FMT"\t"uint32FMT"\n", iid, len[iid], percentCovered, numRegions, sumLengths); } } } kmer-code-2013-trunk/sim4dbutils/doc.txt0000644000000000000000000000315010502307254016714 0ustar rootrootsim4db tools as of July 17, 2006 filterPolishes headPolishes mappedCoverage mergePolishes sortPolishes pickBestPolish pickUniquePolish pickUniquePolish-nhgri cleanPolishes fixPolishesIID plotIntronSize plotCoverageVsIdentity parseSNP comparePolishes convertToAtac trimSequencesBasedOnMatches uniqPolishes summarizePolishes removeDuplicate vennPolishes realignPolishes removeRedundant reportAlignmentDifferences ---------------------------------------- filterPolishes Filters polishes by percent identity, percent coverage, length of alignment (number of matches), number of exons, query of genomic IID. Can segregate polishes, placing polishes for each genomic IID into a separate file. Also, can remove deflines or alignments, and can "normalize" the genomic coordinates by adding in the match offset. ---------------------------------------- headPolishes Like the UNIX head command, returns the first N polishes in a file. 
---------------------------------------- mappedCoverage Returns the percentage of each query that is covered by an alignment. Also can mask out those regions with N's. ---------------------------------------- mergePolishes Merges multiple sets of polishes, mapped to the same genomic sequences, into one file, updating the query IID. ---------------------------------------- sortPolishes pickBestPolish pickUniquePolish pickUniquePolish-nhgri cleanPolishes fixPolishesIID plotIntronSize plotCoverageVsIdentity parseSNP comparePolishes convertToAtac trimSequencesBasedOnMatches uniqPolishes summarizePolishes removeDuplicate vennPolishes realignPolishes removeRedundant reportAlignmentDifferences kmer-code-2013-trunk/sim4dbutils/Make.include0000644000000000000000000001103212001455776017637 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../libutil/)/ LIBBIO/ :=$(realpath $/../libbio/)/ LIBSEQ/ :=$(realpath $/../libseq/)/ LIBSIM4/ :=$(realpath $/../libsim4/)/ src := $/cleanPolishes.C \ $/fixPolishesIID.C \ $/comparePolishes.C \ $/convertToAtac.C \ $/convertToExtent.C \ $/convertPolishes.C \ $/detectChimera.C \ $/depthOfPolishes.C \ $/filterPolishes.C \ $/headPolishes.C \ $/mappedCoverage.C \ $/mergePolishes.C \ $/parseSNP.C \ $/pickBestPolish.C \ $/pickBestPair.C \ $/pickUniquePolish.C \ $/plotCoverageVsIdentity.C \ $/removeDuplicate.C \ $/sortPolishes.C \ $/summarizePolishes.C \ $/uniqPolishes.C \ $/vennPolishes.C \ $/realignPolishes.C \ $/removeRedundant.C \ $/reportAlignmentDifferences.C \ $/s4p_overlap.C $/.C_SRCS :=${filter %.c,${src}} $/.CXX_SRCS :=${filter %.C,${src}} obj_c := ${$/.C_SRCS:.c=.o} obj_C := ${$/.CXX_SRCS:.C=.o} # always using c++ to link $/.CXX_EXES := $/cleanPolishes \ $/fixPolishesIID \ $/comparePolishes \ $/convertToAtac \ $/convertToExtent \ $/convertPolishes \ $/detectChimera \ $/depthOfPolishes \ $/filterPolishes \ $/headPolishes \ $/mappedCoverage \ $/mergePolishes \ $/parseSNP \ $/pickBestPolish \ $/pickBestPair \ $/pickUniquePolish \ 
$/plotCoverageVsIdentity \ $/removeDuplicate \ $/sortPolishes \ $/summarizePolishes \ $/uniqPolishes \ $/vennPolishes \ $/realignPolishes \ $/removeRedundant \ $/reportAlignmentDifferences $/.CLEAN := $/*.o $(eval $/%.d $/%.o: CFLAGS+=-I${LIBUTL/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBSIM4/}) $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBUTL/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBSIM4/}) $/filterPolishes : $/filterPolishes.o $/headPolishes : $/headPolishes.o $/mappedCoverage : $/mappedCoverage.o $/mergePolishes : $/mergePolishes.o $/sortPolishes : $/sortPolishes.o $/pickBestPolish : $/pickBestPolish.o $/pickBestPair : $/pickBestPair.o $/pickUniquePolish : $/pickUniquePolish.o $/cleanPolishes : $/cleanPolishes.o $/fixPolishesIID : $/fixPolishesIID.o $/plotIntronSize : $/plotIntronSize.o $/plotCoverageVsIdentity : $/plotCoverageVsIdentity.o $/parseSNP : $/parseSNP.o $/comparePolishes : $/comparePolishes.o $/s4p_overlap.o $/convertToAtac : $/convertToAtac.o $/convertToExtent : $/convertToExtent.o $/convertPolishes : $/convertPolishes.o $/depthOfPolishes : $/depthOfPolishes.o $/detectChimera : $/detectChimera.o $/trimSequencesBasedOnMatches : $/trimSequencesBasedOnMatches.o $/uniqPolishes : $/uniqPolishes.o $/summarizePolishes : $/summarizePolishes.o $/removeDuplicate : $/removeDuplicate.o $/vennPolishes : $/vennPolishes.o $/realignPolishes : $/realignPolishes.o $/removeRedundant : $/removeRedundant.o $/s4p_overlap.o $/reportAlignmentDifferences : $/reportAlignmentDifferences.o # Yeah, not everyone needs all these libraries. Live with it. # ${$/.C_EXES} ${$/.CXX_EXES}: ${LIBSIM4/}libsim4.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a # Run the test cases for parseSNP $/.parseSNP-test: $/parseSNP ${SIM4DB/}sim4db @ echo See page 85 in Big Bad Bri IR#2 for notes on parseSNP-test. 
@ t=`dirname $<`/test ${SIM4DB/}sim4db -align -cdna $t/parsesnp-snp.fasta -genomic $t/parsesnp-gen.fasta -o $t/parsesnp-sim4.out @ t=`dirname $<`/test $< -O $t/parsesnp-good -F $t/parsesnp-fail < $t/parsesnp-sim4.out @-t=`dirname $<`/test diff $t/parsesnp-good $t/parsesnp-correct-parsed > $t/parsesnp-diffs @ t=`dirname $<`/test; \ if test -s $t/parsesnp-diffs ; then \ echo "parseSNP tests FAILED" ; \ cat $t/parsesnp-diffs ; \ exit 13 ; \ else \ echo "parseSNP tests passed" ; \ t=`dirname $<` rm -f $t/parsesnp-good $t/parsesnp-fail $t/parsesnp-sim4.out $t/parsesnp-diffs $t/parsesnp-gen.fastaidx $t/parsesnp-snp.fastaidx; \ fi kmer-code-2013-trunk/sim4dbutils/plotCoverageVsIdentity.C0000644000000000000000000000211312322046702022165 0ustar rootroot#include #include #include #include #include #include "sim4.H" int main(int argc, char ** argv) { int c[101] = {0}; int i[101] = {0}; if (isatty(fileno(stdin))) { fprintf(stderr, "creates three files:\n"); fprintf(stderr, " coverage.histogram\n"); fprintf(stderr, " identity.histogram\n"); fprintf(stderr, " c-vs-i.scatter\n"); fprintf(stderr, "\n"); fprintf(stderr, "error: I cannot read polishes from the terminal!\n\n"); exit(1); } FILE *C = fopen("coverage.histogram", "w"); FILE *I = fopen("identity.histogram", "w"); FILE *S = fopen("c-vs-i.scatter", "w"); sim4polishReader *R = new sim4polishReader("-"); sim4polish *p = 0L; while (R->nextAlignment(p)) { fprintf(S, uint32FMT" "uint32FMT"\n", p->_percentIdentity, p->_querySeqIdentity); i[p->_percentIdentity]++; c[p->_querySeqIdentity]++; } for (int x=0; x<101; x++) { fprintf(C, "%d\n", c[x]); fprintf(I, "%d\n", i[x]); } fclose(C); fclose(I); fclose(S); return(0); } kmer-code-2013-trunk/sim4dbutils/realignPolishes.C0000644000000000000000000002032712322046702020647 0ustar rootroot#include #include #include #include #include "bio++.H" #include "seqCache.H" #include "sim4.H" // This code takes basic sim4db format polishes and recomputes the // alignments and scores. 
Required in the input polishes are the EST // id, genomic id, exon coordinates and an orientation. int main(int argc, char **argv) { // Load all the sequences. We really do need all the ESTs in core, // since they probably aren't in a useful sorted order. You can // probably figure out a way to get rid of the seqCache for the // GEN. Doing so will reduce memory usage by about 50%. seqCache *EST = 0L; seqCache *GEN = 0L; int mergeTolerancePerc = 0; int mergeToleranceBase = 0; int statsOnly = 0; int warnOnChange = 0; // Statistics on the exon merge int mergedExons = 0; int mergedMatches = 0; int numcdnagaps = 0; int nummatcheswithgaps = 0; FILE *mergeLog = 0L; int arg = 1; while (arg < argc) { if (strncmp(argv[arg], "-merge", 2) == 0) { mergeTolerancePerc = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-b", 2) == 0) { mergeToleranceBase = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-M", 2) == 0) { mergeLog = fopen(argv[++arg], "w"); } else if (strncmp(argv[arg], "-e", 2) == 0) { if (statsOnly) { EST = new seqCache(argv[++arg], 1000, false); // debugging only! 
} else { EST = new seqCache(argv[++arg], 0, false); EST->loadAllSequences(); } } else if (strncmp(argv[arg], "-g", 2) == 0) { GEN = new seqCache(argv[++arg], 0, false); GEN->loadAllSequences(); } else if (strncmp(argv[arg], "-q", 2) == 0) { statsOnly = 1; } else if (strncmp(argv[arg], "-w", 2) == 0) { warnOnChange = 1; } arg++; } if ((statsOnly == 0) && (!EST || !GEN)) { fprintf(stderr, "usage: %s [-merge percent-tolerance] [-M merge-log] [-q] -e est.fasta -g genome.fasta < polishes > somewhere\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, " Polishes _MUST_ be sorted by genomic index.\n"); fprintf(stderr, " If not, performance will be worse than atrocious.\n"); fprintf(stderr, "\n"); fprintf(stderr, " percent-tolerance -- merge exons separated by gap if\n"); fprintf(stderr, " the cDNA and genomic gaps differ by less than p percent.\n"); fprintf(stderr, " A value of 5 means 5%%\n"); fprintf(stderr, "\n"); fprintf(stderr, " -q: Don't actually do the work, just count the statistics\n"); fprintf(stderr, "\n"); fprintf(stderr, "\n"); exit(1); } char *s1 = new char [16 * 1024 * 1024]; char *s2 = new char [16 * 1024 * 1024]; int l1 = 0; int l2 = 0; speedCounter *C = new speedCounter("%12.0f polishes -- %12.0f polishes/second\r", 1.0, 0xff, true); sim4polishWriter *W = new sim4polishWriter("-", sim4polishS4DB); sim4polishReader *R = new sim4polishReader("-"); sim4polish *p = 0L; while (R->nextAlignment(p)) { //fprintf(stdout, "BEFORE\n"); //p->s4p_printPolish(stdout); // If we have a mergeTolerance, merge adjacent exons that are // separated my approximately equal sized cDNA and genomic gaps. // // Possible a better way to do this is to check if the identity // of the missing region is decent, too. 
// Remember the id/cv of this guy for the log // double id = 0.0; double cv = 0.0; if (mergeLog) { id = p->s4p_percentIdentityExact(); cv = p->s4p_percentCoverageExact(); } int merged = 0; int gapped = 0; if ((mergeTolerancePerc > 0) || (mergeToleranceBase > 0)) { for (uint32 i=1; i_numExons; i++) { int cgap = p->_exons[i]._estFrom - p->_exons[i-1]._estTo; int ggap = p->_exons[i]._genFrom - p->_exons[i-1]._genTo; bool mergeGap = false; // New method -- check if the gaps are within 20bp of each other // int diff = cgap - ggap; if (diff < 0) diff = -diff; if (diff < mergeToleranceBase) mergeGap = true; // Original method -- cehck if the gaps are within 10% of each other // int ctol = cgap * (100 + mergeTolerancePerc); int gtol = ggap * (100 + mergeTolerancePerc); cgap *= 100; ggap *= 100; if (((cgap < ggap) && (ctol > ggap)) || ((ggap < cgap) && (gtol > cgap))) mergeGap = true; if (cgap > 1) { numcdnagaps++; gapped++; } if ((cgap > 1) && (mergeGap)) { // Merge i and i-1 if adding in the tolerance makes either // the cgap or the ggap longer than the other gap. i.e., the // cgap was shorter, but including the tolerance makes it // longer, so they're about the same size. if (mergeLog) fprintf(mergeLog, "MERGE: "uint32FMTW(4)"-"uint32FMTW(4)" (%6.2f,%6.2f) "uint32FMTW(4)"-"uint32FMTW(4) " and "uint32FMTW(8)"-"uint32FMTW(8)" (%6.2f,%6.2f) "uint32FMTW(8)"-"uint32FMTW(8)"\n", p->_exons[i-1]._estFrom, p->_exons[i-1]._estTo, cgap / 100.0, ctol / 100.0, p->_exons[i]._estFrom, p->_exons[i]._estTo, p->_exons[i-1]._genFrom, p->_exons[i-1]._genTo, ggap / 100.0, gtol / 100.0, p->_exons[i]._genFrom, p->_exons[i]._genTo); // merge exons p->_exons[i-1]._estTo = p->_exons[i]._estTo; p->_exons[i-1]._genTo = p->_exons[i]._genTo; // delete this exon p->s4p_deleteExon(i); // Do it again! 
i--; merged++; mergedExons++; } } if (merged) mergedMatches++; if (gapped) nummatcheswithgaps++; } // For each exon, generate an alignment if (statsOnly == 0) { p->_estLen = EST->getSequenceInCore(p->_estID)->sequenceLength(); p->_estPolyA = 0; p->_estPolyT = 0; for (uint32 i=0; i_numExons; i++) { l1 = p->_exons[i]._estTo - p->_exons[i]._estFrom + 1; l2 = p->_exons[i]._genTo - p->_exons[i]._genFrom + 1; strncpy(s1, EST->getSequenceInCore(p->_estID)->sequence() + p->_exons[i]._estFrom - 1, l1); strncpy(s2, GEN->getSequenceInCore(p->_genID)->sequence() + p->_exons[i]._genFrom - 1, l2); if (p->_matchOrientation == SIM4_MATCH_COMPLEMENT) { strncpy(s1, EST->getSequenceInCore(p->_estID)->sequence() + p->_estLen - p->_exons[i]._estTo, l1); reverseComplementSequence(s1, l1); } s1[l1] = 0; s2[l2] = 0; delete [] p->_exons[i]._estAlignment; delete [] p->_exons[i]._genAlignment; p->_exons[i]._estAlignment = new char [l1+l2+1]; p->_exons[i]._genAlignment = new char [l1+l2+1]; halign(s1, s2, l1, l2, p->_exons[i]._estAlignment, p->_exons[i]._genAlignment); } // There isn't an intron after the last exon. Force it. // p->_exons[p->_numExons-1]._intronOrientation = SIM4_INTRON_NONE; // Check that we didn't radically change things uint32 nm = p->_numMatches; p->s4p_updateAlignmentScores(); W->writeAlignment(p); if (warnOnChange) { uint32 diff = 0; if (nm < p->_numMatches) diff = p->_numMatches - nm; if (nm > p->_numMatches) diff = nm - p->_numMatches; if (diff > p->_numMatches / 100) fprintf(stdout, "WARNING: CHANGED! 
"uint32FMT" -> "uint32FMT"\n", nm, p->_numMatches); } } if (merged) { fprintf(mergeLog, "MERGED\tEST\t"uint32FMT"\tfrom\t%8.3f\t%8.3f\tto\t%8.3f\t%8.3f\n", p->_estID, id, cv, p->s4p_percentIdentityExact(), p->s4p_percentCoverageExact()); } C->tick(); } if ((mergeTolerancePerc > 0) || (mergeToleranceBase > 0)) { fprintf(stderr, "FOUND: %d gaps in %d matches.\n", numcdnagaps, nummatcheswithgaps); fprintf(stderr, "MERGED: %d gaps in %d matches.\n", mergedExons, mergedMatches); } delete GEN; delete EST; return(0); } kmer-code-2013-trunk/sim4dbutils/trimSequencesBasedOnMatches.C0000644000000000000000000000643012322046702023106 0ustar rootroot#include #include #include #include #include #include "bri++.H" #include "sim4reader.h" int main(int argc, char ** argv) { FastA *seqs = 0L; FastABuffer seqsbuffer; FILE *pfile = 0L; sim4polish *p = 0L; if (argc == 1) { fprintf(stderr, "usage: %s -sequence s.fasta -polishes p.polished\n", argv[0]); exit(1); } int arg = 1; while (arg < argc) { if (strncmp(argv[arg], "-sequence", 2) == 0) { seqs = new FastA(argv[++arg], true); } else if (strncmp(argv[arg], "-polishes", 2) == 0) { errno = 0; pfile = fopen(argv[++arg], "r"); if (errno) { fprintf(stderr, "Can't open '%s': %s\n", argv[arg], strerror(errno)); exit(1); } } else { fprintf(stderr, "%s: unknown option '%s'\n", argv[arg]); } arg++; } if (seqs == 0L) { fprintf(stderr, "error: you need to specify '-sequence s.fasta'\n"); exit(1); } if (pfile == 0L) { fprintf(stderr, "error: you need to specify '-polishes p.polished'\n"); exit(1); } uint32 numseqs = seqs->numberOfSequences(); uint32 *lrange = new uint32 [numseqs]; uint32 *hrange = new uint32 [numseqs]; for (uint32 i=0; iestID] > p->exons[0].estFrom-1) lrange[p->estID] = p->exons[0].estFrom-1; if (hrange[p->estID] < p->exons[p->numExons-1].estTo) hrange[p->estID] = p->exons[p->numExons-1].estTo; numRead++; if ((numRead & 0xff) == 0) { fprintf(stderr, "Reading matches: %u\r", numRead); fflush(stderr); } destroyPolish(p); } 
fprintf(stderr, "\n"); uint32 seqcopylen = 128 * 1024; char *seqcopy = new char [seqcopylen + 1]; char *defcopy = new char [128 * 1024]; seqs->first(seqsbuffer); for (uint32 i=0; inext(seqsbuffer)) { // If there is no polish for the sequence, just write the whole // thing out. This is a hack, so that svi will run. // if (lrange[i] >= hrange[i]) { lrange[i] = 0; hrange[i] = seqsbuffer.sequenceLength(); } if (lrange[i] < hrange[i]) { //seqs->seek(seqsbuffer, i); if (seqsbuffer.sequenceLength() > seqcopylen) { delete [] seqcopy; seqcopylen = seqsbuffer.sequenceLength() + 128 * 1024; seqcopy = new char [seqcopylen + 1]; } for (uint32 j=0, k=lrange[i]; k #include #include #include #include "sim4.H" #include "util++.H" int main(int argc, char **argv) { char *outPrefix = 0L; char datName[FILENAME_MAX]; char gnuName[FILENAME_MAX]; char pngName[FILENAME_MAX]; char gnuCmd[FILENAME_MAX]; char *inName = 0L; int arg = 1; int err = 0; while (arg < argc) { if (strncmp(argv[arg], "-o", 2) == 0) { outPrefix = argv[++arg]; } else if (strncmp(argv[arg], "-i", 2) == 0) { inName = argv[++arg]; } else { fprintf(stderr, "Unknown arg '%s'\n", argv[arg]); err++; } arg++; } if ((inName == 0L) || (outPrefix == 0L) || (err != 0)) { fprintf(stderr, "usage: %s -i sim4db -o outputPrefix\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, " Creates outputPrefix.dat containing the number of errors at each\n"); fprintf(stderr, " base position, and outputPrefix.png the visual representation.\n"); fprintf(stderr, "\n"); fprintf(stderr, " Suggested usage:\n"); fprintf(stderr, "\n"); fprintf(stderr, " snapper2\n"); fprintf(stderr, " -queries Q.fasta\n"); fprintf(stderr, " -genomic G.fasta\n"); fprintf(stderr, " -positions G.posDB\n"); fprintf(stderr, " -aligns\n"); fprintf(stderr, " -minmatchidentity 94\n"); fprintf(stderr, " -minmatchcoverage 90\n"); fprintf(stderr, " -mersize 18\n"); fprintf(stderr, " -ignore 500\n"); fprintf(stderr, " -numthreads 16\n"); fprintf(stderr, " -verbose\n"); 
fprintf(stderr, " -output Q.sim4db\n"); fprintf(stderr, "\n"); fprintf(stderr, " pickBestPolish < Q.sim4db > Q.best.sim4db\n"); fprintf(stderr, "\n"); fprintf(stderr, " reportAlignmentDifferences\n"); fprintf(stderr, " -i Q.best.sim4db\n"); fprintf(stderr, " -o Q\n"); fprintf(stderr, "\n"); fprintf(stderr, "\n"); exit(1); } fprintf(stderr, "Reading input from '%s'\n", inName); fprintf(stderr, "Writing output to '%s'\n", outPrefix); // Open output files early, in case they fail. errno = 0; sprintf(datName, "%s.dat", outPrefix); sprintf(gnuName, "%s.gnuplot", outPrefix); sprintf(pngName, "%s.png", outPrefix); FILE *DAT = fopen(datName, "w"); if (errno) fprintf(stderr, "Failed to open '%s' for writing data: %s\n", datName, strerror(errno)), exit(1); FILE *GNU = fopen(gnuName, "w"); if (errno) fprintf(stderr, "Failed to open '%s' for writing gnuplot command: %s\n", gnuName, strerror(errno)), exit(1); // Read matches. uint32 lMax = 1048576; uint32 lLen = 0; uint32 *nTot = new uint32 [lMax]; uint32 *nIde = new uint32 [lMax]; uint32 *nMis = new uint32 [lMax]; uint32 *nIns = new uint32 [lMax]; uint32 *nDel = new uint32 [lMax]; memset(nTot, 0, sizeof(uint32) * lMax); memset(nIde, 0, sizeof(uint32) * lMax); memset(nMis, 0, sizeof(uint32) * lMax); memset(nIns, 0, sizeof(uint32) * lMax); memset(nDel, 0, sizeof(uint32) * lMax); sim4polishReader *R = new sim4polishReader(inName); sim4polish *p = 0L; while (R->nextAlignment(p)) { bool fwd = (p->_matchOrientation == SIM4_MATCH_FORWARD); for (uint32 exon=0; exon_numExons; exon++) { sim4polishExon *e = p->_exons + exon; // Fail if there are no alignments. 
if ((e->_estAlignment == 0L) || (e->_genAlignment == 0L)) fprintf(stderr, "FAIL: Input has no alignment strings (-aligns option in snapper2).\n"), exit(1); // Parse the alignment to find ungapped blocks uint32 aPos = 0; // Position in the alignment uint32 qPos = e->_estFrom - 1; // Actual position in the query sequence uint32 gPos = e->_genFrom - 1; // Actual position in the genome sequence if (fwd == false) qPos = p->_estLen - e->_estFrom + 1; bool notDone = true; // There should be a way to get rid of this stupid variable.... while (notDone) { notDone = ((e->_estAlignment[aPos] != 0) && (e->_genAlignment[aPos] != 0)); // If we find the end of a gapless block, emit a match if (e->_estAlignment[aPos] == e->_genAlignment[aPos]) nIde[qPos]++; else if (e->_estAlignment[aPos] == '-') nDel[qPos]++; else if (e->_genAlignment[aPos] == '-') nIns[qPos]++; else nMis[qPos]++; nTot[qPos]++; assert(qPos < lMax); if (lLen < qPos) lLen = qPos; //fprintf(stdout, "%s "uint32FMT" %c ->_ %s "uint32FMT" %c\n", // p->_estDefLine, qPos, e->_estAlignment[aPos], // p->_genDefLine, gPos, e->_genAlignment[aPos]); if (e->_estAlignment[aPos] != '-') if (fwd) qPos++; else qPos--; if (e->_genAlignment[aPos] != '-') gPos++; aPos++; } } } // Index // nTot // nIde, percent // nDel, percent // nIns, percent // nMis, percent fprintf(DAT, "#idx\tnTot\tnIde\tfrac\tnDel\tfrac\tnIns\tfrac\tnMis\tfrac\tnErr\tfrac\n"); for (uint32 i=0; i<=lLen; i++) fprintf(DAT, "%u\t%u\t%u\t%6.4f\t%u\t%6.4f\t%u\t%6.4f\t%u\t%6.4f\t%u\t%6.4f\n", i, nTot[i], nIde[i], (double)nIde[i] / nTot[i], nDel[i], (double)nDel[i] / nTot[i], nIns[i], (double)nIns[i] / nTot[i], nMis[i], (double)nMis[i] / nTot[i], nTot[i] - nIde[i], (double)(nTot[i] - nIde[i]) / nTot[i]); fprintf(GNU, "set terminal png\n"); fprintf(GNU, "set output \"%s\"\n", pngName); fprintf(GNU, "set title \"Fraction error per base for '%s'\"\n", inName); fprintf(GNU, "set xlabel \"Base position\"\n"); fprintf(GNU, "set ylabel \"Fraction error\"\n"); fprintf(GNU, "plot 
[][0:0.04] \\\n"); fprintf(GNU, " \"%s\" using 1:4 with lines title \"nTot\", \\\n", datName); fprintf(GNU, " \"%s\" using 1:6 with lines title \"nDel\", \\\n", datName); fprintf(GNU, " \"%s\" using 1:8 with lines title \"nIns\", \\\n", datName); fprintf(GNU, " \"%s\" using 1:10 with lines title \"nMis\", \\\n", datName); fprintf(GNU, " \"%s\" using 1:12 with lines title \"nErr\"\n", datName); fclose(DAT); fclose(GNU); sprintf(gnuCmd, "gnuplot < %s", gnuName); system(gnuCmd); return(0); } kmer-code-2013-trunk/sim4dbutils/fixPolishesIID.C0000644000000000000000000000615712322046702020347 0ustar rootroot#include #include #include #include #include "bio.h" #include "sim4.H" #include #include using namespace std; // Updates the IID's in a set of polishes. If a file of deflines (or // fasta file) is supplied, the IIDs will match those, otherwise, // they remain the same. void addToDict(map &d, char *n) { if (n == 0L) return; seqCache *F = new seqCache(n); seqInCore *S = F->getSequenceInCore(); while (S) { string s = S->header(); d[s] = S->getIID(); delete S; S = F->getSequenceInCore(); } delete F; } int main(int argc, char **argv) { char *cDeflines = 0L; char *gDeflines = 0L; sim4polishStyle style = sim4polishStyleDefault; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-c") == 0) { cDeflines = argv[++arg]; } else if (strcmp(argv[arg], "-g") == 0) { gDeflines = argv[++arg]; } else if (strcmp(argv[arg], "-gff3") == 0) { style = sim4polishGFF3; } else { fprintf(stderr, "Unknown arg: %s\n", argv[arg]); } arg++; } if (isatty(fileno(stdin))) { fprintf(stderr, "usage: %s [-c c.fasta] [-g g.fasta] [-gff3] < polishes > polishes\n", argv[0]); fprintf(stderr, " -c c.fasta Read cDNA deflines from c.fasta\n"); fprintf(stderr, " -g g.fasta Read genomic deflines from g.fasta\n"); fprintf(stderr, " -gff3 Write output as GFF3\n"); fprintf(stderr, "\n"); fprintf(stderr, " Rewrites the input polishes, updating the sequence index to match\n"); fprintf(stderr, " that of the 
associated fasta file. One or both of -c and -g may be used.\n"); fprintf(stderr, " Polishes that refer to a sequence not present in the input fasta file are\n"); fprintf(stderr, " not output.\n"); exit(1); } // We parse args, then build the dictionaries, so we can do // any quick error detection first. map g; map c; if (gDeflines) { fprintf(stderr, "Reading genomic deflines from '%s'\n", gDeflines); addToDict(g, gDeflines); } if (cDeflines) { fprintf(stderr, "Reading genomic deflines from '%s'\n", cDeflines); addToDict(c, cDeflines); } // Read all the matches, changing IIDs. If we find a defline // with no IID, holler and die. sim4polishWriter *W = new sim4polishWriter("-", style); sim4polishReader *R = new sim4polishReader("-"); sim4polish *p = 0L; if (R->getsim4polishStyle() != style) fprintf(stderr, "warning: input format and output format differ.\n"); fprintf(stderr, "Filtering polishes.\n"); while (R->nextAlignment(p)) { string cd = p->_estDefLine; string gd = p->_genDefLine; if (cDeflines != 0L) { if (c.find(cd) == c.end()) // EST defline not in the input sequences, don't output. continue; p->_estID = c[cd]; } if (gDeflines != 0L) { if (g.find(gd) == g.end()) // Genomic defline not in the input sequences, don't output. 
continue; p->_genID = g[gd]; } W->writeAlignment(p); } delete R; delete W; } kmer-code-2013-trunk/sim4dbutils/LOG0000644000000000000000000002351107605137611015765 0ustar rootroot After trimming: dsc154p:/home/walenzbp/projects/sim4reader> perl ../../dbEST-20020331/intronstats.pl < trimmed.out int: 206 sma: 195(First:16,Last:23,oneF:92,oneL:64,oneB:0) big 1319(First:330,Last:346,One:643) tot 4297418 ff=4,fc=12 lf=12,lc=11 sim4begin 3529019[872-0-0] 1363[2586779-3425929] <538-0-96-forward-forward> edef=>CRA|162000089143028 /altid=gi|11947691 /dataset=dbest /taxon=9606 /org=Homo sapiens /date=12/21/2000 /altid=gb_acc|BF673796.1 /organ=prosta te /tissue_type= /length=872 /clone_end=5' /def=602135941F1 NIH_MGC_83 Homo sapiens cDNA clone IMAGE:4272299 5', mRNA sequence. ddef=>CRA|GA_x54KRE8RWM2:1..4526107 /organism=Homo sapiens /order=1 /ga_uid=181000064531106 /len=4526107 1-53 (2001-2053) <53-0-100> -> 54-136 (2322-2404) <83-0-100> -> 137-281 (2499-2643) <144-0-99> -> 282-337 (3107-3162) <54-0-96> -> 338-507 (3337-3509) <167-0-95> -> 508-553 (822566-822611) <37-0-77> gcttcttcctctttctcgactccatcttcgcggtagctgggaccgccgttcag gcttcttcctctttctcgactccatcttcgcggtagctgggaccgccgttcag tcgccaatatgcagctctttgtccgcgcccaggagctacacaccttcgaggtgaccggccaggaaacggtcgcccagatcaag tcgccaatatgcagctctttgtccgcgcccaggagctacacaccttcgaggtgaccggccaggaaacggtcgcccagatcaag gctcatgtagcctcactggagggcattgccccggaagatcaagtcgtgctcctggcaggcgcgcccctggaggatgaggccactctgggccagtgcggggtggaggccctgaTtaccctggaagtagcaggccgcatgcttggag gctcatgtagcctcactggagggcattgccccggaagatcaagtcgtgctcctggcaggcgcgcccctggaggatgaggccactctgggccagtgcggggtggaggccctgaCtaccctggaagtagcaggccgcatgcttggag gtaaagtccatggttccctggcccgtgcCTgaaaagtgagaggtcagactcctaag gtaaagtccatggttccctggcccgtgcTGgaaaagtgagaggtcagactcctaag gtggccaaacaggagaagaagaCgaagaagacaggtcgggctaagcgg-ggatgcagtacaaccggcgcttGtgtcaacgtGgtgcccacctttggcaagaagaagggccccaatgccaactcttaagtcttt-gtaattctggc tt-ctctaataaaaaagc-acttagttca 
gtggccaaacaggagaagaagaAgaagaagacaggtcgggctaagcggCggatgcagtacaaccggcgctt-tgtcaacgtTgtgcccacctttggcaagaagaagggccccaatgccaactcttaagtctttTgtaattctggc ttTctctaataaaaaagcCacttagttca Gcc-AAAAaaaaaaaaaaaaaaaaaaaaaagtggg-ggGgggCCgCga TccGTCTCaaaaaaaaaaaaaaaaaaaaaagtgggAggCgggA-g-ga sim4end On the latest run (with 85%, 10k filtering): int: 206 sma: 147(First:10,Last:20,oneF:66,oneL:51,oneB:0) big 1184(First:316,Last:325,One:543) tot 4297418 ff=3,fc=7 lf=10,lc=10 There are still ~1300 matches with big introns. See: -rw-rw-r-- 1 walenz assembly 982915 Jun 24 18:22 big-exon-after-big-intron -rw-rw-r-- 1 walenz assembly 738089 Jun 24 18:22 big-exon-after-big-oneintron -rw-rw-r-- 1 walenz assembly 45228 Jun 24 18:22 sma-exon-after-big-intron -rw-rw-r-- 1 walenz assembly 161196 Jun 24 18:22 sma-exon-after-big-oneintron sim4begin 3252894[607-0-0] 90[582719-1347738] <605-0-99-forward-forward> edef=>CRA|107000020413693 /altid=gi|9345515 /dataset=dbest /taxon=9606 /org=Homo sapiens /date=07/21 /2000 /altid=gb_acc|BE409065.1 /organ=placenta /tissue_type=choriocarcinoma /length=607 /clone_end=5 ' /def=601301223F1 NIH_MGC_21 Homo sapiens cDNA clone IMAGE:3635909 5', mRNA sequence. 
ddef=>CRA|GA_x9V1BB6:1..4925599 /organism=Homo sapiens /order=1 /ga_uid=332442982 /len=4925599 1-172 (2001-2172) <172-0-100> -> 173-340 (756341-756508) <167-0-99> -> 341-401 (758205-758265) <61-0-100> -> 402-424 (758575-758597) <23-0-100> -> 425-500 (759007-759082) <76-0-100> -> 501-607 (762918-763025) <106-0-97> tgctgcctgtgtagttgcagccgcggccgcctcccgccagctcgcctcggggaacaggacgcgcgtgagctcaggcgtccccgccccagcttttctcgga accatgaaccccaactgcgcccggtgcggcaagatcgtgtatcccacggagaaggtgaactgtctggataag tgctgcctgtgtagttgcagccgcggccgcctcccgccagctcgcctcggggaacaggacgcgcgtgagctcaggcgtccccgccccagcttttctcgga accatgaaccccaactgcgcccggtgcggcaagatcgtgtatcccacggagaaggtgaactgtctggataag cccgccgcctgcgcgggggagcccagcacagaccgccgccgggaccccgagtcgcgcaccccagccccaccgGccaccccgcgcgccatggaccccaagg accgcaagaagatccagttctcggtgcccgcgccccctagccagctcgacccccgccaggtggagatg cccgccgcctgcgcgggggagcccagcacagaccgccgccgggaccccgagtcgcgcaccccagccccaccgCccaccccgcgcgccatggaccccaagg accgcaagaagatccagttctcggtgcccgcgccccctagccagctcgacccccgccaggtggagatg atccggcgcaggagaccaacgcctgccatgctgttccggctctcagagcactcctcaccag atccggcgcaggagaccaacgcctgccatgctgttccggctctcagagcactcctcaccag aggaggaagcctccccccaccag aggaggaagcctccccccaccag agagcctcaggagaggggcaccatctcaagtcgaagagacccaacccctgtgcctacacaccaccttcgctgaaag agagcctcaggagaggggcaccatctcaagtcgaagagacccaacccctgtgcctacacaccaccttcgctgaaag ctgtgcagcgcattgctgagtctcacctgcagtctatcagcaatttgaatgagaaccaggc-tcagaggaggaggatgagctgggggagcttcgggagct gg-ttatcA ctgtgcagcgcattgctgagtctcacctgcagtctatcagcaatttgaatgagaaccaggcCtcagaggaggaggatgagctgggggagcttcgggagct ggGttatc- sim4end sim4begin 1618397[849-0-0] 1420[13169688-14270773] <765-0-98-complement-unknown> edef=>CRA|225000001589124 /altid=gi|15746938 /dataset=dbest /taxon=9606 /org=Homo sapiens /date=09/2 5/2001 /altid=gb_acc|BI755360.1 /organ=brain /tissue_type= /length=849 /clone_end=5' /def=603024964F 1 NIH_MGC_114 Homo sapiens cDNA clone IMAGE:5195750 5', mRNA sequence. 
ddef=>CRA|GA_x54KRE8WCJ9:1..15664065 /organism=Homo sapiens /order=1 /ga_uid=181000064676425 /len=15 664065 1-120 (1988-2108) <117-0-96> <- 121-259 (2551-2688) <135-0-97> <- 260-385 (94537-94662) <125-0-99> <- 386-629 (222351-222595) <242-0-98> == 703-849 (1098940-1099085) <146-0-99> ctggtttcttcG-tgaaccactggaattcagccatggggactgcagaggcttcacagctcaggatgcccttctgCcGgactgaaacaccagtgttcttgg cttttgagatatagggaggat ctggtttcttcCTtgaaccactggaattcagccatggggactgcagaggcttcacagctcaggatgcccttctgAcCgactgaaacaccagtgttcttgg cttttgagatatagggaggat agttGacagtgatttGtGactttccgcacatcgggcgcagcgacatcgttcaaGgcgctgcattcgtactccccggactggtctcgcttgatgtcagaga tctccaggtactcatcctcacttacaaagccctggcctt agttTacagtgattt-tTactttccgcacatcgggcgcagcgacatcgttcaaCgcgctgcattcgtactccccggactggtctcgcttgatgtcagaga tctccaggtactcatcctcacttacaaagccctggcctt cGttgactgacaggtgtctccatgtcacagttggctctggtctgccaatagcaagacacagcagggtcacactgcttccctcattcacagtgatgtctga ggagatattcatgatctgaggaggaa cCttgactgacaggtgtctccatgtcacagttggctctggtctgccaatagcaagacacagcagggtcacactgcttccctcattcacagtgatgtctga ggagatattcatgatctgaggaggaa cttgcactattaggtgaacccgggacgttttgggatgattgtctgtctgcacagagcaggtgtacggaccttcgtcatacacatccacattttggatcat gatgctgtactgggttggtgtattgaccaggatgatcacacgagggtctatggaccacttgtcattcccagcgtagaggatggtgctgcggtttagccag gccacccgggttacccggtcatctatggtacacctg-agGgTggc cttgcactattaggtgaacccgggacgttttgggatgattgtctgtctgcacagagcaggtgtacggaccttcgtcatacacatccacattttggatcat gatgctgtactgggttggtgtattgaccaggatgatcacacgagggtctatggaccacttgtcattcccagcgtagaggatggtgctgcggtttagccag gccacccgggttacccggtcatctatggtacacctgCagTgAggc cctgggatgaagagcagggcagttgtcgccgagaagacgacccagtaggcaggatggtacatctcgacgctgcggtgctctcagCctgccgggcttgcta ctgcttctgctgctgctaccgctgctgccttcctctgtgctgaattc cctgggatgaagagcagggcagttgtcgccgagaagacgacccagtaggcaggatggtacatctcgacgctgcggtgctctcag-ctgccgggcttgcta ctgcttctgctgctgctaccgctgctgccttcctctgtgctgaattc sim4end sim4begin 2694118[754-0-0] 1420[13169772-14270767] <550-0-97-complement-unknown> edef=>CRA|222000001431581 /altid=gi|15437350 
/dataset=dbest /taxon=9606 /org=Homo sapiens /date=09/0 5/2001 /altid=gb_acc|BI550038.1 /organ=brain /tissue_type=hippocampus /length=754 /clone_end=5' /def =603192502F1 NIH_MGC_95 Homo sapiens cDNA clone IMAGE:5263800 5', mRNA sequence. ddef=>CRA|GA_x54KRE8WCJ9:1..15664065 /organism=Homo sapiens /order=1 /ga_uid=181000064676425 /len=15 664065 1-30 (1994-2024) <30-0-96> <- 31-166 (2467-2602) <133-0-97> == 286-536 (222259-222511) <246-0-97> == 610-754 (1098856-1099000) <141-0-97> gtgttc-tggcttttgagatatagggaggat gtgttcTtggcttttgagatatagggaggat aGgtttacagtgattttAacttCccgcacatcgggcgcagcgacatcgttcaacgcgctgcattcgtactccccggact-gtctcgcttgatgtcagaga tctccaggtactcatcctcacttacaaagccctggcc a-gtttacagtgattttTacttTccgcacatcgggcgcagcgacatcgttcaacgcgctgcattcgtactccccggactGgtctcgcttgatgtcagaga tctccaggtactcatcctcacttacaaagccctggcc ggaGGAa-cttgcactattaggtgaacccgggacgttttgggatgattgtctgtctgcacagagcaggtgtacggaccttcgtcatacacatccacattt tggatcatgatgctgtactgggttggtgtattgaccaggatgatcacacgagggtctatggaccacttgtcattcccagcgtagaggatggtgctgcggt ttagccaggccacccgggttacccggtcatctatggtacacctg-agGgTggc ggaCTTaCcttgcactattaggtgaacccgggacgttttgggatgattgtctgtctgcacagagcaggtgtacggaccttcgtcatacacatccacattt tggatcatgatgctgtactgggttggtgtattgaccaggatgatcacacgagggtctatggaccacttgtcattcccagcgtagaggatggtgctgcggt ttagccaggccacccgggttacccggtcatctatggtacacctgCagTgAggc cctgggatgaagagcagggcagttgtcgccgagaagacgacccagtaggcaggatggtacatctcgacgctgcggtgctctcagctgccgggcttgctac tgcttctgctgctgctaccgctgctgccttcctctgtgctCCGCt cctgggatgaagagcagggcagttgtcgccgagaagacgacccagtaggcaggatggtacatctcgacgctgcggtgctctcagctgccgggcttgctac tgcttctgctgctgctaccgctgctgccttcctctgtgctGAATt sim4end sim4begin 134996[500-0-0] 1442[3243352-11655873] <385-0-96-complement-unknown> edef=>CRA|1000482720785 /altid=gi|4189471 /dataset=dbest /taxon=9606 /org=Homo sapiens /date=03/18/1 999 /altid=gb_acc|AI379618.1 /organ=mixed (see below) /tissue_type=Pooled human melanocyte, fetal he art, and pregnant uterus /length=500 /clone_end=3' /def=tc58d12.x1 
Soares_NhHMPu_S1 Homo sapiens cDN A clone IMAGE:2068823 3' similar to TR:Q13538 Q13538 ORF2: FUNCTION UNKNOWN. ;, mRNA sequence. ddef=>CRA|GA_x54KRE902N0:1..24267006 /organism=Homo sapiens /order=1 /ga_uid=181000064731840 /len=24 267006 27-82 (6826861-6826916) <50-0-89> == 162-205 (6826997-6827040) <40-0-88> <- 206-500 (8410225-8410521) <295-0-99> tactcCtggtgaagatgctGCGaacattgttgaCatgaTaacaaaggatttagaat tactcTtggtgaagatgctATTaacattgttgaGatgaCaacaaaggatttagaat taaaatgctatcaaacagcaTcA-catActacagaAaaatctttc taaaatgctatcaaacagca-cTGcatGctacagaGaaatctttc atgaaaaagagtcaatcgattcaagctt-cattgttgcctttattttaagaaattaccacaaccaccccaaccttcagcaaccaccatcctgatcagtcc acaggcatcaacatggaccgaacaccctccaccagcaaaaagattagaacttgctgaaggcttagtttattgttagcattt-cttagcaacaaagtattt ttaataaaagtttttaatttaatgatttgtttgacataatgctattacacatttagtagactacagtatggtataagcagaacttttacatacatta atgaaaaagagtcaatcgattcaagcttCcattgttgcctttattttaagaaattaccacaaccaccccaaccttcagcaaccaccatcctgatcagtcc acaggcatcaacatggaccgaacaccctccaccagcaaaaagattagaacttgctgaaggcttagtttattgttagcatttCcttagcaacaaagtattt ttaataaaagtttttaatttaatgatttgtttgacataatgctattacacatttagtagactacagtatggtataagcagaacttttacatacatta sim4end kmer-code-2013-trunk/sim4dbutils/detectChimera.C0000644000000000000000000001205312415073322020256 0ustar rootroot#include #include #include #include #include "sim4.H" // Attempts to look for query that are chimeric. It is assumed that // your query have been mapped to a target reference genome, such // that little pieces will be mapped. The heuristic used is simple. // The mapping intervals are merged together, and if there are two // blocks that do not overlap, then it is chimeric. Intervals are // decreased by 3bp before merging. 
#define QUERY_LENGTH 2048 int main(int argc, char **argv) { bool beVerbose = false; uint32 chimeraOverlap = 5; int arg = 1; while (arg < argc) { if (strncmp(argv[arg], "-v", 2) == 0) { beVerbose = true; } else if (strncmp(argv[arg], "-o", 2) == 0) { chimeraOverlap = strtouint32(argv[++arg], 0L); } else { fprintf(stderr, "Unknown arg '%s'\n", argv[arg]); } arg++; } intervalList IL; intervalList ILfull; uint32 ILid = 0; char lastdefline[1024] = { 0 }; uint32 numPts = 0; uint32 maxPts = 1024; uint32 *begPt = new uint32 [maxPts]; uint32 *endPt = new uint32 [maxPts]; uint32 *genBeg = new uint32 [maxPts]; uint32 *genEnd = new uint32 [maxPts]; uint32 queryLength = 0; char spaces[QUERY_LENGTH+1]; char lines[QUERY_LENGTH+1]; char equals[QUERY_LENGTH+1]; for (uint32 i=0; inextAlignment(p)) { if ((p->_estID != ILid) && (lastdefline[0])) { #if 0 fprintf(stdout, "\n\n"); fprintf(stdout, "IL "uint32FMT"\n", IL.numberOfIntervals()); for (uint32 i=0; i 1) && (ILfull.sumOfLengths() >= 0.9 * queryLength)) { fprintf(stdout, "%s\n", lastdefline); equals[queryLength] = 0; fprintf(stdout, " %s\n", equals); equals[queryLength] = '='; // Bubble sort the positions. 
// for (uint32 a=0; a begPt[b]) || ((begPt[a] == begPt[b]) && (endPt[a] > endPt[b]))) { uint32 x = begPt[a]; uint32 y = endPt[a]; begPt[a] = begPt[b]; endPt[a] = endPt[b]; begPt[b] = x; endPt[b] = y; x = genBeg[a]; y = genEnd[a]; genBeg[a] = genBeg[b]; genEnd[a] = genEnd[b]; genBeg[b] = x; genEnd[b] = y; } } } for (uint32 i=0; i= QUERY_LENGTH) { fprintf(stdout, "WARNING: Next line (begin) truncated to %d positions!\n", QUERY_LENGTH); begPt[i] = QUERY_LENGTH-1; } if (endPt[i] >= QUERY_LENGTH) { fprintf(stdout, "WARNING: Next line (end) truncated to %d positions!\n", QUERY_LENGTH); endPt[i] = QUERY_LENGTH-1; } spaces[begPt[i]] = 0; lines[endPt[i] - begPt[i]] = 0; fprintf(stdout, uint32FMTW(3)"-"uint32FMTW(3)" %s%s ("uint32FMT","uint32FMT")\n", begPt[i], endPt[i], spaces, lines, genBeg[i], genEnd[i]); spaces[begPt[i]] = ' '; lines[endPt[i] - begPt[i]] = '-'; } fprintf(stdout, "\n\n"); } // end of chimera detected IL.clear(); ILfull.clear(); numPts = 0; } strcpy(lastdefline, p->_estDefLine); ILid = p->_estID; queryLength = p->_estLen; uint32 beg = p->_exons[0]._estFrom - 1; uint32 end = p->_exons[p->_numExons-1]._estTo; if (numPts == maxPts) { fprintf(stdout, "Wow! The next guy is a deep mapping! I'm only showing the\n"); fprintf(stdout, "first "uint32FMT" alignments.\n", maxPts); } else if (numPts < maxPts) { begPt[numPts] = beg; endPt[numPts] = end; genBeg[numPts] = p->_exons[0]._genFrom - 1; genEnd[numPts] = p->_exons[p->_numExons-1]._genTo; } numPts++; //fprintf(stdout, "beg,end = %d,%d\n", (int)beg, (int)end); if (end - beg > 2 * chimeraOverlap) { IL.add(beg + chimeraOverlap, end - beg - 2 * chimeraOverlap); ILfull.add(beg, end - beg); } } return(0); } kmer-code-2013-trunk/sim4dbutils/README0000644000000000000000000000162107605137611016277 0ustar rootrootFour sim4 related utilities: ------------------------------------------------------------ filterPolishes.c Filters polishes by percent identity and composite. 
Writes output to stdout or a file, takes input from stdin. ------------------------------------------------------------ mergePolishes.C Merges multiple sim4db outputs. The output must be from the same genomic file, with different cDNA files. ------------------------------------------------------------ pickBestPolish.c Picks the best polish. Input is stdin, output is stdout. No options. ------------------------------------------------------------ sortPolishes.c Sorts polishes by ESTid or GENid. Input is stdin, output is stdout. Takes "-n N" to set the maximum number of polishes in the input. ------------------------------------------------------------ stripPolishes.c Removes deflines and alignments from a polish file. kmer-code-2013-trunk/sim4dbutils/convertPolishes.C0000644000000000000000000000217511467253404020717 0ustar rootroot#include #include #include #include #include #include "bio.h" #include "sim4.H" int main(int argc, char ** argv) { sim4polishWriter *GOOD = 0L; sim4polishStyle in_style, out_style; // We limit scaffolds to be below the number of open files per // process. // if (argc != 1) { fprintf(stderr, "S4DB to GFF3 format converter.\nUsage: %s < input_file > output_file\n", argv[0]); exit(1); } sim4polishReader *R = new sim4polishReader("-"); sim4polish *p = 0L; in_style = R->getsim4polishStyle(); if (in_style == sim4polishS4DB) out_style = sim4polishGFF3; else if (in_style == sim4polishGFF3) out_style = sim4polishS4DB; else { fprintf(stderr, "ERROR: Unrecognized or unsupported polishes format. 
Aborting.\n"); exit(1); } if (GOOD == 0L) GOOD = new sim4polishWriter("-", out_style); while (R->nextAlignment(p)) { #if 0 if (noDefLines) p->s4p_removeDefLines(); if (noAlignments) p->s4p_removeAlignments(); #endif GOOD->writeAlignment(p); } delete R; delete GOOD; return(0); } kmer-code-2013-trunk/sim4dbutils/pickBestPair.C0000644000000000000000000003564312415074151020110 0ustar rootroot#include #include #include #include "bio.h" #include "sim4.H" #include #include #include using namespace std; #define SEQNAME_MAX 64 class mapResult { public: uint32 seqIdx; char seqName[SEQNAME_MAX]; uint32 refIdx; char refName[SEQNAME_MAX]; uint32 refBgn; uint32 refEnd; bool forward; }; class readData { public: readData() { cloneIndex = 999999999; isFirstMate = 0; }; readData(uint32 index, uint32 first) { cloneIndex = index; isFirstMate = first; }; uint32 cloneIndex : 31; uint32 isFirstMate : 1; }; map nameToIndex; uint32 nameToIndexIndex = 0; bool readMR(FILE *in, mapResult &mr) { static char line[1024]; static splitToWords W; // Skip header. if (ftell(in) == 0) { fgets(line, 1024, in); } fgets(line, 1024, in); if (feof(in)) return(false); chomp(line); W.split(line); if (strlen(W[0]) >= SEQNAME_MAX) W[0][SEQNAME_MAX-1] = 0; if (strlen(W[6]) >= SEQNAME_MAX) W[6][SEQNAME_MAX-1] = 0; assert(strlen(W[0]) < SEQNAME_MAX); assert(strlen(W[6]) < SEQNAME_MAX); mr.seqIdx = W(1); mr.refIdx = W(7); mr.refBgn = W(8); mr.refEnd = W(9); mr.forward = (W(4) < W(5)) ? 
true : false; strcpy(mr.seqName, W[0]); strcpy(mr.refName, W[6]); return(true); } mapResult & readMRsim4db(sim4polish *p, mapResult &mr) { if (strlen(p->_estDefLine) >= SEQNAME_MAX) p->_estDefLine[SEQNAME_MAX-1] = 0; if (strlen(p->_genDefLine) >= SEQNAME_MAX) p->_genDefLine[SEQNAME_MAX-1] = 0; assert(strlen(p->_estDefLine) < SEQNAME_MAX); assert(strlen(p->_genDefLine) < SEQNAME_MAX); mr.seqIdx = p->_estID; mr.refIdx = p->_genID; mr.refBgn = p->_exons[0]._genFrom - 1; mr.refEnd = p->_exons[0]._genTo; mr.forward = (p->_matchOrientation == SIM4_MATCH_FORWARD) ? true : false; strcpy(mr.seqName, p->_estDefLine); strcpy(mr.refName, p->_genDefLine); return(mr); } bool readMRcoords(FILE *in, mapResult &mr) { static char line[1024]; static splitToWords W; // Skip header. if (ftell(in) == 0) { fgets(line, 1024, in); fgets(line, 1024, in); fgets(line, 1024, in); fgets(line, 1024, in); } fgets(line, 1024, in); if (feof(in)) return(false); chomp(line); W.split(line); // Since we don't have indexes in coords files, we must assign them based on // object names. // But we use "same index" to infer pairing. This won't work. string refNam(W[9]); string seqNam(W[10]); if (nameToIndex.find(refNam) == nameToIndex.end()) { nameToIndex[refNam] = readData(nameToIndexIndex++, false); } if (nameToIndex.find(seqNam) == nameToIndex.end()) { fprintf(stderr, "1 failed to find mate index for read '%s'\n", W[9]); } uint32 seqIdx = nameToIndex[seqNam].cloneIndex; uint32 refIdx = nameToIndex[refNam].cloneIndex; if (strlen(W[9]) >= SEQNAME_MAX) W[9][SEQNAME_MAX-1] = 0; if (strlen(W[10]) >= SEQNAME_MAX) W[10][SEQNAME_MAX-1] = 0; assert(strlen(W[9]) < SEQNAME_MAX); assert(strlen(W[10]) < SEQNAME_MAX); mr.seqIdx = seqIdx; mr.refIdx = refIdx; mr.refBgn = W(0); mr.refEnd = W(1); mr.forward = (W(2) < W(3)) ? 
true : false; strcpy(mr.seqName, W[10]); strcpy(mr.refName, W[9]); return(true); } bool readMRcoords(FILE *in, mapResult &mr, bool &is1) { static char line[1024]; static splitToWords W; // Skip header. if (ftell(in) == 0) { fgets(line, 1024, in); fgets(line, 1024, in); fgets(line, 1024, in); fgets(line, 1024, in); } fgets(line, 1024, in); if (feof(in)) return(false); chomp(line); W.split(line); // Since we don't have indexes in coords files, we must assign them based on // object names. // But we use "same index" to infer pairing. This won't work. string refNam(W[9]); string seqNam(W[10]); if (nameToIndex.find(refNam) == nameToIndex.end()) { nameToIndex[refNam] = readData(nameToIndexIndex++, false); } if (nameToIndex.find(seqNam) == nameToIndex.end()) { fprintf(stderr, "2 failed to find mate index for read '%s'\n", W[10]); for (uint32 i=0; i<12; i++) fprintf(stderr, "%2d -- '%s'\n", i, W[i]); exit(1); } uint32 seqIdx = nameToIndex[seqNam].cloneIndex; uint32 refIdx = nameToIndex[refNam].cloneIndex; is1 = nameToIndex[seqNam].isFirstMate; if (strlen(W[9]) >= SEQNAME_MAX) W[9][SEQNAME_MAX-1] = 0; if (strlen(W[10]) >= SEQNAME_MAX) W[10][SEQNAME_MAX-1] = 0; assert(strlen(W[9]) < SEQNAME_MAX); assert(strlen(W[10]) < SEQNAME_MAX); mr.seqIdx = seqIdx; mr.refIdx = refIdx; mr.refBgn = W(0); mr.refEnd = W(1); mr.forward = (W(2) < W(3)) ? 
true : false; strcpy(mr.seqName, W[10]); strcpy(mr.refName, W[9]); return(true); } int main(int argc, char **argv) { vector in1extent, in1sim4db, in1coords, incoords; vector in2extent, in2sim4db, in2coords; vector mateMaps; char *out = NULL; char orient = 0; uint32 distMin = 0; uint32 distMax = uint32MAX; double minIdent = 0; double minLength = 0; double minCoverage = 0; bool allowDups = false; int arg = 1; int err = 0; while (arg < argc) { if (strcmp(argv[arg], "-1extent") == 0) while ((arg+1 < argc) && (argv[arg+1][0] != '-')) in1extent.push_back(argv[++arg]); else if (strcmp(argv[arg], "-2extent") == 0) while ((arg+1 < argc) && (argv[arg+1][0] != '-')) in2extent.push_back(argv[++arg]); else if (strcmp(argv[arg], "-1sim4db") == 0) while ((arg+1 < argc) && (argv[arg+1][0] != '-')) in1sim4db.push_back(argv[++arg]); else if (strcmp(argv[arg], "-2sim4db") == 0) while ((arg+1 < argc) && (argv[arg+1][0] != '-')) in2sim4db.push_back(argv[++arg]); else if (strcmp(argv[arg], "-1coords") == 0) while ((arg+1 < argc) && (argv[arg+1][0] != '-')) in1coords.push_back(argv[++arg]); else if (strcmp(argv[arg], "-2coords") == 0) while ((arg+1 < argc) && (argv[arg+1][0] != '-')) in2coords.push_back(argv[++arg]); else if (strcmp(argv[arg], "-coords") == 0) while ((arg+1 < argc) && (argv[arg+1][0] != '-')) incoords.push_back(argv[++arg]); else if (strcmp(argv[arg], "-matemap") == 0) while ((arg+1 < argc) && (argv[arg+1][0] != '-')) mateMaps.push_back(argv[++arg]); else if (strcmp(argv[arg], "-insert") == 0) { orient = argv[++arg][0]; distMin = atoi(argv[++arg]); distMax = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-minident") == 0) minIdent = atoi(argv[++arg]); else if (strcmp(argv[arg], "-minlength") == 0) minLength = atoi(argv[++arg]); else if (strcmp(argv[arg], "-mincoverage") == 0) minCoverage = atoi(argv[++arg]); else if (strcmp(argv[arg], "-allowduplicates") == 0) allowDups = true; else if (strcmp(argv[arg], "-o") == 0) out = argv[++arg]; else err++; arg++; } if (out == 
NULL) { fprintf(stderr, "usage: %s -1 in1.extent -2 in2.extent -o prefix\n", argv[0]); exit(1); } vector mr1; vector mr2; mapResult mr; // Load mate map if needed if (mateMaps.size() > 0) { for (uint32 mm=0; mmnextAlignment(p)) { mr1.push_back(readMRsim4db(p, mr)); } delete IN; } for (uint32 ii=0; iinextAlignment(p)) { mr2.push_back(readMRsim4db(p, mr)); } delete IN; } for (uint32 ii=0; ii totalPairs; map sizedPairs; while ((mr1bgn < mr1END) && (mr2bgn < mr2END)) { if ((mr1[mr1bgn].seqIdx < mr2[mr2bgn].seqIdx) && (mr1bgn < mr1END)) mr1bgn++; if ((mr2[mr2bgn].seqIdx < mr1[mr1bgn].seqIdx) && (mr2bgn < mr2END)) mr2bgn++; if (mr1[mr1bgn].seqIdx != mr2[mr2bgn].seqIdx) // SequenceA 1 3 5 7 8 // SequenceB 2 4 6 8 // 1st pass, A increases to 3, B increases to 4 // 2nd pass, A increases to 5, B increases to 6 // 3rd pass, A increases to 7, B increases to 8 // 4th pass, A increases to 8, B doesn't change. continue; assert(mr1[mr1bgn].seqIdx == mr2[mr2bgn].seqIdx); mr1end = mr1bgn + 1; mr2end = mr2bgn + 1; while (mr1[mr1bgn].seqIdx == mr1[mr1end].seqIdx) mr1end++; while (mr2[mr2bgn].seqIdx == mr2[mr2end].seqIdx) mr2end++; // Group of reads from mr1bgn-mr1end and mr2bgn-mr2end need to be compared. if ((mr1end - mr1bgn > 1) && (mr2end - mr2bgn > 1)) { fprintf(DUP, "%s\t%u\t%s\t%u\n", mr1[mr1bgn].seqName, mr1end - mr1bgn, mr2[mr2bgn].seqName, mr2end - mr2bgn); if (allowDups == false) { mr1bgn = mr1end; mr2bgn = mr2end; } } // Now find all possible pairs. 
for (uint32 i1=mr1bgn; i1 0); if (df > dr) { if ((mr1[i1].forward == true) && (mr2[i2].forward == true)) ori = 'N'; if ((mr1[i1].forward == true) && (mr2[i2].forward == false)) ori = 'I'; if ((mr1[i1].forward == false) && (mr2[i2].forward == true)) ori = 'O'; if ((mr1[i1].forward == false) && (mr2[i2].forward == false)) ori = 'A'; totalPairs[ori]++; if ((orient == 0) || ((ori == orient) && (distMin <= df) && (df <= distMax))) { sizedPairs[ori]++; fprintf(LOG, "%c "uint32FMT" "uint32FMT" %s ("uint32FMT","uint32FMT") "uint32FMT" %s ("uint32FMT","uint32FMT") "uint32FMT" %s\n", ori, df, mr1[i1].seqIdx, mr1[i1].seqName, mr1[i1].refBgn, mr1[i1].refEnd, mr2[i2].seqIdx, mr2[i2].seqName, mr2[i2].refBgn, mr2[i2].refEnd, mr1[i1].refIdx, mr1[i1].refName); } } else { if ((mr2[i2].forward == true) && (mr1[i1].forward == true)) ori = 'N'; if ((mr2[i2].forward == true) && (mr1[i1].forward == false)) ori = 'I'; if ((mr2[i2].forward == false) && (mr1[i1].forward == true)) ori = 'O'; if ((mr2[i2].forward == false) && (mr1[i1].forward == false)) ori = 'A'; totalPairs[ori]++; if ((orient == 0) || ((ori == orient) && (distMin <= dr) && (dr <= distMax))) { sizedPairs[ori]++; fprintf(LOG, "%c "uint32FMT" "uint32FMT" %s ("uint32FMT","uint32FMT") "uint32FMT" %s ("uint32FMT","uint32FMT") "uint32FMT" %s\n", ori, dr, mr2[i2].seqIdx, mr2[i2].seqName, mr2[i2].refBgn, mr2[i2].refEnd, mr1[i1].seqIdx, mr1[i1].seqName, mr1[i1].refBgn, mr1[i1].refEnd, mr2[i2].refIdx, mr2[i2].refName); } } } } mr1bgn = mr1end; mr2bgn = mr2end; } fprintf(STA, "alignments: "uint32FMT" "uint32FMT"\n", mr1END, mr2END); fprintf(STA, "totalPairs[%c]: %u\n", 'N', totalPairs['N']); fprintf(STA, "totalPairs[%c]: %u\n", 'I', totalPairs['I']); fprintf(STA, "totalPairs[%c]: %u\n", 'O', totalPairs['O']); fprintf(STA, "totalPairs[%c]: %u\n", 'A', totalPairs['A']); fprintf(STA, "sizedPairs[%c]: %u\n", 'N', sizedPairs['N']); fprintf(STA, "sizedPairs[%c]: %u\n", 'I', sizedPairs['I']); fprintf(STA, "sizedPairs[%c]: %u\n", 'O', 
sizedPairs['O']); fprintf(STA, "sizedPairs[%c]: %u\n", 'A', sizedPairs['A']); fclose(LOG); fclose(DUP); fclose(STA); exit(0); } kmer-code-2013-trunk/sim4dbutils/s4p_overlap.H0000644000000000000000000000073612322046702017764 0ustar rootroot#ifndef S4P_OVERLAP_H #define S4P_OVERLAP_H // Using 16-bit ints for storing the amount overlapped gives a big // memory reduction, but will fail for long sequences (mRNA, // probably). findOverlap() checks for overflow. // #define OLAP_IS_SHORT #ifdef OLAP_IS_SHORT typedef uint16 olap_t; #define OLAPTFMT uint16FMT #else typedef uint32 olap_t; #define OLAPTFMT uint32FMT #endif olap_t findOverlap(sim4polish *A, sim4polish *B); #endif // S4P_OVERLAP_H kmer-code-2013-trunk/sim4dbutils/cleanPolishes-20020626.C0000644000000000000000000001767107605137611021227 0ustar rootroot#include #include #include #include #include #include "sim4reader.h" #define SHOWTRIMMING char const *usage = "usage: %s [-save splitFile] [-threshold t]\n" " -threshold Introns bigger than this are split into two matches (default = 150000).\n" " -savesplits Saves a before/after of each split match.\n" " All matches are printed to stdout (untrimmed and trimmed).\n" "\n"; bool lowComplexityExon(char *s) { int cnt[5][5] = {0}; int map[256] = {0}; int i, j, len = 0; int a=0, b=0, c=0; double qual = 0.0; if (s == 0L) return(false); map['A'] = map['a'] = 1; map['C'] = map['c'] = 2; map['G'] = map['g'] = 3; map['T'] = map['t'] = 4; for (i=0; i<5; i++) for (j=0; j<5; j++) cnt[i][j] = 0; for (i=0, j=1; s[j]; i++, j++) { cnt[map[s[i]]][map[s[j]]]++; len++; } for (i=0; i<5; i++) { for (j=0; j<5; j++) { if (a < cnt[i][j]) { c = b; b = a; a = cnt[i][j]; } else if (b < cnt[i][j]) { c = b; b = cnt[i][j]; } else if (c < cnt[i][j]) { c = cnt[i][j]; } } } qual = (double)(a+b+c) / (double)(len); if (len > 50) qual = 0.0; //if (qual > 0.75) //fprintf(stdout, "%8.5f:\t%s\n", qual, s); return(qual > 0.75); } int main(int argc, char ** argv) { int arg = 1; FILE *splitFile = 0L; int 
intronLimit = 150000; sim4polish *p; #if 0 if (isatty(fileno(stdin)) || isatty(fileno(stdout))) { fprintf(stderr, usage, argv[0]); if (isatty(fileno(stdin))) fprintf(stderr, "error: I cannot read polishes from the terminal!\n\n"); if (isatty(fileno(stdout))) fprintf(stderr, "error: Please redirect the polishes to a file.\n (They are on stdout)\n\n"); exit(1); } #endif arg = 1; while (arg < argc) { if (strncmp(argv[arg], "-savesplits", 2) == 0) { arg++; errno=0; splitFile = fopen(argv[arg], "w"); if (errno) { fprintf(stderr, "Can't open '%s' for writing\n%s\n", argv[arg], strerror(errno)); exit(1); } } else if (strncmp(argv[arg], "-threshold", 2) == 0) { intronLimit = atoi(argv[++arg]); } arg++; } // Statistics on the splitting quality / frequency int totMatches = 0; int oneExon = 0; int smaIntron = 0; int junkFirst = 0; int junkLast = 0; int junkBoth = 0; int splitOnGap = 0; int goodQual = 0; int flanking = 0; FILE *junkF = fopen("spl.junkfirst", "w"); FILE *junkL = fopen("spl.junklast", "w"); FILE *junkB = fopen("spl.junkboth", "w"); FILE *splGap = fopen("spl.splitGap", "w"); FILE *good = fopen("spl.good", "w"); FILE *flank = fopen("spl.flanking", "w"); while ((p = readPolish(stdin)) != 0L) { int exA; int exB; if (p->numExons == 1) { oneExon++; } else { // Find the big intron. We assume there is only one big intron. // int biggestIntron = 0; int intronSplit = 0; int intronOri = 0; for (exA=0, exB=1; exB < p->numExons; exA++, exB++) { int dist = p->exons[exB].genFrom - p->exons[exA].genTo + 1; if (dist > biggestIntron) { biggestIntron = dist; intronSplit = exB; intronOri = p->exons[exA].intronOrientation; } } if (intronOri == 0) { fprintf(stderr, "didn't find the largest intron? (got zero)?\n"); exit(1); } if (intronOri == INTRON_NONE) { fprintf(stderr, "biggest intron isn't an intron? (got none)?\n"); exit(1); } if (biggestIntron < 100000) { smaIntron++; } else { // Declare the split obvious if all exons on either side are // below 30bp, difficult otherwise. 
// bool killFirst = true; bool killLast = true; for (int i=0; iexons[i].estTo - p->exons[i].estFrom + 1 >= 50) && (p->exons[i].percentIdentity >= 88) && (lowComplexityExon(p->exons[i].estAlignment) == false)) killFirst = false; for (int i=intronSplit; inumExons; i++) if ((p->exons[i].estTo - p->exons[i].estFrom + 1 >= 50) && (p->exons[i].percentIdentity >= 88) && (lowComplexityExon(p->exons[i].estAlignment) == false)) killLast = false; // We shouldn't ever want to kill both sides. // if ((killFirst == true) && (killLast == true)) { junkBoth++; fprintf(junkB, "==============================JUNK FIRST AND LAST?\n"); printPolish(junkB, p); } if ((killFirst == true) && (killLast == false)) { junkFirst++; printPolish(junkF, p); fprintf(junkF, "==============================\n"); } if ((killFirst == false) && (killLast == true)) { junkLast++; printPolish(junkL, p); fprintf(junkL, "==============================\n"); } if ((killFirst == false) && (killLast == false)) { if (intronOri == INTRON_GAP) { splitOnGap++; printPolish(splGap, p); fprintf(splGap, "==============================\n"); } else { // If there is a valid strand prediction and // a) all exons >= 90% // b) all exons >= 95% // c) all exons >= 95%, except first and last, which can be >= 90% // save the match as is. // bool validStrand = false; if ((p->strandOrientation == STRAND_POSITIVE) || (p->strandOrientation == STRAND_NEGATIVE)) validStrand = true; #if 0 bool qualIsA = true; for (exA=0; exA < p->numExons; exA++) if (p->exons[exA].percentIdentity < 90) qualIsA = false; bool qualIsB = true; for (exA=0; exA < p->numExons; exA++) if (p->exons[exA].percentIdentity < 95) qualIsB = false; #endif bool qualIsC = true; if (p->exons[0].percentIdentity < 90) qualIsC = false; if (p->exons[p->numExons-1].percentIdentity < 90) qualIsC = false; for (exA=1; exA < p->numExons-1; exA++) if (p->exons[exA].percentIdentity < 95) qualIsC = false; // If the match looks good, but just has a large intron, keep it. 
// if (validStrand && qualIsC) { printPolish(good, p); fprintf(good, "==============================\n"); goodQual++; } else { flanking++; printPolish(flank, p); fprintf(flank, "==============================\n"); } } } } // Has a big intron } // More than one exon totMatches++; if ((totMatches % 3759) == 0) { fprintf(stderr, "tot: %7d ", totMatches); fprintf(stderr, "one: %7d ", oneExon); fprintf(stderr, "sma: %7d ", smaIntron); fprintf(stderr, "jnkF: %7d ", junkFirst); fprintf(stderr, "jnkL: %7d ", junkLast); fprintf(stderr, "jnkB: %7d ", junkBoth); fprintf(stderr, "onGap: %7d ", splitOnGap); fprintf(stderr, "good: %7d ", goodQual); fprintf(stderr, "flank: %7d\r", flanking); } destroyPolish(p); } fclose(junkF); fclose(junkL); fclose(junkB); fclose(splGap); fclose(good); fclose(flank); fprintf(stderr, "tot: %7d ", totMatches); fprintf(stderr, "one: %7d ", oneExon); fprintf(stderr, "sma: %7d ", smaIntron); fprintf(stderr, "jnkF: %7d ", junkFirst); fprintf(stderr, "jnkL: %7d ", junkLast); fprintf(stderr, "jnkB: %7d ", junkBoth); fprintf(stderr, "onGap: %7d ", splitOnGap); fprintf(stderr, "good: %7d ", goodQual); fprintf(stderr, "flank: %7d\n", flanking); return(0); } kmer-code-2013-trunk/sim4dbutils/convertToAtac.C0000644000000000000000000002362512322046702020277 0ustar rootroot#include #include #include #include #include "sim4.H" // Writes polished from stdin as atac-format matches. Splits polishes on any indel to generate gapless // atac matches (type 'u'). // // Does no cleanup. 
void indelRedo(char *a, char *b) { uint32 orig = 0; uint32 copy = 0; while (a[orig] && b[orig]) { if ((a[orig] != '-') || (b[orig] != '-')) { if (orig != copy) { a[copy] = a[orig]; b[copy] = b[orig]; } copy++; } orig++; } a[copy] = 0; b[copy] = 0; } uint32 indelFixAlignment(char *a, char *b) { bool redo = false; uint32 len = strlen(a) - 1; uint32 fixed = 0; //fprintf(stdout, "fixIndel\n"); //fprintf(stdout, "%s\n%s\n", a, b); for (uint32 i=2; i two mismatches if ((a[i-2] == '-') && (b[i] == '-')) { a[i-2] = toUpper[a[i-1]]; a[i-1] = toUpper[a[i]]; a[i] = '-'; b[i-2] = toUpper[b[i-2]]; b[i-1] = toUpper[b[i-1]]; b[i] = '-'; fixed++; redo = true; } if ((a[i] == '-') && (b[i-2] == '-')) { a[i-2] = toUpper[a[i-2]]; a[i-1] = toUpper[a[i-1]]; a[i] = '-'; b[i-2] = toUpper[b[i-1]]; b[i-1] = toUpper[b[i]]; b[i] = '-'; fixed++; redo = true; } } if (redo) { //fprintf(stdout, "%s\n%s\n", a, b); //fprintf(stdout, "Fixed "uint32FMT" 1 base wide indel\n", fixed); indelRedo(a, b); } redo = false; len = strlen(a) - 1; for (uint32 i=3; i three mismatches // we also would do two gaps -> three mismatches if ((a[i] == '-') && (b[i-3] == '-')) { a[i-3] = toUpper[a[i-3]]; a[i-2] = toUpper[a[i-2]]; a[i-1] = toUpper[a[i-1]]; a[i] = '-'; b[i-3] = toUpper[b[i-2]]; b[i-2] = toUpper[b[i-1]]; b[i-1] = toUpper[b[i]]; b[i] = '-'; fixed++; redo = true; } if ((a[i-3] == '-') && (b[i] == '-')) { a[i-3] = toUpper[a[i-2]]; a[i-2] = toUpper[a[i-1]]; a[i-1] = toUpper[a[i]]; a[i] = '-'; b[i-3] = toUpper[b[i-3]]; b[i-2] = toUpper[b[i-2]]; b[i-1] = toUpper[b[i-1]]; b[i] = '-'; fixed++; redo = true; } } if (redo) { //fprintf(stdout, "%s\n%s\n", a, b); //fprintf(stdout, "Fixed "uint32FMT" 2 base wide indel\n", fixed); indelRedo(a, b); } return(fixed); } int main(int argc, char **argv) { char *nickname1 = 0L, *asmfile1 = 0L; char *nickname2 = 0L, *asmfile2 = 0L; bool flip = false; int arg = 1; while (arg < argc) { if (strncmp(argv[arg], "-1", 2) == 0) { nickname1 = argv[++arg]; asmfile1 = argv[++arg]; } else 
if (strncmp(argv[arg], "-2", 2) == 0) { nickname2 = argv[++arg]; asmfile2 = argv[++arg]; } else if (strncmp(argv[arg], "-f", 2) == 0) { flip = true; } else { fprintf(stderr, "Unknown arg '%s'\n", argv[arg]); } arg++; } if ((nickname1 == 0L) || (nickname2 == 0L)) { fprintf(stderr, "usage: %s [-f] -1 nickname1 asmfile1 -2 nickname2 asmfile2 < matches.sim4db > matches.atac\n", argv[0]); exit(1); } if (flip == false) { fprintf(stdout, "!format atac 1.0\n"); fprintf(stdout, "/assemblyFile1=%s\n", asmfile1); fprintf(stdout, "/assemblyFile2=%s\n", asmfile2); fprintf(stdout, "/assemblyId1=%s\n", nickname1); fprintf(stdout, "/assemblyId2=%s\n", nickname2); } else { fprintf(stdout, "!format atac 1.0\n"); fprintf(stdout, "/assemblyFile1=%s\n", asmfile2); fprintf(stdout, "/assemblyFile2=%s\n", asmfile1); fprintf(stdout, "/assemblyId1=%s\n", nickname2); fprintf(stdout, "/assemblyId2=%s\n", nickname1); } uint32 dupRecordIID = 0; uint32 dupParentIID = 0; uint32 totalFixed = 0; sim4polishReader *R = new sim4polishReader("-"); sim4polish *p = 0L; while (R->nextAlignment(p)) { // Parse the defline to find the genomic region our 'est' // (unfortunate sim4db term) is from. Search for our // information in the defline // // extracted from iid (\d+) pos (\d+) (\d+) splitToWords W(p->_estDefLine); uint32 i=0; while ((i < W.numWords()) && (strcmp(W[i], "iid") != 0)) i++; if ((i == 0) || (i == W.numWords())) fprintf(stderr, "Failed to match est defline '%s'\n", p->_estDefLine), exit(1); uint32 qSeqIID = strtouint32(W[i+1], 0L); uint32 qSeqBeg = strtouint32(W[i+3], 0L); uint32 qSeqEnd = strtouint32(W[i+4], 0L); // Not used W.split(p->_genDefLine); i=0; while ((i_genDefLine), exit(1); uint32 gSeqIID = strtouint32(W[i+1], 0L); uint32 gSeqBeg = strtouint32(W[i+3], 0L); //uint32 gSeqEnd = strtouint32(W[i+4], 0L); // Not used bool fwd = (p->_matchOrientation == SIM4_MATCH_FORWARD); // Fix the coords // if (fwd) { // Forward is easy! Just add. 
for (uint32 exon=0; exon_numExons; exon++) { sim4polishExon *e = p->_exons + exon; e->_estFrom += qSeqBeg; e->_estTo += qSeqBeg; e->_genFrom += gSeqBeg; e->_genTo += gSeqBeg; } } else { // Reverse is not easy. Need to reverse complement the query positions. for (uint32 exon=0; exon_numExons; exon++) { sim4polishExon *e = p->_exons + exon; // First, reverse the query relative to our extracted piece // uint32 f = (qSeqEnd - qSeqBeg) - e->_estTo + 2; // Extra +1 to offset -1 when we set qBeg uint32 t = (qSeqEnd - qSeqBeg) - e->_estFrom + 2; // Now we can just offset stuff. e->_estFrom = qSeqBeg + t; // Really the end! e->_estTo = qSeqBeg + f; // Really the begin! e->_genFrom += gSeqBeg; e->_genTo += gSeqBeg; } } for (uint32 exon=0; exon_numExons; exon++) { sim4polishExon *e = p->_exons + exon; // Parse the alignment to find ungapped blocks uint32 aPos = 0; uint32 qBeg = e->_estFrom - 1; uint32 gBeg = e->_genFrom - 1; uint32 mLen = 0; totalFixed += indelFixAlignment(e->_estAlignment, e->_genAlignment); // Skip mismatches/gaps at the start of this sequence // while ((e->_estAlignment[aPos] == '-') || (e->_genAlignment[aPos] == '-') || (e->_estAlignment[aPos] != e->_genAlignment[aPos])) { if (e->_estAlignment[aPos] != '-') if (fwd) qBeg++; else qBeg--; if (e->_genAlignment[aPos] != '-') gBeg++; //fprintf(stderr, "SKIP BEGIN %c %c\n", e->_estAlignment[aPos], e->_genAlignment[aPos]); aPos++; } bool notDone = true; // There should be a way to get rid of this stupid variable.... while (notDone) { notDone = ((e->_estAlignment[aPos] != 0) && (e->_genAlignment[aPos] != 0)); // If we find the end of a gapless block, emit a match if ((e->_estAlignment[aPos] == '-') || (e->_estAlignment[aPos] == 0) || (e->_genAlignment[aPos] == '-') || (e->_genAlignment[aPos] == 0)) { // Trim off any mismatches at the end of this block. 
// uint32 mismatch = 0; while ((aPos > mismatch) && (e->_estAlignment[aPos - mismatch - 1] != e->_genAlignment[aPos - mismatch - 1])) { //fprintf(stderr, "SKIP MIDDLE %c %c\n", e->_estAlignment[aPos-mismatch], e->_genAlignment[aPos-mismatch]); mismatch++; } // If there is an indel at the start (which probably // shouldn't happen anyway!), or possibly at the end, // then our length is zero, and we should not emit // anything. // if (mLen > mismatch) { mLen -= mismatch; if (flip == false) { fprintf(stdout, "M u dupr"uint32FMT" dupp"uint32FMT" %s:"uint32FMT" "uint32FMT" "uint32FMT" 1 %s:"uint32FMT" "uint32FMT" "uint32FMT" %s\n", dupRecordIID, dupParentIID, nickname1, qSeqIID, (fwd) ? qBeg : qBeg - mLen, mLen, nickname2, gSeqIID, gBeg, mLen, (fwd) ? "1" : "-1"); } else { fprintf(stdout, "M u dupr"uint32FMT" dupp"uint32FMT" %s:"uint32FMT" "uint32FMT" "uint32FMT" 1 %s:"uint32FMT" "uint32FMT" "uint32FMT" %s\n", dupRecordIID, dupParentIID, nickname2, gSeqIID, gBeg, mLen, nickname1, qSeqIID, (fwd) ? qBeg : qBeg - mLen, mLen, (fwd) ? 
"1" : "-1"); } dupRecordIID++; mLen += mismatch; // Adjust our begin and end positions to the end of this record if (fwd) qBeg += mLen; else qBeg -= mLen; gBeg += mLen; mLen = 0; } // Skip whatever caused us to emit a gapless block, also skip any mismatches here // while ((e->_estAlignment[aPos] == '-') || (e->_genAlignment[aPos] == '-') || (e->_estAlignment[aPos] != e->_genAlignment[aPos])) { if (e->_estAlignment[aPos] != '-') if (fwd) qBeg++; else qBeg--; if (e->_genAlignment[aPos] != '-') gBeg++; //fprintf(stderr, "SKIP END %c %c\n", e->_estAlignment[aPos], e->_genAlignment[aPos]); aPos++; } } else { // Not the end of a gapless block, extend this match by one mLen++; aPos++; } } // over all positions in the alignemnt } // over all exons dupParentIID++; } fprintf(stderr, "Fixed "uint32FMT" indel/mismatches.\n", totalFixed); return(0); } kmer-code-2013-trunk/sim4dbutils/convertToExtent.C0000644000000000000000000000724612322046702020677 0ustar rootroot#include #include #include #include #include "sim4.H" // Writes polishes from stdin as a one-line-per-match format, space-based! bool extendedFormat = false; void output(sim4polish *p, char *Ep, char *Gp, uint32 a, uint32 b, bool isExon) { uint32 beg = p->_exons[a]._estFrom - 1; uint32 end = p->_exons[b]._estTo; if (p->_matchOrientation == SIM4_MATCH_COMPLEMENT) { beg = p->_estLen - beg; end = p->_estLen - end; } double ident = p->_exons[a]._percentIdentity; double cover = 0.0; // If we're not a single exon, compute the real identity of the whole thing. 
// if (isExon == false) { if (p->_exons[a]._estAlignment) { ident = p->s4p_percentIdentityExact(); cover = p->s4p_percentCoverageExact(); } else { ident = p->_percentIdentity; cover = p->_querySeqIdentity; } } if (extendedFormat) fprintf(stdout, "%s\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t%s\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t%6.3f\t%6.3f\n", Ep, p->_estID, p->_estLen, a, beg, end, Gp, p->_genID, p->_exons[a]._genFrom - 1, p->_exons[b]._genTo, ident, cover); else fprintf(stdout, "%s\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t%s\t"uint32FMT"\t"uint32FMT"\t%6.3f\t%6.3f\n", Ep, p->_estLen, a, beg, end, Gp, p->_exons[a]._genFrom - 1, p->_exons[b]._genTo, ident, cover); } int main(int argc, char **argv) { bool beVerbose = false; bool wholeEDefLine = false; bool wholeGDefLine = false; bool doExons = false; int arg = 1; int err = 0; while (arg < argc) { if (strcmp(argv[arg], "-v") == 0) { beVerbose = true; } else if (strcmp(argv[arg], "-fullquery") == 0) { wholeEDefLine = true; } else if (strcmp(argv[arg], "-fullgenomic") == 0) { wholeGDefLine = true; } else if (strcmp(argv[arg], "-exons") == 0) { doExons = true; } else if (strcmp(argv[arg], "-extended") == 0) { extendedFormat = true; } else { fprintf(stderr, "Unknown arg '%s'\n", argv[arg]); err++; } arg++; } if (isatty(fileno(stdin)) || (err)) { fprintf(stderr, "usage: %s [options] < IN > OUT\n", argv[0]); fprintf(stderr, " -v be chatty\n"); fprintf(stderr, " -fullquery output the whole query def line\n"); fprintf(stderr, " -fullgenomic output the whole genomic def line\n"); fprintf(stderr, " -exons include exons\n"); fprintf(stderr, " -extended include the IDX of each sequence\n"); exit(1); } if (extendedFormat) fprintf(stdout, "cDNAid\tcDNAidx\tcDNAlen\texonNum\tbegin\tend\tgenomicid\tgenomicidx\tbegin\tend\tidentity\tcoverage\n"); else fprintf(stdout, "cDNAid\tcDNAlen\texonNum\tbegin\tend\tgenomicid\tbegin\tend\tidentity\tcoverage\n"); char E[1024], *Ep; char G[1024], *Gp; 
splitToWords W; sim4polishReader *R = new sim4polishReader("-"); sim4polish *p = 0L; while (R->nextAlignment(p)) { if (wholeEDefLine == true) { Ep = p->_estDefLine; } else { W.split(p->_estDefLine); strcpy(E, W[0] + ((W[0][0] == '>') ? 1 : 0)); Ep = E; } if (wholeGDefLine == true) { Gp = p->_genDefLine; } else { W.split(p->_genDefLine); strcpy(G, W[0] + ((W[0][0] == '>') ? 1 : 0)); Gp = G; } if (doExons == false) { output(p, Ep, Gp, 0, p->_numExons-1, false); } else { for (uint32 i=0; i_numExons; i++) output(p, Ep, Gp, i, i, true); } } return(0); } kmer-code-2013-trunk/sim4dbutils/plotIntronSize.C0000644000000000000000000000427312322046702020524 0ustar rootroot#include #include #include #include #include #include "sim4reader.h" // // Outputs some statistics on the matches // #define HISTBIN (1000) #define HISTMAX (300000000 / HISTBIN) int main(int argc, char ** argv) { uint32 dumpSize = 0; uint32 *hist; FILE *all; FILE *big; int i, j; if (isatty(fileno(stdin))) { fprintf(stderr, "error: I cannot read polishes from the terminal!\n\n"); exit(1); } int arg = 1; while (arg < argc) { if (strncmp(argv[arg], "-dump", 2) == 0) { dumpSize = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-all", 2) == 0) { all = fopen(argv[++arg], "w"); if (all == 0L) { fprintf(stderr, "Can't open '%s' for writing\n", argv[arg]); exit(1); } } else if (strncmp(argv[arg], "-big", 2) == 0) { big = fopen(argv[++arg], "w"); if (big == 0L) { fprintf(stderr, "Can't open '%s' for writing\n", argv[arg]); exit(1); } } else { fprintf(stderr, "Unknown option: '%s'\n", argv[arg]); } arg++; } if (all || big) { hist = new uint32 [HISTMAX]; memset(hist, 0, sizeof(uint32) * HISTMAX); } sim4polish *p = new sim4polish(stdin); while (p->_numExons > 0) { if (p->numExons > 1) { int exA; int exB; int biggestIntron = 0; for (exA=0, exB=1; exB < p->numExons; exA++, exB++) { int dist = p->exons[exB].genFrom - p->exons[exA].genTo + 1; if (dist > biggestIntron) biggestIntron = dist; if (all) hist[dist / HISTBIN]++; } 
if (big) hist[biggestIntron / HISTBIN]++; //fprintf(stdout, "%d\n", biggestIntron); if ((dumpSize > 0) && (biggestIntron > dumpSize)) printPolish(stdout, p); } destroyPolish(p); } if (all) { for (j=HISTMAX-1; hist[j]==0 && j>=0; j--) ; for (i=0; i=0; j--) ; for (i=0; i #include #include #include #include "sim4.H" // Writes n polishes from stdin to stdout, default 1. int main(int argc, char **argv) { uint32 numToPrint = 1; sim4polishReader *R = 0L; sim4polishWriter *W = 0L; sim4polishStyle style = sim4polishStyleDefault; int arg = 1; int err = 0; while (arg < argc) { if (strncmp(argv[arg], "-h", 2) == 0) { err++; } else if (strncmp(argv[arg], "-n", 2) == 0) { numToPrint = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-gff3") == 0) { style = sim4polishGFF3; } else if (strncmp(argv[arg], "-", 1) == 0) { numToPrint = atoi(argv[arg] + 1); } else { R = new sim4polishReader(argv[arg]); } arg++; } if ((err) || ((R == 0L) && (isatty(fileno(stdin))))) { fprintf(stderr, "usage: %s [-h] [-# | -n #] [-gff3] [polishes-file]\n", argv[0]); exit(1); } if (R == 0L) R = new sim4polishReader("-"); if (W == 0L) W = new sim4polishWriter("-", style); if (R->getsim4polishStyle() != style) fprintf(stderr, "warning: Input format and output format differ.\n"); sim4polish *p = 0L; while ((numToPrint--) && (R->nextAlignment(p))) W->writeAlignment(p); delete W; delete R; return(0); } kmer-code-2013-trunk/libkmer/0000755000000000000000000000000012641613356014602 5ustar rootrootkmer-code-2013-trunk/libkmer/positionDB-access.C0000644000000000000000000002307112322046702020211 0ustar rootroot#include "bio++.H" #include "positionDB.H" void positionDB::reallocateSpace(uint64*& posn, uint64& posnMax, uint64& posnLen, uint64 len) { if (posnMax < posnLen + len) { uint64 *pp; posnMax = posnLen + len + (len >> 2); if (posnMax == 0) posnMax = 16384; try { pp = new uint64 [posnMax]; } catch (...) 
{ fprintf(stderr, "positionDB::get()-- Can't allocate space for more positions, requested "uint64FMT" uint64's.\n", posnMax); abort(); } memcpy(pp, posn, sizeof(uint64) * posnLen); delete [] posn; posn = pp; } } void positionDB::loadPositions(uint64 J, uint64*& posn, uint64& posnMax, uint64& posnLen, uint64& count) { uint64 sizs[3] = {_pptrWidth, 1, _sizeWidth}; uint64 vals[3] = {0, 0, 1}; getDecodedValues(_buckets, J + _chckWidth, (_sizeWidth == 0) ? 2 : 3, sizs, vals); // If the size is stored, the count is updated to the correct // thing. If it's not stored, the count is set to 1 by the default // value of vals[2], and reset after we get the number of positions // stored. // count = vals[2]; if (vals[1]) { reallocateSpace(posn, posnMax, posnLen, 64); posn[posnLen++] = vals[0]; } else { uint64 ptr = vals[0] * _posnWidth; uint64 len = getDecodedValue(_positions, ptr, _posnWidth); if (_sizeWidth == 0) count = len; reallocateSpace(posn, posnMax, posnLen, len + 64); for (ptr += _posnWidth; len > 0; ptr += _posnWidth, len--) posn[posnLen++] = getDecodedValue(_positions, ptr, _posnWidth); } } bool positionDB::getExact(uint64 mer, uint64*& posn, uint64& posnMax, uint64& posnLen, uint64& count) { uint64 h = HASH(mer); uint64 c = CHECK(mer); uint64 st, ed; if (_hashTable_BP) { st = getDecodedValue(_hashTable_BP, h * _hashWidth, _hashWidth); ed = getDecodedValue(_hashTable_BP, h * _hashWidth + _hashWidth, _hashWidth); } else { st = _hashTable_FW[h]; ed = _hashTable_FW[h+1]; } posnLen = 0; if (st == ed) return(false); for (uint64 i=st, J=st * _wFin; i 0) return(vals[2]); if (vals[1]) return(1); return(getDecodedValue(_positions, vals[0] * _posnWidth, _posnWidth)); } } return(0); } uint64 positionDB::setCount(uint64 mer, uint64 count) { uint64 h = HASH(mer); uint64 c = CHECK(mer); uint64 st, ed; if (_hashTable_BP) { st = getDecodedValue(_hashTable_BP, h * _hashWidth, _hashWidth); ed = getDecodedValue(_hashTable_BP, h * _hashWidth + _hashWidth, _hashWidth); } else { st = 
_hashTable_FW[h]; ed = _hashTable_FW[h+1]; } if (st == ed) return(0); for (uint64 i=st, J=st * _wFin; i 0) count = vals[3]; // What happened here: By default, the count is 1. If it is // NOT a unique mer in the table, we reset the count to the // number of entries in the table. Then, if there is a count // stored in the table, we reset the count again. // Move on to copying the data, if in the correct range. if (vals[2] == 1) { // Is a single mer in our table. Copy if the actual count is // acceptable. if ((lo <= count) && (count < hi)) { okCount++; setDecodedValues(_buckets, nb, (_sizeWidth == 0) ? 3 : 4, sizs, vals); nb += _wFin; } else { _numberOfDistinct--; _numberOfMers--; loCount++; } } else { // Mer has more than one location in the table. Copy all // locations if the count is acceptable. if ((lo <= count) && (count < hi)) { okCount++; // Copy the bucket vals[1] = np / _posnWidth; setDecodedValues(_buckets, nb, (_sizeWidth == 0) ? 3 : 4, sizs, vals); nb += _wFin; // Copy length of the positions if (cp != np) setDecodedValue(_positions, np, _posnWidth, len); np += _posnWidth; cp += _posnWidth; // Copy positions while (len > 0) { if (cp != np) setDecodedValue(_positions, np, _posnWidth, getDecodedValue(_positions, cp, _posnWidth)); np += _posnWidth; cp += _posnWidth; len--; } } else { // Not acceptable count _numberOfDistinct--; _numberOfEntries -= len; if (count < lo) loCount++; if (count > hi) hiCount++; } } // Move to the next entry st++; cb += _wFin; } // Over all entries in the bucket // Update the end position of this bucket if (_hashTable_BP) setDecodedValue(_hashTable_BP, h * _hashWidth + _hashWidth, _hashWidth, nb / _wFin); else _hashTable_FW[h+1] = nb / _wFin; } // Over all buckets fprintf(stderr, "positionDB::filter()-- Filtered "uint64FMT" kmers less than "uint64FMT"\n", loCount, lo); fprintf(stderr, "positionDB::filter()-- Filtered "uint64FMT" kmers more than "uint64FMT"\n", hiCount, hi); fprintf(stderr, "positionDB::filter()-- Saved "uint64FMT" 
kmers with acceptable count\n", okCount); //dump("posDB.after"); } kmer-code-2013-trunk/libkmer/merTable.H0000644000000000000000000000406212322046702016437 0ustar rootroot#ifndef MERTABLE_H #define MERTABLE_H // The obvious simple small mer table, appropriate for large sequences #error merTable should be unused class merTable { public: merTable() { }; ~merTable() { delete [] merToPositions; delete [] positions; }; void build(seqStream *CS, uint32 merSize, uint32 merSkip=0) { // Allocate the mer table // uint32 tableSize = uint32ONE << (2*merSize); fprintf(stderr, "allocate "uint32FMT" entries for a merTable.\n", tableSize); merToPositions = new uint32 [tableSize+1]; // First pass, count the number of times we see each mer // for (uint32 i=0; i<=tableSize; i++) merToPositions[i] = 0; merStream MS(merSize, CS); while (MS.nextMer(merSkip)) { uint64 m = (uint64)MS.theFMer(); //fprintf(stderr, "add mer "uint64FMT"\n", m); merToPositions[m]++; } // Convert those to indexes into positions - m[i] is the start of // the locations in positions[] where positions are stored. 
// for (uint32 pos=0, val=0, i=0; i<=tableSize; i++) { val = merToPositions[i]; merToPositions[i] = pos; pos += val; } // Allocate space // fprintf(stderr, "allocate "uint32FMT" entries for positions\n", merToPositions[tableSize]); positions = new uint32 [merToPositions[tableSize]]; // Second pass, fill in positions // MS.rewind(); while (MS.nextMer(merSkip)) positions[ merToPositions[(uint64)MS.theFMer()]++ ] = MS.thePositionInStream(); }; uint32 numberOfPositions(uint64 mer) { return(merToPositions[mer+1] - merToPositions[mer]); }; uint32 getPosition(uint64 mer, uint32 index) { if (index >= merToPositions[mer+1] - merToPositions[mer]) return(~uint32ZERO); return(merToPositions[mer] + index); }; private: uint32 *merToPositions; // index into positions[]; merToPositions[mer] is the first base in the mer uint32 *positions; // list of positions for mers, sorted by mer }; #endif // MERTABLE_H kmer-code-2013-trunk/libkmer/test/0000755000000000000000000000000012641613356015561 5ustar rootrootkmer-code-2013-trunk/libkmer/test/Makefile0000644000000000000000000001227612073562464017233 0ustar rootrootinclude ../../Make.compilers # Bigger tblsize makes existDB much faster, but uses more memory (not # much, really). 23 is nice. all: test-maskonly-passed position-passed @echo "existDB has expensive tests. They take:" @echo " 17 minutes on 1.8GHz Quadxeon (with KMER=1)" @echo " 60 minutes on 2.8GHz P4 (with KMER=1)" @echo " 120 minutes on 2.0GHz G5 (with KMER=8)" @echo "If you really want to run them, do 'make exist-passed'." # ../../meryl/meryl -M equal 1 -s xp -o xp1 # ../../meryl/meryl -Dt -n 1 -s xp1 > xp.uni.fasta # Dead code, removed. 
test-mertable: $(CXX) $(CXXFLAGS_COMPILE) -c -o test-mertable.o test-mertable.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o test-mertable test-mertable.o $(LIBS) ../../leaff/leaff -G 1000 5000 8000 > xp.fasta ./test-mertable xp.fasta echo test-mertable PASSED rm xp* junk* test-maskonly-passed: $(CXX) $(CXXFLAGS_COMPILE) -c -o test-maskonly.o test-maskonly.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o test-maskonly test-maskonly.o $(LIBS) ../../leaff/leaff -G 1000 5000 8000 > xp.fasta ../../meryl/meryl -B -f -m 14 -s xp.fasta -o xp ../../meryl/meryl -Dt -n 2 -s xp > xp.dup.fasta ./test-maskonly xp.fasta xp.dup.fasta xp.dup.fasta echo test-maskonky-passed PASSED rm xp* junk* touch test-maskonly-passed test-rebuild: test-rebuild.C ../positionDB.H $(CXX) $(CXXFLAGS_COMPILE) -c -o test-rebuild.o test-rebuild.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o test-rebuild test-rebuild.o $(LIBS) ./test-rebuild @echo test-rebuild-passed PASSED rm -f xp* junk* position-passed: position-passed1 position-passed2 exist-fast-passed touch position-passed xp.fasta: ../../leaff/leaff #../../leaff/leaff -G 1000 5000 8000 > xp.fasta ../../leaff/leaff -G 1 500 800 > xp.fasta yp.fasta: ../../leaff/leaff #../../leaff/leaff -G 100 500 1000 > yp.fasta ../../leaff/leaff -G 1 50 100 > yp.fasta position-passed1: testerp xp.fasta ./testerp -test1 xp.fasta touch position-passed1 position-passed2: testerp xp.fasta yp.fasta ./testerp -test2 xp.fasta yp.fasta touch position-passed2 exist-fast-passed: ../existDB xe.fasta ../existDB -mersize 14 -tblsize 21 -testfiles xe.fasta junk ../existDB -mersize 14 -tblsize 21 -testexistence xe.fasta rm -f xe.mcdat xe.mcidx ../../meryl/meryl -B -f -m 14 -s xe.fasta -o xe ../existDB -mersize 14 -tblsize 21 -testexhaustive xe.fasta xe touch exist-fast-passed exist-passed: exist-passed1 exist-passed2 exist-passed3 touch exist-passed xe.fasta: ../../leaff/leaff #../../leaff/leaff -G 1000 5000 8000 > xe.fasta ../../leaff/leaff -G 1 500 800 > xe.fasta exist-passed1: ../existDB xe.fasta ../existDB 
-mersize 17 -tblsize 23 -testfiles xe.fasta junk rm -f junk* touch exist-passed1 exist-passed2: ../existDB xe.fasta ../existDB -mersize 17 -tblsize 23 -testexistence xe.fasta rm -f junk* touch exist-passed2 exist-passed3: ../existDB xe.fasta ../../meryl/meryl rm -f xe.mcdat xe.mcidx ../../meryl/meryl -B -f -m 17 -s xe.fasta -o xe ../existDB -mersize 17 -tblsize 23 -testexhaustive xe.fasta xe rm -f junk* touch exist-passed3 INCLUDE = -I../../libbio -I../../libseq -I../../libutil -I../../libmeryl -I.. LIBS = -L../../libbio -L../../libseq -L../../libutil -L../../libmeryl -L.. -lkmer -lmeryl -lbio -lutil DBGOPT = -DERROR_CHECK_COUNTING -DERROR_CHECK_COUNTING_ENCODING -DERROR_CHECK_EMPTY_BUCKETS testerp: ../positionDB.C ../positionDB.H ../positionDB-access.C ../positionDB-dump.C ../positionDB-sort.C ../positionDB-file.C $(CXX) $(DBGOPT) $(CXXFLAGS_COMPILE) -c -o driverp.o ../driver-posDB.C $(INCLUDE) $(CXX) $(DBGOPT) $(CXXFLAGS_COMPILE) -c -o positionDB.o ../positionDB.C $(INCLUDE) $(CXX) $(DBGOPT) $(CXXFLAGS_COMPILE) -c -o positionDB-access.o ../positionDB-access.C $(INCLUDE) $(CXX) $(DBGOPT) $(CXXFLAGS_COMPILE) -c -o positionDB-dump.o ../positionDB-dump.C $(INCLUDE) $(CXX) $(DBGOPT) $(CXXFLAGS_COMPILE) -c -o positionDB-sort.o ../positionDB-sort.C $(INCLUDE) $(CXX) $(DBGOPT) $(CXXFLAGS_COMPILE) -c -o positionDB-file.o ../positionDB-file.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o testerp driverp.o positionDB.o positionDB-access.o positionDB-dump.o positionDB-sort.o positionDB-file.o $(LIBS) -lm # XXX: There isn't any reason we need to build testere, we could # just use ../existDB (as it did before!) 
testere: ../existDB.C ../existDB-create-from-fasta.C ../existDB-create-from-meryl.C ../existDB-state.C $(CXX) $(DBGOPT) $(CXXFLAGS_COMPILE) -c -o drivere.o ../driver-existDB.C $(INCLUDE) $(CXX) $(DBGOPT) $(CXXFLAGS_COMPILE) -c -o existDB.o ../existDB.C $(INCLUDE) $(CXX) $(DBGOPT) $(CXXFLAGS_COMPILE) -c -o existDB-create-from-fasta.o ../existDB-create-from-fasta.C $(INCLUDE) $(CXX) $(DBGOPT) $(CXXFLAGS_COMPILE) -c -o existDB-create-from-meryl.o ../existDB-create-from-meryl.C $(INCLUDE) $(CXX) $(DBGOPT) $(CXXFLAGS_COMPILE) -c -o existDB-state.o ../existDB-state.C $(INCLUDE) $(CXX) $(CXXLDFLAGS) -o testere drivere.o existDB.o existDB-create-from-fasta.o existDB-create-from-meryl.o existDB-state.o $(LIBS) -lm clean: rm -f *passed* rm -f testerp *.o xp.fasta* yp.fasta* rm -f testere junk* xe.fasta* xe.mcidx xe.mcdat xe.merStream rm -f test-maskonly xp.dup.fasta xp.mcidx xp.mcdat rm -f test-rebuild kmer-code-2013-trunk/libkmer/test/test-mertable.C0000644000000000000000000000031310415632042020420 0ustar rootroot#include "bio++.H" #include "merTable.H" int main(int argc, char **argv) { merTable X; chainedSequence *CS = new chainedSequence(); CS->setSource(argv[1]); CS->finish(); X.build(CS, 8); } kmer-code-2013-trunk/libkmer/test/test-rebuild.C0000644000000000000000000000212612322046702020260 0ustar rootroot#include "bio++.H" #include "existDB.H" #include "positionDB.H" // Tests a positionDB when using an existDB for masking. 
// // existDB can be either include or exclude // positionDB can use include, exclude or threshold // int main(int argc, char **argv) { uint64 maxMers = uint64ONE << 25; for (uint32 merSize=8; merSize<33; merSize++) { fprintf(stderr, "Testing "uint64FMT" Mmers at merSize "uint32FMT".\n", maxMers, merSize); kMerBuilder *K = new kMerBuilder(merSize); merStream *T = new merStream(K, "acgcgactcgagctacgagcgatcacgacgactacgagca", 40); positionDB *P = new positionDB(T, merSize, 0, 0L, 0L, 0L, 0, 0, false, true); uint64 p = 0; uint64 f = 0; mt_s *mts = mtInit(3492); uint64 msk = uint64MASK(2*merSize); uint64 cnt = maxMers; while (cnt--) { if (P->checkREBUILD(mtRandom64(mts) & msk) == false) { f++; } else { p++; } } if (f) { fprintf(stderr, "PASS: "uint64FMT" FAIL: "uint64FMT"\n", p, f); exit(1); } free(mts); delete P; delete T; } exit(0); } kmer-code-2013-trunk/libkmer/test/test-maskonly.C0000644000000000000000000000600612322046702020470 0ustar rootroot#include "bio++.H" #include "existDB.H" #include "positionDB.H" // Tests a positionDB when using an existDB for masking. 
// // existDB can be either include or exclude // positionDB can use include, exclude or threshold // #define MERSIZE 14 int main(int argc, char **argv) { existDB *include; existDB *exclude; positionDB *full; positionDB *incl; positionDB *excl; positionDB *thrs; if (argc != 4) { fprintf(stderr, "usage: %s seq.fasta mask.fasta incl.fasta\n", argv[0]); exit(1); } char *seqName = argv[1]; char *mskName = argv[2]; char *incName = argv[3]; fprintf(stderr, "BUILDING EXCLUDE\n"); exclude = new existDB(mskName, MERSIZE, existDBnoFlags, uint32ZERO, ~uint32ZERO); fprintf(stderr, "BUILDING INCLUDE\n"); include = new existDB(incName, MERSIZE, existDBnoFlags, uint32ZERO, ~uint32ZERO); seqStream *F = new seqStream(seqName, true); merStream *T = new merStream(new kMerBuilder(MERSIZE), F); fprintf(stderr, "BUILDING FULL\n"); full = new positionDB(T, MERSIZE, 0, 0L, 0L, 0L, 0, 0, 0, 0, true); full->saveState("junk-full"); delete full; fprintf(stderr, "BUILDING INCL\n"); incl = new positionDB(T, MERSIZE, 0, 0L, include, 0L, 0, 0, 0, 0, true); incl->saveState("junk-incl"); delete incl; fprintf(stderr, "BUILDING EXCL\n"); excl = new positionDB(T, MERSIZE, 0, exclude, 0L, 0L, 0, 0, 0, 0, true); excl->saveState("junk-excl"); delete excl; fprintf(stderr, "BUILDING THRS\n"); thrs = new positionDB(T, MERSIZE, 0, 0L, 0L, 0L, 1, 0, 0, 0, true); thrs->saveState("junk-thrs"); delete thrs; full = new positionDB("junk-full", MERSIZE, 0, 0); incl = new positionDB("junk-incl", MERSIZE, 0, 0); excl = new positionDB("junk-excl", MERSIZE, 0, 0); thrs = new positionDB("junk-thrs", MERSIZE, 0, 0); char themer[1000]; uint32 mernum = 0; uint32 err = 0; // Check everything looks ok T->rewind(); while (T->nextMer()) { if (!full->existsExact(T->theFMer())) { fprintf(stderr, "Didn't find mer "uint32FMT" %s in full.\n", mernum, T->theFMer().merToString(themer)); err++; } if (include->exists(T->theFMer())) { if (!incl->existsExact(T->theFMer())) { fprintf(stderr, "Didn't find mer "uint32FMT" %s in incl.\n", 
mernum, T->theFMer().merToString(themer)); err++; } } else { if (incl->existsExact(T->theFMer())) { fprintf(stderr, "Found extra mer "uint32FMT" %s in incl.\n", mernum, T->theFMer().merToString(themer)); err++; } } if (exclude->exists(T->theFMer())) { if (excl->existsExact(T->theFMer())) { fprintf(stderr, "Found extra mer "uint32FMT" %s in excl.\n", mernum, T->theFMer().merToString(themer)); err++; } } else { if (!excl->existsExact(T->theFMer())) { fprintf(stderr, "Didn't find mer "uint32FMT" %s in excl.\n", mernum, T->theFMer().merToString(themer)); err++; } } mernum++; } delete T; delete F; exit(err > 0); } kmer-code-2013-trunk/libkmer/existDB-state.C0000644000000000000000000001416012322046702017357 0ustar rootroot#include #include #include #include #include "existDB.H" #include "bio++.H" const char magic[16] = { 'e', 'x', 'i', 's', 't', 'D', 'B', '2', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' }; void existDB::saveState(char const *filename) { char cigam[16] = { 0 }; errno = 0; FILE *F = fopen(filename, "wb"); if (errno) { fprintf(stderr, "Can't open '%s' for writing\n%s\n", filename, strerror(errno)); exit(1); } strncpy(cigam, magic, 16); if (_compressedHash) cigam[8] = 'h'; if (_compressedBucket) cigam[9] = 'b'; if (_compressedCounts) cigam[10] = 'c'; if (_isForward) cigam[11] = 'F'; if (_isCanonical) cigam[11] = 'C'; fwrite(cigam, sizeof(char), 16, F); fwrite(&_merSizeInBases, sizeof(uint32), 1, F); fwrite(&_shift1, sizeof(uint32), 1, F); fwrite(&_shift2, sizeof(uint32), 1, F); fwrite(&_mask1, sizeof(uint64), 1, F); fwrite(&_mask2, sizeof(uint64), 1, F); fwrite(&_hshWidth, sizeof(uint32), 1, F); // only valid if _compressedHash fwrite(&_chkWidth, sizeof(uint32), 1, F); // only valid if _compressedBucket fwrite(&_cntWidth, sizeof(uint32), 1, F); // only valid if _compressedCounts fwrite(&_hashTableWords, sizeof(uint64), 1, F); fwrite(&_bucketsWords, sizeof(uint64), 1, F); fwrite(&_countsWords, sizeof(uint64), 1, F); fwrite(_hashTable, sizeof(uint64), 
_hashTableWords, F); fwrite(_buckets, sizeof(uint64), _bucketsWords, F); fwrite(_counts, sizeof(uint64), _countsWords, F); fclose(F); if (errno) { fprintf(stderr, "existDB::saveState()-- Write failure.\n%s\n", strerror(errno)); exit(1); } } bool existDB::loadState(char const *filename, bool beNoisy, bool loadData) { char cigam[16]; errno = 0; FILE *F = fopen(filename, "rb"); if (errno) { //fprintf(stderr, "Can't open '%s' for reading pre-built existDB\n%s\n", strerror(errno)); return(false); } fread(cigam, sizeof(char), 16, F); _compressedHash = false; _compressedBucket = false; _compressedCounts = false; _isForward = false; _isCanonical = false; if (cigam[8] == 'h') _compressedHash = true; if (cigam[9] == 'b') _compressedBucket = true; if (cigam[10] == 'c') _compressedCounts = true; if (cigam[11] == 'F') _isForward = true; if (cigam[11] == 'C') _isCanonical = true; cigam[ 8] = ' '; cigam[ 9] = ' '; cigam[10] = ' '; cigam[11] = ' '; if (strncmp(magic, cigam, 16) != 0) { if (beNoisy) { fprintf(stderr, "existDB::loadState()-- Not an existDB binary file, maybe a sequence file?\n"); fprintf(stderr, "existDB::loadState()-- Read '%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c'\n", cigam[0], cigam[1], cigam[2], cigam[3], cigam[4], cigam[5], cigam[6], cigam[7], cigam[8], cigam[9], cigam[10], cigam[11], cigam[12], cigam[13], cigam[14], cigam[15]); fprintf(stderr, "existDB::loadState()-- Expected '%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c'\n", magic[0], magic[1], magic[2], magic[3], magic[4], magic[5], magic[6], magic[7], magic[8], magic[9], magic[10], magic[11], magic[12], magic[13], magic[14], magic[15]); } fclose(F); return(false); } fread(&_merSizeInBases, sizeof(uint32), 1, F); fread(&_shift1, sizeof(uint32), 1, F); fread(&_shift2, sizeof(uint32), 1, F); fread(&_mask1, sizeof(uint64), 1, F); fread(&_mask2, sizeof(uint64), 1, F); fread(&_hshWidth, sizeof(uint32), 1, F); // only valid if _compressedHash fread(&_chkWidth, sizeof(uint32), 1, F); // only valid if _compressedBucket 
fread(&_cntWidth, sizeof(uint32), 1, F); // only valid if _compressedCounts fread(&_hashTableWords, sizeof(uint64), 1, F); fread(&_bucketsWords, sizeof(uint64), 1, F); fread(&_countsWords, sizeof(uint64), 1, F); _hashTable = 0L; _buckets = 0L; _counts = 0L; if (loadData) { _hashTable = new uint64 [_hashTableWords]; _buckets = new uint64 [_bucketsWords]; if (_countsWords > 0) _counts = new uint64 [_countsWords]; fread(_hashTable, sizeof(uint64), _hashTableWords, F); fread(_buckets, sizeof(uint64), _bucketsWords, F); if (_countsWords > 0) fread(_counts, sizeof(uint64), _countsWords, F); } fclose(F); if (errno) { fprintf(stderr, "existDB::loadState()-- Read failure.\n%s\n", strerror(errno)); exit(1); } return(true); } void existDB::printState(FILE *stream) { fprintf(stream, "merSizeInBases: "uint32FMT"\n", _merSizeInBases); fprintf(stream, "tableBits "uint32FMT"\n", 2 * _merSizeInBases - _shift1); fprintf(stream, "-----------------\n"); fprintf(stream, "_hashTableWords "uint64FMT" ("uint64FMT" KB)\n", _hashTableWords, _hashTableWords >> 7); fprintf(stream, "_bucketsWords "uint64FMT" ("uint64FMT" KB)\n", _bucketsWords, _bucketsWords >> 7); fprintf(stream, "_countsWords "uint64FMT" ("uint64FMT" KB)\n", _countsWords, _countsWords >> 7); fprintf(stream, "-----------------\n"); fprintf(stream, "_shift1: "uint32FMT"\n", _shift1); fprintf(stream, "_shift2 "uint32FMT"\n", _shift2); fprintf(stream, "_mask1 "uint64HEX"\n", _mask1); fprintf(stream, "_mask2 "uint64HEX"\n", _mask2); if (_compressedHash) { fprintf(stream, "_compressedHash true\n"); fprintf(stream, "_hshWidth "uint32FMT"\n", _hshWidth); } else { fprintf(stream, "_compressedHash false\n"); fprintf(stream, "_hshWidth undefined\n"); } if (_compressedBucket) { fprintf(stream, "_compressedBucket true\n"); fprintf(stream, "_chkWidth "uint32FMT"\n", _chkWidth); } else { fprintf(stream, "_compressedBucket false\n"); fprintf(stream, "_chkWidth undefined\n"); } if (_compressedCounts) { fprintf(stream, "_compressedCount 
true\n"); fprintf(stream, "_cntWidth "uint32FMT"\n", _cntWidth); } else { fprintf(stream, "_compressedCount false\n"); fprintf(stream, "_cntWidth undefined\n"); } } kmer-code-2013-trunk/libkmer/existDB.H0000644000000000000000000001040612322046702016245 0ustar rootroot#ifndef EXISTDB_H #define EXISTDB_H // Used by wgs-assembler, to determine if a rather serious bug was patched. #define EXISTDB_H_VERSION 1960 #include "bio++.H" // Takes as input a list of mers (in a file) and builds a searchable // structure listing those mers. Duplicate mers are not removed and // will be stored multiple times. // // Using a compressed hash is allowed, but somewhat useless -- it is // really slow and doesn't save that much. // // If existDBcanonical is requested, this will store only the // canonical mer. It is up to the client to be sure that is // appropriate! See positionDB.H for more. //#define STATS typedef uint32 existDBflags; const existDBflags existDBnoFlags = 0x0000; const existDBflags existDBcompressHash = 0x0001; const existDBflags existDBcompressBuckets = 0x0002; const existDBflags existDBcompressCounts = 0x0004; const existDBflags existDBcanonical = 0x0008; const existDBflags existDBforward = 0x0010; const existDBflags existDBcounts = 0x0020; class existDB { public: // Read state from an existDB file existDB(char const *filename, bool loadData=true); // Load mers from an existing existDB file, a fastafile, or a meryl database existDB(char const *filename, uint32 merSize, existDBflags flags, uint32 lo, uint32 hi); // Load mers from a character string existDB(char const *sequence, uint32 merSize, existDBflags flags); ~existDB(); void saveState(char const *filename); void printState(FILE *stream); bool isForward(void) { return(_isForward); }; bool isCanonical(void) { return(_isCanonical); }; bool exists(uint64 mer); uint64 count(uint64 mer); private: bool loadState(char const *filename, bool beNoisy=false, bool loadData=true); bool createFromFastA(char const *filename, 
uint32 merSize, uint32 flags); bool createFromMeryl(char const *filename, uint32 merSize, uint32 lo, uint32 hi, uint32 flags); bool createFromSequence(char const *sequence, uint32 merSize, uint32 flags); uint64 HASH(uint64 k) { return(((k >> _shift1) ^ (k >> _shift2) ^ k) & _mask1); }; uint64 CHECK(uint64 k) { return(k & _mask2); }; void insertMer(uint64 hsh, uint64 chk, uint64 cnt, uint64 *countingTable) { // If the mer is already here, just update the count. This only // works if not _compressedBucket, and only makes sense for loading from // fasta or sequence. if ((_compressedBucket == false) && (_searchForDupe)) { uint64 st = _hashTable[hsh]; uint64 ed = countingTable[hsh]; for (; st #include #include #include #include "existDB.H" #include "bio++.H" #include "seqCache.H" #include "seqStream.H" #include "merStream.H" bool existDB::createFromSequence(char const *sequence, uint32 merSize, uint32 flags) { bool beVerbose = false; bool rebuilding = false; _hashTable = 0L; _buckets = 0L; _counts = 0L; _merSizeInBases = merSize; _searchForDupe = true; if ((flags & existDBcompressHash) || (flags & existDBcompressBuckets) || (flags & existDBcompressCounts)) fprintf(stderr, "existDB::createFromSequence: compression not supported.\n"), exit(1); // This (at =22) eats up 16MB, and should allow a lot of mers at big sizes. Unfortunately, we // know nothing about how man mers are going to be in the input. // // Setting this too high drastically reduces performance, suspected because of cache misses. // Setting this too low will also reduce performance, by increasing the search time in a bucket. 
// uint32 tblBits = logBaseTwo64(strlen(sequence)); rebuild: _shift1 = 2 * _merSizeInBases - tblBits; _shift2 = _shift1 / 2; _mask1 = uint64MASK(tblBits); _mask2 = uint64MASK(_shift1); _hshWidth = uint32ZERO; _chkWidth = 2 * merSize - tblBits; _cntWidth = 16; uint64 tableSizeInEntries = uint64ONE << tblBits; uint64 numberOfMers = uint64ZERO; uint64 *countingTable = new uint64 [tableSizeInEntries + 1]; for (uint64 i=tableSizeInEntries+1; i--; ) countingTable[i] = 0; _isCanonical = flags & existDBcanonical; _isForward = flags & existDBforward; assert(_isCanonical + _isForward == 1); //////////////////////////////////////////////////////////////////////////////// // // 1) Count bucket sizes // merStream *M = new merStream(new kMerBuilder(_merSizeInBases), new seqStream(sequence, strlen(sequence)), true, true); while (M->nextMer()) { if (_isForward) { countingTable[ HASH(M->theFMer()) ]++; numberOfMers++; } if (_isCanonical) { countingTable[ HASH(M->theCMer()) ]++; numberOfMers++; } } delete M; #ifdef STATS uint64 dist[32] = {0}; uint64 maxcnt = 0; for (uint64 i=tableSizeInEntries+1; i--; ) { if (countingTable[i] > maxcnt) maxcnt = countingTable[i]; if (countingTable[i] < 32) dist[countingTable[i]]++; } for(uint64 i=0; i<32; i++) fprintf(stderr, "existDB::usage[%2d] = %d\n", i, dist[i]); fprintf(stderr, "existDB::maxcnt = %d\n", maxcnt); #endif //////////////////////////////////////////////////////////////////////////////// // // Determine how many bits we need to hold the value // numberOfMers.....then.... // // This is numberOfMers+1 because we need to store the // first position after the last mer. That is, if there are two // mers, we will store that the first mer is at position 0, the // second mer is at position 1, and the end of the second mer is at // position 2. 
// if (_compressedHash) { _hshWidth = 1; while ((numberOfMers+1) > (uint64ONE << _hshWidth)) _hshWidth++; } //////////////////////////////////////////////////////////////////////////////// // // 2) Allocate a hash table and some mer storage buckets. // _hashTableWords = tableSizeInEntries + 2; if (_compressedHash) _hashTableWords = _hashTableWords * _hshWidth / 64 + 1; _bucketsWords = numberOfMers + 2; if (_compressedBucket) _bucketsWords = _bucketsWords * _chkWidth / 64 + 1; _countsWords = numberOfMers + 2; if (_compressedCounts) _countsWords = _countsWords * _cntWidth / 64 + 1; if (beVerbose) { fprintf(stderr, "existDB::createFromSequence()-- hashTable is "uint64FMT"MB\n", _hashTableWords >> 17); fprintf(stderr, "existDB::createFromSequence()-- buckets is "uint64FMT"MB\n", _bucketsWords >> 17); if (flags & existDBcounts) fprintf(stderr, "existDB::createFromSequence()-- counts is "uint64FMT"MB\n", _countsWords >> 17); } _hashTable = new uint64 [_hashTableWords]; _buckets = new uint64 [_bucketsWords]; _countsWords = (flags & existDBcounts) ? _countsWords : 0; _counts = (flags & existDBcounts) ? new uint64 [_countsWords] : 0L; // These aren't strictly needed. _buckets is cleared as it is initialied. _hashTable // is also cleared as it is initialized, but in the _compressedHash case, the last // few words might be uninitialized. They're unused. //memset(_hashTable, 0, sizeof(uint64) * _hashTableWords); //memset(_buckets, 0, sizeof(uint64) * _bucketsWords); // buckets is cleared as it is built //memset(_counts, 0, sizeof(uint64) * _countsWords); _hashTable[_hashTableWords-1] = 0; _hashTable[_hashTableWords-2] = 0; _hashTable[_hashTableWords-3] = 0; _hashTable[_hashTableWords-4] = 0; //////////////////////////////////////////////////////////////////////////////// // // Make the hash table point to the start of the bucket, and reset // the counting table -- we're going to use it to fill the buckets. 
// uint64 tmpPosition = 0; uint64 begPosition = 0; uint64 ptr = 0; if (_compressedHash) { for (uint64 i=0; inextMer()) { if (_isForward) insertMer(HASH(M->theFMer()), CHECK(M->theFMer()), 1, countingTable); if (_isCanonical) insertMer(HASH(M->theCMer()), CHECK(M->theCMer()), 1, countingTable); } delete M; // Compress out the gaps we have from redundant kmers. uint64 pos = 0; uint64 frm = 0; uint64 len = 0; for (uint64 i=0; i #include #include #include #include "existDB.H" #include "bio++.H" #include "seqCache.H" #include "seqStream.H" #include "merStream.H" bool existDB::createFromFastA(char const *filename, uint32 merSize, uint32 flags) { bool beVerbose = false; bool rebuilding = false; _hashTable = 0L; _buckets = 0L; _counts = 0L; _merSizeInBases = merSize; _searchForDupe = true; if ((flags & existDBcompressHash) || (flags & existDBcompressBuckets) || (flags & existDBcompressCounts)) fprintf(stderr, "existDB::createFromSequence: compression not supported.\n"), exit(1); // This (at =22) eats up 16MB, and should allow a lot of mers at big sizes. Unfortunately, we // know nothing about how man mers are going to be in the input. // // Setting this too high drastically reduces performance, suspected because of cache misses. // Setting this too low will also reduce performance, by increasing the search time in a bucket. 
// uint32 tblBits = logBaseTwo64(sizeOfFile(filename)); rebuild: _shift1 = 2 * _merSizeInBases - tblBits; _shift2 = _shift1 / 2; _mask1 = uint64MASK(tblBits); _mask2 = uint64MASK(_shift1); _hshWidth = uint32ZERO; _chkWidth = 2 * merSize - tblBits; _cntWidth = 16; uint64 tableSizeInEntries = uint64ONE << tblBits; uint64 numberOfMers = uint64ZERO; uint64 *countingTable = new uint64 [tableSizeInEntries + 1]; for (uint64 i=tableSizeInEntries+1; i--; ) countingTable[i] = 0; _isCanonical = flags & existDBcanonical; _isForward = flags & existDBforward; assert(_isCanonical + _isForward == 1); //////////////////////////////////////////////////////////////////////////////// // // 1) Count bucket sizes // merStream *M = new merStream(new kMerBuilder(_merSizeInBases), new seqStream(filename), true, true); while (M->nextMer()) { if (_isForward) { countingTable[ HASH(M->theFMer()) ]++; numberOfMers++; } if (_isCanonical) { countingTable[ HASH(M->theCMer()) ]++; numberOfMers++; } } delete M; #ifdef STATS uint64 dist[32] = {0}; uint64 maxcnt = 0; for (uint64 i=tableSizeInEntries+1; i--; ) { if (countingTable[i] > maxcnt) maxcnt = countingTable[i]; if (countingTable[i] < 32) dist[countingTable[i]]++; } for(uint64 i=0; i<32; i++) fprintf(stderr, "existDB::usage[%2d] = %d\n", i, dist[i]); fprintf(stderr, "existDB::maxcnt = %d\n", maxcnt); #endif //////////////////////////////////////////////////////////////////////////////// // // Determine how many bits we need to hold the value // numberOfMers.....then.... // // This is numberOfMers+1 because we need to store the // first position after the last mer. That is, if there are two // mers, we will store that the first mer is at position 0, the // second mer is at position 1, and the end of the second mer is at // position 2. 
// if (_compressedHash) { _hshWidth = 1; while ((numberOfMers+1) > (uint64ONE << _hshWidth)) _hshWidth++; } //////////////////////////////////////////////////////////////////////////////// // // 2) Allocate a hash table and some mer storage buckets. // _hashTableWords = tableSizeInEntries + 2; if (_compressedHash) _hashTableWords = _hashTableWords * _hshWidth / 64 + 1; _bucketsWords = numberOfMers + 2; if (_compressedBucket) _bucketsWords = _bucketsWords * _chkWidth / 64 + 1; _countsWords = numberOfMers + 2; if (_compressedCounts) _countsWords = _countsWords * _cntWidth / 64 + 1; if (beVerbose) { fprintf(stderr, "existDB::createFromFastA()-- hashTable is "uint64FMT"MB\n", _hashTableWords >> 17); fprintf(stderr, "existDB::createFromFastA()-- buckets is "uint64FMT"MB\n", _bucketsWords >> 17); if (flags & existDBcounts) fprintf(stderr, "existDB::createFromFastA()-- counts is "uint64FMT"MB\n", _countsWords >> 17); } _hashTable = new uint64 [_hashTableWords]; _buckets = new uint64 [_bucketsWords]; _countsWords = (flags & existDBcounts) ? _countsWords : 0; _counts = (flags & existDBcounts) ? new uint64 [_countsWords] : 0L; // These aren't strictly needed. _buckets is cleared as it is initialied. _hashTable // is also cleared as it is initialized, but in the _compressedHash case, the last // few words might be uninitialized. They're unused. // //memset(_hashTable, 0, sizeof(uint64) * _hashTableWords); //memset(_buckets, 0, sizeof(uint64) * _bucketsWords); // buckets is cleared as it is built //memset(_counts, 0, sizeof(uint64) * _countsWords); _hashTable[_hashTableWords-1] = 0; _hashTable[_hashTableWords-2] = 0; _hashTable[_hashTableWords-3] = 0; _hashTable[_hashTableWords-4] = 0; //////////////////////////////////////////////////////////////////////////////// // // Make the hash table point to the start of the bucket, and reset // the counting table -- we're going to use it to fill the buckets. 
// uint64 tmpPosition = 0; uint64 begPosition = 0; uint64 ptr = 0; if (_compressedHash) { for (uint64 i=0; inextMer()) { if (_isForward) insertMer(HASH(M->theFMer()), CHECK(M->theFMer()), 1, countingTable); if (_isCanonical) insertMer(HASH(M->theCMer()), CHECK(M->theCMer()), 1, countingTable); } delete M; // Compress out the gaps we have from redundant kmers. uint64 pos = 0; uint64 frm = 0; uint64 len = 0; for (uint64 i=0; i #include "bio++.H" #include "merStream.H" // The two existDB inputs can be either forward or canonical. If // canonical, we are smart enough to search exist/only with the // canonical mer. // Returns position in posn, resizing it if needed. Space is // allocated if none supplied. The following is valid: // // uint64 *posn = 0L; // uint64 posnMax = 0; // uint64 posnLen = 0; // if (get(somemer, posn, posnMax, posnLen)) { // do something with the positions // } // // exists() returns T/F if mer exists or not // count() returns the number of times that mer is present // Define this to use an uncompressed hash table when the width is 32 // bits or less. Doing so is A LOT faster in mismatch lookups, but // does use more memory. #undef UNCOMPRESS_HASH_TABLE // Define this to leave out references to getTime(), speedCounter() // and make the positionDB build very quietly. 
#undef SILENTPOSITIONDB // Define these to enable some debugging methods #undef DEBUGPOSDB #undef DEBUGREBUILD class existDB; class merylStreamReader; class positionDB { public: positionDB(char const *filename, uint32 merSize, uint32 merSkip, uint32 maxMismatch, bool loadData=true); positionDB(merStream *MS, uint32 merSize, uint32 merSkip, existDB *mask, existDB *only, merylStreamReader *counts, uint32 minCount, uint32 maxCount, uint32 maxMismatch, uint32 maxMemory, bool beVerbose); ~positionDB(); private: void build(merStream *MS, existDB *mask, existDB *only, merylStreamReader *counts, uint32 minCount, uint32 maxCount, bool beVerbose); private: void reallocateSpace(uint64*& posn, uint64& posnMax, uint64& posnLen, uint64 len); void loadPositions(uint64 v, uint64*& posn, uint64& posnMax, uint64& posnLen, uint64& count); public: bool getExact(uint64 mer, uint64*& posn, uint64& posnMax, uint64& posnLen, uint64& count); bool existsExact(uint64 mer); uint64 countExact(uint64 mer); public: void filter(uint64 lo, uint64 hi); private: double setUpMismatchMatcher(uint32 nErrorsAllowed, uint64 approxMers); public: bool getUpToNMismatches(uint64 mer, uint32 maxMismatches, uint64*& posn, uint64& posnMax, uint64& posnLen); private: uint64 setCount(uint64 mer, uint64 count); // Save or load a built table // public: void saveState(char const *filename); bool loadState(char const *filename, bool beNoisy=false, bool loadData=true); void printState(FILE *stream); // Only really useful for debugging. Don't use. 
// void dump(char *name); bool checkREBUILD(uint64 m) { #define DEBUGREBUILD #ifdef DEBUGREBUILD uint64 h = HASH(m); uint64 c = CHECK(m); uint64 r = REBUILD(h, c); if (r != m) { fprintf(stderr, "shift1 = "uint32FMT"\n", _shift1); fprintf(stderr, "shift2 = "uint32FMT"\n", _shift2); fprintf(stderr, "M = "uint64HEX"\n", m); fprintf(stderr, "H = "uint64HEX"\n", h); fprintf(stderr, "C = "uint64HEX"\n", c); fprintf(stderr, "R = "uint64HEX"\n", r); return(false); } return(true); #else return(REBUILD(HASH(m), CHECK(m)) == m); #endif }; private: uint64 HASH(uint64 k) { return(((k >> _shift1) ^ (k >> _shift2) ^ k) & _mask1); }; uint64 CHECK(uint64 k) { return(k & _mask2); }; uint64 REBUILD(uint64 h, uint64 c) { // Decode a HASH and a CHECK to get back the mer. You'd better // bloody PRAY you don't break this (test/test-rebuild.C). It // was a headache++ to write. uint64 sha = _shift1 - _shift2; uint64 msk = uint64MASK(sha); // The check is exactly the mer....just not all there. uint64 mer = c; uint64 shf = sha - (_tableSizeInBits % 2); uint64 shg = 0; uint64 shh = _shift1; // Unrolling this is troublesome - we still need the tests, // bizarre merSize, tblSize combinations use lots of iterations // (when the merSize and tblSize are about the same, the CHECK is // small, and so we need to do lots of iterations). 
//fprintf(stderr, "shf="uint64FMTW(2)" shg="uint64FMTW(2)" shh="uint64FMTW(2)" mer="uint64HEX"\n", shf, shg, shh, mer); do { mer |= (((h >> shg) ^ (mer >> shg) ^ (mer >> shf)) & msk) << shh; //fprintf(stderr, "shf="uint64FMTW(2)" shg="uint64FMTW(2)" shh="uint64FMTW(2)" mer="uint64HEX"\n", shf, shg, shh, mer); shf += sha; shg += sha; shh += sha; } while ((shf < _merSizeInBits) && (shh < 64)); mer &= uint64MASK(_merSizeInBits); return(mer); }; void sortAndRepackBucket(uint64 b); uint32 *_bucketSizes; uint64 *_countingBuckets; uint64 *_hashTable_BP; // Bit packed uint32 *_hashTable_FW; // Full width uint64 *_buckets; uint64 *_positions; uint32 _merSizeInBases; uint32 _merSizeInBits; uint32 _merSkipInBases; uint64 _tableSizeInEntries; uint32 _tableSizeInBits; uint32 _hashWidth; // Hash bith uint32 _chckWidth; // Check bits uint32 _posnWidth; // Positions in the sequence uint32 _pptrWidth; // Pointers to positions uint32 _sizeWidth; // Extra number in the table uint64 _hashMask; uint32 _wCnt; uint32 _wFin; uint32 _shift1; uint32 _shift2; uint64 _mask1; uint64 _mask2; uint64 _numberOfMers; uint64 _numberOfPositions; uint64 _numberOfDistinct; uint64 _numberOfUnique; uint64 _numberOfEntries; uint64 _maximumEntries; // For sorting the mers // uint32 _sortedMax; uint64 *_sortedChck; uint64 *_sortedPosn; // For the mismatch matcher uint32 _nErrorsAllowed; uint32 _hashedErrorsLen; uint32 _hashedErrorsMax; uint64 *_hashedErrors; }; #endif // POSITIONDB_H kmer-code-2013-trunk/libkmer/positionDB-sort.C0000644000000000000000000000715712322046702017746 0ustar rootroot#include "positionDB.H" #include "bio++.H" void adjustHeap(uint64 *C, uint64 *P, int64 i, int64 n) { uint64 c = C[i]; uint64 p = P[i]; int64 j = (i << 1) + 1; // let j be the left child while (j < n) { if (j= C[j]) // a position for M[i] has been found break; C[(j-1)/2] = C[j]; // Move larger child up a level P[(j-1)/2] = P[j]; j = (j << 1) + 1; } C[(j-1)/2] = c; P[(j-1)/2] = p; } void 
positionDB::sortAndRepackBucket(uint64 b) { uint64 st = _bucketSizes[b]; uint64 ed = _bucketSizes[b+1]; uint32 le = (uint32)(ed - st); if (ed < st) fprintf(stdout, "ERROR: Bucket "uint64FMT" starts at "uint64FMT" ends at "uint64FMT"?\n", b, st, ed); if (le == 0) return; // One mer in the list? It's distinct and unique! (and doesn't // contribute to the position list space count) // if (le == 1) { _numberOfDistinct++; _numberOfUnique++; return; } // Allocate more space, if we need to. // if (_sortedMax <= le) { _sortedMax = le + 1024; delete [] _sortedChck; delete [] _sortedPosn; _sortedChck = new uint64 [_sortedMax]; _sortedPosn = new uint64 [_sortedMax]; } // Unpack the bucket // uint64 lens[3] = {_chckWidth, _posnWidth, 1 + _sizeWidth}; uint64 vals[3] = {0}; for (uint64 i=st, J=st * _wCnt; i=0; t--) { if (_sortedPosn[t] == uint64MASK(_posnWidth)) { unsetBucket = 1; fprintf(stdout, "ERROR: unset posn bucket="uint64FMT" t="int64FMT" le="uint32FMT"\n", b, t, le); } adjustHeap(_sortedChck, _sortedPosn, t, le); } if (unsetBucket) for (uint32 t=0; t0; t--) { uint64 tc = _sortedChck[t]; uint64 tp = _sortedPosn[t]; _sortedChck[t] = _sortedChck[0]; _sortedPosn[t] = _sortedPosn[0]; _sortedChck[0] = tc; _sortedPosn[0] = tp; adjustHeap(_sortedChck, _sortedPosn, 0, t); } // Scan the list of sorted mers, counting the number of distinct and unique, // and the space needed in the position list. uint64 entries = 1; // For t=0 for (uint32 t=1; t _sortedChck[t]) fprintf(stdout, "ERROR: bucket="uint64FMT" t="uint32FMT" le="uint32FMT": "uint64HEX" > "uint64HEX"\n", b, t, le, _sortedChck[t-1], _sortedChck[t]); if (_sortedChck[t-1] != _sortedChck[t]) { _numberOfDistinct++; if (_maximumEntries < entries) _maximumEntries = entries; if (entries == 1) _numberOfUnique++; else _numberOfEntries += entries + 1; // +1 for the length entries = 0; } entries++; } // Don't forget the last mer! 
// _numberOfDistinct++; if (_maximumEntries < entries) _maximumEntries = entries; if (entries == 1) _numberOfUnique++; else _numberOfEntries += entries + 1; // Repack the sorted entries // for (uint64 i=st, J=st * _wCnt; i #include #include #include #include static char magic[16] = { 'p', 'o', 's', 'i', 't', 'i', 'o', 'n', 'D', 'B', '.', 'v', '1', ' ', ' ', ' ' }; static char faild[16] = { 'p', 'o', 's', 'i', 't', 'i', 'o', 'n', 'D', 'B', 'f', 'a', 'i', 'l', 'e', 'd' }; void positionDB::saveState(char const *filename) { fprintf(stderr, "Saving positionDB to '%s'\n", filename); errno = 0; int F = open(filename, O_RDWR | O_CREAT | O_LARGEFILE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (errno) { fprintf(stderr, "Can't open '%s' for writing positionDB.\n%s\n", filename, strerror(errno)); exit(1); } bool magicFirst = false; // Test if this is a pipe. If so, we write the magic first, // otherwise we write the magic last. // errno = 0; lseek(F, 0, SEEK_SET); if (errno == ESPIPE) magicFirst = true; if (magicFirst) write(F, magic, sizeof(char) * 16); else write(F, faild, sizeof(char) * 16); if (errno) { fprintf(stderr, "positionDB::saveState()-- Write failure on magic first.\n%s\n", strerror(errno)); exit(1); } // If only to be completely annoying and anal, we clear the // pointers before we write the data. Sure, we could just write // the stuff we care about, but this is easier. This is easier. // Before you go rip out this stuff, remember that you can now // checksum the resulting files. So don't do it. // uint32 *bs = _bucketSizes; uint64 *cb = _countingBuckets; uint64 *hp = _hashTable_BP; uint32 *hw = _hashTable_FW; uint64 *bu = _buckets; uint64 *ps = _positions; uint64 *he = _hashedErrors; _bucketSizes = 0L; _countingBuckets = 0L; _hashTable_BP = (uint64 *)((_hashTable_BP) ? uint64ONE : uint64ZERO); _hashTable_FW = (uint32 *)((_hashTable_FW) ? 
uint32ONE : uint32ZERO); _buckets = 0L; _positions = 0L; _hashedErrors = 0L; safeWrite(F, this, "this", sizeof(positionDB) * 1); _bucketSizes = bs; _countingBuckets = cb; _hashTable_BP = hp; _hashTable_FW = hw; _buckets = bu; _positions = ps; _hashedErrors = he; if (_hashTable_BP) { safeWrite(F, _hashTable_BP, "_hashTable_BP", sizeof(uint64) * (_tableSizeInEntries * _hashWidth / 64 + 1)); } else { safeWrite(F, _hashTable_FW, "_hashTable_FW", sizeof(uint32) * (_tableSizeInEntries + 1)); } safeWrite(F, _buckets, "_buckets", sizeof(uint64) * (_numberOfDistinct * _wFin / 64 + 1)); safeWrite(F, _positions, "_positions", sizeof(uint64) * (_numberOfEntries * _posnWidth / 64 + 1)); safeWrite(F, _hashedErrors, "_hashedErrors", sizeof(uint64) * (_hashedErrorsLen)); if (magicFirst == false) { lseek(F, 0, SEEK_SET); if (errno) { fprintf(stderr, "positionDB::saveState()-- Failed to seek to start of file -- write failed.\n%s\n", strerror(errno)); exit(1); } write(F, magic, sizeof(char) * 16); if (errno) { fprintf(stderr, "positionDB::saveState()-- Write failure on magic last.\n%s\n", strerror(errno)); exit(1); } } close(F); } bool positionDB::loadState(char const *filename, bool beNoisy, bool loadData) { char cigam[16] = { 0 }; fprintf(stderr, "Loading positionDB from '%s'\n", filename); errno = 0; int F = open(filename, O_RDONLY | O_LARGEFILE, 0); if (errno) { fprintf(stderr, "Can't open '%s' for reading pre-built positionDB: %s\n", filename, strerror(errno)); return(false); } safeRead(F, cigam, "Magic Number", sizeof(char) * 16); if (strncmp(faild, cigam, 16) == 0) { if (beNoisy) { fprintf(stderr, "positionDB::loadState()-- Incomplete positionDB binary file.\n"); fprintf(stderr, "positionDB::loadState()-- Read '%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c'\n", cigam[0], cigam[1], cigam[2], cigam[3], cigam[4], cigam[5], cigam[6], cigam[7], cigam[8], cigam[9], cigam[10], cigam[11], cigam[12], cigam[13], cigam[14], cigam[15]); fprintf(stderr, "positionDB::loadState()-- Expected 
'%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c'\n", magic[0], magic[1], magic[2], magic[3], magic[4], magic[5], magic[6], magic[7], magic[8], magic[9], magic[10], magic[11], magic[12], magic[13], magic[14], magic[15]); } close(F); return(false); } else if (strncmp(magic, cigam, 16) != 0) { if (beNoisy) { fprintf(stderr, "positionDB::loadState()-- Not a positionDB binary file, maybe a sequence file?\n"); fprintf(stderr, "positionDB::loadState()-- Read '%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c'\n", cigam[0], cigam[1], cigam[2], cigam[3], cigam[4], cigam[5], cigam[6], cigam[7], cigam[8], cigam[9], cigam[10], cigam[11], cigam[12], cigam[13], cigam[14], cigam[15]); fprintf(stderr, "positionDB::loadState()-- Expected '%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c'\n", magic[0], magic[1], magic[2], magic[3], magic[4], magic[5], magic[6], magic[7], magic[8], magic[9], magic[10], magic[11], magic[12], magic[13], magic[14], magic[15]); } close(F); return(false); } safeRead(F, this, "positionDB", sizeof(positionDB) * 1); _bucketSizes = 0L; _countingBuckets = 0L; _buckets = 0L; _positions = 0L; _hashedErrors = 0L; if (loadData) { uint64 hs = _tableSizeInEntries * _hashWidth / 64 + 1; uint64 bs = _numberOfDistinct * _wFin / 64 + 1; uint64 ps = _numberOfEntries * _posnWidth / 64 + 1; if (_hashTable_BP) { _hashTable_BP = new uint64 [hs]; _hashTable_FW = 0L; safeRead(F, _hashTable_BP, "_hashTable_BP", sizeof(uint64) * hs); } else { _hashTable_BP = 0L; _hashTable_FW = new uint32 [_tableSizeInEntries + 1]; safeRead(F, _hashTable_FW, "_hashTable_FW", sizeof(uint32) * (_tableSizeInEntries + 1)); } _buckets = new uint64 [bs]; _positions = new uint64 [ps]; _hashedErrors = new uint64 [_hashedErrorsMax]; safeRead(F, _buckets, "_buckets", sizeof(uint64) * bs); safeRead(F, _positions, "_positions", sizeof(uint64) * ps); safeRead(F, _hashedErrors, "_hashedErrors", sizeof(uint64) * _hashedErrorsLen); } close(F); return(true); } void positionDB::printState(FILE *stream) { fprintf(stream, "merSizeInBases: "uint32FMT"\n", 
_merSizeInBases); fprintf(stream, "merSkipInBases: "uint32FMT"\n", _merSkipInBases); fprintf(stream, "tableSizeInBits: "uint32FMT"\n", _tableSizeInBits); fprintf(stream, "tableSizeInEntries: "uint64FMT"\n", _tableSizeInEntries); fprintf(stream, "hashWidth: "uint32FMT"\n", _hashWidth); fprintf(stream, "chckWidth: "uint32FMT"\n", _chckWidth); fprintf(stream, "posnWidth: "uint32FMT"\n", _posnWidth); fprintf(stream, "numberOfMers: "uint64FMT"\n", _numberOfMers); fprintf(stream, "numberOfPositions: "uint64FMT"\n", _numberOfPositions); fprintf(stream, "numberOfDistinct: "uint64FMT"\n", _numberOfDistinct); fprintf(stream, "numberOfUnique: "uint64FMT"\n", _numberOfUnique); fprintf(stream, "numberOfEntries: "uint64FMT"\n", _numberOfEntries); fprintf(stream, "maximumEntries: "uint64FMT"\n", _maximumEntries); } kmer-code-2013-trunk/libkmer/positionDB-dump.C0000644000000000000000000000241712322046702017716 0ustar rootroot#include #include #include "positionDB.H" #include "bio++.H" void positionDB::dump(char *name) { uint64 sizs[4] = {_chckWidth, _pptrWidth, 1, _sizeWidth}; uint64 vals[4] = {0, 0, 0, 0}; FILE *F = fopen(name, "w"); for (uint64 h=0; h<_tableSizeInEntries; h++) { uint64 st, ed; if (_hashTable_BP) { st = getDecodedValue(_hashTable_BP, h * _hashWidth, _hashWidth); ed = getDecodedValue(_hashTable_BP, h * _hashWidth + _hashWidth, _hashWidth); } else { st = _hashTable_FW[h]; ed = _hashTable_FW[h+1]; } fprintf(F, "B "uint64FMT" "uint64FMT"-"uint64FMT"\n", h, st, ed); while (st < ed) { uint64 cb = st * _wFin; getDecodedValues(_buckets, cb, (_sizeWidth == 0) ? 3 : 4, sizs, vals); fprintf(F, "%c chk="uint64HEX" pos="uint64FMT" siz="uint64FMT, (vals[2] == 0) ? 
'D' : 'U', vals[0], vals[1], vals[3]); if (vals[2] == 0) { uint64 pos = vals[1] * _posnWidth; uint64 len = getDecodedValue(_positions, pos, _posnWidth); for (pos += _posnWidth; len > 0; pos += _posnWidth, len--) fprintf(F, " "uint64FMT, getDecodedValue(_positions, pos, _posnWidth)); } fprintf(F, "\n"); st++; } } fclose(F); } kmer-code-2013-trunk/libkmer/existDB.C0000644000000000000000000000765312322046702016252 0ustar rootroot#include #include #include #include #include "existDB.H" #include "bio++.H" existDB::existDB(char const *filename, bool loadData) { clear(); _compressedHash = false; _compressedBucket = false; if (loadState(filename, true, loadData) == false) { fprintf(stderr, "existDB::existDB()-- Tried to read state from '%s', but failed.\n", filename); exit(1); } } existDB::existDB(char const *filename, uint32 merSize, existDBflags flags, uint32 lo, uint32 hi) { clear(); _compressedHash = flags & existDBcompressHash; _compressedBucket = flags & existDBcompressBuckets; _compressedCounts = flags & existDBcompressCounts; _searchForDupe = false; // Try to read state from the filename. If successful, make sure // that the merSize is correct. // if (loadState(filename)) { bool fail = false; if (_merSizeInBases != merSize) { fprintf(stderr, "existDB::existDB()-- Read state from '%s', but got different mer sizes\n", filename); fprintf(stderr, "existDB::existDB()-- Got "uint32FMT", expected "uint32FMT"\n", _merSizeInBases, merSize); fail = true; } if (fail) exit(1); return; } // If no direction flags are set, set the default direction of // forward. Stupid precedence rules. // if ((flags & (existDBcanonical | existDBforward)) == uint32ZERO) flags |= existDBforward; // If we can open 'filename' for reading, then we assume the file // is a multi-fasta, and we build an existDB/ // // Otherwise, we assume that 'filename' is really the prefix for a // meryl database. 
if (fileExists(filename)) createFromFastA(filename, merSize, flags); else createFromMeryl(filename, merSize, lo, hi, flags); } existDB::existDB(char const *sequence, uint32 merSize, existDBflags flags) { clear(); _compressedHash = flags & existDBcompressHash; _compressedBucket = flags & existDBcompressBuckets; _compressedCounts = flags & existDBcompressCounts; if ((flags & (existDBcanonical | existDBforward)) == uint32ZERO) flags |= existDBforward; createFromSequence(sequence, merSize, flags); } existDB::~existDB() { delete [] _hashTable; delete [] _buckets; delete [] _counts; } bool existDB::exists(uint64 mer) { uint64 c, h, st, ed; if (_compressedHash) { h = HASH(mer) * _hshWidth; st = getDecodedValue(_hashTable, h, _hshWidth); ed = getDecodedValue(_hashTable, h + _hshWidth, _hshWidth); } else { h = HASH(mer); st = _hashTable[h]; ed = _hashTable[h+1]; } if (st == ed) return(false); c = CHECK(mer); if (_compressedBucket) { st *= _chkWidth; ed *= _chkWidth; for (; st #include #include #include #include "existDB.H" #include "libmeryl.H" bool existDB::createFromMeryl(char const *prefix, uint32 merSize, uint32 lo, uint32 hi, uint32 flags) { merylStreamReader *M = new merylStreamReader(prefix); bool beVerbose = false; _hashTable = 0L; _buckets = 0L; _counts = 0L; _merSizeInBases = M->merSize(); if (merSize != _merSizeInBases) { fprintf(stderr, "createFromMeryl()-- ERROR: requested merSize ("uint32FMT") is different than merSize in meryl database ("uint32FMT").\n", merSize, _merSizeInBases); exit(1); } // We can set this exactly, but not memory optimal (see meryl/estimate.C:optimalNumberOfBuckets()). // Instead, we just blindly use whatever meryl used. // uint32 tblBits = M->prefixSize(); // But it is faster to reset to this. Might use 2x the memory. 
//uint32 tblBits = logBaseTwo64(M->numberOfDistinctMers() + 1); _shift1 = 2 * _merSizeInBases - tblBits; _shift2 = _shift1 / 2; _mask1 = uint64MASK(tblBits); _mask2 = uint64MASK(_shift1); _hshWidth = uint32ZERO; _chkWidth = 2 * _merSizeInBases - tblBits; _cntWidth = 16; uint64 tableSizeInEntries = uint64ONE << tblBits; uint64 numberOfMers = uint64ZERO; uint64 *countingTable = new uint64 [tableSizeInEntries + 1]; if (beVerbose) { fprintf(stderr, "createFromMeryl()-- tableSizeInEntries "uint64FMT"\n", tableSizeInEntries); fprintf(stderr, "createFromMeryl()-- count range "uint32FMT"-"uint32FMT"\n", lo, hi); } for (uint64 i=tableSizeInEntries+1; i--; ) countingTable[i] = 0; _isCanonical = flags & existDBcanonical; _isForward = flags & existDBforward; if (beVerbose) { fprintf(stderr, "createFromMeryl()-- canonical %c\n", (_isCanonical) ? 'T' : 'F'); fprintf(stderr, "createFromMeryl()-- forward %c\n", (_isForward) ? 'T' : 'F'); } assert(_isCanonical + _isForward == 1); // 1) Count bucket sizes // While we don't know the bucket sizes right now, but we do know // how many buckets and how many mers. // // Because we could be inserting both forward and reverse, we can't // really move the direction testing outside the loop, unless we // want to do two iterations over M. 
// speedCounter *C = new speedCounter(" %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, beVerbose); while (M->nextMer()) { if ((lo <= M->theCount()) && (M->theCount() <= hi)) { if (_isForward) { countingTable[ HASH(M->theFMer()) ]++; numberOfMers++; } if (_isCanonical) { kMer r = M->theFMer(); r.reverseComplement(); if (M->theFMer() < r) countingTable[ HASH(M->theFMer()) ]++; else countingTable[ HASH(r) ]++; numberOfMers++; } C->tick(); } } if (beVerbose) fprintf(stderr, "createFromMeryl()-- numberOfMers "uint64FMT"\n", numberOfMers); delete C; delete M; if (_compressedHash) { _hshWidth = 1; while ((numberOfMers+1) > (uint64ONE << _hshWidth)) _hshWidth++; } if (beVerbose) { fprintf(stderr, "existDB::createFromMeryl()-- Found "uint64FMT" mers between count of "uint32FMT" and "uint32FMT"\n", numberOfMers, lo, hi); } // 2) Allocate hash table, mer storage buckets // _hashTableWords = tableSizeInEntries + 2; if (_compressedHash) _hashTableWords = _hashTableWords * _hshWidth / 64 + 1; _bucketsWords = numberOfMers + 2; if (_compressedBucket) _bucketsWords = _bucketsWords * _chkWidth / 64 + 1; _countsWords = numberOfMers + 2; if (_compressedCounts) _countsWords = _countsWords * _cntWidth / 64 + 1; if (beVerbose) { fprintf(stderr, "existDB::createFromMeryl()-- hashTable is "uint64FMT"MB\n", _hashTableWords >> 17); fprintf(stderr, "existDB::createFromMeryl()-- buckets is "uint64FMT"MB\n", _bucketsWords >> 17); if (flags & existDBcounts) fprintf(stderr, "existDB::createFromMeryl()-- counts is "uint64FMT"MB\n", _countsWords >> 17); } _hashTable = new uint64 [_hashTableWords]; _buckets = new uint64 [_bucketsWords]; _countsWords = (flags & existDBcounts) ? _countsWords : 0; _counts = (flags & existDBcounts) ? new uint64 [_countsWords] : 0L; // These aren't strictly needed. _buckets is cleared as it is initialied. _hashTable // is also cleared as it is initialized, but in the _compressedHash case, the last // few words might be uninitialized. They're unused. 
//memset(_hashTable, 0, sizeof(uint64) * _hashTableWords); //memset(_buckets, 0, sizeof(uint64) * _bucketsWords); // buckets is cleared as it is built //memset(_counts, 0, sizeof(uint64) * _countsWords); _hashTable[_hashTableWords-1] = 0; _hashTable[_hashTableWords-2] = 0; _hashTable[_hashTableWords-3] = 0; _hashTable[_hashTableWords-4] = 0; //////////////////////////////////////////////////////////////////////////////// // // Make the hash table point to the start of the bucket, and reset // the counting table -- we're going to use it to fill the buckets. // uint64 tmpPosition = 0; uint64 begPosition = 0; uint64 ptr = 0; if (_compressedHash) { for (uint64 i=0; inextMer()) { if ((lo <= M->theCount()) && (M->theCount() <= hi)) { if (_isForward) insertMer(HASH(M->theFMer()), CHECK(M->theFMer()), M->theCount(), countingTable); if (_isCanonical) { kMer r = M->theFMer(); r.reverseComplement(); if (M->theFMer() < r) insertMer(HASH(M->theFMer()), CHECK(M->theFMer()), M->theCount(), countingTable); else insertMer(HASH(r), CHECK(r), M->theCount(), countingTable); numberOfMers++; } C->tick(); } } delete C; delete M; delete [] countingTable; return(true); } kmer-code-2013-trunk/libkmer/positionDB-mismatch.C0000644000000000000000000002653512322046702020565 0ustar rootroot#include "positionDB.H" #include "bio++.H" static int stringscmp(const void *A, const void *B) { uint64 const a = *(uint64 const *)A; uint64 const b = *(uint64 const *)B; if (a < b) return(-1); if (a > b) return(1); return(0); } static uint32 makeUnique(uint64 *strings, uint32 stringsLen) { qsort(strings, stringsLen, sizeof(uint64), stringscmp); uint32 len = 0; uint32 nxt = 1; while (nxt < stringsLen) { if (strings[len] != strings[nxt]) { len++; strings[len] = strings[nxt]; } nxt++; } return(len+1); } #if 0 // debug static void dumpPatterns(uint64 *strings, uint32 stringsLen, uint32 ts) { for (uint32 i=0; i= stringsMax) stringsLen = makeUnique(strings, stringsLen); for (uint32 x=0; x<243; x++) 
strings[stringsLen++] = HASH((m1 & e1[x]) ^ (m2 & e2[x]) ^ (m3 & e3[x]) ^ (m4 & e4[x]) ^ (m5 & e5[x])); } stringsLen = makeUnique(strings, stringsLen); stringsLen = makeUnique(strings, stringsLen); //dumpPatterns(strings, stringsLen, _tableSizeInBits); //fprintf(stderr, "DONE5 totpat="uint64FMT" toterr="uint64FMT" stringsLen="uint32FMT"\n", totpat, toterr, stringsLen); } // Six errors if (6 <= _nErrorsAllowed) { for (uint32 ai=0; ai<_merSizeInBases; ai++) for (uint32 bi=0; bi= stringsMax) stringsLen = makeUnique(strings, stringsLen); for (uint32 x=0; x<729; x++) strings[stringsLen++] = HASH((m1 & e1[x]) ^ (m2 & e2[x]) ^ (m3 & e3[x]) ^ (m4 & e4[x]) ^ (m5 & e5[x]) ^ (m6 & e6[x])); } stringsLen = makeUnique(strings, stringsLen); stringsLen = makeUnique(strings, stringsLen); //dumpPatterns(strings, stringsLen, _tableSizeInBits); //fprintf(stderr, "DONE6 totpat="uint64FMT" toterr="uint64FMT" stringsLen="uint32FMT"\n", totpat, toterr, stringsLen); } if (7 <= _nErrorsAllowed) { fprintf(stderr, "Only 6 errors allowed.\n"); exit(1); } for (uint32 i=1; i> 1)); if (err <= numMismatches) { diffs = REBUILD(hash, chck) ^ mer; d1 = diffs & uint64NUMBER(0x5555555555555555); d2 = diffs & uint64NUMBER(0xaaaaaaaaaaaaaaaa); err = countNumberOfSetBits64(d1 | (d2 >> 1)); if (err <= numMismatches) // err is junk, just need a parameter here loadPositions(J, posn, posnMax, posnLen, err); } } } } return(posnLen > 0); } kmer-code-2013-trunk/libkmer/Make.include0000644000000000000000000000225312527037073017025 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../libutil/)/ LIBBIO/ :=$(realpath $/../libbio/)/ LIBSEQ/ :=$(realpath $/../libseq/)/ LIBMERYL/ :=$(realpath $/../libmeryl/)/ src := $/existDB-create-from-fasta.C \ $/existDB-create-from-meryl.C \ $/existDB-create-from-sequence.C \ $/existDB-state.C \ $/existDB.C \ $/existDB.H \ $/merTable.H \ $/positionDB-access.C \ $/positionDB-dump.C \ $/positionDB-file.C \ $/positionDB-mismatch.C \ $/positionDB-sort.C \ $/positionDB.C \ 
$/positionDB.H $/.CXX_SRCS := $(filter %.C,${src}) $/driver-existDB.C $/driver-posDB.C $/.CXX_INCS := $(filter %.H,${src}) $/.CXX_EXES := $/existDB $/positionDB $/.CXX_LIBS := $/libkmer.a $/.CLEAN := $/*.o $/libkmer.a: $(filter %.o,${src:.C=.o}) $/existDB: $/driver-existDB.o $/libkmer.a ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/positionDB: $/driver-posDB.o $/libkmer.a ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $(eval $/%.d $/%.o: CXXFLAGS+= -I${LIBMERYL/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) kmer-code-2013-trunk/libkmer/driver-posDB.C0000644000000000000000000002250412322046702017200 0ustar rootroot#include #include #include #include #include "bio++.H" #include "existDB.H" #include "positionDB.H" #include "seqCache.H" #include "seqStream.H" #include "merStream.H" // Driver for the positionDB creation. Reads a sequence.fasta, builds // a positionDB for the mers in the file, and then writes the internal // structures to disk. // // The positionDB constructor is smart enough to read either a pre-built // image or a regular multi-fasta file. 
#define MERSIZE 20 int test1(char *filename) { merStream *T = new merStream(new kMerBuilder(MERSIZE), new seqStream(filename), true, true); positionDB *M = new positionDB(T, MERSIZE, 0, 0L, 0L, 0L, 0, 0, 0, 0, true); uint64 *posn = new uint64 [1024]; uint64 posnMax = 1024; uint64 posnLen = uint64ZERO; uint64 count = uint64ZERO; uint32 missing = uint32ZERO; uint32 failed = uint32ZERO; char str[33]; T->rewind(); while (T->nextMer()) { if (M->getExact(T->theFMer(), posn, posnMax, posnLen, count)) { missing = uint32ZERO; for (uint32 i=0; ithePositionInStream()) missing++; if (missing != 1) { failed++; fprintf(stdout, "%s @ "uint64FMT"/"uint64FMT": Found "uint64FMT" table entries, and "uint32FMT" matching positions (", T->theFMer().merToString(str), T->theSequenceNumber(), T->thePositionInStream(), posnLen, missing); for (uint32 i=0; itheFMer().merToString(str), T->thePositionInStream()); } } delete M; delete T; return(failed != 0); } int test2(char *filename, char *query) { merStream *T = new merStream(new kMerBuilder(MERSIZE), new seqStream(filename), true, true); positionDB *M = new positionDB(T, MERSIZE, 0, 0L, 0L, 0L, 0, 0, 0, 0, true); uint64 *posn = new uint64 [1024]; uint64 posnMax = 1024; uint64 posnLen = uint64ZERO; uint64 count = uint64ZERO; char str[33]; delete T; T = new merStream(new kMerBuilder(MERSIZE), new seqStream(query), true, true); while (T->nextMer()) { if (M->getExact(T->theFMer(), posn, posnMax, posnLen, count)) { fprintf(stdout, "Got a F match for mer=%s at "uint64FMT"/"uint64FMT" (in mers), numMatches="uint64FMT"\n", T->theFMer().merToString(str), T->theSequenceNumber(), T->thePositionInStream(), posnLen); } if (M->getExact(T->theRMer(), posn, posnMax, posnLen, count)) { fprintf(stdout, "Got a R match for mer=%s at "uint64FMT"/"uint64FMT" (in mers), numMatches="uint64FMT"\n", T->theRMer().merToString(str), T->theSequenceNumber(), T->thePositionInStream(), posnLen); } } delete M; delete T; return(0); } // Builds a positionDB possibly using a 
subset of the file. // // Subset on entire sequences: // -use x-y,a,b // // Subset on a range of mers, in this case, use only the 1000th // through 1999th (inclusive) mer: // -merbegin 1000 -merend 2000 // // Or do both, use the first 1000 mers from the 3rd sequence: // -use 3 -merbegin 0 -merend 1000 int main(int argc, char **argv) { uint32 mersize = 20; uint32 merskip = 0; char *maskF = 0L; char *onlyF = 0L; uint64 merBegin = ~uint64ZERO; uint64 merEnd = ~uint64ZERO; char *sequenceFile = 0L; char *outputFile = 0L; if (argc < 3) { fprintf(stderr, "usage: %s [args]\n", argv[0]); fprintf(stderr, " -mersize k The size of the mers, default=20.\n"); fprintf(stderr, " -merskip k The skip between mers, default=0\n"); fprintf(stderr, " -use a-b,c Specify which sequences to use, default=all\n"); fprintf(stderr, " -merbegin b Build on a subset of the mers, starting at mer #b, default=all mers\n"); fprintf(stderr, " -merend e Build on a subset of the mers, ending at mer #e, default=all mers\n"); fprintf(stderr, " -sequence s.fasta Input sequences.\n"); fprintf(stderr, " -output p.posDB Output filename.\n"); fprintf(stderr, "\n"); fprintf(stderr, " To dump information about an image:\n"); fprintf(stderr, " -dump datafile\n"); fprintf(stderr, "\n"); fprintf(stderr, " To run sanity tests:\n"); fprintf(stderr, " -buildonly [build opts] sequence.fasta\n"); fprintf(stderr, " -- just builds a table and exits\n"); fprintf(stderr, " -existence [build opts] sequence.fasta\n"); fprintf(stderr, " -- builds (or reads) a table reports if any mers\n"); fprintf(stderr, " in sequence.fasta cannot be found\n"); fprintf(stderr, " -extra [build opts] sequence.fasta\n"); fprintf(stderr, " -- builds (or reads) a table reports if any mers\n"); fprintf(stderr, " NOT in sequence.fasta are be found\n"); fprintf(stderr, " -test1 sequence.fasta\n"); fprintf(stderr, " -- Tests if each and every mer is found in the\n"); fprintf(stderr, " positionDB. 
Reports if it doesn't find a mer\n"); fprintf(stderr, " at the correct position. Doesn't report if table\n"); fprintf(stderr, " has too much stuff.\n"); fprintf(stderr, " -test2 db.fasta sequence.fasta\n"); fprintf(stderr, " -- Builds a positionDB from db.fasta, then searches\n"); fprintf(stderr, " the table for each mer in sequence.fasta. Reports\n"); fprintf(stderr, " all mers it finds.\n"); fprintf(stderr, " -- This is a silly test and you shouldn't do it.\n"); exit(1); } int arg = 1; while (arg < argc) { if (strcmp(argv[arg], "-mersize") == 0) { mersize = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-merskip") == 0) { merskip = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-mask") == 0) { maskF = argv[++arg]; } else if (strcmp(argv[arg], "-only") == 0) { onlyF = argv[++arg]; } else if (strcmp(argv[arg], "-merbegin") == 0) { merBegin = strtouint64(argv[++arg], 0L); } else if (strcmp(argv[arg], "-merend") == 0) { merEnd = strtouint64(argv[++arg], 0L); } else if (strcmp(argv[arg], "-sequence") == 0) { sequenceFile = argv[++arg]; } else if (strcmp(argv[arg], "-output") == 0) { outputFile = argv[++arg]; } else if (strcmp(argv[arg], "-dump") == 0) { positionDB *e = new positionDB(argv[++arg], 0, 0, 0, false); e->printState(stdout); delete e; exit(0); } else if (strcmp(argv[arg], "-test1") == 0) { exit(test1(argv[arg+1])); } else if (strcmp(argv[arg], "-test2") == 0) { exit(test2(argv[arg+1], argv[arg+2])); } else { fprintf(stderr, "ERROR: unknown arg '%s'\n", argv[arg]); exit(1); } arg++; } // Exit quickly if the output file exists. // if (fileExists(outputFile)) { fprintf(stderr, "Output file '%s' exists already!\n", outputFile); exit(0); } merStream *MS = new merStream(new kMerBuilder(MERSIZE), new seqStream(sequenceFile), true, true); // Approximate the number of mers in the sequences. // uint64 numMers = MS->approximateNumberOfMers(); // Reset the limits. 
// // XXX: If the user somehow knows how many mers are in the input // file, and specifies an end between there and the amount of // sequence, we'll pointlessly still make a merStreamFile, even // though we shouldn't. // if (merBegin == ~uint64ZERO) merBegin = 0; if (merEnd == ~uint64ZERO) merEnd = numMers; if (merBegin >= merEnd) { fprintf(stderr, "ERROR: merbegin="uint64FMT" and merend="uint64FMT" are incompatible.\n", merBegin, merEnd); exit(1); } if ((merBegin > 0) || (merEnd < numMers)) MS->setBaseRange(merBegin, merEnd); existDB *maskDB = 0L; if (maskF) { fprintf(stderr, "Building maskDB from '%s'\n", maskF); maskDB = new existDB(maskF, mersize, existDBnoFlags, 0, ~uint32ZERO); } existDB *onlyDB = 0L; if (onlyF) { fprintf(stderr, "Building onlyDB from '%s'\n", onlyF); onlyDB = new existDB(onlyF, mersize, existDBnoFlags, 0, ~uint32ZERO); } fprintf(stderr, "Building table with merSize "uint32FMT", merSkip "uint32FMT"\n", mersize, merskip); positionDB *positions = new positionDB(MS, mersize, merskip, maskDB, onlyDB, 0L, 0, 0, 0, 0, true); fprintf(stderr, "Dumping positions table to '%s'\n", outputFile); positions->saveState(outputFile); delete MS; delete positions; exit(0); } kmer-code-2013-trunk/libkmer/driver-existDB.C0000644000000000000000000001465712322046702017545 0ustar rootroot#include #include #include #include #include "bio++.H" #include "existDB.H" #include "libmeryl.H" #include "seqCache.H" #include "seqStream.H" #include "merStream.H" // Driver for the existDB creation. Reads a sequence.fasta, builds // an existDB for the mers in the file, and then writes the internal // structures to disk. // // The existDB constructor is smart enough to read either a pre-built // image or a regular multi-fasta file. 
int testFiles(char *filename, char *prefix, uint32 merSize) { char *prefixfilename = new char [strlen(prefix) + 32]; // Create existDB e and save it to disk // existDB *e = new existDB(filename, merSize, existDBnoFlags | existDBcounts, 0, ~uint32ZERO); sprintf(prefixfilename, "%s.1", prefix); e->saveState(prefixfilename); // Create existDB f by loading the saved copy from disk // existDB *f = new existDB(prefixfilename); // Create a fresh existDB g (to check if we corrup the original when saved) // existDB *g = new existDB(filename, merSize, existDBnoFlags | existDBcounts, 0, ~uint32ZERO); speedCounter *C = new speedCounter(" %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, true); fprintf(stderr, "Need to iterate over %7.2f Mmers.\n", (uint64MASK(2 * merSize) + 1) / 1000000.0); for (uint64 d=0, m=uint64MASK(2 * merSize); m--; ) { bool ee = e->exists(m); bool ef = f->exists(m); bool eg = g->exists(m); uint32 ce = e->count(m); uint32 cf = f->count(m); uint32 cg = g->count(m); if ((ee != ef) || (ef != eg) || (ee != eg)) fprintf(stderr, "mer "uint64HEX" not found : e=%d f=%d g=%d\n", m, ee, ef, eg); if ((ce != cf) || (cf != cg) || (ce != cg)) fprintf(stderr, "mer "uint64HEX" count differs : e=%u f=%u g=%u (exists=%d)\n", m, ce, cf, cg, ee); if ((m & 0xffffff) == 0) { // Been a while since a report, so report. d = 1; } if ((ce > 1) && (d == 1)) { // Report anything not unique, to make sure that we're testing real counts and not just existence. 
fprintf(stderr, "mer "uint64HEX" : e=%u f=%u g=%u (exists=%d)\n", m, ce, cf, cg, ee); d = 0; } C->tick(); } delete e; delete C; return(0); } int testExistence(char *filename, uint32 merSize) { existDB *E = new existDB(filename, merSize, existDBnoFlags, 0, ~uint32ZERO); merStream *M = new merStream(new kMerBuilder(merSize), new seqStream(filename), true, true); uint64 tried = 0; uint64 lost = 0; while (M->nextMer()) { tried++; if (!E->exists(M->theFMer())) lost++; } delete M; delete E; if (lost) { fprintf(stderr, "Tried "uint64FMT", didn't find "uint64FMT" merStream mers in the existDB.\n", tried, lost); return(1); } else { return(0); } } int testExhaustive(char *filename, char *merylname, uint32 merSize) { existDB *E = new existDB(filename, merSize, existDBnoFlags, 0, ~uint32ZERO); merylStreamReader *M = new merylStreamReader(merylname); speedCounter *C = new speedCounter(" %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, true); uint64 found = uint64ZERO; uint64 expected = uint64ZERO; FILE *DUMP = 0L; DUMP = fopen("testExhaustive.ms.dump", "w"); while (M->nextMer()) { if (E->exists(M->theFMer())) { expected++; fprintf(DUMP, uint64HEX"\n", (uint64)M->theFMer()); } else { fprintf(DUMP, uint64HEX" MISSED!\n", (uint64)M->theFMer()); } } fclose(DUMP); fprintf(stderr, "Found "uint64FMT" mers in the meryl database.\n", expected); fprintf(stderr, "Need to iterate over %7.2f Mmers.\n", (uint64MASK(2 * merSize) + 1) / 1000000.0); DUMP = fopen("testExhaustive.ck.dump", "w"); for (uint64 m = uint64MASK(2 * merSize); m--; ) { if (E->exists(m)) { found++; fprintf(DUMP, uint64HEX"\n", m); } C->tick(); } fclose(DUMP); delete C; delete E; delete M; if (expected != found) { fprintf(stderr, "Expected to find "uint64FMT" mers, but found "uint64FMT" instead.\n", expected, found); return(1); } else { return(0); } } const char *usage = "usage: %s [stuff]\n" " -mersize mersize\n" " -- Use the specified mersize when building existDB tables.\n" "\n" " -build some.fasta prefix\n" " 
-- Build an existDB on all mers in some.fasta and save\n" " the tables into prefix.\n" "\n" " -describe prefix\n" " -- Reports the state of some existDB file.\n" "\n" " -testfiles some.fasta prefix\n" " -- Build an existDB table from some.fasta. Write that table to disk.\n" " Load the table back. Compare that each mer in some.fasta is present\n" " in all three existDB tables created earlier.\n" "\n" " -testexistence some.fasta\n" " -- Build an existDB table from some.fasta, check that every\n" " mer in some.fasta can be found in the table. Does not\n" " guarantee that every mer in the table is found in the file.\n" "\n" " -testexhaustive some.fasta some.meryl\n" " -- Build an existDB table from some.fasta, check _EVERY_ mer\n" " for existance. Complain if a mer exists in the table but\n" " not in the meryl database. Assumes 'some.meryl' is the\n" " mercount of some.fasta.\n" "\n"; int main(int argc, char **argv) { uint32 mersize = 20; if (argc < 3) { fprintf(stderr, usage, argv[0]); exit(1); } int arg = 1; while (arg < argc) { if (strncmp(argv[arg], "-mersize", 2) == 0) { arg++; mersize = atoi(argv[arg]); } else if (strncmp(argv[arg], "-describe", 2) == 0) { existDB *e = new existDB(argv[argc-1], false); e->printState(stdout); delete e; exit(0); } else if (strncmp(argv[arg], "-testfiles", 8) == 0) { exit(testFiles(argv[arg+1], argv[arg+2], mersize)); } else if (strncmp(argv[arg], "-testexistence", 8) == 0) { exit(testExistence(argv[arg+1], mersize)); } else if (strncmp(argv[arg], "-testexhaustive", 8) == 0) { exit(testExhaustive(argv[arg+1], argv[arg+2], mersize)); } else if (strncmp(argv[arg], "-build", 2) == 0) { existDB *e = new existDB(argv[argc-2], mersize, existDBnoFlags, 0, ~uint32ZERO); e->saveState(argv[argc-1]); delete e; exit(0); } arg++; } exit(0); } kmer-code-2013-trunk/libkmer/positionDB.C0000644000000000000000000011234312322046702016753 0ustar rootroot#include #include #include #include #include "bio++.H" #include "positionDB.H" #include "existDB.H" 
#include "libmeryl.H" #undef ERROR_CHECK_COUNTING #undef ERROR_CHECK_COUNTING_ENCODING #undef ERROR_CHECK_EMPTY_BUCKETS // This tests Chunlin Xiao's discovered bug -- if there are a small // number of unique mers, compared to distinct mers (2 * #unique_mers // < #distinct_mers, we would overflow the position pointer in // buckets. This enables a check that it doesn't occur. // // This has a fixed allocation size, and crashes on larger inputs. // #undef TEST_NASTY_BUGS // Tests that mers are masked out properly. Doesn't handle canonical // mers though. // #undef MER_REMOVAL_TEST positionDB::positionDB(char const *filename, uint32 merSize, uint32 merSkip, uint32 maxMismatch, bool loadData) { memset(this, 0, sizeof(positionDB)); // loadData == false only for driver-posDB.C, and only so it can // dump stats on a posDB file. if (loadState(filename, true, false) == false) { fprintf(stderr, "positionDB()-- Tried to read state from '%s', but failed.\n", filename); exit(1); } if ((loadData) && (merSize != _merSizeInBases)) { fprintf(stderr, "positionDB()-- Tried to read state from '%s', but mer size is wrong (found "uint32FMT", wanted "uint32FMT").\n", filename, _merSizeInBases, merSize); exit(1); } if ((loadData) && (merSkip != _merSkipInBases)) { fprintf(stderr, "positionDB()-- Tried to read state from '%s', but mer skip is wrong (found "uint32FMT", wanted "uint32FMT").\n", filename, _merSkipInBases, merSkip); exit(1); } if ((loadData) && (maxMismatch != _nErrorsAllowed)) { fprintf(stderr, "positionDB()-- Tried to read state from '%s', but max number of mismatches is wrong (found "uint32FMT", wanted "uint32FMT").\n", filename, _nErrorsAllowed, maxMismatch); exit(1); } if (loadState(filename, true, loadData) == false) { fprintf(stderr, "positionDB()-- Tried to read state from '%s', but failed.\n", filename); exit(1); } } positionDB::positionDB(merStream *MS, uint32 merSize, uint32 merSkip, existDB *mask, existDB *only, merylStreamReader *counts, uint32 minCount, uint32 
maxCount, uint32 maxMismatch, uint32 maxMemory, bool beVerbose) { memset(this, 0, sizeof(positionDB)); // Guesstimate a nice table size based on the number of input mers // and the mersize, unless the user gave us a table size. // // We need to ensure that // 2 * merSize + posnWidth + 1 - 64 <= tblBits <= 2 * merSize - 4 // // The catch is that we don't exactly know posnWidth right now. We // can overestimate it, though, based on the size of the sequence // that is backing the merStream. // // The second catch is that we don't want to make tblBits too big // or too small. If too big, we waste a lot of memory in the hash // table pointers, and if too small, we waste even more memory in // the data table (not to mention the algorithm dies because it // assumed buckets in the data table are small). // // The memory size is (roughly): // // 2^tblBits * log(numDistinctMers) + // numDistinctMers * (2*merSize - tblBits + 1 + log(numMers) + // (numMers - numUniqieMers) * log(numMers) // // this is approximately proportional to: // // 2^tblBits * posnWidth + // approxMers * (2*merSize - tblBits + 1 + posnWidth) // uint64 approxMers = MS->approximateNumberOfMers(); uint64 posnWidth = logBaseTwo64(approxMers + 1); // Find the smallest and largest tblBits we could possibly use. // uint64 sm = 2 * merSize + posnWidth + 1 - 64; uint64 lg = 2 * merSize - 4; if (2 * merSize + posnWidth + 1 < 64) sm = 2; if (sm < 16) sm = 16; if (sm > lg) { fprintf(stderr, "ERROR: too many mers for this mersize!\n"); fprintf(stderr, " sm = "uint64FMT"\n", sm); fprintf(stderr, " lg = "uint64FMT"\n", lg); fprintf(stderr, " merSize = "uint32FMT" bits\n", 2 * merSize); fprintf(stderr, " approxMers = "uint64FMT" mers\n", approxMers); fprintf(stderr, " posnWidth = "uint64FMT" bits\n", posnWidth); exit(1); } // Iterate through all the choices, picking the one with the // smallest expected footprint. 
// { if (beVerbose) { fprintf(stderr, "potential configurations for approximately "uint64FMT" "uint32FMT"-mers (posnW="uint64FMT").\n", approxMers, merSize, posnWidth); } uint64 mini = 0; // tblSize of the smallest found uint64 minm = ~mini; // memory size of the smallest found double minw = 0.0; // work of the smallest found uint64 memory = 0; double effort = 0; if (maxMemory == 0) maxMemory = ~uint32ZERO; for (uint64 i=sm; i<=lg; i++) { // These are only needed if maxMismatch is set, but it's // simpler to always set. // _merSizeInBases = merSize; _merSizeInBits = 2 * _merSizeInBases; _merSkipInBases = merSkip; _tableSizeInBits = i; _tableSizeInEntries = uint64ONE << _tableSizeInBits; _hashWidth = uint32ZERO; _hashMask = uint64MASK(_tableSizeInBits); _chckWidth = _merSizeInBits - _tableSizeInBits; _posnWidth = uint64ZERO; _sizeWidth = 0; _shift1 = _merSizeInBits - _tableSizeInBits; _shift2 = _shift1 / 2; _mask1 = uint64MASK(_tableSizeInBits); _mask2 = uint64MASK(_shift1); // Everyone wants to know the memory size (in MB). // memory = ((uint64ONE << i) * posnWidth + approxMers * (2*merSize - i + 1 + posnWidth)) >> 23; // If we know we're looking for mismatches, we compute the amount // of work needed per lookup, and use that, instead of strict // memory sizing, to deicde the table size. // if (maxMismatch > 0) effort = setUpMismatchMatcher(maxMismatch, approxMers); // If our memory size is smaller than allowed, AND it's the // smallest, or the work is smaller, save the table size. // if ((memory < maxMemory) && ((memory < minm) || (effort < minw))) { mini = i; minm = memory; minw = effort; } if (beVerbose) { fprintf(stderr, "tblBits="uint64FMTW(2)" shifts="uint32FMTW(02)","uint32FMTW(02)" -- size %8.3fGB -- work %8.3f%s\n", i, _shift1, _shift2, memory / 1024.0, effort, (mini == i) ? 
" ***" : ""); } } _tableSizeInBits = mini; } if (_tableSizeInBits == 0) { fprintf(stderr, "ERROR: No positionDB parameters within allowed memory limit.\n"); exit(1); } if (beVerbose) { uint32 s1 = 2*merSize-_tableSizeInBits; fprintf(stderr, "tblBits="uint32FMT" s1="uint32FMT" s2="uint32FMT" -- merSize="uint32FMT" bits + posnWidth="uint64FMT" bits (est "uint64FMT" mers) FINAL\n", _tableSizeInBits, s1, s1/2, merSize, posnWidth, approxMers); } _merSizeInBases = merSize; _merSizeInBits = 2 * _merSizeInBases; _merSkipInBases = merSkip; _tableSizeInEntries = uint64ONE << _tableSizeInBits; _hashWidth = uint32ZERO; _hashMask = uint64MASK(_tableSizeInBits); _chckWidth = _merSizeInBits - _tableSizeInBits; _posnWidth = uint64ZERO; _sizeWidth = 0; if (maxCount == 0) maxCount = ~uint32ZERO; if (counts) _sizeWidth = (maxCount < ~uint32ZERO) ? logBaseTwo64(maxCount+1) : 32; _shift1 = _merSizeInBits - _tableSizeInBits; _shift2 = _shift1 / 2; _mask1 = uint64MASK(_tableSizeInBits); _mask2 = uint64MASK(_shift1); #if 0 fprintf(stderr, "merSizeInBits "uint32FMT"\n", _merSizeInBits); fprintf(stderr, "hashWidth "uint32FMT"\n", _hashWidth); fprintf(stderr, "chckWidth "uint32FMT"\n", _chckWidth); fprintf(stderr, "shift1 "uint32FMT"\n", _shift1); fprintf(stderr, "shift2 "uint32FMT"\n", _shift2); #endif if (maxMismatch > 0) setUpMismatchMatcher(maxMismatch, approxMers); build(MS, mask, only, counts, minCount, maxCount, beVerbose); } void positionDB::build(merStream *MS, existDB *mask, existDB *only, merylStreamReader *counts, uint32 minCount, uint32 maxCount, bool beVerbose) { _bucketSizes = 0L; _countingBuckets = 0L; _hashTable_BP = 0L; _hashTable_FW = 0L; _buckets = 0L; _positions = 0L; _wCnt = 0; _wFin = 0; // For get/setDecodedValues(). uint64 lensC[4] = {~uint64ZERO, ~uint64ZERO, ~uint64ZERO, ~uint64ZERO}; uint64 lensF[4] = {~uint64ZERO, ~uint64ZERO, ~uint64ZERO, ~uint64ZERO}; uint64 vals[4] = {0}; uint64 nval = (_sizeWidth == 0) ? 
3 : 4; _numberOfMers = uint64ZERO; _numberOfPositions = uint64ZERO; _numberOfDistinct = uint64ZERO; _numberOfUnique = uint64ZERO; _numberOfEntries = uint64ZERO; _maximumEntries = uint64ZERO; // We assume later that these are already allocated. _sortedMax = 16384; _sortedChck = new uint64 [_sortedMax]; _sortedPosn = new uint64 [_sortedMax]; if (MS == 0L) { fprintf(stderr, "positionDB()-- ERROR: No merStream? Nothing to build a table with!\n"); exit(1); } MS->rewind(); //////////////////////////////////////////////////////////////////////////////// // // 1) Count bucket sizes // // We'll later want to reuse the _bucketSizes space for storing the // hash table. To make it somewhat safe, we allocate the space as // uint64, then cast it to be uint32. // // bktAllocIsJunk tells us if we should release this memory (if we // need to allocate separate space for the hash table). We'd need // to do this if the hashWidth is more than 32 bits, but we won't // know that for a little bit. // // The _bucketSizes is offset by one from bktAlloc so that we don't // overwrite _bucketSizes when we are constructing hash table. // uint64 *bktAlloc; try { bktAlloc = new uint64 [_tableSizeInEntries / 2 + 4]; } catch (std::bad_alloc) { fprintf(stderr, "positionDB()-- caught std::bad_alloc in %s at line %d\n", __FILE__, __LINE__); fprintf(stderr, "positionDB()-- bktAlloc = new uint64 ["uint64FMT"]\n", _tableSizeInEntries / 2 + 4); exit(1); } bool bktAllocIsJunk = false; bzero(bktAlloc, sizeof(uint64) * (_tableSizeInEntries / 2 + 4)); // Why +2? We try to reuse the bktAlloc space for the hash table, // which is constructed from the bucketSizes. The hashTable is // built from the bucketSizes. It definitely needs to be +1, and // so we use +2 just in case the human is being stupid again. 
// _bucketSizes = (uint32 *)(bktAlloc + 2); #ifdef ERROR_CHECK_COUNTING fprintf(stdout, "ERROR_CHECK_COUNTING is defined.\n"); uint32 *_errbucketSizes = new uint32 [_tableSizeInEntries + 2]; for (uint64 i=0; i<_tableSizeInEntries + 2; i++) _errbucketSizes[i] = uint32ZERO; #endif if (beVerbose) fprintf(stderr, " Allocated bucket size counting space with total size "uint64FMT" KB\n", _tableSizeInEntries >> 8); speedCounter *C = new speedCounter(" %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, beVerbose); // Two choices here // // 1) No masking or onlying is done. Stream the mers and just // count the positions. This is the original behavior. // // 2) Masking or onlying is done. Open the output stream file, // stream the mers by, checking for mask/only of both // forward and reverse mers. If either is found, push // the (forward) mer and position onto the stream. // close the output stream. // // Save the mer if it doesn't exist in the mask (both f and r), // or does exist in the only (either f or r), add it. // // The input databases for mask and only are (currently) made // using canonical mers. We halve the number of exists() by // also using canonical mers here. // MS->rewind(); while (MS->nextMer(_merSkipInBases)) { _bucketSizes[ HASH(MS->theFMer()) ]++; #ifdef ERROR_CHECK_COUNTING _errbucketSizes[ HASH(MS->theFMer()) ]++; #endif _numberOfMers++; _numberOfPositions = MS->thePositionInStream(); assert((_numberOfPositions >> 60) == 0); C->tick(); } delete C; C = 0L; if (beVerbose) fprintf(stderr, " Found "uint64FMT" mers (max position = "uint64FMT")\n", _numberOfMers, _numberOfPositions); // This caught a nasty bug in merStream rewind(), and it's pretty // cheap, so I left it in. Search for the other DEBUGnumPositions. // uint64 DEBUGnumPositions = _numberOfPositions + 1; // This is _numberOfMers+1 because we need to store the first // position after the last mer. 
That is, if there are two mers, we // will store that the first mer is at position 0, the second mer // is at position 1, and the end of the second mer is at position // 2. // // In reality, it should be the number of distinct mers, not the // total number of mers, but we don't know that yet. And so // occasionally we'll make things too big and waste a bit of // memory. // _hashWidth = logBaseTwo64(_numberOfMers+1); _posnWidth = logBaseTwo64(_numberOfPositions+1); /////////////////////////////////////////////////////////////////////////////// // // 2) Allocate buckets and make bucketSizes be a pointer into them // _wCnt = _chckWidth + _posnWidth + 1 + _sizeWidth; lensC[0] = _chckWidth; lensC[1] = _posnWidth; lensC[2] = 1; lensC[3] = _sizeWidth; uint64 bucketsSpace = (_numberOfMers+1) * _wCnt / 64 + 1; uint32 endPosition = 0; if (beVerbose) fprintf(stderr, " Allocated "uint64FMT"KB for buckets ("uint64FMT" 64-bit words)\n", bucketsSpace >> 7, bucketsSpace); try { _countingBuckets = new uint64 [bucketsSpace]; } catch (std::bad_alloc) { fprintf(stderr, "positionDB()-- caught std::bad_alloc in %s at line %d\n", __FILE__, __LINE__); fprintf(stderr, "positionDB()-- _countingBuckets = new uint64 ["uint64FMT"]\n", bucketsSpace); exit(1); } for (uint64 i=0; irewind(); while (MS->nextMer(_merSkipInBases)) { uint64 h = HASH(MS->theFMer()); #ifdef ERROR_CHECK_COUNTING if (_bucketSizes[h] == 0) { char str[33]; fprintf(stderr, "positionDB()-- ERROR_CHECK_COUNTING: Bucket "uint64FMT" ran out of things! '%s'\n", h, MS->theFMer().merToString(str)); fprintf(stderr, "positionDB()-- ERROR_CHECK_COUNTING: Stream is at "uint64FMT"\n", MS->thePositionInStream()); } #endif _bucketSizes[h]--; #ifdef ERROR_CHECK_COUNTING _errbucketSizes[h]--; #endif #ifdef ERROR_CHECK_EMPTY_BUCKETS // Check that everything is empty. Empty is defined as set to all 1's. 
getDecodedValues(_countingBuckets, (uint64)_bucketSizes[h] * (uint64)_wCnt, nval, lensC, vals); if (((~vals[0]) & uint64MASK(lensC[0])) || ((~vals[1]) & uint64MASK(lensC[1])) || ((~vals[2]) & uint64MASK(lensC[2])) || ((lensC[3] > 0) && ((~vals[3]) & uint64MASK(lensC[3])))) fprintf(stdout, "ERROR_CHECK_EMPTY_BUCKETS: countingBucket not empty! pos=%lu 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n", _bucketSizes[h] * _wCnt, (~vals[0]) & uint64MASK(lensC[0]), (~vals[1]) & uint64MASK(lensC[1]), (~vals[2]) & uint64MASK(lensC[2]), (~vals[3]) & uint64MASK(lensC[3])); #endif vals[0] = CHECK(MS->theFMer()); vals[1] = MS->thePositionInStream(); vals[2] = 0; vals[3] = 0; setDecodedValues(_countingBuckets, (uint64)_bucketSizes[h] * (uint64)_wCnt, nval, lensC, vals); #ifdef ERROR_CHECK_COUNTING_ENCODING getDecodedValues(_countingBuckets, (uint64)_bucketSizes[h] * (uint64)_wCnt, nval, lensC, vals); if (vals[0] != CHECK(MS->theFMer())) fprintf(stdout, "ERROR_CHECK_COUNTING_ENCODING error: CHCK corrupted! Wanted "uint64HEX" got "uint64HEX"\n", CHECK(MS->theFMer()), vals[0]); if (vals[1] != MS->thePositionInStream()) fprintf(stdout, "ERROR_CHECK_COUNTING_ENCODING error: POSN corrupted! Wanted "uint64HEX" got "uint64HEX"\n", MS->thePositionInStream(), vals[1]); if (vals[2] != 0) fprintf(stdout, "ERROR_CHECK_COUNTING_ENCODING error: UNIQ corrupted.\n"); if (vals[3] != 0) fprintf(stdout, "ERROR_CHECK_COUNTING_ENCODING error: SIZE corrupted.\n"); #endif C->tick(); } delete C; C = 0L; #ifdef ERROR_CHECK_COUNTING for (uint64 i=0; i<_tableSizeInEntries; i++) if (_errbucketSizes[i] != 0) fprintf(stdout, "ERROR_CHECK_COUNTING: Bucket "uint32FMT" wasn't filled fully? 
"uint32FMT" left over.\n", i, _errbucketSizes[i]); delete [] _errbucketSizes; _errbucketSizes = 0L; #endif //////////////////////////////////////////////////////////////////////////////// // // 4) Sort each bucket -- count: // 1) number of distinct mers // 2) number of unique mers // 3) number of entries in position table ( sum mercount+1 for all mercounts > 1) // also need to repack the sorted things // if (beVerbose) fprintf(stderr, " Sorting and repacking buckets ("uint64FMT" buckets).\n", _tableSizeInEntries); C = new speedCounter(" %7.2f Mbuckets -- %5.2f Mbuckets/second\r", 1000000.0, 0x1ffffff, beVerbose); for (uint64 i=0; i<_tableSizeInEntries; i++) { sortAndRepackBucket(i); C->tick(); } delete C; C = 0L; if (beVerbose) fprintf(stderr, " Found "uint64FMTW(12)" total mers\n" " Found "uint64FMTW(12)" distinct mers\n" " Found "uint64FMTW(12)" unique mers\n" " Need "uint64FMT" non-unique position list entries ("uint64FMT" maximum count)\n", _numberOfMers, _numberOfDistinct, _numberOfUnique, _numberOfEntries, _maximumEntries); //////////////////////////////////////////////////////////////////////////////// // // Compute the size of the final bucket position entry. It's // either a position into the sequence, or a pointer into a list of // positions. In rare cases, the pointer is larger than the // sequence position, and we need to do extra work. // // The width of position pointers (in buckets) is the max of // _posnWidth (a pointer to the sequence position) and // _pptrWidth (a pointer to an entry in the positions table). // _pptrWidth = logBaseTwo64(_numberOfEntries+1); if (_pptrWidth < _posnWidth) _pptrWidth = _posnWidth; _wFin = _chckWidth + _pptrWidth + 1 + _sizeWidth; lensF[0] = _chckWidth; lensF[1] = _pptrWidth; lensF[2] = 1; lensF[3] = _sizeWidth; //////////////////////////////////////////////////////////////////////////////// // // 5) Allocate: real hash table, buckets and position table. 
// // XXXX how do we count the number of buckets/positions we never // use because they are masked out?? // // If we are just thresholding (ignore things with count > 100) // it's easy, a simple loop over something. // // If we have an exist/only db....are they in the same order? Can // we loop over both at the same time and count that way? That'd // be cool! Mersize is the same, why can the table size be the // same too -- OK, if the existDB has a small number of mers in it, // then we don't need a large table. uint64 hs = _tableSizeInEntries * _hashWidth / 64 + 1; uint64 bs = _numberOfDistinct * _wFin / 64 + 1; uint64 ps = _numberOfEntries * _posnWidth / 64 + 1; if (_hashWidth <= 32) { if (beVerbose) fprintf(stderr, " Reusing bucket counting space for hash table.\n"); #ifdef UNCOMPRESS_HASH_TABLE _hashTable_BP = 0L; _hashTable_FW = (uint32 *)bktAlloc; #else _hashTable_BP = bktAlloc; _hashTable_FW = 0L; #endif bktAllocIsJunk = false; } else { // Can't use the full-width hash table, since the data size is > // 32 bits -- we'd need to allocate 64-bit ints for it, and // that'll likely be too big...and we'd need to have // _hashTable_FW64 or something. if (beVerbose) fprintf(stderr, " Allocated "uint64FMTW(10)"KB for hash table ("uint64FMT" 64-bit words)\n", hs >> 7, hs); try { _hashTable_BP = new uint64 [hs]; _hashTable_FW = 0L; } catch (std::bad_alloc) { fprintf(stderr, "positionDB()-- caught std::bad_alloc in %s at line %d\n", __FILE__, __LINE__); fprintf(stderr, "positionDB()-- _hashTable_BP = new uint64 ["uint64FMT"]\n", hs); exit(1); } bktAllocIsJunk = true; } // If we have enough space to reuse the counting space, reuse it. // Else, allocate more space. // // We need to ensure that there are enough bits and that the size // of a bucket didn't increase. If the bucket size did increase, // and we see more unique buckets than total mers (up to some // point) we overwrite data. 
// // Recall that bucketSpace ~= numberOfMers * wCnt // if ((bs < bucketsSpace) && (_wFin <= _wCnt)) { if (beVerbose) fprintf(stderr, " Reusing bucket space; Have: "uint64FMT" Need: "uint64FMT" (64-bit words)\n", bucketsSpace, bs); _buckets = _countingBuckets; bs = bucketsSpace; // for output at the end } else { if (beVerbose) fprintf(stderr, " Allocated "uint64FMTW(10)"KB for buckets ("uint64FMT" 64-bit words)\n", bs >> 7, bs); try { _buckets = new uint64 [bs]; } catch (std::bad_alloc) { fprintf(stderr, "positionDB()-- caught std::bad_alloc in %s at line %d\n", __FILE__, __LINE__); fprintf(stderr, "positionDB()-- _buckets = new uint64 ["uint64FMT"]\n", bs); exit(1); } } if (beVerbose) fprintf(stderr, " Allocated "uint64FMTW(10)"KB for positions ("uint64FMT" 64-bit words)\n", ps >> 7, ps); try { _positions = new uint64 [ps]; } catch (std::bad_alloc) { fprintf(stderr, "positionDB()-- caught std::bad_alloc in %s at line %d\n", __FILE__, __LINE__); fprintf(stderr, "positionDB()-- _positions = new uint64 ["uint64FMT"\n", ps); exit(1); } //////////////////////////////////////////////////////////////////////////////// // // 6) Transfer from the sorted buckets to the hash table. // if (beVerbose) fprintf(stderr, " Transferring to final structure ("uint64FMT" buckets).\n", _tableSizeInEntries); uint64 bucketStartPosition = 0; // Current positions and bit positions in the buckets and position list. // uint64 currentBbit = uint64ZERO; // Bit position into bucket uint64 currentPbit = uint64ZERO; // Bit position into positions uint64 currentPpos = uint64ZERO; // Value position into positions #ifdef TEST_NASTY_BUGS // Save the position array pointer of each bucket for debugging. // uint64 currentBpos = uint64ZERO; // Value position into bucket uint32 *posPtrCheck = new uint32 [65826038]; #endif // We also take this opportunity to reset some statistics that are // wrong. 
// _numberOfMers = 0; _numberOfPositions = 0; _numberOfDistinct = 0; _numberOfUnique = 0; _numberOfEntries = 0; _maximumEntries = 0; C = new speedCounter(" %7.2f Mbuckets -- %5.2f Mbuckets/second\r", 1000000.0, 0x1ffffff, beVerbose); // We need b outside the loop! // uint64 b; for (b=0; b<_tableSizeInEntries; b++) { C->tick(); // Set the start of the bucket -- we took pains to ensure that // we don't overwrite _bucketSizes[b], if we are reusing that // space for the hash table. // if (_hashTable_BP) setDecodedValue(_hashTable_BP, (uint64)b * (uint64)_hashWidth, _hashWidth, bucketStartPosition); else _hashTable_FW[b] = bucketStartPosition; // Get the number of mers in the counting bucket. The error // checking and sizing of _sortedChck and _sortedPosn was already // done in the sort. // uint64 st = _bucketSizes[b]; uint64 ed = _bucketSizes[b+1]; uint32 le = ed - st; // Unpack the check values // for (uint64 i=st, J=st * _wCnt; i maxCount) useMer = false; if ((useMer == true) && (mask || only)) { // MER_REMOVAL_DURING_XFER. Great. The existDB has // (usually) the canonical mer. We have the forward mer. // Well, no, we have the forward mers' hash and check. So, // we reconstruct the mer, reverse complement it, and then // throw the mer out if either the forward or reverse exists // (or doesn't exist). 
uint64 m = REBUILD(b, _sortedChck[stM]); uint64 r; if (mask) { if (mask->isCanonical()) { r = reverseComplementMer(_merSizeInBases, m); if (r < m) m = r; } if (mask->exists(m)) useMer = false; } if (only) { if (only->isCanonical()) { r = reverseComplementMer(_merSizeInBases, m); if (r < m) m = r; } if (only->exists(m) == false) useMer = false; } } if (useMer) { _numberOfMers += edM - stM; _numberOfPositions += edM - stM; _numberOfDistinct++; if (stM+1 == edM) { _numberOfUnique++; #ifdef TEST_NASTY_BUGS posPtrCheck[currentBpos++] = _sortedPosn[stM]; #endif vals[0] = _sortedChck[stM]; vals[1] = _sortedPosn[stM]; vals[2] = 1; vals[3] = 0; currentBbit = setDecodedValues(_buckets, currentBbit, nval, lensF, vals); bucketStartPosition++; } else { _numberOfEntries += edM - stM; if (_maximumEntries < edM - stM) _maximumEntries = edM - stM; #ifdef TEST_NASTY_BUGS posPtrCheck[currentBpos++] = currentPpos; #endif vals[0] = _sortedChck[stM]; vals[1] = currentPpos; vals[2] = 0; vals[3] = 0; currentBbit = setDecodedValues(_buckets, currentBbit, nval, lensF, vals); bucketStartPosition++; // Store the positions. Store the number of positions // here, then store all positions. // // The positions are in the proper place in _sortedPosn, // and setDecodedValue masks out the extra crap, so no // temporary needed. Probably should be done with // setDecodedValues, but then we need another array telling // the sizes of each piece. // setDecodedValue(_positions, currentPbit, _posnWidth, edM - stM); currentPbit += _posnWidth; currentPpos++; for (; stM < edM; stM++) { if (_sortedPosn[stM] >= DEBUGnumPositions) { fprintf(stderr, "positionDB()-- ERROR: Got position "uint64FMT", but only "uint64FMT" available!\n", _sortedPosn[stM], DEBUGnumPositions); abort(); } setDecodedValue(_positions, currentPbit, _posnWidth, _sortedPosn[stM]); currentPbit += _posnWidth; currentPpos++; } } } // useMer // All done with this mer. 
// stM = edM; } // while (stM < le) } // for each bucket // Set the end of the last bucket // if (_hashTable_BP) setDecodedValue(_hashTable_BP, b * _hashWidth, _hashWidth, bucketStartPosition); else _hashTable_FW[b] = bucketStartPosition; delete C; // Clear out the end of the arrays -- this is only so that we can // checksum the result. // if (_hashTable_BP) { b = b * _hashWidth + _hashWidth; setDecodedValue(_hashTable_BP, b, 64 - (b % 64), uint64ZERO); } setDecodedValue(_buckets, currentBbit, 64 - (currentBbit % 64), uint64ZERO); setDecodedValue(_positions, currentPbit, 64 - (currentPbit % 64), uint64ZERO); if (beVerbose) { fprintf(stderr, " Avail: Bucket "uint64FMTW(12)" Position "uint64FMTW(12)" (64-bit words)\n", bs, ps); fprintf(stderr, " Avail: Bucket "uint64FMTW(12)" Position "uint64FMTW(12)" (entries)\n", _numberOfDistinct, _numberOfEntries); fprintf(stderr, " Used: Bucket "uint64FMTW(12)" Position "uint64FMTW(12)" (64-bit words)\n", currentBbit / 64, currentPbit / 64); } // Reset the sizes to what we actually found. If we then // dump/reload, we shrink our footprint. // _numberOfDistinct = currentBbit / _wFin; _numberOfEntries = currentPbit / _posnWidth; if (beVerbose) { fprintf(stderr, " Used: Bucket "uint64FMTW(12)" Position "uint64FMTW(12)" (entries)\n", _numberOfDistinct, _numberOfEntries); fprintf(stderr, " Found "uint64FMTW(12)" total mers\n" " Found "uint64FMTW(12)" distinct mers\n" " Found "uint64FMTW(12)" unique mers\n" " Need "uint64FMT" non-unique position list entries ("uint64FMT" maximum count)\n", _numberOfMers, _numberOfDistinct, _numberOfUnique, _numberOfEntries, _maximumEntries); } // If we removed mers, there is a small chance that our hash table // is too big -- we might have removed enoough mers to make the // width smaller. If so, rebuild the hash table. 
// // Also, hooray, we finally know the number of distinct mers, so we // can make this nice and tight // if (_hashTable_BP) { uint32 newHashWidth = 1; while ((_numberOfDistinct+1) > (uint64ONE << newHashWidth)) newHashWidth++; if (newHashWidth != _hashWidth) { uint64 npos = 0; uint64 opos = 0; if (beVerbose) fprintf(stderr, " Rebuilding the hash table, from "uint32FMT" bits wide to "uint32FMT" bits wide.\n", _hashWidth, newHashWidth); for (uint64 z=0; z<_tableSizeInEntries+1; z++) { setDecodedValue(_hashTable_BP, npos, newHashWidth, getDecodedValue(_hashTable_BP, opos, _hashWidth)); npos += newHashWidth; opos += _hashWidth; } // Clear the end again. setDecodedValue(_hashTable_BP, npos, 64 - (npos % 64), uint64ZERO); } _hashWidth = newHashWidth; } // If supplied, add in any counts. The meryl table is, sadly, in // the wrong order, and we must hash and search. // // Meryl _should_ be storing only forward mers, but we have no way // of checking. // // After all counts are loaded, check if we can compress the counts // space any. Check if the largestMerylCount is much smaller than // the space it is stored in. If so, we can compress the table. 
// uint64 largestMerylCount = 0; uint64 countsLoaded = 0; if (counts) { if (beVerbose) fprintf(stderr, " Loading "uint64FMT" mercounts.\n", counts->numberOfDistinctMers()); C = new speedCounter(" %7.2f Mmercounts -- %5.2f Mmercounts/second\r", 1000000.0, 0x1fffff, beVerbose); while (counts->nextMer()) { kMer k = counts->theFMer(); uint64 c = counts->theCount(); uint64 f = setCount(k, c); k.reverseComplement(); uint64 r = setCount(k, c); if (f + r > 0) { countsLoaded++; if (largestMerylCount < c) largestMerylCount = c; } C->tick(); } delete C; if (beVerbose) fprintf(stderr, " Loaded "uint64FMT" mercounts; largest is "uint64FMT".\n", countsLoaded, largestMerylCount); if (logBaseTwo64(largestMerylCount + 1) < _sizeWidth) { if (beVerbose) fprintf(stderr, " Compress sizes from "uint32FMT" bits to "uint32FMT" bits.\n", _sizeWidth, (uint32)logBaseTwo64(largestMerylCount + 1)); uint64 oSiz[4] = { _chckWidth, _pptrWidth, 1, _sizeWidth }; uint64 nSiz[4] = { _chckWidth, _pptrWidth, 1, logBaseTwo64(largestMerylCount + 1) }; uint64 tVal[4] = { 0, 0, 0, 0 }; uint64 oP = 0, oS = oSiz[0] + oSiz[1] + oSiz[2] + oSiz[3]; uint64 nP = 0, nS = nSiz[0] + nSiz[1] + nSiz[2] + nSiz[3]; assert(nS < oS); C = new speedCounter(" %7.2f Mmercounts -- %5.2f Mmercounts/second\r", 1000000.0, 0x1fffff, beVerbose); for (uint64 bu=0; bu<_numberOfDistinct; bu++) { getDecodedValues(_buckets, oP, 4, oSiz, tVal); setDecodedValues(_buckets, nP, 4, nSiz, tVal); oP += oS; nP += nS; C->tick(); } delete C; _sizeWidth = nSiz[3]; _wFin = _chckWidth + _pptrWidth + 1 + _sizeWidth; } } #ifdef TEST_NASTY_BUGS // Unpack the bucket positions and check. Report the first one // that is broken. 
// for(uint64 bb=0; bbrewind(); if (mask) { C = new speedCounter(" %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, beVerbose); uint32 extraMer = 0; while (MS->nextMer(_merSkipInBases)) { uint64 mer = MS->theFMer(); if (mask->exists(mer) && exists(mer)) extraMer++; C->tick(); } delete C; fprintf(stderr, "positionDB()-- mask: "uint32FMT" mers extra!\n", extraMer); } else if (only) { C = new speedCounter(" %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, beVerbose); uint32 missingMer = 0; while (MS->nextMer(_merSkipInBases)) { uint64 mer = MS->theFMer(); if (only->exists(mer) && !exists(mer)) missingMer++; C->tick(); } delete C; fprintf(stderr, "positionDB()-- only: "uint32FMT" mers missing!\n", missingMer); } #endif // Free the counting buckets if we aren't using the space for // something else. // if (_buckets != _countingBuckets) delete [] _countingBuckets; // In theory, we could move these to be immediately after the data // is useless. // _bucketSizes = 0L; _countingBuckets = 0L; delete [] _sortedChck; delete [] _sortedPosn; _sortedMax = 0; _sortedChck = 0L; _sortedPosn = 0L; if (bktAllocIsJunk) delete [] bktAlloc; } positionDB::~positionDB() { delete [] _hashTable_BP; delete [] _hashTable_FW; delete [] _buckets; delete [] _positions; delete [] _hashedErrors; } kmer-code-2013-trunk/snapper/0000755000000000000000000000000012641613360014620 5ustar rootrootkmer-code-2013-trunk/snapper/snapper2.C0000644000000000000000000003113112415072200016444 0ustar rootroot#include "snapper2.H" // The (private) structure for testing various filters. 
// struct filterStats { double L; double H; double V; uint32 tp; uint32 tn; uint32 fp; uint32 fn; }; // Shared data // configuration config; sim4parameters sim4params; seqCache *genome; seqStream *genomeMap; seqCache *qsFASTA; existDB *maskDB; existDB *onlyDB; positionDB *positions; volatile uint32 numberOfQueries; int resultFILE; int logmsgFILE; uint32 numFilters; uint32 maxFilters; filterStats *theFilters; void writeValidationFile(char *name) { FILE *F = fopen(name, "wb"); if (F) { fprintf(F, "%6s %6s %6s %6s %6s %8s %8s %8s %8s\n", "L", "H", "V", "sens", "spec", "tp", "fp", "tn", "fn"); for (uint32 f=0; f 0) sens = (double)theFilters[f].tp / (theFilters[f].tp + theFilters[f].fn); if (theFilters[f].tn + theFilters[f].fp > 0) spec = (double)theFilters[f].tn / (theFilters[f].tn + theFilters[f].fp); fprintf(F, "%6.4f %6.4f %6.4f %6.4f %6.4f "uint32FMTW(8)" "uint32FMTW(8)" "uint32FMTW(8)" "uint32FMTW(8)"\n", theFilters[f].L, theFilters[f].H, theFilters[f].V, sens, spec, theFilters[f].tp, theFilters[f].fp, theFilters[f].tn, theFilters[f].fn); } fclose(F); } } void* loaderThread(void *global) { query *q = new query; if (q->loadSequence(qsFASTA) == false) { delete q; q = 0L; } return(q); } void searchThread(void *global, void *thread, void *thing) { searcherState *state = (searcherState *)thread; query *qry = (query *)thing; // Do searches. // if (config._doForward) doSearch(state, qry, true); if (config._doReverse) doSearch(state, qry, false); // Filter the hits // doFilter(state, qry); // Polish the filtered hits // if (config._polishOptimally) doPolishDP(state, qry); else doPolishS4(state, qry); // Clean up // delete qry->seq; qry->seq = 0L; for (uint32 h=0; htheHitsLen; h++) { delete qry->theHits[h]._ML; qry->theHits[h]._ML = 0L; } // If we aren't validating or aren't logging, don't save those pieces, just nuke them now. 
// if (config._doValidation == false) { delete [] qry->theHits; qry->theHitsLen = 0; qry->theHits = 0L; } if (config._logmsgFileName == 0L) { delete qry->theLog; qry->theLog = 0L; } } void writerThread(void *global, void *thing) { query *qry = (query *)thing; // Write the output, if there is any (zero length just means that // there was no match found). // if ((qry->theOutput != 0L) && (qry->theOutputLen > 0)) { errno = 0; write(resultFILE, qry->theOutput, sizeof(char) * qry->theOutputLen); if (errno) fprintf(stderr, "Couldn't write to the output file '%s': %s\n", config._outputFileName, strerror(errno)), exit(1); } // Write the log messages, if any, and if there is a log file // if ((logmsgFILE) && (qry->theLog)) qry->theLog->write(logmsgFILE, config._logmsgFileName); // If we are supposed to be doing validation, test a bunch of // filters here. // if (config._doValidation && (qry->theHitsLen > 0)) { for (uint32 f=0; ftheHits, qry->theHitsLen); for (uint32 a=0; atheHitsLen; a++) { if (qry->theHits[a]._covered < cutL) { // These hits would have been discarded by the filter. // if (qry->theHits[a]._status & AHIT_VERIFIED) { // Oops. We found a high-quality match. theFilters[f].fn++; } else { // Good call. Nothing there! theFilters[f].tn++; } } else { // These hits would have been kept by the filter. // if (qry->theHits[a]._status & AHIT_VERIFIED) { // Allright! Got a high-quality match! theFilters[f].tp++; } else { // Dang. Nothing there. theFilters[f].fp++; } } } } // Dump a snapshot of the filter testing // if ((qry->seq->getIID() % 50) == 0) writeValidationFile(config._doValidationFileName); } // doing validation delete qry; } int main(int argc, char **argv) { config.read(argc, argv); if (config._beVerbose) fprintf(stderr, "Opening the cDNA sequences.\n"); qsFASTA = new seqCache(config._qsFileName); numberOfQueries = qsFASTA->getNumberOfSequences(); // We can save some time and warn of too short and too long // sequences before the table is built. 
// { uint32 numTooShortQueries = 0; uint32 numTooLongQueries = 0; uint32 numOKQueries = 0; for (uint32 i=0; igetSequenceLength(i) < config._discardExonLength) numTooShortQueries++; else if (qsFASTA->getSequenceLength(i) >= (uint64ONE << 22)) numTooLongQueries++; else numOKQueries++; } if (numTooShortQueries > 0) { fprintf(stderr, "WARNING:\n"); fprintf(stderr, "WARNING: Found "uint32FMT" queries shorter than minimum reportable size (-discardexonlength = "uint32FMT")\n", numTooShortQueries, config._discardExonLength); fprintf(stderr, "WARNING:\n"); } if (numTooLongQueries > 0) { fprintf(stderr, "WARNING:\n"); fprintf(stderr, "WARNING: Found "uint32FMT" queries longer than maximum size ("uint32FMT")\n", numTooLongQueries, uint32ONE << 22); fprintf(stderr, "WARNING:\n"); } if (numOKQueries == 0) { fprintf(stderr, "ERROR: Found no queries in acceptable size range!\n"); exit(1); } } // Allocate some structures for doing a validation run. This is // done pretty early, just in case it needs to abort. // numFilters = 0; maxFilters = 21 * 22 / 2 * 20; theFilters = 0L; if (config._doValidation) { theFilters = new filterStats [maxFilters]; for (uint32 h=0; h<=100; h+=5) { for (uint32 l=0; l<=h; l+=5) { for (uint32 v=5; v<=100; v+=5) { if (numFilters >= maxFilters) { fprintf(stderr, "ERROR: Ran out of filterStats structures while configuring the filters!\n"); exit(1); } theFilters[numFilters].L = l / 100.0; theFilters[numFilters].H = h / 100.0; theFilters[numFilters].V = v / 100.0; theFilters[numFilters].tp = 0; theFilters[numFilters].tn = 0; theFilters[numFilters].fp = 0; theFilters[numFilters].fn = 0; numFilters++; } } } fprintf(stderr, "Created "uint32FMT" filters (out of "uint32FMT" available) to test/validate.\n", numFilters, maxFilters); } // Read in the positionDB if it's already built, or build a new one. // if ((config._psFileName) && (fileExists(config._psFileName))) { if (config._buildOnly) { fprintf(stderr, "All done. 
Table '%s' already built.\n", config._psFileName); exit(1); } else { fprintf(stderr, "Loading positionDB state from '%s'\n", config._psFileName); positions = new positionDB(config._psFileName, config._KBmerSize, config._merSkip, 0); } } else { // The masking databases // maskDB = 0L; #if 0 if (config._maskFileName) { if (config._beVerbose) fprintf(stderr, "Building maskDB from fasta file '%s'\n", config._maskFileName); maskDB = new existDB(config._maskFileName, config._KBmerSize, existDBnoFlags, 0, ~uint32ZERO); } if (config._maskPrefix) { if (config._beVerbose) fprintf(stderr, "Building maskDB from meryl prefix '%s'\n", config._maskPrefix); maskDB = new existDB(config._maskPrefix, config._KBmerSize, existDBnoFlags, config._maskThreshold, ~uint32ZERO); } #endif onlyDB = 0L; #if 0 if (config._onlyFileName) { if (config._beVerbose) fprintf(stderr, "Building onlyDB from fasta file '%s'\n", config._onlyFileName); onlyDB = new existDB(config._onlyFileName, config._KBmerSize, existDBnoFlags, 0, ~uint32ZERO); } if (config._onlyPrefix) { if (config._beVerbose) fprintf(stderr, "Building onlyDB from meryl prefix '%s'\n", config._onlyPrefix); onlyDB = new existDB(config._onlyPrefix, config._KBmerSize, existDBnoFlags, 0, config._onlyThreshold); } #endif if ((config._maskFileName) || (config._maskPrefix) || (config._onlyFileName) || (config._onlyPrefix)) { fprintf(stderr, "maskDB/onlyDB not currently supported.\n"); exit(1); } merStream *MS = new merStream(new kMerBuilder(config._KBmerSize, config._KBcompression, config._KBspacingTemplate), new seqStream(config._dbFileName), true, true); positions = new positionDB(MS, config._KBmerSize, config._merSkip, maskDB, onlyDB, 0L, 0, config._ignoreThreshold, 0, 0, config._beVerbose); delete MS; delete maskDB; delete onlyDB; maskDB = 0L; onlyDB = 0L; if (config._psFileName) { if (config._beVerbose) fprintf(stderr, "Dumping positions table to '%s'\n", config._psFileName); positions->saveState(config._psFileName); if (config._buildOnly) 
exit(0); delete positions; positions = new positionDB(config._psFileName, config._KBmerSize, config._merSkip, 0); } } // Open and init the genomic sequences. // if (config._beVerbose) fprintf(stderr, "Opening the genomic database.\n"); genome = new seqCache(config._dbFileName, false); genome->loadAllSequences(); genomeMap = new seqStream(config._dbFileName); // // Configure sim4 // sim4params.setPrintAlignments(config._doAlignments); sim4params.setFindAllExons(); sim4params.setMinCoverage(MAX(0.0, config._minMatchCoverage / 100.0 - 0.1)); sim4params.setMinPercentExonIdentity(config._minMatchIdentity - 5); sim4params.setIgnorePolyTails(false); //sim4params.setSlideIntrons(false); // see sim4b1.C for why this is disabled //sim4params.setWordSize(14); //sim4params.setWordSizeInt(14); //sim4params.setWordSizeExt(14); // // Open output files // resultFILE = fileno(stdout); logmsgFILE = 0; if (config._outputFileName) { errno = 0; resultFILE = open(config._outputFileName, O_WRONLY | O_LARGEFILE | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (errno) fprintf(stderr, "Couldn't open the output file '%s': %s\n", config._outputFileName, strerror(errno)), exit(1); } if (config._logmsgFileName) { errno = 0; logmsgFILE = open(config._logmsgFileName, O_WRONLY | O_LARGEFILE | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (errno) fprintf(stderr, "Couldn't open the log message file '%s': %s\n", config._logmsgFileName, strerror(errno)), exit(1); } // // Initialize threads // sweatShop *ss = new sweatShop(loaderThread, searchThread, writerThread); ss->setNumberOfWorkers(config._numSearchThreads); ss->setWriterQueueSize(16384); for (uint32 i=0; isetThreadData(i, new searcherState(i)); ss->run(0L, config._beVerbose); delete ss; if (resultFILE != fileno(stdout)) close(resultFILE); if (logmsgFILE != 0) close(logmsgFILE); // Summarize the filter test results // if (config._doValidation) 
writeValidationFile(config._doValidationFileName); // Clean up // delete genome; delete genomeMap; if (config._doValidation) delete [] theFilters; delete qsFASTA; delete maskDB; delete onlyDB; delete positions; return(0); } kmer-code-2013-trunk/snapper/thr-polish-dp.C0000644000000000000000000002673012400730017017416 0ustar rootroot#include "snapper2.H" #define MATCH 0 #define GAPA 1 #define GAPB 2 #define STOP 3 #define MATCHSCORE 2 #define GAPSCORE -3 #define MISMATCHSCORE -1 void reverse(char *a, char *b, int len) { char c=0; char *s=a, *S=a+len-1; char *q=b, *Q=b+len-1; while (s < S) { c = *s; *s++ = *S; *S-- = c; c = *q; *q++ = *Q; *Q-- = c; } } class dpMatch { public: dpMatch() { matches = 0; alignLen = 0; begI = begJ = 0; endI = endJ = 0; lenA = lenB = 0; }; int matches; int alignLen; int begI, begJ; int endI, endJ; int lenA, lenB; char *alignA; char *alignB; }; class dpMatrix { private: typedef struct { unsigned int score : 30; unsigned int action : 2; } dpCell; public: dpMatrix() { aMax = 0; bMax = 0; alignA = 0L; alignB = 0L; matrix = 0L; }; ~dpMatrix() { delete [] alignA; delete [] alignB; delete [] matrix; }; void dpMatrixInit(int lenA, int lenB) { if ((aMax <= lenA) || (bMax <= lenB)) { delete [] alignA; delete [] alignB; delete [] matrix; aMax = MAX(aMax, lenA) + 1000; bMax = MAX(bMax, lenB) + 1000; fprintf(stderr, "dpMatrix-- reallocate to "uint32FMT" x "uint32FMT"\n", aMax, bMax); alignA = new char [aMax + bMax + 1]; alignB = new char [bMax + bMax + 1]; matrix = new dpCell [aMax * bMax]; } int i, j, p = 0; for (i=0; imatches = matches; match->alignLen = alignLen; match->begI = begI; match->begJ = begJ; match->endI = endI; match->endJ = endJ; match->lenA = lenA; match->lenB = lenB; //warning alignA and alignB aliases to dpMatrix match->alignA = alignA; match->alignB = alignB; return(match); } void doPolishDP(searcherState *state, query *qry) { // For the autofilter uint64 successes = uint64ZERO; uint64 successMask = uint64MASK(config._afLength); 
uint32 attempts = 0; if (qry->theHitsLen == 0) return; qry->theOutputLen = 0; qry->theOutputMax = 2 * 1024 * qry->theHitsLen; qry->theOutput = new char [qry->theOutputMax]; qry->theOutput[0] = 0; // Move these to searcherState! if (state->DP == 0L) state->DP = new dpMatrix; dpMatch match; dpMatrix *matrix = (dpMatrix *)state->DP; for (uint32 h=0; htheHitsLen; h++) { // If the hit was discarded, move along. // if (qry->theHits[h]._status & AHIT_DISCARDED) continue; // If the hit was filtered out, move along. // if ((config._doValidation == false) && ((qry->theHits[h]._status & AHIT_POLISHABLE) == 0) && ((qry->theHits[h]._status & AHIT_HAS_UNIQUE) == 0)) continue; // If our recent success rate is pretty terrible, continue. // if (config._afEnabled) { if (attempts > config._afInit) { double rat = countNumberOfSetBits64(successes) / (double)((attempts < config._afLength) ? attempts : config._afLength); // If we've hit the end of the good polishes, give up. But // still do all the stuff with unique mers in them. // if (((qry->theHits[h]._status & AHIT_HAS_UNIQUE) == 0) && (rat < config._afThreshold)) continue; } attempts++; } // // Polish it up! 
// seqInCore *QRYseq = qry->seq; seqInCore *GENseq = genome->getSequenceInCore(qry->theHits[h]._dsIdx); uint32 GENlo = qry->theHits[h]._dsLo; uint32 GENhi = qry->theHits[h]._dsHi; char *q = QRYseq->sequence(); char *g = GENseq->sequence() + GENlo; if (GENhi > GENseq->sequenceLength()) GENhi = GENseq->sequenceLength(); uint32 qlen = qry->seq->sequenceLength(); uint32 glen = GENhi - GENlo; bool doForward = qry->theHits[h]._status & AHIT_DIRECTION_MASK; bool doReverse = !doForward; if (doReverse) { reverseComplementSequence(q, qlen); } #if 0 fprintf(stderr, "align QRYlen="uint32FMT" GEN="uint32FMT"-"uint32FMT" GENlen="uint32FMT"\n", qlen, GENlo, GENhi, glen); #endif //if ((qlen * 3 > glen) && ((qlen / 1024) * (glen / 1024) < 4 * 1024)) matrix->dpAlign(q, qlen, g, glen, &match); if (doReverse) { reverseComplementSequence(q, qlen); uint32 x = match.begI; match.begI = qlen - match.endI; match.endI = qlen - x; } // Build the proper match if it's even remotely good // if (match.matches > 0) { sim4polish p; sim4polishExon e; qry->theHits[h]._status |= AHIT_VERIFIED; p._estID = QRYseq->getIID(); p._estLen = QRYseq->sequenceLength(); p._estPolyA = 0; p._estPolyT = 0; p._genID = GENseq->getIID(); p._genRegionOffset = GENlo; p._genRegionLength = GENhi - GENlo; p._numMatches = match.matches; p._numMatchesN = 0; p._numCovered = match.endI - match.begI; p._percentIdentity = 0; p._querySeqIdentity = 0; p._matchOrientation = (doReverse) ? SIM4_MATCH_COMPLEMENT : SIM4_MATCH_FORWARD; p._strandOrientation = SIM4_STRAND_UNKNOWN; p._comment = NULL; p._estDefLine = QRYseq->header(); p._genDefLine = GENseq->header(); p._numExons = 1; p._exons = &e; e._estFrom = match.begI + 1; e._estTo = match.endI; e._genFrom = match.begJ + GENlo + 1; e._genTo = match.endJ + GENlo; e._numMatches = match.matches; e._numMatchesN = 0; e._percentIdentity = 0; e._intronOrientation = SIM4_INTRON_NONE; // The alignments are needed for updateAlignmentScores(). 
e._estAlignment = match.alignA; // 'e' DOES NOT own this, must reset the pointer later. e._genAlignment = match.alignB; p.s4p_updateAlignmentScores(); // Since we're not using sim4, the normal method of ignoring aligns doesn't work. // Do it explicitly. if (config._doAlignments == false) { e._estAlignment = NULL; e._genAlignment = NULL; } // Save it if it is truely good. if ((p._percentIdentity >= config._minMatchIdentity) && (p._querySeqIdentity >= config._minMatchCoverage)) { char *pstr = p.s4p_polishToString(sim4polishStyleDefault); uint32 l = (uint32)strlen(pstr); if (qry->theOutputLen + l + 1 >= qry->theOutputMax) { qry->theOutputMax = qry->theOutputMax + qry->theOutputMax + l; char *o = 0L; try { o = new char [qry->theOutputMax]; } catch (...) { fprintf(stderr, "doPolish()-- Can't reallocate space for the output string ("uint32FMT" bytes) in thread "uint64FMT"\n", qry->theOutputMax, state->threadID); abort(); } memcpy(o, qry->theOutput, sizeof(char) * qry->theOutputLen); delete [] qry->theOutput; qry->theOutput = o; } memcpy(qry->theOutput + qry->theOutputLen, pstr, sizeof(char) * l); qry->theOutputLen += l; qry->theOutput[qry->theOutputLen] = 0; delete [] pstr; // Save the best scores // uint32 pi = p._percentIdentity; uint32 pc = p._querySeqIdentity; qry->theHits[h]._status |= pi << 16; qry->theHits[h]._status |= pc << 24; successes <<= 1; if ((pi >= config._minMatchIdentity) && (pc >= config._minMatchCoverage)) { //fprintf(stderr, "GOOD "uint32FMT" "uint32FMT"\n", pi, pc); successes |= uint64ONE; } else { //fprintf(stderr, "BAD "uint32FMT" "uint32FMT"\n", pi, pc); successes |= uint64ZERO; } successes &= successMask; } // Before sim4polish and sim4polishExon go out of scope, reset the pointers. Ugly, but needs // to be done else the destructors try to delete things it doesn't own (alignments) or // allocated on the stack (sim4polishExon). 
p._estDefLine = 0L; p._genDefLine = 0L; p._exons = 0L; e._estAlignment = 0L; e._genAlignment = 0L; } delete GENseq; } // over all hits } kmer-code-2013-trunk/snapper/snapper2-sge.pl0000644000000000000000000001146410401200245017453 0ustar rootroot#!/usr/bin/perl # Runs snapper2 on SGE, splitting both the genome and query sequences. use FindBin; my $genome = ""; my $query = ""; my $dir = ""; my $mask = 1000; my $gseg = 32; my $qseg = 32; my $check = undef; my $bin = "$FindBin::Bin"; while (scalar(@ARGV) > 0) { $arg = shift @ARGV; if ($arg =~ m/^-genome/) { $genome = shift @ARGV; } elsif ($arg =~ m/^-query/) { $query = shift @ARGV; } elsif ($arg =~ m/^-dir/) { $dir = shift @ARGV; } elsif ($arg =~ m/^-mask/) { $mask = shift @ARGV; } elsif ($arg =~ m/^-gseg/) { $gseg = shift @ARGV; } elsif ($arg =~ m/^-qseg/) { $qseg = shift @ARGV; } elsif ($arg =~ m/^-check/) { $check = 1; } else { print STDERR "unknown option '$arg'\n"; } } # If we're checking the results, assume we are in the correct dir, # that the gen and qry dirs exist. # if (defined($check)) { my ($gen, $qry) = countSequences($dir); for (my $g=1; $g<=$gen; $g++) { for (my $q=1; $q<=$qry; $q++) { $g = substr("000$g", -3); $q = substr("000$q", -3); if (0) { print STDERR "Why am I doing this? I'm supposed to be checking overlap, not snapper....\n"; exit(1); } } } } if (!defined($genome) || !defined($query) || !defined($dir)) { print STDERR "usage: $0 [arg]\n"; print STDERR " -genome x.fasta\n"; print STDERR " -query q.fasta\n"; print STDERR " -dir /path/to/work\n"; print STDERR " -mask kmer-limit (def: 1000)\n"; print STDERR " -gseg gseg (def: 16 segs, see leaff for format\n"; print STDERR " -qseg qseg (def: 16 segs, see leaff for format\n"; print STDERR " -check (check a run, assume we are in the /path/to/work\n"; exit(1); } die "Can't find genome '$genome'\n" if (! -e $genome); die "Can't find queries '$query'\n" if (! -e $query); system("mkdir $dir") if (! -d $dir); die "Can't find '$dir'\n" if (! -d $dir); if (! 
-e "$dir/gen/gen.partitioned") { system("mkdir $dir/gen") if (! -d "$dir/gen"); system("$bin/leaff -F $genome --partition $dir/gen/gen $gseg"); open(F, "> $dir/gen/gen.partitioned"); close(F); } if (! -e "$dir/qry/qry.partitioned") { system("mkdir $dir/qry") if (! -d "$dir/qry"); system("$bin/leaff -F $query --partition $dir/qry/qry $qseg"); open(F, "> $dir/qry/qry.partitioned"); close(F); } # Build indexes for everyone -- this prevents the grid jobs from # racing to build them. And it lets us count how many jobs to # submit. # my ($gen, $qry) = countSequences($dir); open(F, "> $dir/run.sh"); print F "#!/bin/sh\n"; print F "PIECE=`expr \$SGE_TASK_ID - 1`\n"; print F "GPIECE=`expr \$PIECE % $gen + 1`\n"; print F "QPIECE=`expr \$PIECE / $gen + 1`\n"; print F "GPIECE=`printf %03d \$GPIECE`\n"; print F "QPIECE=`printf %03d \$QPIECE`\n"; print F "scratchname=/scratch/\$\$-\$GPIECE-\$QPIECE\n"; print F "\n"; print F "ulimit -c 0\n"; print F "#rm /scratch/[0-9]*-[0-9]*-[0-9]*\n"; print F "#echo $GPIECE $QPIECE $PIECE\n"; print F "\n"; print F "if [ -e $dir/map-gen$GPIECE-qlt$QPIECE.success ] ; then\n"; print F " echo map-gen$GPIECE-qlt$QPIECE already done\n"; print F " exit\n"; print F "fi\n"; print F "\n"; print F "$bin/snapper2 -verbose \\\n"; print F " -mersize 22 -merskip 0 \\\n"; print F " -minhitlength 22 -minhitcoverage 0.0 \\\n"; #print F " -setfilter 0.1500 0.1500 0.2500 \\\n"; print F " -validate $dir/map-gen\$GPIECE-qlt\$QPIECE.validate \\\n"; print F " -genomic $dir/gen/gen-\$GPIECE.fasta \\\n"; print F " -queries $dir/qry/qry-\$QPIECE.fasta \\\n"; print F " -ignore $mask \\\n"; #print F " -output \$scratchname \\\n"; print F " -noaligns \\\n"; print F " -numthreads 2 \\\n"; print F " -minmatchidentity 90 \\\n"; print F " -minmatchcoverage 4 \\\n"; print F " -loaderhighwatermark 1024 \\\n"; print F "| \\\n"; print F "bzip2 -9vc > \$scratchname.bz2 \\\n"; print F "&& \\\n"; print F "mv \$scratchname.bz2 $dir/map-gen\$GPIECE-qlt\$QPIECE.sim4db.bz2 \\\n"; print F 
"&& \\\n"; print F "touch $dir/map-gen\$GPIECE-qlt\$QPIECE.success\n"; close(F); my $numJobs = $gen * $qry; print STDOUT "qsub -pe thread 2 -t 1-$numJobs -p -50 -j y -o $dir/map-\\\$TASK_ID $dir/run.sh\n"; sub countSequences { my $dir = shift @_; my $gen = 0; my $qry = 0; open(F, "ls $dir/gen/gen-*.fasta $dir/qry/qry-*.fasta |"); while () { chomp; if (! -e "${_}idx") { system("$bin/leaff -F $_"); } if (m/\/gen-\d\d\d.fasta$/) { $gen++; } elsif (m/\/qry-\d\d\d.fasta$/) { $qry++; } else { print STDERR "ERROR: Unknown file '$_'\n"; } } close(F); return($gen, $qry); } kmer-code-2013-trunk/snapper/test/0000755000000000000000000000000012641613360015577 5ustar rootrootkmer-code-2013-trunk/snapper/test/Makefile0000644000000000000000000000112210431022242017217 0ustar rootroot all: @../../leaff/leaff -G 1 2000 2000 > 1.f @../../leaff/leaff -G 1 700 700 > 2.f @../../leaff/leaff -G 1 3000 3000 > 3.f @echo ">g1" > g.f @cat 1.f 2.f 3.f | grep -v '>' >> g.f @echo ">g2" >> g.f @cat 3.f 2.f 1.f | grep -v '>' >> g.f @echo ">g3" >> g.f @cat 1.f 2.f 3.f | grep -v '>' >> g.f @rm -f 1.f 3.f ../snapper2 -mersize 23 -queries 2.f -genomic g.f -output x.out @cat x.out @echo "" @echo "Check that the ranges are 1900-2800, 2900-3800 and 1900-2800" @echo "Check that ddefs change" @echo "Check that exons are the same" @rm -f 1.f* 2.f* 3.f* g.f* x.tmp* x.out* kmer-code-2013-trunk/snapper/configuration.C0000644000000000000000000002564412322046702017602 0ustar rootroot#include "snapper2.H" configuration::configuration() { _beVerbose = false; _KBmerSize = 20; _KBcompression = 0; _KBspacingTemplate = 0L; _merSkip = 0; _numSearchThreads = 4; _doReverse = true; _doForward = true; _doValidation = false; _doValidationFileName = 0L; _doAlignments = false; _Lo = 0.5; _Hi = 1.0; _Va = 0.6; _maxDiagonal = 25; // Alternate match extension scheme _extendWeight = 2.0; _extendMinimum = 100; _extendMaximum = 2000; _repeatThreshold = 3; _minHitLength = 0; _minHitCoverage = 0.2; _minMatchIdentity = 98; 
_minMatchCoverage = 96; _afEnabled = false; _afThreshold = 0.25; _afLength = 64; _afInit = 5; _discardExonLength = 64; _discardExonQuality = 90; _splitMatches = true; _polishOptimally = false; _dbFileName = 0L; _psFileName = 0L; _qsFileName = 0L; _maskFileName = 0L; _onlyFileName = 0L; _ignoreThreshold = 0; _maskPrefix = 0L; _maskThreshold = 0; _onlyPrefix = 0L; _onlyThreshold = 0; _outputFileName = 0L; _logmsgFileName = 0L; _statsFileName = 0L; _buildOnly = false; } configuration::~configuration() { } void configuration::read(int argc, char **argv) { int arg = 1; int err = 0; while (arg < argc) { if (strcmp(argv[arg], "-mersize") == 0) { _KBmerSize = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-merskip") == 0) { _merSkip = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-compression") == 0) { _KBcompression = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-template") == 0) { _KBspacingTemplate = argv[++arg]; } else if (strcmp(argv[arg], "-numthreads") == 0) { _numSearchThreads = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-ignore") == 0) { _ignoreThreshold = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-mask") == 0) { _maskFileName = argv[++arg]; } else if (strcmp(argv[arg], "-only") == 0) { _onlyFileName = argv[++arg]; } else if (strcmp(argv[arg], "-maskn") == 0) { _maskPrefix = argv[++arg]; _maskThreshold = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-onlyn") == 0) { _onlyPrefix = argv[++arg]; _onlyThreshold = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-queries") == 0) { _qsFileName = argv[++arg]; } else if (strcmp(argv[arg], "-genomic") == 0) { _dbFileName = argv[++arg]; } else if (strcmp(argv[arg], "-positions") == 0) { _psFileName = argv[++arg]; } else if (strcmp(argv[arg], "-buildonly") == 0) { _buildOnly = argv[arg]; } else if (strcmp(argv[arg], "-forward") == 0) { _doForward = true; _doReverse = false; } else if (strcmp(argv[arg], "-reverse") == 0) { 
_doReverse = true; _doForward = false; } else if (strcmp(argv[arg], "-validate") == 0) { _doValidation = true; _doValidationFileName = argv[++arg]; } else if ((strcmp(argv[arg], "-setfilter") == 0) || (strcmp(argv[arg], "-lhv") == 0) || (strcmp(argv[arg], "-LHV") == 0)) { _Lo = atof(argv[++arg]); _Hi = atof(argv[++arg]); _Va = atof(argv[++arg]); } else if (strcmp(argv[arg], "-verbose") == 0) { _beVerbose = true; } else if (strcmp(argv[arg], "-output") == 0) { _outputFileName = argv[++arg]; } else if (strcmp(argv[arg], "-aligns") == 0) { _doAlignments = true; } else if (strcmp(argv[arg], "-noaligns") == 0) { _doAlignments = false; } else if (strcmp(argv[arg], "-log") == 0) { _logmsgFileName = argv[++arg]; } else if (strcmp(argv[arg], "-stats") == 0) { _statsFileName = argv[++arg]; } else if (strcmp(argv[arg], "-dp") == 0) { _polishOptimally = true; } else if (strcmp(argv[arg], "-maxdiagonal") == 0) { _maxDiagonal = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-minhitlength") == 0) { _minHitLength = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-minhitcoverage") == 0) { _minHitCoverage = atof(argv[++arg]); } else if (strcmp(argv[arg], "-minmatchidentity") == 0) { _minMatchIdentity = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-minmatchcoverage") == 0) { _minMatchCoverage = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-af") == 0) { _afEnabled = true; } else if (strcmp(argv[arg], "-afthreshold") == 0) { _afThreshold = atof(argv[++arg]); _afEnabled = true; } else if (strcmp(argv[arg], "-aflength") == 0) { _afLength = strtouint32(argv[++arg], 0L); _afEnabled = true; } else if (strcmp(argv[arg], "-afinit") == 0) { _afInit = strtouint32(argv[++arg], 0L); _afEnabled = true; } else if (strcmp(argv[arg], "-discardexonlength") == 0) { _discardExonLength = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-discardexonquality") == 0) { _discardExonQuality = strtouint32(argv[++arg], 0L); } else if 
(strncmp(argv[arg], "-extendweight", 8) == 0) { _extendWeight = atof(argv[++arg]); } else if (strncmp(argv[arg], "-extendminimum", 8) == 0) { _extendMinimum = strtouint32(argv[++arg], 0L); } else if (strncmp(argv[arg], "-extendmaximum", 8) == 0) { _extendMaximum = strtouint32(argv[++arg], 0L); } else if (strncmp(argv[arg], "-repeatthreshold", 8) == 0) { _repeatThreshold = strtouint32(argv[++arg], 0L); } else { fprintf(stderr, "Unknown option '%s'\n", argv[arg]); err++; } arg++; } // // Make sure some constraints are met // if (_maskFileName && _onlyFileName) fprintf(stderr, "ERROR: At most one of -mask and -only may be used.\n"), err++; if (_merSkip >= _KBmerSize) fprintf(stderr, "ERROR: Mers are not adjacent; make sure merskip <= mersize.\n"), err++; if ((_KBcompression) || (_KBspacingTemplate)) fprintf(stderr, "ERROR: Mer compression and spacing not supported right now. :-(\n"), err++; if ((_afThreshold < 0) || (_afThreshold > 1.0)) fprintf(stderr, "ERROR: Invalid afThreshold %f, should be 0.0 <= t <= 1.0\n", _afThreshold), err++; if (64 < _afLength) fprintf(stderr, "ERROR: Invalid afLength "uint32FMT", should be < 64.\n", _afLength), err++; if ((_qsFileName == 0L) && (_buildOnly == false)) fprintf(stderr, "ERROR: No query file supplied.\n"), err++; if (_dbFileName == 0L) fprintf(stderr, "ERROR: No genome file supplied.\n"), err++; // // Be helpful. 
// if (err) { fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "Algorithm Options:\n"); fprintf(stderr, " -forward Search only the normal cDNA.\n"); fprintf(stderr, " -reverse Search only the reverse-complement cDNA.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -mersize k Use k-mers.\n"); fprintf(stderr, " -merskip l Skip l mers between.\n"); fprintf(stderr, " -compression c Compress homopolymer runs to c letters.\n"); fprintf(stderr, " -template t Use spaced seed template t.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -dp Optimially polish (broken)\n"); fprintf(stderr, "\n"); fprintf(stderr, " -maxdiagonal d Maximum diagonal gap within a hit (25).\n"); fprintf(stderr, " -minhitlength l Minimum length for a hit to be polished (0).\n"); fprintf(stderr, " -minhitcoverage c Minimum coverage for a hit to be polished (0.2, 0.0 to 1.0).\n"); fprintf(stderr, " -minmatchidentity i Minimum percent identity for matches (98, integer).\n"); fprintf(stderr, " -minmatchcoverage c Minimum coverage for matches (96, integer).\n"); fprintf(stderr, " -discardexonlength l Discard exons less than l bp long (64).\n"); fprintf(stderr, " -discardexonquality p Discard exons less than p percent identity (90).\n"); fprintf(stderr, " -extendweight w For each unhit base, extend by this much (2).\n"); fprintf(stderr, " -extendminimum e Extend hits by at least this much (100).\n"); fprintf(stderr, " -extendmaximum e Extend hits by at most this much (2000).\n"); fprintf(stderr, " -repeatthreshold t Tune hits to expect t local repeat count (3).\n"); fprintf(stderr, "\n"); fprintf(stderr, "Filter and Filter Validation:\n"); fprintf(stderr, " -setfilter L H V Use { L,H,V } as the filter parameters.\n"); fprintf(stderr, " -validate Enable tuning of the filter (expensive!).\n"); fprintf(stderr, "\n"); fprintf(stderr, "Masking Options:\n"); fprintf(stderr, " -ignore n Ignore mers with count more than n.\n"); fprintf(stderr, " -mask f Ignore (only use) all 
mers listed in file f.\n"); fprintf(stderr, " -only f\n"); fprintf(stderr, " -maskn f n Ignore (only use) the mers listed in meryl prefix f.\n"); fprintf(stderr, " -onlyn f n For mask, mers with count >= n are masked.\n"); fprintf(stderr, " For only, mers with count <= n are used.\n"); fprintf(stderr, "\n"); fprintf(stderr, "Input Options:\n"); fprintf(stderr, " -queries c.fasta Query sequences.\n"); fprintf(stderr, " -genomic g.fasta Database sequences.\n"); fprintf(stderr, " -positions p.positionDB Build and save / use positionDB. Assumes you aren't using -use.\n"); fprintf(stderr, " -buildonly Only do the build and save.\n"); fprintf(stderr, " -use [...]\n"); fprintf(stderr, "\n"); fprintf(stderr, "Process Options:\n"); fprintf(stderr, " -numthreads n Use n search threads.\n"); fprintf(stderr, "\n"); fprintf(stderr, "Output Options:\n"); fprintf(stderr, " -verbose Entertain the user with useless statistics.\n"); fprintf(stderr, " -output f Write output to file f.\n"); fprintf(stderr, " -{no}aligns Enable/Disable full alignments. Enabled by default.\n"); fprintf(stderr, " -log f Write some debugging/logging information to file f. 
This\n"); fprintf(stderr, " is mostly for developers, and does NOT provide useful\n"); fprintf(stderr, " information unless you know the guts of snapper.\n"); fprintf(stderr, " -stats f Write resource usage statistics to f.\n"); exit(1); } } kmer-code-2013-trunk/snapper/hitMatrix.C0000644000000000000000000002725412322046702016703 0ustar rootroot#include "snapper2.H" #define MINCOUNT 3 hitMatrix::hitMatrix(uint32 qsLen, uint32 qsMers, uint32 qsIdx, logMsg *theLog) { _qsLen = qsLen; _qsMers = qsMers; _qsIdx = qsIdx; _hitsLen = 0; _hitsMax = 8; _hits = new diagonalLine [_hitsMax]; _matches = 0L; _theLog = theLog; } hitMatrix::~hitMatrix() { delete [] _hits; } void hitMatrix::addMatch(uint32 isunique, uint32 qsLo, uint32 qsHi, uint32 dsLo, uint32 dsHi, merCovering *IL, merList *ML) { uint32 offset = 0; offset = (uint32)(config._extendWeight * qsLo); if (offset < config._extendMinimum) offset = config._extendMinimum; if (offset > config._extendMaximum) offset = config._extendMaximum; if (dsLo < offset) dsLo = 0; else dsLo -= offset; offset = (uint32)(config._extendWeight * (_qsLen - qsHi)); if (offset < config._extendMinimum) offset = config._extendMinimum; if (offset > config._extendMaximum) offset = config._extendMaximum; dsHi += offset; // Create a new match // // n = new match // m = current match // l = last match // trapMatch *n = new trapMatch(isunique, qsLo, qsHi, dsLo, dsHi, IL, ML); #ifdef SHOW_HITMATRIX _theLog->add("chained: Q::"uint32FMT"-"uint32FMT"("uint32FMT") G::"uint32FMT"-"uint32FMT"("uint32FMT")\n", qsLo, qsHi, qsHi - qsLo, dsLo, dsHi, dsHi - dsLo); #endif // And find a home for it in the list. No merging of matches is done here. It's // too hard. 
// if ((_matches == 0L) || (n->_dsHi > _matches->_dsHi)) { n->_next = _matches; _matches = n; } else { trapMatch *l = _matches; trapMatch *m = _matches->_next; while ((m) && (n->_dsHi < m->_dsHi)) { l = m; m = m->_next; } n->_next = m; l->_next = n; } } // Utility for sorting the diagonal lines in the hitMatrix // // The two comparison functions return true if the first line // is less than the second line. inline int compareLines(diagonalLine *A, diagonalLine *B, uint32 qsLen) { uint32 a = qsLen - A->val.qPos - 1 + A->val.dPos; uint32 b = qsLen - B->val.qPos - 1 + B->val.dPos; return(((a < b)) || ((a == b) && (A->val.qPos < B->val.qPos))); } inline int compareLines(uint32 l, uint32 q, diagonalLine *B, uint32 qsLen) { uint32 b = qsLen - B->val.qPos - 1 + B->val.dPos; return(((l < b)) || ((l == b) && (q < B->val.qPos))); } inline void adjustHeap(diagonalLine *L, int32 p, int32 n, uint32 qsLen) { uint64 v = L[p].all; uint32 q = L[p].val.qPos; uint32 l = qsLen - q - 1 + L[p].val.dPos; int32 c = (p << 1) + 1; // let c be the left child of p while (c < n) { // Find the larger of the two children // if ((c+1 < n) && compareLines(L+c, L+c+1, qsLen)) c++; // Does the node in question fit here? // if (compareLines(l, q, L+c, qsLen) == false) break; // Else, swap the parent and the child // L[p].all = L[c].all; // Move down the tree // p = c; c = (p << 1) + 1; } L[p].all = v; } void hitMatrix::filter(char direction, double minHitCoverage, uint32 minHitLength, aHit *&theHits, uint32 &theHitsPos, uint32 &theHitsMax) { if (_hitsLen == 0) return; // Decide on the minimum quality values; we pick the larger of // the fixed lengths, and the sequence length * coverage. // uint32 minLength = (uint32)(minHitCoverage * _qsLen); if (minLength < minHitLength) minLength = minHitLength; // First, sort by the dsPos. This is done so that we can find all the hits for // a specific scaffold. // sort_dsPos(); // Now, while there are hits left.... 
// uint32 firstHit = 0; uint32 lastHit = 0; uint32 currentSeq = 0; // // Step 1: Sort the mer-hits, chain, promote decent ones to matches // while (firstHit < _hitsLen) { // Move the currentSeq until the firstHit is below it. After // this loop, currentSeq is the sequence AFTER the one that we // want hits in. // while ((currentSeq < genomeMap->numberOfSequences()) && (genomeMap->startOf(currentSeq) <= _hits[firstHit].val.dPos)) currentSeq++; // Find the first hit that is in currentSeq. If this is the last sequence, // then, of course, all remaining hits are in it. // if (currentSeq < genomeMap->numberOfSequences()) { lastHit = firstHit + 1; while ((lastHit < _hitsLen) && (_hits[lastHit].val.dPos < genomeMap->startOf(currentSeq))) lastHit++; } else { lastHit = _hitsLen; } // Drop back one sequence; this is the sequence the hits are in. // currentSeq--; // Adjust the hits to be relative to the start of this sequence // for (uint32 i=firstHit; istartOf(currentSeq); // Sort them, if needed. // if (lastHit - firstHit > 1) { // We cheat; heapsort isn't too friendly to sorting the middle of // an array, so we make a new array in the middle! // diagonalLine *hitsToSort = _hits + firstHit; // Build the heap. I initially thought this could be done at the // same time as the scan for the last hit, but it can't (easily) // for (int32 i=(lastHit - firstHit)/2 - 1; i>=0; i--) adjustHeap(hitsToSort, i, lastHit - firstHit, _qsLen); // Sort the hits by diagonal. 
This is the second part of // heap sort -- Interchange the new maximum with the element // at the end of the tree // for (uint32 i=lastHit - firstHit - 1; i>0; i--) { uint64 v = hitsToSort[i].all; hitsToSort[i].all = hitsToSort[0].all; hitsToSort[0].all = v; adjustHeap(hitsToSort, 0, i, _qsLen); } } // Filter them // uint32 frstDiagonal = _qsLen - _hits[firstHit].val.qPos - 1 + _hits[firstHit].val.dPos; uint32 lastDiagonal = frstDiagonal; uint32 unique = uint32ZERO; uint32 qsLow = _hits[firstHit].val.qPos; uint32 qsHigh = _hits[firstHit].val.qPos; uint32 dsLow = _hits[firstHit].val.dPos; uint32 dsHigh = _hits[firstHit].val.dPos; uint32 minCount = ~uint32ZERO; merCovering *IL = new merCovering(config._KBmerSize); merList *ML = new merList(); for (uint32 i=firstHit; i= thisDiagonalID) { lastDiagonal = thisDiagonalID; if (qsLow > _hits[i].val.qPos) qsLow = _hits[i].val.qPos; if (qsHigh < _hits[i].val.qPos) qsHigh = _hits[i].val.qPos; if (dsLow > _hits[i].val.dPos) dsLow = _hits[i].val.dPos; if (dsHigh < _hits[i].val.dPos) dsHigh = _hits[i].val.dPos; if (minCount > _hits[i].val.uniq) minCount = _hits[i].val.uniq; IL->addMer(_hits[i].val.qPos); ML->addMer(_hits[i].val.qPos, _hits[i].val.dPos); continue; } // Doesn't look like these hits belong together. Promote the hit // to a match if it's decent. IL->merge(); if ((minCount <= MINCOUNT) || (minLength <= IL->sumOfLengths())) { addMatch(minCount <= MINCOUNT, qsLow, qsHigh + config._KBmerSize, dsLow, dsHigh + config._KBmerSize, IL, ML); IL = new merCovering(config._KBmerSize); ML = new merList(); } else { IL->clear(); ML->clear(); } frstDiagonal = thisDiagonalID; lastDiagonal = thisDiagonalID; qsLow = _hits[i].val.qPos; qsHigh = _hits[i].val.qPos; dsLow = _hits[i].val.dPos; dsHigh = _hits[i].val.dPos; minCount = _hits[i].val.uniq; IL->addMer(_hits[i].val.qPos); ML->addMer(_hits[i].val.qPos, _hits[i].val.dPos); } // Save the final cluster? 
IL->merge(); if ((minCount <= MINCOUNT) || (minLength <= IL->sumOfLengths())) { addMatch(minCount <= MINCOUNT, qsLow, qsHigh + config._KBmerSize, dsLow, dsHigh + config._KBmerSize, IL, ML); } else { delete IL; delete ML; } // // Step 2: Merge matches into, sigh, hits, stuff them into the output // while (_matches) { // Save the current match, then delete it. // unique = _matches->_unique; dsLow = _matches->_dsLo; dsHigh = _matches->_dsHi; IL = _matches->_IL; ML = _matches->_ML; { trapMatch *n = _matches; _matches = _matches->_next; delete n; } // Assimilate as many of the remaining matches as possible. // // Think of this as first reversing the list, then merging as // long as (dsHigh + 1000 > _matches->_dsLo). But since we // don't reverse the list, we can map: // dsHigh --> _matches->dsHi // _matches->_dsLo --> dsLow // where dsHigh and dsLow are the values for the extended match. // while (_matches && (dsLow < _matches->_dsHi + 5000)) { // Combine the two merCoverings // IL->merge(_matches->_IL); ML->merge(_matches->_ML); delete _matches->_IL; delete _matches->_ML; // The start of the new match might be after the start of the // merged region. (Only rarely is it before) // // The end of current match is always greater than the end of the // new match! // if (dsLow > _matches->_dsLo) dsLow = _matches->_dsLo; unique |= _matches->_unique; { trapMatch *n = _matches; _matches = _matches->_next; delete n; } } if (theHitsPos >= theHitsMax) { theHitsMax <<= 1; aHit *o = 0L; try { o = new aHit [theHitsMax]; } catch (std::bad_alloc) { fprintf(stderr, "hitMatrix::filter()-- caught std::bad_alloc in %s at line %d\n", __FILE__, __LINE__); fprintf(stderr, "hitMatrix::filter()-- tried to extend output string from "uint32FMT" to "uint32FMT".\n", theHitsPos, theHitsMax); exit(1); } memcpy(o, theHits, theHitsPos * sizeof(aHit)); delete [] theHits; theHits = o; } IL->merge(); aHit *a = theHits + theHitsPos++; a->_status = (direction == 'f'); a->_status |= (unique ? 
AHIT_HAS_UNIQUE : 0); a->_qsIdx = _qsIdx; a->_dsIdx = genomeMap->IIDOf(currentSeq); a->_dsLo = dsLow; a->_dsHi = dsHigh; a->_covered = IL->sumOfLengths(); a->_matched = IL->numberOfPieces(); //numberOfIntervals(); a->_numMers = _qsMers; a->_ML = ML; assert(a->_dsLo < a->_dsHi); #ifdef SHOW_HITMATRIX _theLog->add("merged: G::"uint32FMT"-"uint32FMT"("uint32FMT") q:"uint32FMT" g:"uint32FMT" cov:"uint32FMT" mat:"uint32FMT" mer:"uint32FMT"\n", a->_dsLo, a->_dsHi, a->_dsHi - a->_dsLo, a->_qsIdx, a->_dsIdx, a->_covered, a->_matched, a->_numMers); #endif delete IL; } // All done with these hits. Move to the next set. // firstHit = lastHit; } } kmer-code-2013-trunk/snapper/snapper2.H0000644000000000000000000002503212322046702016461 0ustar rootroot#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bio++.H" #include "positionDB.H" #include "existDB.H" #include "sim4.H" #include "logMsg.H" #define MAX_THREADS 64 // A meta-option. Enable all the good stuff. Or not, I guess. // #if 0 // Define this to print a message for each search, showing times and // stats, unless it finished quicker than some minimum time. // #define VERBOSE_SEARCH #define VERBOSE_SEARCH_MINIMUM_TIME 1.0 // Define this to print the number of hits (raw and filtered) for each // sequence, unless it has fewer than some minimum number of raw hits. // #define VERBOSE_FILTER #define VERBOSE_FILTER_MINIMUM 10 // Define this to show the hitMatrix #define SHOW_HITMATRIX // Define this to print a message whenever a polish starts. // #define SHOW_POLISHING // Define these to show polishes that take a long time -- individual // polishes, not all polishes for a single sequence. The time is in // seconds. // #define SHOW_POLISHING_EXPENSIVE 0.5 // Define this to show the exon discarding and match splitting. // #define SHOW_MATCH_SPLITTING // Define this to show the "hit discarding" results. 
Any hits // that look like they are repeats are re-searched using nearly // unique mers. // #define SHOW_HIT_DISCARDING // Define this to show the kmers being added to the sim4command from // thr-polish.C. This generates a lot of output! // #define SHOW_HITS_ADDED //#define SHOW_HITS_ADDED_AFTER_QUERY 0 // END OF GOOD STUFF! #endif class configuration { public: configuration(); ~configuration(); void read(int argc, char **argv); void setTime(struct timespec *ts, double t) { ts->tv_sec = (time_t)floor(t); ts->tv_nsec = (long)((t - ts->tv_sec) * 1e9); }; public: bool _beVerbose; // These are private to the kMerBuilder -- in particular, we cannot // use _merSize as a surrogate for the length of the seed, // compressed seeds can be much longer. // uint32 _KBmerSize; uint32 _KBcompression; char *_KBspacingTemplate; uint32 _merSkip; uint32 _numSearchThreads; bool _doReverse; bool _doForward; bool _doValidation; char *_doValidationFileName; bool _doAlignments; double _Lo; double _Hi; double _Va; uint32 _maxDiagonal; double _extendWeight; uint32 _extendMinimum; uint32 _extendMaximum; uint32 _repeatThreshold; // Minimums for hits double _minHitCoverage; uint32 _minHitLength; // Minimums for matches uint32 _minMatchIdentity; uint32 _minMatchCoverage; // Filtering of hits bool _afEnabled; double _afThreshold; uint32 _afLength; uint32 _afInit; // Filtering and/or cleanup of matches uint32 _discardExonLength; uint32 _discardExonQuality; bool _splitMatches; bool _polishOptimally; char *_dbFileName; char *_psFileName; char *_qsFileName; char *_maskFileName; char *_onlyFileName; bool _buildOnly; uint32 _ignoreThreshold; char *_maskPrefix; uint32 _maskThreshold; char *_onlyPrefix; uint32 _onlyThreshold; char *_outputFileName; char *_logmsgFileName; char *_statsFileName; }; // Shared data // extern configuration config; extern sim4parameters sim4params; extern seqCache *genome; extern seqStream *genomeMap; extern seqCache *qsFASTA; // Used exclusively by thr-loader.C extern existDB 
*maskDB; // thr-search.C extern existDB *onlyDB; // thr-search.C extern positionDB *positions; extern volatile uint32 numberOfQueries; // aHit -- storing the internal hits // // _status // & 0x00000001 -- direction, forward if set, otherwise reverse // 0x00000002 -- not filtered, if set, polish the hit // 0x00000004 -- not filtered, if set, polish the hit because it has something unique-ish // 0x00000008 -- match verified via polishng // 0x00000010 -- match discarded via hit refinement // 0x0000fff8 -- unused // 0x00ff0000 -- percent idendity of match // 0xff000000 -- percent coverage of match // #define AHIT_DIRECTION_MASK 0x00000001 #define AHIT_POLISHABLE 0x00000002 #define AHIT_HAS_UNIQUE 0x00000004 #define AHIT_VERIFIED 0x00000008 #define AHIT_DISCARDED 0x00000010 struct aHit { uint32 _status; uint32 _qsIdx; uint32 _dsIdx; uint32 _dsLo; uint32 _dsHi; uint32 _covered; uint32 _matched; uint32 _numMers; merList *_ML; }; class query { public: query() { seq = 0L; theHitsLen = 0; theHitsMax = 4; theHits = new aHit [theHitsMax]; theLog = 0L; if (config._logmsgFileName) theLog = new logMsg(true); theOutputLen = 0; theOutputMax = 0; theOutput = 0L; }; ~query() { delete seq; delete [] theHits; delete [] theLog; delete [] theOutput; }; bool loadSequence(seqCache *qs) { seq = qs->getSequenceInCore(); return(seq != 0L); }; seqInCore *seq; uint32 theHitsLen; uint32 theHitsMax; aHit *theHits; logMsg *theLog; uint32 theOutputLen; uint32 theOutputMax; char *theOutput; }; class searcherState { public: uint64 threadID; uint64 posnMax; uint64 posnLen; uint64 *posn; kMerBuilder *KB; void *DP; searcherState(uint64 U) { threadID = U; posnMax = 0; posnLen = 0; posn = 0L; KB = 0L; DP = 0L; }; ~searcherState() { delete [] posn; delete KB; }; }; struct diagonalLine { union { uint64 all; struct { uint64 uniq : 10; // uniqueness score for the mer here uint64 qPos : 22; // position in the query, 0 to 4M uint64 dPos : 32; // position in the genome, 0 to 4G } val; }; }; class trapMatch { 
public: uint32 _unique; uint32 _qsLo; uint32 _qsHi; uint32 _dsLo; uint32 _dsHi; merCovering *_IL; merList *_ML; trapMatch *_next; trapMatch(uint32 isunique, uint32 qsLo, uint32 qsHi, uint32 dsLo, uint32 dsHi, merCovering *IL, merList *ML) { _unique = isunique; _qsLo = qsLo; _qsHi = qsHi; _dsLo = dsLo; _dsHi = dsHi; _IL = IL; _ML = ML; _next = 0L; }; }; class hitMatrix { public: hitMatrix(uint32 qsLen, uint32 q, uint32 qsIdx, logMsg *theLog); ~hitMatrix(); void addHits(uint32 qi, uint64 *ps, uint64 cn, uint64 ad=0); void sort_diagonal(void); void sort_dsPos(void); void filter(char direction, double minHitCoverage, uint32 minHitLength, aHit *&theOutput, uint32 &theOutputPos, uint32 &theOutputMax); private: uint32 _qsLen; // Seq Len of Q uint32 _qsMers; // Valid mers in Q uint32 _qsIdx; // Index of Q in the FastA // Instead of building the lines during add(), we store // the information used to build lines, and then build them // in chain(). This was done to reduce simultaneous memory // usage, as the lineArrayMap and etc take up considerable space. // uint32 _hitsLen; uint32 _hitsMax; diagonalLine *_hits; logMsg *_theLog; // Making sense of the raw output from the search is not a trivial // task for perl. SMALL searches (dbEST vs 0.5MB sequence) used more // than 4GB of memory in perl. // // So, we bite the bullet and do it here. // // _matches is a sorted linked list of the regions we have found. // The list is kept in REVERSE order, as we usually add regions // in the correct order (correct reverse order), occasionally // we need to swap the last two. 
// // The list is deleted in filter() // trapMatch *_matches; void addMatch(uint32 isunique, uint32 qsLo, uint32 qsHi, uint32 dsLo, uint32 dsHi, merCovering *IL, merList *ML); }; inline void hitMatrix::addHits(uint32 qi, uint64 *ps, uint64 cn, uint64 ad) { if ((_hitsLen + cn) >= _hitsMax) { _hitsMax = _hitsMax + _hitsMax + (uint32)cn; diagonalLine *h; try { h = new diagonalLine [_hitsMax]; } catch (std::bad_alloc) { fprintf(stderr, "hitMatrix::addHits()-- caught std::bad_alloc in %s at line %d\n", __FILE__, __LINE__); fprintf(stderr, "hitMatrix::addHits()-- have "uint32FMT" hits, tried to add "uint64FMT" more\n", _hitsLen, cn); exit(1); } memcpy(h, _hits, sizeof(uint64) * _hitsLen); delete [] _hits; _hits = h; } uint64 uniq = cn; if (ad > 0) uniq = ad; if (uniq > 0x000003ff) uniq = 0x000003ff; for (uint64 i=0; i; chomp $hdr; while () { chomp; my @vals = split '\s+', $_; # 3 -> sensitivity # 4 -> specificity if ($spec{$vals[3]} < $vals[4]) { $spec{$vals[3]} = $vals[4]; $line{$vals[3]} = $_; } } close(F); # print "\n$file\n $hdr\n"; # my @sortedK = sort { $b <=> $a } keys %spec; # $#sortedK = $numToShow - 1; # foreach my $k (@sortedK) { # print "$k $spec{$k} -- $line{$k}\n"; # } if ($hdrshown == 0) { print " $hdr\n"; $hdrshown = 1; } $file = substr("$file ", 0, 40); my @sortedK = sort { $b <=> $a } keys %spec; $#sortedK = $numToShow - 1; foreach my $k (@sortedK) { printf "$file$k $spec{$k} -- $line{$k}\n"; } undef @sortedK; undef %spec; } kmer-code-2013-trunk/snapper/thr-filter.C0000644000000000000000000000621712322046702017006 0ustar rootroot#include "snapper2.H" uint32 configureFilter(double L, double H, double V, aHit *theHits, uint32 theHitsLen) { // Find the highest and lowest quality hit // uint32 hiQ = theHits[0]._covered; uint32 loQ = hiQ; for (uint32 i=0; i theHits[i]._covered) loQ = theHits[i]._covered; } // _numMers is not the same as the number covered, so we should // ensure that h is in range. 
// // Note: _numMers is constant for all hits, so we can use any of them // double h = (double)(hiQ - loQ) / (double)theHits[0]._numMers; if (h > 1.0) h = 1.0; double p = 0.0; if (h <= L) p = 1.0; else if (h >= H) p = V; else p = 1.0 - (1.0 - V) * (h - L) / (H - L); if (p > 1.0) { fprintf(stderr, "error in p; p=%f > 1.0! h=%f (L=%f H=%f V=%f)\n", p, h, L, H, V); p = 1.0; } if (V - p > 1e-10) { fprintf(stderr, "error in p; p=%f < V! h=%f (L=%f H=%f V=%f)\n", p, h, L, H, V); p = V; } // Any thing at or above cutL is good, and we should polish it. // Anything below is junk, and we should ignore it. // return((uint32)floor(hiQ - p * h * theHits[0]._numMers)); } int aHitAutoFilterSort(const void *a, const void *b) { const aHit *A = (const aHit *)a; const aHit *B = (const aHit *)b; // If either was discarded, we don't care the order, // just throw them at the end of the array // if ((A->_status & AHIT_DISCARDED) || (B->_status & AHIT_DISCARDED)) { if (A->_status & AHIT_DISCARDED) return(1); else if (B->_status & AHIT_DISCARDED) return(-1); return(0); } // Otherwise, snapper filters simply on coverage. // if (A->_covered > B->_covered) return(-1); else if (A->_covered < B->_covered) return(1); return(0); } void doFilter(searcherState *state, query *qry) { if (qry->theHitsLen == 0) return; uint32 numF = 0; // Auto filter -- keep polishing until a running average of // polishes falls below some threshold. // if (config._afEnabled) { qsort(qry->theHits, qry->theHitsLen, sizeof(aHit), aHitAutoFilterSort); for (uint32 i=0; i < qry->theHitsLen; i++) qry->theHits[i]._status |= AHIT_POLISHABLE; numF = qry->theHitsLen; } else { uint32 cutL = configureFilter(config._Lo, config._Hi, config._Va, qry->theHits, qry->theHitsLen); // If the coverage of the hit is more than the minimum, mark the // hit as polishable. Unless the hit was discarded. 
for (uint32 i=0; i < qry->theHitsLen; i++) { if (!(qry->theHits[i]._status & AHIT_DISCARDED) && (qry->theHits[i]._covered >= cutL)) { qry->theHits[i]._status |= AHIT_POLISHABLE; numF++; } } } #ifdef VERBOSE_FILTER if (qry->theHitsLen >= VERBOSE_FILTER_MINIMUM) theLog->add("Query "uint32FMT" with "uint32FMT" good hits out of "uint32FMT" total hits.\n", idx, numF, qry->theHitsLen); #endif } kmer-code-2013-trunk/snapper/hitMatrix-sort.C0000644000000000000000000000210012322046702017647 0ustar rootroot#include "snapper2.H" // Sort by dsPos inline void adjustHeap_dsPos(diagonalLine *L, uint32 p, uint32 n) { uint64 v = L[p].all; uint64 d = L[p].val.dPos; uint32 c = (p << 1) + 1; // let c be the left child of p while (c < n) { // Find the larger of the two children // if ((c+1 < n) && (L[c].val.dPos < L[c+1].val.dPos)) c++; // Does the node in question fit here? // if (d >= L[c].val.dPos) break; // Else, swap the parent and the child // L[p].all = L[c].all; // Move down the tree // p = c; c = (p << 1) + 1; } L[p].all = v; } void hitMatrix::sort_dsPos(void) { if (_hitsLen > 1) { // Create the heap of lines. // for (uint32 i=_hitsLen/2; i--; ) adjustHeap_dsPos(_hits, i, _hitsLen); // Interchange the new maximum with the element at the end of the tree // for (uint32 i=_hitsLen-1; i>0; i--) { uint64 v = _hits[i].all; _hits[i].all = _hits[0].all; _hits[0].all = v; adjustHeap_dsPos(_hits, 0, i); } } } kmer-code-2013-trunk/snapper/thr-polish.C0000644000000000000000000002763512322046702017026 0ustar rootroot#include "snapper2.H" void doPolishS4(searcherState *state, query *qry) { // For the autofilter uint64 successes = uint64ZERO; uint64 successMask = uint64MASK(config._afLength); uint32 attempts = 0; if (qry->theHitsLen == 0) return; qry->theOutputLen = 0; qry->theOutputMax = 2 * 1024 * qry->theHitsLen; qry->theOutput = new char [qry->theOutputMax]; qry->theOutput[0] = 0; for (uint32 h=0; htheHitsLen; h++) { // If the hit was discarded, move along. 
// if (qry->theHits[h]._status & AHIT_DISCARDED) { #ifdef SHOW_HIT_DISCARDING qry->theLog->add("Hit %u out of %u (%u -> %u[%u-%u]) cov=%u matched=%u numMers=%u DISCARDED\n", h, qry->theHitsLen, qry->seq->getIID(), qry->theHits[h]._dsIdx, qry->theHits[h]._dsLo, qry->theHits[h]._dsHi, qry->theHits[h]._covered, qry->theHits[h]._matched, qry->theHits[h]._numMers); #endif continue; } // If the hit was filtered out, move along. // if ((config._doValidation == false) && ((qry->theHits[h]._status & AHIT_POLISHABLE) == 0) && ((qry->theHits[h]._status & AHIT_HAS_UNIQUE) == 0)) continue; // If our recent success rate is pretty terrible, continue. // if (config._afEnabled) { if (attempts > config._afInit) { double rat = countNumberOfSetBits64(successes) / (double)((attempts < config._afLength) ? attempts : config._afLength); #if 0 fprintf(stderr, "autofilter: hit "uint32FMT" out of "uint32FMT" (attempts="uint32FMT") with rate %f\n", h, qry->theHitsLen, attempts, rat); #endif // If we've hit the end of the good polishes, give up. But // still do all the stuff with unique mers in them. // if (((qry->theHits[h]._status & AHIT_HAS_UNIQUE) == 0) && (rat < config._afThreshold)) continue; } attempts++; } // // Polish it up! // seqInCore *ESTseq = qry->seq; seqInCore *GENseq = genome->getSequenceInCore(qry->theHits[h]._dsIdx); uint32 GENlo = qry->theHits[h]._dsLo; uint32 GENhi = qry->theHits[h]._dsHi; if (GENhi > GENseq->sequenceLength()) GENhi = GENseq->sequenceLength(); assert(GENlo < GENhi); bool doForward = qry->theHits[h]._status & AHIT_DIRECTION_MASK; bool doReverse = !doForward; #ifdef SHOW_POLISHING qry->theLog->add("Hit %u out of %u (%u -> %u[%u-%u]) dir=%c cov=%u matched=%u numMers=%u\n", h, qry->theHitsLen, ESTseq->getIID(), qry->theHits[h]._dsIdx, qry->theHits[h]._dsLo, qry->theHits[h]._dsHi, doForward ? 
'F' : 'R', qry->theHits[h]._covered, qry->theHits[h]._matched, qry->theHits[h]._numMers); #endif #ifdef SHOW_POLISHING_EXPENSIVE double startTime = getTime(); #endif sim4command *P4 = new sim4command(ESTseq, GENseq, GENlo, GENhi, doForward, doReverse); //////////////////////////////////////// // // Add hits to the command // // addSeed() expects base-based, of the last position in // the seed. We have space-based, first position. Adding // the size of a mer fixes both. // if (doForward) { for (uint32 i=0, x, y; qry->theHits[h]._ML->getMer(i, x, y); i++) { #ifdef SHOW_HITS_ADDED #ifdef SHOW_HITS_ADDED_AFTER_QUERY if (ESTseq->getIID() > SHOW_HITS_ADDED_AFTER_QUERY) #endif qry->theLog->add("FORWARDHIT GEN: hi:"uint32FMT"-lo:"uint32FMT" pos:"uint32FMT" EST: len:"uint32FMT" pos:"uint32FMT"\n", GENhi, GENlo, y, (uint32)ESTseq->sequenceLength(), x); #endif assert(y + config._KBmerSize >= GENlo); P4->addSeed(y - GENlo + config._KBmerSize, x + config._KBmerSize, config._KBmerSize); } } else { for (uint32 i=0, x, y; qry->theHits[h]._ML->getMer(i, x, y); i++) { #ifdef SHOW_HITS_ADDED #ifdef SHOW_HITS_ADDED_AFTER_QUERY if (ESTseq->getIID() > SHOW_HITS_ADDED_AFTER_QUERY) #endif qry->theLog->add("REVERSEHIT GEN: hi:"uint32FMT"-lo:"uint32FMT" pos:"uint32FMT" EST: len:"uint32FMT" pos:"uint32FMT"\n", GENhi, GENlo, y, (uint32)ESTseq->sequenceLength(), x); #endif // Original form was (GENhi-GENlo) - (y-GENlo), which // reduces to the below. By reversing, we no longer need // to add in the mersize, we're representing the end of // the mer now! // assert(GENhi >= y); assert(ESTseq->sequenceLength() >= x); P4->addSeed(GENhi - y, ESTseq->sequenceLength() - x, config._KBmerSize); } } // The main loop deletes the hits, but we take care of deleting _ML here. // Maybe it should go in the destructor for the hits?? 
// delete qry->theHits[h]._ML; qry->theHits[h]._ML = 0L; Sim4 *S4 = new Sim4(&sim4params); sim4polishList *l4 = S4->run(P4); sim4polishList &L4 = *l4; // Clean up the matches -- remove small exons from the match, // split things with big gaps into two matches. for (uint32 i=0; L4[i]; i++) { #ifdef SHOW_MATCH_SPLITTING qry->theLog->add(" match "uint32FMT" has "uint32FMT" exons.\n", i, L4[i]->_numExons); for (uint32 j=L4[i]->_numExons; j--; ) qry->theLog->add(" exon "uint32FMT" query:"uint32FMT"-"uint32FMT" genome:"uint32FMT"-"uint32FMT" id:%d nm:%d\n", j, L4[i]->_exons[j].estFrom, L4[i]->_exons[j].estTo, L4[i]->_exons[j]._genFrom, L4[i]->_exons[j]._genTo, L4[i]->_exons[j]._percentIdentity, L4[i]->_exons[j]._numMatches); #endif for (uint32 j=L4[i]->_numExons; j--; ) { if (((L4[i]->_exons[j]._estTo - L4[i]->_exons[j]._estFrom) < config._discardExonLength) || (L4[i]->_exons[j]._percentIdentity < config._discardExonQuality)) { #ifdef SHOW_MATCH_SPLITTING qry->theLog->add(" Deleting exon "uint32FMT" from query:"uint32FMT"-"uint32FMT" genome:"uint32FMT"-"uint32FMT"\n", j, L4[i]->_exons[j]._estFrom, L4[i]->_exons[j]._estTo, L4[i]->_exons[j]._genFrom, L4[i]->_exons[j]._genTo); #endif L4[i]->s4p_deleteExon(j); } } // Copy each exon into a new match ("split things with big gaps") while (L4[i]->_numExons > 1) { #ifdef SHOW_MATCH_SPLITTING qry->theLog->add(" Saving exon "uint32FMT" from query:"uint32FMT"-"uint32FMT" genome:"uint32FMT"-"uint32FMT"\n", L4[i]->_numExons-1, L4[i]->_exons[L4[i]->_numExons-1]._estFrom, L4[i]->_exons[L4[i]->_numExons-1]._estTo, L4[i]->_exons[L4[i]->_numExons-1]._genFrom, L4[i]->_exons[L4[i]->_numExons-1]._genTo); #endif sim4polish *n = new sim4polish(L4[i], L4[i]->_numExons-1); L4.push(n); L4[i]->s4p_deleteExon(L4[i]->_numExons-1); } // Rebuild the stats on this guy -- we now have one exon, so just copy // the exon stats to the global stats. 
if (L4[i]->_numExons > 0) { #ifdef SHOW_MATCH_SPLITTING qry->theLog->add(" Saving exon "uint32FMT" from query:"uint32FMT"-"uint32FMT" genome:"uint32FMT"-"uint32FMT"\n", 0, L4[i]->_exons[0]._estFrom, L4[i]->_exons[0]._estTo, L4[i]->_exons[0]._genFrom, L4[i]->_exons[0]._genTo); #endif L4[i]->_numMatches = L4[i]->_exons[0]._numMatches; L4[i]->_numMatchesN = L4[i]->_exons[0]._numMatchesN; L4[i]->_numCovered = L4[i]->_exons[0]._genTo - L4[i]->_exons[0]._genFrom + 1; L4[i]->_percentIdentity = L4[i]->_exons[0]._percentIdentity; L4[i]->_querySeqIdentity = L4[i]->s4p_percentCoverageApprox(); } else { #ifdef SHOW_MATCH_SPLITTING qry->theLog->add(" All exons removed!\n"); #endif L4.remove(i); i--; } } // Even though we don't expect multiple polishes, we still have to deal with // them. :-( // Clear the 'match' flag and set qualities to zero. XXX: // Again, this should be already done, but we need to guarantee // it. // //qry->theHits[h]._status &= 0x00000003; // (I guess we don't _need_ to do it....) uint32 pi = 0; uint32 pc = 0; for (uint32 i=0; L4[i]; i++) { // We need to remember the best pair of percent // identity/coverage. These wil be stored in the hit after // we process all matches. 
// if ((L4[i]->_percentIdentity >= pi) && (L4[i]->_querySeqIdentity >= pc)) { pi = L4[i]->_percentIdentity; pc = L4[i]->_querySeqIdentity; } #ifdef SHOW_POLISHING qry->theLog->add(" match["uint32FMT"] query:"uint32FMT"-"uint32FMT" genome:"uint32FMT"-"uint32FMT" id=%u cv=%d nm=%u\n", i, L4[i]->_exons[0]._estFrom, L4[i]->_exons[0]._estTo, L4[i]->_exons[0]._genFrom, L4[i]->_exons[0]._genTo, L4[i]->_percentIdentity, L4[i]->_querySeqIdentity, L4[i]->_exons[0]._numMatches); #endif // If we have a real hit, set the flag and save the output // if ((L4[i]->_percentIdentity >= config._minMatchIdentity) && (L4[i]->_querySeqIdentity >= config._minMatchCoverage)) { qry->theHits[h]._status |= AHIT_VERIFIED; char *pstr = L4[i]->s4p_polishToString(sim4polishStyleDefault); uint32 l = (uint32)strlen(pstr); if (qry->theOutputLen + l + 1 >= qry->theOutputMax) { qry->theOutputMax = qry->theOutputMax + qry->theOutputMax + l; char *o = 0L; try { o = new char [qry->theOutputMax]; } catch (...) { fprintf(stderr, "doPolish()-- Can't reallocate space for the output string ("uint32FMT" bytes) in thread "uint64FMT"\n", qry->theOutputMax, state->threadID); abort(); } memcpy(o, qry->theOutput, sizeof(char) * qry->theOutputLen); delete [] qry->theOutput; qry->theOutput = o; } memcpy(qry->theOutput + qry->theOutputLen, pstr, sizeof(char) * l); qry->theOutputLen += l; qry->theOutput[qry->theOutputLen] = 0; delete [] pstr; } } // Save the best scores // qry->theHits[h]._status |= pi << 16; qry->theHits[h]._status |= pc << 24; successes <<= 1; if ((pi >= config._minMatchIdentity) && (pc >= config._minMatchCoverage)) { //fprintf(stderr, "GOOD "uint32FMT" "uint32FMT"\n", pi, pc); successes |= uint64ONE; } else { //fprintf(stderr, "BAD "uint32FMT" "uint32FMT"\n", pi, pc); successes |= uint64ZERO; } successes &= successMask; delete l4; delete S4; delete P4; #ifdef SHOW_POLISHING_EXPENSIVE double elapsedTime = getTime() - startTime; if (elapsedTime >= SHOW_POLISHING_EXPENSIVE) { qry->theLog->add("Hit %u 
out of %u (%u -> %u[%u-%u]) took %f seconds ().\n", h, qry->theHitsLen, ESTseq->getIID(), GENseq->getIID(), qry->theHits[h]._dsLo, qry->theHits[h]._dsHi, elapsedTime); } #endif delete GENseq; } // over all hits } kmer-code-2013-trunk/snapper/Make.include0000644000000000000000000000171311512763666017057 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../libutil/)/ LIBBIO/ :=$(realpath $/../libbio/)/ LIBSEQ/ :=$(realpath $/../libseq/)/ LIBMERYL/ :=$(realpath $/../libmeryl/)/ LIBKMER/ :=$(realpath $/../libkmer/)/ LIBSIM4/ :=$(realpath $/../libsim4/)/ src := $/snapper2.C \ $/configuration.C \ $/thr-search.C \ $/thr-filter.C \ $/thr-polish.C \ $/thr-polish-dp.C \ $/hitMatrix.C \ $/hitMatrix-sort.C \ $/snapper2.H $/.CXX_SRCS := $(filter %.C,${src}) $/.CXX_EXES := $/snapper2 $/.CLEAN :=$/*.o $(eval $/%.d $/%.o: CXXFLAGS+= -I${LIBUTL/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBMERYL/} -I${LIBKMER/} -I${LIBSIM4/}) $/snapper2: ${$/.CXX_SRCS:.C=.o} \ ${LIBSIM4/}libsim4.a \ ${LIBKMER/}libkmer.a \ ${LIBMERYL/}libmeryl.a \ ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a \ ${LIBUTL/}libutil.a kmer-code-2013-trunk/snapper/thr-search.C0000644000000000000000000002255512322046702016771 0ustar rootroot#include "snapper2.H" #if defined (__SVR4) && defined (__sun) // Solaris defines SS in sys/regset.h #undef SS #endif class encodedQuery { private: uint64 *_mers; uint32 *_posn; uint32 *_span; uint32 _mersActive; uint32 _mersInQuery; public: encodedQuery(seqInCore *seq, kMerBuilder *KB, bool rc) { _mers = new uint64 [seq->sequenceLength()]; _posn = new uint32 [seq->sequenceLength()]; _span = new uint32 [seq->sequenceLength()]; _mersActive = 0; _mersInQuery = 0; // Unfortunately, we need to use the slightly heavyweight merStream // and kMerBuilder to get mers. We used to build mers in a tight // loop, but with the inclusion of spacing and compression, we // cannot do that anymore. 
seqStream *SS = new seqStream(seq->sequence(), seq->sequenceLength()); merStream *MS = new merStream(KB, SS); uint64 mer; uint32 val; // The rc flag tells us if we should build for the forward or // reverse strand. If forward (rc == false) the mers are in the // same order. If reverse, the mers are both reverse-complemented, // and appear in our mers[] and skip[] lists reversed. if (rc == false) { while (MS->nextMer()) { mer = MS->theFMer(); if ((maskDB && (maskDB->exists(mer) == true)) || (onlyDB && (onlyDB->exists(mer) == false))) ; // Don't use it. else { _mers[_mersActive] = mer; _posn[_mersActive] = MS->thePositionInSequence(); _span[_mersActive] = MS->theFMer().getMerSpan(); _mersActive++; } _mersInQuery++; } } else { while (MS->nextMer()) { mer = MS->theRMer(); if ((maskDB && (maskDB->exists(mer) == true)) || (onlyDB && (onlyDB->exists(mer) == false))) ; // Don't use it. else { // We die horribly unless we do the goofy math to get the // _posn. I'm sure that could be cleaned up, but it'd take // more effort than I want now (being we'd have to figure // out what the search/hitMatrix stuff is doing). _mers[_mersActive] = mer; _posn[_mersActive] = seq->sequenceLength() - MS->thePositionInSequence() - MS->theRMer().getMerSpan(); _span[_mersActive] = MS->theRMer().getMerSpan(); _mersActive++; } _mersInQuery++; } // Reverse the array -- this appears to be optional. 
#if 1 if (_mersActive > 0) for (uint32 i=0, j=_mersActive-1; iKB == 0L) state->KB = new kMerBuilder(config._KBmerSize, config._KBcompression, config._KBspacingTemplate); encodedQuery *encqry = new encodedQuery(qry->seq, state->KB, rc); hitMatrix *matrix = new hitMatrix(qry->seq->sequenceLength(), encqry->numberOfMersInQuery(), qry->seq->getIID(), qry->theLog); for (uint32 qidx=0; qidxnumberOfMersActive(); qidx++) { uint64 count = 0; if (positions->getExact(encqry->getMer(qidx), state->posn, state->posnMax, state->posnLen, count)) matrix->addHits(encqry->getPosn(qidx), state->posn, state->posnLen); } // Chain the hits // matrix->filter(rc ? 'r' : 'f', config._minHitCoverage, config._minHitLength, qry->theHits, qry->theHitsLen, qry->theHitsMax); //////////////////////////////////////// // // Refine the hits -- if any hit looks like it contains a repeat, // rebuild it using an adaptive mask threshold. // // We work backwards because we add on new hits to the end of our // list. // for (uint32 h=qry->theHitsLen; h--; ) { // The first test eliminates hits that were not generated for the // complementarity used in this search (e.g., the first search // does rc=forward, adds some hits, the second search does // rc=reverse, and we should skip all the rc=forward hits. // if (((qry->theHits[h]._status & AHIT_DIRECTION_MASK) == !rc) && (qry->theHits[h]._matched > 2 * qry->theHits[h]._numMers)) { #ifdef SHOW_HIT_DISCARDING qry->theLog->add("Seq "uint32FMT" Hit "uint32FMT" (%c) has "uint32FMT" matched, but only "uint32FMT" mers.\n", seq->getIID(), h, rc ? 'r' : 'f', qry->theHits[h]._matched, qry->theHits[h]._numMers); #endif // Grab the genomic sequence. // Construct a merstream for the region. // Build a positionDB of the region (both positions and counts). // Fill out another hitMatrix using about 2*length mers. 
// seqInCore *GENseq = genome->getSequenceInCore(qry->theHits[h]._dsIdx); uint32 GENlo = qry->theHits[h]._dsLo; uint32 GENhi = qry->theHits[h]._dsHi; merStream *MS = new merStream(state->KB, new seqStream(GENseq->sequence(), GENseq->sequenceLength()), false, true); MS->setBaseRange(GENlo, GENhi); positionDB *PS = new positionDB(MS, config._KBmerSize, 0, 0L, 0L, 0L, 0, 0, 0, 0, false); hitMatrix *HM = new hitMatrix(qry->seq->sequenceLength(), encqry->numberOfMersInQuery(), qry->seq->getIID(), qry->theLog); // We find the number of hits we would get if we use a // countLimit of i. // #define COUNT_MAX 256 uint32 numHitsAtCount[COUNT_MAX] = { 0 }; uint32 countLimit = 0; uint64 count = 0; uint32 numMers = 0; #ifdef SHOW_HIT_DISCARDING uint32 numHits = 0; uint32 minNum = ~uint32ZERO; uint32 maxNum = 0; #endif for (uint32 qidx=0; qidxnumberOfMersActive(); qidx++) { if (PS->getExact(encqry->getMer(qidx), state->posn, state->posnMax, state->posnLen, count)) { numMers++; if (state->posnLen < COUNT_MAX) numHitsAtCount[state->posnLen] += state->posnLen; #ifdef SHOW_HIT_DISCARDING numHits += state->posnLen; if (minNum > state->posnLen) minNum = state->posnLen; if (maxNum < state->posnLen) maxNum = state->posnLen; #endif } } // Scan the number of hits at count, pick the first highest // count such that the number of hits is below our threshold. 
// for (uint32 qidx=1; qidxtheLog->add(" -- found "uint32FMT" hits in "uint32FMT" mers, min="uint32FMT" max="uint32FMT" avg=%.5f hits/mer.\n", numHits, numMers, minNum, maxNum, (double)numHits / (double)numMers); qry->theLog->add(" -- using a countLimit of "uint32FMT" which gets us "uint32FMT" mers\n", countLimit, numHitsAtCount[countLimit]); #endif for (uint32 qidx=0; qidxnumberOfMersActive(); qidx++) { if (PS->getExact(encqry->getMer(qidx), state->posn, state->posnMax, state->posnLen, count)) { if (state->posnLen <= countLimit) { for (uint32 x=0; xposnLen; x++) state->posn[x] += genomeMap->startOf(qry->theHits[h]._dsIdx); // The kmer counts for these mers are relative to the // sub-regions, not the global, so we want to disable any // filtering by kmer counts. We could add a flag to the filter // to stop this, or we can reset the counts here to large // values. Or we could simply reset the counts to the global // value. // HM->addHits(encqry->getPosn(qidx), state->posn, state->posnLen, positions->countExact(encqry->getMer(qidx))); } } } // Chain the hits // HM->filter(rc ? 'r' : 'f', 0.01, 0, qry->theHits, qry->theHitsLen, qry->theHitsMax); // Mark this hit as dead // qry->theHits[h]._status |= AHIT_DISCARDED; delete HM; delete PS; delete MS; delete GENseq; } } delete matrix; delete encqry; } kmer-code-2013-trunk/README.compiling0000644000000000000000000000472612524131127016015 0ustar rootrootThis guide tells how to compile and install the software. ---------------------------------------- Quick Instructions: % gmake install There! That wasn't tough at all, was it!? The software is compiled in place, and installed into a directory named after the OS/architecture, for example, Linux-amd64. ---------------------------------------- Detailed Instructions: 0) Required Software 1) Configuration 2) Compilation 3) Installation 4) Other build targets ---------- 0) Required software Project kmer requires two additional software packages be installed: python and gmake. 
0.1) Python. Python (http://www.python.org/) is a freely available programming language. It is included with many OS installations. It is only needed by ATAC/A2Amapper. If python is not installed, ATAC/A2Amapper will not be built. Version 2.4+ is recommended. Version 2.3 has seen limited testing and seems to work. Version 2.2 might work, but is unsupported. 0.2) gmake. The GNU make program (gmake) is used to build the software. The BSD make will not work. gmake v 3.81 or higher is REQUIRED. This shouldn't be a problem, as that was released in April 2006. It _was_ a problem when most of this software was written. ---------- 1) Configuration This is optional. It allows compilation for debugging and profiling. % sh configure.sh [debug | profile] Supplying 'debug' as an argument will build debuggable executables. Supplying 'profile' as an argument will build profiling executables. Not all architectures support profiling. If configure.sh reports that your architecture is unsupported, you'll have to port...or force it to use, say, linux with "configure.sh linux". If configure.sh reports that python cannot be found, you likely need to install python, version 2.3 or 2.4. If you have python installed in an unusual location, edit the script. ---------- 2) Compilation % gmake If gmake crashes or returns gmake: *** No rule to make target `.all', needed by `all'. Stop. then you need to update your gmake to at least version 3.81. ---------- 3) Installation % gmake install will copy all executables, libraries and header files into an OS/architecture specific directory. Binaries, for example, will be in FreeBSD-amd64/bin or Linux-i686/bin. ---------- 4) Other build targets 'gmake clean' will remove the object files, leaving the binaries. 'gmake real-clean' will remove all traces of a build, leaving you with (hopefully) a virgin copy of the software. 
kmer-code-2013-trunk/ESTmapper LaTeX/0000755000000000000000000000000012641613360015746 5ustar rootrootkmer-code-2013-trunk/ESTmapper LaTeX/mrna-filter.ps0000644000000000000000000015710110562262400020532 0ustar rootroot%!PS-Adobe-2.0 %%Creator: dvips(k) 5.86 Copyright 1999 Radical Eye Software %%Title: mrna-filter.dvi %%Pages: 2 %%PageOrder: Ascend %%BoundingBox: 0 0 596 842 %%EndComments %DVIPSWebPage: (www.radicaleye.com) %DVIPSCommandLine: dvips -o mrna-filter.ps mrna-filter.dvi %DVIPSParameters: dpi=600, compressed %DVIPSSource: TeX output 2002.01.17:1521 %%BeginProcSet: texc.pro %! /TeXDict 300 dict def TeXDict begin/N{def}def/B{bind def}N/S{exch}N/X{S N}B/A{dup}B/TR{translate}N/isls false N/vsize 11 72 mul N/hsize 8.5 72 mul N/landplus90{false}def/@rigin{isls{[0 landplus90{1 -1}{-1 1}ifelse 0 0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{ landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR[ matrix currentmatrix{A A round sub abs 0.00001 lt{round}if}forall round exch round exch]setmatrix}N/@landscape{/isls true N}B/@manualfeed{ statusdict/manualfeed true put}B/@copies{/#copies X}B/FMat[1 0 0 -1 0 0] N/FBB[0 0 0 0]N/nn 0 N/IEn 0 N/ctr 0 N/df-tail{/nn 8 dict N nn begin /FontType 3 N/FontMatrix fntrx N/FontBBox FBB N string/base X array /BitMaps X/BuildChar{CharBuilder}N/Encoding IEn N end A{/foo setfont}2 array copy cvx N load 0 nn put/ctr 0 N[}B/sf 0 N/df{/sf 1 N/fntrx FMat N df-tail}B/dfs{div/sf X/fntrx[sf 0 0 sf neg 0 0]N df-tail}B/E{pop nn A definefont setfont}B/Cw{Cd A length 5 sub get}B/Ch{Cd A length 4 sub get }B/Cx{128 Cd A length 3 sub get sub}B/Cy{Cd A length 2 sub get 127 sub} B/Cdx{Cd A length 1 sub get}B/Ci{Cd A type/stringtype ne{ctr get/ctr ctr 1 add N}if}B/id 0 N/rw 0 N/rc 0 N/gp 0 N/cp 0 N/G 0 N/CharBuilder{save 3 1 roll S A/base get 2 index get S/BitMaps get S get/Cd X pop/ctr 0 N Cdx 0 Cx Cy Ch sub Cx Cw add Cy setcachedevice Cw Ch 
true[1 0 0 -1 -.1 Cx sub Cy .1 sub]/id Ci N/rw Cw 7 add 8 idiv string N/rc 0 N/gp 0 N/cp 0 N{ rc 0 ne{rc 1 sub/rc X rw}{G}ifelse}imagemask restore}B/G{{id gp get/gp gp 1 add N A 18 mod S 18 idiv pl S get exec}loop}B/adv{cp add/cp X}B /chg{rw cp id gp 4 index getinterval putinterval A gp add/gp X adv}B/nd{ /cp 0 N rw exit}B/lsh{rw cp 2 copy get A 0 eq{pop 1}{A 255 eq{pop 254}{ A A add 255 and S 1 and or}ifelse}ifelse put 1 adv}B/rsh{rw cp 2 copy get A 0 eq{pop 128}{A 255 eq{pop 127}{A 2 idiv S 128 and or}ifelse} ifelse put 1 adv}B/clr{rw cp 2 index string putinterval adv}B/set{rw cp fillstr 0 4 index getinterval putinterval adv}B/fillstr 18 string 0 1 17 {2 copy 255 put pop}for N/pl[{adv 1 chg}{adv 1 chg nd}{1 add chg}{1 add chg nd}{adv lsh}{adv lsh nd}{adv rsh}{adv rsh nd}{1 add adv}{/rc X nd}{ 1 add set}{1 add clr}{adv 2 chg}{adv 2 chg nd}{pop nd}]A{bind pop} forall N/D{/cc X A type/stringtype ne{]}if nn/base get cc ctr put nn /BitMaps get S ctr S sf 1 ne{A A length 1 sub A 2 index S get sf div put }if put/ctr ctr 1 add N}B/I{cc 1 add D}B/bop{userdict/bop-hook known{ bop-hook}if/SI save N @rigin 0 0 moveto/V matrix currentmatrix A 1 get A mul exch 0 get A mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N/eop{ SI restore userdict/eop-hook known{eop-hook}if showpage}N/@start{ userdict/start-hook known{start-hook}if pop/VResolution X/Resolution X 1000 div/DVImag X/IEn 256 array N 2 string 0 1 255{IEn S A 360 add 36 4 index cvrs cvn put}for pop 65781.76 div/vsize X 65781.76 div/hsize X}N /p{show}N/RMat[1 0 0 -1 0 0]N/BDot 260 string N/Rx 0 N/Ry 0 N/V{}B/RV/v{ /Ry X/Rx X V}B statusdict begin/product where{pop false[(Display)(NeXT) (LaserWriter 16/600)]{A length product length le{A length product exch 0 exch getinterval eq{pop true exit}if}{pop}ifelse}forall}{false}ifelse end{{gsave TR -.1 .1 TR 1 1 scale Rx Ry false RMat{BDot}imagemask grestore}}{{gsave TR -.1 .1 TR Rx Ry scale 1 1 false RMat{BDot} imagemask grestore}}ifelse B/QV{gsave newpath transform round exch 
round exch itransform moveto Rx 0 rlineto 0 Ry neg rlineto Rx neg 0 rlineto fill grestore}B/a{moveto}B/delta 0 N/tail{A/delta X 0 rmoveto}B/M{S p delta add tail}B/b{S p tail}B/c{-4 M}B/d{-3 M}B/e{-2 M}B/f{-1 M}B/g{0 M} B/h{1 M}B/i{2 M}B/j{3 M}B/k{4 M}B/w{0 rmoveto}B/l{p -4 w}B/m{p -3 w}B/n{ p -2 w}B/o{p -1 w}B/q{p 1 w}B/r{p 2 w}B/s{p 3 w}B/t{p 4 w}B/x{0 S rmoveto}B/y{3 2 roll p a}B/bos{/SS save N}B/eos{SS restore}B end %%EndProcSet %%BeginProcSet: special.pro %! TeXDict begin/SDict 200 dict N SDict begin/@SpecialDefaults{/hs 612 N /vs 792 N/ho 0 N/vo 0 N/hsc 1 N/vsc 1 N/ang 0 N/CLIP 0 N/rwiSeen false N /rhiSeen false N/letter{}N/note{}N/a4{}N/legal{}N}B/@scaleunit 100 N /@hscale{@scaleunit div/hsc X}B/@vscale{@scaleunit div/vsc X}B/@hsize{ /hs X/CLIP 1 N}B/@vsize{/vs X/CLIP 1 N}B/@clip{/CLIP 2 N}B/@hoffset{/ho X}B/@voffset{/vo X}B/@angle{/ang X}B/@rwi{10 div/rwi X/rwiSeen true N}B /@rhi{10 div/rhi X/rhiSeen true N}B/@llx{/llx X}B/@lly{/lly X}B/@urx{ /urx X}B/@ury{/ury X}B/magscale true def end/@MacSetUp{userdict/md known {userdict/md get type/dicttype eq{userdict begin md length 10 add md maxlength ge{/md md dup length 20 add dict copy def}if end md begin /letter{}N/note{}N/legal{}N/od{txpose 1 0 mtx defaultmatrix dtransform S atan/pa X newpath clippath mark{transform{itransform moveto}}{transform{ itransform lineto}}{6 -2 roll transform 6 -2 roll transform 6 -2 roll transform{itransform 6 2 roll itransform 6 2 roll itransform 6 2 roll curveto}}{{closepath}}pathforall newpath counttomark array astore/gc xdf pop ct 39 0 put 10 fz 0 fs 2 F/|______Courier fnt invertflag{PaintBlack} if}N/txpose{pxs pys scale ppr aload pop por{noflips{pop S neg S TR pop 1 -1 scale}if xflip yflip and{pop S neg S TR 180 rotate 1 -1 scale ppr 3 get ppr 1 get neg sub neg ppr 2 get ppr 0 get neg sub neg TR}if xflip yflip not and{pop S neg S TR pop 180 rotate ppr 3 get ppr 1 get neg sub neg 0 TR}if yflip xflip not and{ppr 1 get neg ppr 0 get neg TR}if}{ noflips{TR pop pop 270 rotate 1 -1 
scale}if xflip yflip and{TR pop pop 90 rotate 1 -1 scale ppr 3 get ppr 1 get neg sub neg ppr 2 get ppr 0 get neg sub neg TR}if xflip yflip not and{TR pop pop 90 rotate ppr 3 get ppr 1 get neg sub neg 0 TR}if yflip xflip not and{TR pop pop 270 rotate ppr 2 get ppr 0 get neg sub neg 0 S TR}if}ifelse scaleby96{ppr aload pop 4 -1 roll add 2 div 3 1 roll add 2 div 2 copy TR .96 dup scale neg S neg S TR}if}N/cp{pop pop showpage pm restore}N end}if}if}N/normalscale{ Resolution 72 div VResolution 72 div neg scale magscale{DVImag dup scale }if 0 setgray}N/psfts{S 65781.76 div N}N/startTexFig{/psf$SavedState save N userdict maxlength dict begin/magscale true def normalscale currentpoint TR/psf$ury psfts/psf$urx psfts/psf$lly psfts/psf$llx psfts /psf$y psfts/psf$x psfts currentpoint/psf$cy X/psf$cx X/psf$sx psf$x psf$urx psf$llx sub div N/psf$sy psf$y psf$ury psf$lly sub div N psf$sx psf$sy scale psf$cx psf$sx div psf$llx sub psf$cy psf$sy div psf$ury sub TR/showpage{}N/erasepage{}N/copypage{}N/p 3 def @MacSetUp}N/doclip{ psf$llx psf$lly psf$urx psf$ury currentpoint 6 2 roll newpath 4 copy 4 2 roll moveto 6 -1 roll S lineto S lineto S lineto closepath clip newpath moveto}N/endTexFig{end psf$SavedState restore}N/@beginspecial{SDict begin/SpecialSave save N gsave normalscale currentpoint TR @SpecialDefaults count/ocount X/dcount countdictstack N}N/@setspecial{ CLIP 1 eq{newpath 0 0 moveto hs 0 rlineto 0 vs rlineto hs neg 0 rlineto closepath clip}if ho vo TR hsc vsc scale ang rotate rwiSeen{rwi urx llx sub div rhiSeen{rhi ury lly sub div}{dup}ifelse scale llx neg lly neg TR }{rhiSeen{rhi ury lly sub div dup scale llx neg lly neg TR}if}ifelse CLIP 2 eq{newpath llx lly moveto urx lly lineto urx ury lineto llx ury lineto closepath clip}if/showpage{}N/erasepage{}N/copypage{}N newpath}N /@endspecial{count ocount sub{pop}repeat countdictstack dcount sub{end} repeat grestore SpecialSave restore end}N/@defspecial{SDict begin}N 
/@fedspecial{end}B/li{lineto}B/rl{rlineto}B/rc{rcurveto}B/np{/SaveX currentpoint/SaveY X N 1 setlinecap newpath}N/st{stroke SaveX SaveY moveto}N/fil{fill SaveX SaveY moveto}N/ellipse{/endangle X/startangle X /yrad X/xrad X/savematrix matrix currentmatrix N TR xrad yrad scale 0 0 1 startangle endangle arc savematrix setmatrix}N end %%EndProcSet TeXDict begin 39158280 55380996 1000 600 600 (mrna-filter.dvi) @start %DVIPSBitmapFont: Fa cmsy7 7 1 /Fa 1 1 df0 D E %EndDVIPSBitmapFont %DVIPSBitmapFont: Fb cmex10 10 4 /Fb 4 63 df56 D58 D60 D62 D E %EndDVIPSBitmapFont %DVIPSBitmapFont: Fc cmsy10 10 4 /Fc 4 21 df<007FB81280B912C0A26C17803204799641>0 D<121C127FEAFF80A5EA7F 00121C0909799917>I3 D20 D E %EndDVIPSBitmapFont %DVIPSBitmapFont: Fd cmmi7 7 5 /Fd 5 109 df<903B3FFFF01FFFF8A2D901FCC7EAFE004A5CA2010314015F5CA2010714 035F5CA2010F14075F5CA2011F140F91B65AA2913880000F013F141F5F91C7FCA249143F 94C7FC137EA201FE5C167E5BA2000115FE5E5BA200031401B539C07FFFE0A235287DA736 >72 D<90383FFFF8A2D901FCC7FC5CA21303A25CA21307A25CA2130FA25CA2131FA25CA2 133FA291C8FCA249141C1618137E163801FE1430167049146016E000011401ED03C04913 07ED0F800003147FB7FC160026287DA72E>76 D99 D<133EEA07FEA2EA007CA213FCA25BA2 1201A25BA2120314FCEBE3FF9038EF0780D807FC13C0EBF00313E0A2EA0FC014071380A2 121FEC0F801300A248EB1F00A2003E1406143E127EEC7C0C127C151800FCEB3C30157048 EB1FE00070EB0F801F297CA727>104 D<137CEA0FFCA2EA00F8A21201A213F0A21203A2 13E0A21207A213C0A2120FA21380A2121FA21300A25AA2123EA2127EA2EA7C18A3EAF830 A21320EA786013C0EA3F80EA0F000E297EA715>108 D E %EndDVIPSBitmapFont %DVIPSBitmapFont: Fe cmmi10 10 27 /Fe 27 120 df<121C127FEAFF80A5EA7F00121C0909798817>58 D<121C127FEAFF80A213C0A3127F121C1200A412011380A2120313005A1206120E5A5A5A 12600A19798817>I I<0103B77E4916F018FC903B0007F80003FE4BEB00FFF07F80020FED3FC0181F4B15E0A2 141FA25DA2143F19C04B143F1980027F157F190092C812FE4D5A4A4A5AEF0FF04AEC1FC0 05FFC7FC49B612FC5F02FCC7B4FCEF3FC00103ED0FE0717E5C717E1307844A1401A2130F 
17035CA2131F4D5A5C4D5A133F4D5A4A4A5A4D5A017F4BC7FC4C5A91C7EA07FC49EC3FF0 B812C094C8FC16F83B397DB83F>66 D<9339FF8001C0030F13E0037F9038F80380913A01 FF807E07913A07F8000F0FDA1FE0EB079FDA3F80903803BF0002FFC76CB4FCD901FC8049 5A4948157E495A495A4948153E017F163C49C9FC5B1201484816385B1207485A1830121F 4993C7FCA2485AA3127F5BA312FF90CCFCA41703A25F1706A26C160E170C171C5F6C7E5F 001F5E6D4A5A6C6C4A5A16076C6C020EC8FC6C6C143C6C6C5C6CB4495A90393FE00FC001 0FB5C9FC010313FC9038007FC03A3D7CBA3B>I<0103B5D8F803B512F8495DA290260007 F8C73807F8004B5DA2020F150F615DA2021F151F615DA2023F153F615DA2027F157F96C7 FC92C8FCA24A5D605CA249B7FC60A202FCC7120101031503605CA201071507605CA2010F 150F605CA2011F151F605CA2013F153F605CA2017F157F95C8FC91C8FC496C4A7EB690B6 FCA345397DB845>72 D<0103B6FC5B5E90260007FCC8FC5D5D140FA25DA2141FA25DA214 3FA25DA2147FA292C9FCA25CA25CA21301A25CA21303A25CA2130718404A15C0A2010F15 0118804A1403A2011F16005F4A1406170E013F151E171C4A143C177C017F5D160391C712 0F49EC7FF0B8FCA25F32397DB839>76 D<902603FFF893383FFF80496081D900079438FF 80000206DC01BFC7FCA2020E4C5A1A7E020C1606190CDA1C7E16FE4F5A02181630A20238 166162023016C1F00181DA703F158395380303F002601506A202E0ED0C076202C0151818 3001016D6C140F06605B028015C0A20103923801801FDD03005B140092380FC00649173F 4D91C8FC01065DA2010E4B5B4D137E130C6F6C5A011C17FEDCE1805B011802E3C7FCA201 3802E6130104EC5C1330ED03F8017016034C5C01F05CD807FC4C7EB500E0D9C007B512F0 1680150151397CB851>I<267FFFFC91383FFFC0B55DA2000390C83807FC006C48ED03E0 6060000094C7FC5F17065FA25F6D5DA26D5D17E05F4C5AA24CC8FC6E1306A2013F5C161C 16185EA25E6E5BA2011F495A150393C9FC1506A25D6E5AA2010F5B157015605DA2ECE180 02E3CAFC14F3EB07F614FE5C5CA25C5CA26D5AA25C91CBFC3A3B7CB830>86 D<147E903803FF8090390FC1C38090391F00EFC0017E137F49133F485A4848EB1F801207 5B000F143F48481400A2485A5D007F147E90C7FCA215FE485C5AA214015D48150CA21403 EDF01C16181407007C1538007E010F1330003E131F027B13706C01E113E03A0F83C0F9C0 3A03FF007F80D800FCEB1F0026267DA42C>97 D<133FEA1FFFA3C67E137EA313FE5BA312 
015BA312035BA31207EBE0FCEBE3FF9038E707C0390FFE03E09038F801F001F013F8EBE0 00485A15FC5BA2123F90C7FCA214015A127EA2140312FE4814F8A2140715F05AEC0FE0A2 15C0EC1F80143F00781400007C137E5C383C01F86C485A380F07C06CB4C7FCEA01FC1E3B 7CB924>II<163FED1FFFA3ED007F167EA216FEA216FCA21501A216F8A21503A216F0A2 1507A2027E13E0903803FF8790380FC1CF90381F00EF017EEB7FC049133F485A4848131F 000715805B000F143F485A1600485A5D127F90C7127EA215FE5A485CA21401A248ECF80C A21403161CEDF0181407007C1538007E010F1330003E131F027B13706C01E113E03A0F83 C0F9C03A03FF007F80D800FCEB1F00283B7DB92B>II103 DI<14E0EB03F8A21307A314F0EB01C090C7FCAB13F8EA03FEEA070F000E 1380121C121812381230EA701F1260133F00E0130012C05BEA007EA213FE5B1201A25B12 035BA20007131813E01438000F133013C01470EB806014E014C01381EB838038078700EA 03FEEA00F815397EB71D>I109 DII<90390F8003F090391FE00FFC903939F03C1F903A70F8700F80903AE0FDE007C090 38C0FF80030013E00001491303018015F05CEA038113015CA2D800031407A25CA2010714 0FA24A14E0A2010F141F17C05CEE3F80131FEE7F004A137E16FE013F5C6E485A4B5A6E48 5A90397F700F80DA383FC7FC90387E1FFCEC07E001FEC9FCA25BA21201A25BA21203A25B 1207B512C0A32C3583A42A>I<3903E001F83907F807FE390E3C1E07391C3E381F3A183F 703F800038EBE07F0030EBC0FF00705B00601500EC007E153CD8E07F90C7FCEAC07EA212 0013FE5BA312015BA312035BA312075BA3120F5BA3121F5B0007C9FC21267EA425>114 D<14FF010313C090380F80F090383E00380178131C153C4913FC0001130113E0A33903F0 00F06D13007F3801FFE014FC14FF6C14806D13C0011F13E013039038003FF01407140300 1E1301127FA24814E0A348EB03C012F800E0EB07800070EB0F006C133E001E13F83807FF E0000190C7FC1E267CA427>II<13F8D803FE1438D8070F147C000E6D13FC121C1218003814011230D8701F5C1260 1503EAE03F00C001005B5BD8007E1307A201FE5C5B150F1201495CA2151F120349EC80C0 A2153F1681EE0180A2ED7F0303FF130012014A5B3A00F8079F0E90397C0E0F1C90393FFC 07F8903907F001F02A267EA430>I<01F8EB03C0D803FEEB07E0D8070F130F000E018013 F0121C12180038140700301403D8701F130112601500D8E03F14E000C090C7FC5BEA007E 16C013FE5B1501000115805B150316001203495B1506150E150C151C151815385D00015C 
6D485A6C6C485AD97E0FC7FCEB1FFEEB07F024267EA428>I<01F816F0D803FE9138E001 F8D8070F903801F003000ED9800314FC121C12180038020713010030EDE000D8701F167C 1260030F143CD8E03F163800C001005B5BD8007E131F183001FE5C5B033F147000011760 4991C7FCA218E000034A14C049137E17011880170318005F03FE1306170E000101015C01 F801BF5B3B00FC039F8070903A7E0F0FC0E0903A1FFC03FFC0902703F0007FC7FC36267E A43B>I E %EndDVIPSBitmapFont %DVIPSBitmapFont: Ff cmr10 10 59 /Ff 59 123 df12 D<121C127FEAFF80A213C0A3127F121C1200A412 011380A2120313005A1206120E5A5A5A12600A1979B917>39 D<146014E0EB01C0EB0380 EB0700130E131E5B5BA25B485AA2485AA212075B120F90C7FCA25A121EA2123EA35AA65A B2127CA67EA3121EA2121F7EA27F12077F1203A26C7EA26C7E1378A27F7F130E7FEB0380 EB01C0EB00E01460135278BD20>I<12C07E12707E7E7E120F6C7E6C7EA26C7E6C7EA213 78A2137C133C133E131EA2131F7FA21480A3EB07C0A6EB03E0B2EB07C0A6EB0F80A31400 A25B131EA2133E133C137C1378A25BA2485A485AA2485A48C7FC120E5A5A5A5A5A13527C BD20>I I<121C127FEAFF80A213C0A3127F121C1200A412011380A2120313005A1206120E5A5A5A 12600A19798817>44 DI<121C127FEAFF80A5EA7F00121C0909 798817>I<150C151E153EA2153C157CA2157815F8A215F01401A215E01403A215C01407 A21580140FA215005CA2141E143EA2143C147CA2147814F8A25C1301A25C1303A2495AA2 5C130FA291C7FC5BA2131E133EA2133C137CA2137813F8A25B1201A25B1203A25B1207A2 5B120FA290C8FC5AA2121E123EA2123C127CA2127812F8A25A12601F537BBD2A>IIIII<0006140CD80780133C 9038F003F890B5FC5D5D158092C7FC14FC38067FE090C9FCABEB07F8EB3FFE9038780F80 3907E007E090388003F0496C7E12066E7EC87EA28181A21680A4123E127F487EA490C713 00485C12E000605C12700030495A00385C6C1303001E495A6C6C485A3907E03F800001B5 C7FC38007FFCEB1FE0213A7CB72A>53 DI<12301238 123E003FB612E0A316C05A168016000070C712060060140E5D151800E01438485C5D5DC7 12014A5A92C7FC5C140E140C141C5CA25CA214F0495AA21303A25C1307A2130FA3495AA3 133FA5137FA96DC8FC131E233B7BB82A>I<121C127FEAFF80A5EA7F00121CC7FCB2121C 127FEAFF80A5EA7F00121C092479A317>58 D<121C127FEAFF80A5EA7F00121CC7FCA812 1CAB123EAB127FABEAFF80A8EA7F00121C093C79A917>60 D<007FB812F8B912FCA26C17 
F8CCFCAE007FB812F8B912FCA26C17F836167B9F41>I<130EEB3F80497EA56D5A010EC7 FC90C8FCA81306A4130E130CA6131CA35BA213785BA21201485A1207485A485A123F48C8 FCA200FE14F0EC01F8EC03FCA41401EC00F8007E1438007F14706C14E0391F8003C0390F C01F003803FFFC38007FE01E3B7CA927>I<1538A3157CA315FEA34A7EA34A6C7EA20207 7FEC063FA2020E7FEC0C1FA2021C7FEC180FA202387FEC3007A202707FEC6003A202C07F 1501A2D901807F81A249C77F167FA20106810107B6FCA24981010CC7121FA2496E7EA349 6E7EA3496E7EA213E0707E1201486C81D80FFC02071380B56C90B512FEA3373C7DBB3E> 65 DI<913A01FF800180020FEBE003027F13F8903A01FF807E0790 3A03FC000F0FD90FF0EB039F4948EB01DFD93F80EB00FF49C8127F01FE153F1201484815 1F4848150FA248481507A2485A1703123F5B007F1601A35B00FF93C7FCAD127F6DED0180 A3123F7F001F160318006C7E5F6C7E17066C6C150E6C6C5D00001618017F15386D6C5CD9 1FE05C6D6CEB03C0D903FCEB0F80902701FF803FC7FC9039007FFFFC020F13F002011380 313D7BBA3C>II70 D72 DI<013FB512E0A3903900 1FFC00EC07F8B3B3A3123FEA7F80EAFFC0A44A5A1380D87F005B0070131F6C5C6C495A6C 49C7FC380781FC3801FFF038007F80233B7DB82B>I76 DII82 DI<003FB812E0A3D9C003EB001F27 3E0001FE130348EE01F00078160000701770A300601730A400E01738481718A4C71600B3 B0913807FF80011FB612E0A335397DB83C>I87 D97 DIIII<147E903803FF8090380FC1E0 EB1F8790383F0FF0137EA213FCA23901F803C091C7FCADB512FCA3D801F8C7FCB3AB487E 387FFFF8A31C3B7FBA19>IIII108 D<2703F00FF0EB1FE000FFD93FFCEB7FF8913AF03F01E07E903BF1C01F83803F3D0FF380 0FC7001F802603F70013CE01FE14DC49D907F8EB0FC0A2495CA3495CB3A3486C496CEB1F E0B500C1B50083B5FCA340257EA445>I<3903F00FF000FFEB3FFCECF03F9039F1C01F80 3A0FF3800FC03803F70013FE496D7EA25BA35BB3A3486C497EB500C1B51280A329257EA4 2E>II<3903F01FE000FFEB7FF89038F1E07E9039F3801F803A0FF700 0FC0D803FEEB07E049EB03F04914F849130116FC150016FEA3167FAA16FEA3ED01FCA26D EB03F816F06D13076DEB0FE001F614C09039F7803F009038F1E07E9038F0FFF8EC1FC091 C8FCAB487EB512C0A328357EA42E>II<3807E01F00FFEB7FC09038E1E3E09038E387 F0380FE707EA03E613EE9038EC03E09038FC0080491300A45BB3A2487EB512F0A31C257E A421>II< 
1318A51338A31378A313F8120112031207001FB5FCB6FCA2D801F8C7FCB215C0A93800FC 011580EB7C03017E13006D5AEB0FFEEB01F81A347FB220>III III<003FB512FCA2EB80 03D83E0013F8003CEB07F00038EB0FE012300070EB1FC0EC3F800060137F150014FE495A A2C6485A495AA2495A495A495AA290387F000613FEA2485A485A0007140E5B4848130C48 48131CA24848133C48C7127C48EB03FC90B5FCA21F247EA325>I E %EndDVIPSBitmapFont %DVIPSBitmapFont: Fg cmbx12 14.4 14 /Fg 14 117 df<157815FC14031407141F14FF130F0007B5FCB6FCA2147F13F0EAF800C7 FCB3B3B3A6007FB712FEA52F4E76CD43>49 DI70 D87 D97 D<913803FFC0023F13FC49B6FC010715C04901817F903A3FFC007FF849486D 7E49486D7E4849130F48496D7E48178048497F18C0488191C7FC4817E0A248815B18F0A2 12FFA490B8FCA318E049CAFCA6127FA27F7EA218E06CEE01F06E14037E6C6DEC07E0A26C 6DEC0FC06C6D141F6C6DEC3F806D6CECFF00D91FFEEB03FE903A0FFFC03FF8010390B55A 010015C0021F49C7FC020113F034387CB63D>101 D103 DI<137F497E000313E0487FA2487FA76C5BA26C5BC613806DC7FC 90C8FCADEB3FF0B5FCA512017EB3B3A6B612E0A51B547BD325>I108 D110 D<90397FE003FEB590380FFF80 033F13E04B13F09238FE1FF89139E1F83FFC0003D9E3E013FEC6ECC07FECE78014EF1500 14EE02FEEB3FFC5CEE1FF8EE0FF04A90C7FCA55CB3AAB612FCA52F367CB537>114 D<903903FFF00F013FEBFE1F90B7FC120348EB003FD80FF81307D81FE0130148487F4980 127F90C87EA24881A27FA27F01F091C7FC13FCEBFFC06C13FF15F86C14FF16C06C15F06C 816C816C81C681013F1580010F15C01300020714E0EC003F030713F015010078EC007F00 F8153F161F7E160FA27E17E07E6D141F17C07F6DEC3F8001F8EC7F0001FEEB01FE9039FF C00FFC6DB55AD8FC1F14E0D8F807148048C601F8C7FC2C387CB635>I<143EA6147EA414 FEA21301A313031307A2130F131F133F13FF5A000F90B6FCB8FCA426003FFEC8FCB3A9EE 07C0AB011FEC0F8080A26DEC1F0015806DEBC03E6DEBF0FC6DEBFFF86D6C5B021F5B0203 13802A4D7ECB34>I E %EndDVIPSBitmapFont %DVIPSBitmapFont: Fh cmr9 9 26 /Fh 26 122 df<123C127EB4FCA21380A2127F123D1201A412031300A25A1206120E120C 121C5A5A126009177A8715>44 DI<123C127E12FFA4127E123C 08087A8715>I48 D50 D71 D97 DII<153FEC0FFFA3EC007F81AEEB07F0EB3FFCEBFC0F3901F003BF39 07E001FF48487E48487F8148C7FCA25A127E12FEAA127E127FA27E6C6C5BA26C6C5B6C6C 
4813803A03F007BFFC3900F81E3FEB3FFCD90FE0130026357DB32B>III<151F90391FC07F809039FFF8E3C03901F07FC73907E03F033A0FC01F8380 9039800F8000001F80EB00074880A66C5CEB800F000F5CEBC01F6C6C48C7FCEBF07C380E FFF8380C1FC0001CC9FCA3121EA2121F380FFFFEECFFC06C14F06C14FC4880381F000100 3EEB007F4880ED1F8048140FA56C141F007C15006C143E6C5C390FC001F83903F007E0C6 B51280D91FFCC7FC22337EA126>III108 D<2703F01FE013FF00FF90267FF80313C0903BF1E07C0F03E0903BF3803E1C01F02807F7 003F387FD803FE1470496D486C7EA2495CA2495CB3486C496C487EB53BC7FFFE3FFFF0A3 3C217EA041>I<3903F01FC000FFEB7FF09038F1E0FC9038F3807C3907F7007EEA03FE49 7FA25BA25BB3486CEB7F80B538C7FFFCA326217EA02B>II<3903F03F8000FFEBFFE09038F3C0F89038F7007ED807FE 7F6C48EB1F804914C049130F16E0ED07F0A3ED03F8A9150716F0A216E0150F16C06D131F 6DEB3F80160001FF13FC9038F381F89038F1FFE0D9F07FC7FC91C8FCAA487EB512C0A325 307EA02B>I<3803E07C38FFE1FF9038E38F809038E71FC0EA07EEEA03ECA29038FC0F80 49C7FCA35BB2487EB512E0A31A217FA01E>114 DI<1330A51370A313F0A21201A212031207381FFFFEB5FCA23803F000 AF1403A814073801F806A23800FC0EEB7E1CEB1FF8EB07E0182F7FAD1E>I118 DI<3A7FFF807FF8A33A07F8001FC00003EC 0F800001EC070015066C6C5BA26D131C017E1318A26D5BA2EC8070011F1360ECC0E0010F 5BA2903807E180A214F3010390C7FC14FBEB01FEA26D5AA31478A21430A25CA214E05CA2 495A1278D8FC03C8FCA21306130EEA701CEA7838EA1FF0EA0FC025307F9F29>121 D E %EndDVIPSBitmapFont %DVIPSBitmapFont: Fi cmbx9 9 7 /Fi 7 117 df65 D97 DI<903807FF80013F13F090B512FC3903FE01FE4848 487EEA0FF8EA1FF0EA3FE0A2007F6D5A496C5A153000FF91C7FCA9127F7FA2003FEC0780 7F6C6C130F000FEC1F00D807FE133E3903FF80FCC6EBFFF8013F13E0010790C7FC21217D A027>I<3901F81F8000FFEB7FF0ECFFF89038F9E3FC9038FBC7FE380FFF876C1307A213 FEEC03FCEC01F8EC0060491300B1B512F0A41F217EA024>114 D<9038FFE1C0000713FF 5A383F803F387E000F14075A14037EA26C6CC7FC13FCEBFFE06C13FC806CEBFF80000F14 C06C14E0C6FC010F13F0EB007F140F00F0130714037EA26C14E06C13076CEB0FC09038C0 1F8090B5120000F913FC38E03FE01C217DA023>I<133CA5137CA313FCA21201A2120312 
07001FB51280B6FCA3D807FCC7FCB0EC03C0A79038FE078012033901FF0F006C13FEEB3F FCEB0FF01A2F7EAE22>I E %EndDVIPSBitmapFont %DVIPSBitmapFont: Fj cmr12 12 19 /Fj 19 123 df<121EEA7F8012FF13C0A213E0A3127FEA1E601200A413E013C0A3120113 80120313005A1206120E5A5A5A12600B1D78891B>44 D<121EEA7F80A2EAFFC0A4EA7F80 A2EA1E000A0A78891B>46 D<14FF010713E090381F81F890383E007C01FC133F4848EB1F 8049130F4848EB07C04848EB03E0A2000F15F0491301001F15F8A2003F15FCA390C8FC48 15FEA54815FFB3A46C15FEA56D1301003F15FCA3001F15F8A26C6CEB03F0A36C6CEB07E0 000315C06D130F6C6CEB1F806C6CEB3F00013E137C90381F81F8903807FFE0010090C7FC 28447CC131>48 D<143014F013011303131F13FFB5FC13E713071200B3B3B0497E497E00 7FB6FCA3204278C131>II<121CA2EA 1F8090B712C0A3481680A217005E0038C8120C0030151C00705D0060153016705E5E4814 014B5A4BC7FCC81206150E5D151815385D156015E04A5AA24A5A140792C8FC5CA25C141E 143EA2147E147CA214FCA21301A3495AA41307A6130FAA6D5AEB01C02A457BC231>55 D66 D<010FB512FEA3D9000313806E130080B3B3AB123F487E487EA44A5A13801300006C495A 00705C6C13076C5C6C495A6CEB1F802603E07FC7FC3800FFFCEB1FE027467BC332>74 D80 D87 D97 D101 D105 D108 D<3901FC01FE00FF903807FFC091381E07F091383801F8000701707F00 03EBE0002601FDC07F5C01FF147F91C7FCA25BA35BB3A8486CECFF80B5D8F83F13FEA32F 2C7DAB36>110 D<3903F803F000FFEB1FFCEC3C3EEC707F0007EBE0FF3803F9C000015B 13FBEC007E153C01FF13005BA45BB3A748B4FCB512FEA3202C7DAB26>114 D117 D121 D<003FB612E0A29038C0003F90C713C0003CEC7F800038 ECFF00A20030495A0070495AA24A5A0060495AA24A5A4A5AA2C7485A4AC7FC5B5C495A13 075C495A131F4A1360495A495AA249C712C0485AA2485A485A1501485A48481303A24848 EB07804848131F00FF14FF90B6FCA2232B7DAA2B>I E %EndDVIPSBitmapFont %DVIPSBitmapFont: Fk cmr17 17.28 14 /Fk 14 117 df<170FA34D7EA24D7EA34D7EA34D7EA34C7F17DFA29338039FFC178FA293 38070FFE1707040F7FEE0E03A2041E80EE1C01A2043C80EE3800A24C80187FA24C80183F A24B4880181F0303814C130FA203078193C71207A24B81030E80A24B8284A24B8284A24B 82197F03F0824B153FA20201834B151FA202038392B8FCA24A83A292C91207020E8385A2 
4A8485023C84023882A20278840270177FA202F0844A173FA24948841A1FA24948841A0F A249CB7F1A074985865B496C85497E48486C4D7F000F01F8051F13F0B60407B612F0A45C 657DE463>65 D70 D78 D82 D97 D101 D103 D<133C13FF487F487FA66C5B6C90C7FC133C90C8FCB3A2EB03C0EA07FF127FA412 01EA007FA2133FB3B3AC497E497EB612E0A41B5F7DDE23>105 D108 DII<90 39078003F8D807FFEB0FFFB5013F13C092387C0FE0913881F01F9238E03FF00001EB8380 39007F8700148FEB3F8E029CEB1FE0EE0FC00298EB030002B890C7FCA214B014F0A25CA5 5CB3B0497EEBFFF8B612FCA42C3F7CBE33>114 D<9139FFE00180010FEBFC03017FEBFF 073A01FF001FCFD803F8EB03EFD807E0EB01FF48487F4848147F48C8123F003E151F007E 150F127CA200FC1507A316037EA27E7F6C7E6D91C7FC13F8EA3FFE381FFFF06CEBFF806C 14F86C14FF6C15C06C6C14F0011F80010714FED9007F7F02031480DA003F13C015030300 13E0167F00E0ED1FF0160F17F86C15071603A36C1501A37EA26C16F016037E17E06D1407 6DEC0FC06D1580D8FDF0141FD8F8F8EC7F00013E14FC3AF01FC00FF80107B512E0D8E001 148027C0003FF8C7FC2D417DBF34>I<1438A71478A414F8A31301A31303A21307130F13 1FA2137F13FF1203000F90B6FCB8FCA3260007F8C8FCB3AE17E0AE6D6CEB01C0A316036D 6C148016076D6C14006E6C5A91383FC01E91381FF07C6EB45A020313E09138007F802B59 7FD733>I E %EndDVIPSBitmapFont end %%EndProlog %%BeginSetup %%Feature: *Resolution 600dpi TeXDict begin %%PaperSize: A4 %%EndSetup %%Page: 1 1 1 0 bop 1065 872 a Fk(Filtering)46 b(mRNA)e(signal)1375 1166 y Fj(Brian)31 b(P)-8 b(.)33 b(W)-8 b(alenz)1359 1414 y(Jan)m(uary)33 b(17,)f(2002)1554 1630 y Fi(Abstract)613 1770 y Fh(Giv)n(en)24 b(signals)h(detected)e(b)n(y)g(c)n(haining)h (20-mers,)g(a)g(metho)r(d)e(is)i(presen)n(ted)f(for)498 1861 y(deciding)j(whic)n(h)g(signals)h(p)r(oten)n(tially)f(con)n(tain)g (real)h(matc)n(hes.)291 2198 y Fg(1)134 b(What)45 b(is)h(a)f(signal)291 2436 y Ff(Signal)35 b(has)h(three)g(v)-5 b(alues)35 b(asso)r(ciated)g (with)i(it.)63 b(The)36 b('co)n(v)n(ered',)h(the)f('matc)n(hed')g(and) 291 2535 y(the)28 b('length'.)37 b(Co)n(v)n(ered)26 b(is)h(the)h(n)n (um)n(b)r(er)g(of)f(bases)g(in)h(the)g(mRNA)h(that)f(are)e(co)n(v)n (ered)g(b)n(y)291 2635 
y(at)34 b(least)g(one)g(mer.)56 b(Matc)n(hed)34 b(is)g(the)h(n)n(um)n(b)r(er)f(of)g(bases)g(matc)n(hed) g(\(n)n(um)n(b)r(er)g(of)g(mers)291 2734 y(*)29 b(size)h(of)g(a)g (mer\),)h(and)f(the)h(length)f(is)g(the)h(n)n(um)n(b)r(er)f(of)g(mers)g (in)g(the)h(mRNA)g(\(roughly)291 2834 y(equiv)-5 b(alen)n(t)22 b(to)h(the)g(n)n(um)n(b)r(er)g(of)g(bases)f(in)h(the)h(mRNA)g(that)f (could)g(b)r(e)g(co)n(v)n(ered)e(b)n(y)i(a)f(mer,)291 2934 y(but)28 b(easier)e(to)i(compute\).)291 3087 y(F)-7 b(rom)27 b(these,)g(w)n(e)h(score)e(signals)g(using)i(t)n(w)n(o)e(v)-5 b(alues,)28 b(co)n(v)n(erage)c(and)k(m)n(ultiplicit)n(y)-7 b(.)291 3240 y(Co)n(v)n(erage)30 b(is)j(de\014ned)h(as)f(co)n(v)n(ered) e(/)i(length.)54 b(Multiplicit)n(y)34 b(is)g(de\014ned)f(as)g(matc)n (hed)g(/)291 3340 y(co)n(v)n(ered.)291 3677 y Fg(2)134 b(Filter)291 3914 y Ff(Six)26 b(parameters)f(are)g(used:)37 b(-l,)26 b(-h,)h(-v,)f(-m,)h(-mc)f(and)g(-ml,)h(corresp)r(onding)e(to)h Fe(L)p Ff(,)g Fe(H)7 b Ff(,)27 b Fe(V)19 b Ff(,)291 4014 y Fe(M)9 b Ff(,)27 b Fe(M)512 4026 y Fd(c)545 4014 y Ff(,)h(and)f Fe(M)838 4026 y Fd(l)863 4014 y Ff(.)291 4167 y(Default)h(v)-5 b(alues:)1606 4354 y Fe(L)22 b Ff(=)h(0)p Fe(:)p Ff(2)1587 4478 y Fe(H)29 b Ff(=)23 b(0)p Fe(:)p Ff(6)1595 4603 y Fe(V)42 b Ff(=)23 b(0)p Fe(:)p Ff(7)1573 4727 y Fe(M)31 b Ff(=)23 b(0)p Fe(:)p Ff(3)1548 4852 y Fe(M)1629 4864 y Fd(c)1685 4852 y Ff(=)g(0)p Fe(:)p Ff(2)1556 4976 y Fe(M)1637 4988 y Fd(l)1685 4976 y Ff(=)g(150)1702 5255 y(1)p eop %%Page: 2 2 2 1 bop 739 203 a Ff(Jan)n(uary)26 b(17,)g(2002)396 b(Filtering)27 b(mRNA)h(signal)524 b(Brian)26 b(W)-7 b(alenz)p 739 236 2865 4 v 821 1849 a @beginspecial 0 @llx 0 @lly 433 @urx 226 @ury 3240 @rwi @setspecial %%BeginDocument: mRNAfilt.eps %!PS-Adobe-2.0 EPSF-2.0 %%Title: mRNAfilt.eps %%Creator: fig2dev Version 3.2 Patchlevel 0-beta3 %%CreationDate: Thu Jan 17 16:15:18 2002 %%For: walenz@dsc154p.celera.com (Brian Walenz,3604) %%Orientation: Portrait %%BoundingBox: 0 0 433 226 %%Pages: 0 %%BeginSetup 
%%EndSetup %%Magnification: 1.0000 %%EndComments /$F2psDict 200 dict def $F2psDict begin $F2psDict /mtrx matrix put /col-1 {0 setgray} bind def /col0 {0.000 0.000 0.000 srgb} bind def /col1 {0.000 0.000 1.000 srgb} bind def /col2 {0.000 1.000 0.000 srgb} bind def /col3 {0.000 1.000 1.000 srgb} bind def /col4 {1.000 0.000 0.000 srgb} bind def /col5 {1.000 0.000 1.000 srgb} bind def /col6 {1.000 1.000 0.000 srgb} bind def /col7 {1.000 1.000 1.000 srgb} bind def /col8 {0.000 0.000 0.560 srgb} bind def /col9 {0.000 0.000 0.690 srgb} bind def /col10 {0.000 0.000 0.820 srgb} bind def /col11 {0.530 0.810 1.000 srgb} bind def /col12 {0.000 0.560 0.000 srgb} bind def /col13 {0.000 0.690 0.000 srgb} bind def /col14 {0.000 0.820 0.000 srgb} bind def /col15 {0.000 0.560 0.560 srgb} bind def /col16 {0.000 0.690 0.690 srgb} bind def /col17 {0.000 0.820 0.820 srgb} bind def /col18 {0.560 0.000 0.000 srgb} bind def /col19 {0.690 0.000 0.000 srgb} bind def /col20 {0.820 0.000 0.000 srgb} bind def /col21 {0.560 0.000 0.560 srgb} bind def /col22 {0.690 0.000 0.690 srgb} bind def /col23 {0.820 0.000 0.820 srgb} bind def /col24 {0.500 0.190 0.000 srgb} bind def /col25 {0.630 0.250 0.000 srgb} bind def /col26 {0.750 0.380 0.000 srgb} bind def /col27 {1.000 0.500 0.500 srgb} bind def /col28 {1.000 0.630 0.630 srgb} bind def /col29 {1.000 0.750 0.750 srgb} bind def /col30 {1.000 0.880 0.880 srgb} bind def /col31 {1.000 0.840 0.000 srgb} bind def end save -36.0 387.0 translate 1 -1 scale /cp {closepath} bind def /ef {eofill} bind def /gr {grestore} bind def /gs {gsave} bind def /sa {save} bind def /rs {restore} bind def /l {lineto} bind def /m {moveto} bind def /rm {rmoveto} bind def /n {newpath} bind def /s {stroke} bind def /sh {show} bind def /slc {setlinecap} bind def /slj {setlinejoin} bind def /slw {setlinewidth} bind def /srgb {setrgbcolor} bind def /rot {rotate} bind def /sc {scale} bind def /sd {setdash} bind def /ff {findfont} bind def /sf {setfont} bind def /scf {scalefont} bind 
def /sw {stringwidth} bind def /tr {translate} bind def /tnt {dup dup currentrgbcolor 4 -2 roll dup 1 exch sub 3 -1 roll mul add 4 -2 roll dup 1 exch sub 3 -1 roll mul add 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} bind def /shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul 4 -2 roll mul srgb} bind def /$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def /$F2psEnd {$F2psEnteredState restore end} def %%EndProlog $F2psBegin 10 setmiterlimit n -1000 7450 m -1000 -1000 l 8812 -1000 l 8812 7450 l cp clip 0.06000 0.06000 sc % Polyline 7.500 slw n 7800 6000 m 900 6000 l gs col0 s gr % Polyline n 1800 6300 m 1800 6000 l gs col0 s gr % Polyline n 2400 6300 m 2400 6000 l gs col0 s gr % Polyline n 3600 6300 m 3600 6000 l gs col0 s gr % Polyline n 4200 6300 m 4200 6000 l gs col0 s gr % Polyline n 4800 6300 m 4800 6000 l gs col0 s gr % Polyline n 3000 6300 m 3000 6000 l gs col0 s gr % Polyline n 5400 6300 m 5400 6000 l gs col0 s gr % Polyline n 6000 6300 m 6000 6000 l gs col0 s gr % Polyline n 6600 6300 m 6600 6000 l gs col0 s gr % Polyline n 7200 6300 m 7200 6000 l gs col0 s gr % Polyline n 900 5400 m 1200 5400 l gs col0 s gr % Polyline n 900 4800 m 1200 4800 l gs col0 s gr % Polyline n 900 4200 m 1200 4200 l gs col0 s gr % Polyline n 900 3600 m 1200 3600 l gs col0 s gr % Polyline n 900 3000 m 1200 3000 l gs col0 s gr % Polyline 30.000 slw n 1200 5400 m 2400 5400 l 4800 3600 l 7200 3600 l gs col0 s gr % Polyline 7.500 slw [15 60] 60 sd n 1200 3600 m 7800 3600 l gs col0 s gr [] 0 sd % Polyline [15 60] 60 sd n 2400 2700 m 2400 5700 l gs col0 s gr [] 0 sd % Polyline n 1200 2700 m 1200 6300 l gs col0 s gr % Polyline [15 60] 60 sd n 4800 2700 m 4800 5700 l gs col0 s gr [] 0 sd /Times-Roman ff 180.00 scf sf 6000 3525 m gs 1 -1 sc (minL) col0 sh gr /Times-Roman ff 180.00 scf sf 600 5475 m gs 1 -1 sc (0.0) col0 sh gr /Times-Roman ff 180.00 scf sf 600 4875 m gs 1 -1 sc (0.1) col0 sh gr /Times-Roman ff 180.00 scf sf 600 4275 m gs 1 -1 sc (0.2) col0 sh gr 
/Times-Roman ff 180.00 scf sf 600 3675 m gs 1 -1 sc (0.3) col0 sh gr /Times-Roman ff 180.00 scf sf 600 3075 m gs 1 -1 sc (0.4) col0 sh gr /Times-Roman ff 180.00 scf sf 1125 6450 m gs 1 -1 sc (0.0) col0 sh gr /Times-Roman ff 180.00 scf sf 1725 6450 m gs 1 -1 sc (0.1) col0 sh gr /Times-Roman ff 180.00 scf sf 2325 6450 m gs 1 -1 sc (0.2) col0 sh gr /Times-Roman ff 180.00 scf sf 2925 6450 m gs 1 -1 sc (0.3) col0 sh gr /Times-Roman ff 180.00 scf sf 3525 6450 m gs 1 -1 sc (0.4) col0 sh gr /Times-Roman ff 180.00 scf sf 4125 6450 m gs 1 -1 sc (0.5) col0 sh gr /Times-Roman ff 180.00 scf sf 4725 6450 m gs 1 -1 sc (0.6) col0 sh gr /Times-Roman ff 180.00 scf sf 5325 6450 m gs 1 -1 sc (0.7) col0 sh gr /Times-Roman ff 180.00 scf sf 5925 6450 m gs 1 -1 sc (0.8) col0 sh gr /Times-Roman ff 180.00 scf sf 6525 6450 m gs 1 -1 sc (0.9) col0 sh gr /Times-Roman ff 180.00 scf sf 7125 6450 m gs 1 -1 sc (1.0) col0 sh gr /Times-Roman ff 180.00 scf sf 1425 3525 m gs 1 -1 sc (V) col0 sh gr /Times-Roman ff 180.00 scf sf 2250 2850 m gs 1 -1 sc (L) col0 sh gr /Times-Roman ff 180.00 scf sf 4650 2850 m gs 1 -1 sc (H) col0 sh gr $F2psEnd rs %%EndDocument @endspecial 1593 2032 a(Figure)27 b(1:)37 b(The)27 b Fe(p)h Ff(curv)n(e)e(\(mostly\).)739 2302 y(Signals)h(are)f(\014ltered) i(b)n(y)f(considering)f(all)i(signals)e(for)h(a)g(single)h(mRNA.)1369 2634 y Fe(h)23 b Ff(=)g Fe(bestC)6 b(ov)s(er)r(ag)s(e)17 b Fc(\000)h Fe(w)r(or)r(stC)6 b(ov)s(er)r(ag)s(e)1375 2897 y(p)23 b Ff(=)1528 2702 y Fb(8)1528 2777 y(>)1528 2802 y(<)1528 2951 y(>)1528 2976 y(:)1601 2781 y Ff(1)p Fe(:)p Ff(0)778 b(if)28 b Fe(h)22 b Fc(\024)h Fe(L)1601 2900 y Ff(1)p Fe(:)p Ff(0)18 b Fc(\000)g Ff(\(1)p Fe(:)p Ff(0)g Fc(\000)g Fe(V)h Ff(\))f Fc(\003)2246 2868 y Fd(h)p Fa(\000)p Fd(L)p 2236 2882 157 4 v 2236 2929 a(H)t Fa(\000)p Fd(L)2486 2900 y Ff(if)28 b Fe(L)22 b(<)h(h)g(<)f(H)1601 3020 y(V)837 b Ff(if)28 b Fe(H)h Fc(\024)23 b Fe(h)739 3259 y(cutL)f Ff(=)h Fe(min)p Ff(\()p Fe(bestC)6 b(ov)s(er)r(ag)s(e)17 b Fc(\000)h Fe(p)g Fc(\001)h 
Fe(h;)14 b(M)9 b Ff(\))739 3408 y(cutL)37 b(is)g(the)g(minim)n(um)g(co)n(v)n(erage)d(that)j(will)g (b)r(e)g(accepted.)65 b(It)37 b(is)f(deriv)n(ed)g(from)h(the)739 3508 y(range)26 b(of)i(scores,)e(not)h(the)h(n)n(um)n(b)r(er)g(of)f (scores.)739 3658 y(If)i(the)g(score)f(range)f(is)i(small)f(\(<=)h (L\),)g(then)g(cutL)g(will)g(b)r(e)h(w)n(orstCo)n(v)n(erage,)25 b(and)j(w)n(e)h(do)739 3757 y(no)f(\014ltering.)37 b(If)28 b(the)h(score)d(range)h(is)g(large)g(\(>=)h(H\),)h(then)f(cutL)g(will)g (b)r(e)h(M)f(of)g(the)g(b)r(est)739 3857 y(score.)739 4006 y(A)g(signal)f(is)g(sa)n(v)n(ed)f(if)i(t)n(w)n(o)f(conditions)g (are)g(met:)840 4156 y(1.)41 b(\()p Fe(cutL)23 b(<)p Ff(=)f Fe(cov)s(er)r(ag)s(e)p Ff(\))840 4305 y(2.)41 b(\()p Fe(M)1059 4317 y Fd(c)1116 4305 y Fe(<)p Ff(=)22 b Fe(cov)s(er)r(ag)s(e)p Ff(\))28 b(or)f(\()p Fe(M)1866 4317 y Fd(l)1914 4305 y Fe(<)p Ff(=)c Fe(cov)s(er)r(edB)t(ases)p Ff(\))2150 5255 y(2)p eop %%Trailer end userdict /end-hook known{end-hook}if %%EOF kmer-code-2013-trunk/ESTmapper LaTeX/hash-tables.tex0000644000000000000000000001310510562262400020656 0ustar rootroot\documentclass[twoside, twocolumn, 10pt]{article} \usepackage{amsmath,amssymb} \usepackage{moreverb} \usepackage{fancyheadings} \usepackage{ulem} \usepackage{parskip} \usepackage{calc,ifthen,epsfig} \sloppy % A few float parameters % \renewcommand{\dbltopfraction}{0.9} \renewcommand{\dblfloatpagefraction}{0.9} %\renewcommand{\textfraction}{0.05} \begin{document} % See page 63-64, LaTeX Companion % % leftmargin controls the left margin for EVERYTHING in the list! % \newcommand{\entrylabel}[1]{\mbox{\texttt{#1:}}\hfil} \newenvironment{entry} {\begin{list}{}% {\renewcommand{\makelabel}{\entrylabel}% %\setlength{\leftmargin}{1.5in}% }} {\end{list}} % The first parbox width controls the indent on the first text line % The makebox width seems to do nothing. 
\newcommand{\Lentrylabel}[1]{% {\parbox[b]{0pt}{\makebox[0pt][l]{\texttt{#1:}}\\}}\hfil\relax} \newenvironment{Lentry} {\renewcommand{\entrylabel}{\Lentrylabel}\begin{entry}} {\end{entry}} \title{ESTmapper documentation} \author{ Liliana Florea\thanks{liliana.florea@celera.com}, Brian P. Walenz\thanks{brian.walenz@celera.com}} \maketitle \pagestyle{fancy} \rhead[]{} \chead[ESTmapper]{ESTmapper} \lhead[\today]{\today} \normalem \newcommand{\ESTmapper}{{\sc ESTmapper\ }} \begin{abstract} The gory details of the \ESTmapper process are described. \end{abstract} \subsection{Hash Function Definitions} In the discussion that follows, let $A$ be an encoded mer, $H$ be the hashed value of the mer, and $C$ be the check value. $A$ is $m$ bits wide, $H$ is $h$ bits wide and $C$ is $c = m-h$ bits wide. Our hash and check functions must satisfy the following properties: \begin{align*} f_H &: m \rightarrow h \\ f_C &: m \rightarrow c \\ f_R &: h \times c \rightarrow m \end{align*} such that $f_R(f_H(A), f_C(A)) = A$. Furthermore, $f_H$ should be a good hash function, {\bf whatever that means}. The functions are explained in Section~\ref{sec:hashfcn}. \subsection{existDB} The {\tt existDB} will tell us if a mer exists in a sequence. We can build the structure in Figure~\ref{fig:hashstruct} in five steps, using $\Theta(2 \cdot 2^h + 2 \cdot n)$ time and no temporary space. \begin{enumerate} \item Allocate and zero $2^h$ integers for the hash table. \item Count the size of each bucket: hash each mer, increment the size of that bucket. This can be done using the space for the hash table. Also count the number of mers. \item Allocate $n$ bucket entries, one for each mer. There is no need to initialize these. \item Make the hash table entry $i$ point to the start of bucket $i$. Note that the hash table entry $i+1$ can be used to find the end of bucket $i$. \item Rehash each mer, inserting the check value into the next available bucket entry (use the hash table to keep track of the next available entry).
The buckets contain all the mers after this step, and the hash table is off by one -- entry $i$ points to the start of bucket $i+1$. If we offset the start of the table, we can fix this in $O(1)$ time. \end{enumerate} We assume that the input sequence does not contain duplicate mers. If it does, we should remove them from the table. \subsection{positionDB} We can extend the {\tt existDB} to store position information by storing, in the bucket entry, either the position of the mer (if there is exactly one copy) or a pointer to a list of positions. Unlike the {\tt existDB} we now need to remove duplicate mers from the table. \begin{enumerate} \item count bucket size - this overcounts the true size; it counts duplicates \item allocate counting buckets - build a list of hashed mers and the position that they occur. \item sort each bucket \item allocate the final hash table, buckets and position lists {\bf XXX} can we reuse the hash table and bucket space?? \item copy the counting buckets into the final structure. mers that occur exactly once have their position stored in the bucket entry. mers that occur more than once have a pointer to the position list placed in the bucket entry. \end{enumerate} \subsection{A Good Hash Function} \label{sec:hashfcn} A simple hash function would be to use the highest $h$ bits of the encoded mer as the hash, and use the lowest $m-h$ bits of the mer as the check. Unfortunately, this is a very poor hash function --- the hash function is strongly correlated with the input mer. {\bf needs more blah blah} A better hash function would first ``scramble'' the bits in the mer to break the correlation between the input and the output. In the discussion that follows, let $A$ be an encoded mer, $H$ be the hashed value of the mer, and $C$ be the collision resolution value. $A$ is $m$ bits wide, $H$ is $h$ bits wide and $C$ is $m-h$ bits wide. 
We want to find functions $f_H : m \rightarrow h$, $f_C : m \rightarrow c$, $f_R : h \times c \rightarrow m$ such that \begin{align*} f_H(A) &= H \\ f_C(A) &= C \\ f_R(H,C) &= A \end{align*} Furthermore, $f_H$ should be a good hash function. We specify $f_H$ and $f_C$ by specifying each bit in the output. Bits are numbered from 1 at the least significant end, and $A_j = 0$ for $j > m$. \begin{align*} H_i &= A_{i} \oplus A_{i+\frac{m-h}{2}} \oplus A_{i+m-h}, \text{ for } 1 \le i \le h \\ C_i &= A_{i}, \text{ for } 1 \le i \le m-h \end{align*} Likewise, $f_R$ can be expressed as \begin{align*} A_i &= \begin{cases} C_i & 1 \le i \le m-h \\ A_{i-m+h} \oplus A_{i-\frac{m-h}{2}} \oplus H_{i-m+h} & m-h < i \le m \end{cases} \end{align*} In C code \begin{verbatim} u64bit fH(u64bit A) { return(((A) ^ (A >> (m-h)/2) ^ (A >> (m-h))) & MASK(h)); } u64bit fC(u64bit A) { return(A & MASK(m-h)); } \end{verbatim} where {\tt u64bit} is a 64-bit unsigned integer type. The code for $f_R$ is non-trivial, and is not needed. \end{document} kmer-code-2013-trunk/ESTmapper LaTeX/filter.fig0000644000000000000000000000522610562262400017722 0ustar rootroot#FIG 3.2 Landscape Center Inches Letter 100.00 Single -2 1200 2 5 1 0 1 0 7 100 0 -1 4.000 0 1 0 0 1612.500 2362.500 1650 2100 1425 2175 1350 2400 5 1 0 1 0 7 100 0 -1 4.000 0 1 0 0 2362.500 1612.500 2400 1350 2175 1425 2100 1650 2 1 0 1 0 7 100 0 -1 3.000 0 0 -1 0 0 2 1950 4200 2250 3900 2 1 0 1 0 7 100 0 -1 3.000 0 0 -1 0 0 2 2325 3825 2625 3525 2 1 0 1 0 7 100 0 -1 3.000 0 0 -1 0 0 2 2700 3375 2925 3150 2 1 0 1 0 7 100 0 -1 3.000 0 0 -1 0 0 2 3900 2250 4200 1950 2 1 0 1 0 7 100 0 -1 3.000 0 0 -1 0 0 2 3525 2550 3825 2250 2 1 0 1 0 7 100 0 -1 3.000 0 0 -1 0 0 2 3150 3000 3450 2700 2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2 3 1 1.00 60.00 120.00 3 1 1.00 60.00 120.00 1275 5100 5400 975 2 3 0 1 0 0 100 0 5 0.000 0 0 -1 0 0 5 1575 4800 1425 4800 5025 1200 5175 1200 1575 4800 2 1 0 1 0 7 100 0 -1 3.000 0 0 -1 0 0 2 1725 4575 2025 4275 2 3 0 1 0 0 100 0 2 0.000 0 0 -1 0 0 5 4125 2250 5550 2250 5550 1200 5175 1200 4125 2250 2 3 0 
1 0 0 100 0 2 0.000 0 0 -1 0 0 4 1575 4800 2250 4125 2250 4800 1575 4800 2 3 0 1 7 0 100 0 10 0.000 0 0 -1 0 0 6 2250 4800 2250 4125 4125 2250 5550 2250 5550 4800 2250 4800 2 1 0 1 0 7 100 0 -1 3.000 0 0 -1 0 0 2 3375 3600 3675 3300 2 2 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 5 1950 1950 4200 1950 4200 4200 1950 4200 1950 1950 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 3 1200 1200 1200 4800 5550 4800 2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 5850 4800 5550 4800 2 1 2 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 6150 4800 5850 4800 2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2 3 1 1.00 60.00 120.00 3 1 1.00 60.00 120.00 1500 2250 6150 2250 2 1 0 1 0 7 100 0 -1 3.000 0 0 -1 0 0 2 5325 2100 5625 1800 2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2 3 1 1.00 60.00 120.00 3 1 1.00 60.00 120.00 2250 1500 2250 5100 2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 1950 1800 1950 1500 2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 1800 1950 1500 1950 2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2 0 0 1.00 30.00 60.00 0 0 1.00 30.00 60.00 1950 1650 2250 1650 2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2 0 0 1.00 30.00 60.00 0 0 1.00 30.00 60.00 1650 1950 1650 2250 2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 0 1 2 0 0 1.00 30.00 60.00 4800 1425 4650 1275 2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 0 1 2 0 0 1.00 30.00 60.00 4875 1500 5025 1650 4 0 0 100 0 0 12 0.0000 0 105 120 1500 4575 A\001 4 0 0 100 0 0 12 0.0000 0 105 90 3225 3600 B\001 4 0 0 100 0 0 12 0.0000 0 105 105 5175 2100 C\001 4 0 0 100 0 0 12 0.0000 0 150 465 1275 2700 overlap\001 4 0 0 100 0 0 12 0.0000 0 135 510 1275 2550 genomic\001 4 0 0 100 0 0 12 0.0000 0 150 465 2400 1500 overlap\001 4 0 0 100 0 0 12 0.0000 0 120 345 2400 1350 query\001 4 0 0 100 0 0 12 0.0000 0 135 1170 3300 1500 diagonal difference\001 kmer-code-2013-trunk/ESTmapper LaTeX/blurb0000644000000000000000000000377310562262400017004 0ustar rootrootThe ESTMapper is a software package designed to efficiently map large EST data sets to a target genome. 
For each cDNA (EST or full-length mRNA) sequence in the input set, it will determine a set of instances of the EST in the target genome in a three-stage process. Stage I, 'signal finding', is an efficient similarity search which identifies potential EST-containing regions in the reference genome. In Stage II, 'signal filtering', regions containing weak signals are removed based on the extent of the cDNA matched and the number of regions. Stage III, 'signal polishing', uses an enhanced version of Sim4 to produce spliced alignments between the query EST sequence and each of the remaining genomic regions. Features [Input] . Simple interface and input presentation, as multi-fasta files. . Requires no pre-processing of sequences (typically, vector and quality trimming, contaminant screening, assigning quality values, repeat masking). [Output] . Output formatted as flat files, and XML-feature files, which can be viewed using Celera's Genome Browser. . Output filtered by quality (the three? files; also, flexible parameters). [Implementation] . Memory and space efficient (e.g., ). . Search uses an efficient . Polishing stage improved for efficiency. . Parallel operation to take advantage of multi-processor environments and for better I/O management. [Algorithmics] . Search - uses a proprietary fast near-identity search program. . Search + filtering offer high sensitivity at relatively low computational cost. . Differential filtering for mRNA and EST sequences takes full advantage of their mapping characteristics to reduce the computational cost for polishing false positives. . Efficient screening for repetitive elements. . Sim4db - iterative procedure allows detection of multiple occurrences. Improvements for memory efficiency, I/O. . No segmentation of the sequences is necessary (e.g., use whole chromosomes), hence matches are not pruned to fit in fixed size intervals (allows arbitrarily long introns).
kmer-code-2013-trunk/ESTmapper LaTeX/hit-filtering.tex0000644000000000000000000003065710562262400021243 0ustar rootroot\documentclass[twoside, twocolumn, 10pt]{article} \usepackage{amsmath,amssymb} \usepackage{moreverb} \usepackage{fancyheadings} \usepackage{ulem} \usepackage{parskip} \usepackage{calc,ifthen,epsfig} \sloppy % A few float parameters % \renewcommand{\dbltopfraction}{0.9} \renewcommand{\dblfloatpagefraction}{0.9} %\renewcommand{\textfraction}{0.05} \begin{document} % See page 63-64, LaTeX Companion % % leftmargin controls the left margin for EVERYTHING in the list! % \newcommand{\entrylabel}[1]{\mbox{\texttt{#1:}}\hfil} \newenvironment{entry} {\begin{list}{}% {\renewcommand{\makelabel}{\entrylabel}% %\setlength{\leftmargin}{1.5in}% }} {\end{list}} % The first parbox width controls the indent on the first text line % The makebox width seems to do nothing. \newcommand{\Lentrylabel}[1]{% {\parbox[b]{0pt}{\makebox[0pt][l]{\texttt{#1:}}\\}}\hfil\relax} \newenvironment{Lentry} {\renewcommand{\entrylabel}{\Lentrylabel}\begin{entry}} {\end{entry}} \title{ESTmapper documentation} \author{ Liliana Florea\thanks{liliana.florea@celera.com}, Brian P. Walenz\thanks{brian.walenz@celera.com}} \maketitle \pagestyle{fancy} \rhead[]{} \chead[ESTmapper]{ESTmapper} \lhead[\today]{\today} \normalem \newcommand{\ESTmapper}{{\sc ESTmapper\ }} \begin{abstract} The gory details of the \ESTmapper process are described. \end{abstract} \subsection{Hit Filtering} \begin{figure*} \begin{center} \epsfig{figure=filter.eps, silent=, width=4.5in} \end{center} \caption{Diagram of the match-building algorithm. The dotted box represents the extent of the current match. Lines with arrows define regions of action. If the next mer falls in the dark region, the current match is evaluated and potentially saved; if the next mer falls into any of the lighter areas, the mer is added to the current match, and the current match is extended. Note that we have processed all mers in the white region.
We process hit A next. As it is in a light region, it is added to the match, and the match is extended. Hit B will break the current match, so the current match is evaluated and saved. A new match region is formed, encompassing only hit B. Hit C would extend the new match region.} \label{fig:hitfiltering} \end{figure*} The goal of filtering is to take a set of mers, and isolate subsets that look like cDNA matches. That is, we want to find a subset of hits that form a nearly-identical alignment, but could have large gaps in the genomic sequence (introns). This is done in two passes. The first pass will detect all nearly-identical regions; some of these regions will be grouped into exon-intron structures. The second pass will examine the regions, and merge those that are in approximately the same genomic area. The hits in a region of near identity will all be on nearly the same diagonal. By sorting the hits by the diagonal they are on, we can quickly find a subset of hits that form a nearly identical match because they will be consecutive in the list. The first pass is shown in Figure~\ref{fig:hitfiltering}. In the figure, a large dashed box represents the extent of the current matching region; the lines with arrows are various distance thresholds and divide the space into three regions (dark, light and white). The white region contains exactly those hits that we have processed thus far. If the next hit in the list falls into one of the lightly shaded regions, it is added to the current match. If the next hit falls into the darkly shaded region, it terminates the current match. When the current match is terminated, it is evaluated to decide if it is a significant match or not. Two classes of matches are possible: single exon or multiple exon (based on the size of the diagonal). If a single exon match contains more than $X$ exact base matches, the match region is saved. If a multiple exon match contains more than $Y$ exact base matches, the match region is saved.
Otherwise, the match region is discarded. In either case, a new match region is created which contains only the current hit. When a match is saved, we only need to save the coordinates in the genomic sequence. Essentially, we are saying ``There might be some piece of the cDNA on this genomic region''. We extend each side of the saved region by an amount proportional to the amount of cDNA that was not represented by the match. {\bf need to explain why} \subsection{Match Merging} Because of the extension of matches, some matches might be overlapping, or close enough to consider the same match. The final step is to scan the list of matches and merge those that are close. \subsection{output} Matches are scored by the number of exact base matches they have. We probably want to normalize this to [0,1] somehow, but should also use number of exons, etc., etc. \section{What is a signal} A signal has three values associated with it: the amount 'covered', the amount 'matched' and the total 'length'. % The amount covered is the number of bases in the mRNA that are contained in at least one mer. % The amount matched is the number of paired bases (for example, position $i$ in the cDNA paired with position $j$ in the genomic sequence) covered by a mer. % The length is the number of mers in the mRNA (roughly equivalent to the number of bases in the mRNA that could be covered by a mer, but easier to compute). From these, we can derive two scores, the coverage and the multiplicity. The coverage, $\frac{covered}{length}$, represents the fraction of the mRNA that we found, while the multiplicity, $\frac{matched}{covered}$, represents the amount of the mRNA that we found too many times. A high multiplicity usually indicates a repeat-containing mRNA. High multiplicity and high coverage can indicate that the mRNA is not cDNA. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Filtering, first try} Written around 4 December 2001.
\subsection{Output of the Search} The search outputs hits. Each hit is made up of one or more exon-like matches. Each match can represent one or more exons, as long as the mers in the match do not show inconsistent overlap. [Insert three pictures here -- single exon match, multiple exon match, inconsistent mers] Each match is extended by a value determined by the amount of un-matched query sequence. Each match is scored by the number of mers it contains. Once the list of matches is found, any matches within 5Kb are merged. The number of mers in a merged match is the sum of the number of mers in its pieces. The list of merged matches is then output as hits. Hits are scored by: \[ \text{score} = \frac{\text{number of mers in the merged match}}{\text{number of mers in the query}} \] Number of mers in the query is the number of valid mers -- the high-frequency mers and mers containing 'N' are not counted. A score of 1.0 is perfect; a score less than 1.0 indicates a partial match, while a score more than 1.0 indicates a duplicate gene nearby, or a large spurious match. \subsection{Filtering of Hits} Given all hits for a specific query sequence, we filter them by throwing away the lowest scoring ones. (duh!) A low-score cutoff is determined with: \[ \text{cutoff} = \text{cutoffScale} \cdot (\text{highestScore} - \text{lowestScore}) + \text{lowestScore} \] where cutoffScale is a parameter to decide how aggressive the filtering is (1.0 is perfectly aggressive, 0.3 is reasonable). Then all hits with $\text{score} < \text{cutoff}$ are discarded. \subsection{Modifications} Occasionally, large spurious matches are found (e.g., ???). The score of the spurious match can be significantly better than the score of the real match, which will cause the real match to be filtered out. To compensate for this, the value of highestScore in the cutoff computation is modified to be \[ \max(\text{lowestScore}, \min(1.0, \text{highestScore})) \] \subsection{Discussion} As the search is done over the whole genome, and all hits are used to determine the highestScore and lowestScore blah, blah, blah.
if highestScore $<$ 1.0 -- unmatched mers are assumed to be in error, either in the query or the genome. We will never find these mers, so we should reduce the aggressiveness of the filter to account for this. if highestScore $>$ 1.0 -- the best hit is probably bogus, and we still want to polish hits down to (about) the same level as if the best hit were a perfect hit. Thus, threshold the highestScore to be a perfect hit. Finally, we need to make the highestScore at least the lowestScore, in the extreme case that the worst hit is greater than 1.0. Ha, ha. Why are you searching for repeats, anyway? \subsection{Implementation Detail} A CPU-time limit is imposed when polishing hits for queries that have a hit with score greater than 1.5. This solves the nasty case when we get a chunk of genomic as input, and it matches an entire chromosome with several hundred exon-like things, and it takes hours to polish. The more correct thing to do is to abort ANY polish that takes more than 60 seconds, not just suspicious looking ones. Software engineering issue. To do this correctly, we would need to register all memory allocated by sim4(), and free it when a timer goes off. How to actually return from the sim4()? Without longjmp()? If we use threads, is this easier? Have the master thread abort the slave ({\tt pthread\_cancel})? Still have the memory deallocation problem. ({\tt pthread\_cleanup\_push} can do it, if we keep a list of allocations) \section{Filtering EST signals} \section{Filtering mRNA signals} \begin{figure} \begin{center} \begin{tabular}{|c|c|p{0.3in}|p{1.25in}|} \hline Switch & Variable & Def. 
Value & Description \\ \hline \hline -l & $L$ & 0.2 & Signal spread low range \\ -h & $H$ & 0.6 & Signal spread high range \\ -v & $V$ & 0.3 & Pass value \\ -m & $M$ & 0.3 & Signal quality floor \\ -mc & $M_c$ & 0.2 & Minimum signal quality \\ -ml & $M_l$ & 150 & Minimum signal size \\ \hline \end{tabular} \end{center} \caption{Parameters, default values and descriptions} \label{table:defvalues} \end{figure} In order to filter signals, we need to decide, for each mRNA, which signals are bad, and which are good (duh!), which means that we'll need to look at {\em all} signals for a single mRNA. For the filter presented below, we need to know the best and worst coverage values that occur for any signal associated with a specific mRNA. Once those are known, the signals can be filtered in any order. This is important in the case where the signals are detected chromosome by chromosome. Instead of sorting all signals, we can save the best and worst coverage for each mRNA. The filter has six parameters, summarized in Table~\ref{table:defvalues}. If the signals for a specific mRNA are all very similar, it is probable that the weaker signals are weak only because of a few mismatches that break 20-mers. In this case, we cannot reliably pick the signals that are true, and should consider all of them. On the other hand, if there is a large range in the quality of signals, we can safely discard low scoring signals, and still be confident that we will find the good stuff. Therefore, the filter will discard no signals if the range in quality values is small, and will gradually discard more, proportional to the range. So that we don't discard too much, we limit the increase in filtering to $V$ (0.3). 
\begin{align*} h &= bestCoverage - worstCoverage \\ p &= \begin{cases} 0.0 & \text{if $h \le L$} \\ V \cdot \frac{h-L}{H-L} & \text{if $L < h < H$} \\ V & \text{if $H \le h$} \end{cases} \\ c &= \min(worstCoverage + p \cdot h, M) \end{align*} \begin{figure*} \begin{center} \epsfig{figure=mRNAfilt.eps, silent=, width=4.5in} \end{center} \caption{The $p$ curve.} \label{fig:pcurve} \end{figure*} $p$ is the amount of filtering, ranging from minimum (0.0) to maximum ($V$, a parameter). The $c$ value computed above is the filtering threshold. Signals with coverage below $c$ are considered weak, and are discarded. If the score range is small ($\le L$), then $c$ will be $worstCoverage$, and we do no filtering. If the score range is large ($\ge H$), then $p = V$ and $c$ will be $worstCoverage + V \cdot h$, capped at $M$. $c$ is the minimum coverage that will be accepted. It is derived from the range of scores, not the number of scores. Finally, it is possible that {\em all} signals are good. If we used the above filtering we would be discarding the low scoring (but still valid) signals. To overcome this, absolute limits $M_c$ and $M_l$ are enforced. A signal is saved if both of the following conditions are met: \begin{enumerate} \item ($c \le coverage$) \item ($M_c \le coverage$) or ($M_l \le coveredBases$) \end{enumerate} This filter is overly permissive, throwing out only signals that are obviously garbage.
\end{document} kmer-code-2013-trunk/ESTmapper LaTeX/filter.eps0000644000000000000000000001564510562262400017752 0ustar rootroot%!PS-Adobe-2.0 EPSF-2.0 %%Title: filter.eps %%Creator: fig2dev Version 3.2 Patchlevel 0-beta3 %%CreationDate: Fri Oct 26 16:16:59 2001 %%For: walenz@dsc154p.celera.com (Brian Walenz,3604) %%Orientation: Portrait %%BoundingBox: 0 0 299 250 %%Pages: 0 %%BeginSetup %%EndSetup %%Magnification: 1.0000 %%EndComments /$F2psDict 200 dict def $F2psDict begin $F2psDict /mtrx matrix put /col-1 {0 setgray} bind def /col0 {0.000 0.000 0.000 srgb} bind def /col1 {0.000 0.000 1.000 srgb} bind def /col2 {0.000 1.000 0.000 srgb} bind def /col3 {0.000 1.000 1.000 srgb} bind def /col4 {1.000 0.000 0.000 srgb} bind def /col5 {1.000 0.000 1.000 srgb} bind def /col6 {1.000 1.000 0.000 srgb} bind def /col7 {1.000 1.000 1.000 srgb} bind def /col8 {0.000 0.000 0.560 srgb} bind def /col9 {0.000 0.000 0.690 srgb} bind def /col10 {0.000 0.000 0.820 srgb} bind def /col11 {0.530 0.810 1.000 srgb} bind def /col12 {0.000 0.560 0.000 srgb} bind def /col13 {0.000 0.690 0.000 srgb} bind def /col14 {0.000 0.820 0.000 srgb} bind def /col15 {0.000 0.560 0.560 srgb} bind def /col16 {0.000 0.690 0.690 srgb} bind def /col17 {0.000 0.820 0.820 srgb} bind def /col18 {0.560 0.000 0.000 srgb} bind def /col19 {0.690 0.000 0.000 srgb} bind def /col20 {0.820 0.000 0.000 srgb} bind def /col21 {0.560 0.000 0.560 srgb} bind def /col22 {0.690 0.000 0.690 srgb} bind def /col23 {0.820 0.000 0.820 srgb} bind def /col24 {0.500 0.190 0.000 srgb} bind def /col25 {0.630 0.250 0.000 srgb} bind def /col26 {0.750 0.380 0.000 srgb} bind def /col27 {1.000 0.500 0.500 srgb} bind def /col28 {1.000 0.630 0.630 srgb} bind def /col29 {1.000 0.750 0.750 srgb} bind def /col30 {1.000 0.880 0.880 srgb} bind def /col31 {1.000 0.840 0.000 srgb} bind def end save -71.0 307.0 translate 1 -1 scale /cp {closepath} bind def /ef {eofill} bind def /gr {grestore} bind def /gs {gsave} bind def /sa {save} bind def /rs 
{restore} bind def /l {lineto} bind def /m {moveto} bind def /rm {rmoveto} bind def /n {newpath} bind def /s {stroke} bind def /sh {show} bind def /slc {setlinecap} bind def /slj {setlinejoin} bind def /slw {setlinewidth} bind def /srgb {setrgbcolor} bind def /rot {rotate} bind def /sc {scale} bind def /sd {setdash} bind def /ff {findfont} bind def /sf {setfont} bind def /scf {scalefont} bind def /sw {stringwidth} bind def /tr {translate} bind def /tnt {dup dup currentrgbcolor 4 -2 roll dup 1 exch sub 3 -1 roll mul add 4 -2 roll dup 1 exch sub 3 -1 roll mul add 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} bind def /shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul 4 -2 roll mul srgb} bind def /$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def /$F2psEnd {$F2psEnteredState restore end} def %%EndProlog $F2psBegin 10 setmiterlimit n -1000 6112 m -1000 -1000 l 7162 -1000 l 7162 6112 l cp clip 0.06000 0.06000 sc % Arc 7.500 slw gs n 2362.5 1612.5 265.2 -81.9 171.9 arcn gs col0 s gr gr % Polyline n 1950 4200 m 2250 3900 l gs col0 s gr % Polyline n 2325 3825 m 2625 3525 l gs col0 s gr % Polyline n 2700 3375 m 2925 3150 l gs col0 s gr % Polyline n 3900 2250 m 4200 1950 l gs col0 s gr % Polyline n 3525 2550 m 3825 2250 l gs col0 s gr % Polyline n 3150 3000 m 3450 2700 l gs col0 s gr % Polyline gs clippath 5292 1041 m 5380 994 l 5334 1083 l 5432 986 l 5389 943 l cp 1383 5034 m 1294 5080 l 1341 4992 l 1243 5089 l 1286 5132 l cp clip n 1275 5100 m 5400 975 l gs col0 s gr gr % arrowhead n 1383 5034 m 1294 5080 l 1341 4992 l 1379 4996 l 1383 5034 l cp gs 0.00 setgray ef gr col0 s % arrowhead n 5292 1041 m 5380 994 l 5334 1083 l 5296 1079 l 5292 1041 l cp gs 0.00 setgray ef gr col0 s % Polyline n 1575 4800 m 1425 4800 l 5025 1200 l 5175 1200 l cp gs 0.75 setgray ef gr gs col0 s gr % Polyline n 1725 4575 m 2025 4275 l gs col0 s gr % Polyline n 4125 2250 m 5550 2250 l 5550 1200 l 5175 1200 l cp gs 0.90 setgray ef gr gs col0 s gr % Polyline n 1575 4800 m 2250 
4125 l 2250 4800 l cp gs 0.90 setgray ef gr gs col0 s gr % Polyline n 2250 4800 m 2250 4125 l 4125 2250 l 5550 2250 l 5550 4800 l cp gs 0.50 setgray ef gr gs col7 s gr % Polyline n 3375 3600 m 3675 3300 l gs col0 s gr % Polyline [60] 0 sd n 1950 1950 m 4200 1950 l 4200 4200 l 1950 4200 l cp gs col0 s gr [] 0 sd % Polyline n 1200 1200 m 1200 4800 l 5550 4800 l gs col0 s gr % Polyline [60] 0 sd n 5850 4800 m 5550 4800 l gs col0 s gr [] 0 sd % Arc gs n 1612.5 2362.5 265.2 -81.9 171.9 arcn gs col0 s gr gr % Polyline [15 60] 60 sd n 6150 4800 m 5850 4800 l gs col0 s gr [] 0 sd /Times-Roman ff 180.00 scf sf 3300 1500 m gs 1 -1 sc (diagonal difference) col0 sh gr % Polyline gs clippath 6027 2220 m 6123 2250 l 6027 2280 l 6165 2280 l 6165 2220 l cp 1623 2280 m 1527 2250 l 1623 2220 l 1485 2220 l 1485 2280 l cp clip n 1500 2250 m 6150 2250 l gs col0 s gr gr % arrowhead n 1623 2280 m 1527 2250 l 1623 2220 l 1647 2250 l 1623 2280 l cp gs 0.00 setgray ef gr col0 s % arrowhead n 6027 2220 m 6123 2250 l 6027 2280 l 6003 2250 l 6027 2220 l cp gs 0.00 setgray ef gr col0 s % Polyline n 5325 2100 m 5625 1800 l gs col0 s gr % Polyline gs clippath 2280 4977 m 2250 5073 l 2220 4977 l 2220 5115 l 2280 5115 l cp 2220 1623 m 2250 1527 l 2280 1623 l 2280 1485 l 2220 1485 l cp clip n 2250 1500 m 2250 5100 l gs col0 s gr gr % arrowhead n 2220 1623 m 2250 1527 l 2280 1623 l 2250 1647 l 2220 1623 l cp gs 0.00 setgray ef gr col0 s % arrowhead n 2280 4977 m 2250 5073 l 2220 4977 l 2250 4953 l 2280 4977 l cp gs 0.00 setgray ef gr col0 s % Polyline n 1950 1800 m 1950 1500 l gs col0 s gr % Polyline n 1800 1950 m 1500 1950 l gs col0 s gr % Polyline gs clippath 2163 1635 m 2223 1650 l 2163 1665 l 2265 1665 l 2265 1635 l cp 2037 1665 m 1977 1650 l 2037 1635 l 1935 1635 l 1935 1665 l cp clip n 1950 1650 m 2250 1650 l gs col0 s gr gr % arrowhead n 2037 1665 m 1977 1650 l 2037 1635 l col0 s % arrowhead n 2163 1635 m 2223 1650 l 2163 1665 l col0 s % Polyline gs clippath 1665 2163 m 1650 2223 l 1635 2163 l 
1635 2265 l 1665 2265 l cp 1635 2037 m 1650 1977 l 1665 2037 l 1665 1935 l 1635 1935 l cp clip n 1650 1950 m 1650 2250 l gs col0 s gr gr % arrowhead n 1635 2037 m 1650 1977 l 1665 2037 l col0 s % arrowhead n 1665 2163 m 1650 2223 l 1635 2163 l col0 s % Polyline gs clippath 4749 1353 m 4780 1405 l 4728 1374 l 4800 1446 l 4821 1425 l cp clip n 4800 1425 m 4650 1275 l gs col0 s gr gr % arrowhead n 4749 1353 m 4780 1405 l 4728 1374 l col0 s % Polyline gs clippath 4926 1572 m 4894 1519 l 4947 1551 l 4875 1479 l 4854 1500 l cp clip n 4875 1500 m 5025 1650 l gs col0 s gr gr % arrowhead n 4926 1572 m 4894 1519 l 4947 1551 l col0 s /Times-Roman ff 180.00 scf sf 1500 4575 m gs 1 -1 sc (A) col0 sh gr /Times-Roman ff 180.00 scf sf 3225 3600 m gs 1 -1 sc (B) col0 sh gr /Times-Roman ff 180.00 scf sf 5175 2100 m gs 1 -1 sc (C) col0 sh gr /Times-Roman ff 180.00 scf sf 1275 2700 m gs 1 -1 sc (overlap) col0 sh gr /Times-Roman ff 180.00 scf sf 1275 2550 m gs 1 -1 sc (genomic) col0 sh gr /Times-Roman ff 180.00 scf sf 2400 1500 m gs 1 -1 sc (overlap) col0 sh gr /Times-Roman ff 180.00 scf sf 2400 1350 m gs 1 -1 sc (query) col0 sh gr $F2psEnd rs kmer-code-2013-trunk/ESTmapper LaTeX/ESTmapper.tex0000644000000000000000000013126710562262400020335 0ustar rootroot\documentclass[twoside,11pt]{book} \usepackage{amsmath,amssymb} \usepackage{moreverb} \usepackage{fancyheadings} \usepackage{ulem} \usepackage{parskip} \usepackage{calc,ifthen,epsfig} \sloppy % % a mathematican is a machine that transforms coffee into theorems % a software engineer is a machine that transforms sugar and caffiene into software % a manager is a machine that transforms people into power point % \usepackage{longtable} % A few float parameters % \renewcommand{\dbltopfraction}{0.9} \renewcommand{\dblfloatpagefraction}{0.9} %\renewcommand{\textfraction}{0.05} \begin{document} \pagestyle{fancy} \rhead[]{} \chead[ESTmapper]{ESTmapper} \lhead[\today]{\today} \newcommand{\ESTmapper}{{\sc ESTmapper}} \normalem %\title{ESTmapper 
documentation\\ %{\small or, why algorithmists shouldn't write manuals}} %\author{ %Liliana Florea\thanks{liliana.florea@celera.com}, %Brian P. Walenz\thanks{brian.walenz@celera.com}} % %\maketitle \tableofcontents %\listoffigures %\listoftables %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \chapter{Introduction} \label{chap:intro} \if false The ESTMapper is a software package designed to efficiently map large EST data sets to a target genome. For each cDNA (EST or full-length mRNA) sequence in the input set, it will determine a set of instances of the EST in the target genome in a three-stage process. Stage I, 'signal finding', is an efficient similarity search which identifies potential EST-containing regions in the reference genome. In Stage II, 'signal filtering', regions containing weak signals are removed based on the extent of the cDNA matched and the number of regions. Stage III, 'signal polishing' uses an enhanced version of Sim4 to produce spliced alignments between the query EST sequence and each of the remaining genomic regions. \fi \ESTmapper\ is a software package designed to efficiently map large cDNA data sets to a target genome. % A three-stage process is used to locate each cDNA sequence in the target genome. % % For each cDNA (EST or full-length mRNA) sequence %in the input set, \ESTmapper\ will locate the cDNA sequence in the %target genome in a three-stage process. % The first stage, {\em signal finding}, is an efficient sequence similarity search which identifies regions on the genome which could potentially contain the cDNA sequence. %potential EST-containing regions %in the reference genome. % The second stage, {\em signal filtering}, discards regions containing weak signals based on the extent of the cDNA matched and the number of candidate genomic regions. 
% The final stage, {\em signal polishing} uses an enhanced version of the {\tt Sim4} program to produce spliced alignments between the cDNA sequences and their associated genomic regions. \section{Features} \ESTmapper\ offers the following features for high-throughput mapping of cDNA sequences to genomic sequences: \begin{itemize} %[Input] \item Simple input presentation, as multi-fasta files. \item Requires no pre-processing of sequences (typical procedures include vector and quality trimming, contaminant screening, assigning quality values, and repeat masking). %[Output] \item Output formatted as easy-to-parse flat files. \item When converted to XML-feature files, the results can be viewed using Celera's Genome Browser, or loaded into a database. \item Output filtered into three user-specified quality levels corresponding to {\it good, full-length}, {\it good, but short} and {\it low quality}. %\item Flexible parameters for the quality of reported matches. %[USER INTERFACE] \item Choice of pre-packaged or fully customizable mapping procedures. %[Implementation] \item Parallel operation to take advantage of multi-processor environments. %[Algorithmics] \item The search stage employs a proprietary ultra-fast near-identity search program, which uses an efficient k-mer index to quickly identify match seeds. \item The combined search and filtering stages offer high sensitivity at relatively low computational cost. \item The differential filtering for mRNA and EST sequences takes full advantage of their mapping characteristics to reduce the computational cost for polishing false positives. \item Efficient run-time screening for repetitive elements. 
\item Extensions and improvements to the industry-standard EST-to-genome alignment program Sim4: \begin{itemize} \item detection of multiple occurrences of the query in the genomic sequence \item improved input and output mechanisms for high-throughput processing of different sequences \item better memory management allows for processing of large sequences \end{itemize} \item Whole chromosomal sequences can be used --- no segmentation of the genomic sequences is necessary. Consequently, matches are not pruned to fit in fixed size intervals, which allows arbitrarily long introns. \end{itemize} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \chapter{Installation} \label{chap:install} {\tt bzip2 -dc ESTmapper.tar.bz2 | tar -xf - } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \chapter{Software Overview} \label{chap:overview} This chapter provides an overview of the \ESTmapper\ process. Understanding the process will assist in operation of \ESTmapper, and is necessary for using the advanced modes of operation. \ESTmapper\ is comprised of five stages. \begin{tabular}{|c|l|l|} \hline Stage & Name & Description \\ \hline \hline 1 & Preparation & Prepare the input files. \\ 2 & Searching & Locate cDNA signals in the genome. \\ 3 & Filtering & Remove weak signals. \\ 4 & Polishing & Resolve signals into spliced alignments. \\ 5 & Output & Assemble the output and collect statistics. \\ \hline \end{tabular} The stages are implemented so that they will not recompute previously computed results. This makes it easy to chain the stages together in a pipeline, or to associate specific hardware with a stage. For example, the compute intensive stage 4 can be processed on a fast, but expensive computer, while the I/O intensive stage 5 can be processed on an inexpensive workstation. The stages are explained in the sections that follow. {\bf A CPU-hour is defined as one hour of processing on one processor of a 500MHz Compaq ES40. 
Timing is given only to give a feeling for the expense of a particular stage.} \subsection*{Preparation} {\bf Prepare the input files.} This stage prepares the input to improve the efficiency of later stages. First, an index is built for each sequence file to allow random access to the sequences. Second, the genomic sequences are examined and grouped into approximately equally-sized groups to allow the search stage to execute in a specific memory footprint. This stage typically takes only a few minutes, and requires no significant resources. \subsection*{Searching} {\bf Search the genome for cDNA signals.} This stage executes the search algorithm for each group of sequences determined in the previous stage. The search algorithm uses a fast, but memory intensive, data structure to find all common $20$-mers between each cDNA sequence and each genomic sequence. Genomic regions which contain words consistent with an exon model are reported as potential cDNA containing regions. Because the search algorithm needs to use large amounts of memory to execute efficiently, it is multi-threaded. By default, this phase requires 4GB of main memory. For {\tt dbEST} size EST input, it will generate 70GB of output, and requires 50 CPU hours. For {\tt RefSeq} size mRNA input, it will generate {\bf XXXGB} of output, and requires a few CPU hours. \subsection*{Filtering} {\bf Filter the signals.} The signal filtering stage examines the output generated by the search, and discards regions that are relatively weak among the candidate regions for the same cDNA sequence. The ESTMapper implements two different protocols for filtering EST and full-length mRNA matches, respectively, taking into account the different characteristics of the two types of sequences. ESTs are shorter and less accurate due to sequencing errors and contamination with vector sequences, and therefore will generate weaker imprints on the genome than full-length mRNAs. 
They are also more likely to contain repeat sequences. Consequently, a more sensitive filter should be used. In contrast, full-length mRNA sequences are longer and more accurate, and in general have fewer expected occurrences on the genome. This makes it easier to differentiate between the false positive and true signals, and therefore the filter can be more specific. All candidate regions produced by the search are scored based on the portion of the cDNA match they contain, and the highest scoring of these will be selected for polishing. For EST input, at most 100 regions are selected for each query. If the number of regions exceeds this threshold, weak signals are discarded, and the procedure is repeated. ESTs with more than 100 candidate regions after the second filter are labeled as containing repetitive elements, and for these no regions are selected. For mRNAs, a fixed portion at the top of the scoring range is selected, and all regions with scores in this interval are selected in a first phase. In addition, all regions containing at least a fraction $p$ of the mRNA will be chosen. As mentioned, for EST input the \ESTmapper\ is able to detect repeat-containing cDNA sequences. {\bf Experiments indicate that sequences flagged as such are confirmed by RepeatMasker. -- do we want to include some evidence?} This stage requires three CPU hours, and four wall-clock hours to process {\tt dbEST} size EST input. It does not have any significant memory requirements; however, it makes heavy use of disk. It generates 6GB of output. After this phase completes, the full output of the search is no longer needed, and can be removed. \subsection*{Polishing} {\bf Polish the filtered signals.} This stage applies the {\tt Sim4} algorithm to each cDNA-genomic region to generate a spliced-alignment. 
The output presents in a condensed form information about the boundaries of exons and introns in the two sequences, predicted intron orientations, sequence similarity scores for the global and for the individual exons' alignments, and other sequence and alignment statistics. The complete list is reviewed in Section~\ref{subsec:matchformat}. {\bf NEEDS WORK ON MEMORY USAGE! How much? When?} {\tt dbEST} size EST input requires approximately 600 CPU hours, and generates 10GB of output. When processing large (more than 120Mb) genomic sequences, each {\tt Sim4} process can use over 1GB of memory. {\tt RefSeq} size mRNA inputs can be polished in a few CPU hours. \subsection*{Output} {\bf Process the output.} This stage collects the output from the polishing stage, and performs a final quality-based filtering of the matches and the cDNA. Matches are classified as ``good'', ``good, but short'' or ``low-quality'' based on the two statistics: {\em query-sequence identity}, and {\em alignment-sequence identity}. \begin{tabular}{|p{1.7in}|p{3.0in}|} \hline query-sequence identity & the percentage of nucleotides in the cDNA, excluding the polyA(T) tails, exactly matching the genomic sequence \\ alignment-sequence identity & the percentage of nucleotide matches in the spliced alignment \\ \hline \end{tabular} The ESTMapper will generate all spliced alignments of matches identified in the search stage which have at least $p$ percent alignment-sequence identity, and at least $c$ percent query-sequence identity. 
\begin{tabular}{|p{1.7in}|p{3.0in}|} \hline good & believed with high confidence \\ good, but short & would be believed with high confidence, except that only a small piece of the cDNA sequence matched \\ low-quality & a match was reported from the polishing stage, but the percent alignment-sequence identity is low \\ \hline \end{tabular} Note that it is possible for a cDNA sequence to have matches in any number of categories, for example, the true match would be labeled as ``good'', a partial match would be placed as ``good, but short'', and a paralogous match might be placed in ``low-quality''. cDNA sequences are classified as ``good'', ``good, but short'', ``low-quality'', ``missing'', or ``zero'', based on the quality of the best match for the cDNA, the lack of a match, or the lack of a cDNA signal, respectively. \begin{tabular}{|p{1.7in}|p{3.0in}|} \hline good & the best match for these sequences is classified as ``good'' \\ good, but short & the best match for these sequences is classified as ``good, but short'' \\ low-quality & the best match for these sequences is classified as ``low-quality'' \\ missing & signals were detected, but polishing did not generate any matches. Had a match been produced, they would have been ``very low-quality''. \\ zero & no signals were detected. These cDNA sequences are probably not present on the genome. \\ \hline \end{tabular} Unlike the classification of matches, each cDNA is classified into exactly one category. Section~\ref{sec:quality} discusses quality. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \chapter{Getting Started} \label{chap:start} {\bf Example of running an est search, example of an mrna search (using the same genomic). Layout of the directories. } Is the label a useful concept? Why not just use a full directory for each mapping. Simpler. The only thing we gain with labels is that we skip configuration, which takes minutes anyway. Where to put the config files? 
genome partition can go into search, and just leave the genome sequence in the same place. % % Liliana suggested to put the example in the distribution, then to % refer to the files. I disagree; you can't read it offline. We % should still provide some sample data for playing around. % \section{Small-scale EST Mapping} In this section we demonstrate \ESTmapper\ by mapping a small set of ESTs to a 1Mb genomic region. %{\tt ESTmapper.pl -mapest /dev5/walenz/FY % /dev5/walenz/ESTs/dbEST\_human\_01.fasta % /dev5/walenz/SCF/FY.fasta} The \ESTmapper\ command line \small \begin{verbatim} ESTmapper.pl -mapest /dev5/walenz/FY \ /dev5/walenz/ESTs/dbEST_human_01.fasta \ /dev5/walenz/SCF/FY.fasta \end{verbatim} \normalsize says to map ESTs, creating the directory {\tt /dev5/walenz/FY} for work and output files, reading ESTs from {\tt /dev5/walenz/ESTs/dbEST\_human\_01.fasta} and genomic sequences from {\tt /dev5/walenz/SCF/FY.fasta}. This particular EST set contains about {\bf 30,000} ESTs. The genomic sequence is a {\bf 1Mb} scaffold. The screen output from \ESTmapper\ is explained next. \footnotesize \begin{verbatim} ESTmapper: Performing a configure. ESTmapper/configure-- Use about 3800MB -> 398458880 bases per chunk. ESTmapper/configure-- Generating the info for '/dev5/walenz/FY/0-input/genomic.fasta' ESTmapper/configure-- WARNING: This is done in the work directory! ESTmapper/configure-- Created group with 590724 bases. \end{verbatim} \normalsize \ESTmapper\ is performing its configure phase. It is grouping genomic sequences into groups with no more than 398,458,880 bases, and it estimates that the search process will require about 3800MB to compute. The warning refers to the fact that the index files for the genomic sequences do not already exist, and that they will be created and stored in the work directory, not with the original file. \footnotesize \begin{verbatim} ESTmapper: Performing a search. ESTmapper/search-- Local mode requested; 1 processes. 
ESTmapper/search-- search 000 ESTmapper: searchGENOME required 1.792912 seconds system time. ESTmapper: searchGENOME required 40.95784 seconds user time. ESTmapper: Search script finished in 19 wall-clock seconds. \end{verbatim} \normalsize \ESTmapper\ is performing the search phase. This input requires only one search process. Time statistics are reported. The search algorithm used about 43 seconds of CPU time, and the entire search phase took 19 wall-clock seconds. The search algorithm is capable of using multiple processors, which explains why it used more CPU time than wall-clock time. \footnotesize \begin{verbatim} ESTmapper: Performing a filter. ESTmapper/search-- Merging counts. ESTmapper/search-- Writing counts. \end{verbatim} \normalsize As each search process outputs the number of signals detected for each cDNA sequence, after all search processes finish, these counts are merged together, for use in the filtering phase. \footnotesize \begin{verbatim} ESTmapper/filter-- Filtering. ESTmapper/filterEST-- uniqThresh= 100 reptThresh= 100 qualityThresh=0.20 ESTmapper/filterEST-- UNIQ: 3996( 9979) FILT: 0( 0/ 0) REPT: 0( 0/ 0) ESTmapper/filter-- Sorting. \end{verbatim} \normalsize \ESTmapper\ is now filtering the signals. Filtering is explained in detail in Chapter~\ref{chap:filtering}. \footnotesize \begin{verbatim} ESTmapper: Performing a polish. ESTmapper/polish-- Creating scripts with 500 lines in each. ESTmapper/polish-- Created 020 scripts. ESTmapper/polish-- Running locally, 4 at a time. ESTmapper: sim4db required 188.127737 seconds wall-clock time. ESTmapper: sim4db required 18.927568 seconds system time. ESTmapper: sim4db required 112.960288 seconds user time. ESTmapper: Polish script finished in 50 wall-clock seconds. \end{verbatim} \normalsize \ESTmapper\ is performing the {\tt Sim4} polishing of signals. It creates 20 batches, with each batch containing 500 signals to process\footnote{Yes, except probably for the last one.}. 
The polishing is run on the local hardware, using four processors. Like the search phase, the polishing phase reports statistics on the time used. In this example, the {\tt Sim4} processes needed a total of 188 wall clock seconds, and 130 CPU seconds. The polishing stage required 50 wall-clock seconds. \footnotesize \begin{verbatim} ESTmapper: Performing an assembleOutput. ESTmapper/assembleOutput-- WARNING: 'short' quality levels too low for existing polishing! ESTmapper/assembleOutput-- WARNING: Polished at percent query-sequence identity = 45, requested filtration at 0. ESTmapper/assembleOutput-- WARNING: Polished at percent align-sequence identity = 85, requested filtration at 95. ESTmapper/assembleOutput-- filtering polishes by quality. ESTmapper/assembleOutput-- GOOD: percent query-sequence identity: 50 ESTmapper/assembleOutput-- GOOD: percent align-sequence identity: 95 ESTmapper/assembleOutput-- SHORT: percent query-sequence identity: 0 ESTmapper/assembleOutput-- SHORT: percent align-sequence identity: 95 ESTmapper/assembleOutput-- finding 'good' cDNA. ESTmapper/assembleOutput-- finding 'good, but short' cDNA. ESTmapper/assembleOutput-- finding 'low quality' cDNA. ESTmapper/assembleOutput-- finding 'repeat' cDNA. ESTmapper/assembleOutput-- finding 'zero hit' cDNA. ESTmapper/assembleOutput-- finding 'missing' cDNA. ESTmapper/assembleOutput-- counting 'good' matches. ESTmapper/assembleOutput-- counting 'good, but short' matches. ESTmapper/assembleOutput-- counting 'all the good' matches. ESTmapper/assembleOutput-- counting 'low quality' matches. ESTmapper/assembleOutput-- counting cDNA. ESTmapper: assembleOutput script finished in 52 wall-clock seconds. \end{verbatim} \normalsize \ESTmapper\ is processing the output from the polishing, classifying matches and cDNA. We can safely ignore the warning; it is telling us that even though we requested filtration down to 0\% query-sequence identity, polishes only exist down to 45\%. 
\footnotesize \begin{verbatim} ESTmapper: script finished everything in 122 wall-clock seconds. \end{verbatim} \normalsize Finally, the script is completed. Our mapping finished in a little over two minutes. The contents of the output directory are explained in Chapter~\ref{chap:output}, but we'll quickly peek at the statistics contained in the {\tt summary} file: \footnotesize \begin{verbatim} GOOD: cDNA-genomic matches 51 matches (51 different cDNA and 1 genomic) Matches per cDNA 1 matches/cDNA Matches per genomic 51 matches/genomic GOOD but SHORT: cDNA-genomic matches None. ALL THE GOOD: (both 'GOOD' and 'GOOD but SHORT') cDNA-genomic matches 51 matches (51 different cDNA and 1 genomic) Matches per cDNA 1 matches/cDNA Matches per scaffold 51 matches/genomic LOW-QUALITY: cDNA-genomic matches 2712 matches (564 different cDNA and 1 genomic) Matches per cDNA 4.80851063829787 matches/cDNA Matches per scaffold 2712 matches/genomic cDNA COUNTS: cDNA: 39182 cDNA-good: 51 cDNA-goodshort: 0 cDNA-lowquality: 563 cDNA-missing: 3382 cDNA-zero: 35186 \end{verbatim} \normalsize This is fully described in Section~\ref{sec:summary}. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \chapter{Input Files} \label{chap:input} cDNA and genomic sequences are read from two multi-FastA format files. EST-type and full-length mRNA-type sequences are processed differently, therefore the input data should be a priori separated in files by these categories. It is {\em not} necessary to repeat mask the sequences. \ESTmapper's mechanism will be able to identify and separate those sequences that contain highly repetitive elements. It is {\em not} necessary to fragment the genomic sequences. \ESTmapper\ can process full-length chromosomal sequences, which allows it to identify maximal cDNA matches even when long introns are present. It is {\em not} necessary to quality- and vector-trim the EST sequences, but doing so will increase the accuracy of the match statistics. 
In principle, the quality thresholds used for validating and classifying the matches include a margin of error that could account for the effects of such factors. \section{High Frequency $k$-Mer Masking} The search phase in \ESTmapper\ ignores $k$-mers that occur at least 1000 times in the genomic sequence. Computing the list of $k$-mers to ignore requires large amounts of memory and CPU, and is {\em not} performed by \ESTmapper. Lists appropriate for human and mouse are provided as data files. See the {\tt -maskmers} entry in Advanced Usage. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \chapter{Output Files} \label{chap:output} The work directory contains several files and directories: \setlongtables \begin{longtable}{|l|p{3.4in}|} \hline File Name & Description \\ \hline \hline \endhead \hline \endfoot 0-input & Symbolic links to input files, and any indices and other files needed by later stages. \\ \hline 1-search & The temporary work directory for the search step. The contents are undocumented. \\ \hline 2-filter & The temporary work directory for the filter step. The contents are undocumented.\\ \hline 3-polish & The temporary work directory for the polishing step. The contents are undocumented.\\ \hline cDNA-good.fasta & These cDNA sequences were successfully mapped.\\ \hline cDNA-goodshort.fasta & These cDNA sequences were mapped at the correct percent alignment-sequence identity, but below the desired percent query-sequence identity.\\ \hline cDNA-lowquality.fasta & These cDNA sequences were mapped, but at low percent query- and alignment-sequence identities.\\ \hline cDNA-missing.fasta & These cDNA sequences had at least one signal detected, but the signal(s) were spurious. 
The polishing step did not find a match.\\ \hline cDNA-repeat.fasta & These cDNA sequences were classified as repeat-containing by the filter.\\ \hline cDNA-zero.fasta & These cDNA sequences had no signals detected.\\ \hline polishes-good & All the ``good'' polishes.\\ \hline polishes-goodshort & All the ``good, but short'' polishes.\\ \hline polishes-lowquality & All the remaining polishes.\\ \hline summary & A summary of the mapping.\\ \end{longtable} \subsection{Summary File} \label{sec:summary} The {\tt summary} file describes the results of the mapping. \footnotesize \begin{verbatim} GOOD: 50% composite, 95% identity cDNA-genomic matches 4028809 matches (3060666 different cDNA and 5377 genomic) Matches per cDNA 1.31631775567801 matches/cDNA Matches per genomic 749.267063418263 matches/genomic GOOD but SHORT: 0% composite, 95% identity cDNA-genomic matches 26825 matches (22017 different cDNA and 1614 genomic) Matches per cDNA 1.21837670890675 matches/cDNA Matches per genomic 16.6201982651797 matches/genomic ALL THE GOOD: (both 'GOOD' and 'GOOD but SHORT') cDNA-genomic matches 4055634 matches (3071297 different cDNA and 5461 genomic) Matches per cDNA 1.32049554308815 matches/cDNA Matches per scaffold 742.654092657022 matches/genomic LOW-QUALITY: cDNA-genomic matches 7664890 matches (1263273 different cDNA and 6054 genomic) Matches per cDNA 6.06748501709448 matches/cDNA Matches per scaffold 1266.08688470433 matches/genomic cDNA COUNTS: cDNA: 3992939 cDNA-good: 3060666 cDNA-goodshort: 10631 cDNA-lowquality: 433295 cDNA-missing: 440037 cDNA-zero: 48310 \end{verbatim} \normalsize cDNA sequences are classified into one of six categories: {\tt good}, {\tt goodshort}, {\tt lowquality}, {\tt missing}, {\tt repeat}, or {\tt zero}. Each cDNA in the input is in exactly one of the categories. \begin{tabular}{|l|p{4.3in}|} \hline category & description \\ \hline \hline good & A match exists that meets both query-sequence identity and alignment-sequence identity requirements. 
\\ goodshort & A match exists that meets both query-sequence identity and alignment-sequence identity requirements, for ``short''. \\ lowquality & A match exists, but it does not meet at least one quality requirement. \\ missing & A signal was detected, but no match was produced. \\ repeat & Many signals were detected, and the filter stage declared this cDNA to be repeat-containing. \\ zero & No signals were detected for this cDNA. \\ \hline \end{tabular} Likewise, matches generated by the polishing stage are placed into three categories: {\tt good}, {\tt goodshort} or {\tt lowquality}. This is done match by match, so it is possible to have matches for a specific cDNA sequence in all three categories. For each category, the {\tt summary} file counts the number of matches it contains, the number of distinct cDNA / genomic sequences used by those matches, and the matches per cDNA or genomic. \subsection{Polished Match Format} \label{subsec:matchformat} The files {\tt polishes-good}, {\tt polishes-goodshort}, and {\tt polishes-lowquality} contain the results of the polishing stage. All matches are placed in the same file. Each match starts with the line {\tt sim4begin}, and ends with the line {\tt sim4end}. Matches have the following format: \begin{tabular}{c|l} 1 & {\tt sim4begin} \\ 2 & {\it cDNAidx}{\tt [}{\it cDNAlen}{\tt -}{\it pA}{\tt -}{\it pT}{\tt ]} {\it GENidx}{\tt [}{\it GENlo}{\tt -}{\it GENhi}{\tt ]} {\tt <}{\it M}{\tt -}{\it N}{\tt -}{\it P}{\tt -}{\it O}{\tt -}{\it S}{\tt >} \\ 3 & {\tt edef=}{\it cDNA defline} \\ 4 & {\tt ddef=}{\it Genomic defline} \\ 5 & {\it cDNAbgn}{\tt -}{\it cDNAend} {\tt (}{\it GENbgn}{\tt -}{\it GENend}{\tt )} {\tt <}{\it M}{\tt -}{\it N}{\tt -}{\it P}{\tt >} {\it intronOrientation} \\ & . \\ & . 
\\ 6 & {\it cDNAbgn}{\tt -}{\it cDNAend} {\tt (}{\it GENbgn}{\tt -}{\it GENend}{\tt )} {\tt <}{\it M}{\tt -}{\it N}{\tt -}{\it P}{\tt >} {\it intronOrientation} \\ 7 & {\it cDNAbgn}{\tt -}{\it cDNAend} {\tt (}{\it GENbgn}{\tt -}{\it GENend}{\tt )} {\tt <}{\it M}{\tt -}{\it N}{\tt -}{\it P}{\tt >} \\ 8 & {\it cDNA alignment sequence for exon \#1} \\ 9 & {\it genomic alignment sequence for exon \#1} \\ & . \\ & . \\ & . \\ & . \\ 10 & {\it cDNA alignment sequence for exon \#n} \\ 11 & {\it genomic alignment sequence for exon \#n} \\ 12 & {\tt sim4end} \end{tabular} Line 1 begins the match description. Line 2 contains the match description line. The fields have the following meanings: \begin{tabular}{|l|l|} \hline Field & Description \\ \hline \hline cDNAidx & Internal index of the cDNA sequence used. \\ cDNAlen & Length of the cDNA sequence. \\ pA & Amount of poly-A masking performed. \\ pT & Amount of poly-T masking performed. \\ GENidx & Internal index of the genomic sequence used. \\ GENlo & Beginning position of the genomic region that was polished. \\ GENhi & Ending position of the genomic region that was polished. \\ M & Number of matching bases in the match. \\ N & Number of matching N's in the match. \\ P & Percent sequence identity of the match. \\ O & Orientation of the match. \\ S & Strand this match is predicted to occur on. \\ \hline \end{tabular} %{\tt M} and {\tt N} are the number of matches, and the number of %non-ACGT matches, respectively. {\tt P} is the percent sequence %similarity for this exon. The {\it match orientation} is {\tt forward} when the cDNA sequence aligns to the genomic sequence directly. It is {\tt complement} when the reverse-complement of the cDNA sequence matches the genomic sequence. These are the only two values possible. The {\it strand prediction} is either {\tt forward}, {\tt reverse} or {\tt unknown}. It is \ESTmapper's best guess which strand the cDNA is on, based on the quality of the match and the intron signals. 
Lines 3 and 4 contain the entire defline for the two sequences. These lines are optional. Lines 5, 6 and 7 are the {\tt Sim4} exon lines. There will be one line for each exon found. The fields have the following meanings: \begin{tabular}{|l|l|} \hline Field & Description \\ \hline \hline cDNAbgn & Beginning of the exon, in the cDNA sequence. \\ cDNAend & End of the exon, in the cDNA sequence. \\ GENbgn & Beginning of the exon, in the genomic sequence. \\ GENend & End of the exon, in the genomic sequence. \\ M & Number of matching bases in the exon. \\ N & Number of matching N's in the exon. \\ P & Percent identity of the exon. \\ intronOrientation & Predicted orientation of the intron. \\ \hline \end{tabular} Coordinates in the exon are nucleotide-based. Coordinates in the genomic sequence are relative to the {\tt GENlo} value from the match description line. The true location of the exon in the genomic sequence is {\tt GENlo + GENbgn} and {\tt GENlo + GENend}. The {\it intron orientation} is one of {\tt ->}, {\tt <-}, {\tt --}, or {\tt ==}, representing forward, reverse, ambiguous, and internal gap in cDNA, respectively. All exons, except the last, contain the intron orientation field. When requested, pairwise alignments between the cDNA sequence and the genomic sequence within each exon follow the exon level descriptions (see Section~\ref{sec:polish}). In the alignments, dashes ({\tt -}) are used to represent insertion/deletion. Lower-case characters represent a match, while upper-case characters represent a non-match. Line 12 closes the match description. \subsection{Examples} A few examples of match output are shown. {\bf should explain the examples more; picture?} {\bf Example 1:} A minimal match description. The deflines and alignments are not present. 
\footnotesize \begin{verbatim} sim4begin 54[484-0-0] 0[0-590724] <477-0-98-forward-forward> 1-96 (454213-454308) <92-0-94> -> 97-266 (455410-455579) <170-0-100> -> 267-377 (458098-458208) <111-0-100> -> 378-465 (458297-458384) <88-0-100> -> 466-484 (514282-514297) <16-0-84> sim4end \end{verbatim} \normalsize {\bf Example 2}: This is the same match as above, but was generated by limiting the genomic sequence to the range 430000 through 520000 \footnotesize \begin{verbatim} sim4begin 54[484-0-0] 0[430000-520000] <477-0-98-forward-forward> 1-96 (24213-24308) <92-0-94> -> 97-266 (25410-25579) <170-0-100> -> 267-377 (28098-28208) <111-0-100> -> 378-465 (28297-28384) <88-0-100> -> 466-484 (84282-84297) <16-0-84> sim4end \end{verbatim} \normalsize {\bf Example 3:} A full match description. Deflines have been trimmed to fit on the page, and alignment lines are wrapped. \footnotesize \begin{verbatim} sim4begin 618[453-0-26] 482[450000-460000] <425-0-99-complement-forward> edef=>CRA|70647962 /altid=gi|6798356 /dataset=dbest /taxon=9606 ... ddef=>CRA|GA_x2HTBKM80FY:1..590724 /organism=Homo sapiens ... 
1-71 (4238-4308) <71-0-100> -> 72-241 (5410-5579) <170-0-100> -> 242-352 (8098-8208) <110-0-99> -> 353-427 (8297-8371) <74-0-98> tcatgaaacctgggaaggtggtgcttgtcctggctggacgctactccggacgcaaagctgtcatcgtgaag tcatgaaacctgggaaggtggtgcttgtcctggctggacgctactccggacgcaaagctgtcatcgtgaag aacattgatgatggcacctcagatcgcccctacagccatgctctggtggctggaattgaccgctacccccgcaaa \ gtgacagctgccatgggcaagaagaagatcgccaagagatcaaagataaaatcttttgtgaaagtgtataact \ acaatcacctaatgcccacaag aacattgatgatggcacctcagatcgcccctacagccatgctctggtggctggaattgaccgctacccccgcaaa \ gtgacagctgccatgggcaagaagaagatcgccaagagatcaaagataaaatcttttgtgaaagtgtataact \ acaatcacctaatgcccacaag gtactctgtggatatccccttggacaaaactgtcgtcaataaggatgtcttcaNagatcctgctcttaaacgcaa \ ggcccgacgggaggccaaggtcaagtttgaagagag gtactctgtggatatccccttggacaaaactgtcgtcaataaggatgtcttcaGagatcctgctcttaaacgcaa \ ggcccgacgggaggccaaggtcaagtttgaagagag atacaagacaggcaagaacaagtggttcttccagaaactgcggttttagatgctttgttttgaNcattaaaaatt atacaagacaggcaagaacaagtggttcttccagaaactgcggttttagatgctttgttttgaTcattaaaaatt sim4end \end{verbatim} \normalsize %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \chapter{Basic Usage} \label{chap:basic} Using the \ESTmapper\ in automatic mapping mode is the simplest mode of operation. 
To map ESTs to a genome: {\tt ESTmapper.pl -mapest} {\it work-directory} {\it ests.fasta} {\it genomic.fasta} Options implicit in {\tt -mapest}: \begin{tabular}{|ll|} \hline {\tt -directory} & {\it work-directory} \\ {\tt -cdna} & {\it ests.fasta} \\ {\tt -genomic} & {\it genomic.fasta} \\ {\tt -mersize} & 20 \\ {\tt -maskmers} & {\it install-directory}{\tt /data/frequentMers-C4-20.fasta} \\ {\tt -mincoverage} & 45 \\ {\tt -minidentity} & 85 \\ {\tt -local} & 4 \\ {\tt -good} & 50 95 \\ {\tt -goodshort} & 0 95 \\ \hline \end{tabular} To map mRNA to a genome: {\tt ESTmapper.pl -mapmrna} {\it work-directory} {\it mrna.fasta} {\it genomic.fasta} Options implicit in {\tt -mapmrna}: \begin{tabular}{|ll|} \hline {\tt -directory} & {\it work-directory} \\ {\tt -cdna} & {\it mrna.fasta} \\ {\tt -genomic} & {\it genomic.fasta} \\ {\tt -mersize} & 20 \\ {\tt -maskmers} & {\it install-directory}{\tt /data/frequentMers-C4-20.fasta} \\ {\tt -mincoverage} & 45 \\ {\tt -minidentity} & 85 \\ {\tt -relink} & 1000 \\ {\tt -abort} & \\ {\tt -local} & 4 \\ {\tt -good} & 50 95 \\ {\tt -goodshort} & 0 95 \\ \hline \end{tabular} Be sure that the multi-FastA files are stored on a disk local to the machine --- it will work if the sequences are accessed over NFS, but performance might suffer. The automatic mapping modes may be customized by using the options listed in Section~\ref{sec:adv}. For example, {\tt ESTmapper.pl -mapest} {\it work-directory} {\it ests.fasta} {\it genomic.fasta} {\tt -numcpus 2} {\tt -memory 2000} will {\bf probably -- should test} allow the \ESTmapper\ to run on a two processor machine with 2GB of RAM. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \chapter{Advanced Usage} \label{sec:adv} \ESTmapper\ recognizes seven commands, and has one required argument. 
The general usage is: {\tt ESTmapper.pl {\it command} -directory} {\it work-directory} [{\it options}] The seven commands: \begin{tabular}{ll} -configure &-- prepare the genomic sequences for searching \\ -searchest &-- perform signal finding on EST sequences \\ -searchmrna &-- perform signal finding on mRNA sequences \\ -filterest &-- filter EST signals \\ -filtermrna &-- filter mRNA signals \\ -polish &-- polish filtered signals \\ -assembleoutput &-- prepare the output \\ \end{tabular} The one required argument: \begin{tabular}{lp{3.0in}} -directory {\it /full/path/to/work/directory} & The \ESTmapper\ will use the supplied directory as its work directory. This option must be present for all steps. \end{tabular} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{configure} \label{sec:configure} This step creates the work directory, and prepares the genomic sequence for mapping. \begin{tabular}{lp{3.0in}} -configure & Instruct the \ESTmapper\ to perform the {\tt configure} command. \\ -genomic {\it g.fasta} & The sequences in {\tt g.fasta} will be used as the genomic sequence. The file is a multi-FastA format, and all sequences are used. There are no special requirements for the format of the defline, nor are there limits on the length or number of sequences.\\ -memory {\it n} & The sequences in {\tt g.fasta} will be partitioned into sets so that the search phase will use no more than {\it n} MB of memory per process. Any sequences in {\tt g.fasta} that are larger than the partition size, are placed into a set containing one sequence. A warning is printed for such sequences. {\bf memory usage computation is not rigorously tested; it works for 4000} The memory usage is approximately 10 bytes per base of genomic sequence. \end{tabular} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{searchest and searchmrna} \label{sec:search} This step performs the search. 
\begin{tabular}{lp{3.0in}} -searchest {\it label} & Instruct the \ESTmapper\ to perform the {\tt search} stage, using parameters appropriate for EST sequences. The {\tt label} marks this run; it is possible to search multiple sets of sequences against the same genomic database without multiple {\tt configure} steps. The benefit of this is marginal. \\ -searchmrna {\it label} & Instruct the \ESTmapper\ to perform the {\tt search} stage, using parameters appropriate for mRNA sequences. See {\tt searchest} for discussion of {\tt label}. \\ -cdna {\it c.fasta} & The sequences in {\tt c.fasta} will be searched. The file is a multi-FastA format, and all sequences are used. There are no special requirements for the format of a defline, nor are there limits on the length or number of sequences. Attempting to map sequences that are not of the specified type (mapping mRNA with EST parameters; mapping non-coding genomic sequence with {\em any} parameters) is not advised. The sequences {\em MUST NOT} be repeat masked. \\ -mersize {\it m} & Instructs the search to use $m$ for the size of the exact-match blocks. A value of $m=20$ seems to be optimal; larger values use more memory, run faster, and are less sensitive. Smaller values use less memory, run slower and result in fewer signals due to spurious matches. Note that in automatic mapping mode, changing the mersize without explicitly specifying the maskmers file is an error. \\ -maskmers {\it m.fasta} & The sequences in {\tt m.fasta} are used to build a list of mers that will be discarded from any matches. While this is generally considered to be a ``Poor Man's RepeatMasker'', the \ESTmapper\ achieves better performance and sensitivity using this strategy than with full-blown repeat masking. The sequences are usually in the {\tt data} subdirectory of the \ESTmapper\ installation. 
\end{tabular} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{filterest and filtermrna} \label{sec:filter} This step filters the signals detected in the search phase, discarding weak signals and keeping strong signals. There are no user-tunable parameters at the present time. \begin{tabular}{lp{3.0in}} -filterest label & Instruct the \ESTmapper\ to perform the {\tt filter} stage, using parameters appropriate for EST sequences. The {\tt label} must be the same as used in the search stage. \\ -filtermrna label & Instruct the \ESTmapper\ to perform the {\tt filter} stage, using parameters appropriate for mRNA sequences. The {\tt label} must be the same as used in the search stage. \end{tabular} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{polish} \label{sec:polish} This stage prepares, and optionally performs, the polishing of the signals detected. \begin{tabular}{lp{3.0in}} -polish {\it label} & Instruct the \ESTmapper\ to perform the {\tt polish} stage. The {\tt label} must be the same as used in the search stage. \\ -mincoverage {\it mc} & Polishing is performed so that all results have a {\it percent query-sequence identity} of at least {\tt mc}. See the discussion about quality below. \\ -minidentity {\it mi} & Polishing is performed so that all results have a {\it percent alignment-sequence identity} of at least {\tt mi}. See the discussion about quality below. \\ -alwaysprint {\it ap} & The polisher will always print at least the top $ap$ matches, no matter what their quality. This can be used to find both full-length matches and the top scoring matches for signals without full-length matches. See the discussion about quality below. \\ -relink {\it r} & Sets the {\it Sim4} relink weight. {\bf This is used only for mRNA, and Liliana should probably explain what it does.} \\ -batchsize {\it w} & Signals are processed in batches of size $w$. See the discussion about execution below. 
\\ -numbatches {\it b} & Signals are processed in $b$ batches. See the discussion about execution below. \\ -farm {\it queue} {\it projectname} & Runs the polishes on the Celera internal compute farm. The {\tt projectname} should be the resource code to use, e.g. {\tt 00006:MRNA:L}. See the discussion about execution below. \\ -local {\it numprocessors} & Runs the polishes on the local machine, using $numprocessors$ concurrent processes. See the discussion about execution below. \\ -runlater & The \ESTmapper\ will generate all the script files needed to perform the polishing step, but will not perform any computation. \\ -aligns & Instruct {\tt Sim4} to also include the alignment lines. This will make your output files VERY large. \\ -abort & Instruct {\tt Sim4} to abort polishing any matches with an unusually large number of MSPs. The aborted matches are saved for later examination. {\bf Need to explain the output format for this!} \end{tabular} \subsection{Polishing Quality} \label{sec:quality} In the absence of {\tt -minidentity} and {\tt -mincoverage} and {\tt -alwaysprint} only the best match is found for each signal. The result of specifying exactly one of {\tt -minidentity} and {\tt -mincoverage} is undefined. Always specify both, even if one is $0$. Specifying {\tt -alwaysprint} without either {\tt -minidentity} or {\tt -mincoverage} will print the best $ap$ matches. The effect of specifying values for {\tt -minidentity} and {\tt -mincoverage} and {\tt -alwaysprint} is almost the same as asking for ``the good matches, unless none are found, then only the best''. This overcomes the problem {\bf XXX: what problem?} encountered when attempting to map mRNA sequences at very low percent query-sequence identities. mRNA that have excellent full-length matches can also have many inferior paralogous (partial-)matches. If {\tt -mincoverage 10} is specified, all paralogous matches would be output, in addition to the true match. 
On the other hand, for mRNA without full-length matches, we still want to find partial matches, even if they have a query-sequence identity of, say, 20\%. Polishing for a single signal terminates when any of three conditions is met: \begin{tabular}{l} $coverage < mc$ \\ $identity < mi$ \\ $printed > ap$ \end{tabular} \subsection{Execution} The output from the filter step is a large list of signals (4 million human ESTs produce about 60 million filtered signals). To run these efficiently, they are divided into batches. If {\tt -batchsize} is specified, then each batch will contain exactly $w$ signals (except for the last batch), otherwise, {\tt -numbatches b} batches are formed. If neither of {\tt -batchsize} and {\tt -numbatches} is specified, then the signals are divided into 256 or fewer batches. A batch will always contain at least 500 signals. These batches can be processed on the local hardware ({\tt -local} option) or on the Celera internal compute farm ({\tt -farm} option). If they are processed locally, then $numprocessors$ {\tt Sim4} processes will run concurrently (regardless of the number of CPUs actually available --- yes, it is possible to do {\tt -local 256}). It is optimal to run exactly one {\tt Sim4} process per available processor. If they are processed on the farm, they are submitted to the specified queue / project name. The \ESTmapper\ will terminate immediately after the jobs are submitted; when they have finished, simply rerun the script to continue. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{assembleoutput} \label{sec:assembleoutput} This stage collects the output from the polishing stage, and classifies the matches into quality groups. \begin{tabular}{lp{3.0in}} -assembleoutput {\it label} & Instruct the \ESTmapper\ to assemble the output. The {\tt label} must be the same as used in the search stage. \\ -good {\it mi} & Labels matches with $mi$ or better percent alignment-sequence identity as ``good''. 
\\ -short {\it mc} & Labels matches with less than $mc$ percent query-sequence identity as ``short''. \end{tabular} {\bf Note about the filter values being at least +5 the polish values here....} \end{document} kmer-code-2013-trunk/ESTmapper LaTeX/mrna-filter.tex0000644000000000000000000001255310562262400020711 0ustar rootroot\documentclass[twoside]{article} \usepackage{amsmath,amssymb} \usepackage{moreverb} \usepackage{fancyheadings} \usepackage{ulem} \usepackage{parskip} \usepackage{calc,ifthen,epsfig} \sloppy \begin{document} % See page 63-64, LaTeX Companion % % leftmargin controls the left margin for EVERYTHING in the list! % \newcommand{\entrylabel}[1]{\mbox{\texttt{#1:}}\hfil} \newenvironment{entry} {\begin{list}{}% {\renewcommand{\makelabel}{\entrylabel}% %\setlength{\leftmargin}{1.5in}% }} {\end{list}} % The first parbox width controls the indent on the first text line % The makebox width seems to do nothing. \newcommand{\Lentrylabel}[1]{% {\parbox[b]{0pt}{\makebox[0pt][l]{\texttt{#1:}}\\}}\hfil\relax} \newenvironment{Lentry} {\renewcommand{\entrylabel}{\Lentrylabel}\begin{entry}} {\end{entry}} \title{Filtering mRNA signal} \author{Brian P. Walenz} \maketitle \pagestyle{fancy} \rhead[Brian Walenz]{Brian Walenz} \chead[Filtering mRNA signal]{Filtering mRNA signal} \lhead[\today]{\today} \normalem \begin{abstract} Given signals detected by chaining 20-mers, a method is presented for deciding which signals potentially contain real matches. \end{abstract} \section{What is a signal} A signal has three values associated with it. The amount 'covered', the amount 'matched' and the total 'length'. % The amount covered is the number of bases in the mRNA that are contained in at least one mer. % The amount matched is the number of paired bases (for example, position $i$ in the cDNA paired with position $j$ in the genomic) covered by a mer. 
% The length is the number of mers in the mRNA (roughly equivalent to the number of bases in the mRNA that could be covered by a mer, but easier to compute). From these, we can derive two scores, the coverage and the multiplicity. The coverage, $\frac{covered}{length}$, represents the fraction of the mRNA that we found, while the multiplicity, $\frac{matched}{covered}$, represents the amount of the mRNA that we found too many times. A high multiplicity usually indicates a repeat-containing mRNA. High multiplicity and high coverage can indicate that the mRNA is not cDNA. \section{Filter} In order to filter signals, we need to decide, for each mRNA, which signals are bad, and which are good (duh!), which means that we'll need to look at {\em all} signals for a single mRNA. For the filter presented below, we need to know the best and worst coverage values that occur for any signal associated with a specific mRNA. Once those are known, the signals can be filtered in any order. This is important in the case where the signals are detected chromosome by chromosome. Instead of sorting all signals, we can save the best and worst coverage for each mRNA. \begin{figure} \begin{center} \begin{tabular}{|c|c|c|l|} \hline Switch & Variable & Default Value & Description \\ \hline \hline -l & $L$ & 0.2 & \text{Signal spread low range} \\ -h & $H$ & 0.6 & \text{Signal spread high range} \\ -v & $V$ & 0.3 & \text{Pass value} \\ -m & $M$ & 0.3 & \text{Signal quality floor} \\ -mc & $M_c$ & 0.2 & \text{Minimum signal quality} \\ -ml & $M_l$ & 150 & \text{Minimum signal size} \\ \hline \end{tabular} \end{center} \caption{Parameters, default values and descriptions} \label{table:defvalues} \end{figure} The filter has six parameters, summarized in Table~\ref{table:defvalues}. If the signals for a specific mRNA are all very similar, it is probable that the weaker signals are weak only because of a few mismatches that break 20-mers. 
In this case, we cannot reliably pick the signals that are true, and should consider all of them. On the other hand, if there is a large range in the quality of signals, we can safely discard low scoring signals, and still be confident that we will find the good stuff. Therefore, the filter will discard no signals if the range in quality values is small, and will gradually discard more, proportional to the range. So that we don't discard too much, we limit the increase in filtering to $V$ (0.3). \begin{align*} h &= bestCoverage - worstCoverage \\ p &= \begin{cases} 0.0 & \text{if $h \le L$} \\ V * \frac{h-L}{H-L} & \text{if $L < h < H$} \\ V & \text{if $H \le h$} \end{cases} \\ c &= min(worstCoverage + p \cdot h, M) \end{align*} \begin{figure} \begin{center} \epsfig{figure=mRNAfilt.eps, silent=, width=4.5in} \end{center} \caption{The $p$ curve.} \label{fig:pcurve} \end{figure} $p$ is the amount of filtering, ranging from minimum (0.0) to maximum ($V$, a parameter). The $c$ value computed above is the filtering threshold. Signals with coverage below $c$ are considered weak, and are discarded. If the score range is small ($\le L$), then $c$ will be $worstCoverage$, and we do no filtering. If the score range is large ($\ge H$), then $c$ will be $M$ of the best score. $c$ is the minimum coverage that will be accepted. It is derived from the range of scores, not the number of scores. Finally, it is possible that {\em all} signals are good. If we used the above filtering we would be discarding the low scoring (but still valid) signals. To overcome this, absolute limits $M_c$ and $M_l$ are enforced. A signal is saved if both of the following conditions are met: \begin{enumerate} \item ($c <= coverage$) \item ($M_c <= coverage$) or ($M_l <= coveredBases$) \end{enumerate} \section{Results} This filter is overly permissive, throwing out only signals that are obviously garbage. 
\end{document} kmer-code-2013-trunk/ESTmapper LaTeX/mRNAfilt.eps0000644000000000000000000001325310562262400020132 0ustar rootroot%!PS-Adobe-2.0 EPSF-2.0 %%Title: mRNAfilt.eps %%Creator: fig2dev Version 3.2.3 Patchlevel %%CreationDate: Thu Jan 17 19:14:55 2002 %%For: walenz@fengshui.home (Brian Walenz) %%BoundingBox: 0 0 469 316 %%Magnification: 1.0000 %%EndComments /$F2psDict 200 dict def $F2psDict begin $F2psDict /mtrx matrix put /col-1 {0 setgray} bind def /col0 {0.000 0.000 0.000 srgb} bind def /col1 {0.000 0.000 1.000 srgb} bind def /col2 {0.000 1.000 0.000 srgb} bind def /col3 {0.000 1.000 1.000 srgb} bind def /col4 {1.000 0.000 0.000 srgb} bind def /col5 {1.000 0.000 1.000 srgb} bind def /col6 {1.000 1.000 0.000 srgb} bind def /col7 {1.000 1.000 1.000 srgb} bind def /col8 {0.000 0.000 0.560 srgb} bind def /col9 {0.000 0.000 0.690 srgb} bind def /col10 {0.000 0.000 0.820 srgb} bind def /col11 {0.530 0.810 1.000 srgb} bind def /col12 {0.000 0.560 0.000 srgb} bind def /col13 {0.000 0.690 0.000 srgb} bind def /col14 {0.000 0.820 0.000 srgb} bind def /col15 {0.000 0.560 0.560 srgb} bind def /col16 {0.000 0.690 0.690 srgb} bind def /col17 {0.000 0.820 0.820 srgb} bind def /col18 {0.560 0.000 0.000 srgb} bind def /col19 {0.690 0.000 0.000 srgb} bind def /col20 {0.820 0.000 0.000 srgb} bind def /col21 {0.560 0.000 0.560 srgb} bind def /col22 {0.690 0.000 0.690 srgb} bind def /col23 {0.820 0.000 0.820 srgb} bind def /col24 {0.500 0.190 0.000 srgb} bind def /col25 {0.630 0.250 0.000 srgb} bind def /col26 {0.750 0.380 0.000 srgb} bind def /col27 {1.000 0.500 0.500 srgb} bind def /col28 {1.000 0.630 0.630 srgb} bind def /col29 {1.000 0.750 0.750 srgb} bind def /col30 {1.000 0.880 0.880 srgb} bind def /col31 {1.000 0.840 0.000 srgb} bind def end save newpath 0 316 moveto 0 0 lineto 469 0 lineto 469 316 lineto closepath clip newpath -72.0 387.0 translate 1 -1 scale /cp {closepath} bind def /ef {eofill} bind def /gr {grestore} bind def /gs {gsave} bind def /sa {save} bind 
def /rs {restore} bind def /l {lineto} bind def /m {moveto} bind def /rm {rmoveto} bind def /n {newpath} bind def /s {stroke} bind def /sh {show} bind def /slc {setlinecap} bind def /slj {setlinejoin} bind def /slw {setlinewidth} bind def /srgb {setrgbcolor} bind def /rot {rotate} bind def /sc {scale} bind def /sd {setdash} bind def /ff {findfont} bind def /sf {setfont} bind def /scf {scalefont} bind def /sw {stringwidth} bind def /tr {translate} bind def /tnt {dup dup currentrgbcolor 4 -2 roll dup 1 exch sub 3 -1 roll mul add 4 -2 roll dup 1 exch sub 3 -1 roll mul add 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} bind def /shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul 4 -2 roll mul srgb} bind def /$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def /$F2psEnd {$F2psEnteredState restore end} def $F2psBegin %%Page: 1 1 10 setmiterlimit 0.06000 0.06000 sc /Times-Roman ff 180.00 scf sf 1800 1575 m gs 1 -1 sc (1.0) col0 sh gr % Polyline 7.500 slw n 9000 6000 m 2100 6000 l gs col0 s gr % Polyline n 3000 6300 m 3000 6000 l gs col0 s gr % Polyline n 3600 6300 m 3600 6000 l gs col0 s gr % Polyline n 4800 6300 m 4800 6000 l gs col0 s gr % Polyline n 5400 6300 m 5400 6000 l gs col0 s gr % Polyline n 6000 6300 m 6000 6000 l gs col0 s gr % Polyline n 4200 6300 m 4200 6000 l gs col0 s gr % Polyline n 6600 6300 m 6600 6000 l gs col0 s gr % Polyline n 7200 6300 m 7200 6000 l gs col0 s gr % Polyline n 7800 6300 m 7800 6000 l gs col0 s gr % Polyline n 8400 6300 m 8400 6000 l gs col0 s gr % Polyline n 2100 5400 m 2400 5400 l gs col0 s gr % Polyline n 2100 4800 m 2400 4800 l gs col0 s gr % Polyline n 2100 4200 m 2400 4200 l gs col0 s gr % Polyline n 2100 3600 m 2400 3600 l gs col0 s gr % Polyline n 2100 3000 m 2400 3000 l gs col0 s gr % Polyline 30.000 slw n 2400 5400 m 3600 5400 l 6000 3600 l 8400 3600 l gs col0 s gr % Polyline 7.500 slw [15 60] 60 sd n 2400 3600 m 9000 3600 l gs col0 s gr [] 0 sd % Polyline [15 60] 60 sd n 3600 2700 m 3600 5700 l gs col0 s gr 
[] 0 sd % Polyline [15 60] 60 sd n 6000 2700 m 6000 5700 l gs col0 s gr [] 0 sd % Polyline n 2100 1500 m 2400 1500 l gs col0 s gr /Times-Roman ff 180.00 scf sf 1275 1800 m gs 1 -1 sc (bestCoverage) col0 sh gr % Polyline n 2400 2700 m 2400 6300 l gs col0 s gr /Times-Roman ff 180.00 scf sf 1800 5475 m gs 1 -1 sc (0.0) col0 sh gr /Times-Roman ff 180.00 scf sf 1800 4875 m gs 1 -1 sc (0.1) col0 sh gr /Times-Roman ff 180.00 scf sf 1800 4275 m gs 1 -1 sc (0.2) col0 sh gr /Times-Roman ff 180.00 scf sf 1800 3675 m gs 1 -1 sc (0.3) col0 sh gr /Times-Roman ff 180.00 scf sf 1800 3075 m gs 1 -1 sc (0.4) col0 sh gr /Times-Roman ff 180.00 scf sf 2325 6450 m gs 1 -1 sc (0.0) col0 sh gr /Times-Roman ff 180.00 scf sf 2925 6450 m gs 1 -1 sc (0.1) col0 sh gr /Times-Roman ff 180.00 scf sf 3525 6450 m gs 1 -1 sc (0.2) col0 sh gr /Times-Roman ff 180.00 scf sf 4125 6450 m gs 1 -1 sc (0.3) col0 sh gr /Times-Roman ff 180.00 scf sf 4725 6450 m gs 1 -1 sc (0.4) col0 sh gr /Times-Roman ff 180.00 scf sf 5325 6450 m gs 1 -1 sc (0.5) col0 sh gr /Times-Roman ff 180.00 scf sf 5925 6450 m gs 1 -1 sc (0.6) col0 sh gr /Times-Roman ff 180.00 scf sf 6525 6450 m gs 1 -1 sc (0.7) col0 sh gr /Times-Roman ff 180.00 scf sf 7125 6450 m gs 1 -1 sc (0.8) col0 sh gr /Times-Roman ff 180.00 scf sf 7725 6450 m gs 1 -1 sc (0.9) col0 sh gr /Times-Roman ff 180.00 scf sf 8325 6450 m gs 1 -1 sc (1.0) col0 sh gr /Times-Roman ff 180.00 scf sf 2625 3525 m gs 1 -1 sc (V) col0 sh gr /Times-Roman ff 180.00 scf sf 3450 2850 m gs 1 -1 sc (L) col0 sh gr /Times-Roman ff 180.00 scf sf 5850 2850 m gs 1 -1 sc (H) col0 sh gr /Times-Roman ff 180.00 scf sf 7200 3525 m gs 1 -1 sc (minL) col0 sh gr /Times-Roman ff 180.00 scf sf 1200 5700 m gs 1 -1 sc (worstCoverage) col0 sh gr % Polyline n 2400 1200 m 2400 1800 l gs col0 s gr % Polyline [15 45] 45 sd n 2400 1800 m 2400 2700 l gs col0 s gr [] 0 sd $F2psEnd rs kmer-code-2013-trunk/ESTmapper LaTeX/mRNAfilt.fig0000644000000000000000000000534010562262400020106 0ustar rootroot#FIG 3.2 
Landscape Center Inches Letter 100.00 Single -2 1200 2 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 2400 2700 2400 6300 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 9000 6000 2100 6000 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 3000 6300 3000 6000 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 3600 6300 3600 6000 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 4800 6300 4800 6000 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 5400 6300 5400 6000 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 6000 6300 6000 6000 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 4200 6300 4200 6000 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 6600 6300 6600 6000 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 7200 6300 7200 6000 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 7800 6300 7800 6000 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 8400 6300 8400 6000 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 2100 5400 2400 5400 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 2100 4800 2400 4800 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 2100 4200 2400 4200 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 2100 3600 2400 3600 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 2100 3000 2400 3000 2 1 0 3 0 7 100 0 -1 0.000 0 0 -1 0 0 4 2400 5400 3600 5400 6000 3600 8400 3600 2 1 2 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 2400 3600 9000 3600 2 1 2 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 3600 2700 3600 5700 2 1 2 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 6000 2700 6000 5700 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 2100 1500 2400 1500 2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 2 2400 1200 2400 1800 2 1 2 1 0 7 50 0 -1 3.000 0 0 -1 0 0 2 2400 1800 2400 2700 4 0 0 100 0 0 12 0.0000 0 135 225 1800 5475 0.0\001 4 0 0 100 0 0 12 0.0000 0 135 225 1800 4875 0.1\001 4 0 0 100 0 0 12 0.0000 0 135 225 1800 4275 0.2\001 4 0 0 100 0 0 12 0.0000 0 135 225 1800 3675 0.3\001 4 0 0 100 0 0 12 0.0000 0 135 225 1800 3075 0.4\001 4 0 0 100 0 0 12 0.0000 0 135 225 2325 6450 0.0\001 4 0 0 100 0 0 12 0.0000 0 135 225 2925 6450 0.1\001 4 0 0 100 0 0 12 0.0000 0 135 225 3525 6450 0.2\001 4 0 0 100 0 0 12 0.0000 0 135 225 4125 6450 0.3\001 4 0 0 100 0 0 12 
0.0000 0 135 225 4725 6450 0.4\001 4 0 0 100 0 0 12 0.0000 0 135 225 5325 6450 0.5\001 4 0 0 100 0 0 12 0.0000 0 135 225 5925 6450 0.6\001 4 0 0 100 0 0 12 0.0000 0 135 225 6525 6450 0.7\001 4 0 0 100 0 0 12 0.0000 0 135 225 7125 6450 0.8\001 4 0 0 100 0 0 12 0.0000 0 135 225 7725 6450 0.9\001 4 0 0 100 0 0 12 0.0000 0 135 225 8325 6450 1.0\001 4 0 0 100 0 0 12 0.0000 0 135 135 2625 3525 V\001 4 0 0 100 0 0 12 0.0000 0 135 105 3450 2850 L\001 4 0 0 100 0 0 12 0.0000 0 135 135 5850 2850 H\001 4 0 0 100 0 0 12 0.0000 0 135 375 7200 3525 minL\001 4 0 0 100 0 0 12 0.0000 0 180 1155 1200 5700 worstCoverage\001 4 0 0 100 0 0 12 0.0000 0 180 1050 1275 1800 bestCoverage\001 4 0 0 100 0 0 12 0.0000 0 135 225 1800 1575 1.0\001 kmer-code-2013-trunk/libmeryl/0000755000000000000000000000000012641613360014767 5ustar rootrootkmer-code-2013-trunk/libmeryl/libmeryl.H0000644000000000000000000001327612322046702016724 0ustar rootroot
#ifndef LIBMERYL_H
#define LIBMERYL_H

#include "bio++.H"

//  Streaming reader and writer for meryl mer-count data.
//
//  The reader is given a merSize to check the file against; the
//  original notes say the code fails when the sizes disagree.
//
//  Mers come out of the reader in lexicographic order, with no random
//  access.  The writer expects its mers in increasing sorted order.
//
//  Statistics kept by both classes:
//    numUnique    number of mers whose count is one
//    numDistinct  number of distinct mers in the file
//    numTotal     number of mers in the file

class merylStreamReader {
public:
  merylStreamReader(const char *fn, uint32 ms=0);
  ~merylStreamReader();

  //  The current mer and its count.
  kMer    &theFMer(void)   { return _thisMer; }
  uint64   theCount(void)  { return _thisMerCount; }

  //  Positions exist only when a position file (_POS) is attached.
  bool     hasPositions(void)  { return _POS != 0L; }
  uint32  *thePositions(void)  { return _thisMerPositions; }

  //  Position i of the current mer, or all-ones when there is no
  //  position file or i is not below the current mer's count.
  uint32   getPosition(uint32 i) {
    if ((_POS == 0L) || (i >= _thisMerCount))
      return ~uint32ZERO;
    return _thisMerPositions[i];
  }

  //  File parameters.  merSize() is half of the stored size in bits.
  uint32   merSize(void)         { return _merSizeInBits / 2; }
  uint32   merCompression(void)  { return _merCompression; }
  uint32   prefixSize(void)      { return _prefixSize; }

  uint64   numberOfUniqueMers(void)    { return _numUnique; }
  uint64   numberOfDistinctMers(void)  { return _numDistinct; }
  uint64   numberOfTotalMers(void)     { return _numTotal; }

  //  Histogram entry i, or all-ones when i is past the end.
  uint64   histogram(uint32 i) {
    if (i >= _histogramLen)
      return ~uint64ZERO;
    return _histogram[i];
  }

  uint64   histogramLength(void)        { return _histogramLen; }
  uint64   histogramHuge(void)          { return _histogramHuge; }
  uint64   histogramMaximumCount(void)  { return _histogramMaxValue; }

  bool     nextMer(void);
  bool     validMer(void)  { return _validMer; }

private:
  bitPackedFile  *_IDX;
  bitPackedFile  *_DAT;
  bitPackedFile  *_POS;

  //  Next value from the index file: a packed number when the index is
  //  packed, otherwise a plain 32-bit field.
  uint64 getIDXnumber(void) {
    if (_idxIsPacked)
      return _IDX->getNumber();
    return _IDX->getBits(32);
  }

  //  Next count from the data file.  Unpacked data is a plain 32-bit
  //  field.  Packed data starts with one flag bit: clear means the
  //  count is 1, set means a packed number follows and the count is
  //  that number plus 2.
  uint64 getDATnumber(void) {
    if (!_datIsPacked)
      return _DAT->getBits(32);
    if (_DAT->getBits(1))
      return _DAT->getNumber() + 2;
    return 1;
  }

  //  Kept as uint32, not bool; the original author notes that
  //  bitPackedFile seems to be incompatible with bools.
  uint32          _idxIsPacked;
  uint32          _datIsPacked;
  uint32          _posIsPacked;

  uint32          _merSizeInBits;
  uint32          _merCompression;
  uint32          _prefixSize;
  uint32          _merDataSize;
  uint64          _thisBucket;
  uint64          _thisBucketSize;
  uint64          _numBuckets;

  kMer            _thisMer;
  uint64          _thisMerCount;

  uint32          _thisMerPositionsMax;
  uint32         *_thisMerPositions;

  uint64          _numUnique;
  uint64          _numDistinct;
  uint64          _numTotal;

  uint64          _histogramHuge;      // number that are bigger than Len
  uint64          _histogramLen;       // number of entries in the histo
  uint64          _histogramMaxValue;  // highest count ever seen
  uint64         *_histogram;

  bool            _validMer;
};


class merylStreamWriter {
public:
  merylStreamWriter(const char *filePrefix,
                    uint32 merSize,          // In bases
                    uint32 merComp,          // A length, bases
                    uint32 prefixSize,       // In bits
                    bool   positionsEnabled);
  ~merylStreamWriter();

  void    addMer(kMer &mer, uint32 count=1, uint32 *positions=0L);
  void    addMer(uint64 prefix, uint32 prefixBits, uint64 mer, uint32 merBits, uint32 count=1, uint32 *positions=0L);

private:
  void    writeMer(void);

  //  Write n to the index file: a packed number when the index is
  //  packed, otherwise a plain 32-bit field.
  void    setIDXnumber(uint64 n) {
    if (_idxIsPacked) {
      _IDX->putNumber(n);
      return;
    }
    _IDX->putBits(n, 32);
  }

  //  Write count n to the data file.  Unpacked data is a plain 32-bit
  //  field.  Packed data writes a single clear bit for n == 1,
  //  otherwise a set bit followed by the packed number n-2.
  void    setDATnumber(uint64 n) {
    if (!_datIsPacked) {
      _DAT->putBits(n, 32);
      return;
    }
    if (n == 1) {
      _DAT->putBits(uint64ZERO, 1);
      return;
    }
    _DAT->putBits(uint64ONE, 1);
    _DAT->putNumber(n-2);
  }

  bitPackedFile  *_IDX;
  bitPackedFile  *_DAT;
  bitPackedFile  *_POS;

  uint32          _idxIsPacked;
  uint32          _datIsPacked;
  uint32          _posIsPacked;

  uint32          _merSizeInBits;
  uint32          _merCompression;
  uint32          _prefixSize;
  uint32          _merDataSize;
  uint64          _thisBucket;
  uint64          _thisBucketSize;
  uint64          _numBuckets;

  uint64          _numUnique;
  uint64          _numDistinct;
  uint64          _numTotal;

  uint64          _histogramHuge;      // number that are bigger than Len
  uint64          _histogramLen;       // number of entries in the histo
  uint64          _histogramMaxValue;  // highest count ever seen
  uint64         *_histogram;

  bool            _thisMerIsBits;
  bool            _thisMerIskMer;

  kMer            _thisMer;

  uint64          _thisMerPre;
  uint64          _thisMerMer;

  uint32          _thisMerPreSize;
  uint32          _thisMerMerSize;

  uint64          _thisMerCount;
};

#endif  //  LIBMERYL_H
kmer-code-2013-trunk/libmeryl/libmeryl.C0000644000000000000000000003072412322046702016714 0ustar rootroot#include "libmeryl.H" #define LIBMERYL_HISTOGRAM_MAX 1048576 // 0123456789012345 static char *ImagicV = "merylStreamIv03\n"; static char *ImagicX = "merylStreamIvXX\n"; static char *DmagicV = "merylStreamDv03\n"; static char *DmagicX = "merylStreamDvXX\n"; static char *PmagicV = "merylStreamPv03\n"; static char *PmagicX = "merylStreamPvXX\n"; merylStreamReader::merylStreamReader(const char *fn, uint32 ms) { if (fn == 0L) { fprintf(stderr, "ERROR - no counted database file specified.\n"); exit(1); } // Open the files // char *inpath = new char [strlen(fn) + 8]; sprintf(inpath, "%s.mcidx", fn); _IDX = new bitPackedFile(inpath); sprintf(inpath, "%s.mcdat", fn); _DAT = new bitPackedFile(inpath); sprintf(inpath, "%s.mcpos", fn); if (fileExists(inpath)) _POS = new bitPackedFile(inpath); else _POS = 0L; delete [] inpath; // Verify that they are what they should be, and read in the header // char Imagic[16] = {0}; char Dmagic[16] = {0}; char Pmagic[16] = {0}; bool fail = false; for (uint32 i=0; i<16; i++) { Imagic[i] = _IDX->getBits(8); Dmagic[i] = _DAT->getBits(8); if (_POS) Pmagic[i] = _POS->getBits(8); } if (strncmp(Imagic, ImagicX, 16) == 0) { fprintf(stderr, "merylStreamReader()-- ERROR: %s.mcidx is an INCOMPLETE merylStream index file!\n", fn); fail = true; } if (strncmp(Imagic, ImagicX, 13) != 0) { fprintf(stderr, "merylStreamReader()-- ERROR: %s.mcidx is not a merylStream index file!\n", fn); fail = true; } if (strncmp(Dmagic, DmagicX, 16) == 0) { fprintf(stderr, "merylStreamReader()-- ERROR: %s.mcdat is an INCOMPLETE merylStream data file!\n", fn); fail = true; } if (strncmp(Dmagic, DmagicX, 13) != 0) { fprintf(stderr, "merylStreamReader()-- ERROR: %s.mcdat is not a merylStream data file!\n", fn); fail = true; } if ((Imagic[13] != Dmagic[13]) || (Imagic[14] != Dmagic[14])) { fprintf(stderr, "merylStreamReader()-- ERROR: %s.mcidx and %s.mcdat are different 
versions!\n", fn, fn); fail = true; } #warning not checking pmagic if (fail) exit(1); _idxIsPacked = _IDX->getBits(32); _datIsPacked = _IDX->getBits(32); _posIsPacked = _IDX->getBits(32); _merSizeInBits = _IDX->getBits(32) << 1; _merCompression = _IDX->getBits(32); _prefixSize = _IDX->getBits(32); _merDataSize = _merSizeInBits - _prefixSize; _numUnique = _IDX->getBits(64); _numDistinct = _IDX->getBits(64); _numTotal = _IDX->getBits(64); _histogramHuge = 0; _histogramLen = 0; _histogramMaxValue = 0; _histogram = 0L; uint32 version = atoi(Imagic + 13); if (version > 1) { _histogramHuge = _IDX->getBits(64); _histogramLen = _IDX->getBits(64); _histogramMaxValue = _IDX->getBits(64); _histogram = new uint64 [_histogramLen]; for (uint32 i=0; i<_histogramLen; i++) _histogram[i] = _IDX->getBits(64); } _thisBucket = uint64ZERO; _thisBucketSize = getIDXnumber(); _numBuckets = uint64ONE << _prefixSize; _thisMer.setMerSize(_merSizeInBits >> 1); _thisMer.clear(); _thisMerCount = uint64ZERO; _thisMerPositionsMax = 0; _thisMerPositions = 0L; _validMer = true; #ifdef SHOW_VARIABLES fprintf(stderr, "_merSizeInBits = "uint32FMT"\n", _merSizeInBits); fprintf(stderr, "_merCompression = "uint32FMT"\n", _merCompression); fprintf(stderr, "_prefixSize = "uint32FMT"\n", _prefixSize); fprintf(stderr, "_merDataSize = "uint32FMT"\n", _merDataSize); fprintf(stderr, "_numUnique = "uint64FMT"\n", _numUnique); fprintf(stderr, "_numDistinct = "uint64FMT"\n", _numDistinct); fprintf(stderr, "_numTotal = "uint64FMT"\n", _numTotal); fprintf(stderr, "_thisBucket = "uint64FMT"\n", _thisBucket); fprintf(stderr, "_thisBucketSize = "uint64FMT"\n", _thisBucketSize); fprintf(stderr, "_thisMerCount = "uint64FMT"\n", _thisMerCount); #endif if ((ms > 0) && (_merSizeInBits >> 1 != ms)) { fprintf(stderr, "merylStreamReader()-- ERROR: User requested mersize "uint32FMT" but '%s' is mersize "uint32FMT"\n", ms, fn, _merSizeInBits >> 1); exit(1); } } merylStreamReader::~merylStreamReader() { delete _IDX; delete _DAT; 
delete _POS; delete [] _thisMerPositions; delete [] _histogram; } bool merylStreamReader::nextMer(void) { // Use a while here, so that we skip buckets that are empty // while ((_thisBucketSize == 0) && (_thisBucket < _numBuckets)) { _thisBucketSize = getIDXnumber(); _thisBucket++; } if (_thisBucket >= _numBuckets) return(_validMer = false); // Before you get rid of the clear() -- if, say, the list of mers // is sorted and we can shift the mer to make space for the new // stuff -- make sure that nobody is calling reverseComplement()! // _thisMer.clear(); _thisMer.readFromBitPackedFile(_DAT, _merDataSize); _thisMer.setBits(_merDataSize, _prefixSize, _thisBucket); _thisMerCount = getDATnumber(); _thisBucketSize--; if (_POS) { if (_thisMerPositionsMax < _thisMerCount) { delete [] _thisMerPositions; _thisMerPositionsMax = _thisMerCount + 1024; _thisMerPositions = new uint32 [_thisMerPositionsMax]; } for (uint32 i=0; i<_thisMerCount; i++) { _thisMerPositions[i] = _POS->getBits(32); } } return(true); } merylStreamWriter::merylStreamWriter(const char *fn, uint32 merSize, uint32 merComp, uint32 prefixSize, bool positionsEnabled) { char *outpath = new char [strlen(fn) + 17]; sprintf(outpath, "%s.mcidx", fn); _IDX = new bitPackedFile(outpath, 0, true); sprintf(outpath, "%s.mcdat", fn); _DAT = new bitPackedFile(outpath, 0, true); if (positionsEnabled) { sprintf(outpath, "%s.mcpos", fn); _POS = new bitPackedFile(outpath, 0, true); } else { _POS = 0L; } delete [] outpath; // Save really important stuff // unpacked --> write 0.42M mers/sec on 8 threads, merge 3.3M mers/sec // packed --> write 0.77M mers/sec on 8 threads, merge 3.9M mers/sec // // This sucks. 
// _idxIsPacked = 1; _datIsPacked = 1; _posIsPacked = 0; _merSizeInBits = merSize * 2; _merCompression = merComp; _prefixSize = prefixSize; _merDataSize = _merSizeInBits - _prefixSize; _thisBucket = uint64ZERO; _thisBucketSize = uint64ZERO; _numBuckets = uint64ONE << _prefixSize; _numUnique = uint64ZERO; _numDistinct = uint64ZERO; _numTotal = uint64ZERO; _thisMerIsBits = false; _thisMerIskMer = false; _thisMer.setMerSize(_merSizeInBits >> 1); _thisMer.clear(); _thisMerPre = uint64ZERO; _thisMerMer = uint64ZERO; _thisMerPreSize = prefixSize; _thisMerMerSize = 2 * merSize - prefixSize; _thisMerCount = uint64ZERO; for (uint32 i=0; i<16; i++) _IDX->putBits(ImagicX[i], 8); _IDX->putBits(_idxIsPacked, 32); _IDX->putBits(_datIsPacked, 32); _IDX->putBits(_posIsPacked, 32); _IDX->putBits(_merSizeInBits >> 1, 32); _IDX->putBits(_merCompression, 32); _IDX->putBits(_prefixSize, 32); _IDX->putBits(_numUnique, 64); _IDX->putBits(_numDistinct, 64); _IDX->putBits(_numTotal, 64); _histogramHuge = 0; _histogramLen = LIBMERYL_HISTOGRAM_MAX; _histogramMaxValue = 0; _histogram = new uint64 [_histogramLen]; for (uint32 i=0; i<_histogramLen; i++) _histogram[i] = 0; _IDX->putBits(_histogramHuge, 64); _IDX->putBits(_histogramLen, 64); _IDX->putBits(_histogramMaxValue, 64); for (uint32 i=0; i<_histogramLen; i++) _IDX->putBits(_histogram[i], 64); for (uint32 i=0; i<16; i++) _DAT->putBits(DmagicX[i], 8); if (_POS) for (uint32 i=0; i<16; i++) _POS->putBits(PmagicX[i], 8); } merylStreamWriter::~merylStreamWriter() { writeMer(); // Finish writing the buckets. 
// while (_thisBucket < _numBuckets + 2) { setIDXnumber(_thisBucketSize); _thisBucketSize = 0; _thisBucket++; } // Seek back to the start and rewrite the magic numbers // _IDX->seek(0); _DAT->seek(0); for (uint32 i=0; i<16; i++) _IDX->putBits(ImagicV[i], 8); _IDX->putBits(_idxIsPacked, 32); _IDX->putBits(_datIsPacked, 32); _IDX->putBits(_posIsPacked, 32); _IDX->putBits(_merSizeInBits >> 1, 32); _IDX->putBits(_merCompression, 32); _IDX->putBits(_prefixSize, 32); _IDX->putBits(_numUnique, 64); _IDX->putBits(_numDistinct, 64); _IDX->putBits(_numTotal, 64); _IDX->putBits(_histogramHuge, 64); _IDX->putBits(_histogramLen, 64); _IDX->putBits(_histogramMaxValue, 64); for (uint32 i=0; i<_histogramLen; i++) _IDX->putBits(_histogram[i], 64); delete _IDX; delete [] _histogram; for (uint32 i=0; i<16; i++) _DAT->putBits(DmagicV[i], 8); delete _DAT; if (_POS) { for (uint32 i=0; i<16; i++) _POS->putBits(PmagicV[i], 8); delete _POS; } } void merylStreamWriter::writeMer(void) { if (_thisMerCount == 0) return; _numTotal += _thisMerCount; _numDistinct++; if (_thisMerCount < LIBMERYL_HISTOGRAM_MAX) _histogram[_thisMerCount]++; else _histogramHuge++; if (_histogramMaxValue < _thisMerCount) _histogramMaxValue = _thisMerCount; assert((_thisMerIsBits == false) || (_thisMerIskMer == false)); if (_thisMerIsBits) { if (_thisMerCount == 1) { _DAT->putBits(_thisMerMer, _thisMerMerSize); setDATnumber(1); _thisBucketSize++; _numUnique++; } else { _DAT->putBits(_thisMerMer, _thisMerMerSize); setDATnumber(_thisMerCount); _thisBucketSize++; } } else { if (_thisMerCount == 1) { _thisMer.writeToBitPackedFile(_DAT, _merDataSize); setDATnumber(1); _thisBucketSize++; _numUnique++; } else if (_thisMerCount > 1) { _thisMer.writeToBitPackedFile(_DAT, _merDataSize); setDATnumber(_thisMerCount); _thisBucketSize++; } } } void merylStreamWriter::addMer(kMer &mer, uint32 count, uint32 *positions) { uint64 val; if (_thisMerIskMer == false) { _thisMerIskMer = true; assert(_thisMerIsBits == false); } // Fail if we 
see a smaller mer than last time. // if (mer < _thisMer) { char str[1024]; fprintf(stderr, "merylStreamWriter::addMer()-- ERROR: your mer stream isn't sorted increasingly!\n"); fprintf(stderr, "merylStreamWriter::addMer()-- last: %s\n", _thisMer.merToString(str)); fprintf(stderr, "merylStreamWriter::addMer()-- this: %s\n", mer.merToString(str)); exit(1); } // If there was a position given, write it. // if (positions && _POS) for (uint32 i=0; iputBits(positions[i], 32); // If the new mer is the same as the last one just increase the // count. // if (mer == _thisMer) { _thisMerCount += count; return; } // Write thisMer to disk. If the count is zero, we don't write // anything. The count is zero for the first mer (all A) unless we // add that mer, and if the silly user gives us a mer with zero // count. // writeMer(); // If the new mer is in a different bucket from the last mer, write // out some bucket counts. We need a while loop (opposed to just // writing one bucket) because we aren't guaranteed that the mers // are in adjacent buckets. 
// val = mer.startOfMer(_prefixSize); while (_thisBucket < val) { setIDXnumber(_thisBucketSize); _thisBucketSize = 0; _thisBucket++; } // Remember the new mer for the next time // _thisMer = mer; _thisMerCount = count; } void merylStreamWriter::addMer(uint64 prefix, uint32 prefixBits, uint64 mer, uint32 merBits, uint32 count, uint32 *positions) { if (_thisMerIsBits == false) { _thisMerIsBits = true; assert(_thisMerIskMer == false); } assert(prefixBits == _prefixSize); assert(prefixBits == _thisMerPreSize); assert(merBits == _thisMerMerSize); assert(prefixBits + merBits == _merSizeInBits); if ((prefix < _thisMerPre) || (prefix <= _thisMerPre) && (mer < _thisMerMer)) { assert(0); } if ((prefix == _thisMerPre) && (mer == _thisMerMer)) { _thisMerCount += count; return; } writeMer(); while (_thisBucket < prefix) { setIDXnumber(_thisBucketSize); _thisBucketSize = 0; _thisBucket++; } _thisMerPre = prefix; _thisMerMer = mer; _thisMerCount = count; } kmer-code-2013-trunk/libmeryl/Make.include0000644000000000000000000000055011512763666017224 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../libutil/)/ LIBBIO/ :=$(realpath $/../libbio/)/ LIBSEQ/ :=$(realpath $/../libseq/)/ $/.CXX_SRCS := $/libmeryl.C $/.CXX_INCS := $/libmeryl.H $/.CXX_LIBS := $/libmeryl.a $/.CLEAN := $/*.o $/libmeryl.a : $/libmeryl.o $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBBIO/} -I${LIBSEQ/} -I${LIBUTL/}) kmer-code-2013-trunk/tapper/0000755000000000000000000000000012641613360014443 5ustar rootrootkmer-code-2013-trunk/tapper/tapperAlignment.H0000644000000000000000000000242212322046702017702 0ustar rootroot class tapperAlignment { public: // Except as noted, all this is the same stuff as from tapperResult. 
uint64 _tagid; uint32 _seq; uint32 _pos; uint8 _colorDiffs[MAX_COLOR_MISMATCH_MAPPED]; // OLD, list of errors in the align uint8 _colorCorrections[MAX_COLOR_MISMATCH_MAPPED]; // NEW, list of errors in the align, after corrections uint32 _confidence:15; // NEW, confidence score on the corrected read uint32 _basesMismatch:4; // Number of mismatches in ACGT alignment uint32 _colorMismatch:4; // Number of consistent color mismatches uint32 _colorInconsistent:4; // Number of inconsistent color mismatches uint32 _rev:1; // Is reverse complement uint32 _diffSize:4; // Value of MAX_COLOR_MISMATCH_MAPPED. }; class tapperAlignmentPositionCompare { public: bool operator()(const tapperAlignment &a, const tapperAlignment &b) const { return((a._seq < b._seq) || ((a._seq == b._seq) && (a._pos < b._pos))); }; }; class tapperAlignmentTagIDAndScoreCompare { public: bool operator()(const tapperAlignment &a, const tapperAlignment &b) const { return((a._tagid < b._tagid) || ((a._tagid == b._tagid) && (a._confidence < b._confidence))); }; }; kmer-code-2013-trunk/tapper/tappermerge.C0000644000000000000000000000271212322046702017060 0ustar rootroot#include "tapperTag.H" #include "tapperResult.H" #include "tapperAlignment.H" #include "tapperHit.H" #include "tapperGlobalData.H" #include "tapperThreadData.H" #include "tapperComputation.H" int main(int argc, char **argv) { char *outName = 0L; uint32 inputsLen = 0; char *inputs[8192]; // Parse and check the inputs. 
int arg=1; int err=0; while (arg < argc) { if (strncmp(argv[arg], "-output", 2) == 0) { outName = argv[++arg]; } else { if (tapperResultFile::validResultFile(argv[arg]) == false) { fprintf(stderr, "Didn't find tapperResultFile '%s'\n", argv[arg]); err++; } else { inputs[inputsLen++] = argv[arg]; } } arg++; } if ((err) || (inputsLen == 0)) { fprintf(stderr, "usage: %s -output out-directory in-directory [in-directory ...]\n", argv[0]); exit(1); } // Open the output file tapperResultFile *out = new tapperResultFile(outName, 'w'); // Loop over the inputs, copying to the output. We could be much // looser here, just blindly copying all records in each file, but // we'll be a little more careful, and copy frag by frag. for (uint32 inputsIdx=0; inputsIdxread(res)) out->write(res); delete inp; delete res; } delete out; exit(0); } kmer-code-2013-trunk/tapper/tapperComputation.H0000644000000000000000000001442112322046702020270 0ustar rootroot #include class tapperComputation { public: tapperComputation(tapperTag *a, tapperTag *b) { uint16 id[4]; tag1f.clear(); tag1r.clear(); tag2f.clear(); tag2r.clear(); tag1size = 0; tag2size = 0; // Process the tags. // // It's not a trivial operation (probably not even possible) to // reverse-complement a SOLiD read. To reverse complement a // read, we would need to construct a new reference base, but to // construct that base, we need to decode the read from color to // acgt. Any errors in the read prevent precise decoding, and we // end up building the new reverse-complemented read with all the // errors at the start. By adding the anchor base to the end, // we're fixing all the bases in error. // // So, we need to handle reverse reads specially. Reverse the // read (color-space is self-complementing), and RC the anchor // base. Any downstream processes need to know that a read has // the anchor at the start OR at the end. // // When building the mers (the for loops), yes, i=2. 
The first // letter in the tag is the last in the adapter, and it's not in // the tag. We need to skip it. The second letter (the first // color) is biased by the adapter, and it will be an error 75% // of the time. Skip it too. if (a) { tag1id = a->decode(id, tag1fseq, tag1fqlt); tag1size = strlen(tag1fseq); tag1f.setMerSize(tag1size-1); tag1f.setMerSpan(tag1size-1); tag1r.setMerSize(tag1size-1); tag1r.setMerSpan(tag1size-1); for (uint32 i=0, j=tag1size-1; idecode(id, tag2fseq, tag2fqlt); tag2size = strlen(tag2fseq); tag2f.setMerSize(tag2size-1); tag2f.setMerSpan(tag2size-1); tag2r.setMerSize(tag2size-1); tag2r.setMerSpan(tag2size-1); for (uint32 i=0, j=tag2size-1; iTA->AQIindex(g->maxBaseError, g->maxColorError, h.numberOfBaseMismatches(), h.numberOfColorMismatches(), h.numberOfColorInconsistencies()); if (alignQualHistogram == 0L) { alignQualHistogramLen = g->TA->AQIlength(g->maxBaseError, g->maxColorError); alignQualHistogram = new uint32 [alignQualHistogramLen]; memset(alignQualHistogram, 0, sizeof(uint32) * alignQualHistogramLen); } alignQualHistogram[ii]++; if (tag1) { if (tag1hitsLen >= tag1hitsMax) { tag1hitsMax *= 2; tapperHit *nits = new tapperHit [tag1hitsMax]; memcpy(nits, tag1hits, sizeof(tapperHit) * tag1hitsLen); delete [] tag1hits; tag1hits = nits; } tag1hits[tag1hitsLen++] = h; } else { if (tag2hitsLen >= tag2hitsMax) { tag2hitsMax *= 2; tapperHit *nits = new tapperHit [tag2hitsMax]; memcpy(nits, tag2hits, sizeof(tapperHit) * tag2hitsLen); delete [] tag2hits; tag2hits = nits; } tag2hits[tag2hitsLen++] = h; } }; void sortHitsByPosition(void) { tapperHitPositionCompare pc; std::sort(tag1hits, tag1hits+tag1hitsLen, pc); std::sort(tag2hits, tag2hits+tag2hitsLen, pc); }; public: kMer tag1f, tag1r; kMer tag2f, tag2r; uint32 tag1size; uint32 tag2size; uint64 tag1id; uint64 tag2id; char tag1fseq[TAG_LEN_MAX], tag1rseq[TAG_LEN_MAX]; char tag2fseq[TAG_LEN_MAX], tag2rseq[TAG_LEN_MAX]; uint64 tag1fqlt[TAG_LEN_MAX], tag1rqlt[TAG_LEN_MAX]; uint64 
tag2fqlt[TAG_LEN_MAX], tag2rqlt[TAG_LEN_MAX]; uint32 tag1hitsLen; uint32 tag1hitsMax; tapperHit *tag1hits; uint32 tag2hitsLen; uint32 tag2hitsMax; tapperHit *tag2hits; uint32 mean; uint32 stddev; uint32 *alignQualHistogram; uint32 alignQualHistogramLen; tapperResultFragment *resultFragment; uint32 resultFragmentLen; tapperResultFragment *resultSingleton; uint32 resultSingletonLen; tapperResultFragment *resultTangledAlignment; uint32 resultTangledAlignmentLen; tapperResultMated *resultMated; uint32 resultMatedLen; tapperResultTangled *resultTangled; uint32 resultTangledLen; }; kmer-code-2013-trunk/tapper/tappererrorcorrect.C0000644000000000000000000001563212322046702020501 0ustar rootroot#include "util++.H" #include "tapperTag.H" #include "tapperResult.H" #include "tapperAlignment.H" #include "tapperHit.H" #include "tapperGlobalData.H" #include "tapperThreadData.H" #include "tapperComputation.H" class alignmentList { public: alignmentList(recordFile *inp) { alignsMax = 16; aligns = new tapperAlignment * [alignsMax]; alignsLen = new uint32 [alignsMax]; alignsPerBlock = 16384; alignsInp = inp; for (uint32 i=0; igetRecord(aligns[i], alignsPerBlock); fprintf(stderr, "block "uint32FMT" has "uint32FMT" things.\n", i, alignsLen[i]); } }; ~alignmentList() { for (uint32 i=0; igetRecord(aligns[alignsMax-1], alignsPerBlock); fprintf(stderr, "block "uint32FMT" has "uint32FMT" things.\n", alignsMax-1, alignsLen[alignsMax-1]); goto trimBeforeSeqPosAgain; } }; tapperAlignment *operator[](uint32 x) { uint32 block = x / alignsPerBlock; uint32 piece = x % alignsPerBlock; if (piece < alignsLen[block]) return(aligns[block] + piece); return(0L); }; bool empty(void) { return(alignsLen[0] == 0); }; private: uint32 alignsMax; tapperAlignment **aligns; uint32 *alignsLen; uint32 alignsPerBlock; recordFile *alignsInp; }; int main(int argc, char **argv) { char *outputName = 0L; char *inputName = 0L; uint64 memoryLimit = 1024 * 1024 * 1024; { int arg=1; int err=0; while (arg < argc) { if 
(strncmp(argv[arg], "-memory", 2) == 0) { memoryLimit = strtouint64(argv[++arg], 0L) * 1024 * 1024; } else if (strncmp(argv[arg], "-output", 2) == 0) { outputName = argv[++arg]; } else if (strncmp(argv[arg], "-input", 2) == 0) { inputName = argv[++arg]; } else { err++; } arg++; } if ((err) || (inputName == 0) || (outputName == 0L)) { fprintf(stderr, "usage: %s [-memory X (MB)] -output prefix -input inp.tapperAlignment\n", argv[0]); exit(1); } } recordFile *inp = new recordFile(inputName, 0, sizeof(tapperAlignment), 'r'); alignmentList all(inp); uint32 winSz = 200; uint32 winLo = 0; uint32 winHi = winLo + winSz; uint32 linesMax = 1024; char lines[1024][256]; uint32 lineLen[1024]; uint16 id[4]; while (all.empty() == false) { memset(lines, ' ', sizeof(char) * linesMax * 256); for (uint32 i=0; i_pos < winHi); a++) { tapperAlignment *rec = all[a]; // XXX we lose reads that wrap into our region if (winLo < rec->_pos) { for (uint32 l=0; l_pos - winLo) { //fprintf(stdout, "at l="uint32FMT" x="uint32FMT" len="uint32FMT"\n", l, rec->_pos - winLo, lineLen[l]); #warning need the real read size here for (uint32 x=rec->_pos - winLo; x_pos - winLo + 25; x++) lines[l][x] = '.'; // Needed so we can disable ID printing. lines[l][rec->_pos - winLo + 25] = 0; #undef WITH_IDS #ifdef WITH_IDS decodeTagID(rec->_tagid, id); sprintf(lines[l] + rec->_pos - winLo + 25, " %c "uint16FMTW(05)"-"uint16FMTW(05)"-"uint16FMTW(05)"-"uint16FMTW(05)" ", (rec->_rev) ? '<' : '>', id[0], id[1], id[2], id[3]); #endif lineLen[l] = strlen(lines[l]); // Convert that trailing nul into a whitespace. 
lines[l][lineLen[l]] = ' '; uint32 err = 0; for (uint32 x=0; x_colorMismatch; x++) { uint32 pos = rec->_colorDiffs[err] & 0x3f; char let = '*'; //bitsToColor[rec->_colorDiffs[err] >> 6]; lines[l][rec->_pos - winLo + pos] = let; err++; } for (uint32 x=0; x_colorInconsistent; x++) { uint32 pos = rec->_colorDiffs[err] & 0x3f; char let = bitsToColor[rec->_colorDiffs[err] >> 6]; lines[l][rec->_pos - winLo + pos] = let; err++; } l = linesMax; } } } } bool stuff = false; for (uint32 i=0; i 0) stuff = true; if (stuff) { fprintf(stdout, "\nALIGN "uint32FMT"-"uint32FMT"\n", winLo, winHi); fprintf(stdout, " 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0\n"); fprintf(stdout, " 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890\n"); for (uint32 i=0; i 0) { lines[i][lineLen[i]] = 0; fprintf(stdout, uint32FMTW(03)"] %s\n", i, lines[i]); } } } winLo = winHi; winHi = winLo + winSz; all.trimBeforeSeqPos(0, winLo); } delete inp; exit(0); } #if 0 sprintf(linp, "rec "uint64HEX" "uint32FMT":"uint32FMT, rec->_tagid, rec->_seq, rec->_pos); while (*linp) linp++; uint32 err = 0; for (uint32 x=0; x_colorMismatch; x++) { sprintf(linp, " M:%c@%02d(%07d)", bitsToColor[rec->_colorDiffs[err] >> 6], (rec->_colorDiffs[err] & 0x3f), (rec->_colorDiffs[err] & 0x3f) + rec->_pos); while (*linp) linp++; err++; } for (uint32 x=0; x_colorInconsistent; x++) { sprintf(linp, " E:%c@%02d(%07d)", bitsToColor[rec->_colorDiffs[err] >> 6], (rec->_colorDiffs[err] & 0x3f), (rec->_colorDiffs[err] & 0x3f) + rec->_pos); while (*linp) linp++; err++; } fprintf(stdout, "%s\n", line); #endif kmer-code-2013-trunk/tapper/tapperThreadData.H0000644000000000000000000000277512415073322020001 0ustar rootrootclass tapperThreadData { public: tapperThreadData(tapperGlobalData *g) { posn1fMax = 256 * 1024; posn1fLen = 0; posn1f = new uint64 [posn1fMax]; posn1rMax = 256 * 1024; posn1rLen = 
0; posn1r = new uint64 [posn1rMax]; posn2fMax = 256 * 1024; posn2fLen = 0; posn2f = new uint64 [posn2fMax]; posn2rMax = 256 * 1024; posn2rLen = 0; posn2r = new uint64 [posn2rMax]; numHappiesMax = 256 * 1024; tag1happies = new uint32 [numHappiesMax]; tag1mate = new uint32 [numHappiesMax]; tag1tangled = new uint32 [numHappiesMax]; tag2happies = new uint32 [numHappiesMax]; tag2mate = new uint32 [numHappiesMax]; tag2tangled = new uint32 [numHappiesMax]; tangle = 0L; }; ~tapperThreadData() { delete [] posn1f; delete [] posn1r; delete [] posn2f; delete [] posn2r; delete [] tag1happies; delete [] tag1mate; delete [] tag1tangled; delete [] tag2happies; delete [] tag2mate; delete [] tag2tangled; delete [] tangle; }; public: uint64 posn1fMax; uint64 posn1fLen; uint64 *posn1f; uint64 posn1rMax; uint64 posn1rLen; uint64 *posn1r; uint64 posn2fMax; uint64 posn2fLen; uint64 *posn2f; uint64 posn2rMax; uint64 posn2rLen; uint64 *posn2r; uint32 numHappiesMax; uint32 *tag1happies; uint32 *tag1mate; uint32 *tag1tangled; uint32 *tag2happies; uint32 *tag2mate; uint32 *tag2tangled; intervalList *tangle; }; kmer-code-2013-trunk/tapper/compare.pl0000644000000000000000000001523111055560521016426 0ustar rootroot#!/usr/bin/perl use strict; # Hack to compare tapperconvert to corona output. 
# # corona: # 926_28_374 T0233011320231302110223300 G2002221011312002112001121 1 0 1 1 1 -1809022 -1810770 AAA # 926_33_329 T3000003020011330112020200 G2200202231211010022113031 0 3 3 1 1 -152955 -154703 AAA # 926_34_440 T0121010212320132021200211 G0312100032003101301113001 2 1 3 1 1 -1618712 -1620852 AAA # 926_38_533 T0011012010023302310021321 G0200103332011100011013103 0 4 4 1 1 -251624 -253847 AAA # 926_42_329 T2331321031230210321102001 G2330012312202211022311020 1 3 4 1 1 -1093994 -1098892 AAA # # 0 beadId # 1 F3 sequence # 2 R3 sequence # 3 num F3 mismatches # 4 num R3 mismatches # 5 total mismatches # 6 F3 reference # 7 R3 reference # 8 F3 position # 9 R3 position # 10 category # # # tapperconvert: # M 12345_926_28_197 0 633481 f 0 0/0/3 54321_926_28_197 0 631038 f 0 0/0/2 # M 12345_926_28_374 0 1808998 r 0 0/0/1 54321_926_28_374 0 1810746 r 0 0/0/0 # M 12345_926_29_486 0 129939 f 0 0/0/2 54321_926_29_486 0 127944 f 0 0/0/4 # M 12345_926_33_329 0 152931 r 0 0/0/0 54321_926_33_329 0 154679 r 0 0/0/3 # M 12345_926_34_440 0 1618688 r 0 1/2/0 54321_926_34_440 0 1620828 r 0 0/0/1 # M 12345_926_38_533 0 251600 r 0 0/0/0 54321_926_38_533 0 253823 r 0 0/0/4 my $tinput = shift @ARGV; my $terrors = shift @ARGV; my $cinput = "pgingivali.F3_R3.mates"; my $cerrors = 3; if (!defined($tinput) || !defined($terrors)) { die "usage: $0 tapper-input-prefix num-errors\n"; } print STDERR "Reading tangles.\n"; my %tangled; open(TT, "./tapperconvert -dumpt $tinput |") or die; while () { my @v = split '\s+', $_; if ($v[1] =~ m/^\d+_(\d+_\d+_\d+)$/) { $v[1] = $1; } $tangled{$v[1]}++; } close(TT); print STDERR "Reading tapper mate for counts.\n"; my %tcounts; open(TT, "./tapperconvert -dumpm $tinput |") or die; while () { my @v = split '\s+', $_; if ($v[1] =~ m/^\d+_(\d+_\d+_\d+)$/) { $v[1] = $1; } $tcounts{$v[1]}++; } close(TT); print STDERR "Processing.\n"; open(FC, "< $cinput") or die; open(FT, "./tapperconvert -dumpm $tinput |") or die; open(GC, "> compare.pl.corona.out") or die; 
open(GT, "> compare.pl.tapper.out") or die; my $same = 0; my $qual = 0; my $diffmultiple = 0; my $diff = 0; my $onlyc = 0; my $onlycerror = 0; my $onlyctooshort = 0; my $onlyctoolong = 0; my $onlyctangled = 0; my $onlyt = 0; my $onlyterror = 0; my $onlyttooshort = 0; my $onlyttoolong = 0; my $onlytdupl = 0; my $cid = undef; my $cstr = undef; my @c; my $tid = undef; my $tstr = undef; my @t; while (!eof(FC) && !eof(FT)) { if (!defined($cid)) { $_ = ; while (m/^#/) { $_ = ; } my @v = split '\s+', $_; if ($v[10] ne "AAA") { goto again; } my $ori = "f"; if ($v[8] < 0) { $v[8] = -int($v[8]); $v[9] = -int($v[9]); $ori = "r"; } my $dist = $v[9] - $v[8]; if ($dist < 0) { $dist = -$dist; } $cid = $v[0]; $cstr = "$v[0] $v[3] $v[4] $ori $v[8] $v[9] $dist"; $c[0] = $v[0]; $c[1] = $v[3]; $c[2] = $v[4]; $c[3] = $ori; $c[4] = $v[8]; $c[5] = $v[9]; $c[6] = $dist; { my @xxx = split '_', $cid; $xxx[0] = substr("00000$xxx[0]", -5); $xxx[1] = substr("00000$xxx[1]", -5); $xxx[2] = substr("00000$xxx[2]", -5); $cid = "$xxx[0]$xxx[1]$xxx[2]"; } } if (!defined($tid)) { $_ = ; my @v = split '\s+', $_; if ($v[1] =~ m/^\d+_(\d+_\d+_\d+)$/) { $v[1] = $1; } if ($v[6] =~ m!\d+/(\d+)/(\d+)$!) { $v[6] = $1 + $2; } if ($v[12] =~ m!\d+/(\d+)/(\d+)$!) { $v[12] = $1 + $2; } # Correct for reverse? Why? 
if ($v[4] eq "r") { $v[3] += 24; $v[9] += 24; } my $dist = $v[9] - $v[3]; if ($dist < 0) { $dist = -$dist; } $tid = $v[1]; $tstr = "$v[1] $v[6] $v[12] $v[4] $v[3] $v[9] $dist"; $t[0] = $v[1]; $t[1] = $v[6]; $t[2] = $v[12]; $t[3] = $v[4]; $t[4] = $v[3]; $t[5] = $v[9]; $t[6] = $dist; { my @xxx = split '\D+', $tid; $xxx[0] = substr("00000$xxx[0]", -5); $xxx[1] = substr("00000$xxx[1]", -5); $xxx[2] = substr("00000$xxx[2]", -5); $tid = "$xxx[0]$xxx[1]$xxx[2]"; } } if ($cid eq $tid) { print GC "$cstr\n"; print GT "$tstr\n"; if ($cstr eq $tstr) { $same++; } elsif (($c[3] == $t[3]) && ($c[4] == $t[4]) && ($c[5] == $t[5]) && ($c[6] == $t[6])) { $qual++; } else { #print STDERR "DIFF $cstr == $tstr\n"; if ($tcounts{$t[0]} > 1) { $diffmultiple++; } else { $diff++; } } undef $cid; undef $cstr; undef @c; undef $tid; undef $tstr; undef @t; } elsif ($cid lt $tid) { print GC "$cstr\n"; if (($c[1] > $terrors) || ($c[2] > $terrors)) { $onlycerror++; } elsif ($c[6] < 1400) { $onlyctooshort++; } elsif ($c[6] > 2600) { $onlyctoolong++; } elsif (exists($tangled{$c[0]})) { #print STDERR "TANGLED $cstr\n"; $onlyctangled++; } else { #print STDERR "MISSED $cstr\n"; $onlyc++; } undef $cid; undef $cstr; undef @c; } else { print GT "$tstr\n"; if (($t[1] > $cerrors) || ($t[2] > $cerrors)) { $onlyterror++; } elsif ($t[6] < 1400) { $onlyttooshort++; } elsif ($t[6] > 2600) { $onlyttoolong++; } elsif ($tcounts{$t[0]} > 1) { $onlytdupl++; } else { $onlyt++; } undef $tid; undef $tstr; undef @t; } again: } print STDERR "same $same qual $qual diff $diff diffmultiple $diffmultiple\n"; print STDERR "onlyc $onlyc err $onlycerror short $onlyctooshort long $onlyctoolong TANGLED $onlyctangled\n"; print STDERR "onlyt $onlyt err $onlyterror short $onlyttooshort long $onlyttoolong DUPLICATE $onlytdupl\n"; close(FC); close(FT); close(GC); close(GT); kmer-code-2013-trunk/tapper/tapperconvert.C0000644000000000000000000000522612322046702017444 0ustar rootroot#include "tapperTag.H" #include "tapperResult.H" #include 
"tapperAlignment.H" #include "tapperHit.H" #include "tapperGlobalData.H" #include "tapperThreadData.H" #include "tapperComputation.H" int main(int argc, char **argv) { char *resultName = 0L; bool dumpIndex = false; bool dumpFrag = false; bool dumpSing = false; bool dumpMate = false; bool dumpTang = false; bool allIndex = false; int arg=1; int err=0; while (arg < argc) { if (strncmp(argv[arg], "-dumpindex", 6) == 0) { dumpIndex = true; } else if (strncmp(argv[arg], "-dumpfragments", 6) == 0) { dumpFrag = true; } else if (strncmp(argv[arg], "-dumpsingleton", 6) == 0) { dumpSing = true; } else if (strncmp(argv[arg], "-dumpmated", 6) == 0) { dumpMate = true; } else if (strncmp(argv[arg], "-dumptangled", 6) == 0) { dumpTang = true; } else if (strncmp(argv[arg], "-allindex", 6) == 0) { allIndex = true; } else if (resultName == 0L) { resultName = argv[arg]; } else { err++; } arg++; } if ((err) || (resultName == 0L)) { fprintf(stderr, "usage: %s [-dumpindex] [-dumpfragments] [-dumpsingletons] [-dumpmated] [-dumptangled] prefix\n", argv[0]); fprintf(stderr, " -allIndex -- also dump index for unmapped fragments\n"); exit(1); } tapperResultFile *inp = new tapperResultFile(resultName, 'r'); tapperResult *res = new tapperResult; while (inp->read(res)) { if ((dumpIndex) && ((allIndex) || ((dumpFrag) && (res->idx._numFrag > 0)) || ((dumpFrag) && (res->idx._numFragDiscarded > 0)) || ((dumpSing) && (res->idx._numFragSingleton > 0)) || ((dumpMate) && (res->idx._numMated > 0)) || ((dumpTang) && (res->idx._numTangled > 0)))) res->idx.print(stdout); if (dumpFrag) for (uint32 i=0; iidx._numFrag; i++) res->frag[i].print(stdout, &res->idx); if (dumpSing) for (uint32 i=0; iidx._numFragSingleton; i++) res->sing[i].print(stdout, &res->idx); if (dumpMate) for (uint32 i=0; iidx._numMated; i++) res->mate[i].print(stdout, &res->idx); if (dumpTang) for (uint32 i=0; iidx._numTangled; i++) { res->tang[i].print(stdout, &res->idx); for (uint32 j=0; jidx._numFragTangled; j++) { if ((res->tang[i]._seq 
== res->tali[j]._seq) && (res->tang[i]._bgn <= res->tali[j]._pos) && (res->tali[j]._pos <= res->tang[i]._end)) { res->tali[j].print(stdout, &res->idx); } } } } delete inp; delete res; exit(0); } kmer-code-2013-trunk/tapper/tagger.C0000644000000000000000000003315612322046702016024 0ustar rootroot#include "tapperTag.H" #include "tapperResult.H" #include "tapperAlignment.H" #include "tapperHit.H" #include "seqCache.H" // Convert reads from ASCI to tapper binary. // // ASSUMPTIONS // // 1) User is smart enough to give the correct set of mated files. // Code doesn't check that an F tag goes with an R tag, just that the // tag coordinates agree. It is possible to mate an F to an F if the // wrong inputs are given. // // 2) Tag coords are 16-bit integers. File UIDs are 16-bit integers. // // Define this to test the encode/decode functionality. //#define TEST_ENCODING int tapperTagCompare(const void *a, const void *b) { tapperTag const *A = (tapperTag const *)a; tapperTag const *B = (tapperTag const *)b; if (A->tagID() < B->tagID()) return(-1); return(A->tagID() != B->tagID()); } bool readTag(uint32 fileUID, FILE *seq, FILE *qlt, tapperTag *T) { static uint16 id[4]; static char seqhdr[1024]; static char seqseq[1024]; static char qlthdr[1024]; static char qltseq[1024]; static uint64 qltnum[1024]; static splitToWords S; seqhdr[0] = 0; seqseq[0] = 0; qlthdr[0] = 0; qltseq[0] = 0; if (feof(seq) || feof(qlt)) return(false); fgets(seqhdr, 1024, seq); while (seqhdr[0] == '#') fgets(seqhdr, 1024, seq); fgets(seqseq, 1024, seq); fgets(qlthdr, 1024, qlt); while (qlthdr[0] == '#') fgets(qlthdr, 1024, qlt); fgets(qltseq, 1024, qlt); if ((seqhdr[0] == 0) || (qlthdr[0] == 0)) return(false); chomp(seqhdr); chomp(seqseq); chomp(qlthdr); chomp(qltseq); if (strcmp(seqhdr, qlthdr) != 0) fprintf(stderr, "WARNING: Got unpaired seq '%s' and qlt '%s'\n", seqhdr, qlthdr); // Assumes the header is >461_28_1918_F3 // -- copies it to the left by one to remove the > // -- the loop below doesn't move 
the zero-terminator // -- resulting string is "461 28 1918 F33" // for (uint32 i=1; seqhdr[i]; i++) { if (seqhdr[i] == '_') seqhdr[i] = ' '; seqhdr[i-1] = seqhdr[i]; } S.split(seqhdr); id[0] = fileUID; id[1] = strtouint32(S[0], 0L); id[2] = strtouint32(S[1], 0L); id[3] = strtouint32(S[2], 0L); S.split(qltseq); // Not sure why there are negative numbers here, but there are. // for (uint32 i=0; i 31) qltnum[i] = 31; #endif } T->encode(id, seqseq, qltnum); #ifdef TEST_ENCODING { uint16 it[4]; char seqtst[1024]; uint64 qlttst[1024]; T->decode(it, seqtst, qlttst); uint32 len = strlen(seqtst); uint32 fail = 0; uint64 qltsum=0, tstsum=0; for (uint32 l=0; lmetaData()->isPairedTagFile()) { fprintf(stdout, "%s\ttype\tmated tags\n", tagfile); fprintf(stdout, "%s\tlength\t"uint32FMT"\n", tagfile, TF->metaData()->tagSize()); fprintf(stdout, "%s\tnumMates\t"uint64FMT"\n", tagfile, TF->numberOfMatePairs()); fprintf(stdout, "%s\tmean\t"uint32FMT"\n", tagfile, TF->metaData()->mean()); fprintf(stdout, "%s\tstddev\t"uint32FMT"\n", tagfile, TF->metaData()->stddev()); } else { fprintf(stdout, "%s\ttype\tfragment tags\n", tagfile); fprintf(stdout, "%s\tlength\t"uint32FMT"\n", tagfile, TF->metaData()->tagSize()); fprintf(stdout, "%s\tnumTags\t"uint64FMT"\n", tagfile, TF->numberOfFragmentTags()); } } void dumpTagFile(char *tagfile) { tapperTagFile *TF = new tapperTagFile(tagfile, 'r'); tapperTag a, b; uint16 ida[4], idb[4]; char seqa[265], seqb[256]; char quaa[256], quab[256]; uint64 qvsa[256], qvsb[256]; uint32 i; if (TF->metaData()->isPairedTagFile()) { while (TF->get(&a, &b)) { a.decode(ida, seqa, qvsa); b.decode(idb, seqb, qvsb); for (i=0; seqa[i+1]; i++) quaa[i] = qvsa[i] + '0'; for (i=0; seqb[i+1]; i++) quab[i] = qvsb[i] + '0'; fprintf(stdout, ">"uint16FMT"_"uint16FMT"_"uint16FMT"_"uint16FMT"\t%s/%s\t>"uint16FMT"_"uint16FMT"_"uint16FMT"_"uint16FMT"\t%s/%s\n", ida[0], ida[1], ida[2], ida[3], seqa, quaa, idb[0], idb[1], idb[2], idb[3], seqb, quab); } } else { while (TF->get(&a)) { 
a.decode(ida, seqa, qvsa); for (i=0; seqa[i+1]; i++) quaa[i] = qvsa[i] + '0'; fprintf(stdout, ">"uint16FMT"_"uint16FMT"_"uint16FMT"_"uint16FMT"\t%s/%s\n", ida[0], ida[1], ida[2], ida[3], seqa, quaa); } } delete TF; } int main(int argc, char **argv) { char *prefix = 0L; uint32 sampleSize = 0; char *sampleFile = 0L; uint32 sampleErrors = 3; uint32 sampleTagSize = 25; uint32 tagfuid = 0, tagruid = 0; char *tagfseq = 0L, *tagrseq = 0L; char *tagfqlt = 0L, *tagrqlt = 0L; uint32 mean=0, stddev=0; int arg=1; int err=0; while (arg < argc) { if (strncmp(argv[arg], "-tagout", 5) == 0) { prefix = argv[++arg]; } else if (strncmp(argv[arg], "-tags", 5) == 0) { tagfuid = strtouint32(argv[++arg], 0L); tagfseq = argv[++arg]; tagfqlt = argv[++arg]; } else if (strncmp(argv[arg], "-ftags", 2) == 0) { tagfuid = strtouint32(argv[++arg], 0L); tagfseq = argv[++arg]; tagfqlt = argv[++arg]; } else if (strncmp(argv[arg], "-rtags", 2) == 0) { tagruid = strtouint32(argv[++arg], 0L); tagrseq = argv[++arg]; tagrqlt = argv[++arg]; } else if (strncmp(argv[arg], "-insertsize", 2) == 0) { mean = strtouint32(argv[++arg], 0L); stddev = strtouint32(argv[++arg], 0L); if (mean > MAX_INSERT_SIZE) fprintf(stderr, "%s: insert size limited to at most %dbp.\n", argv[0], MAX_INSERT_SIZE), exit(1); if (stddev > MAX_INSERT_DEVIATION) fprintf(stderr, "%s: insert size limited to at most +- %dbp.\n", argv[0], MAX_INSERT_DEVIATION), exit(1); } else if (strcmp(argv[arg], "-sample") == 0) { sampleSize = strtouint32(argv[++arg], 0L); sampleFile = argv[++arg]; } else if (strcmp(argv[arg], "-sampleerrors") == 0) { sampleErrors = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-sampletagsize") == 0) { sampleTagSize = strtouint32(argv[++arg], 0L); } else if (strncmp(argv[arg], "-stats", 3) == 0) { dumpTagFileStats(argv[++arg]); exit(0); } else if (strncmp(argv[arg], "-dump", 2) == 0) { dumpTagFile(argv[++arg]); exit(0); } else { err++; } arg++; } if (sampleFile == 0L) { if ((tagfseq == 0L) || (tagfqlt == 0L)) 
err++; if ((tagfseq != 0L) && (tagfqlt == 0L)) err++; if ((tagfseq == 0L) && (tagfqlt != 0L)) err++; } if ((err) || (prefix == 0L)) { fprintf(stderr, "usage: %s -tagout prefix -tags fileUID xx.csfasta xx.qual\n", argv[0]); fprintf(stderr, "usage: %s -tagout prefix -ftags fileUID ff.csfasta ff.qual -rtags fileUID rr.csfasta rr.qual\n", argv[0]); fprintf(stderr, "usage: %s -dump file.tapperTags\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "unmated tags will be placed in 'prefix.frag.tapperTags'\n"); fprintf(stderr, " mated tags will be placed in 'prefix.mate.tapperTags'\n"); exit(1); } uint64 numTagsF = 0, maxTagsF = 0; uint64 numTagsR = 0, maxTagsR = 0; uint64 numTagsM = 0; tapperTag *TF = 0L; tapperTag *TR = 0L; // If given a sampleFile, generate some tags from there. if (sampleFile) { seqCache *F = new seqCache(sampleFile); seqInCore *s = F->getSequenceInCore(); uint32 pos = 0; uint32 len = s->sequenceLength(); uint16 id[4]; char cor[64] = {0}; char seq[64] = {0}; uint64 qlt[64] = {0}; char acgt[4] = {'A', 'C', 'G', 'T'}; mt_s *mtctx = mtInit(time(0)); maxTagsF = sampleSize; TF = new tapperTag [maxTagsF]; maxTagsR = sampleSize; TR = new tapperTag [maxTagsR]; for (uint32 i=0; isequence()[sp++]; cor[x] = n; seq[x] = baseToColor[l][n]; l = n; } } else { uint32 sp = pos + sampleTagSize - 1; for (uint32 x=1; x<=sampleTagSize; x++) { n = complementSymbol[s->sequence()[sp--]]; cor[x] = n; seq[x] = baseToColor[l][n]; l = n; } } // Insert errors. char errors[256] = {0}; char errort[256] = {0}; uint32 nerrs = mtRandom32(mtctx) % (sampleErrors + 1); for (uint32 xx=0; xx '3') seq[e] = '0'; sprintf(errort, "\t%c->%c@%02d", o, seq[e], e); strcat(errors, errort); } id[0] = i; id[1] = 0; id[2] = 0; id[3] = 0; fprintf(stdout, "F\t"uint16FMT"_"uint16FMT"_"uint16FMT"_"uint16FMT"\t0\t"uint32FMT"\t%c\t%s%s\t%s\n", id[0], id[1], id[2], id[3], pos, (doForward) ? 
'f' : 'r', cor+1, errors, seq); // TF is NOT just storing the 'forward' reads, it's all the // reads from the first half of the mate. Since we're not // mated, this is just all reads. TF[numTagsF++].encode(id, seq, qlt); } } // // Suck in all the F tags. // if (tagfseq) { FILE *fseq = fopen(tagfseq, "r"); FILE *fqlt = fopen(tagfqlt, "r"); speedCounter *CT = new speedCounter(" reading F tags %7.0f sequences -- %5.0f sequences/second\r", 1.0, 0x1ffff, true); maxTagsF = sizeOfFile(tagfseq) / 44 + 1000000; TF = new tapperTag [maxTagsF]; while (readTag(tagfuid, fseq, fqlt, TF + numTagsF)) { numTagsF++; if (numTagsF >= maxTagsF) fprintf(stderr, "Too many F tags. Boom.\n"), exit(1); CT->tick(); } delete CT; fclose(fseq); fclose(fqlt); } // // Suck in all the R tags. // if (tagrseq) { FILE *rseq = fopen(tagrseq, "r"); FILE *rqlt = fopen(tagrqlt, "r"); speedCounter *CT = new speedCounter(" reading R tags %7.0f sequences -- %5.0f sequences/second\r", 1.0, 0x1ffff, true); maxTagsR = sizeOfFile(tagrseq) / 44 + 1000000; TR = new tapperTag [maxTagsR];; while (readTag(tagruid, rseq, rqlt, TR + numTagsR)) { numTagsR++; if (numTagsR >= maxTagsR) fprintf(stderr, "Too many R tags. Boom.\n"), exit(1); CT->tick(); } delete CT; fclose(rseq); fclose(rqlt); } maxTagsF = numTagsF; numTagsF = 0; maxTagsR = numTagsR; numTagsR = 0; // // Sort them. // qsort_mt(TF, maxTagsF, sizeof(tapperTag), tapperTagCompare, 4, 4 * 1024 * 1024); qsort_mt(TR, maxTagsR, sizeof(tapperTag), tapperTagCompare, 4, 4 * 1024 * 1024); // // Merge to find pairs, output. 
// char fragout[FILENAME_MAX]; char mateout[FILENAME_MAX]; sprintf(fragout, "%s.frag.tapperTags", prefix); sprintf(mateout, "%s.mate.tapperTags", prefix); tapperTagFile *TOfrag = 0L; tapperTagFile *TOmate = 0L; speedCounter *CF = new speedCounter(" writing frag tags %7.0f sequences -- %5.0f sequences/second\r", 1.0, 0x1ffff, true); speedCounter *CM = new speedCounter(" writing mate tags %7.0f sequences -- %5.0f sequences/second\r", 1.0, 0x1ffff, true); while ((numTagsF < maxTagsF) && (numTagsR < maxTagsR)) { uint64 fID = TF[numTagsF].tagID() & uint64MASK(48); uint64 rID = TR[numTagsR].tagID() & uint64MASK(48); if (fID == rID) { if (TOmate == 0L) TOmate = new tapperTagFile(mateout, 'w'); TOmate->put(TF + numTagsF, TR + numTagsR); numTagsF++; numTagsR++; numTagsM++; CM->tick(); } else if (fID < rID) { if (TOfrag == 0L) TOfrag = new tapperTagFile(fragout, 'w'); TOfrag->put(TF + numTagsF); numTagsF++; CF->tick(); } else { if (TOfrag == 0L) TOfrag = new tapperTagFile(fragout, 'w'); TOfrag->put(TR + numTagsR); numTagsR++; CF->tick(); } } while (numTagsF < maxTagsF) { if (TOfrag == 0L) TOfrag = new tapperTagFile(fragout, 'w'); TOfrag->put(TF + numTagsF); numTagsF++; CF->tick(); } while (numTagsR < maxTagsR) { if (TOfrag == 0L) TOfrag = new tapperTagFile(fragout, 'w'); TOfrag->put(TR + numTagsR); numTagsR++; CF->tick(); } delete CF; delete CM; if (TOmate) TOmate->metaData()->setMeanStdDev(mean, stddev); delete TOmate; delete TOfrag; delete [] TR; delete [] TF; } kmer-code-2013-trunk/tapper/tapperHit.H0000644000000000000000000001562712322046702016523 0ustar rootroot#include "alphabet.h" class tapperGlobalData; // An internal hit. Tapper uses these for computing scores and what // not. It outputs tapperResults, above. 
// class tapperHit { public: uint32 numberOfBaseMismatches(void) { return(_basesMismatch); }; uint32 numberOfColorMismatches(void) { return(_colorMismatch); }; uint32 numberOfColorInconsistencies(void) { return(_colorInconsistent); }; char *printHit(char *OS, uint64 tagid) { sprintf(OS, "0x"uint64FMT"\t"uint32FMT":"uint32FMT":%c\t"uint64FMT","uint64FMT","uint64FMT, tagid, _seqIdx, _seqPos, _rev ? '-' : '+', _basesMismatch, _colorMismatch, _colorInconsistent); return(OS); } // Returns true if the tag is near the correct end of the sequence, // so that it could potentially be happily mated to a tag mapping // in a different sequence (or in a gap). // bool alignToReference(tapperGlobalData *g, uint32 so, uint32 po, char *tag, uint32 len); bool happyNearEnd(bool isFTag, uint32 mean, uint32 stddev, uint32 seqlen) { bool isHappy = false; if (seqlen < mean + 3 * stddev) return(true); if (isFTag) { if (_rev) { // Near end of sequence isHappy = (seqlen - mean - 3 * stddev < _seqPos); } else { // Near bgn of sequence isHappy = (_seqPos < mean + 3 * stddev); } } else { if (_rev) { // Near bgn of sequence isHappy = (_seqPos < mean + 3 * stddev); } else { // Near end of sequence isHappy = (seqlen - mean - 3 * stddev < _seqPos); } } return(isHappy); }; // Returns true if that read is before where this read says it // should be. Returns TRUE for reads of the incorrect orientation. // // ASSUMES it is called on this read being the forward/F3/a read. // bool mateTooFarBefore(tapperHit& that, uint32 mean, uint32 stddev) { // that read is on the sequence after us. if (_seqIdx < that._seqIdx) { //fprintf(stderr, "isBefore()- seq after false.\n"); return(false); } // that read is on the sequence before us. 
if (that._seqIdx < _seqIdx) { //fprintf(stderr, "isBefore()- seq before true.\n"); return(true); } // Misoriented, true if (_rev != that._rev) { //fprintf(stderr, "isBefore()- misoriented true.\n"); return(true); } // FORWARD // // ( -that-> ) -this-> // ----------------------------------- // TTTffffffffffffffffffffffffffffffff // if ((_rev == false) && (that._seqPos + mean + 3 * stddev < _seqPos)) { //fprintf(stderr, "isBefore()- forward true "uint32FMT" + "uint32FMT" + 3 * "uint32FMT" < "uint32FMT"\n", // that._seqPos, mean, stddev, _seqPos); return(true); } // REVERSE // // <-this- ( <-that- ) // ----------------------------------- // TTTTTTTTTTTTTTTTTTTffffffffffffffff // if ((_rev == true) && (that._seqPos < _seqPos + mean - 3 * stddev)) { //fprintf(stderr, "isBefore()- forward true "uint32FMT" < "uint32FMT" + "uint32FMT" - 3 * "uint32FMT"\n", // that._seqPos, _seqPos, mean, stddev); return(true); } //fprintf(stderr, "isBefore()- false.\n"); return(false); } // Returns true if that read is after where this read says it // should be. Returns FALSE for reads of the incorrect orientation. // // ASSUMES it is called on this read being the forward/F3/a read. // bool mateTooFarAfter(tapperHit& that, uint32 mean, uint32 stddev) { // that read is on the sequence after us, true. if (_seqIdx < that._seqIdx) return(true); // that read is on the sequence before us, false. if (that._seqIdx < _seqIdx) return(false); // Misoriented, true if (_rev != that._rev) return(false); // FORWARD // // ( -that-> ) -this-> // ----------------------------------- // ffffffffffffffffTTTTTTTTTTTTTTTTTTT // if ((_rev == false) && (that._seqPos + mean - 3 * stddev < _seqPos)) return(false); // REVERSE // // <-this- ( <-that- ) // ----------------------------------- // ffffffffffffffffffffffffffffffffTTT // if ((_rev == true) && (that._seqPos < _seqPos + mean + 3 * stddev)) return(false); return(true); } // ASSUMES it is called on this read being the forward/F3/a read. 
// bool happy(tapperHit& b, uint32 mean, uint32 stddev) { uint64 dist = ~uint64ZERO; bool isHappy = false; bool isOriented = false; if (_seqIdx != b._seqIdx) return(false); if (_rev != b._rev) return(false); // Check distance apart if (b._seqPos < _seqPos) dist = _seqPos - b._seqPos; else dist = b._seqPos - _seqPos; if ((mean - 3 * stddev < dist) && (dist < mean + 3 * stddev)) isHappy = true; // Check orientations if ((_rev == false) && (b._seqPos < _seqPos)) isOriented = true; if ((_rev == true) && (_seqPos < b._seqPos)) isOriented = true; if (!isHappy) { //fprintf(stderr, "GRUMPY DIST "uint32FMT"\n", dist); return(false); } if (!isOriented) { //fprintf(stderr, "GRUMPY ORIENT "uint32FMT"\n", dist); return(false); } //fprintf(stderr, "HAPPY! "uint32FMT"\n", dist); return(true); }; #if 0 bool operator< (tapperHit const &r) const { return(((_basesMismatch < r._basesMismatch)) || ((_basesMismatch <= r._basesMismatch) && (_colorMismatch < r._colorMismatch)) || ((_basesMismatch <= r._basesMismatch) && (_colorMismatch <= r._colorMismatch) && (_colorInconsistent < r._colorInconsistent))); }; #endif // Argh, should be private, but tapperWorker copies most of the hit to a result. 
//private: uint32 _seqIdx; uint32 _seqPos; uint64 _tagIdx; // 4e9 tags is only 34x of human uint64 _len:6; // Length of tag uint64 _rev:1; // Match is reversecomplement uint64 _pad:17; // Nothing uint64 _basesMismatch:6; // Number of mismatches in ACGT alignment uint64 _colorMismatch:6; // Number of consistent color mismatches uint64 _colorInconsistent:6; // Number of inconsistent color mismatches char _tagCOLOR[TAG_LEN_MAX]; char _refCOLOR[TAG_LEN_MAX]; char _tagACGT[TAG_LEN_MAX]; char _refACGT[TAG_LEN_MAX]; uint8 _tagColorDiffs[MAX_COLOR_MISMATCH_MAPPED]; }; class tapperHitPositionCompare { public: bool operator()(const tapperHit a, const tapperHit b) const { return((a._seqIdx < b._seqIdx) || ((a._seqIdx == b._seqIdx) && (a._seqPos < b._seqPos))); }; }; kmer-code-2013-trunk/tapper/tapper.C0000644000000000000000000011173612415073322016050 0ustar rootroot#include "tapperTag.H" #include "tapperResult.H" #include "tapperAlignment.H" #include "tapperHit.H" #include "tapperGlobalData.H" #include "tapperThreadData.H" #include "tapperComputation.H" #undef VERBOSEWORKER // Very expensive. Compare the obvious O(n^2) happy mate finding // algorithm against the O(n) algorithm. // #undef DEBUG_MATES void* tapperReader(void *G) { tapperGlobalData *g = (tapperGlobalData *)G; tapperComputation *s = 0L; tapperTag a, b; if (g->TF->metaData()->isPairedTagFile()) { if (g->TF->get(&a, &b)) s = new tapperComputation(&a, &b); } else { if (g->TF->get(&a)) s = new tapperComputation(&a, 0L); } return(s); } void tapperWriter(void *G, void *S) { tapperGlobalData *g = (tapperGlobalData *)G; tapperComputation *s = (tapperComputation *)S; tapperResultIndex result; // Build the result index. 
result._tag1id = s->tag1id; result._tag2id = s->tag2id; result._maxColrMismatchMapped = g->maxColorError; result._maxBaseMismatchMapped = g->maxBaseError; result._mean = g->TF->metaData()->mean(); result._stddev = g->TF->metaData()->stddev(); if (s->resultFragmentLen > g->repeatThreshold) { result._numFrag = 0; result._numFragDiscarded = s->resultFragmentLen; } else { result._numFrag = s->resultFragmentLen; result._numFragDiscarded = 0; } result._numFragSingleton = s->resultSingletonLen; result._numFragTangled = s->resultTangledAlignmentLen; result._numMated = s->resultMatedLen; result._numTangled = s->resultTangledLen; result._pad1 = 0; result._pad2 = 0; // Now write. g->TA->write(&result, s->resultFragment, s->resultSingleton, s->resultTangledAlignment, s->resultMated, s->resultTangled, s->alignQualHistogram); delete s; } // Compose the colors from beg to end. // inline char composeColors(char *colors, uint32 beg, uint32 end) { char c = colors[beg]; for (uint32 x=beg; xGS->getSequenceInCore(so_in)->sequence(); strncpy(_refACGT, seq + po_in, _len-1); _refACGT[_len-1] = 0; if (_rev) reverseComplementSequence(_refACGT, _len-1); _refCOLOR[0] = _tagCOLOR[0]; // ALWAYS the reference encoding base, as long as we copy the tag first. _refCOLOR[1] = baseToColor[_refCOLOR[0]][_refACGT[0]]; for (uint32 ti=2; ti<_len; ti++) _refCOLOR[ti] = baseToColor[_refACGT[ti-2]][_refACGT[ti-1]]; _refCOLOR[_len] = 0; } //fprintf(stderr, "tag: %s %s ref: %s %s\n", tag_in, _tagCOLOR, _refCOLOR, _refACGT); // Count the number of color space errors // // Note that errp[] is actaully 1-based; the first position is // never an error; it's the reference base. for (uint32 ti=1; ti<_len; ti++) { if (_tagCOLOR[ti] != _refCOLOR[ti]) { errp[errs] = ti; errc[errs] = 0; errs++; } } // // The following if blocks correct single color errors using very // complicated rules. 
// if (errs == 0) { _colorMismatch = 0; _colorInconsistent = 0; } else if (errs == 1) { // Always corrected, just to get an ACGT alignment. We can't // tell if the color mismatch is an error, or if the error is // adjacent to the mismatch, which would have resulted in a valid // SNP. _colorMismatch = 0; _colorInconsistent = 1; _tagCOREC[errp[0]] = _refCOLOR[errp[0]]; } else if (errs == 2) { bool ok21 = isConsistent(_refCOLOR, _tagCOLOR, 1, _len) && (errp[1] - errp[0] < 4); if (ok21) { // MNP of size 4. _colorMismatch = 2; _colorInconsistent = 0; errc[0] = 1; errc[1] = 1; } else { // Correct 'em. _colorMismatch = 0; _colorInconsistent = 2; _tagCOREC[errp[0]] = _refCOLOR[errp[0]]; _tagCOREC[errp[1]] = _refCOLOR[errp[1]]; } } else if (errs == 3) { bool ok21 = isConsistent(_refCOLOR, _tagCOLOR, 1, errp[2]) && (errp[1] - errp[0] < 4); bool ok22 = isConsistent(_refCOLOR, _tagCOLOR, errp[0]+1, _len) && (errp[2] - errp[1] < 4); bool ok31 = isConsistent(_refCOLOR, _tagCOLOR, 1, _len) && (errp[2] - errp[0] < 5); if (ok31) { // MNP of size 5 _colorMismatch = 3; _colorInconsistent = 0; errc[0] = 1; errc[1] = 1; errc[2] = 1; } else if (ok21) { // First two ok, fix the third. _colorMismatch = 2; _colorInconsistent = 1; _tagCOREC[errp[2]] = _refCOLOR[errp[2]]; errc[0] = 1; errc[1] = 1; } else if (ok22) { // Last two ok, fix the first. _colorMismatch = 2; _colorInconsistent = 1; _tagCOREC[errp[0]] = _refCOLOR[errp[0]]; errc[1] = 1; errc[2] = 1; } else { // Nothing consistent, fix all of 'em. 
_colorMismatch = 0; _colorInconsistent = 3; _tagCOREC[errp[0]] = _refCOLOR[errp[0]]; _tagCOREC[errp[1]] = _refCOLOR[errp[1]]; _tagCOREC[errp[2]] = _refCOLOR[errp[2]]; } } else if (errs == 4) { bool ok21 = isConsistent(_refCOLOR, _tagCOLOR, 1, errp[2]) && (errp[1] - errp[0] < 4); bool ok22 = isConsistent(_refCOLOR, _tagCOLOR, errp[0]+1, errp[2]) && (errp[2] - errp[1] < 4); bool ok23 = isConsistent(_refCOLOR, _tagCOLOR, errp[1]+1, _len) && (errp[3] - errp[2] < 4); bool ok31 = isConsistent(_refCOLOR, _tagCOLOR, 1, errp[3]) && (errp[2] - errp[0] < 5); bool ok32 = isConsistent(_refCOLOR, _tagCOLOR, errp[0]+1, _len) && (errp[3] - errp[1] < 5); bool ok41 = isConsistent(_refCOLOR, _tagCOLOR, 1, _len) && (errp[3] - errp[0] < 6); // With two exceptions, exactly one of the ok's will be true. // The exceptions are: // // a) ok21 and ok23 will imply ok41. However there is nothing to // correct here. We just need to make sure that we stop // processing rules on ok41. // // b) ok41 and ok22. Not sure if this can ever happen, but like // case a, we're ok if we stop after ok41. // if (ok41) { // MNP of size 6 _colorMismatch = 4; _colorInconsistent = 0; errc[0] = 1; errc[1] = 1; errc[2] = 1; errc[3] = 1; } else if (ok31) { // First three ok, fix the last one. _colorMismatch = 3; _colorInconsistent = 1; _tagCOREC[errp[3]] = _refCOLOR[errp[3]]; errc[0] = 1; errc[1] = 1; errc[2] = 1; } else if (ok32) { // Last three ok, fix the first one. _colorMismatch = 3; _colorInconsistent = 1; _tagCOREC[errp[0]] = _refCOLOR[errp[0]]; errc[1] = 1; errc[2] = 1; errc[3] = 1; } else if (ok21) { // First two ok, fix the last two. _colorMismatch = 2; _colorInconsistent = 2; _tagCOREC[errp[2]] = _refCOLOR[errp[2]]; _tagCOREC[errp[3]] = _refCOLOR[errp[3]]; errc[0] = 1; errc[1] = 1; } else if (ok22) { // Middle two ok, fix the outties. 
_colorMismatch = 2; _colorInconsistent = 2; _tagCOREC[errp[0]] = _refCOLOR[errp[0]]; _tagCOREC[errp[3]] = _refCOLOR[errp[3]]; errc[1] = 1; errc[2] = 1; } else if (ok23) { // Last two ok, fix the first two. _colorMismatch = 2; _colorInconsistent = 2; _tagCOREC[errp[0]] = _refCOLOR[errp[0]]; _tagCOREC[errp[1]] = _refCOLOR[errp[1]]; errc[2] = 1; errc[3] = 1; } else { // Nothing consistent, fix all of 'em. _colorMismatch = 0; _colorInconsistent = 4; _tagCOREC[errp[0]] = _refCOLOR[errp[0]]; _tagCOREC[errp[1]] = _refCOLOR[errp[1]]; _tagCOREC[errp[2]] = _refCOLOR[errp[2]]; _tagCOREC[errp[3]] = _refCOLOR[errp[3]]; } } else if (errs == 5) { //fprintf(stderr, "Five errors detected. Code doesn't know what to do.\n"); _colorMismatch = 0; _colorInconsistent = 5; } else if (errs == 6) { //fprintf(stderr, "Six errors detected. Code doesn't know what to do.\n"); _colorMismatch = 0; _colorInconsistent = 6; } else { //fprintf(stderr, "Wow, you got a lot of errors. Code doesn't know what to do.\n"); _colorMismatch = 0; _colorInconsistent = errs; } // Too many errors already? Fail. // if (_colorMismatch + _colorInconsistent > g->maxColorError) return(false); // Compute alignments of corrected color strings. _basesMismatch = 0; _tagACGT[0] = baseToColor[_tagCOREC[0]][_tagCOREC[1]]; _refACGT[0] = baseToColor[_refCOLOR[0]][_refCOLOR[1]]; for (uint32 ti=1; ti<_len; ti++) { _tagACGT[ti] = baseToColor[_tagACGT[ti-1]][_tagCOREC[ti+1]]; _refACGT[ti] = baseToColor[_refACGT[ti-1]][_refCOLOR[ti+1]]; } _tagACGT[_len-1] = 0; _refACGT[_len-1] = 0; for (uint32 ti=0; ti<_len-1; ti++) { if (_tagACGT[ti] != _refACGT[ti]) { _basesMismatch++; _tagACGT[ti] = toUpper[_tagACGT[ti]]; _refACGT[ti] = toUpper[_refACGT[ti]]; } } if (_rev) { // Undo the tag and ref reversals. 
_tagCOLOR[0] = complementSymbol[_tagCOLOR[0]]; reverseString(_tagCOLOR, _len); _tagCOREC[0] = complementSymbol[_tagCOREC[0]]; reverseString(_tagCOREC, _len); _refCOLOR[0] = complementSymbol[_refCOLOR[0]]; reverseString(_refCOLOR, _len); // Reverse complement the alignments reverseComplementSequence(_tagACGT, _len-1); reverseComplementSequence(_refACGT, _len-1); // Adjust the error positions...once we start caring about positions. for (uint32 x=0; x g->maxBaseError) return(false); //fprintf(stderr, "tag: %s %s ref: %s %s "uint32FMT" "uint32FMT" "uint32FMT"\n", // tag_in, _tagCOLOR, _refCOLOR, _refACGT, _basesMismatch, _colorMismatch, _colorInconsistent); // Stuff the errors into the hit. uint32 nn = 0; for (uint32 x=0; xtag1rseq : s->tag1fseq; taglen = s->tag1size; } else { tagseq = (rev) ? s->tag2rseq : s->tag2fseq; taglen = s->tag2size; } for (uint32 i=0; iSS->sequenceNumberOfPosition(pos); pos -= g->SS->startOf(seq); seq = g->SS->IIDOf(seq); // Search ignores first letter, align needs it. This makes for a // very special case, 0, which isn't a full match. if (pos > 0) { pos--; if (h.alignToReference(g, seq, pos, tagseq, taglen) == true) s->addHit(g, h, tag1); } } } void tapperWorker(void *G, void *T, void *S) { tapperGlobalData *g = (tapperGlobalData *)G; tapperThreadData *t = (tapperThreadData *)T; tapperComputation *s = (tapperComputation *)S; // // Get the hits. // #ifdef VERBOSEWORKER fprintf(stderr, "GET HITS %s %s.\n", s->tag1fseq, s->tag2fseq); #endif t->posn1fLen = t->posn1rLen = t->posn2fLen = t->posn2rLen = 0; if (s->tag1size > 0) { g->PS->getUpToNMismatches(s->tag1f, g->maxColorError, t->posn1f, t->posn1fMax, t->posn1fLen); g->PS->getUpToNMismatches(s->tag1r, g->maxColorError, t->posn1r, t->posn1rMax, t->posn1rLen); } if (s->tag2size > 0) { g->PS->getUpToNMismatches(s->tag2f, g->maxColorError, t->posn2f, t->posn2fMax, t->posn2fLen); g->PS->getUpToNMismatches(s->tag2r, g->maxColorError, t->posn2r, t->posn2rMax, t->posn2rLen); } // Quit if nothing there. 
if (t->posn1fLen + t->posn1rLen + t->posn2fLen + t->posn2rLen == 0) return; #ifdef VERBOSEWORKER fprintf(stderr, " raw hits: "uint64FMT" "uint64FMT" "uint64FMT" "uint64FMT"\n", t->posn1fLen, t->posn1rLen, t->posn2fLen, t->posn2rLen); #endif // // Align to reference to get rid of the 3/4 false hits. // #ifdef VERBOSEWORKER fprintf(stderr, "ALIGN TO REFERENCE.\n"); #endif tapperWorker_addHits(t->posn1f, t->posn1fLen, g, s, false, true); tapperWorker_addHits(t->posn1r, t->posn1rLen, g, s, true, true); tapperWorker_addHits(t->posn2f, t->posn2fLen, g, s, false, false); tapperWorker_addHits(t->posn2r, t->posn2rLen, g, s, true, false); // Quit if nothing there. if (s->tag1hitsLen + s->tag2hitsLen == 0) return; // // If mated, tease out any valid mate relationships and build the // results. If fragment, just build. // #ifdef VERBOSEWORKER fprintf(stderr, "REPORT.\n"); #endif // OUTPUT CASE 1 - nothing. if ((s->tag1size == 0) && (s->tag2size == 0)) { assert(0); // OUTPUT CASE 2 - unmated fragments } else if ((s->tag1size > 0) && (s->tag2size == 0)) { s->resultFragment = new tapperResultFragment [s->tag1hitsLen]; s->resultFragmentLen = s->tag1hitsLen; memset(s->resultFragment, 0, sizeof(tapperResultFragment) * s->tag1hitsLen); for (uint32 i=0; itag1hitsLen; i++) { s->resultFragment[i]._seq = s->tag1hits[i]._seqIdx; s->resultFragment[i]._pos = s->tag1hits[i]._seqPos; s->resultFragment[i]._qual._tag1valid = 1; s->resultFragment[i]._qual._tag1basesMismatch = s->tag1hits[i]._basesMismatch; s->resultFragment[i]._qual._tag1colorMismatch = s->tag1hits[i]._colorMismatch; s->resultFragment[i]._qual._tag1colorInconsistent = s->tag1hits[i]._colorInconsistent; s->resultFragment[i]._qual._tag1rev = s->tag1hits[i]._rev; s->resultFragment[i]._qual._diffSize = MAX_COLOR_MISMATCH_MAPPED; memcpy(s->resultFragment[i]._qual._tag1colorDiffs, s->tag1hits[i]._tagColorDiffs, sizeof(uint8) * MAX_COLOR_MISMATCH_MAPPED); } // OUTPUT CASE 3 - unmated fragments (but wrong set, should always be in tag1) 
} else if ((s->tag1size == 0) && (s->tag2size > 0)) { assert(0); // OUTPUT CASE 4 - mated fragments } else if ((s->tag1size > 0) && (s->tag2size > 0)) { if (t->tangle == 0L) t->tangle = new intervalList [g->GS->getNumberOfSequences()]; if ((t->numHappiesMax < s->tag1hitsLen) || (t->numHappiesMax < s->tag2hitsLen)) { delete [] t->tag1happies; delete [] t->tag1mate; delete [] t->tag1tangled; delete [] t->tag2happies; delete [] t->tag2mate; delete [] t->tag2tangled; t->numHappiesMax = MAX(s->tag1hitsLen, s->tag2hitsLen) + 16 * 1024; fprintf(stderr, "Reallocate t->numHappiesMax to "uint32FMT"\n", t->numHappiesMax); t->tag1happies = new uint32 [t->numHappiesMax]; t->tag1mate = new uint32 [t->numHappiesMax]; t->tag1tangled = new uint32 [t->numHappiesMax]; t->tag2happies = new uint32 [t->numHappiesMax]; t->tag2mate = new uint32 [t->numHappiesMax]; t->tag2tangled = new uint32 [t->numHappiesMax]; } #ifdef VERBOSEWORKER fprintf(stderr, " Found "uint32FMT" and "uint32FMT" hits.\n", s->tag1hitsLen, s->tag2hitsLen); #endif // Sort by position. s->sortHitsByPosition(); uint32 mean = g->TF->metaData()->mean(); uint32 stddev = g->TF->metaData()->stddev(); tapperHit *t1h = s->tag1hits; tapperHit *t2h = s->tag2hits; // Pass zero, clear. Tangles are cleared below. // memset(t->tag1happies, 0, sizeof(uint32) * s->tag1hitsLen); memset(t->tag1tangled, 0, sizeof(uint32) * s->tag1hitsLen); memset(t->tag2happies, 0, sizeof(uint32) * s->tag2hitsLen); memset(t->tag2tangled, 0, sizeof(uint32) * s->tag2hitsLen); // Pass one. Count the number of times each fragment is in a // happy relationship. 
// { #ifdef DEBUG_MATES uint32 debug_numHappies = 0; uint64 debug_happyCheck = 0; for (uint32 a=0; atag1hitsLen; a++) { for (uint32 b=0; btag2hitsLen; b++) { if (t1h[a].happy(t2h[b], mean, stddev)) { debug_numHappies += 1; debug_happyCheck += t1h[a]._seqPos ^ t2h[b]._seqPos; } } } #endif uint32 bbaserev = 0; uint32 bbasefor = 0; for (uint32 a=0; atag1hitsLen; a++) { // Both lists of hits are sorted by position. For each tag1 (a) // hit, we first advance the bbase to the first hit that is // within the proper distance before the a tag. Then scan forward // until the b tag is too far away to be mated. uint32 b = 0; if (t1h[a]._rev == true) { while ((bbaserev < s->tag2hitsLen) && (t1h[a].mateTooFarBefore(t2h[bbaserev], mean, stddev))) bbaserev++; b = bbaserev; } else { while ((bbasefor < s->tag2hitsLen) && (t1h[a].mateTooFarBefore(t2h[bbasefor], mean, stddev))) bbasefor++; b = bbasefor; } // Now, until the b read is too far away to be mated, check // for happiness and do stuff. for (; (btag2hitsLen) && (t1h[a].mateTooFarAfter(t2h[b], mean, stddev) == false); b++) { if (t1h[a].happy(t2h[b], mean, stddev)) { #ifdef DEBUG_MATES debug_numHappies -= 1; debug_happyCheck -= t1h[a]._seqPos ^ t2h[b]._seqPos; #endif // Count. t->tag1happies[a]++; t->tag2happies[b]++; // Add the previous mate pair if we just became tangled. // It is possible for both to be == 2, but in that case, // we've already added the previous mate pair. 
if ((t->tag1happies[a] == 2) && (t->tag2happies[b] == 1)) { uint32 c = t->tag1mate[a]; uint32 mn = MIN(t1h[a]._seqPos, t2h[c]._seqPos); uint32 mx = MAX(t1h[a]._seqPos + s->tag1size, t2h[c]._seqPos + s->tag2size); t->tangle[t1h[a]._seqIdx].add(mn, mx-mn); t->tag1tangled[a]++; t->tag2tangled[c]++; } if ((t->tag1happies[a] == 1) && (t->tag2happies[b] == 2)) { uint32 c = t->tag2mate[b]; uint32 mn = MIN(t1h[c]._seqPos, t2h[b]._seqPos); uint32 mx = MAX(t1h[c]._seqPos + s->tag1size, t2h[b]._seqPos + s->tag2size); t->tangle[t1h[c]._seqIdx].add(mn, mx-mn); t->tag1tangled[c]++; t->tag2tangled[b]++; } // Finally, add the current mate pair to the tangle. if ((t->tag1happies[a] >= 2) || (t->tag2happies[b] >= 2)) { uint32 mn = MIN(t1h[a]._seqPos, t2h[b]._seqPos); uint32 mx = MAX(t1h[a]._seqPos + s->tag1size, t2h[b]._seqPos + s->tag2size); t->tangle[t1h[a]._seqIdx].add(mn, mx-mn); t->tag1tangled[a]++; t->tag2tangled[b]++; } // Remember the mate; only valid if tag1happies[a] and // tag2happies[b] both == 1. t->tag1mate[a] = b; t->tag2mate[b] = a; } } } #ifdef DEBUG_MATES if ((debug_numHappies != 0) || (debug_happyCheck != 0)) { FILE *df = fopen("tapper.DEBUG_MATES.err", "w"); fprintf(df, "numHappies: "uint64FMT"\n", debug_numHappies); fprintf(df, "happyCheck: "uint64FMT"\n", debug_happyCheck); for (uint32 a=0; atag1hitsLen; a++) fprintf(df, "a="uint32FMT" ori=%c pos="uint32FMT","uint32FMT"\n", a, t1h[a]._rev ? 'r' : 'f', t1h[a]._seqIdx, t1h[a]._seqPos); for (uint32 b=0; btag2hitsLen; b++) fprintf(df, "b="uint32FMT" ori=%c pos="uint32FMT","uint32FMT"\n", b, t2h[b]._rev ? 
'r' : 'f', t2h[b]._seqIdx, t2h[b]._seqPos); uint32 bbaserev = 0; uint32 bbasefor = 0; for (uint32 a=0; atag1hitsLen; a++) { uint32 b = 0; if (t1h[a]._rev == true) { while ((bbaserev < s->tag2hitsLen) && (t1h[a].mateTooFarBefore(t2h[bbaserev], mean, stddev))) { fprintf(df, "rev bbaserev <- "uint32FMT" + 1\n", bbaserev); bbaserev++; } b = bbaserev; } else { while ((bbasefor < s->tag2hitsLen) && (t1h[a].mateTooFarBefore(t2h[bbasefor], mean, stddev))) { fprintf(df, "rev bbasefor <- "uint32FMT" + 1\n", bbasefor); bbasefor++; } b = bbasefor; } for (; (btag2hitsLen) && (t1h[a].mateTooFarAfter(t2h[b], mean, stddev) == false); b++) { fprintf(df, "test a="uint32FMT" b="uint32FMT"\n", a, b); if (t1h[a].happy(t2h[b], mean, stddev)) { fprintf(df, "HAPPY CLEVER a="uint32FMT" b="uint32FMT"\n", a, b); } } } for (uint32 a=0; atag1hitsLen; a++) { for (uint32 b=0; btag2hitsLen; b++) { if (t1h[a].happy(t2h[b], mean, stddev)) { fprintf(df, "HAPPY EXHAUSTIVE a="uint32FMT" b="uint32FMT"\n", a, b); } } } fclose(df); } assert(debug_numHappies == 0); assert(debug_happyCheck == 0); #endif #ifdef VERBOSEWORKER fprintf(stderr, " Paired.\n"); #endif } // Allocate space for the outputs. #if 0 // We can kind of guess how much to grab. Not perfect. Can do a // lot better. // s->resultFragmentLen = s->tag1hitsLen + s->tag2hitsLen; s->resultSingletonLen = s->tag1hitsLen + s->tag2hitsLen; s->resultTangledAlignmentLen = s->tag1hitsLen + s->tag2hitsLen; s->resultMatedLen = MIN(s->tag1hitsLen, s->tag2hitsLen); s->resultTangledLen = MIN(s->tag1hitsLen, s->tag2hitsLen); #else // Count exactly how much space is needed. The test for // singleton vs fragment is somewhat expensive, so we skip it. 
// for (uint32 a=0; atag1hitsLen; a++) { if (t->tag1tangled[a] != 0) { s->resultTangledAlignmentLen++; } else if (t->tag1happies[a] == 1) { s->resultMatedLen++; } else { s->resultSingletonLen++; s->resultFragmentLen++; } } for (uint32 b=0; btag2hitsLen; b++) { if (t->tag2tangled[b] != 0) { s->resultTangledAlignmentLen++; } else if (t->tag2happies[b] == 1) { s->resultMatedLen++; } else { s->resultSingletonLen++; s->resultFragmentLen++; } } s->resultMatedLen /= 2; //s->resultFragmentLen += 8; //s->resultSingletonLen += 8; //s->resultTangledAlignmentLen += 8; //s->resultMatedLen += 8; //s->resultTangledLen += 8; #endif s->resultFragment = new tapperResultFragment [s->resultFragmentLen]; s->resultSingleton = new tapperResultFragment [s->resultSingletonLen]; s->resultTangledAlignment = new tapperResultFragment [s->resultTangledAlignmentLen]; s->resultMated = new tapperResultMated [s->resultMatedLen]; s->resultTangled = new tapperResultTangled [s->resultTangledLen]; s->resultFragmentLen = 0; s->resultSingletonLen = 0; s->resultTangledAlignmentLen = 0; s->resultMatedLen = 0; s->resultTangledLen = 0; // For anything with zero happies, emit to the // singleton file. for (uint32 a=0; atag1hitsLen; a++) { tapperResultFragment *f; if (t->tag1tangled[a] != 0) { f = s->resultTangledAlignment + s->resultTangledAlignmentLen++; } else if (t->tag1happies[a] == 1) { // Happy; do nothing. We'll do it later. 
f = 0L; } else if (s->tag1hits[a].happyNearEnd(true, mean, stddev, g->GS->getSequenceLength(s->tag1hits[a]._seqIdx))) { f = s->resultSingleton + s->resultSingletonLen++; } else { f = s->resultFragment + s->resultFragmentLen++; } if (f) { memset(f, 0, sizeof(tapperResultFragment)); f->_seq = s->tag1hits[a]._seqIdx; f->_pos = s->tag1hits[a]._seqPos; f->_qual._tag1valid = 1; f->_qual._tag1basesMismatch = s->tag1hits[a]._basesMismatch; f->_qual._tag1colorMismatch = s->tag1hits[a]._colorMismatch; f->_qual._tag1colorInconsistent = s->tag1hits[a]._colorInconsistent; f->_qual._tag1rev = s->tag1hits[a]._rev; f->_qual._diffSize = MAX_COLOR_MISMATCH_MAPPED; memcpy(f->_qual._tag1colorDiffs, s->tag1hits[a]._tagColorDiffs, sizeof(uint8) * MAX_COLOR_MISMATCH_MAPPED); } } for (uint32 b=0; btag2hitsLen; b++) { tapperResultFragment *f; if (t->tag2tangled[b] != 0) { f = s->resultTangledAlignment + s->resultTangledAlignmentLen++; } else if (t->tag2happies[b] == 1) { // Happy; do nothing. We'll do it later. f = 0L; } else if (s->tag2hits[b].happyNearEnd(false, mean, stddev, g->GS->getSequenceLength(s->tag2hits[b]._seqIdx))) { f = s->resultSingleton + s->resultSingletonLen++; } else { f = s->resultFragment + s->resultFragmentLen++; } if (f) { memset(f, 0, sizeof(tapperResultFragment)); f->_seq = s->tag2hits[b]._seqIdx; f->_pos = s->tag2hits[b]._seqPos; f->_qual._tag2valid = 1; f->_qual._tag2basesMismatch = s->tag2hits[b]._basesMismatch; f->_qual._tag2colorMismatch = s->tag2hits[b]._colorMismatch; f->_qual._tag2colorInconsistent = s->tag2hits[b]._colorInconsistent; f->_qual._tag2rev = s->tag2hits[b]._rev; f->_qual._diffSize = MAX_COLOR_MISMATCH_MAPPED; memcpy(f->_qual._tag2colorDiffs, s->tag2hits[b]._tagColorDiffs, sizeof(uint8) * MAX_COLOR_MISMATCH_MAPPED); } } // For anything with a pair of single happies, emit to the happy // mate file. 
for (uint32 a=0; atag1hitsLen; a++) { uint32 b = t->tag1mate[a]; if ((t->tag1happies[a] == 1) && (t->tag2happies[b] == 1)) { tapperResultMated *m = s->resultMated + s->resultMatedLen++; memset(m, 0, sizeof(tapperResultMated)); assert(t->tag1mate[a] == b); assert(t->tag2mate[b] == a); m->_seq = s->tag1hits[a]._seqIdx; m->_pos1 = s->tag1hits[a]._seqPos; m->_pos2 = s->tag2hits[b]._seqPos; m->_qual._tag1valid = 1; m->_qual._tag1basesMismatch = s->tag1hits[a]._basesMismatch; m->_qual._tag1colorMismatch = s->tag1hits[a]._colorMismatch; m->_qual._tag1colorInconsistent = s->tag1hits[a]._colorInconsistent; m->_qual._tag1rev = s->tag1hits[a]._rev; m->_qual._tag2valid = 1; m->_qual._tag2basesMismatch = s->tag2hits[b]._basesMismatch; m->_qual._tag2colorMismatch = s->tag2hits[b]._colorMismatch; m->_qual._tag2colorInconsistent = s->tag2hits[b]._colorInconsistent; m->_qual._tag2rev = s->tag2hits[b]._rev; m->_qual._diffSize = MAX_COLOR_MISMATCH_MAPPED; memcpy(m->_qual._tag1colorDiffs, s->tag1hits[a]._tagColorDiffs, sizeof(uint8) * MAX_COLOR_MISMATCH_MAPPED); memcpy(m->_qual._tag2colorDiffs, s->tag2hits[b]._tagColorDiffs, sizeof(uint8) * MAX_COLOR_MISMATCH_MAPPED); } } // Emit and then clear the tangles. 
{ uint32 simax = g->GS->getNumberOfSequences(); for (uint32 si=0; sitangle[si].numberOfIntervals() > 0) { t->tangle[si].merge(); for (uint32 ti=0; titangle[si].numberOfIntervals(); ti++) { tapperResultTangled *x = s->resultTangled + s->resultTangledLen++; x->_tag1count = 0; x->_tag2count = 0; x->_seq = si; x->_bgn = t->tangle[si].lo(ti); x->_end = t->tangle[si].hi(ti); for (uint32 a=0; atag1hitsLen; a++) { if ((t->tag1tangled[a] > 0) && (x->_seq == s->tag1hits[a]._seqIdx) && (x->_bgn <= s->tag1hits[a]._seqPos) && (s->tag1hits[a]._seqPos <= x->_end)) x->_tag1count++; } for (uint32 b=0; btag2hitsLen; b++) { if ((t->tag2tangled[b] > 0) && (x->_seq == s->tag2hits[b]._seqIdx) && (x->_bgn <= s->tag2hits[b]._seqPos) && (s->tag2hits[b]._seqPos <= x->_end)) x->_tag2count++; } } // This is persistent; clear it for the next mate pair. t->tangle[si].clear(); } } } } } int main(int argc, char **argv) { tapperGlobalData *g = new tapperGlobalData(); fprintf(stderr, "sizeof(tapperResultIndex) -- "sizetFMT"\n", sizeof(tapperResultIndex)); fprintf(stderr, "sizeof(tapperResultQV) -- "sizetFMT"\n", sizeof(tapperResultQV)); fprintf(stderr, "sizeof(tapperResultFragment) -- "sizetFMT"\n", sizeof(tapperResultFragment)); fprintf(stderr, "sizeof(tapperResultMated) -- "sizetFMT"\n", sizeof(tapperResultMated)); fprintf(stderr, "sizeof(tapperResultTangled) -- "sizetFMT"\n", sizeof(tapperResultTangled)); fprintf(stderr, "sizeof(tapperHit) -- "sizetFMT"\n", sizeof(tapperHit)); fprintf(stderr, "sizeof(tapperTag) -- "sizetFMT"\n", sizeof(tapperTag)); int arg=1; int err=0; while (arg < argc) { if (strncmp(argv[arg], "-genomic", 2) == 0) { g->genName = argv[++arg]; } else if (strncmp(argv[arg], "-queries", 2) == 0) { g->qryName = argv[++arg]; } else if (strncmp(argv[arg], "-output", 2) == 0) { g->outName = argv[++arg]; } else if (strncmp(argv[arg], "-begin", 2) == 0) { g->bgnRead = strtouint32(argv[++arg], 0L); g->thisPartition = 0; g->numPartitions = 1; } else if (strncmp(argv[arg], "-end", 2) == 
0) { g->endRead = strtouint32(argv[++arg], 0L); g->thisPartition = 0; g->numPartitions = 1; } else if (strncmp(argv[arg], "-partition", 2) == 0) { g->thisPartition = strtouint32(argv[++arg], 0L); g->numPartitions = strtouint32(argv[++arg], 0L); } else if (strncmp(argv[arg], "-repeatthreshold", 2) == 0) { g->repeatThreshold = strtouint32(argv[++arg], 0L); } else if (strncmp(argv[arg], "-maxcolorerror", 5) == 0) { g->maxColorError = strtouint32(argv[++arg], 0L); } else if (strncmp(argv[arg], "-maxbaseerror", 5) == 0) { g->maxBaseError = strtouint32(argv[++arg], 0L); } else if (strncmp(argv[arg], "-maxmemory", 5) == 0) { g->maxMemory = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-threads", 2) == 0) { g->numThreads = atoi(argv[++arg]); } else if (strncmp(argv[arg], "-verbose", 2) == 0) { g->beVerbose = true; } else { fprintf(stderr, "%s: unknown option '%s'\n", argv[0], argv[arg]); err++; } arg++; } if ((err > 0) || (g->genName == 0L) || (g->qryName == 0L) || (g->outName == 0L)) { fprintf(stderr, "usage: %s [opts]\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, " MANDATORY\n"); fprintf(stderr, " -genomic genomic.fasta\n"); fprintf(stderr, " -queries tags.tapperTags\n"); fprintf(stderr, " -output tapperResultFile directory path\n"); fprintf(stderr, "\n"); fprintf(stderr, " OPTIONAL\n"); fprintf(stderr, "\n"); fprintf(stderr, " -begin b Start aligning at read b (or mate pair b)\n"); fprintf(stderr, " -end e Stop aligning at read e (or mate pair e)\n"); fprintf(stderr, "\n"); fprintf(stderr, " -partition n m Run partition n out of m total partitions.\n"); fprintf(stderr, " This sets -b and -e so that the reads/mate pairs\n"); fprintf(stderr, " are in m partitions. Partitions start at 0 and\n"); fprintf(stderr, " end at m-1.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -repeatthreshold x Do not report fragment alignments for tags\n"); fprintf(stderr, " with more than x alignments. 
Singletons, mated\n"); fprintf(stderr, " tags and are still reported and computed using\n"); fprintf(stderr, " all alignments. The default is "uint32FMT".\n", g->repeatThreshold); fprintf(stderr, "\n"); fprintf(stderr, " -maxcolorerror n\n"); fprintf(stderr, " -maxbaseerror n\n"); fprintf(stderr, "\n"); fprintf(stderr, " -maxmemory m (MB)\n"); fprintf(stderr, " -threads n\n"); fprintf(stderr, " -verbose\n"); exit(1); } g->initialize(); sweatShop *ss = new sweatShop(tapperReader, tapperWorker, tapperWriter); ss->setLoaderQueueSize(16384); ss->setLoaderBatchSize(512); ss->setWorkerBatchSize(1024); ss->setWriterQueueSize(65536); ss->setNumberOfWorkers(g->numThreads); for (uint32 w=0; wnumThreads; w++) ss->setThreadData(w, new tapperThreadData(g)); ss->run(g, g->beVerbose); delete g; delete ss; fprintf(stderr, "\nSuccess! Bye.\n"); return(0); } kmer-code-2013-trunk/tapper/Make.include0000644000000000000000000000313511512763666016702 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../libutil/)/ LIBBIO/ :=$(realpath $/../libbio/)/ LIBSEQ/ :=$(realpath $/../libseq/)/ LIBMERYL/ :=$(realpath $/../libmeryl/)/ LIBKMER/ :=$(realpath $/../libkmer/)/ LIBSIM4/ :=$(realpath $/../libsim4/)/ $/.CXX_SRCS := $/tagger.C $/tapper.C $/tapperconvert.C $/tappermerge.C $/tappersort.C $/tappererrorcorrect.C $/.CXX_EXES := $/tagger $/tapper $/tapperconvert $/tappermerge $/tappersort $/tappererrorcorrect $/.CLEAN :=$/*.o $(eval $/%.d $/%.o: CXXFLAGS+= -I${LIBUTL/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBMERYL/} -I${LIBKMER/} -I${LIBSIM4/}) $/tagger: $/tagger.o ${LIBSIM4/}libsim4.a ${LIBKMER/}libkmer.a ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/tapper: $/tapper.o ${LIBSIM4/}libsim4.a ${LIBKMER/}libkmer.a ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/tapperconvert: $/tapperconvert.o ${LIBSIM4/}libsim4.a ${LIBKMER/}libkmer.a ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/tappermerge: 
$/tappermerge.o ${LIBSIM4/}libsim4.a ${LIBKMER/}libkmer.a ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/tappersort: $/tappersort.o ${LIBSIM4/}libsim4.a ${LIBKMER/}libkmer.a ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/tappererrorcorrect: $/tappererrorcorrect.o ${LIBSIM4/}libsim4.a ${LIBKMER/}libkmer.a ${LIBMERYL/}libmeryl.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a kmer-code-2013-trunk/tapper/tapperResult.H0000644000000000000000000004555012322046702017253 0ustar rootroot#include "util++.H" #include // Tapper generates four kinds of alignments. // // 1) An unmated fragment alignment // 2) A satisfied mate pair alignment // 3) An unsatisfied mate pair alignment // 4) A tangle of mated fragments // // There are SIX output files, an index, an alignment quality // histogram, and the four data files. #define MAX_FRAGMENT_ALIGNMENTS 65536 // 16 bits #define MAX_FRAGMENT_ALIGNMENTS_DISCARDED 1048576 // 20 bits #define MAX_FRAGMENT_ALIGNMENTS_TANGLED 1048576 // 20 bits #define MAX_SINGLETON_ALIGNMENTS 65536 // 16 bits #define MAX_MATED_ALIGNMENTS 8192 // 13 bits #define MAX_TANGLED_ALIGNMENTS 8192 // 13 bits #define MAX_INSERT_SIZE 262144 // 18 bits #define MAX_INSERT_DEVIATION 65536 // 16 bits #define MAX_COLOR_MISMATCH_MAPPED 4 // Info about alignments for one mate pair. One per pair or unmated // fragment. This is the index. // 256 bits. 
// class tapperResultIndex { public: void print(FILE *out) { uint16 id1[4]; uint16 id2[4]; decodeTagID(_tag1id, id1); decodeTagID(_tag2id, id2); fprintf(out, "R\t"uint16FMT"_"uint16FMT"_"uint16FMT"_"uint16FMT"\t"uint16FMT"_"uint16FMT"_"uint16FMT"_"uint16FMT"\t"uint64FMT"/"uint64FMT"\t"uint64FMT"+-"uint64FMT"\tf:"uint64FMT"\td:"uint64FMT"\ts:"uint64FMT"\tm:"uint64FMT"\tt:"uint64FMT"\n", id1[0], id1[1], id1[2], id1[3], id2[0], id2[1], id2[2], id2[3], _maxColrMismatchMapped, _maxBaseMismatchMapped, _mean, _stddev, _numFrag, _numFragDiscarded, _numFragSingleton, _numMated, _numTangled); }; public: uint64 _tag1id; uint64 _tag2id; // Command line, how many color mismatches we looked for, and how // many base mismatches we allowed. These deterine the number and // meaning of the alignment quality histogram. This is stored per // result, so multiple runs can be easily combined. // uint64 _maxColrMismatchMapped:4; uint64 _maxBaseMismatchMapped:4; uint64 _mean:18; // Expected mean and stddev for this pair. uint64 _stddev:16; // Again, per result so we can combine mappings. uint64 _pad1:22; uint64 _numFrag:16; // Number of fragment alignments uint64 _numFragDiscarded:20; // Number of fragment alignments found but not reported uint64 _numFragTangled:20; // Number of fragment alignments in tangled mated uint64 _numFragSingleton:16; // Number of fragment alignments potentially linking uint64 _pad2:8; uint64 _numMated:13; // Number of mated alignments uint64 _numTangled:13; // Number of tangled alignments uint64 _pad3:22; }; // Quality for a mated alignment. // 32 bits for quality // 64 bits for alignment (= 2 * MAX_COLOR_MISMATCH_MAPPED * 8 bits) // // The alignments take up a lot of space. We store both the position // of the difference, and the color in the read. 
// class tapperResultQV { public: uint32 _tag1valid:1; // Tag 1 is valid data uint32 _tag1basesMismatch:4; // Number of mismatches in ACGT alignment uint32 _tag1colorMismatch:4; // Number of consistent color mismatches uint32 _tag1colorInconsistent:4; // Number of inconsistent color mismatches uint32 _tag1rev:1; // Is reverse complement uint32 _tag2valid:1; // Tag 2 is valid data uint32 _tag2basesMismatch:4; // Number of mismatches in ACGT alignment uint32 _tag2colorMismatch:4; // Number of consistent color mismatches uint32 _tag2colorInconsistent:4; // Number of inconsistent color mismatches uint32 _tag2rev:1; // Is reverse complement uint32 _diffSize:4; // Value of MAX_COLOR_MISMATCH_MAPPED. uint8 _tag1colorDiffs[MAX_COLOR_MISMATCH_MAPPED]; uint8 _tag2colorDiffs[MAX_COLOR_MISMATCH_MAPPED]; }; // Unmated fragment alignment. // 96 bits. // class tapperResultFragment { public: void print(FILE *out, tapperResultIndex *idx) { uint16 id[4]; char cor[128]; uint32 err = 0; #warning do not know real tag length memset(cor, '.', 128); cor[26] = 0; if (_qual._tag1valid) { for (uint32 x=0; x<_qual._tag1colorMismatch; x++, err++) { uint32 pos = _qual._tag1colorDiffs[err] & 0x3f; cor[pos] = '*'; } for (uint32 x=0; x<_qual._tag1colorInconsistent; x++, err++) { uint32 pos = _qual._tag1colorDiffs[err] & 0x3f; cor[pos] = bitsToColor[_qual._tag1colorDiffs[err] >> 6]; } decodeTagID(idx->_tag1id, id); fprintf(stdout, "F\t"uint16FMT"_"uint16FMT"_"uint16FMT"_"uint16FMT"\t"uint32FMT"\t"uint32FMT"\t%c\t"uint32FMT"/"uint32FMT"/"uint32FMT"\t'%s'\n", id[0], id[1], id[2], id[3], _seq, _pos, _qual._tag1rev ? 
'r' : 'f', _qual._tag1basesMismatch, _qual._tag1colorMismatch, _qual._tag1colorInconsistent, cor); } if (_qual._tag2valid) { for (uint32 x=0; x<_qual._tag2colorMismatch; x++, err++) { uint32 pos = _qual._tag2colorDiffs[err] & 0x3f; cor[pos] = '*'; } for (uint32 x=0; x<_qual._tag2colorInconsistent; x++, err++) { uint32 pos = _qual._tag2colorDiffs[err] & 0x3f; cor[pos] = bitsToColor[_qual._tag2colorDiffs[err] >> 6]; } decodeTagID(idx->_tag2id, id); fprintf(stdout, "F\t"uint16FMT"_"uint16FMT"_"uint16FMT"_"uint16FMT"\t"uint32FMT"\t"uint32FMT"\t%c\t"uint32FMT"/"uint32FMT"/"uint32FMT"\t'%s'\n", id[0], id[1], id[2], id[3], _seq, _pos, _qual._tag2rev ? 'r' : 'f', _qual._tag2basesMismatch, _qual._tag2colorMismatch, _qual._tag2colorInconsistent, cor); } }; public: uint32 _seq; uint32 _pos; tapperResultQV _qual; }; // Satisfied mate pair alignment. // 128 bits. // class tapperResultMated { public: void print(FILE *out, tapperResultIndex *idx) { uint16 id1[4]; uint16 id2[4]; decodeTagID(idx->_tag1id, id1); decodeTagID(idx->_tag2id, id2); fprintf(stdout, "M\t"uint16FMT"_"uint16FMT"_"uint16FMT"_"uint16FMT"\t"uint32FMT"\t"uint32FMT"\t%c\t"uint32FMT"/"uint32FMT"/"uint32FMT"\t"uint16FMT"_"uint16FMT"_"uint16FMT"_"uint16FMT"\t"uint32FMT"\t"uint32FMT"\t%c\t"uint32FMT"/"uint32FMT"/"uint32FMT"\n", id1[0], id1[1], id1[2], id1[3], _seq, _pos1, _qual._tag1rev ? 'r' : 'f', _qual._tag1basesMismatch, _qual._tag1colorMismatch, _qual._tag1colorInconsistent, id2[0], id2[1], id2[2], id2[3], _seq, _pos2, _qual._tag2rev ? 'r' : 'f', _qual._tag2basesMismatch, _qual._tag2colorMismatch, _qual._tag2colorInconsistent); }; public: uint32 _seq; uint32 _pos1; uint32 _pos2; tapperResultQV _qual; }; // Tangled mate pair alignment. // 128 bits. 
// class tapperResultTangled { public: void print(FILE *out, tapperResultIndex *idx) { uint16 id1[4]; uint16 id2[4]; decodeTagID(idx->_tag1id, id1); decodeTagID(idx->_tag2id, id2); fprintf(stdout, "T\t"uint16FMT"_"uint16FMT"_"uint16FMT"_"uint16FMT"\t"uint32FMT"\t"uint16FMT"_"uint16FMT"_"uint16FMT"_"uint16FMT"\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\t"uint32FMT"\n", id1[0], id1[1], id1[2], id1[3], _tag1count, id2[0], id2[1], id2[2], id2[3], _tag2count, _seq, _bgn, _end); }; public: uint16 _tag1count; // Number of times tag1 is in here uint16 _tag2count; // Number of times tag2 is in here uint32 _seq; // Sequence we hit uint32 _bgn; // Beginning location uint32 _end; // Ending location }; class tapperResult { public: tapperResult() { memset(&idx, 0, sizeof(tapperResultIndex)); fragMax = 0; frag = 0L; singMax = 0; sing = 0L; taliMax = 0; tali = 0L; mateMax = 0; mate = 0L; tangMax = 0; tang = 0L; aqltMax = 0; aqlt = 0L; }; ~tapperResult() { delete [] frag; delete [] sing; delete [] tali; delete [] mate; delete [] tang; delete [] aqlt; }; tapperResultIndex idx; // A single unmated alignment uint32 fragMax; tapperResultFragment *frag; // Tag in a mate pair, mapped near the end of a sequence uint32 singMax; tapperResultFragment *sing; // Tag in a mate pair, involved in a tangle uint32 taliMax; tapperResultFragment *tali; // Happy mated tags uint32 mateMax; tapperResultMated *mate; // Location of tangle uint32 tangMax; tapperResultTangled *tang; uint32 aqltMax; uint32 *aqlt; }; class tapperAlignmentQualityHistogramIndices { public: tapperAlignmentQualityHistogramIndices() { for (uint32 i=0; i<16; i++) for (uint32 j=0; j<16; j++) { _indices[i][j] = 0L; _length[i][j] = ~uint32ZERO; } }; ~tapperAlignmentQualityHistogramIndices() { for (uint32 i=0; i<16; i++) for (uint32 j=0; j<16; j++) delete [] _indices[i][j]; }; // For a given maxColorError and maxBaseError (mapper parameters), // maps between (numBaseMismatch, numColorMismatch, numColorError) // and an index in an array. 
// // A maximum of 16 is allowed on all values. uint32 getLength(uint32 maxBaseError, uint32 maxColorError) { generate(maxBaseError, maxColorError); assert(_length[maxBaseError][maxColorError] < ~uint32ZERO); return(_length[maxBaseError][maxColorError]); }; uint32 getIndex(uint32 maxBaseError, uint32 maxColorError, uint32 numBaseMismatch, uint32 numColorMismatch, uint32 numColorError) { generate(maxBaseError, maxColorError); assert(_length[maxBaseError][maxColorError] < ~uint32ZERO); assert(numBaseMismatch * 256 + numColorMismatch * 16 + numColorError < 16 * 16 * 16); return(_indices[maxBaseError][maxColorError][numBaseMismatch * 256 + numColorMismatch * 16 + numColorError]); }; private: void generate(uint32 maxBaseError, uint32 maxColorError) { if (_indices[maxBaseError][maxColorError] != 0L) return; // min base mismatches for i color mismatches - the min is (I // think always) the sum of the mins for the prime decomposition. // 9 - 3,3,3 -> min 6 base mismatches // 9 - 2,3,4 -> min 5 base mismatches // 9 - 2,2,5 -> min 5 base mismatches // 9 - 2,2,2,3 -> min 5 base mismatches // // max base mismatches is, for the most part, used defined, but 0 // and 1 color mismatches are forced to 0 color mismatches. // // finally, it is impossible to have just one color mismatch. // cm 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 uint32 bmmin[16] = { 0, 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; uint32 maxc = maxColorError; // max color errors given to the mapper uint32 maxb = maxBaseError; // max base errors given to the mapper uint32 index = 0; uint32 *histogramIndices = _indices[maxBaseError][maxColorError] = new uint32 [16 * 16 * 16]; for (uint32 ii=0; ii < 16 * 16 * 16; ii++) histogramIndices[ii] = ~uint32ZERO; // A special case for 0. 
for (uint32 ce=0; ce <= maxc; ce++) { //fprintf(stderr, "histogramIndices["uint32FMT"/"uint32FMT"/"uint32FMT"] = "uint32FMT"\n", 0, 0, ce, index); assert(0 * 256 + 0 * 16 + ce < 16 * 16 * 16); histogramIndices[0 * 256 + 0 * 16 + ce] = index++; } for (uint32 cm=2; cm <= maxc; cm++) for (uint32 ce=0; ce <= maxc - cm; ce++) for (uint32 bm=bmmin[cm]; bm <= maxb; bm++) { //fprintf(stderr, "histogramIndices["uint32FMT"/"uint32FMT"/"uint32FMT"] = "uint32FMT"\n", bm, cm, ce, index); assert(bm * 256 + cm * 16 + ce < 16 * 16 * 16); histogramIndices[bm * 256 + cm * 16 + ce] = index++; } _length[maxBaseError][maxColorError] = index; }; uint32 *_indices[16][16]; uint32 _length[16][16]; }; class tapperResultFile { public: tapperResultFile(char *prefix, char mode) { char filename[FILENAME_MAX]; if ((mode != 'r') && (mode != 'w')) fprintf(stderr, "tapperResultFile()-- mode must be either 'r' or 'w'; you wanted '%c'\n", mode), exit(1); if ((mode == 'r') && (!fileExists(prefix))) fprintf(stderr, "tapperResultFile()-- result directory '%s' doesn't exist.\n", prefix), exit(1); if ((mode == 'w') && (!fileExists(prefix))) { errno = 0; mkdir(prefix, S_IRWXU | S_IRWXG | S_IRWXO); if (errno) fprintf(stderr, "tapperResultFile()-- failed to make result directory '%s': %s\n", prefix, strerror(errno)), exit(1); } sprintf(filename, "%s/tapperMappedIndex", prefix); IDX = new recordFile(filename, 0, sizeof(tapperResultIndex), mode); sprintf(filename, "%s/tapperMappedFragment", prefix); FRAG = new recordFile(filename, 0, sizeof(tapperResultFragment), mode); sprintf(filename, "%s/tapperMappedSingleton", prefix); SING = new recordFile(filename, 0, sizeof(tapperResultFragment), mode); sprintf(filename, "%s/tapperMappedTangledAlignment", prefix); TALI = new recordFile(filename, 0, sizeof(tapperResultFragment), mode); sprintf(filename, "%s/tapperMappedMated", prefix); MATE = new recordFile(filename, 0, sizeof(tapperResultMated), mode); sprintf(filename, "%s/tapperMappedTangled", prefix); TANG = new 
recordFile(filename, 0, sizeof(tapperResultTangled), mode); sprintf(filename, "%s/tapperMappedAlignQual", prefix); AQLT = new recordFile(filename, 0, sizeof(uint32), mode); }; ~tapperResultFile() { delete IDX; delete FRAG; delete SING; delete TALI; delete MATE; delete TANG; delete AQLT; }; static bool validResultFile(char *prefix) { return(fileExists(prefix)); }; uint32 AQIlength(uint32 maxBaseErrors, uint32 maxColorErrors) { //fprintf(stderr, "AQIlength("uint32FMT","uint32FMT") -> "uint32FMT"\n", // maxBaseErrors, maxColorErrors, AQI.getLength(maxBaseErrors, maxColorErrors)); return(AQI.getLength(maxBaseErrors, maxColorErrors)); }; uint32 AQIindex(uint32 maxBaseErrors, uint32 maxColorErrors, uint32 numBaseMismatch, uint32 numColorMismatch, uint32 numColorError) { //fprintf(stderr, "AQIindex("uint32FMT","uint32FMT","uint32FMT","uint32FMT","uint32FMT") -> "uint32FMT"\n", // maxBaseErrors, maxColorErrors, // numBaseMismatch, numColorMismatch, numColorError, // AQI.getIndex(maxBaseErrors, maxColorErrors, numBaseMismatch, numColorMismatch, numColorError)); return(AQI.getIndex(maxBaseErrors, maxColorErrors, numBaseMismatch, numColorMismatch, numColorError)); }; bool read(tapperResult *align) { bool success = true; if (IDX->getRecord(&align->idx) == 0) return(false); uint32 aqilen = AQIlength(align->idx._maxBaseMismatchMapped, align->idx._maxColrMismatchMapped); if (align->idx._numFrag + align->idx._numFragDiscarded + align->idx._numFragSingleton + align->idx._numMated + align->idx._numTangled == 0) aqilen = 0; if (align->idx._numFrag > align->fragMax) { delete [] align->frag; align->fragMax = align->idx._numFrag; align->frag = new tapperResultFragment [align->fragMax]; } if (align->idx._numFragSingleton > align->singMax) { delete [] align->sing; align->singMax = align->idx._numFragSingleton; align->sing = new tapperResultFragment [align->singMax]; } if (align->idx._numFragTangled > align->taliMax) { delete [] align->tali; align->taliMax = align->idx._numFragTangled; 
align->tali = new tapperResultFragment [align->taliMax]; } if (align->idx._numMated > align->mateMax) { delete [] align->mate; align->mateMax = align->idx._numMated; align->mate = new tapperResultMated [align->mateMax]; } if (align->idx._numTangled > align->tangMax) { delete [] align->tang; align->tangMax = align->idx._numTangled; align->tang = new tapperResultTangled [align->tangMax]; } if (aqilen > align->aqltMax) { delete [] align->aqlt; align->aqltMax = aqilen; align->aqlt = new uint32 [align->aqltMax]; } #if 0 fprintf(stderr, "reading: "uint32FMT" "uint32FMT" "uint32FMT" "uint32FMT" "uint32FMT"\n", align->idx._numFrag, align->idx._numFragSingleton, align->idx._numFragTangled, align->idx._numMated, align->idx._numTangled); #endif if (FRAG->getRecord(align->frag, align->idx._numFrag) != align->idx._numFrag) success = false; if (SING->getRecord(align->sing, align->idx._numFragSingleton) != align->idx._numFragSingleton) success = false; if (TALI->getRecord(align->tali, align->idx._numFragTangled) != align->idx._numFragTangled) success = false; if (MATE->getRecord(align->mate, align->idx._numMated) != align->idx._numMated) success = false; if (TANG->getRecord(align->tang, align->idx._numTangled) != align->idx._numTangled) success = false; if (AQLT->getRecord(align->aqlt, aqilen) != aqilen) success = false; return(success); }; void write(tapperResult *align) { write(&align->idx, align->frag, align->sing, align->tali, align->mate, align->tang, align->aqlt); }; void write(tapperResultIndex *idx, tapperResultFragment *frag, tapperResultFragment *sing, tapperResultFragment *tali, tapperResultMated *mate, tapperResultTangled *tang, uint32 *aqlt) { IDX->putRecord(idx); FRAG->putRecord(frag, idx->_numFrag); SING->putRecord(sing, idx->_numFragSingleton); TALI->putRecord(tali, idx->_numFragTangled); MATE->putRecord(mate, idx->_numMated); TANG->putRecord(tang, idx->_numTangled); if (idx->_numFrag + idx->_numFragDiscarded + idx->_numFragSingleton + idx->_numMated + 
idx->_numTangled > 0) AQLT->putRecord(aqlt, AQIlength(idx->_maxBaseMismatchMapped, idx->_maxColrMismatchMapped)); }; private: tapperAlignmentQualityHistogramIndices AQI; recordFile *IDX; recordFile *FRAG; recordFile *SING; recordFile *TALI; recordFile *MATE; recordFile *TANG; recordFile *AQLT; }; kmer-code-2013-trunk/tapper/tappersort.C0000644000000000000000000002156312322046702016755 0ustar rootroot#include "util++.H" #include "tapperTag.H" #include "tapperResult.H" #include "tapperAlignment.H" #include "tapperHit.H" #include "tapperGlobalData.H" #include "tapperThreadData.H" #include "tapperComputation.H" // Reads a tapperAlignmentFile, converts all the alignments to // tapperAlignments (loses mate pair information), and sorts by // position on the reference. // There are (at least) two ways to sort. Merge sort or bucket sort. // // Bucket sort is a little easier, but, without knowing the length of // the reference sequences, we cannot map seq,pos to a bucket. We // also have no memory guarantee; it is possible to have a bucket get // too big. // // Merge sort is more difficult, because of the merge. We have a // memory size guarantee though. uint32 saveFrag(tapperAlignment *ali, uint32 aliLen, tapperResult *res, uint32 fragLen, tapperResultFragment *frag) { for (uint32 i=0; i exactly // one is true. 
if ((f->_qual._tag1valid == 0) && (f->_qual._tag2valid == 0)) fprintf(stderr, "error\n"); assert((f->_qual._tag1valid == 1) || (f->_qual._tag2valid == 1)); assert((f->_qual._tag1valid == 0) || (f->_qual._tag2valid == 0)); if (f->_qual._tag1valid) { memset(ali + aliLen, 0, sizeof(tapperAlignment)); ali[aliLen]._tagid = res->idx._tag1id; ali[aliLen]._seq = f->_seq; ali[aliLen]._pos = f->_pos; ali[aliLen]._basesMismatch = f->_qual._tag1basesMismatch; ali[aliLen]._colorMismatch = f->_qual._tag1colorMismatch; ali[aliLen]._colorInconsistent = f->_qual._tag1colorInconsistent; ali[aliLen]._rev = f->_qual._tag1rev; ali[aliLen]._diffSize = f->_qual._diffSize; memcpy(ali[aliLen]._colorDiffs, f->_qual._tag1colorDiffs, sizeof(uint8) * MAX_COLOR_MISMATCH_MAPPED); aliLen++; } if (f->_qual._tag2valid) { memset(ali + aliLen, 0, sizeof(tapperAlignment)); ali[aliLen]._tagid = res->idx._tag2id; ali[aliLen]._seq = f->_seq; ali[aliLen]._pos = f->_pos; ali[aliLen]._basesMismatch = f->_qual._tag2basesMismatch; ali[aliLen]._colorMismatch = f->_qual._tag2colorMismatch; ali[aliLen]._colorInconsistent = f->_qual._tag2colorInconsistent; ali[aliLen]._rev = f->_qual._tag2rev; ali[aliLen]._diffSize = f->_qual._diffSize; memcpy(ali[aliLen]._colorDiffs, f->_qual._tag2colorDiffs, sizeof(uint8) * MAX_COLOR_MISMATCH_MAPPED); aliLen++; } } return(aliLen); } uint32 saveMate(tapperAlignment *ali, uint32 aliLen, tapperResult *res) { for (uint32 i=0; iidx._numMated; i++) { tapperResultMated *m = res->mate + i; memset(ali + aliLen, 0, sizeof(tapperAlignment)); ali[aliLen]._tagid = res->idx._tag1id; ali[aliLen]._seq = m->_seq; ali[aliLen]._pos = m->_pos1; ali[aliLen]._basesMismatch = m->_qual._tag1basesMismatch; ali[aliLen]._colorMismatch = m->_qual._tag1colorMismatch; ali[aliLen]._colorInconsistent = m->_qual._tag1colorInconsistent; ali[aliLen]._rev = m->_qual._tag1rev; ali[aliLen]._diffSize = m->_qual._diffSize; memcpy(ali[aliLen]._colorDiffs, m->_qual._tag1colorDiffs, sizeof(uint8) * 
MAX_COLOR_MISMATCH_MAPPED); aliLen++; memset(ali + aliLen, 0, sizeof(tapperAlignment)); ali[aliLen]._tagid = res->idx._tag2id; ali[aliLen]._seq = m->_seq; ali[aliLen]._pos = m->_pos2; ali[aliLen]._basesMismatch = m->_qual._tag2basesMismatch; ali[aliLen]._colorMismatch = m->_qual._tag2colorMismatch; ali[aliLen]._colorInconsistent = m->_qual._tag2colorInconsistent; ali[aliLen]._rev = m->_qual._tag2rev; ali[aliLen]._diffSize = m->_qual._diffSize; memcpy(ali[aliLen]._colorDiffs, m->_qual._tag2colorDiffs, sizeof(uint8) * MAX_COLOR_MISMATCH_MAPPED); aliLen++; } return(aliLen); } uint32 sortAndDump(tapperAlignment *ali, uint32 aliLen, char *outputName, uint32 &outputIndex) { char filename[FILENAME_MAX]; if (aliLen == 0) return(0); tapperAlignmentPositionCompare pc; std::sort(ali, ali + aliLen, pc); sprintf(filename, "%s."uint32FMTW(03)".tapperAlignment", outputName, outputIndex); fprintf(stderr, "Writing "uint32FMT" sorted alignments to '%s'\n", aliLen, filename); recordFile *out = new recordFile(filename, 0, sizeof(tapperAlignment), 'w'); out->putRecord(ali, aliLen); delete out; outputIndex++; return(0); } int main(int argc, char **argv) { char *outputName = 0L; uint32 outputIndex = 0; uint32 inputsLen = 0; char *inputs[8192]; uint64 memoryLimit = 1024 * 1024 * 1024; int arg=1; int err=0; while (arg < argc) { if (strncmp(argv[arg], "-memory", 2) == 0) { memoryLimit = strtouint64(argv[++arg], 0L) * 1024 * 1024; } else if (strncmp(argv[arg], "-output", 2) == 0) { outputName = argv[++arg]; } else { if (tapperResultFile::validResultFile(argv[arg]) == false) { fprintf(stderr, "Didn't find tapperResultFile '%s'\n", argv[arg]); err++; } else { inputs[inputsLen++] = argv[arg]; } } arg++; } if ((err) || (inputsLen == 0) || (outputName == 0L)) { fprintf(stderr, "usage: %s [-memory X (MB)] -output prefix input ....\n", argv[0]); exit(1); } { uint32 aliMax = memoryLimit / sizeof(tapperAlignment); uint32 aliLen = 0; tapperAlignment *ali = new tapperAlignment [aliMax]; fprintf(stderr, 
"Can fit "uint32FMT" alignments into "uint64FMT" bytes memory; "uint32FMT" bytes each.\n", aliMax, memoryLimit, (uint32)sizeof(tapperAlignment)); speedCounter S(" %10.0f results (%8.0f results/sec)\r", 1, 100000, true); for (uint32 inputsIdx=0; inputsIdxread(res)) { // Sort and dump if the next result has too many alignments. // if (aliMax < aliLen + (res->idx._numFrag + res->idx._numFragSingleton + res->idx._numFragTangled + res->idx._numMated * 2)) { aliLen = sortAndDump(ali, aliLen, outputName, outputIndex); } aliLen = saveFrag(ali, aliLen, res, res->idx._numFrag, res->frag); aliLen = saveFrag(ali, aliLen, res, res->idx._numFragSingleton, res->sing); aliLen = saveFrag(ali, aliLen, res, res->idx._numFragTangled, res->tali); aliLen = saveMate(ali, aliLen, res); S.tick(); } S.finish(); delete inp; delete res; } aliLen = sortAndDump(ali, aliLen, outputName, outputIndex); delete [] ali; } // // Now the merge. // { char filename[FILENAME_MAX]; tapperAlignment *ali = new tapperAlignment [outputIndex]; recordFile **inp = new recordFile * [outputIndex]; recordFile *out = 0L; bool stillMore = true; uint32 minidx = 0; tapperAlignmentPositionCompare lessthan; for (uint32 x=0; xgetRecord(ali + x); } sprintf(filename, "%s.tapperAlignment", outputName); out = new recordFile(filename, 0, sizeof(tapperAlignment), 'w'); while (stillMore) { // Compare all against the current default minidx, pick the // smallest alignment currently loaded. for (uint32 x=0; xputRecord(ali + minidx); // Read the next record. If no next record, close the file, // and pick a new default minidx if (inp[minidx]->getRecord(ali + minidx) == 0) { delete inp[minidx]; inp[minidx] = 0L; stillMore = false; for (uint32 x=0; x 154 bytes per tag. // // Without QVs, we can fit upto a 60bp tag into 24 bytes, using a // int64 global id instead of the sequence name. At 24B per tag, 10x // human is 27GB. // // Including QVs, we now need to use 7 bits per bp, but we then // truncate QV's to a maximum of 32. 
Not really a problem, since all // the files I've seen have a QV from 4 to 32 inclusive. // // The infrastructure of the bitPackedFile is used, so all we need to // define is the number of words in our tapperTag (which, since we // already do a similar hack for a kMer, isn't so terrible). // // WORDS 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 // BYTES 8 16 24 32 40 48 56 64 72 80 88 96 104 112 120 128 // TAG SIZE 0 7 17 26 35 44 53 62 71 81 90 99 108 117 126 135 (if 5 bits for qv) // TAG SIZE 0 6 14 22 30 38 46 54 62 70 78 86 94 102 110 118 (if 6 bits for qv) // #define TAPPER_TAG_WORDS 4 #define TAG_LEN_MAX 32 inline uint64 encodeTagID(uint16 id[4]) { uint64 tid; tid = id[0]; tid <<= 16; tid |= id[1]; tid <<= 16; tid |= id[2]; tid <<= 16; tid |= id[3]; return(tid); } inline void decodeTagID(uint64 tid, uint16 id[4]) { id[0] = (tid >> 48) & uint64MASK(16); id[1] = (tid >> 32) & uint64MASK(16); id[2] = (tid >> 16) & uint64MASK(16); id[3] = (tid) & uint64MASK(16); } class tapperTag { public: tapperTag() {}; // Expects seq to be "T01023030122303" and qlt to be 0 through 31, // 1-1 correspondence. // void encode(uint16 id[4], char *seq, uint64 *qlt) { uint64 pos = 64; uint32 len = strlen(seq); uint32 i = 0; for (i=1; i 31) { //fprintf(stderr, "tapperTag::encode()-- WARNING! QV too big; "uint64FMT" truncated to 31.\n", qlt[i]); qlt[i] = 31; } setDecodedValue(_w, pos, 7, (letterToBits[seq[i]] << 5) | (qlt[i-1])); pos += 7; } }; // Decodes a tag into seq and qlt (as for encode()), returns the // tagID, or 0 if failure. 
// uint64 decode(uint16 id[4], char *seq, uint64 *qlt) { uint64 pos = 64; uint32 len = 0; uint32 i = 0; seq[0] = bitsToLetter[getDecodedValue(_w, pos, 2)]; qlt[0] = 0; pos += 2; len = getDecodedValue(_w, pos, 7) + 1; pos += 7; for (i=1; i> 5) & 0x03]; qlt[i-1] = x & uint64MASK(5); pos += 7; } seq[len] = 0; qlt[len-1] = 0; #ifdef DEBUG_TAG_ENCODING fprintf(stderr, "tapperTag::decode()-- seq: %s id "uint64HEX" %c/%d len "uint32FMT"\n", seq, _w[0], seq[0], seq[0], len); #endif decodeTagID(_w[0], id); return(_w[0]); }; uint64 tagID(void) const { return(_w[0]); }; uint64 length(void) { return(getDecodedValue(_w, 66, 7)); }; private: friend class tapperTagFile; uint64 _w[TAPPER_TAG_WORDS]; }; class tapperTagFileMetaData { public: uint32 tagSize(void) { return(_minTagLen); }; uint32 isPairedTagFile(void) { return(_isPaired); }; uint32 mean(void) { return(_mean); }; uint32 stddev(void) { return(_stddev); }; void setMeanStdDev(uint32 mean_, uint32 stddev_) { _mean = mean_; _stddev = stddev_; }; private: friend class tapperTagFile; uint32 _minTagLen; uint32 _maxTagLen; uint32 _tagWords; uint32 _isPaired; uint32 _isFragment; uint32 _mean; uint32 _stddev; }; // Notes: // // 1 Stores EITHER mated tags or fragment tags, NEVER both in the same file. // 2 Variable tag size for every tag (even mated tags) // 3 QVs too. // 4 Random access // 5 Reads are assigned a 64-bit UID class tapperTagFile { public: tapperTagFile(char *name, char mode) { if ((mode == 'r') && (fileExists(name) == false)) { fprintf(stderr, "tapperTagFile()-- ERROR! 
Tag file '%s' doesn't exist.\n", name); exit(1); } _tagFile = new recordFile(name, sizeof(tapperTagFileMetaData), TAPPER_TAG_WORDS * sizeof(uint64), mode); _metaData = (tapperTagFileMetaData *)_tagFile->header(); if (_metaData->_tagWords == 0) { _metaData->_minTagLen = ~uint32ZERO; _metaData->_maxTagLen = 0; _metaData->_tagWords = TAPPER_TAG_WORDS; _metaData->_isPaired = 0; _metaData->_isFragment = 0; _metaData->_mean = 0; _metaData->_stddev = 0; } if (_metaData->_tagWords != TAPPER_TAG_WORDS) { fprintf(stderr, "tapperTagFile()-- ERROR! Tag file was built with TAPPER_TAG_WORDS="uint32FMT", but code has %d.\n", _metaData->_tagWords, TAPPER_TAG_WORDS); exit(1); } }; ~tapperTagFile() { // Metadata is updated automagically when tagFile is deleted. delete _tagFile; }; tapperTagFileMetaData *metaData(void) { return(_metaData); }; uint64 numberOfFragmentTags(void) { if (metaData()->isPairedTagFile()) return(0); else return(_tagFile->numRecords()); }; uint64 numberOfMatePairs(void) { if (metaData()->isPairedTagFile()) return(_tagFile->numRecords() / 2); else return(0); }; void setBegin(uint32 bgn) { if (metaData()->isPairedTagFile()) _tagFile->seek(bgn * 2); else _tagFile->seek(bgn); }; void setEnd(uint32 end) { if (metaData()->isPairedTagFile()) _tagFile->limit(end * 2); else _tagFile->limit(end); }; void put(tapperTag *tag) { uint64 len = tag->length(); _metaData->_isFragment = 1; if (_metaData->_isPaired) fprintf(stderr, "tapperTagFile()-- ERROR: file contains mated tags, tried to pet a fragment tag.\n"), exit(1); if (len < _metaData->_minTagLen) _metaData->_minTagLen = len; if (_metaData->_minTagLen < len) _metaData->_maxTagLen = len; _tagFile->putRecord(tag->_w); }; void put(tapperTag *ta1, tapperTag *ta2) { uint64 len1 = ta1->length(); uint64 len2 = ta2->length(); _metaData->_isPaired = 1; if (_metaData->_isFragment) fprintf(stderr, "tapperTagFile()-- ERROR: file contains fragment tags, tried to pet a mated tag.\n"), exit(1); if (len1 < _metaData->_minTagLen) 
_metaData->_minTagLen = len1; if (_metaData->_minTagLen < len1) _metaData->_maxTagLen = len1; if (len2 < _metaData->_minTagLen) _metaData->_minTagLen = len2; if (_metaData->_minTagLen < len2) _metaData->_maxTagLen = len2; _tagFile->putRecord(ta1->_w); _tagFile->putRecord(ta2->_w); }; bool get(tapperTag *tag) { if (_metaData->_isPaired == 1) fprintf(stderr, "tapperTagFile()-- ERROR: file contains mated tags, tried to get a fragment tag.\n"), exit(1); return(_tagFile->getRecord(tag->_w) == 1); }; bool get(tapperTag *ta1, tapperTag *ta2) { if (_metaData->_isFragment == 1) fprintf(stderr, "tapperTagFile()-- ERROR: file contains fragment tags, tried to get a mated tag.\n"), exit(1); return((_tagFile->getRecord(ta1->_w) == 1) && (_tagFile->getRecord(ta2->_w) == 1)); }; private: tapperTagFileMetaData *_metaData; recordFile *_tagFile; }; kmer-code-2013-trunk/tapper/tapperGlobalData.H0000644000000000000000000001163012322046702017757 0ustar rootroot#include "positionDB.H" #include "seqCache.H" #if defined (__SVR4) && defined (__sun) // Solaris defines SS and GS in sys/regset.h #undef GS #undef SS #endif class tapperGlobalData { public: tapperGlobalData(); ~tapperGlobalData(); void initialize(void); private: void convertACGTtoColor(char *color, char *acgt, uint32 len); void rewriteFileAsColorACGT(char *acgtname, char *colorname); public: char *genName; char *qryName; char *outName; uint32 bgnRead; uint32 endRead; uint32 thisPartition; uint32 numPartitions; uint32 repeatThreshold; uint32 maxMemory; uint32 numThreads; bool beVerbose; uint32 tagSize; uint32 maxColorError; uint32 maxBaseError; tapperTagFile *TF; tapperResultFile *TA; seqStream *SS; merStream *MS; positionDB *PS; seqCache *GS; }; tapperGlobalData::tapperGlobalData() { genName = 0L; qryName = 0L; outName = 0L; bgnRead = uint32ZERO; endRead = ~uint32ZERO; thisPartition = 0; numPartitions = 1; repeatThreshold = 500; maxMemory = 0; numThreads = 2; beVerbose = false; maxColorError = 3; maxBaseError = 5; TF = 0L; TA = 
0L; SS = 0L; MS = 0L; PS = 0L; GS = 0L; } tapperGlobalData::~tapperGlobalData() { delete TF; delete TA; delete PS; delete MS; delete SS; delete GS; } void tapperGlobalData::initialize(void) { char colName[FILENAME_MAX]; sprintf(colName, "%s.colorspace", genName); rewriteFileAsColorACGT(genName, colName); TF = new tapperTagFile(qryName, 'r'); if (numPartitions > 1) { if (thisPartition >= numPartitions) { fprintf(stderr, "ERROR: invalid partition n="uint32FMT" m="uint32FMT".\n", thisPartition, numPartitions); exit(1); } // File has either fragment tags OR mate pairs, never both. uint32 numTags = (TF->numberOfFragmentTags() + TF->numberOfMatePairs()) / numPartitions + 1; bgnRead = numTags * thisPartition; endRead = numTags * thisPartition + numTags; fprintf(stderr, "Set partition for "uint64FMT" frags or "uint64FMT" mates: -begin "uint32FMT" -end "uint32FMT"\n", TF->numberOfFragmentTags(), TF->numberOfMatePairs(), bgnRead, endRead); } // Set ranges that we want to compute. TF->setBegin(bgnRead); TF->setEnd(endRead); // See the comments in the loader about the -1. tagSize = TF->metaData()->tagSize() - 1; if (tagSize > 32) { fprintf(stderr, "tag size too big for this implementation.\n"); exit(1); } fprintf(stderr, "Building seqStream\n"); SS = new seqStream(colName); fprintf(stderr, "Building merStream\n"); MS = new merStream(new kMerBuilder(tagSize), SS, true, false); sprintf(colName, "%s.ms"uint32FMT".ce"uint32FMT".posDB", genName, tagSize, maxColorError); if (fileExists(colName)) { fprintf(stderr, "Loading positionDB\n"); PS = new positionDB(colName, tagSize, 0, maxColorError); } else { fprintf(stderr, "Building positionDB\n"); PS = new positionDB(MS, tagSize, 0, 0L, 0L, 0L, 0, 0, maxColorError, maxMemory, beVerbose); PS->saveState(colName); } delete MS; MS = 0L; GS = new seqCache(genName, 0, false); GS->loadAllSequences(); TA = new tapperResultFile(outName, 'w'); // We get races unless we prebuild the AQI stuff. 
I don't want to // make this a requirement of the constructor, since only // multithreaded codes have this problem, and it is perfectly valid // for a file to have alignments with different max error // rates....while tapper will only write with these two maximums. // TA->AQIlength(maxBaseError, maxColorError); } // Inplace converts an acgt sequence to a color-space sequence. void tapperGlobalData::convertACGTtoColor(char *color, char *acgt, uint32 len) { char l = 'n'; // We always start the color encoding assuming the -1 letter is a gap char n = 0; for (uint32 i=0; igetSequenceInCore(); while (f) { convertACGTtoColor(f->sequence(), f->sequence(), f->sequenceLength()); fprintf(CF, "%s\n%s\n", f->header(), f->sequence()); delete f; f = F->getSequenceInCore(); } fclose(CF); delete F; } kmer-code-2013-trunk/README.meryl0000644000000000000000000001436312527037073015172 0ustar rootrootmeryl - in- and out-of-core kmer counting and utilities. Copyright (C) 2002, and GNU GPL, PE Corporation (NY) through the Celera Genomics Group Copyright (C) 2003-2004, and GNU GPL, Applied Biosystems Copyright (C) 2004-2015, and GNU GPL, Brian Walenz ======================================================================= Content: I. What is meryl? II. Command line usage III. Input/Output IV. Affiliated tools V. Terms of use VI. Support I. What is meryl? meryl computes the kmer content of genomic sequences. Kmer content is represented as a list of kmers and the number of times each occurs in the input sequences. The kmer can be restricted to only the forward kmer, only the reverse kmer, or the canonical kmer (lexicographically smaller of the forward and reverse kmer at each location). Meryl can report the histogram of counts, the list of kmers and their counts, or can perform mathematical and set operations on the processed data files. The meryl process can run in one large memory batch, in many small memory batches, or under SGE control, all with or without using multiple CPU cores. 
The maximum kmer size is effectively unlimited, but is set at compile time. Larger kmers use more memory, and are inefficient for counting smaller kmers, and since most applications have involved kmers less than 32 bases, the default compile time limit is 32 bases. The output of meryl is two binary files, called a meryl database, which can be quickly dumped to provide a histogram of counts, or the actual counts. A C++ library is supplied for direct access to the files. The meryl program can perform many mathematical and set operations on multiple database files: min, minexist, max, add, sub, abs, and, nand, or, xor, lessthan, lessthanorequal, greaterthan, greaterthanorequal, and equal. The ATAC pipeline uses meryl to find the unique kmers in two sequences ('lessthanorequal 1'), then computes the 'and' of them to find the unique kmers that exist in both sequences. II. Command line usage A simple invocation: meryl -B -C -m 22 -s /data/references/ecolik12.fasta -o ecoli-22mers The above command will build (-B) a kmer database (-o ecoli-22mers) of the canonical (-C) 22-mers (-m 22) in the FASTA file ecolik12.fasta. The two output files are ecoli-22mers.mcidx and ecoli-22mers.mcdat. meryl -Dh -s ecoli-22mers > ecoli-22mers.fasta The above command will dump a histogram of the kmers in the 'ecoli-22mers' database. The histogram has four columns: kmer-count number-of-kmers fraction-distinct fraction-total [example] The first line tells us that there are X kmers that occur exactly once, that these sequences make up XX% of the kmer composition, and that these sequences account for YY% of all the kmers in the input. meryl -M and -s seq1 -s seq2 -o both The above command will report the kmers that are present in both meryl databases 'seq1' and 'seq2', writing them to a new meryl database 'both'. Run with no options for a list of parameters. See http://kmer.sourceforge.net/wiki/index.php/Getting_Started_with_Meryl for more. III.
Input/Output For counting kmers, input is exactly one multi-FASTA or FASTQ file. The file must be uncompressed. For processing databases, an input database is supplied by the prefix of the two files: the prefix of 'ecoli-ms22.mcidx' and 'ecoli-ms22.mcdat' is 'ecoli-ms22'. Output is a 'meryl database' consisting of two binary files, '*.mcidx' and '*.mcdat'. Output of the histogram command is a single text file to stdout. Output of the threshold dump is a multi-FASTA file, with the name of the sequence set to the count, and the sequence set to the kmer. IV. Affiliated tools Several additional kmer counting and analysis programs are included in the meryl package. simple - The obvious array-based kmer counter. It will allocate 4 bytes per kmer; k=16 will need 16 GB to run. NEEDS UPDATE mapMers - Report stats of mapping kmers to sequences. Three modes of operation: -stats reports mean, min and max for each sequence, along with a log2 histogram of the counts -regions reports regions with kmer coverage -details reports for each kmer in the sequence, the forward and reverse count in the database mapMers-depth - Reports, for each sequence ordinal 's' and position 'p': -count the count (c) of the single kmer that starts at position (p). Format: 's p c' -depth the number (n) of kmers that span position (p). Format: 's p n' -stats the min (m), max (M), ave (a) count of all mers that span position (p). Format: 's p m M a t n' (also reports total count (t) and number of kmers (n)) kmer-mask - Mask and filter a set of sequences (presumed to be reads) by kmer content. Masking can be done to retain novel sequence not in the database, or to retain confirmed sequence present in the database. Filtering will segregate sequences fully, partially or not masked. existDB - (installed by libkmer) Management of existDB files. positionDB - (installed by libkmer) Management of posDB files. V.
Terms of use This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received (LICENSE.txt) a copy of the GNU General Public License along with this program; if not, you can obtain one from http://www.gnu.org/licenses/gpl.txt or by writing to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA VI. Support Brian Walenz (brianwalenz@users.sourceforge.net) Please check the parent project's Sourceforge page at http://kmer.sourceforge.net for details and updates. Last updated: May 13, 2015 kmer-code-2013-trunk/ESTmapper/0000755000000000000000000000000012641613357015016 5ustar rootrootkmer-code-2013-trunk/ESTmapper/scheduler.pm0000644000000000000000000000463411676744271017350 0ustar rootroot#!/usr/local/bin/perl # # Functions for running multiple processes at the same time. # package scheduler; use strict; use POSIX "sys_wait_h"; # Called by "use scheduler;" sub import () { } my $numberOfProcesses = 0; my @processQueue = (); sub schedulerSetNumberOfProcesses { $numberOfProcesses = shift @_; } sub schedulerSubmit { chomp @_; push @processQueue, @_; } sub forkProcess { my $process = shift @_; my $pid; # From Programming Perl, page 167 FORK: if ($pid = fork) { return($pid); # Parent, returns child id } elsif (defined $pid) { exec($process); # Child, runs the process } elsif ($! 
=~ /No more processes/) { sleep 1; # EAGIN, supposedly a recoverable fork error redo FORK; } else { die "Can't fork: $!\n"; } die "scheduler::forkProcess()-- Shouldn't be here.\n"; } sub schedulerFinish { my @processesRunning; my @newProcesses; my $remain = scalar(@processQueue); my $t = localtime(); my $d = time(); print STDERR "----------------------------------------START CONCURRENT $t\n"; while ($remain > 0) { # Reap any processes that have finished undef @newProcesses; foreach my $i (@processesRunning) { if (waitpid($i, &WNOHANG) <= 0) { push @newProcesses, $i; } } undef @processesRunning; @processesRunning = @newProcesses; # Run processes in any available slots while ((scalar(@processesRunning) < $numberOfProcesses) && (scalar(@processQueue) > 0)) { my $process = shift @processQueue; print STDERR "$process\n"; push @processesRunning, forkProcess($process); } $remain = scalar(@processQueue); # If still stuff out there, wait for something to finish. if ($remain > 0) { my $child = waitpid -1, 0; undef @newProcesses; foreach my $i (@processesRunning) { push @newProcesses, $i if ($child != $i); } undef @processesRunning; @processesRunning = @newProcesses; } } while (scalar(@processesRunning) > 0) { waitpid(shift @processesRunning, 0); } $t = localtime(); print STDERR "----------------------------------------END CONCURRENT $t (", time() - $d, " seconds)\n"; } 1; kmer-code-2013-trunk/ESTmapper/mergeCounts.C0000644000000000000000000000205610000450424017375 0ustar rootroot#include #include #include #include int main(int argc, char **argv) { if (argc == 1) { fprintf(stderr, "usage: %s <....>\n", argv[0]); fprintf(stderr, " This is part of the ESTmapper; you don't want to run it by hand.\n"); exit(1); } int numFiles = argc-1; FILE **Fs = new FILE * [numFiles]; for (int i=1; i 0); while () { &scheduler::schedulerSubmit($_); } &scheduler::schedulerFinish(); kmer-code-2013-trunk/ESTmapper/terminate.C0000644000000000000000000000761012322046702017104 0ustar rootroot#include 
"util++.H" #include "bio++.H" #include "sim4.H" #include "seqCache.H" // Terminates an ESTmapper run. // // Splits a fasta file into multiple fasta files based on the first // occurrence of the iid. So, if the iid is in polishes and // list-of-iid, the sequence is written to fasta1. If the iid isn't // in the input (polishes or list-of-iid), put it into fasta3. // Any number of -p and -i can be specified. // // -P polishes fasta1 // -I list-of-iid fasta2 // -O fasta3 // -i input.fasta // // -P polishes MUST be sorted by cDNA iid. Relatively easy to fix this, // just read all the polishes when building an iidReaderWriter, storing the // iid's we see into an array. class iidReaderWriter { public: iidReaderWriter(char *infile, char *otfile, bool ispolishes) { isPolishes = ispolishes; inPolishes = 0L; inFile = 0L; if (isPolishes) { inPolishes = new sim4polishReader(infile); } else { errno = 0; inFile = fopen(infile, "r"); if (errno) fprintf(stderr, "iidReaderWriter-- can't open '%s': %s\n", infile, strerror(errno)), exit(1); } errno = 0; otFile = fopen(otfile, "w"); if (errno) fprintf(stderr, "iidReaderWriter-- can't open '%s': %s\n", otfile, strerror(errno)), exit(1); iids = 0L; }; ~iidReaderWriter() { delete [] iids; if (isPolishes) delete inPolishes; else fclose(inFile); fclose(otFile); }; bool thisIID(uint32 targetiid) { return(iids[targetiid]); }; void writeSequence(seqInCore *S) { fprintf(otFile, ">%s\n%s\n", S->header(), S->sequence()); }; void load(uint32 maxiid) { iids = new bool [maxiid]; for (uint32 i=0; inextAlignment(); while (p) { iids[p->_estID] = true; delete p; p = inPolishes->nextAlignment(); } } else { fscanf(inFile, uint32FMT, &iid); while (!feof(inFile)) { iids[iid] = true; fscanf(inFile, uint32FMT, &iid); } } }; private: bool isPolishes; sim4polishReader *inPolishes; FILE *inFile; FILE *otFile; uint32 iid; bool *iids; }; int main(int argc, char **argv) { uint32 iidRWlen = 0; uint32 iidRWmax = 128; iidReaderWriter **iidRW = new iidReaderWriter* 
[iidRWmax]; FILE *defaultOut = 0L; seqCache *F = 0L; seqInCore *S = 0L; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-P") == 0) { iidRW[iidRWlen++] = new iidReaderWriter(argv[arg+1], argv[arg+2], true); arg+=2; } else if (strcmp(argv[arg], "-I") == 0) { iidRW[iidRWlen++] = new iidReaderWriter(argv[arg+1], argv[arg+2], false); arg+=2; } else if (strcmp(argv[arg], "-O") == 0) { errno = 0; defaultOut = fopen(argv[++arg], "w"); if (errno) fprintf(stderr, "Can't open '%s': %s\n", argv[arg], strerror(errno)), exit(1); } else if (strcmp(argv[arg], "-i") == 0) { F = new seqCache(argv[++arg]); } else { fprintf(stderr, "ESTmapper utility function -- not for human use.\n"); exit(1); } arg++; } if ((iidRWlen == 0) || (defaultOut == 0L) || (F == 0L)) { fprintf(stderr, "spec error.\n"); exit(1); } for (uint32 i=0; iload(F->getNumberOfSequences()); for (uint32 sid=0; ((S = F->getSequenceInCore(sid)) != 0L); sid++) { bool found = false; uint32 iid = S->getIID(); for (uint32 i=0; ithisIID(iid)) { found = true; iidRW[i]->writeSequence(S); break; } } if (found == false) fprintf(defaultOut, "%s\n%s\n", S->header(), S->sequence()); delete S; } return(0); } kmer-code-2013-trunk/ESTmapper/ESTmapper.pl0000644000000000000000000014403511676744271017231 0ustar rootroot#!/usr/bin/perl # Copyright (c) 2002 PE Corporation (NY) through the Celera Genomics Group # Copyright (c) 2003, 2004 Applied Biosystems # Copyright (c) 2004, 2005, 2006 Brian Walenz $| = 1; # Perl version 5.005_03 is too old, it requires two args to mkdir. use strict; use FindBin; use Config; # for @signame use lib "$FindBin::Bin/../lib"; use scheduler; my %prog; my %args; ################################################################################ # # Utility to run a command and check the exit status (sadly, duplicated # in configureESTmapper.pl). 
# ################################################################################ sub runCommand { my $cmd = shift @_; print STDERR "$cmd\n"; my $rc = 0xffff & system($cmd); # Pretty much copied from Programming Perl page 230 return(0) if ($rc == 0); # Bunch of busy work to get the names of signals. Is it really worth it?! # my @signame; if (defined($Config{sig_name})) { my $i = 0; foreach my $n (split('\s+', $Config{sig_name})) { $signame[$i] = $n; $i++; } } my $error = "ERROR: $cmd\n failed with "; if ($rc == 0xff00) { $error .= "$!\n"; } elsif ($rc > 0x80) { $rc >>= 8; $error .= "exit status $rc\n"; } else { if ($rc & 0x80) { $rc &= ~0x80; $error .= "coredump from "; } if (defined($signame[$rc])) { $error .= "signal $signame[$rc]\n"; } else { $error .= "signal $rc\n"; } } print STDERR $error; return(1); } ################################################################################ # # Command line parsing and configuration # ################################################################################ sub setExecutables { my $exechome = "$FindBin::Bin"; $prog{'ESTmapper'} = "$exechome/ESTmapper.pl"; $prog{'seagen'} = "$exechome/seagen"; $prog{'mergeCounts'} = "$exechome/mergeCounts"; $prog{'filterEST'} = "$exechome/filterEST"; $prog{'filterMRNA'} = "$exechome/filterMRNA"; $prog{'filterNULL'} = "$exechome/filterNULL"; $prog{'sim4db'} = "$exechome/sim4db"; $prog{'leaff'} = "$exechome/leaff"; $prog{'meryl'} = "$exechome/meryl"; $prog{'cleanPolishes'} = "$exechome/cleanPolishes"; $prog{'toFILTER'} = "$exechome/filterPolishes"; $prog{'sortHits'} = "$exechome/sortHits"; $prog{'sortPolishes'} = "$exechome/sortPolishes"; $prog{'parseSNPs'} = "$exechome/parseSNP"; $prog{'pickBest'} = "$exechome/pickBestPolish"; $prog{'positionDB'} = "$exechome/positionDB"; $prog{'terminate'} = "$exechome/terminate"; foreach my $e (keys %prog) { die "Can't find/execute $e ('$prog{$e}')\n" if (! 
-e $prog{$e}); } } sub parseArgs (@) { my @ARGS = @_; $args{'scriptVersion'} = "10"; $args{'startTime'} = time(); while (scalar(@ARGS) > 0) { my $arg = shift @ARGS; if (($arg =~ m/^-dir/) || # depricated ($arg =~ m/^-path/) || # depricated ($arg =~ m/^-outputdir/) || ($arg =~ m/^-mapdir/)) { $args{'path'} = shift @ARGS; } elsif (($arg =~ m/^-genomedir/) || ($arg =~ m/-genome/)) { # depricated $args{'genomedir'} = shift @ARGS; } elsif (($arg =~ m/^-map(est)/) || ($arg =~ m/^-map(mrna)/) || ($arg =~ m/^-map(snp)/)) { $args{'runstyle'} = $1; $args{'queries'} = shift @ARGS; } elsif ($arg =~ m/^-restart/) { $args{'runstyle'} = "restart"; $args{'path'} = shift @ARGS; } elsif ($arg =~ m/^-help/) { $args{'runstyle'} = "help"; } elsif ($arg =~ m/^-time/) { $args{'runstyle'} = "time"; } elsif ($arg =~ m/^-verbose/) { $args{'verbose'} = 1; } # # RUN options # elsif ($arg =~ m/^-runlater/) { $args{'runlater'} = 1; } # # LSF options # # # SGE options # elsif ($arg =~ m/^-sge$/) { $args{'sgename'} = shift @ARGS; } elsif (($arg =~ m/^-(sgeoptions)/) || ($arg =~ m/^-(sgesearch)/) || ($arg =~ m/^-(sgefilter)/) || ($arg =~ m/^-(sgepolish)/) || ($arg =~ m/^-(sgefinish)/)) { $args{$1} = shift @ARGS; } # # search options # elsif (($arg =~ m/^-(searchopts)/) || ($arg =~ m/^-(localsearches)/) || ($arg =~ m/^-(searchthreads)/) || ($arg =~ m/^-(hitsortmemory)/) || ($arg =~ m/^-(mermaskfile)/) || ($arg =~ m/^-(merignore)/)) { $args{$1} = shift @ARGS; } # # filter options # elsif (($arg =~ m/^-(hitsortmemory)/)) { $args{$1} = shift @ARGS; } elsif ($arg =~ m/^-nofilter/) { $args{'nofilter'} = 1; } # # polish options # elsif (($arg =~ m/^-(mincoverage)/) || ($arg =~ m/^-(minidentity)/) || ($arg =~ m/^-(minlength)/) || ($arg =~ m/^-(minsim4coverage)/) || ($arg =~ m/^-(minsim4identity)/) || ($arg =~ m/^-(minsim4length)/) || ($arg =~ m/^-(relink)/) || ($arg =~ m/^-(alwaysprint)/) || ($arg =~ m/^-(batchsize)/) || ($arg =~ m/^-(numbatches)/) || ($arg =~ m/^-(localpolishes)/)) { $args{$1} = shift 
@ARGS; } elsif ($arg =~ m/^-interspecies/) { $args{'interspecies'} = 1; } elsif ($arg =~ m/^-aligns/) { $args{'aligns'} = 1; } elsif ($arg =~ m/^-noaligns/) { delete $args{'aligns'}; } elsif ($arg =~ m/^-abort/) { $args{'abort'} = 1; } elsif ($arg =~ m/^-yn/) { $args{'nofilter'} = 1; $args{'sim4-yn'} = 1; } # # finish options # elsif ($arg =~ m/^-cleanup/) { $args{'cleanup'} = shift @ARGS; } elsif ($arg =~ m/^-nocleanup/) { delete $args{'cleanup'}; } elsif ($arg =~ m/^-savetemporary/) { $args{'savetemporary'} = 1; } # # Are we installed correctly? # elsif ($arg =~ m/-justtestingifitworks/) { exit(0); } else { die "ESTmapper/configure-- unknown option '$arg'\n"; } } # Check we have a path! # ($args{'path'} eq "") and die "ERROR: ESTmapper/configure-- no directory given.\n"; #print STDERR "CONF $args{'genomedir'}\n"; #print STDERR "CONF $args{'queries'}\n"; #print STDERR "CONF $args{'path'}\n"; # Be tolerant of relative paths, but don't use them! # $args{'genomedir'} = "$ENV{'PWD'}/$args{'genomedir'}" if (defined($args{'genomedir'}) && ($args{'genomedir'} !~ m!^/!)); $args{'queries'} = "$ENV{'PWD'}/$args{'queries'}" if (defined($args{'queries'}) && ($args{'queries'} !~ m!^/!)); $args{'path'} = "$ENV{'PWD'}/$args{'path'}" if (defined($args{'path'}) && ($args{'path'} !~ m!^/!)); # Make some organization # mkdir "$args{'path'}" if (! -d "$args{'path'}"); mkdir "$args{'path'}/0-input" if (! -d "$args{'path'}/0-input"); mkdir "$args{'path'}/1-search" if (! -d "$args{'path'}/1-search"); mkdir "$args{'path'}/2-filter" if (! -d "$args{'path'}/2-filter"); mkdir "$args{'path'}/3-polish" if (! -d "$args{'path'}/3-polish"); # If told to restart, suck in the original configration, but # don't overwrite things already defined. # if ($args{'runstyle'} eq "restart") { if (! 
-e "$args{'path'}/.runOptions") { print STDERR "ESTmapper/restart-- Nothing to restart!\n"; exit; } delete $args{'runstyle'}; open(F, "< $args{'path'}/.runOptions") or die "Failed to open '$args{'path'}/.runOptions' to read options.\n"; while () { chomp; if (m/\s*(\S+)\s*=\s*(.*)\s*$/) { $args{$1} = $2 if (!defined($args{$1})); } else { die "Invalid runOption line '$_'\n"; } } close(F); } # Write the current set of args to the runOptions file # open(F, "> $args{'path'}/.runOptions") or die "Failed to open '$args{'path'}/.runOptions' to save options.\n"; foreach my $k (keys %args) { #print STDERR "DEBUG $k=$args{$k}\n"; print F "$k=$args{$k}\n"; } close(F); } sub configure { my $path = $args{'path'}; print STDERR "ESTmapper: Performing a configure.\n"; ($args{'genomedir'} eq "") and die "ERROR: ESTmapper/configure-- no genomic sequences given.\n"; ($args{'queries'} eq "") and die "ERROR: ESTmapper/configure-- no cDNA sequences given.\n"; (! -f $args{'queries'}) and die "ERROR: ESTmapper/configure-- can't find the cdna sequence '$args{'queries'}'\n"; # XXX: We should check that the genome dir is valid and complete. # symlink "$args{'genomedir'}", "$path/0-input/genome" if (! -d "$path/0-input/genome"); # Check the input files exist, create symlinks to them, and find/build index files # symlink "$args{'queries'}", "$path/0-input/cDNA.fasta" if ((! -f "$path/0-input/cDNA.fasta")); symlink "$args{'queries'}idx", "$path/0-input/cDNA.fastaidx" if ((! -f "$path/0-input/cDNA.fastaidx") && (-f "$args{'queries'}idx")); if (! -f "$path/0-input/cDNA.fastaidx") { print STDERR "ESTmapper/configure-- Generating the index for '$path/0-input/cDNA.fasta'\n"; runCommand("$prog{'leaff'} -F $path/0-input/cDNA.fasta") and die "Failed.\n"; } # Create a .runInformaiton file, containing supposedly useful information # about this run. 
# my $time = time(); $args{'runInfoFile'} = "$args{'path'}/.runInformation.$time"; # Write some information and the args to a run info file # open(F, "> $args{'runInfoFile'}"); print F "startTime: $time (", scalar(localtime($time)), ")\n"; print F "operator: $ENV{'USER'}\n"; print F "host: " . `uname -a`; print F "version: $args{'scriptVersion'}\n"; print F "parameters:"; foreach my $k (keys %args) { print F "$k=$args{$k}\n"; } close(F); unlink "$args{'path'}/.runInformation"; symlink "$args{'path'}/.runInformation.$time", "$args{'path'}/.runInformation"; print STDERR "ESTmapper: configured.\n"; } ################################################################################ # # Signal Finding # ################################################################################ sub submitFilter (@) { my $watch = join ",", @_; my $path = $args{'path'}; open(F, "> $path/1-search/filter-restart.sh"); print F "#!/bin/sh\n"; print F "#\n"; print F "# Attempt to (re)configure SGE. For reasons Bri doesn't know,\n"; print F "# jobs submitted to SGE, and running under SGE, fail to read his\n"; print F "# .tcshrc (or .bashrc, limited testing), and so they don't setup\n"; print F "# SGE (or ANY other paths, etc) properly. For the record,\n"; print F "# interactive SGE logins (qlogin, etc) DO set the environment.\n"; print F "#\n"; print F ". 
\$SGE_ROOT/\$SGE_CELL/common/settings.sh\n"; print F "/usr/bin/perl $prog{'ESTmapper'} -restart $path\n"; close(F); my $cmd; $cmd = "qsub -cwd -j y -o $path/stage2.sgeout "; $cmd .= " $args{'sgeoptions'} " if (defined($args{'sgeoptions'}));; $cmd .= " $args{'sgefilter'} " if (defined($args{'sgefilter'})); $cmd .= " -N \"f$args{'sgename'}\" "; $cmd .= " -hold_jid $watch " if ($watch ne ""); $cmd .= " $path/1-search/filter-restart.sh"; die "Failed to submit job to SGE.\n" if (runCommand($cmd)); }


#  Writes $path/3-polish/finish-restart.sh (a wrapper that sources the SGE
#  settings and re-invokes ESTmapper with -restart) and submits it with
#  qsub.  The job names passed in @_ are joined with commas and given to
#  -hold_jid, so the finish job waits for them; with no names the job is
#  submitted without a hold.  Dies if the script cannot be written or the
#  submission fails.
#
sub submitFinish (@) {
    my $watch  = join ",", @_;
    my $path   = $args{'path'};
    my $script = "$path/3-polish/finish-restart.sh";

    open(my $sh, "> $script") or die "ESTmapper-- Can't write '$script': $!\n";
    print $sh "#!/bin/sh\n";
    print $sh "#\n";
    print $sh "#  Attempt to (re)configure SGE.  For reasons Bri doesn't know,\n";
    print $sh "#  jobs submitted to SGE, and running under SGE, fail to read his\n";
    print $sh "#  .tcshrc (or .bashrc, limited testing), and so they don't setup\n";
    print $sh "#  SGE (or ANY other paths, etc) properly.  For the record,\n";
    print $sh "#  interactive SGE logins (qlogin, etc) DO set the environment.\n";
    print $sh "#\n";
    print $sh ". \$SGE_ROOT/\$SGE_CELL/common/settings.sh\n";
    print $sh "/usr/bin/perl $prog{'ESTmapper'} -restart $path\n";
    close($sh);

    my $cmd;
    $cmd  = "qsub -cwd -j y -o $path/stage3.sgeout ";
    $cmd .= " $args{'sgeoptions'} " if (defined($args{'sgeoptions'}));
    $cmd .= " $args{'sgefinish'} "  if (defined($args{'sgefinish'}));
    $cmd .= " -N \"o$args{'sgename'}\" ";
    $cmd .= " -hold_jid $watch "    if ($watch ne "");
    $cmd .= " $path/3-polish/finish-restart.sh";

    die "Failed to submit job to SGE.\n" if (runCommand($cmd));
}


#  Stage 1: run the signal search for every genome segment listed in
#  $path/0-input/genome/segments.
#
#  Returns immediately if $path/1-search/allDone exists.  Otherwise it
#    - picks search options from $args{'runstyle'},
#    - locates (or builds with meryl) the mer mask file,
#    - rewrites $path/1-search/search.sh on every call,
#    - runs the segments that have no <segment>.success file, either by
#      asking the user ('runlater'), through SGE array jobs ('sgename'),
#      or locally through the scheduler,
#    - checks that every segment's count file has one line per cDNA.
#
#  The 'runlater' and SGE paths exit the process; the local path dies if
#  any segment failed, and creates allDone on success.
#
sub search {
    my $startTime = time();
    my $path      = $args{'path'};

    #  If we're all done, just get outta here.
    return if (-e "$path/1-search/allDone");

    my $mersize = ($args{'mersize'} or 20);

    my $searchopts = "";
    $searchopts = "-maxintron 2000000 -singlelength 20 -multiplelength 30 -smallsequence 100" if ($args{'runstyle'} eq "est");
    $searchopts = "-maxintron 2000000 -singlelength 30 -multiplelength 50 -smallsequence 0" if ($args{'runstyle'} eq "mrna");
    $searchopts = "-maxintron 2000000 -singlecoverage 0.3 -multiplecoverage 0.3 -smallsequence 10000000 -extendminimum 100 -extendweight 2" if ($args{'runstyle'} eq "snp");

    #  Keep user options separated from the run-style defaults.
    $searchopts .= " $args{'searchopts'}" if (defined($args{'searchopts'}));

    my $numproc   = ($args{'localsearches'} or 4);
    my $numthread = ($args{'searchthreads'} or 2);

    my $cdnaInInput = int(`$prog{'leaff'} -F $path/0-input/cDNA.fasta -d`);

    #  Look for a mer masking file, or use the one supplied.
    #
    if (!defined($args{'mermaskfile'})) {
        $args{'merignore'}   = 1000 if (!defined($args{'merignore'}));
        $args{'merignore'}   = substr("000000$args{'merignore'}", -4);
        $args{'mermaskfile'} = "$args{'genomedir'}/frequentMers-ge$args{'merignore'}.fasta";
    }

    if (($args{'mermaskfile'} ne "none") && (! -e $args{'mermaskfile'})) {
        print STDERR "ESTmapper/search-- Didn't find mer mask file '$args{'mermaskfile'}', attempting\n";
        print STDERR "ESTmapper/search-- create it.\n";

        my $cmd;
        $cmd  = "$prog{'meryl'}";
        $cmd .= " -Dt -n $args{'merignore'} ";
        $cmd .= " -s \"$args{'genomedir'}//genome\"";
        $cmd .= " > \"$args{'genomedir'}/frequentMers-ge$args{'merignore'}.fasta\"";

        if (runCommand($cmd)) {
            die "ESTmapper/search-- Failed to create mask file.\n";
        }
    }

    if (($args{'mermaskfile'} ne "none") && (! -e $args{'mermaskfile'})) {
        print STDERR "ESTmapper/search-- Can't find mer mask file '$args{'mermaskfile'}'.\n";
        print STDERR "ESTmapper/search-- Perhaps your genome isn't installed correctly?\n";
        print STDERR "ESTmapper/search-- Try a different mersize?\n";
        exit(1);
    }

    #  The memory limit is only reported to the user, so a missing file
    #  is not fatal.
    my $farmMemory = "unknown";
    if (open(my $mem, "< $path/0-input/memoryLimit")) {
        my $line = <$mem>;
        close($mem);
        if (defined($line)) {
            chomp $line;
            $farmMemory = $line;
        }
    }

    #  Create a bunch of scripts to process
    #
    #  Rewrite the command everytime.  This fixes the problem where
    #  we would, say, change the number of threads...
    #
    open(my $sh, "> $path/1-search/search.sh") or die "ESTmapper/search-- Can't write '$path/1-search/search.sh': $!\n";
    print $sh "#!/bin/sh\n";
    print $sh "\n";
    print $sh "jid=\$SGE_TASK_ID\n";
    print $sh "if [ x\$jid = x -o x\$jid = xundefined ] ; then\n";
    print $sh "  if [ x\$1 = x ] ; then\n";
    print $sh "    echo \"ERROR: I need a job-id on the command line or in \$SGE_TASK_ID\"\n";
    print $sh "    exit 1\n";
    print $sh "  fi\n";
    print $sh "  jid=`expr \$1 + 1`\n";
    print $sh "fi\n";
    print $sh "\n";
    print $sh "jid=`head -\$jid $path/0-input/genome/segments | tail -1`\n";
    print $sh "\n";
    print $sh "if [ -e \"$path/1-search/\$jid.success\" ] ; then\n";
    print $sh "  exit\n";
    print $sh "fi\n";
    print $sh "\n";
    print $sh "$prog{'seagen'} \\\n";
    print $sh " -verbose \\\n" if ($args{'verbose'});
    print $sh " -binary \\\n";
    print $sh " -mersize $mersize \\\n";
    print $sh " -numthreads $numthread \\\n";
    print $sh " $searchopts \\\n";
    print $sh " -cdna $path/0-input/cDNA.fasta \\\n";
    print $sh " -genomic $path/0-input/genome/genome.seqStore \\\n";
    print $sh " -positions $path/0-input/genome/seg\$jid.posDB \\\n";
    print $sh " -mask $args{'mermaskfile'} \\\n" if ($args{'mermaskfile'} ne "none");
    print $sh " -output $path/1-search/\$jid.hits \\\n";
    print $sh " -count $path/1-search/\$jid.count \\\n";
    print $sh "&& \\\n";
    print $sh "touch $path/1-search/\$jid.success\n";
    close($sh);

    chmod 0755, "$path/1-search/search.sh";

    #  Read the list of segments to figure out which segments we need to run.
    #
    my @searchesToRun;

    open(my $segs, "< $path/0-input/genome/segments") or die "Can't open genome segments list!\n";
    while (my $seg = <$segs>) {
        chomp $seg;
        if (! -e "$path/1-search/$seg.success") {
            print STDERR "ESTmapper/search-- search segment $seg not computed.\n";
            push @searchesToRun, $seg;
        }
    }
    close($segs);

    #  Run searches.  If the search terminated properly, the
    #  hit-counts file should exist.  Run (maybe re-run) the search if
    #  it isn't there.
    #
    if (defined($args{'runlater'})) {
        my $jobs = join " ", @searchesToRun;
        print STDERR "ESTmapper/search-- Please run the jobs:\n";
        print STDERR "ESTmapper/search-- $jobs\n";
        print STDERR "ESTmapper/search-- using:\n";
        print STDERR "ESTmapper/search-- $path/1-search/search.sh\n";
        exit(0);
    } elsif (defined($args{'sgename'})) {
        if (scalar(@searchesToRun) > 0) {
            print STDERR "ESTmapper/search-- SGE mode requested; ", scalar @searchesToRun, " processes to compute,\n";
            print STDERR "ESTmapper/search-- SGE mode requested; each with $numthread threads,\n";
            print STDERR "ESTmapper/search-- SGE mode requested; $farmMemory MB per process.\n";

            #  Don't resubmit jobs that are already done, and do
            #  submit the smallest number of jobs to finish.
            #  Bugs here should be fixed in 2-search.pl as well.

            my @watchJobs;

            my $fJob = shift @searchesToRun;
            my $lJob = $fJob;

            while (defined($lJob)) {
                my $nJob = shift @searchesToRun;

                #  Close the current run of consecutive segments when the
                #  list is exhausted or the next segment is not adjacent.
                #  Definedness is tested first so that undef is never
                #  used as a number.
                if ((!defined($nJob)) || ($lJob + 1 != $nJob)) {

                    #  SGE expects jobs to start at 1, but we start at 0.
                    $fJob++;
                    $lJob++;

                    my $nShow = defined($nJob) ? $nJob : "";
                    print STDERR "Sumbit $fJob - $lJob (njob=$nShow)\n";

                    my $cmd;
                    $cmd  = "qsub -cwd -j y -o $path/1-search/sgeout-\\\$TASK_ID ";
                    $cmd .= " $args{'sgeoptions'} " if (defined($args{'sgeoptions'}));
                    $cmd .= " $args{'sgesearch'} "  if (defined($args{'sgesearch'}));
                    $cmd .= " -N \"s$args{'sgename'}.$fJob\" ";
                    $cmd .= " -t $fJob-$lJob ";
                    $cmd .= "$path/1-search/search.sh";

                    push @watchJobs, "s$args{'sgename'}.$fJob";

                    die "Failed to submit job to SGE.\n" if (runCommand($cmd));

                    $fJob = $nJob;
                }

                $lJob = $nJob;
            }

            #  Submit the filter, and make it wait for the searches, if they were submitted.
            #
            submitFilter(@watchJobs);

            print STDERR "ESTmapper/search-- Searches submitted. Rest of run is on the farm.\n";

            exit(0);
        }
    } else {
        print STDERR "ESTmapper/search-- Local mode requested; ", scalar @searchesToRun, " processes to compute,\n";
        print STDERR "ESTmapper/search-- Local mode requested; $numproc concurrent processes,\n";
        print STDERR "ESTmapper/search-- Local mode requested; each with $numthread threads.\n";

        #  Run the searches.  We use the scheduler, then check
        #  everything at the end.  This is a little less friendly
        #  to the user, but much easier for the implementor.
        #
        if (scalar(@searchesToRun) > 0) {
            &scheduler::schedulerSetNumberOfProcesses($numproc);

            foreach my $s (@searchesToRun) {
                print STDERR "sh $path/1-search/search.sh $s\n";
                &scheduler::schedulerSubmit("sh $path/1-search/search.sh $s");
            }

            &scheduler::schedulerFinish();
        }
    }

    #  See if anything failed.
    #
    print STDERR "ESTmapper/search-- checking search output. All should have $cdnaInInput cDNA.\n";

    my $fail = 0;

    open(my $check, "< $path/0-input/genome/segments") or die "Can't open genome segments list!\n";
    while (my $seg = <$check>) {
        chomp $seg;

        #  If the hits file is NOT found, remove the count file.  Then
        #  figure out how many ESTs we have signals for, and fail if
        #  it's not what we expect.

        unlink "$path/1-search/$seg.count" if (! -e "$path/1-search/$seg.hits");

        #  Declared unconditionally and reset for every segment: a
        #  "my ... if" declaration could carry the previous segment's
        #  count over to a segment whose count file is missing.
        my $c = 0;
        $c = int(`wc -l < $path/1-search/$seg.count`) if (-e "$path/1-search/$seg.count");

        if ($c != $cdnaInInput) {
            print STDERR "ESTmapper/search-- Search $seg failed, only $c signals. Output saved as *.CRASH\n";
            rename "$path/1-search/$seg.count", "$path/1-search/$seg.count.CRASH";
            rename "$path/1-search/$seg.hits",  "$path/1-search/$seg.hits.CRASH";
            $fail++;
        }
    }
    close($check);

    die "Dang." if ($fail);

    #  Hooray!  Now we're all done!

    if (open(my $done, "> $path/1-search/allDone")) {
        close($done);
    }

    print STDERR "ESTmapper/search-- Script finished in ", time() - $startTime, " wall-clock seconds.\n" if (time() > $startTime + 5);
}

################################################################################ # # Signal Filtering # ################################################################################ sub filter { my $startTime = time(); # If we're all done, just get outta here. return if (-e "$args{'path'}/2-filter/filteredHits"); # If we're supposed to be running on the grid, but we aren't, restart. # This can occur if the searches have finished, but the filter # didn't, and we restart. (also in 5-assemble.pl) # if (defined($args{'sgename'}) && !defined($ENV{'SGE_TASK_ID'})) { submitFilter(); print STDERR "ESTmapper/filter-- Restarted LSF execution.\n"; exit; } my $path = $args{'path'}; my $verbose = ($args{'verbose'}) ? "-verbose" : ""; my $hitMemory = ($args{'hitsortmemory'} or 600); # Don't change the value without 2-search print STDERR "ESTmapper: Performing a filter.\n"; # Merge all the hit counts into one list -- this is needed for output filtering! # if (! -e "$path/2-filter/hitCounts") { print STDERR "ESTmapper/filter-- Merging counts.\n"; if (runCommand("$prog{'mergeCounts'} $path/1-search/[0-9]*[0-9].count > $path/2-filter/hitCounts")) { unlink "$path/2-filter/hitCounts"; die "Failed.\n"; } } # # Setup the filtering and sorting # # No verbose for filterNULL! # my $fcmd; # bpw, 20051005, this isn't the perfect EST filter, but it does # nearly as good as the best filter I've seen, and produces # significantly fewer false positives.
if ($args{'nofilter'} eq 1) { $fcmd = "$prog{'filterNULL'} $path/1-search/*hits > $path/2-filter/filtHits"; } elsif ($args{'runstyle'} eq "est") { $fcmd = "$prog{'filterEST'} -u 200000000000 -r 0 -log $path/2-filter/filterLog $path/1-search/*hits > $path/2-filter/filtHits"; } elsif ($args{'runstyle'} eq "snp") { $fcmd = "$prog{'filterMRNA'} $verbose $path/1-search/*hits > $path/2-filter/filtHits"; } elsif ($args{'runstyle'} eq "mrna") { $fcmd = "$prog{'filterMRNA'} $verbose $path/1-search/*hits > $path/2-filter/filtHits"; } else { print STDERR "ESTmapper/filter-- nofilter = $args{'nofilter'}\n"; print STDERR "ESTmapper/filter-- runstyle = $args{'runstyle'}\n"; die "ESTmapper/filter-- Don't know how to filter!\n"; } print STDERR "ESTmapper/filter-- Filtering.\n"; if (runCommand($fcmd)) { unlink "$path/2-filter/filtHits"; die "Failed.\n"; } my $scmd = "$prog{'sortHits'} $verbose -m $hitMemory -t $path/2-filter $path/2-filter/filtHits > $path/2-filter/filteredHits"; print STDERR "ESTmapper/filter-- Sorting.\n"; if (runCommand($scmd)) { unlink "$path/2-filter/filteredHits"; die "Failed.\n"; } die "ESTmapper/filter-- FATAL: filter and sort produced no hits?\n" if (-z "$path/2-filter/filteredHits"); print STDERR "ESTmapper: Filter script finished in ", time() - $startTime, " wall-clock seconds.\n" if (time() > $startTime + 5); } ################################################################################ # # Signal Polishing # ################################################################################ sub polish { my $startTime = time(); # If we're all done, just get outta here. 
return if (-e "$args{'path'}/3-polish/allDone"); my $path = $args{'path'}; my $mini = ($args{'minidentity'} or 95); my $minc = ($args{'mincoverage'} or 50); my $minl = ($args{'minlength'} or 0); my $minsim4i = ($args{'minsim4identity'} or 90); my $minsim4c = ($args{'minsim4coverage'} or 45); my $minsim4l = ($args{'minsim4length'} or 0); my $relink = "-H $args{'relink'}" if ($args{'relink'}); my $always = "-alwaysprint $args{'alwaysprint'}" if ($args{'alwaysprint'}); my $batchsize = ($args{'batchsize'} or 0); my $numbatches = ($args{'numbatches'} or 256); my $numproc = ($args{'localpolishes'} or 4); my $aligns = "-aligns" if ($args{'aligns'}); my $abort = "-Mp 0.25 -Ma 10000" if ($args{'abort'}); my $interspecies = "-interspecies" if ($args{'interspecies'}); # Save the parameters, these are used on later invocations of # polish, and in filter to make sure the user isn't an idiot. # if (-e "$path/3-polish/parameters") { print STDERR "ESTmapper/polish-- Using original parameters.\n"; open(F, "< $path/3-polish/parameters"); $numbatches = int(); $batchsize = int(); $mini = ; chomp $mini; $minc = ; chomp $minc; $minl = ; chomp $minl; $minsim4i = ; chomp $minsim4i; $minsim4c = ; chomp $minsim4c; $minsim4l = ; chomp $minsim4l; $relink = ; chomp $relink; $always = ; chomp $always; $aligns = ; chomp $aligns; $abort = ; chomp $abort; $interspecies = ; chomp $interspecies; close(F); print STDERR "ESTmapper/polish-- Polish quality suitable for $minsim4i percent identity and\n"; print STDERR "ESTmapper/polish-- $minsim4c percent coverage\n"; print STDERR "ESTmapper/polish-- To rerun polishes at a different quality level,\n"; print STDERR "ESTmapper/polish-- remove the 3-polish directory.\n"; } else { # Do a little error checking; if both $batchsize and # $numbatches are zero, set $batchsize to make 256 batches. # if (($batchsize == 0) && ($numbatches == 0)) { $numbatches = 256; } # If $batchsize is not specified, compute it. 
# if ($batchsize == 0) { $batchsize = int(`wc -l < $path/2-filter/filteredHits` / $numbatches) + 1; $batchsize = 10000 if ($batchsize < 10000); } # Adjust the sim4 qualities based on the final quality desired # $mini = 0 if ($mini < 0); $minc = 0 if ($minc < 0); $minl = 0 if ($minl < 0); $minsim4i = $mini - 5 if ($mini - 5 < $minsim4i); $minsim4c = $minc - 5 if ($minc - 5 < $minsim4c); $minsim4l = $minl if ($minl < $minsim4l); $minsim4i = 0 if ($minsim4i < 0); $minsim4c = 0 if ($minsim4c < 0); $minsim4l = 0 if ($minsim4l < 0); # Save the parameters # open(F, "> $path/3-polish/parameters"); print F "$numbatches\n$batchsize\n"; print F "$mini\n$minc\n$minl\n"; print F "$minsim4i\n$minsim4c\n$minsim4l\n"; print F "$relink\n$always\n$aligns\n$abort\n$interspecies\n"; close(F); } # Build the sim4 command # open(F, "> $path/3-polish/polish.sh"); print F "#!/bin/sh\n"; print F "\n"; print F "jid=\$SGE_TASK_ID\n"; print F "if [ x\$jid = x -o x\$jid = xundefined ] ; then\n"; print F " if [ x\$1 = x ] ; then\n"; print F " echo \"ERROR: I need a job-id on the command line or in \$SGE_TASK_ID\"\n"; print F " exit 1\n"; print F " fi\n"; print F " jid=`expr \$1 + 1`\n";; print F "fi\n"; print F "\n"; print F "jid=`head -\$jid $path/3-polish/partitions | tail -1`\n"; print F "\n"; print F "if [ -e \"$path/3-polish/\$jid.success\" ] ; then\n"; print F " exit\n"; print F "fi\n"; print F "\n"; print F "$prog{'sim4db'} \\\n"; print F " -cdna $path/0-input/cDNA.fasta \\\n"; print F " -genomic $path/0-input/genome/genome.seqStore \\\n"; print F " $aligns \\\n" if ($aligns ne ""); print F " $always \\\n" if ($always ne ""); print F " $relink \\\n" if ($relink ne ""); print F " $abort \\\n" if ($abort ne ""); print F " $interspecies \\\n" if ($interspecies ne ""); print F " -cut 0.6 \\\n"; print F " -mincoverage $minsim4c \\\n"; print F " -minidentity $minsim4i \\\n"; print F " -minlength $minsim4l \\\n"; print F " -script $path/3-polish/\$jid.sim4script \\\n"; print F " -output 
$path/3-polish/\$jid.sim4db \\\n"; print F " -YN $path/3-polish/\$jid.yn \\\n" if ($args{'sim4-yn'} == 1); print F "&& \\\n"; print F "touch $path/3-polish/\$jid.success\n"; close(F); # Splits the filteredHits into several pieces, and outputs a script # that runs sim4db on those pieces. # if (! -e "$path/3-polish/partitions") { print STDERR "ESTmapper/polish-- Creating scripts with $batchsize lines in each.\n"; my @idxs; my $idx = "0000"; open(H, "< $path/2-filter/filteredHits"); while (!eof(H)) { my $c = 0; open(F, "> $path/3-polish/$idx.sim4script"); while (($c < $batchsize) && (!eof(H))) { $_ = ; print F $_; $c++; } close(F); push @idxs, "$idx\n"; $idx++; } close(H); print STDERR "ESTmapper/polish-- Created $idx scripts.\n"; open(S, "> $path/3-polish/partitions"); print S @idxs; close(S); } # Build a list of things to run. # my @jobsToRun; open(F, "< $path/3-polish/partitions"); while () { chomp; push @jobsToRun, $_ if (! -e "$path/3-polish/$_.success"); } close(F); # Wipe any summaries, cDNA-* and polished files if we need to polish more stuff. 
# if (scalar(@jobsToRun) > 0) { unlink "$path/cDNA-good.fasta"; unlink "$path/cDNA-goodshort.fasta"; unlink "$path/cDNA-lowquality.fasta"; unlink "$path/cDNA-missing.fasta"; unlink "$path/cDNA-repeat.fasta"; unlink "$path/cDNA-zero.fasta"; unlink "$path/polishes-aborted"; unlink "$path/polishes-good"; unlink "$path/polishes-goodshort"; unlink "$path/polishes-lowquality"; unlink "$path/summary"; # Display what parameters we are using # print STDERR "ESTmapper/polish-- more polishes to compute.\n"; print STDERR "ESTmapper/polish-- minidentity = $mini ($minsim4i)\n"; print STDERR "ESTmapper/polish-- mincoverage = $minc ($minsim4c)\n"; print STDERR "ESTmapper/polish-- minlength = $minl ($minsim4l)\n"; print STDERR "ESTmapper/polish-- relink = $relink\n"; print STDERR "ESTmapper/polish-- always = $always\n"; print STDERR "ESTmapper/polish-- aligns = $aligns\n"; print STDERR "ESTmapper/polish-- abort = $abort\n"; print STDERR "ESTmapper/polish-- interspecies = $interspecies\n"; # Run things, or tell the user to do it for us. # if (defined($args{'runlater'})) { print STDERR "ESTmapper/polish-- Please run the jobs in\n"; print STDERR "ESTmapper/polish-- $path/3-polish/run.sh\n"; exit(0); } elsif (defined($args{'sgename'})) { print STDERR "ESTmapper/polish-- Submitting to SGE.\n"; # Don't resubmit jobs that are already done, and do # submit the smallest number of jobs to finish. # Bugs here should be fixed in 2-search.pl as well. my @watchJobs; my $fJob = shift @jobsToRun; my $lJob = $fJob; while (defined($lJob)) { my $nJob = shift @jobsToRun; if (($lJob + 1 != $nJob) || (!defined($nJob))) { # SGE expects jobs to start at 1, but we start at 0. 
$fJob++; $lJob++; print STDERR "Sumbit $fJob - $lJob (njob=$nJob)\n"; my $cmd; $cmd = "qsub -cwd -j y -o $path/3-polish/sgeout-\\\$TASK_ID "; $cmd .= " $args{'sgeoptions'} " if (defined($args{'sgeoptions'}));; $cmd .= " $args{'sgepolish'} " if (defined($args{'sgepolish'})); $cmd .= " -N \"p$args{'sgename'}.$fJob\" "; $cmd .= " -t $fJob-$lJob "; $cmd .= "$path/3-polish/polish.sh"; push @watchJobs, "p$args{'sgename'}.$fJob"; die "Failed to submit job to SGE.\n" if (runCommand($cmd)); $fJob = $nJob; } $lJob = $nJob; } submitFinish(@watchJobs); print STDERR "ESTmapper/polish-- Finish submitted. See ya later!\n"; exit(0); } else { print STDERR "ESTmapper/polish-- Running locally, $numproc at a time.\n"; &scheduler::schedulerSetNumberOfProcesses($numproc); foreach my $cmd (@jobsToRun) { &scheduler::schedulerSubmit("/bin/sh $path/3-polish/polish.sh $cmd"); } &scheduler::schedulerFinish(); #unlink "$path/3-polish/run.sh"; } } # Make sure that all the polishes are finished and OK. # If not, print dire warnings and exit. # my $fail = 0; open(F, "< $path/3-polish/partitions") or die "Failed to open '$path/3-polish/partitions'\n";; while () { chomp; if (! -e "$path/3-polish/$_.success") { $fail++; print STDERR "ESTmapper/polish-- segment $_ failed.\n"; } } close(F); die "Dang." if ($fail); # Hooray! Now we're all done! open(F, "> $args{'path'}/3-polish/allDone"); close(F); print STDERR "ESTmapper: Polish script finished in ", time() - $startTime, " wall-clock seconds.\n" if (time() > $startTime + 5); } ################################################################################ # # Output # ################################################################################ # This is way too complicated. 
#
#  1) Collect output from 4-polish, put into polishes-good
#  2) Filter -> polishes-best
#
#  Given as input a single polishes file and a cdna file,
#  we need an executable that:
#    Generate stats on mapping, good and best, missing, zero
#    Filter cDNA to good, missing, zero

#  Final stage: turn the per-batch polishes in $path/3-polish into the
#  user-visible results under $path.
#
#  Steps, each skipped when its output already exists:
#    - compare the requested quality levels against 3-polish/parameters,
#      warn if they are below what was polished, and remove the filtered
#      outputs if the filter levels changed;
#    - resubmit itself through submitFinish() when SGE was requested but
#      SGE_TASK_ID is not set (this exits);
#    - filter polishes into polishes-good / polishes-aborted;
#    - pick the best polish per cDNA (mrna and est run styles only);
#    - split the cDNA into good / missing / zero-hit sets;
#    - write $path/summary;
#    - remove the temporary stage directories unless 'savetemporary' is 1.
#
#  Dies on invalid quality values, a missing parameters file, or a failed
#  external command.
#
sub assembleOutput {
    my $startTime = time();

    my $path = $args{'path'};
    my $mini = ($args{'minidentity'} or 95);
    my $minc = ($args{'mincoverage'} or 50);
    my $minl = ($args{'minlength'}   or 0);

    #  The parentheses matter: "my $x = A or B" parses as "(my $x = A) or B"
    #  and never applies the default.
    my $intronLimit = ($args{'cleanup'} or 100000);

    print STDERR "ESTmapper: Performing an assembleOutput.\n";

    (($mini < 0) || ($mini > 100)) and die "ERROR: ESTmapper/assembleOutput-- supply a value 0 <= x <= 100 for minidentity!\n";
    (($minc < 0) || ($minc > 100)) and die "ERROR: ESTmapper/assembleOutput-- supply a value 0 <= x <= 100 for mincoverage!\n";
    ($minl < 0) and die "ERROR: ESTmapper/assembleOutput-- supply a value x >= 0 for minlength!\n";

    #  Check that the filtering is compatable with the polishing.
    #
    if (! -e "$path/3-polish/parameters") {
        die "ESTmapper/assemblyOutput-- ERROR: Couldn't find polishing parameters. Script error.\n";
    }

    open(my $params, "< $path/3-polish/parameters") or die "ESTmapper/assembleOutput-- ERROR: Can't read '$path/3-polish/parameters': $!\n";

    #  The first two lines written by polish() are numbatches and
    #  batchsize; they are not needed here.
    my $unused;
    $unused = <$params>;
    $unused = <$params>;

    my $miniL = int(<$params>);   #  Quality values used for last filtering
    my $mincL = int(<$params>);
    my $minlL = int(<$params>);
    my $miniP = int(<$params>);   #  Quality values used for polishing
    my $mincP = int(<$params>);
    my $minlP = int(<$params>);

    close($params);

    if ($mini < $miniP) {
        printf STDERR "ESTmapper/assembleOutput-- WARNING: Percent identity quality level too low for existing polishing!\n";
        printf STDERR "ESTmapper/assembleOutput-- WARNING: Polished at percent align-sequence identity = %3d, requested filtration at %3d.\n", $miniP, $mini;
    }
    if ($minc < $mincP) {
        printf STDERR "ESTmapper/assembleOutput-- WARNING: Coverage quality level too low for existing polishing!\n";
        printf STDERR "ESTmapper/assembleOutput-- WARNING: Polished at percent query-sequence identity = %3d, requested filtration at %3d.\n", $mincP, $minc;
    }
    if ($minl < $minlP) {
        printf STDERR "ESTmapper/assembleOutput-- WARNING: Length quality level too low for existing polishing!\n";
        printf STDERR "ESTmapper/assembleOutput-- WARNING: Polished at length = %3d, requested filtration at %3d.\n", $minlP, $minl;
    }

    #  If the filter quality has changed, we need to refilter.  Nuke
    #  the filterLevel file, print a message.
    #
    if (($mini != $miniL) || ($minc != $mincL) || ($minl != $minlL)) {
        print STDERR "ESTmapper/assembleOutput-- filtering criteria changed; refiltering.\n";

        printf STDERR "ESTmapper/assembleOutput-- identity: percent align-sequence identity: old=%3d new=%3d\n", $miniL, $mini;
        printf STDERR "ESTmapper/assembleOutput-- coverage: percent query-sequence identity: old=%3d new=%3d\n", $mincL, $minc;
        printf STDERR "ESTmapper/assembleOutput-- length: length in bp of match: old=%3d new=%3d\n", $minlL, $minl;

        unlink "$path/polishes-good";
        unlink "$path/polishes-best";
        unlink "$path/polishes-lowquality";
        unlink "$path/summary";
    }

    #  If we're supposed to be running on the grid, but we aren't, restart.
    #  This can occur if the searches have finished, but the filter
    #  didn't, and we restart.  (also in 3-filter.pl)
    #
    if (defined($args{'sgename'}) && !defined($ENV{'SGE_TASK_ID'})) {
        submitFinish();
        print STDERR "ESTmapper/assembleOutput-- Restarted SGE execution.\n";
        exit;
    }

    if (! -e "$path/polishes-good") {
        print STDERR "ESTmapper/assembleOutput-- filtering polishes by quality.\n";

        print STDERR "ESTmapper/assembleOutput-- identity: percent align-sequence identity: $mini\n";
        print STDERR "ESTmapper/assembleOutput-- coverage: percent query-sequence identity: $minc\n";
        print STDERR "ESTmapper/assembleOutput-- length: length in bp of match: $minl\n";

        #  Find all the polishes, run them through the cleaner, and filter by quality.
        #
        my $cmd;
        $cmd  = "find $path/3-polish/ -name '*.sim4db' -print | sort | xargs -n 100 cat | ";
        $cmd .= "$prog{'cleanPolishes'} -threshold $intronLimit -savejunk | " if (defined($args{'cleanup'}));
        $cmd .= "$prog{'toFILTER'} -c $minc -i $mini -l $minl -o $path/polishes-good -j $path/polishes-aborted > /dev/null";

        if (runCommand($cmd)) {
            unlink "$path/polishes-good";
            unlink "$path/polishes-aborted";
            die "Failed.\n";
        }

        unlink "$path/polishes-best";
        unlink "$path/cDNA-good.fasta";
        unlink "$path/cDNA-missing.fasta";
        unlink "$path/cDNA-repeat.fasta";
        unlink "$path/cDNA-zero.fasta";
        unlink "$path/summary";
    }

    if (! -e "$path/polishes-best") {
        if ($args{'runstyle'} eq "mrna") {
            print STDERR "ESTmapper/assembleOutput-- Picking the best mRNA polish.\n";
            if (runCommand("$prog{'sortPolishes'} -m 400 -c < $path/polishes-good | $prog{'pickBest'} -mrna > $path/polishes-best")) {
                unlink "$path/polishes-best";
                die "Failed.";
            }
        } elsif ($args{'runstyle'} eq "est") {
            print STDERR "ESTmapper/assembleOutput-- Picking the best EST polish.\n";
            if (runCommand("$prog{'sortPolishes'} -m 400 -c < $path/polishes-good | $prog{'pickBest'} -est > $path/polishes-best")) {
                unlink "$path/polishes-best";
                die "Failed.";
            }
        } else {
            print STDERR "ESTmapper/assembleOutput-- Not mRNA and not EST, so not picking the best polish.\n";
        }
    }

    #
    #  Segregate the sequences
    #
    #  XXXX if the filter prints a list of repeats, we should add those here!

    if (! -e "$path/cDNA-good.fasta") {

        #  hitCounts has one line per cDNA, in input order; record the
        #  index of every cDNA whose count is zero.
        my $iid = 0;
        open(F, "< $path/2-filter/hitCounts");
        open(G, "> $path/zero-hit-iid");
        while (<F>) {
            if ($_ == 0) {
                print G "$iid\n";
            }
            $iid++;
        }
        close(G);
        close(F);

        my $cmd;
        $cmd  = "$prog{'terminate'}";
        $cmd .= " -P $path/polishes-best $path/cDNA-best.fasta";
        $cmd .= " -P $path/polishes-good $path/cDNA-good.fasta";
        $cmd .= " -I $path/zero-hit-iid $path/cDNA-zero.fasta";
        $cmd .= " -O $path/cDNA-missing.fasta";
        $cmd .= " -i $path/0-input/cDNA.fasta";
        print $cmd;
        if (runCommand($cmd)) {
            rename "$path/cDNA-good.fasta",    "$path/cDNA-good.fasta.FAILED";
            rename "$path/cDNA-missing.fasta", "$path/cDNA-missing.fasta.FAILED";
            rename "$path/cDNA-zero.fasta",    "$path/cDNA-zero.fasta.FAILED";
            die "Failed.\n";
        }

        #  The list was written under $path, so remove it from there.
        unlink "$path/zero-hit-iid";
    }

    #
    #  Summarize
    #
    if ((! -e "$path/summary") || (-z "$path/summary")) {
        my ($mat, $est, $scf);

        open(F, "> $path/summary");

        print STDERR "ESTmapper/assembleOutput-- counting 'good' matches.\n";
        ($mat, $est, $scf) = summarizePolishes("$path/polishes-good");
        print F "GOOD: >= $mini% identity, >= $minc% composite, >= $minl bp\n";
        if ($mat > 0) {
            print F "cDNA-genomic matches $mat matches ($est different cDNA and $scf genomic)\n";
            print F "Matches per cDNA ", int(10000 * $mat / $est) / 10000.0, " matches/cDNA\n";
            print F "Matches per genomic ", int(10000 * $mat / $scf) / 10000.0, " matches/genomic\n";
        } else {
            print F "cDNA-genomic matches None.\n";
        }
        print F "\n";

        print STDERR "ESTmapper/assembleOutput-- counting cDNA.\n";
        print F "cDNA COUNTS:\n";
        my $cnttotl = int(`grep -c '^>' $path/0-input/cDNA.fasta`);
        my $cntgood = int(`grep -c '^>' $path/cDNA-good.fasta`);
        my $cntmiss = int(`grep -c '^>' $path/cDNA-missing.fasta`);
        my $cntzero = int(`grep -c '^>' $path/cDNA-zero.fasta`);

        my $cntrept = 0;
        $cntrept = int(`grep -c '^>' $path/cDNA-repeat.fasta`) if (-e "$path/cDNA-repeat.fasta");

        #  Percentage of the input cDNA; zero when the input is empty so
        #  the report never divides by zero.
        my $percent = sub { return(($cnttotl > 0) ? (100 * $_[0] / $cnttotl) : 0); };

        printf F "cDNA: %8d\n", $cnttotl;
        printf F "cDNA-good: %8d (%8.4f%%)\n", $cntgood, $percent->($cntgood);
        printf F "cDNA-missing: %8d (%8.4f%%)\n", $cntmiss, $percent->($cntmiss);
        printf F "cDNA-repeat: %8d (%8.4f%%)\n", $cntrept, $percent->($cntrept) if (-e "$path/cDNA-repeat.fasta");
        printf F "cDNA-zero: %8d (%8.4f%%)\n", $cntzero, $percent->($cntzero);

        close(F);
    }

    #
    #  All done!
    #
    if ($args{'savetemporary'} != 1) {
        if (runCommand("rm -rf $path/1-search $path/2-filter $path/3-polish")) {
            print STDERR "ESTmapper/assembleOutput-- WARNING: Failed to remove temporary directories.\n";
        }
    }

    print STDERR "ESTmapper: assembleOutput script finished in ", time() - $startTime, " wall-clock seconds.\n" if (time() > $startTime + 5);
}


######################################################################
#
#  Generates a report on a set of polishes.
#
#    number of cDNA-scaffold matches
#    number of different cDNA sequences in the set
#    number of different scaffolds in the set
#
#  Takes a list of polish file names and returns the three counts as a
#  list.  A match is a line that is exactly "sim4begin"; cDNA and scaffold
#  identities are the distinct "edef=" and "ddef=" lines.  Warns on STDERR
#  when the defline totals disagree with the match total.  A file that
#  cannot be opened contributes nothing.
#
sub summarizePolishes {
    my (@files) = @_;

    my %est;
    my %scf;
    my $mat  = 0;
    my $ests = 0;
    my $scfs = 0;

    foreach my $infile (@files) {
        if (!open(INPUT, "< $infile")) {
            print STDERR "WARNING: summarizePolishes can't open '$infile': $!\n";
            next;
        }

        while (<INPUT>) {
            if (m/^sim4begin$/) {
                $mat++;
            } elsif (m/^edef=/) {
                $ests++;
                $est{$_} = 1;
            } elsif (m/^ddef=/) {
                $scfs++;
                $scf{$_} = 1;
            }
        }

        close(INPUT);
    }

    if (($ests != $mat) || ($scfs != $mat)) {
        print STDERR "WARNING: summarizePolishes counted\n";
        print STDERR " $mat matches\n";
        print STDERR " $ests cDNA deflines\n";
        print STDERR " $scfs scaffold deflines\n";
        print STDERR " The number of deflines and the number of matches should be the same!\n";
    }

    return($mat, (scalar (keys %est)), (scalar (keys %scf)));
}

################################################################################ # # Utilities for Main # ################################################################################ sub parseSNP { # Parse the SNPs out # if (! -e "$args{'path'}/snps-parsed") { print STDERR "ESTmapper-- Parsing the SNPs\n"; # Sort, if needed. # if (!
-e "$args{'path'}/polishes-good.sorted") { print STDERR "ESTmapper-- Sorting polishes by sequence ID; using 2GB memory maximum.\n"; if (runCommand("$prog{'sortPolishes'} -m 2000 -c < $args{'path'}/polishes-good > $args{'path'}/polishes-good.sorted")) { unlink "$args{'path'}/polishes-good.sorted"; die "Failed to sort the polishes.\n"; } } # Parse the options, looking for SNP specific ones # my @ARGS = @ARGV; my $snpdelimiter = ""; my $snpsizetag = ""; my $snppostag = ""; my $snpoffset = ""; my $snpoutformat = ""; while (scalar @ARGS > 0) { my $arg = shift @ARGS; if ($arg eq "-snpdelimiter") { $arg = shift @ARGS; $snpdelimiter = "-d \"$arg\""; } elsif ($arg eq "-snpsizetag") { $arg = shift @ARGS; $snpsizetag = "-s \"$arg\""; } elsif ($arg eq "-snppostag") { $arg = shift @ARGS; $snppostag = "-p \"$arg\""; } elsif ($arg eq "-snpoffset") { $arg = shift @ARGS; $snpoffset = "-o $arg"; } elsif ($arg eq "-snpoutformat") { $arg = shift @ARGS; $snpoutformat = "-format $arg"; } } # PARSE! # if (runCommand("$prog{'parseSNPs'} $snpdelimiter $snpsizetag $snppostag $snpoffset $snpoutformat -F $args{'path'}/snps-failed -O $args{'path'}/snps-parsed < $args{'path'}/polishes-good.sorted > $args{'path'}/summary-snps")) { unlink "$args{'path'}/snps-failed"; unlink "$args{'path'}/snps-parsed"; unlink "$args{'path'}/summary-snps"; die "Failed to parse SNP locations from polishes.\n"; } } }


#  Converts a duration in seconds into (hours, minutes, seconds).
#
#  The argument is truncated to whole seconds first.  The returned list
#  is ($h, $m, $s) with $m and $s both in 0..59 for non-negative input.
#  Integer arithmetic is used throughout, so the minutes cannot be lost
#  to floating-point rounding and the seconds field is the remainder,
#  not the total.
#
sub sTOhms ($) {
    my ($total) = @_;

    $total = int($total);

    my $h = int($total / 3600);
    my $m = int(($total % 3600) / 60);
    my $s = $total % 60;

    return($h, $m, $s);
}

################################################################################ # # Main # ################################################################################ setExecutables(); parseArgs(@ARGV); if ($args{'runstyle'} eq "est") { configure(); search(); filter(); polish(); assembleOutput(); } elsif ($args{'runstyle'} eq "mrna") { $args{'relink'} = 1000; $args{'abort'} = 1; configure(); search(); filter(); polish(); assembleOutput(); } elsif ($args{'runstyle'}
eq "snp") { $args{'minidentity'} = 95; $args{'mincoverage'} = 80; configure(); search(); filter(); polish(); assembleOutput(); parseSNP(); } else { print STDERR "Basic help N/A.\n"; } print STDERR "ESTmapper: script finished everything in ", time() - $args{'startTime'}, " wall-clock seconds.\n" if (time() != $args{'startTime'}); if (-e $args{'runInforFile'}) { my $time = time(); open(F, ">> $args{'runInforFile'}"); print F "endTime: $time (", scalar(localtime($time)), ")\n"; close(F); } exit(0); kmer-code-2013-trunk/ESTmapper/Make.include0000644000000000000000000000136211676744271017251 0ustar rootroot# -*- makefile -*- LIBUTL/ :=$(realpath $/../libutil/)/ LIBBIO/ :=$(realpath $/../libbio/)/ LIBSEQ/ :=$(realpath $/../libseq/)/ LIBKMER/ :=$(realpath $/../libkmer/)/ LIBSIM4/ :=$(realpath $/../libsim4/)/ $(eval $/%.d $/%.o: CXXFLAGS+=-I${LIBKMER/} -I${LIBBIO/} -I${LIBSEQ/} -I${LIBSIM4/} -I${LIBUTL/}) $/.CXX_SRCS := $/mergeCounts.C $/terminate.C $/.CXX_EXES := $/mergeCounts $/terminate $/.PERL_EXES := $/ESTmapper.pl $/configureESTmapper.pl $/runConcurrently.pl $/.PERL_LIBS := $/scheduler.pm $/mergeCounts: $/mergeCounts.o $/terminate: $/terminate.o ${LIBSIM4/}libsim4.a ${LIBSEQ/}libseq.a ${LIBBIO/}libbio.a ${LIBUTL/}libutil.a $/.CLEAN := $/*.o $/.REAL-CLEAN := $/mergeCounts $/terminate kmer-code-2013-trunk/ESTmapper/configureESTmapper.pl0000644000000000000000000003106011066705337021115 0ustar rootroot#!/usr/bin/perl use strict; use FindBin; use Config; # for @signame use lib "$FindBin::Bin/util"; my $exechome = "$FindBin::Bin"; my $leaff = "$exechome/leaff"; my $posdb = "$exechome/positionDB"; my $meryl = "$exechome/meryl"; my $genome = undef; my $genomedir = undef; my $mersize = 20; my $merskip = 0; my $memory = 1000; my $segments = 0; my $local = 1; my $sge = undef; my $sgename = "EMconfig"; ################################################################################ # # Utility to run a command and check the exit status (sadly, duplicated # in 
configureESTmapper.pl). # ################################################################################

#  Runs $cmd through system(), echoing it to STDERR first.
#
#  Returns 0 when the command exits with status zero, and 1 otherwise
#  after printing a one-line diagnosis to STDERR: the OS error when the
#  command could not be started, the exit status when it exited non-zero,
#  or the signal (by name when %Config provides the names) and whether a
#  core was dumped when it was killed.
#
sub runCommand {
    my $cmd = shift @_;

    print STDERR "$cmd\n";

    my $status = system($cmd);

    #  Only the low 16 bits carry the wait status.
    my $rc = 0xffff & $status;

    return(0) if ($rc == 0);

    #  Signal names, indexed by signal number.
    my @signame;
    if (defined($Config{sig_name})) {
        @signame = split('\s+', $Config{sig_name});
    }

    my $error = "ERROR: $cmd\n failed with ";

    if (($status == -1) || ($rc == 0xff00)) {
        #  The command could not be started (or wait failed); $! has the reason.
        $error .= "$!\n";
    } elsif (($rc & 0xff) == 0) {
        #  Low byte clear: a normal exit, status is in the high byte.
        #  Testing the low byte (rather than "$rc > 0x80") keeps a
        #  core-dumping signal from being reported as "exit status 0".
        my $exit = $rc >> 8;
        $error .= "exit status $exit\n";
    } else {
        #  Killed by a signal: bit 7 flags a core dump, the low seven
        #  bits are the signal number.
        my $sig = $rc & 0x7f;

        $error .= "coredump from " if ($rc & 0x80);

        if (defined($signame[$sig])) {
            $error .= "signal $signame[$sig]\n";
        } else {
            $error .= "signal $sig\n";
        }
    }

    print STDERR $error;

    return(1);
}

################################################################################ # # Main # ################################################################################ while (scalar(@ARGV)) { my $arg = shift @ARGV; if ($arg eq "-genome") { $genome = shift @ARGV; } elsif ($arg eq "-genomedir") { $genomedir = shift @ARGV; } elsif ($arg eq "-mersize") { $mersize = int(shift @ARGV); } elsif ($arg eq "-merskip") { $merskip = int(shift @ARGV); } elsif ($arg eq "-memory") { $memory = int(shift @ARGV); } elsif ($arg eq "-segments") { $segments = int(shift @ARGV); } elsif ($arg eq "-sge") { $local = undef; $sge = shift @ARGV; } elsif ($arg eq "-sgename") { $sgename = shift @ARGV; } elsif ($arg eq "-local") { $local = 1; } elsif ($arg eq "-h") { undef $genome; undef $genomedir; undef @ARGV; } elsif ($arg eq "-justtestingifitworks") { exit(0); } else { die "ERROR: unknown arg '$arg'\n"; } } if (!defined($genome) || !defined($genomedir)) { print STDERR "usage: $0 -genome g.fasta -genomedir /some/path [args]\n"; print STDERR " -genome g.fasta the genome to map to\n"; print STDERR " -genomedir d the directory to save the configuration in\n";
print STDERR "\n"; print STDERR " -mersize m use m-mers (default 20)\n"; print STDERR " -merskip s skip s m-mers between mers (default 0, use all mers)\n"; print STDERR " -memory M use M MB memory for the search processes (default 1000MB)\n"; print STDERR " -segments S use S search processes (default, based on memory size)\n"; print STDERR " -sge compute the configuration on the grid; args are passed to qsub\n"; print STDERR " -sgename sge job name (default 'EMconfig')\n"; print STDERR " -local compute the configuration right now (the default)\n"; print STDERR "\n"; print STDERR " This precomputes search tables for ESTmapper.\n"; print STDERR " Both -genome and -genomedir must be specified.\n"; print STDERR " One of -memory and -segments should be specified.\n"; print STDERR "\n"; print STDERR "Example:\n"; print STDERR " configureESTmapper.pl -genome B35LC.fasta -genomedir B35LC -memory 900 -sge \"-pe thread 2\"\n"; print STDERR "\n"; exit(1); } $genome = "$ENV{'PWD'}/$genome" if ($genome !~ m!^/!); $genomedir = "$ENV{'PWD'}/$genomedir" if ($genomedir !~ m!^/!); system("mkdir -p $genomedir") if (! -d $genomedir); if ($genome !~ m/^\//) { my $cwd = `pwd`; chomp $cwd; $genome = "$cwd/$genome"; } die "Can't find genome '$genome'\n" if (! -e $genome); die "Can't find output directory '$genomedir'\n" if (! -d $genomedir); print STDERR "Configuring ESTmapper:\n"; print STDERR " merSize $mersize\n"; print STDERR " merSkip $merskip\n"; print STDERR " ${memory}MB\n" if (defined($memory)); print STDERR " $segments segments\n" if (defined($segments)); symlink "${genome}", "$genomedir/genome.fasta" if ((! -f "$genomedir/genome.fasta")); print STDERR "configureESTmapper-- Initializing positionDB creation.\n"; if (! 
-e "$genomedir/genome.seqStore") { if (runCommand("$leaff -f $genomedir/genome.fasta --seqstore $genomedir/genome.seqStore > $genomedir/genome.seqStore.out 2>&1")) { unlink "$genomedir/genome.seqStore"; die "Failed.\n"; } } my $acgtInFile = 0; my $acgtPerSegment = 0; my $segmentOverlap = 10000000; open(F, "< $genomedir/genome.seqStore.out") or die; while () { if (m/\s+(\d+)\s+ACGT\s+letters/) { $acgtInFile = $1; } } close(F); print STDERR "Found $acgtInFile ACGT in the input.\n"; die "No ACGT found?\n" if ($acgtInFile <= 0); # XXX: Magic Number! 12 bytes per base! if ($memory > 0) { $acgtPerSegment = int($memory / 12 * 1000000) + 1; print STDERR "configureESTmapper-- packing to preserve ${memory}MB memory limit ($acgtPerSegment mers per segment)\n"; } if ($segments > 0) { $acgtPerSegment = int($acgtInFile / $segments + $segmentOverlap) + 1; print STDERR "configureESTmapper-- packing to preserve $segments processor limit ($acgtPerSegment mers per segment)\n"; } $memory = int($acgtPerSegment * 12 / 1000000); open(F, "> $genomedir/memoryLimit") or die "Can't write $genomedir/memoryLimit\n"; print F "$memory\n"; close(F); my $merBeg = 0; my $merEnd = 0; my $segId = "000"; open(F, "> $genomedir/segments"); open(S, "> $genomedir/create.dat"); while ($merBeg < $acgtInFile) { $merEnd = $merBeg + $acgtPerSegment; print F "$segId\n"; print S "$segId $merBeg $merEnd\n"; $merBeg += $acgtPerSegment - $segmentOverlap; $segId++; } close(F); close(S); print STDERR "configureESTmapper-- Created $segId groups with maximum memory requirement of ${memory}MB.\n"; die "Created no groups?\n" if ($segId eq "000"); # Configure meryl # if (! 
-e "$genomedir/genome.merylArgs") { my $cmd; $cmd = "$meryl"; $cmd .= " -B -L 5 -f -m $mersize -segments $segId -configbatch"; $cmd .= " -s $genomedir/genome.seqStore"; $cmd .= " -o $genomedir/genome"; $cmd .= " > $genomedir/meryl.config.out 2>&1"; if (runCommand($cmd)) { die "Failed.\n"; } } # Create the script that builds the positionDB's and meryl partitions # # If there is only one segment ($segId == "000") then meryl doesn't # use the batch mechanism; the meryl in create.sh writes the final # output. open(F, "> $genomedir/create.sh"); print F "#!/bin/sh\n"; print F "\n"; print F "jobid=\$SGE_TASK_ID\n"; print F "if [ x\$jobid = x -o x\$jobid = xundefined ]; then\n"; print F " jobid=\$1\n"; print F "fi\n"; print F "if [ x\$jobid = x ]; then\n"; print F " echo Error: I need SGE_TASK_ID set, or a job index on the command line.\n"; print F " exit 1\n"; print F "fi\n"; print F "jobp=`cat $genomedir/create.dat | head -n \$jobid | tail -n 1`\n"; print F "\n"; print F "seg=`echo \$jobp | awk '{ print \$1 }'`\n"; print F "beg=`echo \$jobp | awk '{ print \$2 }'`\n"; print F "end=`echo \$jobp | awk '{ print \$3 }'`\n"; print F "\n"; print F "if [ ! -e \"$genomedir/seg\$seg.posDB\" ] ; then\n"; print F " $posdb \\\n"; print F " -mersize $mersize \\\n"; print F " -merbegin \$beg \\\n"; print F " -merend \$end \\\n"; print F " -sequence \"$genomedir/genome.seqStore\" \\\n"; print F " -output \"$genomedir/seg\$seg.building.posDB\" \\\n"; print F " > \"$genomedir/seg\$seg.building.posDB.err\" 2>&1 \\\n"; print F " && \\\n"; print F " rm -f \"$genomedir/seg\$seg.building.posDB.err\" \\\n"; print F " && \\\n"; print F " mv \"$genomedir/seg\$seg.building.posDB\" \\\n"; print F " \"$genomedir/seg\$seg.posDB\"\n"; print F "fi\n"; print F "\n"; print F "bat=`expr \$jobid - 1`\n"; print F "\n"; print F "if [ ! -e \"$genomedir/genome.batch\$bat.mcdat\" -o ! 
-e \"$genomedir/genome.mcdat\" ] ; then\n"; print F " $meryl \\\n"; print F " -countbatch \$bat \\\n"; print F " -o \"$genomedir/genome\" \\\n"; print F " || \\\n"; print F " rm -f \"$genomedir/genome.batch\$bat.mcidx\" \\\n"; print F " \"$genomedir/genome.batch\$bat.mcdat\" \\\n"; print F " \"$genomedir/genome.mcdat\" \\\n"; print F " \"$genomedir/genome.mcdat\"\n"; print F "fi\n"; close(F); # Create the script that merges meryl outputs # open(F, "> $genomedir/meryl.sh"); print F "#!/bin/sh\n"; print F "\n"; print F "if [ ! -e \"$genomedir/genome.mcidx\" ] ; then\n"; print F " $meryl \\\n"; print F " -mergebatch \\\n"; print F " -o \"$genomedir/genome\" \\\n"; print F " || \\\n"; print F " rm -f \"$genomedir/genome.mcidx\" \\\n"; print F " \"$genomedir/genome.mcdat\"\n"; print F "fi\n"; print F "\n"; print F "if [ ! -e \"$genomedir/frequentMers-ge1000.fasta\" ] ; then\n"; print F " $meryl \\\n"; print F " -Dt -n 1000 \\\n"; print F " -s \"$genomedir/genome\" \\\n"; print F " > \"$genomedir/frequentMers-ge1000.fasta\" \\\n"; print F " || \\\n"; print F " rm -f \"$genomedir/frequentMers-ge1000.fasta\"\n"; print F "fi\n"; close(F); ######################################## # # run the jobs. # if ($local) { my $seg = "000"; while ($seg ne $segId) { # Copy $seg (a string) into $s (an integer). my $s = int($seg); print STDERR "Creating $seg out of $segId\n"; if ((! -e "$genomedir/seg$seg.posDB") || (! -e "$genomedir/genome.batch$s.mcdat")) { $s++; runCommand("/bin/sh $genomedir/create.sh $s") and die "Segment $seg failed.\n"; } $seg++; $seg = substr("000$seg", -3); } runCommand("/bin/sh $genomedir/meryl.sh") and die "Meryl failed.\n"; } elsif ($sge) { # Check if we need to submit pieces of the array, or if we can submit the whole thing. # my @ap; my $wholeThing = 0; system("mkdir $genomedir/sgeout") if (! -d "$genomedir/sgeout"); my $sgebuildname = "$sgename." . 
time(); my $seg = "000"; while ($seg ne $segId) { if (-e "$genomedir/seg$seg.posDB") { #print STDERR "Segment $seg finished successfully!\n"; } else { #print STDERR "Segment $seg failed.\n"; $ap[$seg] = 1; $wholeThing++; } $seg++; $seg = substr("000$seg", -3); } if ($wholeThing == $seg) { # Yippee! Submit all at once! # if (runCommand("qsub -cwd -j y -o $genomedir/sgeout/seg\\\$TASK_ID.out -t 1-$segId $sge -N $sgebuildname $genomedir/create.sh")) { die "SGE submission failed?\n"; } } elsif ($wholeThing > 0) { # Dang, we need to submit individually....or we can take five # minutes and figure out ranges to submit. # my $st; my $ed; my $it = 0; # +2 so that we run off the end -- ensuring that we submit # even the last batch of jobs. while ($it < $segId + 2) { if (!defined($st) && ($ap[$it] == 1)) { # SGE wants to start at 1, we start at 0. $st = $it + 1; } if (defined($st) && !defined($ed) && ($ap[$it] == 0)) { # SGE wants to start at 1, we start at 0. $ed = $it; } if (defined($st) && defined($ed)) { #print STDERR "submit $st - $ed\n"; if (runCommand("qsub -cwd -j y -o $genomedir/sgeout/seg\\\$TASK_ID.out -t $st-$ed $sge -N $sgebuildname $genomedir/create.sh")) { die "SGE submission failed?\n"; } undef $st; undef $ed; } $it++; } } else { print STDERR "All segments computed successfully!\n"; } if (runCommand("qsub -cwd -j y -o $genomedir/sgeout/meryl.out $sge -hold_jid $sgebuildname -N $sgename $genomedir/meryl.sh")) { die "SGE submission failed?\n"; } } else { die "HELP! I don't know how to run jobs!\n"; }