pax_global_header00006660000000000000000000000064121216240300014501gustar00rootroot0000000000000052 comment=44428cd6d2c6f24b7fe59e1333d883a5a1a11e12 samtools-0.1.19/000077500000000000000000000000001212162403000134325ustar00rootroot00000000000000samtools-0.1.19/.gitignore000066400000000000000000000000261212162403000154200ustar00rootroot00000000000000*.o .*.swp *.a *.dSYM samtools-0.1.19/AUTHORS000066400000000000000000000016241212162403000145050ustar00rootroot00000000000000Heng Li from the Sanger Institute wrote most of the initial source codes of SAMtools and various converters. Bob Handsaker from the Broad Institute is a major contributor to the SAM/BAM specification. He designed and implemented the BGZF format, the underlying indexable compression format for the BAM format. BGZF does not support arithmetic between file offsets. Jue Ruan for the Beijing Genome Institute designed and implemented the RAZF format, an alternative indexable compression format. RAZF supports arithmetic between file offsets, at the cost of increased index file size and the full compatibility with gzip. RAZF is optional and only used in `faidx' for indexing RAZF compressed fasta files. Colin Hercus updated novo2sam.pl to support gapped alignment by novoalign. Petr Danecek contributed the header parsing library sam_header.c and sam2vcf.pl script and added knet support to the RAZF library. samtools-0.1.19/COPYING000066400000000000000000000020751212162403000144710ustar00rootroot00000000000000The MIT License Copyright (c) 2008-2009 Genome Research Ltd. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.samtools-0.1.19/ChangeLog.old000066400000000000000000003432201212162403000157650ustar00rootroot00000000000000commit db2ad3e19068cbafde72ecde75d0638bbb3598ba Author: Heng Li Date: Thu Feb 16 14:45:17 2012 -0500 removed downsample.c commit 6c55c576903992c6fef148fe3b606fbc8bd10655 Author: Heng Li Date: Thu Feb 16 14:45:06 2012 -0500 print to output commit db1044a34e6049c87eaa63c39ed6e56f03e7d4c1 Author: Heng Li Date: Thu Feb 16 14:39:34 2012 -0500 removed sample Downsampling already exists in "view". View also keeps pairing while "sample" does not. 
commit ffdeed3e5d4a530bfdf6f9ba97fff0ba7add6cba Merge: 2daad7b accf026 Author: Heng Li Date: Thu Feb 16 14:22:15 2012 -0500 Merge branch 'master' of github.com:lh3/samtools commit accf0260fd1117e10047344345d40b31a9ec31bb Merge: 9134e0d c554160 Author: Heng Li Date: Thu Feb 16 11:21:14 2012 -0800 Merge pull request #8 from nh13/master Patches commit c554160df16ec7748cfdda4c7b54c641be7b809f Author: Nils Homer Date: Thu Feb 16 14:06:52 2012 -0500 * more README.md work commit 2a81ffe349208d917666808fbc9f3041e0cb57de Author: Nils Homer Date: Thu Feb 16 14:06:10 2012 -0500 * more README work commit fb3125f732715f62cded8685a23a002a96ce009b Author: Nils Homer Date: Thu Feb 16 14:05:19 2012 -0500 * more README work commit 444d41002c37e1c3d0f9208b4a88126c47276386 Author: Nils Homer Date: Thu Feb 16 14:02:13 2012 -0500 * updating README commit dec53cb1043fe7efadfde75fa2fd39b76de22e54 Author: Nils Homer Date: Thu Feb 16 13:55:01 2012 -0500 updating the README for markdown syntax commit 798da18c346dca8ec6005582a0ddb1d5420b04ca Author: Nils Homer Date: Thu Feb 16 13:48:35 2012 -0500 adding a README with the current differences between this repository and the official one commit 4d22d86c0f28636662f2144a88cd168e104c4275 Author: Nils Homer Date: Thu Feb 16 13:35:03 2012 -0500 adding "samtools sample" to the main commit 893c25a37c21005dc42f45d45e9ad78ddc5f29bb Author: Nils Homer Date: Thu Feb 16 13:33:51 2012 -0500 * removing some compile flags to work with OS X commit 7ac22f72fdc32edd5c24af6baebfa7db5faf8e7b Author: Jonathan Manning Date: Thu Feb 16 10:47:14 2012 -0500 Check write filehandle after opening for write. tamw/tamr is a union type, so change is only semantic. Signed-off-by: Nils Homer commit fef53330416631690f60fdff42b6e43d764170dc Author: Jonathan Manning Date: Thu Feb 16 10:44:59 2012 -0500 Catch and report invalid BAM header, instead of segfaulting later on. 
Signed-off-by: Nils Homer commit 5cc013fe4930bf9b6e7963aab1cd4a3c94f695bc Author: Jonathan Manning Date: Thu Feb 16 10:44:16 2012 -0500 Add downsample to examples. Signed-off-by: Nils Homer commit b3fa9e7071532905a81dc7aa48eadc24b8c8846b Author: Jonathan Manning Date: Thu Feb 16 10:43:48 2012 -0500 Adjust for leading hard clip on colorspace reads. Signed-off-by: Nils Homer commit 1a9296c1389469d1c1db5b8069f0e11ffcc8abb2 Author: Jonathan Manning Date: Thu Feb 16 10:42:52 2012 -0500 Add samtools sample command, contributed by Davide Cittaro . Signed-off-by: Nils Homer commit 2a804f3379748aeba944f1dec306dd726ff3235e Author: Jonathan Manning Date: Thu Feb 16 10:42:07 2012 -0500 Add samtools qa command, contributed by Roman Valls Guimera . Signed-off-by: Nils Homer commit 0f3207fe8fd93e44d40fcf57204079c8c06d24a6 Author: Jonathan Manning Date: Thu Feb 16 10:39:08 2012 -0500 Makefile cleanup - allow CC, CFLAGS, LDFLAGS to be passed on make command line. Use LDFLAGS in samtools compile. Signed-off-by: Nils Homer commit 6e7df604025f6a86881bf7f4a16f30e15d31538a Author: Jonathan Manning Date: Thu Feb 16 10:31:15 2012 -0500 Allow max_mem for sort to be specified with units. Signed-off-by: Nils Homer commit f12ebcaf6e60d34180a27d70e09b743cef140b98 Author: Jonathan Manning Date: Thu Feb 16 10:29:11 2012 -0500 Allow user defined [lowercase] tags in header elements. Signed-off-by: Nils Homer commit 50b931fa3312dc109537a4260698ddecd0f06a05 Author: Jonathan Manning Date: Thu Feb 16 10:27:11 2012 -0500 Check lowerbound in text entry box to avoid segfault in tview. Remove redundant call to bam_aux_get. 
Signed-off-by: Nils Homer commit 5e729da5190949a813d20d329eab7ddb661816bd Author: Nils Homer Date: Thu Feb 16 10:31:48 2012 -0500 * fixing overflow/underflow in integer parsing commit fa50a4330b9abedaf07c26e13d31f05e57f1d319 Author: Nils Homer Date: Thu Feb 16 10:30:40 2012 -0500 * updating help message for samtools depth commit 79e52c9624b6dd3bdfdf439f4b4bc6f774c230a4 Author: Nils Homer Date: Thu Feb 16 10:29:32 2012 -0500 * adding support for outputting a circos histogram file in "samtools depth". Use the "-c/-B" options. commit 2daad7b52daa86561c0fb65fe366691fad9f5ed3 Author: Heng Li Date: Thu Feb 16 09:31:57 2012 -0500 bugfix: wrong SP; missing DV in the VCF hdr commit 9134e0d5047c281ef3bd53da91771d4814a5131c Author: Heng Li Date: Wed Feb 8 11:19:12 2012 -0500 missing support of DV commit 34ebf12078c1d1015a0b8b9a9221243a60b22893 Author: Heng Li Date: Wed Feb 8 11:08:56 2012 -0500 new BCF DV format: number of variant reads commit 9589d3312fa2d076f48bdd68e2a5edd419c8070c Author: Heng Li Date: Tue Jan 10 10:30:27 2012 -0500 scale depth to quality (hidden option) commit 704473e14668333ecaca5fb7b238af405c43e3b1 Author: Heng Li Date: Tue Jan 10 10:18:17 2012 -0500 really nothing commit 01b307fd287962372bbf07461c88b54f41636817 Author: Heng Li Date: Wed Dec 7 13:07:42 2011 -0500 added an example containing 'B' commit c678791f0451ceb9205c1ab5c52c84641863c99a Author: Heng Li Date: Sat Dec 3 12:10:30 2011 -0500 'B' now moves backward w.r.t. 
the query commit 152119bc06a073933ca830e8e1407538e44626cc Author: Heng Li Date: Fri Dec 2 10:50:12 2011 -0500 better consensus; a little more robust commit 454da4754ac503edda5b1329b67757d797e46e07 Author: Heng Li Date: Fri Dec 2 00:20:22 2011 -0500 in pileup call remove_B() commit ff2bcac1cc078ba1879f18c89cfae314439d7086 Author: Heng Li Date: Fri Dec 2 00:17:32 2011 -0500 working on a few toy examples commit 745ca7260158d6df7897b52598033ffb055a9e4f Author: Heng Li Date: Thu Dec 1 22:55:39 2011 -0500 bam_remove_B(); not tested commit 07e4cdc7300abfcc82e03105b4689f95cab551cd Author: Heng Li Date: Thu Nov 10 12:58:55 2011 -0500 baseQ threshold on plain pipleup; removed -E commit 322ebf2082dfa91df44b3a996d26c85357e5d5a2 Author: Heng Li Date: Wed Oct 19 09:28:04 2011 -0400 fixed two gcc warnings commit a632457b4c4adc50d833b56b5a5231feafaf8193 Author: Heng Li Date: Tue Oct 4 10:13:23 2011 -0400 change size_t to uint32_t in bam_header_t This may cause issues on 64-bit big-endian machines. Reported and fixed by Paolo Emilio Mazzon. 
commit af31bf5a78aea03baf6eb90fe50076549d499f6e Author: Heng Li Date: Mon Sep 26 20:17:57 2011 -0400 rename pad2unpad to depad commit 77b198b73dfad1048e5d1c5a64aa75ee7b90f596 Author: Heng Li Date: Fri Sep 23 01:22:40 2011 -0400 convert padded BAM to unpadded BAM commit adb9e2342b7b7501d9527d3c23afab10469ae2c6 Author: Heng Li Date: Wed Sep 7 11:40:50 2011 -0400 generate template cigar with "fixmate" commit 46e5ab445a0fe880216cbc0daf1225725b569d7a Author: Heng Li Date: Fri Sep 2 12:50:18 2011 -0400 update kseq.h to the latest version commit 68e9e4a73eb91405bb3e56bf0cdaf12d1b487abb Author: Heng Li Date: Fri Sep 2 12:44:45 2011 -0400 Release samtools-0.1.18 commit aa06bdadb2d109a79f927f478102f96a1f5fd258 Author: Heng Li Date: Fri Sep 2 12:14:17 2011 -0400 updated the revision number commit 267e1e1b6e54c0ab24f94cd9aee9cbd2d1923f9f Merge: 19ff1d3 aebab30 Author: Heng Li Date: Fri Sep 2 12:13:08 2011 -0400 Merge https://github.com/lh3/samtools into reduce Conflicts: bam_md.c Fixed a few typos in the merge commit aebab302399c24eaa6c5ab79d13d6bd5e2e9ea9a Merge: c2c63d0 da62663 Author: Heng Li Date: Fri Sep 2 09:03:49 2011 -0700 Merge pull request #4 from peterjc/x_equals2 Implement basic support for =/X CIGAR operations commit 19ff1d3d7f47d7e61b121292aefe5a74bb8a18d2 Author: Heng Li Date: Thu Aug 25 16:38:12 2011 -0400 reduce BAM size (experimental) commit da626630fd98fd4e07ceb4d58c5c9a42d312a85d Author: peterjc Date: Mon Aug 22 06:58:08 2011 +0100 Support =/X CIGAR operations (treated like M) commit 461d8003529db77a4d5ecbd108312e868b051a3d Author: peterjc Date: Mon Aug 22 05:52:56 2011 +0100 Define CIGAR equals and X operationss (7 and 8) commit c2c63d067113baab41f3bc35fb28f4f00578accb Merge: 7ab3ef3 9a0ed9a Author: Heng Li Date: Thu Aug 18 17:21:54 2011 -0700 Merge pull request #3 from peterjc/x_equals Accept SAM files using = in CIGAR (treats X and = as M) commit 9a0ed9a6b85c7981465f459300208dbd93e3c6f5 Author: peterjc Date: Thu Aug 18 19:28:52 2011 +0100 Accept SAM 
files using = in CIGAR (treats X and = as M) commit 7ab3ef388c1eb34d7912fd70cc5656c955240263 Author: Heng Li Date: Mon Aug 8 10:22:22 2011 -0400 bugfix: indexing takes huge memory This happens when an unmapped mate has coordinate 1. Thank Joel Martin for the fix. commit a3f6738593e944354a8f75306687d8b3acf08bf1 Merge: a8bdca9 bc67ea2 Author: Heng Li Date: Mon Aug 8 09:52:26 2011 -0400 Merge branch 'master' of github.com:lh3/samtools commit bc67ea225da653f36a70b38382d6111dd494f659 Author: Petr Danecek Date: Thu Jul 28 20:03:16 2011 +0100 Variant Distance Bias commit deb578f0c49d0b7d8c3bc6be220b4d67e2e7dfdf Author: Petr Danecek Date: Tue Jul 26 09:57:37 2011 +0100 If there is only one RG tag present in the header and reads are not annotated, don't refuse to work but use the tag instead. commit a8bdca9cf482a637b89ee4f98469a93e0ab5e69b Author: Heng Li Date: Mon Jul 25 10:10:55 2011 -0400 bugfix: LRT2=nan commit 0afe33137d046a3e849eeb4a54590f27cbad4228 Author: Heng Li Date: Fri Jul 22 21:55:38 2011 -0400 fixed a bug/typo commit 62d5849658c10222d40308c6b53ab4f99a448494 Author: Heng Li Date: Fri Jul 15 16:04:19 2011 -0400 allow to set see in subsampling commit 5f46243824cc9435b167973e1d51e13128794ea1 Author: Heng Li Date: Fri Jul 15 15:54:47 2011 -0400 support subsampling commit 5e55b6f34fc86cba7cf98d52ccaed405c3ffabbc Author: Heng Li Date: Fri Jul 15 15:53:38 2011 -0400 support indels commit f31c162926d6f43e8b60171789a258d02e1f9be5 Author: Heng Li Date: Thu Jul 7 17:02:33 2011 -0400 do not count indel with "view -Y" commit e412dae587883b4c17e5fbf4b7c33f38bfa8458a Author: Heng Li Date: Thu Jul 7 00:35:25 2011 -0400 for WIN32 compatibility commit 70a52501bcfa63824749893a5ab8ed3c38e34958 Author: Heng Li Date: Thu Jul 7 00:32:46 2011 -0400 for WIN32 compatibility commit 00438f14ed5984f08e8f7645a9b95644a812f969 Author: Heng Li Date: Wed Jul 6 23:41:45 2011 -0400 fixed an uninitialized variable commit 7609c4a01059c326544b3d0142dfe9c4229d68c6 Author: Heng Li Date: Wed Jul 6 
23:39:31 2011 -0400 fixed an uninitialized variable commit cec7189a412f80ccb068a73bd28528915c16b0bf Author: Heng Li Date: Wed Jul 6 22:53:19 2011 -0400 Release samtools-0.1.17 commit 93c06a249de3bb666029bf07b66de5e8e5e314fa Author: Heng Li Date: Wed Jul 6 09:46:09 2011 -0400 bugfix: incorrect idxstats for the last seq Again, this bug is caused by 3rd-party code for the sorting order checking. commit 84f6ca62db6e27b8c4c711e7b5f3ca704bf27b4f Author: Heng Li Date: Tue Jul 5 23:30:23 2011 -0400 output mapping quality in the old pileup format commit 362e05fd670886acaede69b864903d730b9db3ca Author: Heng Li Date: Tue Jul 5 21:59:22 2011 -0400 added a brief description of the VCF format commit e690a696468205e0cc4560016361c997660dd496 Author: Heng Li Date: Tue Jul 5 16:23:10 2011 -0400 improved samtools manual page commit 362b4a1408ef3c32311d638aa8d85ce39c1c7b2d Author: Heng Li Date: Tue Jul 5 15:58:29 2011 -0400 merge bcftools.1 to samtools.1 commit 643e0e61ba7266efbc9e5bfcb8e41f369ba2ce0a Author: Heng Li Date: Tue Jul 5 13:39:02 2011 -0400 mpileup: when region set, set reference properly commit 613e4d67624a94f62563935fbd5cc294df69605a Author: Heng Li Date: Mon Jul 4 23:29:02 2011 -0400 compute the min PL diff commit 5b7d5d3f52b97ca42c8500eede808dab88a46a53 Author: Heng Li Date: Mon Jul 4 22:57:48 2011 -0400 rename trio.c to mut.c commit 84fe96ad64b0365ead93a4115d1684b9bebb98fc Author: Heng Li Date: Sun Jul 3 15:38:51 2011 -0400 added pair caller interface; not tested commit 2f2867b87b84c35319cc416d6173819d5c8a4e8c Author: Heng Li Date: Sun Jul 3 15:24:23 2011 -0400 inital implementation of a pair caller commit e97653cf2ad653c95886933c42a2b5492ccab5ff Author: Heng Li Date: Sun Jul 3 00:06:28 2011 -0400 convert bam to single-end fastq commit e8013e11f7a8db0a8d18c60d130169cca39bf2bd Author: Heng Li Date: Sat Jul 2 14:39:18 2011 -0400 improve BED parsing commit 1025714325fdc636aeee47a76db8dafbbbfde64b Author: Heng Li Date: Fri Jul 1 14:19:54 2011 -0400 update the manual page 
commit 8022d0039dff47b1c11b2421357d510c1f28ae15 Author: Heng Li Date: Fri Jul 1 14:17:03 2011 -0400 output the best constrained genotypes in trio commit 18c87295e12f5bebafdcae00d52000fb94c8a566 Author: Heng Li Date: Fri Jul 1 11:18:14 2011 -0400 added documentations for view -T commit daf7a8d96bd495296bf7c7d99cddb808a3ced7d5 Author: Heng Li Date: Thu Jun 30 22:45:20 2011 -0400 fixed a bug in writing SP commit e5c32bf9b28c6e3e861db88de56b5dbe11058b61 Author: Heng Li Date: Thu Jun 30 22:35:25 2011 -0400 optionally output read positions in mpileup commit 1008051155ec994c1901e18f3eb03ea32a62e5d7 Author: Heng Li Date: Thu Jun 30 22:17:25 2011 -0400 make faidx works with <2GB lines commit 2daebb63762425dd3074ddf71582ad189001e394 Author: Heng Li Date: Thu Jun 30 17:28:58 2011 -0400 fixed an issue in the trio caller and the indel caller commit 9fdd52cf0716fb342a94946433d564b28b230835 Author: Heng Li Date: Thu Jun 30 13:34:01 2011 -0400 Added trio caller; NOT tested yet commit ea22a8ed83625e9c82382b56acc42a2d9cfd17e5 Author: Heng Li Date: Thu Jun 30 11:42:29 2011 -0400 convert PL to 10-likelihood GL commit 10d7065267b0d12c2bfcb6c70204fb6944cd395d Author: Heng Li Date: Thu Jun 30 10:49:05 2011 -0400 fix a compatibility issue with the new bcftools commit d340f01f609c61b719d38a6a55629a3fc899e1cd Author: Heng Li Date: Sun Jun 26 23:41:20 2011 -0400 allow to ignore RG commit d6321faf98ebfe899b9409fb23c90a4aa8c6b542 Author: Heng Li Date: Sun Jun 5 23:05:21 2011 -0400 fixed a bug in SO checking due to a recent change commit bc995abf666d0c9ab4258f6c1b3518a45a89209f Author: Heng Li Date: Fri Jun 3 14:45:36 2011 -0400 update the version number commit 9e7cd83a08383858d008e0ccb2238a2b93831d6c Author: Heng Li Date: Fri Jun 3 14:43:12 2011 -0400 smarter way to parse a region string commit e58a90a0fde54053dac65352b34c13c3fea815fc Author: Heng Li Date: Wed Jun 1 14:36:22 2011 -0400 output LRT2 instead of LRT1 commit 08f78c9af3e5661f04f80bef424232de721dba03 Author: Heng Li Date: Wed Jun 1 
14:02:28 2011 -0400 genotype test, but assuming 1-degree commit 587b852340d7e60f6f7cf474a92ef77aeab46018 Author: Heng Li Date: Wed Jun 1 12:55:19 2011 -0400 perform 2-degree test by default commit 3d38e403c5c830478b7eb157a484776997440501 Author: Heng Li Date: Wed Jun 1 12:44:34 2011 -0400 fixed a typo; but the result is still not good commit 06291624f7dcc57445676f3be25d0bc355dd7110 Author: Heng Li Date: Wed Jun 1 12:24:18 2011 -0400 fixed a typo commit 63b98aa33636b0d82a435bf49153c8c1502e7d42 Author: Heng Li Date: Wed Jun 1 12:23:37 2011 -0400 added HWE+F<0 filter commit 37d926e8999999b593d0637ab7dc379dbd3d6006 Author: Heng Li Date: Wed May 4 10:11:59 2011 -0400 improved sorting order checking in index Patches from Jonathan Manning commit 1c2dc6762c5f7cd946046b53346513f2f9761dbf Author: Heng Li Date: Tue May 3 23:09:05 2011 -0400 added r^2 estimate; added Brent's method commit c2d3bcd8f98e31668b5f1321222fbc6fd6336e75 Author: Heng Li Date: Sun May 1 23:45:23 2011 -0400 combine several utilites into vcfutils.lua commit be2e7362d7593ea4d03fb33cdb6af2aa096ca6c4 Author: Heng Li Date: Wed Apr 27 21:09:22 2011 -0400 minor warning commit 683ef0443860813d743cf84fa86dda9bfaf5445a Author: Heng Li Date: Wed Apr 27 10:10:38 2011 -0400 added versioning commit ed72f25ec85671f7646dbc92fa7b5b1dda427f7d Author: Heng Li Date: Wed Apr 27 10:04:02 2011 -0400 Output ML allele count commit 2a9e36d2d6c405b2411ca47458f028ada8fe1000 Author: Heng Li Date: Tue Apr 26 16:14:20 2011 -0400 use ar -s commit 7a4f54e6dbcd7c94acbb3f1050a93f94b8a07949 Author: Heng Li Date: Sat Apr 23 01:22:31 2011 -0400 added another type of LRT commit b9c5e84762a4aacce3a3771b51ea80967c79a2e5 Author: Heng Li Date: Fri Apr 22 16:00:31 2011 -0400 added version commit 8fad6677c5952efd67391581d64e67e02e7f6e68 Author: Heng Li Date: Fri Apr 22 00:30:19 2011 -0400 remove the pileup command commit 3a962fb6ebf779de70f9e6effb2d8701a9aa3dd9 Author: Heng Li Date: Thu Apr 21 23:10:45 2011 -0400 Release 0.1.16 (r963:234) commit 
b4d683cffbd98c43f05aff8610b37d63dd7e54aa Author: Heng Li Date: Thu Apr 21 12:44:44 2011 -0400 fixed a bug when coordinate-less reads are on the reverse strand commit c5ec45a128f409debc6a56a798024f53004037dc Author: Heng Li Date: Wed Apr 20 11:36:52 2011 -0400 added option '-f' to merge to avoid overwritting commit 68d431531370d24907c01a27f166f2341d7c4d35 Author: Heng Li Date: Wed Apr 20 10:26:58 2011 -0400 do not print a warning commit 32922607e51ad2260c337eb022b9e4aedacb049f Author: Heng Li Date: Wed Apr 20 10:21:06 2011 -0400 Added ldpair to compute LD between requested pairs commit b8d6fa71b91678fa02338257e0707d1e5ca098dd Author: Heng Li Date: Sun Apr 17 21:51:43 2011 -0400 On a toy sample, type "B" seems to be accepted commit 0e7ee9a6bb4029184202aa6e6738105ba0c0510b Author: Heng Li Date: Sun Apr 17 21:21:20 2011 -0400 added type "B"; not tested yet commit a513dfad0ac0062b03871eb6ecf26cb8d18dc895 Author: Heng Li Date: Sun Apr 17 19:25:54 2011 -0400 fixed a bug in bedidx.c: input BED not sorted commit de1e192bb0a8a762a54a6eee81d882fab01c3d32 Author: Heng Li Date: Sun Apr 17 18:51:08 2011 -0400 by default, always perform posterior chi^2 commit df6e0d1099895fc6cd7a19dc89fba95ed6654d35 Author: Heng Li Date: Sat Apr 16 12:33:28 2011 -0400 added debugging commit 8ce52e024dc2ef361dbd5399c232163055057e70 Author: Heng Li Date: Sat Apr 16 00:59:05 2011 -0400 avoid a segfault given wrong input commit e66b6684fc9a397f91ec29fdeecae9f8eb986a55 Author: Heng Li Date: Fri Apr 15 19:55:39 2011 -0400 do not segfault when there is no PL commit 9ce3c584ec0cebfa45576f2ef538df4dad2b7e55 Author: Heng Li Date: Fri Apr 15 11:59:55 2011 -0400 remove another unused part commit f53a051d68bf312ac8d5865210fae7a9808c0fb9 Author: Heng Li Date: Fri Apr 15 10:41:25 2011 -0400 print G3 if HWE is small commit 4b2c08bb86ca4ed4959e4cb77a28f7d6fc19f5c9 Author: Heng Li Date: Fri Apr 15 10:04:34 2011 -0400 fixed a bug actually not fix, but hide it commit 088e13c32453fb533b7bb1c65a573f9b90a23625 Author: 
Heng Li Date: Fri Apr 15 09:48:47 2011 -0400 added LRT based permutation; not used though commit 1e3c2001afcb80b5eaa4c3f88df9da7b01b62524 Author: Heng Li Date: Fri Apr 15 09:28:55 2011 -0400 Perform posterior contrast for small LRT Posterior contrast is much slower than LRT. Nonetheless, posterior P-value is more robust to sequencing artifacts. Thus we may combine the two to achieve a balance between speed and low FPR. commit 6f1b066270902198a7175ff6c1b05ebc8d1919be Author: Heng Li Date: Fri Apr 15 01:36:06 2011 -0400 Added Brent's method commit 3d061e5db25b67b25f6ff87afe4162e121354232 Author: Heng Li Date: Thu Apr 14 23:30:10 2011 -0400 fixed a typo in printing commit 7fd14ceb5990bb350b8e97346ef3537d80058def Author: Heng Li Date: Thu Apr 14 23:14:23 2011 -0400 fixed a stupid bug commit f5b2c3459ec098b3cafd9619b9077132516baf58 Author: Heng Li Date: Thu Apr 14 22:42:35 2011 -0400 separate EM and posterior Now, constrast is not performed unless -C is in use. EM can be invoked independently with -e without computing the posterior. 
commit 9eefcac963697fae554789b11ae3cb2c23f224d0 Author: Heng Li Date: Thu Apr 14 22:00:19 2011 -0400 further code cleanup; prepare to add EM interface commit c2cce52355262743711e4742b0c8542bfcab1cdd Author: Heng Li Date: Thu Apr 14 21:44:03 2011 -0400 drop EM from prob1 commit 24016f04bd3bdffb7eeb50cb25854f5007feb70f Author: Heng Li Date: Thu Apr 14 21:08:33 2011 -0400 drop posterior LRT; prepare for clean up commit 3670d8bd88c3eb22873f0a80e2a5913f64ca8c9a Author: Heng Li Date: Thu Apr 14 20:57:43 2011 -0400 better initial values for LD commit d48a8873c060b18b57799cfe3a0e5496ba069457 Author: Heng Li Date: Thu Apr 14 20:36:25 2011 -0400 finished EM commit b101f2db476188a950c23f5c1b6185fdb7f8f40b Author: Heng Li Date: Wed Apr 13 01:19:04 2011 -0400 genotype frequency estimate commit d79bdcbf6242ecfb8accba9ac9a22fbcbd543cf2 Author: Heng Li Date: Wed Apr 13 00:37:22 2011 -0400 prepare for code clean up commit e0ce416abfc094f0c090957080b1404fd0edf752 Author: Heng Li Date: Wed Apr 13 00:34:15 2011 -0400 rename ld.c to em.c commit 45ede3ad181f35c1be24bed5d75841e472357ab7 Author: Heng Li Date: Wed Apr 13 00:22:10 2011 -0400 implemeted EM likelihood ratio test The idea is learned from a brief chat with Rasmus Nielsen. 
commit 0454a346b60e42b75a2f742272089810279c7131 Author: Heng Li Date: Tue Apr 12 15:45:52 2011 -0400 added likelihood-ratio test (idea from Nick) commit f6287c8646c690440a1554c8958e7268f4134dc2 Author: Heng Li Date: Sun Apr 10 18:24:37 2011 -0400 Release samtools-0.1.15 (r949:203) commit de6023f38f4d652438557cf7a0ac6eec324e7416 Author: Heng Li Date: Sun Apr 10 15:54:58 2011 -0400 improved help information commit d3b337f2b7eda1e6f8f5575a19d1b5ed55cae279 Author: Heng Li Date: Sat Apr 9 16:28:01 2011 -0400 fixed a minor issue commit 82f6e4f49247e75fbd8ec08c285b8d3047b3d235 Author: Heng Li Date: Sat Apr 9 15:49:04 2011 -0400 separate QC-pass and QC-fail reads commit 8362b4a255081ee7ca0a4ca2eabc8c76758b6863 Author: Heng Li Date: Fri Apr 8 17:45:19 2011 -0400 added verbose level commit f7bf419c290462be7d289249a4a6d28f825b4c93 Author: Heng Li Date: Fri Apr 8 16:08:14 2011 -0400 fixed a bug commit 890cbb1ac93b3004fb6cf42ff47195077dcfc8ad Author: Heng Li Date: Fri Apr 8 16:00:37 2011 -0400 drop unrelated @RG when "-R" is in use commit a62dc929c950fb51311b705f5b5bfba8e3f704d7 Author: Heng Li Date: Fri Apr 8 16:00:14 2011 -0400 skip header validation commit 39da810e2c56c8f0eff1ab726600b41f26d3d8e9 Author: Heng Li Date: Tue Apr 5 23:52:22 2011 -0400 change error message commit c0c50a34df250ef8a7a29b172058cd229be582b5 Author: Heng Li Date: Tue Apr 5 23:50:46 2011 -0400 fixed a bug caused by recent modifications commit 25226e8c468404cb5e1b5272efcea57e4193c762 Author: Heng Li Date: Tue Apr 5 13:31:19 2011 -0400 reduce the indel filtering window commit 5e18d7014437734f9dac9ab45a95e43ec2526101 Author: Heng Li Date: Mon Apr 4 13:56:20 2011 -0400 only output hwe if it is small enough commit 614941fb7dd276de662e7820eb8c7bae871a18cc Author: Heng Li Date: Mon Apr 4 13:34:02 2011 -0400 added HWE back commit 7abe8825aa0bacccdeb38125934ae94d18f0ad4d Author: Heng Li Date: Mon Apr 4 12:46:24 2011 -0400 EM estimate of genotype frequency commit 2bfeff9c645d177416664f1cb811e85cac3ff9e3 Author: 
Heng Li Date: Mon Apr 4 11:29:12 2011 -0400 minor commit 401e40647e7e3abbac6e4ec3d8bb68eb6f2d401b Author: Heng Li Date: Mon Apr 4 11:24:04 2011 -0400 Added genotype freq estimate and association test commit 6cc226df6e3b480f1bd6e763ce8ef47f785bbb74 Author: Heng Li Date: Sun Apr 3 20:57:23 2011 -0400 minor changes commit 7e47a39630e812f09b80369f14606245976f687e Author: Heng Li Date: Fri Apr 1 15:21:59 2011 -0400 print the grayscale commit 2f675d9c0dde3c166c99e335fa17c7873a5ae8d5 Author: Heng Li Date: Fri Apr 1 08:55:16 2011 -0400 change to comment commit 0592bb514994544ed84f51e509b233cf8821e0cf Author: Heng Li Date: Fri Apr 1 08:54:35 2011 -0400 added base quality filtering commit fc1b47e04a7b94f6362c45856cbeb89d9d0b5ca5 Author: Heng Li Date: Thu Mar 31 23:31:14 2011 -0400 fixed a few typos in comments commit 60be79bc8f0d24656e5e8a329af7e9b5b91d4c8b Author: Heng Li Date: Thu Mar 31 23:13:23 2011 -0400 comments commit 2432864acc25ebe5cee4217dbb0120439077a7f8 Author: Heng Li Date: Thu Mar 31 22:42:46 2011 -0400 added bam2depth.c, a demo program commit 39625f7c6bea9ccbfd9af0feb22348d52079f012 Author: Heng Li Date: Thu Mar 31 16:37:22 2011 -0400 added bgzf_check_bgzf() (used by tabix) commit 6de6bd3fb67fd22753a5f07d4cc25bf94e1b5a8c Author: Heng Li Date: Thu Mar 31 16:37:08 2011 -0400 fixed a bug in bedidx.c commit 3b9e257d25b2e81eed1625bc5d2882ed486ef20e Author: Heng Li Date: Wed Mar 30 13:27:15 2011 -0400 added bed support to bcftools commit 47bcce3d14ec4d205283b61e5e653803996c42e0 Author: Heng Li Date: Wed Mar 30 12:56:40 2011 -0400 Added BED support to "samtools view" commit a812386017faedfc86c0e6562adbb2138329cfeb Author: Heng Li Date: Wed Mar 30 12:47:04 2011 -0400 support BED file commit 3052dddc929f1825e6e7f7f6f6724d9465d6cf9a Author: Heng Li Date: Mon Mar 28 15:51:55 2011 -0400 relax RG matching; proper mismatching message commit f86d60c8fe25785523f01fae1486d2a6df4ee6ef Author: Heng Li Date: Sat Mar 26 10:38:23 2011 -0400 Avoid reporting association when 
something unexpected, which I do not understand, happens. commit dd41e6b26fd9fe30218748b9a0a1f49bdb1862b9 Author: Heng Li Date: Sat Mar 26 10:38:01 2011 -0400 Added -1 to merge commit 4a0364b0d7f87f1c88d71ec5857a1f1d40710681 Author: Heng Li Date: Wed Mar 23 16:56:55 2011 -0400 plot pairwise r^2 commit 452629a711582e612bec22b3b082e234bd37039b Author: Heng Li Date: Wed Mar 23 14:31:01 2011 -0400 pairwise LD; case-control AF2 commit 52862951adcaecde26ba8f0d9c1897944640a674 Author: Heng Li Date: Mon Mar 21 23:03:14 2011 -0400 Release samtools-0.1.14 (r933:170) commit 59a5a8ba8e2940f0e38238f9339f02c91a8a0ce4 Author: Heng Li Date: Mon Mar 21 13:52:55 2011 -0400 optionally skip loci with too low sample coverage commit 6434264b5c69514d4fafe62cbd30b3bbaddc1d41 Author: Heng Li Date: Sat Mar 19 14:38:25 2011 -0400 mpileup support Illumina1.3+ quality; skip non-variant sites when "view -v" is in use commit 5f59e01987e1d5eca7d6359cae64a9734b18beea Author: Heng Li Date: Fri Mar 18 17:19:18 2011 -0400 update version to r933:167 commit 4d2c3c950910aa3d2c87760c3532e458fe01c0fa Author: Heng Li Date: Fri Mar 18 16:25:01 2011 -0400 added "-1" to the command-line help commit 55313a015a7bd6369cf5a66fed7fab2333201dc9 Author: Heng Li Date: Fri Mar 18 16:22:12 2011 -0400 added the "cat" command (by Chris Saunders) commit b670272cadf3efa4dc456ac4c76104f73477d60d Author: Heng Li Date: Fri Mar 18 15:59:46 2011 -0400 support varying the compression level commit c5dd3c9ca5f75f880e52c8cd2beae983bcb8d3b1 Author: Heng Li Date: Wed Mar 16 14:33:45 2011 -0400 update the manual pages commit 12fb4b596dc51bccd154fc4bd0593442f7937a46 Author: Heng Li Date: Wed Mar 16 12:49:26 2011 -0400 update changelog commit e7fe4fd66e02d60a1ca7952ad1938809e77729a9 Author: Heng Li Date: Wed Mar 16 12:10:05 2011 -0400 do not call indels when the depth is very high commit 7455eeaa32b949bb3856f75810890aabf7cacb18 Author: Heng Li Date: Wed Mar 16 11:56:56 2011 -0400 code clean up commit 
5f16679e54ced8e67a75d949f9175c50480b914e Author: Heng Li Date: Tue Mar 15 14:45:24 2011 -0400 when -s is specified, change the sample order commit 7ba95adee09d3b06a7eaf797d25efef837e592f5 Author: Heng Li Date: Tue Mar 15 14:11:42 2011 -0400 compute the rank in permutation commit d219783cea7643fc7e10e1bd3a98e9b3165b4506 Author: Heng Li Date: Sun Mar 13 21:35:13 2011 -0400 I have found a SERIOUS BUG!!! commit 8e20d04ecdac1a7788eef71c4bb91b8479cf7150 Author: Heng Li Date: Sun Mar 13 17:04:04 2011 -0400 optionally shuffle samples in a BCF (debugging) commit fc7b261f181f2a411427bc9ee5d586c883ca9cdc Author: Heng Li Date: Fri Mar 11 09:34:20 2011 -0500 fixed a bug commit b3bbcc3d40994ae85705ab6fef9866ec8c142201 Author: Heng Li Date: Thu Mar 10 20:25:59 2011 -0500 use mode instead of mean commit f1161262d137098a19143b5cb0de810e5db3243e Author: Heng Li Date: Thu Mar 10 20:09:16 2011 -0500 start from the mean instead of the mode commit 2ba56f5e99e90674855c4ffc8bf583340b932e1e Author: Heng Li Date: Thu Mar 10 17:13:34 2011 -0500 fixed an error in Chi^2 test commit b4ce7ae400290bc43dd287240479667f99b3b11e Author: Heng Li Date: Thu Mar 10 00:23:39 2011 -0500 minor commit 8487fa5d3a73a43443964e731ea2a4c873c9d4e5 Author: Heng Li Date: Wed Mar 9 21:33:19 2011 -0500 added -F to accept BCFs generated by old samtools commit fd51d2093f7fd775a7eaaeea57fa34716ab59ac2 Author: Heng Li Date: Wed Mar 9 17:39:09 2011 -0500 update version commit b6da54335df943015a998a934075331b467abb5b Author: Heng Li Date: Wed Mar 9 17:37:14 2011 -0500 compute pseudo-chi2 probability commit 9f73cefdb8935421d872b989dd98fbc8e1295029 Author: Heng Li Date: Wed Mar 9 15:54:04 2011 -0500 remove a comment which is wrong commit b10b1e47ece522e97ab8ef23417bcb6454f8b9db Author: Heng Li Date: Wed Mar 9 15:51:12 2011 -0500 clean up commit 353bfae2c6ff59205bd9223db04084cf7f507f01 Author: Heng Li Date: Wed Mar 9 15:45:29 2011 -0500 for backup commit 53915d1c6410c2537d18bfa8eb8c657a2233c35e Author: Heng Li Date: Wed Mar 9 
15:27:56 2011 -0500 having debugging code commit 0d0dbf66995b1511390d593981eae7b5d36fe17b Author: Heng Li Date: Wed Mar 9 14:58:23 2011 -0500 temporary backup commit 5b74a174a8b637dee43b7f30250df6fb96580e12 Author: Heng Li Date: Tue Mar 8 15:46:11 2011 -0500 the output makes sense, but there may be a typo... commit d81ec654b6c0c1eef6b0625d96f14b3155cee7c6 Author: Heng Li Date: Tue Mar 8 15:19:09 2011 -0500 added contrast2(); fixed a bug in haploid mode commit 0cfd896fad5f7737cca49efa94a11892dafcd812 Author: Heng Li Date: Mon Mar 7 21:40:17 2011 -0500 fixed a bug in haploid genotyping commit ccd52155ef61273f2b42ad9c7b31ff1915f81b24 Author: Heng Li Date: Sat Mar 5 18:10:35 2011 -0500 fixed a few bugs; still not fully working commit edc3af753f96f831968ae32f2e0f915b74f31e6e Author: Heng Li Date: Fri Mar 4 17:31:33 2011 -0500 drop HWE calculation commit 92dac194debb66ca0718c21c871822dda2dd5bc1 Author: Heng Li Date: Fri Mar 4 17:28:35 2011 -0500 implemented hap/dipoind mode; probably BUGGY! commit 7f26804bc27937e36fdc967e5c76514653ea40f5 Author: Heng Li Date: Fri Mar 4 16:01:27 2011 -0500 read ploidy commit e7b7213475b5e61a69aab77ffb02b4983c8e7678 Author: Heng Li Date: Fri Mar 4 14:12:14 2011 -0500 added math notes commit 46023e2f21321da83fc8e83e9229757a4e821acb Author: Heng Li Date: Fri Mar 4 13:34:10 2011 -0500 update BCF spec commit 13190c49eeb006ad7013b7f1e9fc1b3beca3ae78 Author: Heng Li Date: Tue Mar 1 14:45:19 2011 -0500 Release samtools-0.1.13 (r926:134) commit be8fabbb6001d9fd5263a70a3e21ed6dfe5a9837 Author: Heng Li Date: Tue Mar 1 14:07:15 2011 -0500 prepare to finalize 0.1.13 commit 1e8c753660978bed7e9289fe50becd596d9314bb Author: Heng Li Date: Tue Mar 1 09:40:17 2011 -0500 allow to change whether to drop ambiguous reads commit 412210bfdb46606023f2e4b9086f2787f0cf1c62 Author: Heng Li Date: Mon Feb 28 22:01:29 2011 -0500 revert to the old behavior of phase commit 46035589518cf84738de8666b866e2619457c1fb Author: Heng Li Date: Mon Feb 28 16:46:23 2011 -0500 change 
version number commit 7f40c33e37fc16fcb0a375ce46ae1d09cafb6d50 Author: Heng Li Date: Mon Feb 28 16:37:42 2011 -0500 bugfix in indel calling: interger overflow commit 75849470efbe30042e5ddd516f9bcbe3b9bf6062 Author: Heng Li Date: Mon Feb 28 15:35:47 2011 -0500 fixed a typo commit 9e6fb569885f906fabaab7fc2f02eae82f4bd602 Author: Heng Li Date: Mon Feb 28 15:34:09 2011 -0500 minor changes to heuristic rules commit 30a799a91f5e2c10b761aa5437f902c6649fceb3 Author: Heng Li Date: Mon Feb 28 15:20:26 2011 -0500 fixed a bug in the latest change commit e21ba9df950ea37f5c1b35c2af9ba9a4e0bba02a Author: Heng Li Date: Mon Feb 28 12:47:06 2011 -0500 put version in bam.h commit 918b14780c1dceb39c7010638ecd61c626e17166 Author: Heng Li Date: Mon Feb 28 12:00:38 2011 -0500 frag_t::phased==0 reads are dumped to chimera.bam commit 657293c7bdba3ac69f53cd1ffa2874ed8756475e Author: Heng Li Date: Mon Feb 28 11:05:29 2011 -0500 change default -q to 37 (previously 40) commit 33d8d3bea76e466798ea322d68d34deb8d2dff06 Author: Heng Li Date: Mon Feb 28 10:39:57 2011 -0500 fixed a minor bug in BAM reading commit daa25d426d42465d76c7317c95772bbb36bb3f47 Author: Heng Li Date: Sat Feb 26 21:07:24 2011 -0500 suppress gzopen64() warning commit 9cec4256eb9e7848d4711adb67b540659c141e32 Author: Heng Li Date: Fri Feb 25 22:14:52 2011 -0500 fixed a long existing bug in vcf2fq commit 304487c83067a733add71cbc3886fa8c49f7ef2a Author: Heng Li Date: Fri Feb 25 16:37:40 2011 -0500 change version number commit 10ba6bf4f16692760f696f7b17f3719065786f77 Author: Heng Li Date: Fri Feb 25 16:34:08 2011 -0500 Change the order of PL; change SP to int32_t commit c5cc2a8036a9c3579fbfde651efec4f6763b0228 Author: Heng Li Date: Fri Feb 25 14:52:03 2011 -0500 claim X defined in the header commit 4ee8cb29f6092fd14a89f0cc5d3575112a204f39 Author: Heng Li Date: Fri Feb 25 14:40:24 2011 -0500 minor changes commit 00065e9336a2831dc53bee7da2f4719845be1a2a Author: Heng Li Date: Fri Feb 25 11:39:06 2011 -0500 fixed an error in the BCF 
spec commit 1e2a73afcb72a02aa448718cb017c0438de89f90 Author: Heng Li Date: Fri Feb 25 11:36:40 2011 -0500 update BCF spec commit dbf8eedaa38a405cb2fba5b3952b85776f51d035 Author: Heng Li Date: Fri Feb 25 11:28:43 2011 -0500 update BCF spec commit eed1d91af9fad3c9d965333a55e623757f9c4e9d Author: Heng Li Date: Fri Feb 25 09:51:39 2011 -0500 fixed a flaw in targetcut commit 59bc980bb832b92a8b0cc244cf106e6150e4db6f Author: Heng Li Date: Fri Feb 25 00:54:35 2011 -0500 update manual page commit fcc4738c4abdca79e3de159e21208df1b98ac76c Author: Heng Li Date: Fri Feb 25 00:45:39 2011 -0500 update version format commit 5748639ae542b7f6b853562edc2bb3faf43030e4 Author: Heng Li Date: Fri Feb 25 00:45:12 2011 -0500 update version number commit 06b44cc366cf27ce8976ee6a05810a0b3c48b56d Author: Heng Li Date: Fri Feb 25 00:44:21 2011 -0500 update version number commit ab7f4529d12739ff66fd4c09af9d992ab59c53ef Author: Heng Li Date: Fri Feb 25 00:42:55 2011 -0500 various help message commit a092e1f6f963272f8bb23616986ddaf604fd0f82 Author: Heng Li Date: Thu Feb 24 23:43:13 2011 -0500 disable unfinished functionality commit f00a78db72b14ee4c6689fc13f20ed31aeaecd40 Author: Heng Li Date: Thu Feb 24 10:04:56 2011 -0500 added "const" to bcf_p1_cal() commit 91049c4a8db3bf50dcc9d07506f22fa4ca5b5a96 Author: Heng Li Date: Wed Feb 23 11:53:47 2011 -0500 randomly allocate unphased reads commit f4405354a8d4cb3441141fa734573031059d7f57 Author: Heng Li Date: Tue Feb 22 15:36:07 2011 -0500 fixed a typo commit 3075e4dc5c7c9d954426aabda6a73fa788357100 Author: Heng Li Date: Tue Feb 22 15:33:40 2011 -0500 make output more informative commit 628cf3235e2815a40acf089fb1d3357be6437787 Author: Heng Li Date: Tue Feb 22 14:50:06 2011 -0500 change the scoring rule; change default k to 13 commit f22fd99831e4b5c74f898719216f359dbe987bbf Author: Heng Li Date: Tue Feb 22 14:45:15 2011 -0500 update scoring in masking commit 2f23547b81984555032aa0eefd064b8e07986fdc Author: Heng Li Date: Tue Feb 22 14:37:17 2011 -0500 
remove dropreg() commit 4d8b6b1f1f331ca9041983c66e34a857c3b8f1bb Author: Heng Li Date: Tue Feb 22 13:10:16 2011 -0500 accept files from stdin commit 9b50c5038e6fc0185e29ca5b50fe0806a9a939b9 Author: Heng Li Date: Tue Feb 22 11:16:57 2011 -0500 fixed a bug in consensus generation commit 1332ab32fb788fdc81b2ba8653b905d106238fad Author: Heng Li Date: Mon Feb 21 22:53:23 2011 -0500 print dropped fragments commit a288761b4ca1584e51076a71cbc4d72fe923dda1 Author: Heng Li Date: Mon Feb 21 22:37:04 2011 -0500 bugfix: singletons are not phased commit 683365f534c0223dea7d72532015ac16a45ba22b Author: Heng Li Date: Mon Feb 21 17:27:10 2011 -0500 output singleton blocks commit 841a4609084d81f1bc81e0b00dd806002461e7d9 Author: Heng Li Date: Mon Feb 21 15:58:55 2011 -0500 fixed a bug; not working with -l right now commit fdd57ea31732b5516dc212d72174b60206952636 Author: Heng Li Date: Mon Feb 21 15:17:00 2011 -0500 skip mapQ==0 reads commit 4eb6ba75c23c1c9be5f76814fa1b93a2e304b2af Author: Heng Li Date: Mon Feb 21 14:03:03 2011 -0500 print the "targetcut" command commit 0123d9559ba58b026c0dfd15bc26019a193cd21a Author: Heng Li Date: Mon Feb 21 11:22:13 2011 -0500 allow to set the maximum depth commit 0f92eb248a4d06645b2c3d736a0faea8a7a9f731 Author: Heng Li Date: Mon Feb 21 09:56:41 2011 -0500 use a proper error model to call hets commit 587a01504af5aea6288740d121dccf48fb8a75f4 Author: Heng Li Date: Mon Feb 21 09:16:38 2011 -0500 phase is UNFINISHED; strip RG when merging commit 723bf3cd79e4f4a558373d4c707fa6b3db0fb357 Author: Heng Li Date: Sat Feb 19 23:38:11 2011 -0500 use a proper model to compute consensus commit 891a6b02d4a9af2ed98fbaac4915bf1f0da4f6c8 Author: Heng Li Date: Sat Feb 19 22:14:19 2011 -0500 added comment commit 8b55e0a581ecc9e4ba754d1f3c8784f3038b6e48 Author: Heng Li Date: Fri Feb 18 17:23:39 2011 -0500 change the output format commit 75c36e8c563eddd0a362ba3b38cf0aea21aafb1f Author: Heng Li Date: Tue Feb 15 20:31:00 2011 -0500 fixed a bug in writing BAM commit 
bb0ce52f066cfebaa35a125d57b353bb717a5165 Author: Heng Li Date: Mon Feb 14 23:39:09 2011 -0500 skip uncovered; unknown alleles taken as X commit ba67f4d119c7d06907db3015d337d9a01a3fc9fe Author: Heng Li Date: Mon Feb 14 23:21:19 2011 -0500 fixed a bug commit e4448d49e6129a5e1ee9c7f04f43612f12d6aad6 Author: Heng Li Date: Mon Feb 14 22:43:09 2011 -0500 prepare to read hets from a list; unfinished commit 129ea29c1f12177c0a7c3e21676f6210370fc59b Author: Heng Li Date: Mon Feb 14 16:32:22 2011 -0500 updated khash.h to 0.2.5 commit 15b44ed93bd949dffcf79ac8dbea6d9b7dfcb58c Author: Heng Li Date: Mon Feb 14 16:15:04 2011 -0500 use the latest version of khash commit 486c05f06f44d981dfb2069bcb43e4b35fd8389c Author: Heng Li Date: Mon Feb 14 15:04:40 2011 -0500 change the default -k to 11 commit 07cf9d1e443d73cf053de38dd01671e3781f6e29 Author: Heng Li Date: Mon Feb 14 14:50:51 2011 -0500 sort fragments by vpos instead of by beg commit d0d3e7faabf5cbb7e5ff7b294f7e220da807c4c0 Author: Heng Li Date: Mon Feb 14 14:45:41 2011 -0500 shuffling the two haplotypes for better randomness commit 3be28eaf5f6033229aedf12ddb11a0084ba01cd8 Author: Heng Li Date: Mon Feb 14 14:09:17 2011 -0500 write chimeras to a separate BAM commit 80ccbc26f43918fe42be123cc1da9d3d7ce30816 Author: Heng Li Date: Mon Feb 14 13:54:13 2011 -0500 no mem leak/violation on small files; correctness is not checked commit 5c923867432fa14c26a19e3782e7f48d4080f6ac Author: Heng Li Date: Mon Feb 14 13:50:25 2011 -0500 bam separation; at least not immediate segfault commit cea2643ec30a59735bf89b2f562b563bf7263e79 Author: Heng Li Date: Sun Feb 13 23:24:11 2011 -0500 on the way to implement BAM separation; unfinished commit 964269cd15036a470ca89e43d0952201a0825671 Author: Heng Li Date: Sun Feb 13 18:07:56 2011 -0500 keep singletons in the hash table commit 2d4aa649bd670d5e038a1acaefd33c5fe24ae0e8 Author: Heng Li Date: Sun Feb 13 17:42:24 2011 -0500 Revert "prepare to add bam separation" This reverts commit 
ed6957e5211c2c4cf684dcb8bbb661052c74df6f. commit ed6957e5211c2c4cf684dcb8bbb661052c74df6f Author: Heng Li Date: Sun Feb 13 00:24:28 2011 -0500 prepare to add bam separation commit d211e652d93791d2e112d334added243ffe5fc3e Author: Heng Li Date: Sat Feb 12 18:50:20 2011 -0500 accelerate kstrtok commit 2d6af49d331ff5afe7b9e9b102e79d7d4512fdbe Author: Heng Li Date: Fri Feb 11 21:08:21 2011 -0500 split unlinked blocks commit 68e4cd1b560b0a6fd4c77e5e51eadde9fda26ea4 Author: Heng Li Date: Fri Feb 11 10:47:58 2011 -0500 remove heading and tailing ambiguous positions commit d2b685141426a902ae76660c1fbe8020da150cf8 Author: Heng Li Date: Fri Feb 11 10:02:21 2011 -0500 code clean up for further features commit c6980e062d55928b59f287c03e599dd5a37ed509 Author: Heng Li Date: Fri Feb 11 08:00:08 2011 -0500 change /64 to >>6; the latter is faster commit 91635b9c2687f24d72ee6a8aad2050a79bb8400f Merge: 41d4df2 9a7e155 Author: Heng Li Date: Fri Feb 11 01:22:55 2011 -0500 Merge branch 'master' into devel commit 9a7e155cc591c1b6c9f7f9cb939364a6becb65b2 Author: Heng Li Date: Fri Feb 11 01:21:07 2011 -0500 output an unrecognized field as '.'; autofix GL/PL commit 41d4df2e9545e9abe97151cfe5d6c763f3d00db1 Merge: c00c41c aacce0c Author: Heng Li Date: Thu Feb 10 23:00:14 2011 -0500 Merge branch 'master' into devel commit aacce0ce7276f451e4fddf81832f9e5f7f65198b Author: Heng Li Date: Thu Feb 10 22:57:53 2011 -0500 finished VCF->BCF conversion commit 0e875df643e41d848b709e2fa877de8ae53cdd4c Author: Heng Li Date: Thu Feb 10 21:57:28 2011 -0500 fixed a bug in reading VCF files commit c00c41c2a5da69cccea64adb542a0b365e56b4fc Author: Heng Li Date: Thu Feb 10 16:28:37 2011 -0500 suppres one-allele blocks commit 2e2354b673722e2f00d72970a043f80a66270da1 Author: Heng Li Date: Thu Feb 10 16:06:56 2011 -0500 fixed the bug in filtering commit d971e1fe24de4ecaf94055efffc5f641e2bdb563 Author: Heng Li Date: Thu Feb 10 12:24:23 2011 -0500 prepare to add filtering; buggy right now commit 
a0a5a3fbf504c3b02f7b9212e72315c1047cc249 Author: Heng Li Date: Thu Feb 10 11:55:02 2011 -0500 make masking optional commit 28db71ccd95054a5f8a47c2332794f8968f6a822 Author: Heng Li Date: Thu Feb 10 11:40:47 2011 -0500 routine to mask poorly called regions commit a3f6c439262bc10a4067860440f4d4dde9e0c515 Author: Heng Li Date: Wed Feb 9 17:18:33 2011 -0500 code clean up: remove globals commit 0b711978492f6ad39d459d78723c299468906818 Author: Heng Li Date: Wed Feb 9 16:52:54 2011 -0500 output more information commit f69d217ae5b691bf42ad07a97f29a7cc6456046f Author: Heng Li Date: Wed Feb 9 16:11:54 2011 -0500 fixed another bug in flipping commit d47882d549337fbcc251597508a2c7faf1bb92e2 Author: Heng Li Date: Wed Feb 9 16:01:35 2011 -0500 fixed a stupid bug in flipping commit e33f89de499496537f5fbde396a66557f0353f1b Author: Heng Li Date: Wed Feb 9 15:54:42 2011 -0500 fix chimeras; a little weird... commit 03d3c1d0b945245108ce0942d4772536a32212c7 Author: Heng Li Date: Wed Feb 9 13:27:35 2011 -0500 no effective change; prepare to fix chimera commit 6bc0a4676dd2252085a6e67bb06daa5ae05a554f Author: Heng Li Date: Wed Feb 9 11:52:58 2011 -0500 better count output commit dcac515439d25f71125d6de8111da417776ab9ce Author: Heng Li Date: Wed Feb 9 10:31:07 2011 -0500 prepare for another way of filtering commit ca7e4f1899b86d2e077994c789e8f69d699b3cd9 Author: Heng Li Date: Tue Feb 8 16:10:08 2011 -0500 fixed the bug; I can do better. commit 0733f77b98af121bdcb198cea6151d159831bb9c Author: Heng Li Date: Tue Feb 8 15:55:38 2011 -0500 fixed two bugs; still not working... 
commit 80f18cba9ba73c9592380fc1ecd53c351d294782 Author: Heng Li Date: Tue Feb 8 15:42:58 2011 -0500 filter false SNPs; NOT working right now commit 69a66e2f96d5b102cd712ff1527a3802fa84c590 Author: Heng Li Date: Tue Feb 8 14:39:09 2011 -0500 write sequence in the SAM format for debugging commit b6f1c9d160822af2b713be206f37bd6dde00546a Author: Heng Li Date: Mon Feb 7 11:51:21 2011 -0500 fixed two bugs commit 400aa5c06100af9c47cd5e4ce8b95b7deb84f54b Author: Heng Li Date: Mon Feb 7 11:22:38 2011 -0500 Optionally apply BAQ commit 4c82e0e19682e424f5cdb8381364114c307b329e Author: Heng Li Date: Mon Feb 7 01:23:31 2011 -0500 improved output; the result makes sense at a glance commit dc7853a581ab24bcc496e96b123ccf637e32ed1d Author: Heng Li Date: Sun Feb 6 14:12:43 2011 -0500 process per linked block instead of per chr commit e867d9c6c2e61d9e748e78163e5481dca5697a36 Author: Heng Li Date: Sun Feb 6 00:45:46 2011 -0500 DP seems to work on toy examples commit 445ad72fc43d4354d56f5f759790e8ae0be73d02 Author: Heng Li Date: Sat Feb 5 01:24:42 2011 -0500 implemented backtrack; not tested commit ba38e180b9cd545956583b22e97e09b4bb12073e Author: Heng Li Date: Fri Feb 4 23:55:23 2011 -0500 More "correct" DP; backtrack not implemented commit d69761fd9351273ccd37ea431b10509add91e7cf Author: Heng Li Date: Fri Feb 4 17:22:31 2011 -0500 scratch of dynamic programming; unfinished... commit 769ffcb44e26e59300791658801d321559b33858 Author: Heng Li Date: Fri Feb 4 16:29:55 2011 -0500 UNFINISHED commit. 
commit 9adab9591317c3467f3d8cdf2d19ec1f65d1b5b7 Author: Heng Li Date: Thu Feb 3 16:20:59 2011 -0500 another way of counting; can be even faster commit bbafbdc01ed1ceaab44927def1ad47c4c78aeb9c Author: Heng Li Date: Thu Feb 3 14:48:20 2011 -0500 for backup commit eba7446389cad62a19133bced1386a4334dcab79 Merge: a44a98e f01a593 Author: Heng Li Date: Wed Feb 2 14:06:07 2011 -0500 Merge branch 'master' into devel commit f01a5930445b5fda7e6b5b813ed63c652160ada2 Author: Heng Li Date: Wed Feb 2 11:31:54 2011 -0500 Better truncation warning when EOF is absent commit dd3ee5ed26c8bbef4a62fa5b2bfb0a75833f2c31 Author: Heng Li Date: Wed Feb 2 10:38:28 2011 -0500 fixed a typo in BCF/VCF headers commit b9d1137c55f401387113d1ad8a387489afe741db Author: Heng Li Date: Wed Feb 2 09:13:44 2011 -0500 fixed an out-of-boundary bug (fixed by Roel Kluin) commit a44a98e16559b9672e8a3492c8f8c640074b7ee2 Merge: ef68a14 d0443d5 Author: Heng Li Date: Tue Feb 1 21:54:48 2011 -0500 Merge branch 'master' into devel commit d0443d5c2f648e0f69bd4c56eaac7868e501c18b Author: Heng Li Date: Tue Feb 1 17:31:52 2011 -0500 improved sorting order checking commit ef68a14fab91399b2ecd38345936c3d6e7391cf3 Merge: 1e597b3 1a39a2e Author: Heng Li Date: Tue Feb 1 15:12:37 2011 -0500 Merge branch 'master' into devel commit 1a39a2eb08a270e20a34a0983e8bed6ffb3e2008 Author: Heng Li Date: Tue Feb 1 15:12:14 2011 -0500 more precise error message commit e028e7a47c02232e06a9dd3009262c00dede1060 Author: Heng Li Date: Tue Feb 1 14:48:01 2011 -0500 improved sorting order validation in index commit 1e597b3356744e2b791b12c9187f91c8054511d5 Author: Heng Li Date: Tue Feb 1 14:44:27 2011 -0500 testing only; not working commit 5753ace1e54228822d8ee95f69943f586e42f6e8 Author: Heng Li Date: Mon Jan 31 17:37:08 2011 -0500 reduce the effect of seq errors at the cost of SN commit 6f239ce5e0abd47babee33174476d48b723260d8 Author: Heng Li Date: Mon Jan 31 17:29:34 2011 -0500 added testing code commit 3db42fe22d27d61ab5735cd2308f73d93def8ebe 
Author: Heng Li Date: Mon Jan 31 14:33:21 2011 -0500 routine for phasing fosmid resequencing (incomplete) commit ed88f2797323229ae8f38fbcd107b231007956a8 Author: Heng Li Date: Mon Jan 31 10:12:53 2011 -0500 SAM output commit abc6acae28dc4794f6422255f077cf370d34e414 Merge: f1985a9 b133dbf Author: Heng Li Date: Sat Jan 29 22:56:10 2011 -0500 Merge branch 'master' into devel commit b133dbf82de4e8cea5eb56e5bbf0c4b3e9368fd5 Author: Heng Li Date: Sat Jan 29 22:37:11 2011 -0500 fixed a bug in tview on big-endian by Nathan Weeks commit 9d3fdaef29f91e21dbfcb9ff0165b9573e7c1042 Author: Heng Li Date: Sat Jan 29 22:24:00 2011 -0500 update INSTALL commit 9d074a38bde53961f96157b6fb3683b6dded38d7 Author: Heng Li Date: Sat Jan 29 21:56:25 2011 -0500 avoid a segfault when network connect fails commit f1985a93f7455b3ea1b0ef9b959d50b896ccd620 Author: Heng Li Date: Sat Jan 29 21:53:18 2011 -0500 fixed a bug about bit ordering commit d09797db6fef648a6823cbe718d67664660c6ebe Author: Heng Li Date: Thu Jan 27 16:53:19 2011 -0500 point out there are 4 or fewer free parameters commit 5fd1717650ed68ab6c55d094d1648c16a054891a Author: Heng Li Date: Thu Jan 27 16:09:18 2011 -0500 updated .gitignore commit fccb19fbe8f9de91f59d85bb49a248683dc6266c Author: Heng Li Date: Thu Jan 27 16:08:14 2011 -0500 fixed a bug; better scoring commit b4dcb844bde3d09eedcd9f6832186ece60ae5afd Merge: ffc3e89 6f502de Author: Heng Li Date: Thu Jan 27 14:50:30 2011 -0500 Merge branch 'master' into devel commit 6f502dec46b18dae4bb5b2319715d028b5e193d0 Author: Heng Li Date: Thu Jan 27 14:47:31 2011 -0500 skip unmapped and ref-skip reads in indel calling commit 3639f37dd8257b24560c35effcc3b6c16c3c1bcb Author: Heng Li Date: Thu Jan 27 14:19:15 2011 -0500 fixed an out-of-boundary bug in rare cases commit ffc3e89678ab9052b84f403da1e43044b045e73f Author: Heng Li Date: Thu Jan 27 14:00:17 2011 -0500 targetcut can be compiled, though probably buggy commit f452b3ac51306865ddde31a8d715b155d4d3e6e6 Author: Heng Li Date: Wed Jan 26 
18:58:43 2011 -0500 this is for a very special application... commit ca1451c6406c7ee757cb31349ea0b8de70db0656 Author: Heng Li Date: Wed Jan 26 18:48:09 2011 -0500 fixed compiling errors commit 085b87a7642865f17239fb6a436e626e25417838 Author: Heng Li Date: Wed Jan 26 18:45:09 2011 -0500 This script was put in a wrong place... commit 090d360828622520de60385af4928ce1aebe0e48 Author: Heng Li Date: Wed Jan 26 18:33:58 2011 -0500 Imported from samtools-r902 ------------------------------------------------------------------------ r108 | lh3lh3 | 2009-01-20 11:56:45 +0000 (Tue, 20 Jan 2009) | 2 lines Changed paths: M /branches/dev/samtools/examples/Makefile made it a little more convenient ------------------------------------------------------------------------ r107 | lh3lh3 | 2009-01-20 11:53:30 +0000 (Tue, 20 Jan 2009) | 2 lines Changed paths: A /branches/dev/samtools/examples/Makefile added a Makefile ------------------------------------------------------------------------ r106 | lh3lh3 | 2009-01-20 11:25:05 +0000 (Tue, 20 Jan 2009) | 2 lines Changed paths: M /branches/dev/samtools/misc/maq2sam.c support RG tag ------------------------------------------------------------------------ r105 | lh3lh3 | 2009-01-18 17:37:20 +0000 (Sun, 18 Jan 2009) | 2 lines Changed paths: M /branches/dev/samtools/ChangeLog update changelog ------------------------------------------------------------------------ r104 | lh3lh3 | 2009-01-18 17:31:21 +0000 (Sun, 18 Jan 2009) | 3 lines Changed paths: M /branches/dev/samtools/bam.h M /branches/dev/samtools/bam_lpileup.c M /branches/dev/samtools/bamtk.c * samtools-0.1.1-18 * fixed a bug in bam_lpileup.c: segment start and end are not correctly recognized ------------------------------------------------------------------------ r103 | lh3lh3 | 2009-01-18 16:34:03 +0000 (Sun, 18 Jan 2009) | 5 lines Changed paths: M /branches/dev/samtools/bam_import.c M /branches/dev/samtools/bam_index.c M /branches/dev/samtools/bamtk.c * samtools-0.1.1-17 * fixed a 
bug when there are reads without coordinates * also recognize type 'c' as 'A' * found a bug in bam_lpileup.c; NOT fixed yet ------------------------------------------------------------------------ r102 | lh3lh3 | 2009-01-17 19:46:49 +0000 (Sat, 17 Jan 2009) | 2 lines Changed paths: A /branches/dev/samtools/INSTALL Instruction for compilation ------------------------------------------------------------------------ r101 | lh3lh3 | 2009-01-17 19:31:36 +0000 (Sat, 17 Jan 2009) | 3 lines Changed paths: M /branches/dev/samtools/Makefile A /branches/dev/samtools/Makefile.lite M /branches/dev/samtools/bam.h M /branches/dev/samtools/faidx.c M /branches/dev/samtools/misc/Makefile M /branches/dev/samtools/razf.c * replaced HAVE_RAZF with _NO_RAZF * added Makefile.lite for people who have trouble with razf.c ------------------------------------------------------------------------ r100 | lh3lh3 | 2009-01-16 10:03:37 +0000 (Fri, 16 Jan 2009) | 3 lines Changed paths: M /branches/dev/samtools/bam_mate.c M /branches/dev/samtools/bamtk.c M /branches/dev/samtools/misc/wgsim.c * samtools-0.1.1-15 * fixed another bug in fixmate: unmapped pair has non-zero isize ------------------------------------------------------------------------ r99 | lh3lh3 | 2009-01-16 09:13:36 +0000 (Fri, 16 Jan 2009) | 4 lines Changed paths: M /branches/dev/samtools/ChangeLog M /branches/dev/samtools/bam_mate.c M /branches/dev/samtools/bamtk.c * samtools-0.1.1-14 * fixed a bug in fixmate: isize not equal to zero if two ends mapped to different chr ------------------------------------------------------------------------ r98 | lh3lh3 | 2009-01-15 16:47:41 +0000 (Thu, 15 Jan 2009) | 3 lines Changed paths: M /branches/dev/samtools/bam_maqcns.c M /branches/dev/samtools/bamtk.c * samtools-0.1.1-13 * fixed the prior for hom indels (Richard pointed this out) ------------------------------------------------------------------------ r97 | lh3lh3 | 2009-01-15 16:38:47 +0000 (Thu, 15 Jan 2009) | 4 lines Changed paths: M 
/branches/dev/samtools/COPYING M /branches/dev/samtools/bam_sort.c M /branches/dev/samtools/bamtk.c M /branches/dev/samtools/source.dot * samtools-0.1.1-12 * fixed a bug in sort * update source file graph and copyright information ------------------------------------------------------------------------ r96 | lh3lh3 | 2009-01-14 21:46:14 +0000 (Wed, 14 Jan 2009) | 2 lines Changed paths: M /branches/dev/samtools/glf.c fixed a typo ------------------------------------------------------------------------ r95 | lh3lh3 | 2009-01-14 21:44:53 +0000 (Wed, 14 Jan 2009) | 2 lines Changed paths: M /branches/dev/samtools/glf.c added a main function for glf.c ------------------------------------------------------------------------ r94 | lh3lh3 | 2009-01-14 17:14:59 +0000 (Wed, 14 Jan 2009) | 4 lines Changed paths: M /branches/dev/samtools/Makefile M /branches/dev/samtools/bam_plcmd.c M /branches/dev/samtools/bamtk.c M /branches/dev/samtools/bgzf.h A /branches/dev/samtools/glf.c M /branches/dev/samtools/glf.h * samtools-0.1.1-11 * generate binary GLFv2 * added glfview command to dump GLFv2 binary file ------------------------------------------------------------------------ r93 | lh3lh3 | 2009-01-14 15:07:44 +0000 (Wed, 14 Jan 2009) | 4 lines Changed paths: M /branches/dev/samtools/bam_rmdup.c M /branches/dev/samtools/bamtk.c M /branches/dev/samtools/glf.h * samtools-0.1.1-10 * fixed several bugs in rmdup * prepare to generate GLF2 ------------------------------------------------------------------------ r92 | lh3lh3 | 2009-01-14 13:27:44 +0000 (Wed, 14 Jan 2009) | 3 lines Changed paths: M /branches/dev/samtools/Makefile M /branches/dev/samtools/bam.h M /branches/dev/samtools/bam_import.c A /branches/dev/samtools/bam_rmdup.c M /branches/dev/samtools/bamtk.c * samtools-0.1.1-9 * implemented rmdup; NOT tested yet ------------------------------------------------------------------------ r91 | lh3lh3 | 2009-01-13 20:15:43 +0000 (Tue, 13 Jan 2009) | 2 lines Changed paths: M 
/branches/dev/samtools/examples/00README.txt update README for typos ------------------------------------------------------------------------ r90 | lh3lh3 | 2009-01-13 19:57:50 +0000 (Tue, 13 Jan 2009) | 2 lines Changed paths: M /branches/dev/samtools/examples/ex1.sam.gz update example ------------------------------------------------------------------------ r89 | lh3lh3 | 2009-01-13 17:21:38 +0000 (Tue, 13 Jan 2009) | 3 lines Changed paths: M /branches/dev/samtools/Makefile M /branches/dev/samtools/bam.c A /branches/dev/samtools/bam_mate.c M /branches/dev/samtools/bamtk.c * samtools-0.1.1-8 * added fixmate command ------------------------------------------------------------------------ r88 | lh3lh3 | 2009-01-13 10:48:23 +0000 (Tue, 13 Jan 2009) | 3 lines Changed paths: M /branches/dev/samtools/bam_plcmd.c M /branches/dev/samtools/bamtk.c * samtools-0.1.1-7 * change the reported indel position to the previous way ------------------------------------------------------------------------ r87 | lh3lh3 | 2009-01-12 22:12:12 +0000 (Mon, 12 Jan 2009) | 4 lines Changed paths: M /branches/dev/samtools/bam_plcmd.c M /branches/dev/samtools/bamtk.c * samtools-0.1.1-6 * addd glt output * allow to change indel calling parameters at the command line ------------------------------------------------------------------------ r86 | lh3lh3 | 2009-01-12 21:16:48 +0000 (Mon, 12 Jan 2009) | 4 lines Changed paths: M /branches/dev/samtools/bam.h M /branches/dev/samtools/bam_pileup.c M /branches/dev/samtools/bam_plcmd.c M /branches/dev/samtools/bamtk.c * samtools-0.1.1-5 * added two more flags * allowed to select reads shown in pileup with a mask ------------------------------------------------------------------------ r85 | lh3lh3 | 2009-01-12 20:47:51 +0000 (Mon, 12 Jan 2009) | 4 lines Changed paths: M /branches/dev/samtools/bam_index.c M /branches/dev/samtools/bam_plcmd.c M /branches/dev/samtools/bamtk.c * samtools-0.1.1-4 * fixed a bug in indexing (linear index) * prepare to add glt output 
from pileup ------------------------------------------------------------------------ r84 | lh3lh3 | 2009-01-12 09:22:35 +0000 (Mon, 12 Jan 2009) | 3 lines Changed paths: M /branches/dev/samtools/bam_plcmd.c M /branches/dev/samtools/bamtk.c * samtools-0.1.1-3 * fixed a bug in outputing the coordinate of an indel ------------------------------------------------------------------------ r83 | lh3lh3 | 2009-01-11 15:18:01 +0000 (Sun, 11 Jan 2009) | 3 lines Changed paths: M /branches/dev/samtools/bam_plcmd.c M /branches/dev/samtools/bamtk.c * samtools-0.1.1-2 * pileup: allows to output indel sites only ------------------------------------------------------------------------ r82 | lh3lh3 | 2009-01-10 23:34:31 +0000 (Sat, 10 Jan 2009) | 3 lines Changed paths: M /branches/dev/samtools/bam_maqcns.c M /branches/dev/samtools/bam_maqcns.h M /branches/dev/samtools/bam_plcmd.c M /branches/dev/samtools/bamtk.c * samtools-0.1.1-1 * implemented a Bayesian indel caller ------------------------------------------------------------------------ r81 | lh3lh3 | 2009-01-09 09:54:28 +0000 (Fri, 09 Jan 2009) | 2 lines Changed paths: M /branches/dev/samtools/examples/00README.txt D /branches/dev/samtools/examples/ex1.fa.fai Let users generate ex1.fa.fai. 
------------------------------------------------------------------------ r80 | lh3lh3 | 2009-01-08 16:10:08 +0000 (Thu, 08 Jan 2009) | 2 lines Changed paths: M /branches/dev/samtools/misc/bowtie2sam.pl make the bowtie converter works for "-k 2" ------------------------------------------------------------------------ r78 | lh3lh3 | 2009-01-03 17:25:24 +0000 (Sat, 03 Jan 2009) | 2 lines Changed paths: M /branches/dev/samtools/misc/export2sam.pl fixed a bug for "QC" reads ------------------------------------------------------------------------ r77 | lh3lh3 | 2009-01-01 18:32:06 +0000 (Thu, 01 Jan 2009) | 3 lines Changed paths: A /branches/dev/samtools/misc/bowtie2sam.pl M /branches/dev/samtools/misc/soap2sam.pl * soap2sam.pl: added NM tag * bowtie2sam.pl: converter for bowtie ------------------------------------------------------------------------ r76 | lh3lh3 | 2008-12-31 23:24:24 +0000 (Wed, 31 Dec 2008) | 2 lines Changed paths: A /branches/dev/samtools/misc/soap2sam.pl soap2sam.pl: convert soap output to SAM ------------------------------------------------------------------------ r75 | lh3lh3 | 2008-12-31 17:54:32 +0000 (Wed, 31 Dec 2008) | 3 lines Changed paths: M /branches/dev/samtools/misc/wgsim_eval.pl * wgsim_eval.pl-0.1.1 * fixed a bug for a contig name like "NT_012345" ------------------------------------------------------------------------ r74 | lh3lh3 | 2008-12-31 16:38:21 +0000 (Wed, 31 Dec 2008) | 2 lines Changed paths: A /branches/dev/samtools/misc/wgsim_eval.pl * evaluate alignment for reads generated by wgsim ------------------------------------------------------------------------ r73 | lh3lh3 | 2008-12-31 15:11:22 +0000 (Wed, 31 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/misc/Makefile M /branches/dev/samtools/misc/wgsim.c fixed compiling warnings for wgsim ------------------------------------------------------------------------ r72 | lh3lh3 | 2008-12-31 13:40:51 +0000 (Wed, 31 Dec 2008) | 2 lines Changed paths: M 
/branches/dev/samtools/bam_tview.c remove an unused variable (a compiler warning only) ------------------------------------------------------------------------ r71 | lh3lh3 | 2008-12-31 13:37:16 +0000 (Wed, 31 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/misc/Makefile A /branches/dev/samtools/misc/wgsim.c wgsim: Paired-end reads simulator ------------------------------------------------------------------------ r70 | bhandsaker | 2008-12-29 20:27:16 +0000 (Mon, 29 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/bam_maqcns.c M /branches/dev/samtools/bam_tview.c Move definition of bam_nt16_nt4_table so we can build without curses. ------------------------------------------------------------------------ r62 | lh3lh3 | 2008-12-22 15:55:13 +0000 (Mon, 22 Dec 2008) | 2 lines Changed paths: A /branches/dev/samtools/NEWS M /branches/dev/samtools/bamtk.c M /branches/dev/samtools/samtools.1 Release samtools-0.1.1 ------------------------------------------------------------------------ r61 | lh3lh3 | 2008-12-22 15:46:08 +0000 (Mon, 22 Dec 2008) | 10 lines Changed paths: M /branches/dev/samtools/bam_aux.c M /branches/dev/samtools/bam_index.c M /branches/dev/samtools/bam_plcmd.c M /branches/dev/samtools/bam_tview.c M /branches/dev/samtools/bamtk.c M /branches/dev/samtools/razf.c M /branches/dev/samtools/samtools.1 * samtools-0.1.0-66 * fixed a bug in razf.c: reset z_eof when razf_seek() is called * fixed a memory leak in parsing a region * changed pileup a little bit when -s is in use: output ^ and $ * when a bam is not indexed, output more meaningful error message * fixed a bug in indexing for small alignment * fixed a bug in the viewer when we come to the end of a reference file * updated documentation * prepare to release 0.1.1 ------------------------------------------------------------------------ r60 | lh3lh3 | 2008-12-22 15:10:16 +0000 (Mon, 22 Dec 2008) | 2 lines Changed paths: A /branches/dev/samtools/examples A 
/branches/dev/samtools/examples/00README.txt A /branches/dev/samtools/examples/ex1.fa A /branches/dev/samtools/examples/ex1.fa.fai A /branches/dev/samtools/examples/ex1.sam.gz example ------------------------------------------------------------------------ r59 | lh3lh3 | 2008-12-22 09:38:15 +0000 (Mon, 22 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/ChangeLog update ChangeLog ------------------------------------------------------------------------ r58 | lh3lh3 | 2008-12-20 23:06:00 +0000 (Sat, 20 Dec 2008) | 3 lines Changed paths: M /branches/dev/samtools/misc/export2sam.pl * added comments * fixed several bugs ------------------------------------------------------------------------ r57 | lh3lh3 | 2008-12-20 15:44:20 +0000 (Sat, 20 Dec 2008) | 2 lines Changed paths: A /branches/dev/samtools/misc/export2sam.pl convert Export format to SAM; not thoroughly tested ------------------------------------------------------------------------ r56 | lh3lh3 | 2008-12-19 22:13:28 +0000 (Fri, 19 Dec 2008) | 6 lines Changed paths: M /branches/dev/samtools/bam_import.c M /branches/dev/samtools/bam_plcmd.c M /branches/dev/samtools/bam_tview.c M /branches/dev/samtools/bamtk.c A /branches/dev/samtools/source.dot * samtools-0.1.0-65 * pileup: generate maq-like simple output * pileup: allow to output pileup at required sites * source.dot: source file relationship graph * tview: fixed a minor bug ------------------------------------------------------------------------ r55 | lh3lh3 | 2008-12-19 20:10:26 +0000 (Fri, 19 Dec 2008) | 2 lines Changed paths: D /branches/dev/samtools/misc/all2sam.pl remove all2sam.pl ------------------------------------------------------------------------ r54 | lh3lh3 | 2008-12-16 22:34:25 +0000 (Tue, 16 Dec 2008) | 2 lines Changed paths: A /branches/dev/samtools/COPYING M /branches/dev/samtools/bam.h M /branches/dev/samtools/faidx.h M /branches/dev/samtools/khash.h M /branches/dev/samtools/kseq.h M /branches/dev/samtools/ksort.h M 
/branches/dev/samtools/samtools.1 Added copyright information and a bit more documentation. No code change. ------------------------------------------------------------------------ r53 | lh3lh3 | 2008-12-16 13:40:18 +0000 (Tue, 16 Dec 2008) | 3 lines Changed paths: M /branches/dev/samtools/bam.c M /branches/dev/samtools/bam.h M /branches/dev/samtools/bam_index.c M /branches/dev/samtools/bam_maqcns.c M /branches/dev/samtools/bamtk.c * samtools-0.1.0-64 * improved efficiency of the indel caller for spliced alignments ------------------------------------------------------------------------ r52 | lh3lh3 | 2008-12-16 10:28:20 +0000 (Tue, 16 Dec 2008) | 3 lines Changed paths: M /branches/dev/samtools/bam.c M /branches/dev/samtools/bam.h M /branches/dev/samtools/bam_aux.c M /branches/dev/samtools/bam_index.c M /branches/dev/samtools/bamtk.c * samtools-0.1.0-63 * a bit code cleanup: reduce the dependency between source files ------------------------------------------------------------------------ r51 | lh3lh3 | 2008-12-15 14:29:32 +0000 (Mon, 15 Dec 2008) | 3 lines Changed paths: M /branches/dev/samtools/bam_maqcns.c M /branches/dev/samtools/bam_plcmd.c M /branches/dev/samtools/bamtk.c * samtools-0.1.0-62 * fixed a memory leak ------------------------------------------------------------------------ r50 | lh3lh3 | 2008-12-15 14:00:13 +0000 (Mon, 15 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/ChangeLog M /branches/dev/samtools/bam.h M /branches/dev/samtools/samtools.1 update documentation, ChangeLog and a comment ------------------------------------------------------------------------ r49 | lh3lh3 | 2008-12-15 13:36:43 +0000 (Mon, 15 Dec 2008) | 6 lines Changed paths: M /branches/dev/samtools/Makefile M /branches/dev/samtools/bam.h M /branches/dev/samtools/bam_maqcns.c M /branches/dev/samtools/bam_maqcns.h M /branches/dev/samtools/bam_pileup.c A /branches/dev/samtools/bam_plcmd.c M /branches/dev/samtools/bamtk.c M /branches/dev/samtools/samtools.1 * 
samtools-0.1.0-61 * moved pileup command to a separate source file * added indel caller * added bam_cal_segend(). (NOT WORKING for spliced alignment!!!) * updated documentation ------------------------------------------------------------------------ r48 | lh3lh3 | 2008-12-12 13:55:36 +0000 (Fri, 12 Dec 2008) | 3 lines Changed paths: M /branches/dev/samtools/bam_maqcns.c M /branches/dev/samtools/bamtk.c * samtools-0.1.0-60 * fixed another bug in maqcns when there is a nearby deletion ------------------------------------------------------------------------ r47 | lh3lh3 | 2008-12-12 13:42:16 +0000 (Fri, 12 Dec 2008) | 5 lines Changed paths: M /branches/dev/samtools/bam_maqcns.c M /branches/dev/samtools/bam_pileup.c M /branches/dev/samtools/bamtk.c * samtools-0.1.0-59 * pileup: outputting consensus is now optional * fixed a bug in glfgen. This bug also exists in maq's glfgen. However, I am not quite sure why the previous version may have a problem. ------------------------------------------------------------------------ r46 | lh3lh3 | 2008-12-12 11:44:56 +0000 (Fri, 12 Dec 2008) | 6 lines Changed paths: M /branches/dev/samtools/bam_pileup.c M /branches/dev/samtools/bamtk.c * samtools-0.1.0-58 * add maq consensus to pileup. However, I will move this part to a new command as strictly speaking, consensus calling is not part of pileup, and imposing it would make it harder to generate for other language bindings. ------------------------------------------------------------------------ r45 | bhandsaker | 2008-12-11 20:43:56 +0000 (Thu, 11 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/bgzf.c Fix bug in tell() after reads that consume to the exact end of a block.
------------------------------------------------------------------------ r44 | lh3lh3 | 2008-12-11 09:36:53 +0000 (Thu, 11 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/samtools.1 update manual ------------------------------------------------------------------------ r43 | lh3lh3 | 2008-12-11 09:25:36 +0000 (Thu, 11 Dec 2008) | 4 lines Changed paths: M /branches/dev/samtools/bam_import.c M /branches/dev/samtools/bamtk.c * samtools-0.1.0-57 * fixed a bug in parser when there is auxiliary fields * made the parser a bit more robust ------------------------------------------------------------------------ r42 | lh3lh3 | 2008-12-10 14:57:29 +0000 (Wed, 10 Dec 2008) | 5 lines Changed paths: M /branches/dev/samtools/bam_index.c M /branches/dev/samtools/bamtk.c M /branches/dev/samtools/bgzf.c * samtools-0.1.0-56 * fixed a bug in bgzf (only reading is affected) * fixed a typo in bam_index.c * in bam_index.c, check potential bugs in the underlying I/O library ------------------------------------------------------------------------ r41 | lh3lh3 | 2008-12-10 12:53:08 +0000 (Wed, 10 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/samtools.1 update manual ------------------------------------------------------------------------ r40 | lh3lh3 | 2008-12-10 11:52:10 +0000 (Wed, 10 Dec 2008) | 5 lines Changed paths: M /branches/dev/samtools/bam.h M /branches/dev/samtools/bam_pileup.c M /branches/dev/samtools/bamtk.c * samtools-0.1.0-55 * tried to make pileup work with clipping (previously not), though NOT tested * removed -v from pileup * made pileup take the reference sequence ------------------------------------------------------------------------ r39 | lh3lh3 | 2008-12-09 11:59:28 +0000 (Tue, 09 Dec 2008) | 4 lines Changed paths: M /branches/dev/samtools/bam_import.c M /branches/dev/samtools/bamtk.c M /branches/dev/samtools/samtools.1 * samtools-0.1.0-54 * in parser, recognize "=", rather than ",", as a match * in parser, correctl parse "=" at the MRNM 
field. ------------------------------------------------------------------------ r38 | lh3lh3 | 2008-12-09 11:39:07 +0000 (Tue, 09 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/misc/maq2sam.c fixed a bug in handling maq flag 64 and 192 ------------------------------------------------------------------------ r37 | lh3lh3 | 2008-12-09 09:53:46 +0000 (Tue, 09 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/misc/md5fa.c also calculate unordered md5sum check ------------------------------------------------------------------------ r36 | lh3lh3 | 2008-12-09 09:46:21 +0000 (Tue, 09 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/misc/md5fa.c fixed a minor bug when there are space in the sequence ------------------------------------------------------------------------ r35 | lh3lh3 | 2008-12-09 09:40:45 +0000 (Tue, 09 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/misc/md5fa.c fixed a potential memory leak ------------------------------------------------------------------------ r34 | lh3lh3 | 2008-12-08 14:52:17 +0000 (Mon, 08 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/bam_import.c M /branches/dev/samtools/bam_index.c M /branches/dev/samtools/bamtk.c * fixed a bug in import: bin is wrongly calculated ------------------------------------------------------------------------ r33 | lh3lh3 | 2008-12-08 14:08:01 +0000 (Mon, 08 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/misc/all2sam.pl nothing, really ------------------------------------------------------------------------ r32 | lh3lh3 | 2008-12-08 12:56:02 +0000 (Mon, 08 Dec 2008) | 3 lines Changed paths: M /branches/dev/samtools/Makefile M /branches/dev/samtools/kseq.h M /branches/dev/samtools/misc/Makefile A /branches/dev/samtools/misc/md5.c A /branches/dev/samtools/misc/md5.h A /branches/dev/samtools/misc/md5fa.c * fixed two warnings in kseq.h * added md5sum utilities ------------------------------------------------------------------------ r31 | 
lh3lh3 | 2008-12-08 11:35:29 +0000 (Mon, 08 Dec 2008) | 5 lines Changed paths: M /branches/dev/samtools/Makefile M /branches/dev/samtools/bam_import.c M /branches/dev/samtools/bamtk.c A /branches/dev/samtools/kseq.h D /branches/dev/samtools/kstream.h * samtools-0.1.0-52 * replace kstream with kseq. kseq is a superset of kstream. I need the extra functions in kseq.h. * also compile stand-alone faidx ------------------------------------------------------------------------ r30 | lh3lh3 | 2008-12-08 11:17:04 +0000 (Mon, 08 Dec 2008) | 3 lines Changed paths: M /branches/dev/samtools/bam.h M /branches/dev/samtools/bam_sort.c M /branches/dev/samtools/bamtk.c * samtools-0.1.0-51 * sorting by read names is available ------------------------------------------------------------------------ r29 | lh3lh3 | 2008-12-08 10:29:02 +0000 (Mon, 08 Dec 2008) | 3 lines Changed paths: M /branches/dev/samtools/bam.c M /branches/dev/samtools/bam.h M /branches/dev/samtools/bam_import.c M /branches/dev/samtools/bam_maqcns.c M /branches/dev/samtools/bam_pileup.c M /branches/dev/samtools/bam_sort.c M /branches/dev/samtools/bam_tview.c M /branches/dev/samtools/bamtk.c M /branches/dev/samtools/misc/maq2sam.c * samtools-0.1.0-50 * format change to meet the latest specification ------------------------------------------------------------------------ r28 | lh3lh3 | 2008-12-04 16:09:21 +0000 (Thu, 04 Dec 2008) | 3 lines Changed paths: M /branches/dev/samtools/bam_maqcns.c M /branches/dev/samtools/misc/maq2sam.c * minor change in maqcns: special care when n==0 * change maq2sam to meet the latest specification ------------------------------------------------------------------------ r27 | lh3lh3 | 2008-12-04 15:55:44 +0000 (Thu, 04 Dec 2008) | 2 lines Changed paths: M /branches/dev/samtools/razf.c M /branches/dev/samtools/razf.h considerable code clean up in razf ------------------------------------------------------------------------ r26 | lh3lh3 | 2008-12-04 15:08:18 +0000 (Thu, 04 Dec 2008) | 2 
lines Changed paths: M /branches/dev/samtools/ChangeLog M /branches/dev/samtools/Makefile M /branches/dev/samtools/faidx.c make RAZF optional in faidx.c ------------------------------------------------------------------------ r25 | lh3lh3 | 2008-12-01 15:27:22 +0000 (Mon, 01 Dec 2008) | 3 lines Changed paths: M /branches/dev/samtools/Makefile M /branches/dev/samtools/bam.h M /branches/dev/samtools/bam_aux.c M /branches/dev/samtools/bamtk.c M /branches/dev/samtools/samtools.1 * samtools-0.1.0-49 * added routines for retrieving aux data, NOT TESTED YET! ------------------------------------------------------------------------ r24 | lh3lh3 | 2008-12-01 14:29:43 +0000 (Mon, 01 Dec 2008) | 5 lines Changed paths: M /branches/dev/samtools/bam.c M /branches/dev/samtools/bam_import.c M /branches/dev/samtools/bam_maqcns.c M /branches/dev/samtools/bamtk.c M /branches/dev/samtools/bgzf.c M /branches/dev/samtools/samtools.1 * samtools-0.1.0-48 * bgzf: fixed a potential integer overflow on 32-bit machines * maqcns: set the minimum combined quality as 0 * supporting hex strings ------------------------------------------------------------------------ r23 | lh3lh3 | 2008-11-27 17:14:37 +0000 (Thu, 27 Nov 2008) | 3 lines Changed paths: M /branches/dev/samtools/bam_maqcns.c M /branches/dev/samtools/bamtk.c * samtools-0.1.0-47 * fixed the bug in maqcns ------------------------------------------------------------------------ r22 | lh3lh3 | 2008-11-27 17:08:11 +0000 (Thu, 27 Nov 2008) | 3 lines Changed paths: M /branches/dev/samtools/Makefile M /branches/dev/samtools/bam.h A /branches/dev/samtools/bam_maqcns.c A /branches/dev/samtools/bam_maqcns.h M /branches/dev/samtools/bam_tview.c M /branches/dev/samtools/bamtk.c A /branches/dev/samtools/glf.h * samtools-0.1.0-46 * add MAQ consensus caller, currently BUGGY!
------------------------------------------------------------------------ r21 | lh3lh3 | 2008-11-27 13:51:28 +0000 (Thu, 27 Nov 2008) | 4 lines Changed paths: M /branches/dev/samtools/bam_pileup.c M /branches/dev/samtools/bam_tview.c M /branches/dev/samtools/bamtk.c * samtools-0.1.0-45 * tview: display padded alignment (but not P operation) * better coordinates and reference sequence ------------------------------------------------------------------------ r19 | lh3lh3 | 2008-11-27 09:26:05 +0000 (Thu, 27 Nov 2008) | 2 lines Changed paths: A /branches/dev/samtools/ChangeLog new ChangeLog ------------------------------------------------------------------------ r18 | lh3lh3 | 2008-11-27 09:24:45 +0000 (Thu, 27 Nov 2008) | 3 lines Changed paths: D /branches/dev/samtools/ChangeLog A /branches/dev/samtools/ChangeLog.old (from /branches/dev/samtools/ChangeLog:6) Rename ChangeLog to ChangeLog.old. This old ChangeLog is generated from the log of my personal SVN repository. ------------------------------------------------------------------------ r17 | lh3lh3 | 2008-11-27 09:22:55 +0000 (Thu, 27 Nov 2008) | 6 lines Changed paths: M /branches/dev/samtools/Makefile M /branches/dev/samtools/bamtk.c M /branches/dev/samtools/bgzf.c * samtools-0.1.0-44 * declare fseeko and ftello as some Linux may not do this by default and missing these declarations will make bgzf buggy * get rid of some harmless warnings * use BGZF by default, now ------------------------------------------------------------------------ r16 | lh3lh3 | 2008-11-26 21:19:11 +0000 (Wed, 26 Nov 2008) | 4 lines Changed paths: M /branches/dev/samtools/bam_index.c M /branches/dev/samtools/bamtk.c M /branches/dev/samtools/razf.c * samtools-0.1.0-43 * fixed a bug in razf_read() * give more warnings when the file is truncated (or due to bugs in I/O library) ------------------------------------------------------------------------ r15 | lh3lh3 | 2008-11-26 20:41:39 +0000 (Wed, 26 Nov 2008) | 2 lines Changed paths: M
/branches/dev/samtools/bgzf.c fixed a bug in bgzf.c at the end of the file ------------------------------------------------------------------------ r14 | lh3lh3 | 2008-11-26 17:05:18 +0000 (Wed, 26 Nov 2008) | 4 lines Changed paths: M /branches/dev/samtools/bamtk.c * samtools-0.1.0-42 * a lot happened to RAZF, although samtools itself is untouched. Better also update the version number anyway to avoid confusion ------------------------------------------------------------------------ r13 | lh3lh3 | 2008-11-26 17:03:48 +0000 (Wed, 26 Nov 2008) | 2 lines Changed paths: M /branches/dev/samtools/razf.c a change from Jue, but I think it should not matter ------------------------------------------------------------------------ r12 | lh3lh3 | 2008-11-26 16:48:14 +0000 (Wed, 26 Nov 2008) | 3 lines Changed paths: M /branches/dev/samtools/razf.c fixed a potential bug in razf. However, it seems still buggy, just rarely happens, very rarely. ------------------------------------------------------------------------ r11 | lh3lh3 | 2008-11-26 14:02:56 +0000 (Wed, 26 Nov 2008) | 2 lines Changed paths: M /branches/dev/samtools/razf.c fixed a bug in razf, with the help of Jue ------------------------------------------------------------------------ r10 | lh3lh3 | 2008-11-26 11:55:32 +0000 (Wed, 26 Nov 2008) | 2 lines Changed paths: M /branches/dev/samtools/bam_index.c remove a comment ------------------------------------------------------------------------ r9 | lh3lh3 | 2008-11-26 11:37:05 +0000 (Wed, 26 Nov 2008) | 2 lines Changed paths: M /branches/dev/samtools/Makefile M /branches/dev/samtools/bam.h M /branches/dev/samtools/razf.c M /branches/dev/samtools/razf.h * Jue has updated razf to realize Bob's scheme ------------------------------------------------------------------------ r7 | lh3lh3 | 2008-11-25 20:37:37 +0000 (Tue, 25 Nov 2008) | 2 lines Changed paths: A /branches/dev/samtools/samtools.1 the manual page 
------------------------------------------------------------------------ r6 | lh3lh3 | 2008-11-25 20:37:16 +0000 (Tue, 25 Nov 2008) | 3 lines Changed paths: A /branches/dev/samtools/ChangeLog A /branches/dev/samtools/Makefile A /branches/dev/samtools/bam.c A /branches/dev/samtools/bam.h A /branches/dev/samtools/bam_aux.c A /branches/dev/samtools/bam_endian.h A /branches/dev/samtools/bam_import.c A /branches/dev/samtools/bam_index.c A /branches/dev/samtools/bam_lpileup.c A /branches/dev/samtools/bam_pileup.c A /branches/dev/samtools/bam_sort.c A /branches/dev/samtools/bam_tview.c A /branches/dev/samtools/bamtk.c A /branches/dev/samtools/bgzf.c A /branches/dev/samtools/bgzf.h A /branches/dev/samtools/bgzip.c A /branches/dev/samtools/faidx.c A /branches/dev/samtools/faidx.h A /branches/dev/samtools/khash.h A /branches/dev/samtools/ksort.h A /branches/dev/samtools/kstream.h A /branches/dev/samtools/misc A /branches/dev/samtools/misc/Makefile A /branches/dev/samtools/misc/all2sam.pl A /branches/dev/samtools/misc/maq2sam.c A /branches/dev/samtools/razf.c A /branches/dev/samtools/razf.h A /branches/dev/samtools/razip.c A /branches/dev/samtools/zutil.h The initial version of samtools, replicated from my local SVN repository. The current version is: 0.1.0-42. All future development will happen here. 
------------------------------------------------------------------------ r5 | lh3lh3 | 2008-11-25 20:30:49 +0000 (Tue, 25 Nov 2008) | 2 lines Changed paths: A /branches/dev/samtools samtools (C version) ------------------------------------------------------------------------ ------------------------------------------------------------------------ r703 | lh3 | 2008-11-25 20:20:02 +0000 (Tue, 25 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/samtools.1 rename bamtk to samtools ------------------------------------------------------------------------ r702 | lh3 | 2008-11-25 20:15:09 +0000 (Tue, 25 Nov 2008) | 2 lines Changed paths: D /branches/prog/bam/bamtk.1 A /branches/prog/bam/samtools.1 (from /branches/prog/bam/bamtk.1:679) rename bamtk.1 to samtools.1 ------------------------------------------------------------------------ r701 | lh3 | 2008-11-25 13:29:10 +0000 (Tue, 25 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/Makefile M /branches/prog/bam/bam.c M /branches/prog/bam/bam.h M /branches/prog/bam/bam_import.c M /branches/prog/bam/bam_index.c M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c M /branches/prog/bam/misc/Makefile * samtools-0.1.0-41 * small (but a bit dangerous) changes to meet the latest specification ------------------------------------------------------------------------ r700 | lh3 | 2008-11-25 13:15:11 +0000 (Tue, 25 Nov 2008) | 2 lines Changed paths: A /branches/prog/bam/misc/all2sam.pl (from /branches/prog/bam/misc/all2tam.pl:649) D /branches/prog/bam/misc/all2tam.pl A /branches/prog/bam/misc/maq2sam.c (from /branches/prog/bam/misc/maq2tam.c:699) D /branches/prog/bam/misc/maq2tam.c rename tam to sam ------------------------------------------------------------------------ r699 | lh3 | 2008-11-25 13:14:49 +0000 (Tue, 25 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/misc/maq2tam.c change for the new specification ------------------------------------------------------------------------ r698 | lh3 | 
2008-11-24 13:15:20 +0000 (Mon, 24 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/razf.c M /branches/prog/bam/razf.h * add a fake BGZF mode to razf. It is fake in that it loads razf index into memory but gives BGZF like virtual offset ------------------------------------------------------------------------ r697 | lh3 | 2008-11-24 09:53:44 +0000 (Mon, 24 Nov 2008) | 2 lines Changed paths: A /branches/prog/bam/ChangeLog change log ------------------------------------------------------------------------ r696 | lh3 | 2008-11-24 09:53:23 +0000 (Mon, 24 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bgzf.c updated bgzf, on behalf of Bob ------------------------------------------------------------------------ r695 | lh3 | 2008-11-23 11:40:31 +0000 (Sun, 23 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bam_index.c M /branches/prog/bam/razf.c fixed a bug in razf ------------------------------------------------------------------------ r694 | lh3 | 2008-11-22 16:23:52 +0000 (Sat, 22 Nov 2008) | 4 lines Changed paths: M /branches/prog/bam/bam_index.c M /branches/prog/bam/bam_lpileup.c M /branches/prog/bam/bam_tview.c M /branches/prog/bam/bamtk.c * bam-0.1.0-40 * fixed two small memory leaks * fixed a memory problem when seek outside the length of the sequence ------------------------------------------------------------------------ r693 | lh3 | 2008-11-22 16:10:04 +0000 (Sat, 22 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam_index.c M /branches/prog/bam/bamtk.c * bam-0.1.0-39 * fixed an uninitialized warning. 
This does not matter in fact ------------------------------------------------------------------------ r692 | lh3 | 2008-11-22 15:44:05 +0000 (Sat, 22 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/razf.c M /branches/prog/bam/razf.h Jue's new razf ------------------------------------------------------------------------ r691 | lh3 | 2008-11-21 21:30:39 +0000 (Fri, 21 Nov 2008) | 4 lines Changed paths: M /branches/prog/bam/Makefile M /branches/prog/bam/bam.h M /branches/prog/bam/bam_index.c M /branches/prog/bam/bamtk.c M /branches/prog/bam/bgzip.c * bam-0.1.0-38 * get rid of some warnings in bgzip.c * potentially improve performance in indexing for BGZF ------------------------------------------------------------------------ r690 | lh3 | 2008-11-21 21:15:51 +0000 (Fri, 21 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bgzf.c I think I have fixed the bug in bgzf ------------------------------------------------------------------------ r689 | lh3 | 2008-11-21 20:48:56 +0000 (Fri, 21 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bgzf.c bug fix by Bob ------------------------------------------------------------------------ r688 | lh3 | 2008-11-21 20:37:27 +0000 (Fri, 21 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/bam_index.c fixed a bug due to the name change in _IOLIB ------------------------------------------------------------------------ r687 | lh3 | 2008-11-21 14:42:56 +0000 (Fri, 21 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bgzf.c fix small things ------------------------------------------------------------------------ r686 | lh3 | 2008-11-21 14:37:59 +0000 (Fri, 21 Nov 2008) | 2 lines Changed paths: A /branches/prog/bam/bgzf.c A /branches/prog/bam/bgzf.h A /branches/prog/bam/bgzip.c Bob's BGZF format, although currently buggy ------------------------------------------------------------------------ r685 | lh3 | 2008-11-21 09:48:20 +0000 (Fri, 21 Nov 2008) | 3 lines Changed paths: M
/branches/prog/bam/bam_index.c M /branches/prog/bam/bam_tview.c M /branches/prog/bam/bamtk.c * bam-0.1.0-37 * improve interface a little bit ------------------------------------------------------------------------ r684 | lh3 | 2008-11-21 09:30:18 +0000 (Fri, 21 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam_tview.c M /branches/prog/bam/bamtk.c * bam-0.1.0-36 * improve the interface of tview, a little bit ------------------------------------------------------------------------ r683 | lh3 | 2008-11-20 22:33:54 +0000 (Thu, 20 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bam_tview.c a little better viewer ------------------------------------------------------------------------ r682 | lh3 | 2008-11-20 22:27:01 +0000 (Thu, 20 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/bam_tview.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-35 * better viewer ------------------------------------------------------------------------ r681 | lh3 | 2008-11-20 20:51:16 +0000 (Thu, 20 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/Makefile M /branches/prog/bam/bam_tview.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-34 * tview is now a component of bamtk ------------------------------------------------------------------------ r680 | lh3 | 2008-11-20 19:17:30 +0000 (Thu, 20 Nov 2008) | 2 lines Changed paths: A /branches/prog/bam/bam_tview.c text alignment viewer ------------------------------------------------------------------------ r679 | lh3 | 2008-11-20 19:17:15 +0000 (Thu, 20 Nov 2008) | 5 lines Changed paths: M /branches/prog/bam/Makefile M /branches/prog/bam/bam.h M /branches/prog/bam/bam_index.c M /branches/prog/bam/bam_lpileup.c M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.1 M /branches/prog/bam/bamtk.c M /branches/prog/bam/faidx.c * bamtk-0.1.0-33 * added routines to reset pileup buffers * fixed a bug in faidx * add text alignment viewer
------------------------------------------------------------------------ r678 | lh3 | 2008-11-20 11:05:02 +0000 (Thu, 20 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/Makefile A /branches/prog/bam/bam_lpileup.c (from /branches/prog/bam/bam_tview.c:668) D /branches/prog/bam/bam_tview.c rename tview as lpileup ------------------------------------------------------------------------ r677 | lh3 | 2008-11-20 10:08:52 +0000 (Thu, 20 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/razf.c fixed a bug in razf ------------------------------------------------------------------------ r676 | lh3 | 2008-11-19 22:52:20 +0000 (Wed, 19 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/bam_index.c M /branches/prog/bam/faidx.h add documentations ------------------------------------------------------------------------ r674 | lh3 | 2008-11-19 21:39:17 +0000 (Wed, 19 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/bamtk.1 M /branches/prog/bam/faidx.h update documentation ------------------------------------------------------------------------ r673 | lh3 | 2008-11-19 21:19:03 +0000 (Wed, 19 Nov 2008) | 2 lines Changed paths: A /branches/prog/bam/bamtk.1 add manual page ------------------------------------------------------------------------ r672 | lh3 | 2008-11-19 16:40:49 +0000 (Wed, 19 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bamtk.c M /branches/prog/bam/faidx.c * bamtk-0.1.0-32 * make faidx more error resistant ------------------------------------------------------------------------ r671 | lh3 | 2008-11-19 16:09:55 +0000 (Wed, 19 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/faidx.h add index ------------------------------------------------------------------------ r670 | lh3 | 2008-11-19 16:02:39 +0000 (Wed, 19 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c M /branches/prog/bam/faidx.c * bamtk-0.1.0-31 * show reference 
sequence in pileup -v (not in the default pileup) ------------------------------------------------------------------------ r669 | lh3 | 2008-11-19 14:51:17 +0000 (Wed, 19 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/Makefile M /branches/prog/bam/bamtk.c M /branches/prog/bam/faidx.c * bamtk-0.1.0-30 * put faidx in bamtk and remove faidx_main.c ------------------------------------------------------------------------ r668 | lh3 | 2008-11-19 14:15:05 +0000 (Wed, 19 Nov 2008) | 4 lines Changed paths: M /branches/prog/bam/Makefile M /branches/prog/bam/bam_index.c M /branches/prog/bam/bam_tview.c M /branches/prog/bam/bamtk.c A /branches/prog/bam/faidx.c A /branches/prog/bam/faidx.h M /branches/prog/bam/razf.c * bamtk-0.1.0-29 * fixed a bug in tview.c * prepare to add faidx ------------------------------------------------------------------------ r667 | lh3 | 2008-11-19 10:20:45 +0000 (Wed, 19 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/razf.c M /branches/prog/bam/razf.h gzip-compatible razf ------------------------------------------------------------------------ r664 | lh3 | 2008-11-18 12:50:23 +0000 (Tue, 18 Nov 2008) | 5 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/bam_index.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-28 * fetch: fixed a bug at an array boundary * fetch: fixed a bug when the whole chromosome is retrieved * add linear index ------------------------------------------------------------------------ r663 | lh3 | 2008-11-17 21:29:22 +0000 (Mon, 17 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/Makefile M /branches/prog/bam/bam.c M /branches/prog/bam/bam.h M /branches/prog/bam/bam_import.c M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bam_tview.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-27 * put l_qseq into core and move l_aux to bam1_t ------------------------------------------------------------------------ r662 | lh3 | 2008-11-17 20:55:16 +0000 (Mon, 17 Nov 2008) | 
3 lines Changed paths: M /branches/prog/bam/bam.c M /branches/prog/bam/bam.h M /branches/prog/bam/bam_import.c M /branches/prog/bam/bam_index.c M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-26 * save seq and qual separately ------------------------------------------------------------------------ r661 | lh3 | 2008-11-17 13:09:37 +0000 (Mon, 17 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bam.h little ------------------------------------------------------------------------ r660 | lh3 | 2008-11-17 13:06:14 +0000 (Mon, 17 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bam.h more documentations ------------------------------------------------------------------------ r659 | lh3 | 2008-11-17 12:55:08 +0000 (Mon, 17 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-25 * make tview work for TAM ------------------------------------------------------------------------ r658 | lh3 | 2008-11-17 12:50:21 +0000 (Mon, 17 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/Makefile M /branches/prog/bam/bam.h M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bam_tview.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-24 * make tview as an independent module ------------------------------------------------------------------------ r657 | lh3 | 2008-11-17 11:26:06 +0000 (Mon, 17 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/Makefile M /branches/prog/bam/bam.h M /branches/prog/bam/bam_pileup.c change little ------------------------------------------------------------------------ r656 | lh3 | 2008-11-16 21:33:19 +0000 (Sun, 16 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-23 * also add tview for TAM ------------------------------------------------------------------------ r655 | lh3 | 2008-11-16 21:29:46 +0000 (Sun, 16 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/Makefile M 
/branches/prog/bam/bam_tview.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-22 * make tview more efficient for deep depth ------------------------------------------------------------------------ r654 | lh3 | 2008-11-16 20:52:19 +0000 (Sun, 16 Nov 2008) | 4 lines Changed paths: M /branches/prog/bam/Makefile M /branches/prog/bam/bam_import.c M /branches/prog/bam/bam_pileup.c A /branches/prog/bam/bam_tview.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-21 * fixed bug in the TAM parser: lowercase not recognized * unfinished function to leveled pileup (tview) ------------------------------------------------------------------------ r653 | lh3 | 2008-11-15 12:58:36 +0000 (Sat, 15 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-20 * pileup now display deleted bases as '*' ------------------------------------------------------------------------ r652 | lh3 | 2008-11-15 09:58:39 +0000 (Sat, 15 Nov 2008) | 4 lines Changed paths: M /branches/prog/bam/bam_index.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-19 * fixed a bug in fetch() * reduce memory in indexing ------------------------------------------------------------------------ r651 | lh3 | 2008-11-14 21:56:05 +0000 (Fri, 14 Nov 2008) | 5 lines Changed paths: M /branches/prog/bam/Makefile M /branches/prog/bam/bam.h M /branches/prog/bam/bam_index.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-18 * important changes are made to index: the index size is increased, but now we have no limit on file sizes and the new method potentially works with BGZF, Bob's new compression format. 
------------------------------------------------------------------------ r650 | lh3 | 2008-11-14 16:03:22 +0000 (Fri, 14 Nov 2008) | 4 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/bam_index.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-17 * more comments in bam.h * fixed a bug in bam_index.c ------------------------------------------------------------------------ r649 | lh3 | 2008-11-13 16:04:18 +0000 (Thu, 13 Nov 2008) | 4 lines Changed paths: M /branches/prog/bam/bam.c M /branches/prog/bam/bam.h M /branches/prog/bam/bam_import.c M /branches/prog/bam/bam_index.c M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bam_sort.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-16 * use macros to retrieve pointers from bam1_t and thus reduce the size of bam1_t struct. ------------------------------------------------------------------------ r648 | lh3 | 2008-11-13 13:21:39 +0000 (Thu, 13 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam_sort.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-15 * make more things work over pipe ------------------------------------------------------------------------ r647 | lh3 | 2008-11-13 12:49:28 +0000 (Thu, 13 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/misc/maq2tam.c fixed a bug in maq2tam ------------------------------------------------------------------------ r646 | lh3 | 2008-11-13 11:46:59 +0000 (Thu, 13 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/Makefile M /branches/prog/bam/misc/Makefile M /branches/prog/bam/misc/maq2tam.c * bug fix in maq2tam.c * improve Makefile ------------------------------------------------------------------------ r645 | lh3 | 2008-11-13 11:39:46 +0000 (Thu, 13 Nov 2008) | 3 lines Changed paths: A /branches/prog/bam/misc/Makefile M /branches/prog/bam/misc/maq2tam.c * corrected maq2tam * add Makefile ------------------------------------------------------------------------ r644 | lh3 | 2008-11-13 11:25:45 +0000 (Thu, 13 Nov 2008) | 2 lines Changed paths: M 
/branches/prog/bam/razf.c fixed the bug in buffered write (on behalf of Jue) ------------------------------------------------------------------------ r643 | lh3 | 2008-11-13 10:53:42 +0000 (Thu, 13 Nov 2008) | 2 lines Changed paths: D /branches/prog/bam/all2tam.pl A /branches/prog/bam/misc/all2tam.pl (from /branches/prog/bam/all2tam.pl:642) move to misc ------------------------------------------------------------------------ r642 | lh3 | 2008-11-13 10:53:23 +0000 (Thu, 13 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/all2tam.pl change tag ------------------------------------------------------------------------ r641 | lh3 | 2008-11-13 10:53:12 +0000 (Thu, 13 Nov 2008) | 2 lines Changed paths: D /branches/prog/bam/utils has been renamed ------------------------------------------------------------------------ r640 | lh3 | 2008-11-13 10:52:50 +0000 (Thu, 13 Nov 2008) | 2 lines Changed paths: A /branches/prog/bam/misc (from /branches/prog/bam/utils:639) rename ------------------------------------------------------------------------ r639 | lh3 | 2008-11-13 10:52:35 +0000 (Thu, 13 Nov 2008) | 2 lines Changed paths: A /branches/prog/bam/utils A /branches/prog/bam/utils/maq2tam.c utilities (converters and so on) ------------------------------------------------------------------------ r638 | lh3 | 2008-11-12 22:24:22 +0000 (Wed, 12 Nov 2008) | 4 lines Changed paths: M /branches/prog/bam/bam.c M /branches/prog/bam/bam.h M /branches/prog/bam/bam_import.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-14 * copy the text header to BAM * add BAM1 header flag ------------------------------------------------------------------------ r637 | lh3 | 2008-11-12 14:56:08 +0000 (Wed, 12 Nov 2008) | 4 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/bam_index.c M /branches/prog/bam/bamtk.c M /branches/prog/bam/razf.c * bamtk-0.1.0-13 * fixed a bug in razf * improved and fixed potential bugs in index 
------------------------------------------------------------------------ r636 | lh3 | 2008-11-12 11:57:13 +0000 (Wed, 12 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/bam_index.c M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c update documentation in the HeaderDOC format ------------------------------------------------------------------------ r635 | lh3 | 2008-11-12 10:08:38 +0000 (Wed, 12 Nov 2008) | 4 lines Changed paths: M /branches/prog/bam/bam.c M /branches/prog/bam/bam.h M /branches/prog/bam/bam_import.c M /branches/prog/bam/bam_index.c M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-12 * more documentations * rename baf1_core_t as bam1_core_t ------------------------------------------------------------------------ r634 | lh3 | 2008-11-11 23:00:35 +0000 (Tue, 11 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/bam_pileup.c documentation ------------------------------------------------------------------------ r633 | lh3 | 2008-11-11 21:23:49 +0000 (Tue, 11 Nov 2008) | 4 lines Changed paths: M /branches/prog/bam/bam_index.c M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-11 * give up regional pileup. We can now use pipe to mimic that. 
* for index file, change suffix .idx to .bmi ------------------------------------------------------------------------ r632 | lh3 | 2008-11-11 21:00:11 +0000 (Tue, 11 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/bam_import.c M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c M /branches/prog/bam/razf.c * bamtk-0.1.0-10 * make pileup work on TAM ------------------------------------------------------------------------ r631 | lh3 | 2008-11-11 09:20:29 +0000 (Tue, 11 Nov 2008) | 4 lines Changed paths: M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c M /branches/prog/bam/razf.c M /branches/prog/bam/razf.h M /branches/prog/bam/razip.c * bamtk-0.1.0-9 * razf now supports streaming * prepare to improve pileup (have not yet) ------------------------------------------------------------------------ r630 | lh3 | 2008-11-10 18:34:40 +0000 (Mon, 10 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/bam_import.c M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-8 * improve the interface of TAM parser ------------------------------------------------------------------------ r629 | lh3 | 2008-11-10 13:06:13 +0000 (Mon, 10 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-7 * almost nothing ------------------------------------------------------------------------ r628 | lh3 | 2008-11-10 12:56:36 +0000 (Mon, 10 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam.c M /branches/prog/bam/bam.h M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-6 * fixed a bug in bam_pileup.c ------------------------------------------------------------------------ r627 | lh3 | 2008-11-10 11:32:46 +0000 (Mon, 10 Nov 2008) | 4 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bamtk.c M /branches/prog/bam/razf.c * bamtk-0.1.0-5 * fixed a bug 
in razf.c, caused by my modifications * improve the interface of pileup. Now it will be slower but more flexible ------------------------------------------------------------------------ r626 | lh3 | 2008-11-09 20:51:04 +0000 (Sun, 09 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam.h M /branches/prog/bam/bamtk.c * bamtk-0.1.0-4 * view: dumping binary output ------------------------------------------------------------------------ r625 | lh3 | 2008-11-09 20:31:54 +0000 (Sun, 09 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam.c M /branches/prog/bam/bam.h M /branches/prog/bam/bam_import.c M /branches/prog/bam/bam_index.c M /branches/prog/bam/bam_pileup.c M /branches/prog/bam/bam_sort.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-3 * rename functions ------------------------------------------------------------------------ r624 | lh3 | 2008-11-09 15:07:32 +0000 (Sun, 09 Nov 2008) | 2 lines Changed paths: M /branches/prog/bam/bam.h add comments ------------------------------------------------------------------------ r623 | lh3 | 2008-11-08 22:32:49 +0000 (Sat, 08 Nov 2008) | 4 lines Changed paths: M /branches/prog/bam/bam_index.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-2 * improve indexing for a mixture of long and short reads, although currently I do not know whether it really works... 
------------------------------------------------------------------------ r622 | lh3 | 2008-11-08 22:13:58 +0000 (Sat, 08 Nov 2008) | 3 lines Changed paths: M /branches/prog/bam/bam_index.c M /branches/prog/bam/bamtk.c * bamtk-0.1.0-1 * prepare for improving indexing algorithm ------------------------------------------------------------------------ r621 | lh3 | 2008-11-08 20:28:09 +0000 (Sat, 08 Nov 2008) | 4 lines Changed paths: A /branches/prog/bam/all2tam.pl M /branches/prog/bam/bam.c M /branches/prog/bam/bam.h M /branches/prog/bam/bam_import.c M /branches/prog/bam/bamtk.c D /branches/prog/bam/tam_utils.pl * bamtk-0.1.0 * smarter integers * rename tam_utils.pl to all2tam.pl ------------------------------------------------------------------------ r620 | lh3 | 2008-11-08 17:17:22 +0000 (Sat, 08 Nov 2008) | 2 lines Changed paths: A /branches/prog/bam A /branches/prog/bam/Makefile A /branches/prog/bam/bam.c A /branches/prog/bam/bam.h A /branches/prog/bam/bam_endian.h A /branches/prog/bam/bam_import.c A /branches/prog/bam/bam_index.c A /branches/prog/bam/bam_pileup.c A /branches/prog/bam/bam_sort.c A /branches/prog/bam/bamtk.c A /branches/prog/bam/khash.h A /branches/prog/bam/ksort.h A /branches/prog/bam/kstream.h A /branches/prog/bam/razf.c A /branches/prog/bam/razf.h A /branches/prog/bam/razip.c A /branches/prog/bam/tam_utils.pl A /branches/prog/bam/zutil.h The Binary Alignment/Mapping format. ------------------------------------------------------------------------ samtools-0.1.19/INSTALL000066400000000000000000000022601212162403000144630ustar00rootroot00000000000000System Requirements =================== SAMtools depends on the zlib library . Version 1.2.3+ is preferred and with 1.2.3+ you can compile razip and use it to compress a FASTA file. SAMtools' faidx is able to index a razip-compressed FASTA file to save diskspace. Older zlib also works with SAMtools, but razip cannot be compiled. 
The text-based viewer (tview) requires the GNU ncurses library , which comes with Mac OS X and most of the modern Linux/Unix distributions. If you do not have this library installed, you can still compile the rest of SAMtools by manually changing: `-D_CURSES_LIB=1' to `-D_CURSES_LIB=0' at the line starting with `DFLAGS=', and comment out the line starting with `LIBCURSES='. Compilation =========== Type `make' to compile samtools. If you have zlib >= 1.2.2.1, you can compile razip with `make razip'. Installation ============ Copy `samtools', `bcftools/bcftools' and other executables/scripts in `misc' to a location you want (e.g. a directory in your $PATH). You may also copy `samtools.1' and `bcftools/bcftools.1' to a directory in your $MANPATH such that the `man' command may find the manual. samtools-0.1.19/Makefile000066400000000000000000000057071212162403000151030ustar00rootroot00000000000000CC= gcc CFLAGS= -g -Wall -O2 #LDFLAGS= -Wl,-rpath,\$$ORIGIN/../lib DFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_USE_KNETFILE -D_CURSES_LIB=1 KNETFILE_O= knetfile.o LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \ bam_pileup.o bam_lpileup.o bam_md.o razf.o faidx.o bedidx.o \ $(KNETFILE_O) bam_sort.o sam_header.o bam_reheader.o kprobaln.o bam_cat.o AOBJS= bam_tview.o bam_plcmd.o sam_view.o \ bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o \ bamtk.o kaln.o bam2bcf.o bam2bcf_indel.o errmod.o sample.o \ cut_target.o phase.o bam2depth.o padding.o bedcov.o bamshuf.o \ bam_tview_curses.o bam_tview_html.o PROG= samtools INCLUDES= -I. SUBDIRS= . 
bcftools misc LIBPATH= LIBCURSES= -lcurses # -lXCurses .SUFFIXES:.c .o .PHONY: all lib .c.o: $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@ all-recur lib-recur clean-recur cleanlocal-recur install-recur: @target=`echo $@ | sed s/-recur//`; \ wdir=`pwd`; \ list='$(SUBDIRS)'; for subdir in $$list; do \ cd $$subdir; \ $(MAKE) CC="$(CC)" DFLAGS="$(DFLAGS)" CFLAGS="$(CFLAGS)" \ INCLUDES="$(INCLUDES)" LIBPATH="$(LIBPATH)" $$target || exit 1; \ cd $$wdir; \ done; all:$(PROG) .PHONY:all lib clean cleanlocal .PHONY:all-recur lib-recur clean-recur cleanlocal-recur install-recur lib:libbam.a libbam.a:$(LOBJS) $(AR) -csru $@ $(LOBJS) samtools:lib-recur $(AOBJS) $(CC) $(CFLAGS) -o $@ $(AOBJS) $(LDFLAGS) libbam.a -Lbcftools -lbcf $(LIBPATH) $(LIBCURSES) -lm -lz -lpthread razip:razip.o razf.o $(KNETFILE_O) $(CC) $(CFLAGS) -o $@ $^ -lz bgzip:bgzip.o bgzf.o $(KNETFILE_O) $(CC) $(CFLAGS) -o $@ $^ -lz -lpthread bgzf.o:bgzf.c bgzf.h $(CC) -c $(CFLAGS) $(DFLAGS) -DBGZF_CACHE $(INCLUDES) bgzf.c -o $@ razip.o:razf.h bam.o:bam.h razf.h bam_endian.h kstring.h sam_header.h sam.o:sam.h bam.h bam_import.o:bam.h kseq.h khash.h razf.h bam_pileup.o:bam.h razf.h ksort.h bam_plcmd.o:bam.h faidx.h bcftools/bcf.h bam2bcf.h bam_index.o:bam.h khash.h ksort.h razf.h bam_endian.h bam_lpileup.o:bam.h ksort.h bam_tview.o:bam.h faidx.h bam_tview.h bam_tview_curses.o:bam.h faidx.h bam_tview.h bam_tview_html.o:bam.h faidx.h bam_tview.h bam_sort.o:bam.h ksort.h razf.h bam_md.o:bam.h faidx.h sam_header.o:sam_header.h khash.h bcf.o:bcftools/bcf.h bam2bcf.o:bam2bcf.h errmod.h bcftools/bcf.h bam2bcf_indel.o:bam2bcf.h errmod.o:errmod.h phase.o:bam.h khash.h ksort.h bamtk.o:bam.h faidx.o:faidx.h razf.h khash.h faidx_main.o:faidx.h razf.h libbam.1.dylib-local:$(LOBJS) libtool -dynamic $(LOBJS) -o libbam.1.dylib -lc -lz libbam.so.1-local:$(LOBJS) $(CC) -shared -Wl,-soname,libbam.so -o libbam.so.1 $(LOBJS) -lc -lz dylib: @$(MAKE) cleanlocal; \ case `uname` in \ Linux) $(MAKE) CFLAGS="$(CFLAGS) -fPIC" 
libbam.so.1-local;; \ Darwin) $(MAKE) CFLAGS="$(CFLAGS) -fPIC" libbam.1.dylib-local;; \ *) echo 'Unknown OS';; \ esac cleanlocal: rm -fr gmon.out *.o a.out *.exe *.dSYM razip bgzip $(PROG) *~ *.a *.so.* *.so *.dylib clean:cleanlocal-recur samtools-0.1.19/Makefile.mingw000066400000000000000000000040541212162403000162150ustar00rootroot00000000000000CC= gcc.exe AR= ar.exe CFLAGS= -g -Wall -O2 DFLAGS= -D_USE_KNETFILE -D_CURSES_LIB=2 KNETFILE_O= knetfile.o LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \ bam_pileup.o bam_lpileup.o bam_md.o razf.o faidx.o \ $(KNETFILE_O) bam_sort.o sam_header.o bam_reheader.o kprobaln.o bedidx.o AOBJS= bam_tview.o bam_plcmd.o sam_view.o \ bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o \ bamtk.o kaln.o bam2bcf.o bam2bcf_indel.o errmod.o sample.o \ cut_target.o phase.o bam_cat.o bam2depth.o BCFOBJS= bcftools/bcf.o bcftools/fet.o bcftools/bcf2qcall.o bcftools/bcfutils.o \ bcftools/call1.o bcftools/index.o bcftools/kfunc.o bcftools/em.o \ bcftools/kmin.o bcftools/prob1.o bcftools/vcf.o bcftools/mut.o PROG= samtools.exe bcftools.exe INCLUDES= -I. -Iwin32 SUBDIRS= . LIBPATH= .SUFFIXES:.c .o .c.o: $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@ all:$(PROG) .PHONY:all lib clean cleanlocal .PHONY:all-recur lib-recur clean-recur cleanlocal-recur install-recur lib:libbam.a libbam.a:$(LOBJS) $(AR) -cru $@ $(LOBJS) samtools.exe:$(AOBJS) libbam.a $(BCFOBJS) $(CC) $(CFLAGS) -o $@ $(AOBJS) $(BCFOBJS) $(LIBPATH) -lm -L. 
-lbam -Lwin32 -lz -lcurses -lws2_32 bcftools.exe:$(BCFOBJS) bcftools/main.o kstring.o bgzf.o knetfile.o bedidx.o $(CC) $(CFLAGS) -o $@ $(BCFOBJS) bcftools/main.o kstring.o bgzf.o knetfile.o bedidx.o -lm -Lwin32 -lz -lws2_32 razip.o:razf.h bam.o:bam.h razf.h bam_endian.h kstring.h sam_header.h sam.o:sam.h bam.h bam_import.o:bam.h kseq.h khash.h razf.h bam_pileup.o:bam.h razf.h ksort.h bam_plcmd.o:bam.h faidx.h bcftools/bcf.h bam2bcf.h bam_index.o:bam.h khash.h ksort.h razf.h bam_endian.h bam_lpileup.o:bam.h ksort.h bam_tview.o:bam.h faidx.h bam_sort.o:bam.h ksort.h razf.h bam_md.o:bam.h faidx.h sam_header.o:sam_header.h khash.h bcf.o:bcftools/bcf.h bam2bcf.o:bam2bcf.h errmod.h bcftools/bcf.h bam2bcf_indel.o:bam2bcf.h errmod.o:errmod.h faidx.o:faidx.h razf.h khash.h faidx_main.o:faidx.h razf.h clean: rm -fr gmon.out *.o a.out *.exe *.dSYM razip bgzip $(PROG) *~ *.a *.so.* *.so *.dylib samtools-0.1.19/NEWS000066400000000000000000000670161212162403000141430ustar00rootroot00000000000000Beta Release 0.1.19 (15 March, 2013) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Notable changes in samtools and bcftools: * The latest source code and development moved to github, http://github.com/samtools/samtools * Many important bugfixes and contributions by many people. Thanks to all! * Performance improvements (multi-threading) * Important changes in calling, see - samtools mpileup -p - bcftools view -m * New annotations useful for filtering (RPB, HWE, QBD, MDV) * New tools, bamcheck and plot-bamcheck * New features in samtools tview * And much more.. For a detailed list of commits, please see http://github.com/samtools/samtools/commits/master (0.1.19: 15 March 2013, commit 96b5f2294ac0054230e88913c4983d548069ea4e) Beta Release 0.1.18 (2 September, 2011) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Notable changes in samtools: * Support the new =/X CIGAR operators (by Peter Cock). * Allow to subsample BAM while keeping the pairing intact (view -s). 
* Implemented variant distance bias as a new filter (by Petr Danecek). * Bugfix: huge memory usage during indexing * Bugfix: use of uninitialized variable in mpileup (rare) * Bugfix: wrong BAQ probability (rare) Notable changes in bcftools: * Support indel in the contrast caller. * Bugfix: LRT2=nan in rare cases (0.1.18: 2 September 2011, r982:295) Beta Release 0.1.17 (6 July, 2011) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ With the maturity of `mpileup' and the lack of update in the `pileup' command, the `pileup' command is now formally dropped. Most of the pileup functionality, such as outputting mapping quality and read positions, has been added to `mpileup'. Since this release, `bcftools view' is able to perform contrast SNP calling (option -T) for discovering de novo and/or somatic mutations between a pair of samples or in a family trio. Potential mutations are scored by a log likelihood ratio, which is very simple in math, but should be comparable to more sophisticated methods. Note that getting the score is only the very first step. A lot more needs to be done to reduce systematic errors due to mapping and reference errors and structural variations. Other notable changes in samtools: * Improved sorting order checking during indexing. * Improved region parsing. Colons in reference sequence names are parsed properly. * Fixed an issue where mpileup does not apply BAQ for the first few reads when a region is specified. * Fixed an issue where `faidx' does not work with FASTA files with long lines. * Bugfix: wrong SP genotype information in the BCF output. Other notable changes in bcftools: * Output the ML estimate of the allele count. * Added the HWE plus F<0 filter to varFilter. For multiple samples, it effectively filters false heterozygous calls around centromeres. * For association mapping, perform both 1-degree and 2-degree test. The 2-degree test is conservative but more robust to HWE violation. 
(0.1.17: 6 July 2011, r973:277) Beta Release 0.1.16 (21 April, 2011) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Notable changes in samtools: * Support the new SAM/BAM type `B' in the latest SAM spec v1.4. * When the output file of `samtools merge' exists, do not overwrite it unless a new command-line option `-f' is applied. * Bugfix: BED support is not working when the input BED is not sorted. * Bugfix: some reads without coordinates but given on the reverse strand are lost in merging. Notable changes in bcftools: * Code cleanup: separated max-likelihood inference and Bayesian inference. * Test Hardy-Weinberg equilibrium with a likelihood-ratio test. * Provided another association test P-value by likelihood-ratio test. * Use Brent's method to estimate the site allele frequency when EM converges slowly. The resulting ML estimate of allele frequency is more accurate. * Added the `ldpair' command, which computes r^2 between SNP pairs given in an input file. Also, the `pileup' command, which has been deprecated by `mpileup' since version 0.1.10, will be dropped in the next release. The old `pileup' command is substandard and causing a lot of confusion. (0.1.16: 21 April 2011, r963:234) Beta Release 0.1.15 (10 April, 2011) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Notable changes: * Allow to perform variant calling or to extract information in multiple regions specified by a BED file (`samtools mpileup -l', `samtools view -L' and `bcftools view -l'). * Added the `depth' command to samtools to compute the per-base depth with a simpler interface. File `bam2depth.c', which implements this command, is the recommended example on how to use the mpileup APIs. * Estimate genotype frequencies with ML; perform chi^2 based Hardy-Weinberg test using this estimate. * For `samtools view', when `-R' is specified, drop read groups in the header that are not contained in the specified file. * For `samtools flagstat', separate QC-pass and QC-fail reads. 
* Improved the command line help of `samtools mpileup' and `bcftools view'. * Use a global variable to control the verbose level of samtools stderr output. Nonetheless, it has not been fully utilized. * Fixed an issue in association test which may report false associations, possibly due to floating point underflow. (0.1.15: 10 April 2011, r949:203) Beta release 0.1.14 (21 March, 2011) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This release implements a method for testing associations for case-control data. The method does not call genotypes but instead sums over all genotype configurations to compute a chi^2 based test statistics. It can be potentially applied to comparing a pair of samples (e.g. a tumor-normal pair), but this has not been evaluated on real data. Another new feature is to make X chromosome variant calls when female and male samples are both present. The user needs to provide a file indicating the ploidy of each sample (see also manual bcftools/bcftools.1). Other notable changes: * Added `bcftools view -F' to parse BCF files generated by samtools r921 or older which encodes PL in a different way. * Changed the behavior of `bcftools view -s'. Now when a list of samples is provided, the samples in the output will be reordered to match the ordering in the sample list. This change is mainly designed for association test. * Sped up `bcftools view -v' for target sequencing given thousands of samples. Also added a new option `view -d' to skip loci where only a few samples are covered by reads. * Dropped HWE test. This feature has never been implemented properly. An EM should be much better. To be implemented in future. * Added the `cat' command to samtools. This command concatenates BAMs with identical sequence dictionaries in an efficient way. Modified from bam_cat.c written by Chris Saunders. * Added `samtools view -1' to write BAMs at a low compression level but twice faster to create. 
The `sort' command generates temporary files at a low compression level as well. * Added `samtools mpileup -6' to accept "BAM" with Illumina 1.3+ quality strings (strictly speaking, such a file is not BAM). * Added `samtools mpileup -L' to skip INDEL calling in regions with excessively high coverage. Such regions dramatically slow down mpileup. * Updated `misc/export2sam.pl', provided by Chris Saunders from Illumina Inc. (0.1.14: 21 March 2011, r933:170) Beta release 0.1.13 (1 March, 2011) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The most important though largely invisible modification is the change of the order of genotypes in the PL VCF/BCF tag. This is to conform to the upcoming VCF spec v4.1. The change means that 0.1.13 is not backward compatible with VCF/BCF generated by samtools older than r921 inclusive. VCF/BCF generated by the new samtools will contain a line `##fileformat=VCFv4.1' as well as the samtools version number. Single Individual Haplotyping (SIH) is added as an experimental feature. It originally aims to produce haploid consensus from fosmid pool sequencing, but also works with short-read data. For short reads, phased blocks are usually too short to be useful in many applications, but they can help to rule out part of SNPs close to INDELs or between copies of CNVs. Other notable changes in samtools: * Construct per-sample consensus to reduce the effect of nearby SNPs in INDEL calling. This reduces the power but improves specificity. * Improved sorting order checking in indexing. Now indexing is the preferred way to check if a BAM is sorted. * Added a switch `-E' to mpileup and calmd. This option uses an alternative way to apply BAQ, which increases sensitivity, especially to MNPs, at the cost of a little loss in specificity. * Added `mpileup -A' to allow to use reads in anomalous pairs in SNP calling. * Added `mpileup -m' to allow fine control of the collection of INDEL candidates. * Added `mpileup -S' to compute per-sample strand bias P-value. 
* Added `mpileup -G' to exclude read groups in variant calling. * Fixed segfault in indel calling related to unmapped and refskip reads. * Fixed an integer overflow in INDEL calling. This bug produces wrong INDEL genotypes for longer short INDELs, typically over 10bp. * Fixed a bug in tview on big-endian machines. * Fixed a very rare memory issue in bam_md.c * Fixed an out-of-boundary bug in mpileup when the read base is `N'. * Fixed a compiling error when the knetfile library is not used. Fixed a library compiling error due to the lack of bam_nt16_nt4_table[] table. Suppress a compiling warning related to the latest zlib. Other notable changes in bcftools: * Updated the BCF spec. * Added the `FQ' VCF INFO field, which gives the phred-scaled probability of all samples being the same (identical to the reference or all homozygous variants). Option `view -f' has been dropped. * Implemented "vcfutils.pl vcf2fq" to generate a consensus sequence similar to "samtools.pl pileup2fq". * Make sure the GT FORMAT field is always the first FORMAT to conform to the VCF spec. Drop bcf-fix.pl. * Output bcftools specific INFO and FORMAT in the VCF header. * Added `view -s' to call variants from a subset of samples. * Properly convert VCF to BCF with a user provided sequence dictionary. Nonetheless, custom fields are still unparsed and will be stored as a missing value. * Fixed a minor bug in Fisher's exact test; the results are rarely changed. (0.1.13: 1 March 2011, r926:134) Beta release 0.1.12a (2 December, 2010) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is another bug fix release: * Fixed a memory violation in mpileup, which causes segfault. Release 0.1.9 and above are affected. * Fixed a memory violation in the indel caller, which does not cause a segfault, but may potentially affect deletion calls in an unexpected way. Release 0.1.10 and above are affected. * Fixed a bug in computing r-square in bcftools. Few are using this functionality and it only has minor effect. 
* Fixed a memory leak in bam_fetch(). * Fixed a bug in writing meta information to the BAM index for the last sequence. This bug is invisible to most users, but it is a bug anyway. * Fixed a bug in bcftools which causes false "DP4=0,0,0,0" annotations. (0.1.12: 2 December 2010, r862) Beta release 0.1.11 (21 November, 2010) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is mainly a bug fix release: * Fixed a bug in random retrieval (since 0.1.8). It occurs when reads are retrieved from a small region containing no reads. * Fixed a bug in pileup (since 0.1.9). The bug causes an assertion failure when the first CIGAR operation is a deletion. * Improved fault tolerance in remote access. One minor feature has been implemented in bcftools: * Added a reference-free variant calling mode. In this mode, a site is regarded as a variant iff the sample(s) contains two or more alleles; the meaning of the QUAL field in the VCF output is changed accordingly. Effectively, the reference allele is irrelevant to the result in the new mode, although the reference sequence has to be used in realignment when SAMtools computes genotype likelihoods. In addition, since 0.1.10, the `pileup' command has been deprecated by `mpileup' which is more powerful and more accurate. The `pileup' command will not be removed in the next few releases, but new features will not be added. (0.1.11: 21 November 2010, r851) Beta Release 0.1.10 (16 November, 2010) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This release is featured as the first major improvement to the indel caller. The method is similar to the old one implemented in the pileup command, but the details are handled more carefully both in theory and in practice. As a result, the new indel caller usually gives more accurate indel calls, though at the cost of sensitivity. The caller is implemented in the mpileup command and is invoked by default. It works with multiple samples. 
Other notable changes: * With the -r option, the calmd command writes the difference between the original base quality and the BAQ capped base quality at the BQ tag but does not modify the base quality. Please use -Ar to overwrite the original base quality (the 0.1.9 behavior). * Allow to set a maximum per-sample read depth to reduce memory. In 0.1.9, most of memory is wasted for the ultra high read depth in some regions (e.g. the chr1 centromere). * Optionally write per-sample read depth and per-sample strand bias P-value. * Compute equal-tail (Bayesian) credible interval of site allele frequency at the CI95 VCF annotation. * Merged the vcfutils.pl varFilter and filter4vcf for better SNP/indel filtering. (0.1.10: 16 November 2010, r829) Beta Release 0.1.9 (27 October, 2010) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This release is featured as the first major improvement to the samtools' SNP caller. It comes with a revised MAQ error model, the support of multi-sample SNP calling and the computation of base alignment quality (BAQ). The revised MAQ error model is based on the original model. It solves an issue of miscalling SNPs in repetitive regions. Although such SNPs can usually be filtered at a later step, they mess up unfiltered calls. This is a theoretical flaw in the original model. The revised MAQ model deprecates the original MAQ model and the simplified SOAPsnp model. Multi-sample SNP calling is separated in two steps. The first is done by samtools mpileup and the second by a new program, bcftools, which is included in the samtools source code tree. Multi-sample SNP calling also works for single sample and has the advantage of enabling more powerful filtration. It is likely to deprecate pileup in future once a proper indel calling method is implemented. BAQ is the Phred-scaled probability of a read base being wrongly aligned. 
Capping base quality by BAQ has been shown to be very effective in suppressing false SNPs caused by misalignments around indels or in low-complexity regions with acceptable compromise on computation time. This strategy is highly recommended and can be used with other SNP callers as well. In addition to the three major improvements, other notable changes are: * Changes to the pileup format. A reference skip (the N CIGAR operator) is shown as '<' or '>' depending on the strand. Tview is also changed accordingly. * Accelerated pileup. The plain pileup is about 50% faster. * Regional merge. The merge command now accepts a new option to merge files in a specified region. * Fixed a bug in bgzip and razip which causes source files to be deleted even if option -c is applied. * In APIs, propagate errors to downstream callers and make samtools return non-zero values once errors occur. (0.1.9: 27 October 2010, r783) Beta Release 0.1.8 (11 July, 2010) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Notable functional changes: * Added the `reheader' command which replaces a BAM header with a new header. This command is much faster than replacing header by BAM->SAM->BAM conversions. * Added the `mpileup' command which computes the pileup of multiple alignments. * The `index' command now stores the number of mapped and unmapped reads in the index file. This information can be retrieved quickly by the new `idxstats' command. * By default, pileup used the SOAPsnp model for SNP calling. This avoids the floating overflow in the MAQ model which leads to spurious calls in repetitive regions, although these calls will be immediately filtered by varFilter. * The `tview' command now correctly handles CIGARs like 7I10M and 10M1P1I10M which cause assertion failure in earlier versions. * Tview accepts a region like `=10,000' where `=' stands for the current sequence name. This saves typing for long sequence names. 
* Added the `-d' option to `pileup' which avoids slow indel calling in ultradeep regions by subsampling reads locally. * Added the `-R' option to `view' which retrieves alignments in read groups listed in the specified file. Performance improvements: * The BAM->SAM conversion is up to twice faster, depending on the characteristic of the input. * Parsing SAM headers with a lot of reference sequences is now much faster. * The number of lseek() calls per query is reduced when the query region contains no read alignments. Bug fixes: * Fixed an issue in the indel caller that leads to miscall of indels. Note that this solution may not work well when the sequencing indel error rate is higher than the rate of SNPs. * Fixed another issue in the indel caller which may lead to incorrect genotype. * Fixed a bug in `sort' when option `-o' is applied. * Fixed a bug in `view -r'. APIs and other changes: * Added iterator interfaces to random access and pileup. The callback interfaces directly call the iterator interfaces. * The BGZF blocks holding the BAM header are independent of alignment BGZF blocks. Alignment records shorter than 64kB are guaranteed to be fully contained in one BGZF block. This change is fully compatible with the old version of samtools/picard. Changes in other utilities: * Updated export2sam.pl by Chris Saunders. * Improved the sam2vcf.pl script. * Added a Python version of varfilter.py by Aylwyn Scally. (0.1.8: 11 July 2010, r613) Beta Release 0.1.7 (10 November, 2009) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Notable changes: * Improved the indel caller in complex scenarios, in particular for long reads. The indel caller is now able to make reasonable indel calls from Craig Venter capillary reads. * Rewrote single-end duplicate removal with improved performance. Paired-end reads are not touched. * Duplicate removal is now library aware. Samtools removes potential PCR/optical duplicates inside a library rather than across libraries. 
* SAM header is now fully parsed, although this functionality is not used in merging and so on. * In samtools merge, optionally take the input file name as RG-ID and attach the RG tag to each alignment. * Added FTP support in the RAZF library. RAZF-compressed reference sequence can be retrieved remotely. * Improved network support for Win32. * Samtools sort and merge are now stable. Changes in other utilities: * Implemented sam2vcf.pl that converts the pileup format to the VCF format. * This release of samtools is known to work with the latest Bio-Samtools Perl module. (0.1.7: 10 November 2009, r510) Beta Release 0.1.6 (2 September, 2009) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Notable changes: * In tview, do not show a blank screen when no reads are mapped to the corresponding region. * Implemented native HTTP support in the BGZF library. Samtools is now able to directly open a BAM file on HTTP. HTTP proxy is also supported via the "http_proxy" environment variable. * Samtools is now compatible with the MinGW (win32) compiler and the PDCurses library. * The calmd (or fillmd) command now calculates the NM tag and replaces MD tags if they are wrong. * The view command now recognizes and optionally prints FLAG in HEX or as strings to make a SAM file more friendly to human eyes. This is a samtools-C extension, not implemented in Picard for the time being. Please type `samtools view -?' for more information. * BAM files now have an end-of-file (EOF) marker to facilitate truncation detection. A warning will be given if an on-disk BAM file does not have this marker. The warning will be seen on BAM files generated by an older version of samtools. It does NO harm. * New key bindings in tview: `r' to show read names and `s' to show reference skip (N operation) as deletions. * Fixed a bug in `samtools merge -n'. * Samtools merge now optionally copies the header of a user specified SAM file to the resultant BAM output.
* Samtools pileup/tview works with a CIGAR whose first or last operation is an indel. * Fixed a bug in bam_aux_get(). Changes in other utilities: * Fixed wrong FLAG in maq2sam. (0.1.6: 2 September 2009, r453) Beta Release 0.1.5 (7 July, 2009) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Notable changes: * Support opening a BAM alignment on FTP. Users can now use "tview" to view alignments at the NCBI ftp site. Please read the manual for more information. * In library, propagate errors rather than exit or raise an assertion failure. * Simplified the building system and fixed compiling errors caused by zlib<1.2.2.1. * Fixed an issue about lost header information when a SAM is imported with "view -t". * Implemented "samtools.pl varFilter" which filters both SNPs and short indels. This command replaces "indelFilter". * Implemented "samtools.pl pileup2fq" to generate FASTQ consensus from pileup output. * In pileup, cap mapping quality at 60. This helps filtering when different aligners are in use. * In pileup, allow outputting variant sites only. * Made pileup generate correct calls in repetitive regions. At the same time, I am considering implementing a simplified model in SOAPsnp, although this has not happened yet. * In view, added '-u' option to output BAM without compression. This option is preferred when the output is piped to other commands. * In view, added '-l' and '-r' to get the alignments for one library or read group. The "@RG" header lines are now partially parsed. * Do not include command line utilities in libbam.a. * Fixed memory leaks in pileup and bam_view1(). * Made faidx more tolerant to empty lines right before or after FASTA > lines. Changes in other utilities: * Updated novo2sam.pl by Colin Hercus, the key developer of novoalign. This release involves several modifications to the key code base which may potentially introduce new bugs even though we have tried to minimize this by testing on several examples. Please let us know if you catch bugs.
(0.1.5: 7 July 2009, r373) Beta Release 0.1.4 (21 May, 2009) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Notable changes: * Added the 'rmdupse' command: removing duplicates for SE reads. * Fixed a critical bug in the indel caller: clipped alignments are not processed correctly. * Fixed a bug in tview: gapped alignment may be incorrectly displayed. * Unified the interface to BAM and SAM I/O. This is done by implementing a wrapper on top of the old APIs and therefore old APIs are still valid. The new I/O APIs also recognize the @SQ header lines. * Generate the MD tag. * Generate "=" bases. However, the indel caller will not work when "=" bases are present. * Enhanced support of color-read display (by Nils Homer). * Implemented the GNU building system. However, currently the building system does not generate libbam.a. We will improve this later. For the time being, `make -f Makefile.generic' is preferred. * Fixed a minor bug in pileup: the first read in a chromosome may be skipped. * Fixed bugs in bam_aux.c. These bugs do not affect other components as they were not used previously. * Output the 'SM' tag from maq2sam. (0.1.4: 21 May 2009, r297) Beta Release 0.1.3 (15 April, 2009) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Notable changes in SAMtools: * SAMtools is more consistent with the specification: a) '*' in the QUAL field is allowed; b) the field separator is TAB only and SPACE is treated as a character in a field; c) an empty header is allowed. * Implemented GLFv3 support in pileup. * Fixed a severe bug in fixmate: strand information is wrongly overwritten. * Fixed a bug in alignment retrieval: alignments bridging n*16384bp are not correctly retrieved sometimes. * Fixed a bug in rmdup: segfault if unmapped reads are present. * Moved indel_filter.pl to samtools.pl and improved the filtering by checking the actual number of alignments containing indels. The indel pileup line is also changed a little to make this filtration easier.
* Fixed a minor bug in indexing: the bin number of an unmapped read is wrongly calculated. * Added `flagstat' command to show statistics on the FLAG field. * Improved indel caller by setting the maximum window size in local realignment. Changes in other utilities: * Fixed a bug in maq2sam: a tag name is obsolete. * Improvement to wgsim: a) added support for SOLiD read simulation; b) show the number of substitutions/indels/errors in read name; c) considerable code clean up. * Various converters: improved functionality in general. * Updated the example SAM due to the previous bug in fixmate. (0.1.3: 15 April 2009, r227) Beta Release 0.1.2 (28 January, 2008) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Notable changes in SAMtools: * Implemented a Bayesian indel caller. The new caller generates scores and genotypes and is potentially more accurate than Maq's indel caller. The pileup format is also changed accordingly. * Implemented rmdup command: remove potential PCR duplicates. Note that this command ONLY works for FR orientation and requires that ISIZE is correctly set. * Added fixmate command: fill in mate coordinates, ISIZE and mate related flags from a name-sorted alignment. * Fixed a bug in indexing: reads bridging 16x kbp were not retrieved. * Allow to select reads shown in the pileup output with a mask. * Generate GLFv2 from pileup. * Added two more flags for flagging PCR/optical duplicates and for QC failure. * Fixed a bug in sort command: name sorting for large alignments did not work. * Allow to completely disable RAZF (using Makefile.lite) as some people have problems compiling it. * Fixed a bug in import command when there are reads without coordinates. * Fixed a bug in tview: clipping broke the alignment viewer. * Fixed a compiling error when _NO_CURSES is applied. * Fixed a bug in merge command. Changes in other utilities: * Added wgsim, a paired-end reads simulator. Wgsim was adapted from maq's reads simulator. Colin Hercus further improved it to allow longer indels.
* Added wgsim_eval.pl, a script that evaluates the accuracy of alignment on reads generated by wgsim. * Added soap2sam.pl, a SOAP2->SAM converter. This converter does not work properly when multiple hits are output. * Added bowtie2sam.pl, a Bowtie->SAM converter. Only the top hit will be retained when multiple hits are present. * Fixed a bug in export2sam.pl for QC reads. * Support RG tag at MAQ->SAM converter. * Added novo2sam.pl, a NovoAlign->SAM converter. Multiple hits and indel are not properly handled, though. * Added zoom2sam.pl, a ZOOM->SAM converter. It only works with the default Illumina output. (0.1.2: 28 January 2008; r116) Beta Release 0.1.1 (22 December, 2008) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The is the first public release of samtools. For more information, please check the manual page `samtools.1' and the samtools website http://samtools.sourceforge.net samtools-0.1.19/bam.c000066400000000000000000000371551212162403000143500ustar00rootroot00000000000000#include #include #include #include #include "bam.h" #include "bam_endian.h" #include "kstring.h" #include "sam_header.h" int bam_is_be = 0, bam_verbose = 2, bam_no_B = 0; char *bam_flag2char_table = "pPuUrR12sfd\0\0\0\0\0"; /************************** * CIGAR related routines * **************************/ uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar) { int k, end = c->pos; for (k = 0; k < c->n_cigar; ++k) { int op = bam_cigar_op(cigar[k]); int len = bam_cigar_oplen(cigar[k]); if (op == BAM_CBACK) { // move backward int l, u, v; if (k == c->n_cigar - 1) break; // skip trailing 'B' for (l = k - 1, u = v = 0; l >= 0; --l) { int op1 = bam_cigar_op(cigar[l]); int len1 = bam_cigar_oplen(cigar[l]); if (bam_cigar_type(op1)&1) { // consume query if (u + len1 >= len) { // stop if (bam_cigar_type(op1)&2) v += len - u; break; } else u += len1; } if (bam_cigar_type(op1)&2) v += len1; } end = l < 0? 
c->pos : end - v; } else if (bam_cigar_type(op)&2) end += bam_cigar_oplen(cigar[k]); } return end; } int32_t bam_cigar2qlen(const bam1_core_t *c, const uint32_t *cigar) { uint32_t k; int32_t l = 0; for (k = 0; k < c->n_cigar; ++k) if (bam_cigar_type(bam_cigar_op(cigar[k]))&1) l += bam_cigar_oplen(cigar[k]); return l; } /******************** * BAM I/O routines * ********************/ bam_header_t *bam_header_init() { bam_is_be = bam_is_big_endian(); return (bam_header_t*)calloc(1, sizeof(bam_header_t)); } void bam_header_destroy(bam_header_t *header) { int32_t i; extern void bam_destroy_header_hash(bam_header_t *header); if (header == 0) return; if (header->target_name) { for (i = 0; i < header->n_targets; ++i) free(header->target_name[i]); free(header->target_name); free(header->target_len); } free(header->text); if (header->dict) sam_header_free(header->dict); if (header->rg2lib) sam_tbl_destroy(header->rg2lib); bam_destroy_header_hash(header); free(header); } bam_header_t *bam_header_read(bamFile fp) { bam_header_t *header; char buf[4]; int magic_len; int32_t i = 1, name_len; // check EOF i = bgzf_check_EOF(fp); if (i < 0) { // If the file is a pipe, checking the EOF marker will *always* fail // with ESPIPE. Suppress the error message in this case. if (errno != ESPIPE) perror("[bam_header_read] bgzf_check_EOF"); } else if (i == 0) fprintf(stderr, "[bam_header_read] EOF marker is absent. 
The input is probably truncated.\n"); // read "BAM1" magic_len = bam_read(fp, buf, 4); if (magic_len != 4 || strncmp(buf, "BAM\001", 4) != 0) { fprintf(stderr, "[bam_header_read] invalid BAM binary header (this is not a BAM file).\n"); return 0; } header = bam_header_init(); // read plain text and the number of reference sequences bam_read(fp, &header->l_text, 4); if (bam_is_be) bam_swap_endian_4p(&header->l_text); header->text = (char*)calloc(header->l_text + 1, 1); bam_read(fp, header->text, header->l_text); bam_read(fp, &header->n_targets, 4); if (bam_is_be) bam_swap_endian_4p(&header->n_targets); // read reference sequence names and lengths header->target_name = (char**)calloc(header->n_targets, sizeof(char*)); header->target_len = (uint32_t*)calloc(header->n_targets, 4); for (i = 0; i != header->n_targets; ++i) { bam_read(fp, &name_len, 4); if (bam_is_be) bam_swap_endian_4p(&name_len); header->target_name[i] = (char*)calloc(name_len, 1); bam_read(fp, header->target_name[i], name_len); bam_read(fp, &header->target_len[i], 4); if (bam_is_be) bam_swap_endian_4p(&header->target_len[i]); } return header; } int bam_header_write(bamFile fp, const bam_header_t *header) { char buf[4]; int32_t i, name_len, x; // write "BAM1" strncpy(buf, "BAM\001", 4); bam_write(fp, buf, 4); // write plain text and the number of reference sequences if (bam_is_be) { x = bam_swap_endian_4(header->l_text); bam_write(fp, &x, 4); if (header->l_text) bam_write(fp, header->text, header->l_text); x = bam_swap_endian_4(header->n_targets); bam_write(fp, &x, 4); } else { bam_write(fp, &header->l_text, 4); if (header->l_text) bam_write(fp, header->text, header->l_text); bam_write(fp, &header->n_targets, 4); } // write sequence names and lengths for (i = 0; i != header->n_targets; ++i) { char *p = header->target_name[i]; name_len = strlen(p) + 1; if (bam_is_be) { x = bam_swap_endian_4(name_len); bam_write(fp, &x, 4); } else bam_write(fp, &name_len, 4); bam_write(fp, p, name_len); if (bam_is_be) { x 
= bam_swap_endian_4(header->target_len[i]); bam_write(fp, &x, 4); } else bam_write(fp, &header->target_len[i], 4); } bgzf_flush(fp); return 0; } static void swap_endian_data(const bam1_core_t *c, int data_len, uint8_t *data) { uint8_t *s; uint32_t i, *cigar = (uint32_t*)(data + c->l_qname); s = data + c->n_cigar*4 + c->l_qname + c->l_qseq + (c->l_qseq + 1)/2; for (i = 0; i < c->n_cigar; ++i) bam_swap_endian_4p(&cigar[i]); while (s < data + data_len) { uint8_t type; s += 2; // skip key type = toupper(*s); ++s; // skip type if (type == 'C' || type == 'A') ++s; else if (type == 'S') { bam_swap_endian_2p(s); s += 2; } else if (type == 'I' || type == 'F') { bam_swap_endian_4p(s); s += 4; } else if (type == 'D') { bam_swap_endian_8p(s); s += 8; } else if (type == 'Z' || type == 'H') { while (*s) ++s; ++s; } else if (type == 'B') { int32_t n, Bsize = bam_aux_type2size(*s); memcpy(&n, s + 1, 4); if (1 == Bsize) { } else if (2 == Bsize) { for (i = 0; i < n; i += 2) bam_swap_endian_2p(s + 5 + i); } else if (4 == Bsize) { for (i = 0; i < n; i += 4) bam_swap_endian_4p(s + 5 + i); } bam_swap_endian_4p(s+1); } } } int bam_read1(bamFile fp, bam1_t *b) { bam1_core_t *c = &b->core; int32_t block_len, ret, i; uint32_t x[8]; assert(BAM_CORE_SIZE == 32); if ((ret = bam_read(fp, &block_len, 4)) != 4) { if (ret == 0) return -1; // normal end-of-file else return -2; // truncated } if (bam_read(fp, x, BAM_CORE_SIZE) != BAM_CORE_SIZE) return -3; if (bam_is_be) { bam_swap_endian_4p(&block_len); for (i = 0; i < 8; ++i) bam_swap_endian_4p(x + i); } c->tid = x[0]; c->pos = x[1]; c->bin = x[2]>>16; c->qual = x[2]>>8&0xff; c->l_qname = x[2]&0xff; c->flag = x[3]>>16; c->n_cigar = x[3]&0xffff; c->l_qseq = x[4]; c->mtid = x[5]; c->mpos = x[6]; c->isize = x[7]; b->data_len = block_len - BAM_CORE_SIZE; if (b->m_data < b->data_len) { b->m_data = b->data_len; kroundup32(b->m_data); b->data = (uint8_t*)realloc(b->data, b->m_data); } if (bam_read(fp, b->data, b->data_len) != b->data_len) return -4; 
b->l_aux = b->data_len - c->n_cigar * 4 - c->l_qname - c->l_qseq - (c->l_qseq+1)/2; if (bam_is_be) swap_endian_data(c, b->data_len, b->data); if (bam_no_B) bam_remove_B(b); return 4 + block_len; } inline int bam_write1_core(bamFile fp, const bam1_core_t *c, int data_len, uint8_t *data) { uint32_t x[8], block_len = data_len + BAM_CORE_SIZE, y; int i; assert(BAM_CORE_SIZE == 32); x[0] = c->tid; x[1] = c->pos; x[2] = (uint32_t)c->bin<<16 | c->qual<<8 | c->l_qname; x[3] = (uint32_t)c->flag<<16 | c->n_cigar; x[4] = c->l_qseq; x[5] = c->mtid; x[6] = c->mpos; x[7] = c->isize; bgzf_flush_try(fp, 4 + block_len); if (bam_is_be) { for (i = 0; i < 8; ++i) bam_swap_endian_4p(x + i); y = block_len; bam_write(fp, bam_swap_endian_4p(&y), 4); swap_endian_data(c, data_len, data); } else bam_write(fp, &block_len, 4); bam_write(fp, x, BAM_CORE_SIZE); bam_write(fp, data, data_len); if (bam_is_be) swap_endian_data(c, data_len, data); return 4 + block_len; } int bam_write1(bamFile fp, const bam1_t *b) { return bam_write1_core(fp, &b->core, b->data_len, b->data); } char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of) { uint8_t *s = bam1_seq(b), *t = bam1_qual(b); int i; const bam1_core_t *c = &b->core; kstring_t str; str.l = str.m = 0; str.s = 0; kputsn(bam1_qname(b), c->l_qname-1, &str); kputc('\t', &str); if (of == BAM_OFDEC) { kputw(c->flag, &str); kputc('\t', &str); } else if (of == BAM_OFHEX) ksprintf(&str, "0x%x\t", c->flag); else { // BAM_OFSTR for (i = 0; i < 16; ++i) if ((c->flag & 1<tid < 0) kputsn("*\t", 2, &str); else { if (header) kputs(header->target_name[c->tid] , &str); else kputw(c->tid, &str); kputc('\t', &str); } kputw(c->pos + 1, &str); kputc('\t', &str); kputw(c->qual, &str); kputc('\t', &str); if (c->n_cigar == 0) kputc('*', &str); else { uint32_t *cigar = bam1_cigar(b); for (i = 0; i < c->n_cigar; ++i) { kputw(bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, &str); kputc(bam_cigar_opchr(cigar[i]), &str); } } kputc('\t', &str); if (c->mtid < 0) kputsn("*\t", 
2, &str); else if (c->mtid == c->tid) kputsn("=\t", 2, &str); else { if (header) kputs(header->target_name[c->mtid], &str); else kputw(c->mtid, &str); kputc('\t', &str); } kputw(c->mpos + 1, &str); kputc('\t', &str); kputw(c->isize, &str); kputc('\t', &str); if (c->l_qseq) { for (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str); kputc('\t', &str); if (t[0] == 0xff) kputc('*', &str); else for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str); } else kputsn("*\t*", 3, &str); s = bam1_aux(b); while (s < b->data + b->data_len) { uint8_t type, key[2]; key[0] = s[0]; key[1] = s[1]; s += 2; type = *s; ++s; kputc('\t', &str); kputsn((char*)key, 2, &str); kputc(':', &str); if (type == 'A') { kputsn("A:", 2, &str); kputc(*s, &str); ++s; } else if (type == 'C') { kputsn("i:", 2, &str); kputw(*s, &str); ++s; } else if (type == 'c') { kputsn("i:", 2, &str); kputw(*(int8_t*)s, &str); ++s; } else if (type == 'S') { kputsn("i:", 2, &str); kputw(*(uint16_t*)s, &str); s += 2; } else if (type == 's') { kputsn("i:", 2, &str); kputw(*(int16_t*)s, &str); s += 2; } else if (type == 'I') { kputsn("i:", 2, &str); kputuw(*(uint32_t*)s, &str); s += 4; } else if (type == 'i') { kputsn("i:", 2, &str); kputw(*(int32_t*)s, &str); s += 4; } else if (type == 'f') { ksprintf(&str, "f:%g", *(float*)s); s += 4; } else if (type == 'd') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; } else if (type == 'Z' || type == 'H') { kputc(type, &str); kputc(':', &str); while (*s) kputc(*s++, &str); ++s; } else if (type == 'B') { uint8_t sub_type = *(s++); int32_t n; memcpy(&n, s, 4); s += 4; // no point to the start of the array kputc(type, &str); kputc(':', &str); kputc(sub_type, &str); // write the typing for (i = 0; i < n; ++i) { kputc(',', &str); if ('c' == sub_type || 'c' == sub_type) { kputw(*(int8_t*)s, &str); ++s; } else if ('C' == sub_type) { kputw(*(uint8_t*)s, &str); ++s; } else if ('s' == sub_type) { kputw(*(int16_t*)s, &str); s += 2; } else if ('S' == sub_type) { 
kputw(*(uint16_t*)s, &str); s += 2; } else if ('i' == sub_type) { kputw(*(int32_t*)s, &str); s += 4; } else if ('I' == sub_type) { kputuw(*(uint32_t*)s, &str); s += 4; } else if ('f' == sub_type) { ksprintf(&str, "%g", *(float*)s); s += 4; } } } } return str.s; } char *bam_format1(const bam_header_t *header, const bam1_t *b) { return bam_format1_core(header, b, BAM_OFDEC); } void bam_view1(const bam_header_t *header, const bam1_t *b) { char *s = bam_format1(header, b); puts(s); free(s); } int bam_validate1(const bam_header_t *header, const bam1_t *b) { char *s; if (b->core.tid < -1 || b->core.mtid < -1) return 0; if (header && (b->core.tid >= header->n_targets || b->core.mtid >= header->n_targets)) return 0; if (b->data_len < b->core.l_qname) return 0; s = memchr(bam1_qname(b), '\0', b->core.l_qname); if (s != &bam1_qname(b)[b->core.l_qname-1]) return 0; // FIXME: Other fields could also be checked, especially the auxiliary data return 1; } // FIXME: we should also check the LB tag associated with each alignment const char *bam_get_library(bam_header_t *h, const bam1_t *b) { const uint8_t *rg; if (h->dict == 0) h->dict = sam_header_parse2(h->text); if (h->rg2lib == 0) h->rg2lib = sam_header2tbl(h->dict, "RG", "ID", "LB"); rg = bam_aux_get(b, "RG"); return (rg == 0)? 
0 : sam_tbl_get(h->rg2lib, (const char*)(rg + 1)); } /************ * Remove B * ************/ int bam_remove_B(bam1_t *b) { int i, j, end_j, k, l, no_qual; uint32_t *cigar, *new_cigar; uint8_t *seq, *qual, *p; // test if removal is necessary if (b->core.flag & BAM_FUNMAP) return 0; // unmapped; do nothing cigar = bam1_cigar(b); for (k = 0; k < b->core.n_cigar; ++k) if (bam_cigar_op(cigar[k]) == BAM_CBACK) break; if (k == b->core.n_cigar) return 0; // no 'B' if (bam_cigar_op(cigar[0]) == BAM_CBACK) goto rmB_err; // cannot be removed // allocate memory for the new CIGAR if (b->data_len + (b->core.n_cigar + 1) * 4 > b->m_data) { // not enough memory b->m_data = b->data_len + b->core.n_cigar * 4; kroundup32(b->m_data); b->data = (uint8_t*)realloc(b->data, b->m_data); cigar = bam1_cigar(b); // after realloc, cigar may be changed } new_cigar = (uint32_t*)(b->data + (b->m_data - b->core.n_cigar * 4)); // from the end of b->data // the core loop seq = bam1_seq(b); qual = bam1_qual(b); no_qual = (qual[0] == 0xff); // test whether base quality is available i = j = 0; end_j = -1; for (k = l = 0; k < b->core.n_cigar; ++k) { int op = bam_cigar_op(cigar[k]); int len = bam_cigar_oplen(cigar[k]); if (op == BAM_CBACK) { // the backward operation int t, u; if (k == b->core.n_cigar - 1) break; // ignore 'B' at the end of CIGAR if (len > j) goto rmB_err; // an excessively long backward for (t = l - 1, u = 0; t >= 0; --t) { // look back int op1 = bam_cigar_op(new_cigar[t]); int len1 = bam_cigar_oplen(new_cigar[t]); if (bam_cigar_type(op1)&1) { // consume the query if (u + len1 >= len) { // stop new_cigar[t] -= (len - u) << BAM_CIGAR_SHIFT; break; } else u += len1; } } if (bam_cigar_oplen(new_cigar[t]) == 0) --t; // squeeze out the zero-length operation l = t + 1; end_j = j; j -= len; } else { // other CIGAR operations new_cigar[l++] = cigar[k]; if (bam_cigar_type(op)&1) { // consume the query if (i != j) { // no need to copy if i == j int u, c, c0; for (u = 0; u < len; ++u) { // 
construct the consensus c = bam1_seqi(seq, i+u); if (j + u < end_j) { // in an overlap c0 = bam1_seqi(seq, j+u); if (c != c0) { // a mismatch; choose the better base if (qual[j+u] < qual[i+u]) { // the base in the 2nd segment is better bam1_seq_seti(seq, j+u, c); qual[j+u] = qual[i+u] - qual[j+u]; } else qual[j+u] -= qual[i+u]; // the 1st is better; reduce base quality } else qual[j+u] = qual[j+u] > qual[i+u]? qual[j+u] : qual[i+u]; } else { // not in an overlap; copy over bam1_seq_seti(seq, j+u, c); qual[j+u] = qual[i+u]; } } } i += len, j += len; } } } if (no_qual) qual[0] = 0xff; // in very rare cases, this may be modified // merge adjacent operations if possible for (k = 1; k < l; ++k) if (bam_cigar_op(new_cigar[k]) == bam_cigar_op(new_cigar[k-1])) new_cigar[k] += new_cigar[k-1] >> BAM_CIGAR_SHIFT << BAM_CIGAR_SHIFT, new_cigar[k-1] &= 0xf; // kill zero length operations for (k = i = 0; k < l; ++k) if (new_cigar[k] >> BAM_CIGAR_SHIFT) new_cigar[i++] = new_cigar[k]; l = i; // update b memcpy(cigar, new_cigar, l * 4); // set CIGAR p = b->data + b->core.l_qname + l * 4; memmove(p, seq, (j+1)>>1); p += (j+1)>>1; // set SEQ memmove(p, qual, j); p += j; // set QUAL memmove(p, bam1_aux(b), b->l_aux); p += b->l_aux; // set optional fields b->core.n_cigar = l, b->core.l_qseq = j; // update CIGAR length and query length b->data_len = p - b->data; // update record length return 0; rmB_err: b->core.flag |= BAM_FUNMAP; return -1; } samtools-0.1.19/bam.h000066400000000000000000000622741212162403000143550ustar00rootroot00000000000000/* The MIT License Copyright (c) 2008-2010 Genome Research Ltd (GRL). 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* Contact: Heng Li */ #ifndef BAM_BAM_H #define BAM_BAM_H /*! @header BAM library provides I/O and various operations on manipulating files in the BAM (Binary Alignment/Mapping) or SAM (Sequence Alignment/Map) format. It now supports importing from or exporting to SAM, sorting, merging, generating pileup, and quickly retrieval of reads overlapped with a specified region. @copyright Genome Research Ltd. */ #define BAM_VERSION "0.1.19-96b5f2294a" #include #include #include #include #ifndef BAM_LITE #define BAM_VIRTUAL_OFFSET16 #include "bgzf.h" /*! 
@abstract BAM file handler */ typedef BGZF *bamFile; #define bam_open(fn, mode) bgzf_open(fn, mode) #define bam_dopen(fd, mode) bgzf_fdopen(fd, mode) #define bam_close(fp) bgzf_close(fp) #define bam_read(fp, buf, size) bgzf_read(fp, buf, size) #define bam_write(fp, buf, size) bgzf_write(fp, buf, size) #define bam_tell(fp) bgzf_tell(fp) #define bam_seek(fp, pos, dir) bgzf_seek(fp, pos, dir) #else #define BAM_TRUE_OFFSET #include typedef gzFile bamFile; #define bam_open(fn, mode) gzopen(fn, mode) #define bam_dopen(fd, mode) gzdopen(fd, mode) #define bam_close(fp) gzclose(fp) #define bam_read(fp, buf, size) gzread(fp, buf, size) /* no bam_write/bam_tell/bam_seek() here */ #endif /*! @typedef @abstract Structure for the alignment header. @field n_targets number of reference sequences @field target_name names of the reference sequences @field target_len lengths of the referene sequences @field dict header dictionary @field hash hash table for fast name lookup @field rg2lib hash table for @RG-ID -> LB lookup @field l_text length of the plain text in the header @field text plain text @discussion Field hash points to null by default. It is a private member. */ typedef struct { int32_t n_targets; char **target_name; uint32_t *target_len; void *dict, *hash, *rg2lib; uint32_t l_text, n_text; char *text; } bam_header_t; /*! @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */ #define BAM_FPAIRED 1 /*! @abstract the read is mapped in a proper pair */ #define BAM_FPROPER_PAIR 2 /*! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */ #define BAM_FUNMAP 4 /*! @abstract the mate is unmapped */ #define BAM_FMUNMAP 8 /*! @abstract the read is mapped to the reverse strand */ #define BAM_FREVERSE 16 /*! @abstract the mate is mapped to the reverse strand */ #define BAM_FMREVERSE 32 /*! @abstract this is read1 */ #define BAM_FREAD1 64 /*! @abstract this is read2 */ #define BAM_FREAD2 128 /*! 
@abstract not primary alignment */ #define BAM_FSECONDARY 256 /*! @abstract QC failure */ #define BAM_FQCFAIL 512 /*! @abstract optical or PCR duplicate */ #define BAM_FDUP 1024 #define BAM_OFDEC 0 #define BAM_OFHEX 1 #define BAM_OFSTR 2 /*! @abstract defautl mask for pileup */ #define BAM_DEF_MASK (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP) #define BAM_CORE_SIZE sizeof(bam1_core_t) /** * Describing how CIGAR operation/length is packed in a 32-bit integer. */ #define BAM_CIGAR_SHIFT 4 #define BAM_CIGAR_MASK ((1 << BAM_CIGAR_SHIFT) - 1) /* CIGAR operations. */ /*! @abstract CIGAR: M = match or mismatch*/ #define BAM_CMATCH 0 /*! @abstract CIGAR: I = insertion to the reference */ #define BAM_CINS 1 /*! @abstract CIGAR: D = deletion from the reference */ #define BAM_CDEL 2 /*! @abstract CIGAR: N = skip on the reference (e.g. spliced alignment) */ #define BAM_CREF_SKIP 3 /*! @abstract CIGAR: S = clip on the read with clipped sequence present in qseq */ #define BAM_CSOFT_CLIP 4 /*! @abstract CIGAR: H = clip on the read with clipped sequence trimmed off */ #define BAM_CHARD_CLIP 5 /*! @abstract CIGAR: P = padding */ #define BAM_CPAD 6 /*! @abstract CIGAR: equals = match */ #define BAM_CEQUAL 7 /*! @abstract CIGAR: X = mismatch */ #define BAM_CDIFF 8 #define BAM_CBACK 9 #define BAM_CIGAR_STR "MIDNSHP=XB" #define BAM_CIGAR_TYPE 0x3C1A7 #define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK) #define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT) #define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)]) #define bam_cigar_gen(l, o) ((l)<>((o)<<1)&3) // bit 1: consume query; bit 2: consume reference /*! @typedef @abstract Structure for core alignment information. 
@field tid chromosome ID, defined by bam_header_t @field pos 0-based leftmost coordinate @field bin bin calculated by bam_reg2bin() @field qual mapping quality @field l_qname length of the query name @field flag bitwise flag @field n_cigar number of CIGAR operations @field l_qseq length of the query sequence (read) */ typedef struct { int32_t tid; int32_t pos; uint32_t bin:16, qual:8, l_qname:8; uint32_t flag:16, n_cigar:16; int32_t l_qseq; int32_t mtid; int32_t mpos; int32_t isize; } bam1_core_t; /*! @typedef @abstract Structure for one alignment. @field core core information about the alignment @field l_aux length of auxiliary data @field data_len current length of bam1_t::data @field m_data maximum length of bam1_t::data @field data all variable-length data, concatenated; structure: qname-cigar-seq-qual-aux @discussion Notes: 1. qname is zero tailing and core.l_qname includes the tailing '\0'. 2. l_qseq is calculated from the total length of an alignment block on reading or from CIGAR. 3. cigar data is encoded 4 bytes per CIGAR operation. 4. seq is nybble-encoded according to bam_nt16_table. */ typedef struct { bam1_core_t core; int l_aux, data_len, m_data; uint8_t *data; } bam1_t; typedef struct __bam_iter_t *bam_iter_t; #define bam1_strand(b) (((b)->core.flag&BAM_FREVERSE) != 0) #define bam1_mstrand(b) (((b)->core.flag&BAM_FMREVERSE) != 0) /*! @function @abstract Get the CIGAR array @param b pointer to an alignment @return pointer to the CIGAR array @discussion In the CIGAR array, each element is a 32-bit integer. The lower 4 bits gives a CIGAR operation and the higher 28 bits keep the length of a CIGAR. */ #define bam1_cigar(b) ((uint32_t*)((b)->data + (b)->core.l_qname)) /*! @function @abstract Get the name of the query @param b pointer to an alignment @return pointer to the name string, null terminated */ #define bam1_qname(b) ((char*)((b)->data)) /*! 
@function @abstract Get query sequence @param b pointer to an alignment @return pointer to sequence @discussion Each base is encoded in 4 bits: 1 for A, 2 for C, 4 for G, 8 for T and 15 for N. Two bases are packed in one byte with the base at the higher 4 bits having smaller coordinate on the read. It is recommended to use bam1_seqi() macro to get the base. */ #define bam1_seq(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname) /*! @function @abstract Get query quality @param b pointer to an alignment @return pointer to quality string */ #define bam1_qual(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1)) /*! @function @abstract Get a base on read @param s Query sequence returned by bam1_seq() @param i The i-th position, 0-based @return 4-bit integer representing the base. */ //#define bam1_seqi(s, i) ((s)[(i)/2] >> 4*(1-(i)%2) & 0xf) #define bam1_seqi(s, i) ((s)[(i)>>1] >> ((~(i)&1)<<2) & 0xf) #define bam1_seq_seti(s, i, c) ( (s)[(i)>>1] = ((s)[(i)>>1] & 0xf<<(((i)&1)<<2)) | (c)<<((~(i)&1)<<2) ) /*! @function @abstract Get query sequence and quality @param b pointer to an alignment @return pointer to the concatenated auxiliary data */ #define bam1_aux(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (b)->core.l_qseq + ((b)->core.l_qseq + 1)/2) #ifndef kroundup32 /*! @function @abstract Round an integer to the next closest power-2 integer. @param x integer to be rounded (in place) @discussion x will be modified. */ #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #endif /*! @abstract Whether the machine is big-endian; modified only in bam_header_init(). */ extern int bam_is_be; /*! @abstract Verbose level between 0 and 3; 0 is supposed to disable all debugging information, though this may not have been implemented. */ extern int bam_verbose; extern int bam_no_B; /*! @abstract Table for converting a nucleotide character to the 4-bit encoding. 
*/ extern unsigned char bam_nt16_table[256]; /*! @abstract Table for converting a 4-bit encoded nucleotide to a letter. */ extern char *bam_nt16_rev_table; extern char bam_nt16_nt4_table[]; #ifdef __cplusplus extern "C" { #endif /********************* * Low-level SAM I/O * *********************/ /*! @abstract TAM file handler */ typedef struct __tamFile_t *tamFile; /*! @abstract Open a SAM file for reading, either uncompressed or compressed by gzip/zlib. @param fn SAM file name @return SAM file handler */ tamFile sam_open(const char *fn); /*! @abstract Close a SAM file handler @param fp SAM file handler */ void sam_close(tamFile fp); /*! @abstract Read one alignment from a SAM file handler @param fp SAM file handler @param header header information (ordered names of chromosomes) @param b read alignment; all members in b will be updated @return 0 if successful; otherwise negative */ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b); /*! @abstract Read header information from a TAB-delimited list file. @param fn_list file name for the list @return a pointer to the header structure @discussion Each line in this file consists of chromosome name and the length of chromosome. */ bam_header_t *sam_header_read2(const char *fn_list); /*! @abstract Read header from a SAM file (if present) @param fp SAM file handler @return pointer to header struct; 0 if no @SQ lines available */ bam_header_t *sam_header_read(tamFile fp); /*! @abstract Parse @SQ lines a update a header struct @param h pointer to the header struct to be updated @return number of target sequences @discussion bam_header_t::{n_targets,target_len,target_name} will be destroyed in the first place. */ int sam_header_parse(bam_header_t *h); int32_t bam_get_tid(const bam_header_t *header, const char *seq_name); /*! 
@abstract Parse @RG lines a update a header struct @param h pointer to the header struct to be updated @return number of @RG lines @discussion bam_header_t::rg2lib will be destroyed in the first place. */ int sam_header_parse_rg(bam_header_t *h); #define sam_write1(header, b) bam_view1(header, b) /******************************** * APIs for string dictionaries * ********************************/ int bam_strmap_put(void *strmap, const char *rg, const char *lib); const char *bam_strmap_get(const void *strmap, const char *rg); void *bam_strmap_dup(const void*); void *bam_strmap_init(); void bam_strmap_destroy(void *strmap); /********************* * Low-level BAM I/O * *********************/ /*! @abstract Initialize a header structure. @return the pointer to the header structure @discussion This function also modifies the global variable bam_is_be. */ bam_header_t *bam_header_init(); /*! @abstract Destroy a header structure. @param header pointer to the header */ void bam_header_destroy(bam_header_t *header); /*! @abstract Read a header structure from BAM. @param fp BAM file handler, opened by bam_open() @return pointer to the header structure @discussion The file position indicator must be placed at the beginning of the file. Upon success, the position indicator will be set at the start of the first alignment. */ bam_header_t *bam_header_read(bamFile fp); /*! @abstract Write a header structure to BAM. @param fp BAM file handler @param header pointer to the header structure @return always 0 currently */ int bam_header_write(bamFile fp, const bam_header_t *header); /*! @abstract Read an alignment from BAM. @param fp BAM file handler @param b read alignment; all members are updated. @return number of bytes read from the file @discussion The file position indicator must be placed right before an alignment. Upon success, this function will set the position indicator to the start of the next alignment. This function is not affected by the machine endianness. 
*/ int bam_read1(bamFile fp, bam1_t *b); int bam_remove_B(bam1_t *b); /*! @abstract Write an alignment to BAM. @param fp BAM file handler @param c pointer to the bam1_core_t structure @param data_len total length of variable size data related to the alignment @param data pointer to the concatenated data @return number of bytes written to the file @discussion This function is not affected by the machine endianness. */ int bam_write1_core(bamFile fp, const bam1_core_t *c, int data_len, uint8_t *data); /*! @abstract Write an alignment to BAM. @param fp BAM file handler @param b alignment to write @return number of bytes written to the file @abstract It is equivalent to: bam_write1_core(fp, &b->core, b->data_len, b->data) */ int bam_write1(bamFile fp, const bam1_t *b); /*! @function @abstract Initiate a pointer to bam1_t struct */ #define bam_init1() ((bam1_t*)calloc(1, sizeof(bam1_t))) /*! @function @abstract Free the memory allocated for an alignment. @param b pointer to an alignment */ #define bam_destroy1(b) do { \ if (b) { free((b)->data); free(b); } \ } while (0) /*! @abstract Format a BAM record in the SAM format @param header pointer to the header structure @param b alignment to print @return a pointer to the SAM string */ char *bam_format1(const bam_header_t *header, const bam1_t *b); char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of); /*! @abstract Check whether a BAM record is plausibly valid @param header associated header structure, or NULL if unavailable @param b alignment to validate @return 0 if the alignment is invalid; non-zero otherwise @discussion Simple consistency check of some of the fields of the alignment record. If the header is provided, several additional checks are made. Not all fields are checked, so a non-zero result is not a guarantee that the record is valid. However it is usually good enough to detect when bam_seek() has been called with a virtual file offset that is not the offset of an alignment record. 
*/ int bam_validate1(const bam_header_t *header, const bam1_t *b); const char *bam_get_library(bam_header_t *header, const bam1_t *b); /*************** * pileup APIs * ***************/ /*! @typedef @abstract Structure for one alignment covering the pileup position. @field b pointer to the alignment @field qpos position of the read base at the pileup site, 0-based @field indel indel length; 0 for no indel, positive for ins and negative for del @field is_del 1 iff the base on the padded read is a deletion @field level the level of the read in the "viewer" mode @discussion See also bam_plbuf_push() and bam_lplbuf_push(). The difference between the two functions is that the former does not set bam_pileup1_t::level, while the later does. Level helps the implementation of alignment viewers, but calculating this has some overhead. */ typedef struct { bam1_t *b; int32_t qpos; int indel, level; uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28; } bam_pileup1_t; typedef int (*bam_plp_auto_f)(void *data, bam1_t *b); struct __bam_plp_t; typedef struct __bam_plp_t *bam_plp_t; bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data); int bam_plp_push(bam_plp_t iter, const bam1_t *b); const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp); const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp); void bam_plp_set_mask(bam_plp_t iter, int mask); void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt); void bam_plp_reset(bam_plp_t iter); void bam_plp_destroy(bam_plp_t iter); struct __bam_mplp_t; typedef struct __bam_mplp_t *bam_mplp_t; bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data); void bam_mplp_destroy(bam_mplp_t iter); void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt); int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp); /*! @typedef @abstract Type of function to be called by bam_plbuf_push(). 
@param tid chromosome ID as is defined in the header @param pos start coordinate of the alignment, 0-based @param n number of elements in pl array @param pl array of alignments @param data user provided data @discussion See also bam_plbuf_push(), bam_plbuf_init() and bam_pileup1_t. */ typedef int (*bam_pileup_f)(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data); typedef struct { bam_plp_t iter; bam_pileup_f func; void *data; } bam_plbuf_t; void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask); void bam_plbuf_reset(bam_plbuf_t *buf); bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data); void bam_plbuf_destroy(bam_plbuf_t *buf); int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf); int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data); struct __bam_lplbuf_t; typedef struct __bam_lplbuf_t bam_lplbuf_t; void bam_lplbuf_reset(bam_lplbuf_t *buf); /*! @abstract bam_plbuf_init() equivalent with level calculated. */ bam_lplbuf_t *bam_lplbuf_init(bam_pileup_f func, void *data); /*! @abstract bam_plbuf_destroy() equivalent with level calculated. */ void bam_lplbuf_destroy(bam_lplbuf_t *tv); /*! @abstract bam_plbuf_push() equivalent with level calculated. */ int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *buf); /********************* * BAM indexing APIs * *********************/ struct __bam_index_t; typedef struct __bam_index_t bam_index_t; /*! @abstract Build index for a BAM file. @discussion Index file "fn.bai" will be created. @param fn name of the BAM file @return always 0 currently */ int bam_index_build(const char *fn); /*! @abstract Load index from file "fn.bai". @param fn name of the BAM file (NOT the index file) @return pointer to the index structure */ bam_index_t *bam_index_load(const char *fn); /*! @abstract Destroy an index structure. @param idx pointer to the index structure */ void bam_index_destroy(bam_index_t *idx); /*! @typedef @abstract Type of function to be called by bam_fetch(). 
@param b the alignment @param data user provided data */ typedef int (*bam_fetch_f)(const bam1_t *b, void *data); /*! @abstract Retrieve the alignments that are overlapped with the specified region. @discussion A user defined function will be called for each retrieved alignment ordered by its start position. @param fp BAM file handler @param idx pointer to the alignment index @param tid chromosome ID as is defined in the header @param beg start coordinate, 0-based @param end end coordinate, 0-based @param data user provided data (will be transferred to func) @param func user defined function */ int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func); bam_iter_t bam_iter_query(const bam_index_t *idx, int tid, int beg, int end); int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b); void bam_iter_destroy(bam_iter_t iter); /*! @abstract Parse a region in the format: "chr2:100,000-200,000". @discussion bam_header_t::hash will be initialized if empty. @param header pointer to the header structure @param str string to be parsed @param ref_id the returned chromosome ID @param begin the returned start coordinate @param end the returned end coordinate @return 0 on success; -1 on failure */ int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end); /************************** * APIs for optional tags * **************************/ /*! @abstract Retrieve data of a tag @param b pointer to an alignment struct @param tag two-character tag to be retrieved @return pointer to the type and data. The first character is the type that can be 'iIsScCdfAZH'. @discussion Use bam_aux2?() series to convert the returned data to the corresponding type. 
*/ uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]); int32_t bam_aux2i(const uint8_t *s); float bam_aux2f(const uint8_t *s); double bam_aux2d(const uint8_t *s); char bam_aux2A(const uint8_t *s); char *bam_aux2Z(const uint8_t *s); int bam_aux_del(bam1_t *b, uint8_t *s); void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data); uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]); // an alias of bam_aux_get() /***************** * Miscellaneous * *****************/ /*! @abstract Calculate the rightmost coordinate of an alignment on the reference genome. @param c pointer to the bam1_core_t structure @param cigar the corresponding CIGAR array (from bam1_t::cigar) @return the rightmost coordinate, 0-based */ uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar); /*! @abstract Calculate the length of the query sequence from CIGAR. @param c pointer to the bam1_core_t structure @param cigar the corresponding CIGAR array (from bam1_t::cigar) @return length of the query sequence */ int32_t bam_cigar2qlen(const bam1_core_t *c, const uint32_t *cigar); #ifdef __cplusplus } #endif /*! @abstract Calculate the minimum bin that contains a region [beg,end). @param beg start of the region, 0-based @param end end of the region, 0-based @return bin */ static inline int bam_reg2bin(uint32_t beg, uint32_t end) { --end; if (beg>>14 == end>>14) return 4681 + (beg>>14); if (beg>>17 == end>>17) return 585 + (beg>>17); if (beg>>20 == end>>20) return 73 + (beg>>20); if (beg>>23 == end>>23) return 9 + (beg>>23); if (beg>>26 == end>>26) return 1 + (beg>>26); return 0; } /*! 
@abstract Copy an alignment @param bdst destination alignment struct @param bsrc source alignment struct @return pointer to the destination alignment struct */ static inline bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc) { uint8_t *data = bdst->data; int m_data = bdst->m_data; // backup data and m_data if (m_data < bsrc->data_len) { // double the capacity m_data = bsrc->data_len; kroundup32(m_data); data = (uint8_t*)realloc(data, m_data); } memcpy(data, bsrc->data, bsrc->data_len); // copy var-len data *bdst = *bsrc; // copy the rest // restore the backup bdst->m_data = m_data; bdst->data = data; return bdst; } /*! @abstract Duplicate an alignment @param src source alignment struct @return pointer to the destination alignment struct */ static inline bam1_t *bam_dup1(const bam1_t *src) { bam1_t *b; b = bam_init1(); *b = *src; b->m_data = b->data_len; b->data = (uint8_t*)calloc(b->data_len, 1); memcpy(b->data, src->data, b->data_len); return b; } static inline int bam_aux_type2size(int x) { if (x == 'C' || x == 'c' || x == 'A') return 1; else if (x == 'S' || x == 's') return 2; else if (x == 'I' || x == 'i' || x == 'f' || x == 'F') return 4; else return 0; } /********************************* *** Compatibility with htslib *** *********************************/ typedef bam_header_t bam_hdr_t; #define bam_get_qname(b) bam1_qname(b) #define bam_get_cigar(b) bam1_cigar(b) #define bam_hdr_read(fp) bam_header_read(fp) #define bam_hdr_write(fp, h) bam_header_write(fp, h) #define bam_hdr_destroy(fp) bam_header_destroy(fp) #endif samtools-0.1.19/bam2bcf.c000066400000000000000000000347241212162403000151040ustar00rootroot00000000000000#include #include #include #include "bam.h" #include "kstring.h" #include "bam2bcf.h" #include "errmod.h" #include "bcftools/bcf.h" extern void ks_introsort_uint32_t(size_t n, uint32_t a[]); #define CALL_ETA 0.03f #define CALL_MAX 256 #define CALL_DEFTHETA 0.83f #define DEF_MAPQ 20 #define CAP_DIST 25 bcf_callaux_t *bcf_call_init(double 
theta, int min_baseQ) { bcf_callaux_t *bca; if (theta <= 0.) theta = CALL_DEFTHETA; bca = calloc(1, sizeof(bcf_callaux_t)); bca->capQ = 60; bca->openQ = 40; bca->extQ = 20; bca->tandemQ = 100; bca->min_baseQ = min_baseQ; bca->e = errmod_init(1. - theta); bca->min_frac = 0.002; bca->min_support = 1; bca->per_sample_flt = 0; bca->npos = 100; bca->ref_pos = calloc(bca->npos, sizeof(int)); bca->alt_pos = calloc(bca->npos, sizeof(int)); return bca; } static int get_position(const bam_pileup1_t *p, int *len) { int icig, n_tot_bases = 0, iread = 0, edist = p->qpos + 1; for (icig=0; icigb->core.n_cigar; icig++) { // Conversion from uint32_t to MIDNSHP // 0123456 // MIDNSHP int cig = bam1_cigar(p->b)[icig] & BAM_CIGAR_MASK; int ncig = bam1_cigar(p->b)[icig] >> BAM_CIGAR_SHIFT; if ( cig==0 ) { n_tot_bases += ncig; iread += ncig; } else if ( cig==1 ) { n_tot_bases += ncig; iread += ncig; } else if ( cig==4 ) { iread += ncig; if ( iread<=p->qpos ) edist -= ncig; } } *len = n_tot_bases; return edist; } void bcf_call_destroy(bcf_callaux_t *bca) { if (bca == 0) return; errmod_destroy(bca->e); if (bca->npos) { free(bca->ref_pos); free(bca->alt_pos); bca->npos = 0; } free(bca->bases); free(bca->inscns); free(bca); } /* ref_base is the 4-bit representation of the reference base. It is * negative if we are looking at an indel. 
*/ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t *bca, bcf_callret1_t *r) { int i, n, ref4, is_indel, ori_depth = 0; memset(r, 0, sizeof(bcf_callret1_t)); if (ref_base >= 0) { ref4 = bam_nt16_nt4_table[ref_base]; is_indel = 0; } else ref4 = 4, is_indel = 1; if (_n == 0) return -1; // enlarge the bases array if necessary if (bca->max_bases < _n) { bca->max_bases = _n; kroundup32(bca->max_bases); bca->bases = (uint16_t*)realloc(bca->bases, 2 * bca->max_bases); } // fill the bases array for (i = n = r->n_supp = 0; i < _n; ++i) { const bam_pileup1_t *p = pl + i; int q, b, mapQ, baseQ, is_diff, min_dist, seqQ; // set base if (p->is_del || p->is_refskip || (p->b->core.flag&BAM_FUNMAP)) continue; ++ori_depth; baseQ = q = is_indel? p->aux&0xff : (int)bam1_qual(p->b)[p->qpos]; // base/indel quality seqQ = is_indel? (p->aux>>8&0xff) : 99; if (q < bca->min_baseQ) continue; if (q > seqQ) q = seqQ; mapQ = p->b->core.qual < 255? p->b->core.qual : DEF_MAPQ; // special case for mapQ==255 mapQ = mapQ < bca->capQ? mapQ : bca->capQ; if (q > mapQ) q = mapQ; if (q > 63) q = 63; if (q < 4) q = 4; if (!is_indel) { b = bam1_seqi(bam1_seq(p->b), p->qpos); // base b = bam_nt16_nt4_table[b? b : ref_base]; // b is the 2-bit base is_diff = (ref4 < 4 && b == ref4)? 
0 : 1; } else { b = p->aux>>16&0x3f; is_diff = (b != 0); } if (is_diff) ++r->n_supp; bca->bases[n++] = q<<5 | (int)bam1_strand(p->b)<<4 | b; // collect annotations if (b < 4) r->qsum[b] += q; ++r->anno[0<<2|is_diff<<1|bam1_strand(p->b)]; min_dist = p->b->core.l_qseq - 1 - p->qpos; if (min_dist > p->qpos) min_dist = p->qpos; if (min_dist > CAP_DIST) min_dist = CAP_DIST; r->anno[1<<2|is_diff<<1|0] += baseQ; r->anno[1<<2|is_diff<<1|1] += baseQ * baseQ; r->anno[2<<2|is_diff<<1|0] += mapQ; r->anno[2<<2|is_diff<<1|1] += mapQ * mapQ; r->anno[3<<2|is_diff<<1|0] += min_dist; r->anno[3<<2|is_diff<<1|1] += min_dist * min_dist; // collect read positions for ReadPosBias int len, pos = get_position(p, &len); int epos = (double)pos/(len+1) * bca->npos; if ( bam1_seqi(bam1_seq(p->b),p->qpos) == ref_base ) bca->ref_pos[epos]++; else bca->alt_pos[epos]++; } r->depth = n; r->ori_depth = ori_depth; // glfgen errmod_cal(bca->e, n, 5, bca->bases, r->p); return r->depth; } double mann_whitney_1947(int n, int m, int U) { if (U<0) return 0; if (n==0||m==0) return U==0 ? 
1 : 0; return (double)n/(n+m)*mann_whitney_1947(n-1,m,U-m) + (double)m/(n+m)*mann_whitney_1947(n,m-1,U); } void calc_ReadPosBias(bcf_callaux_t *bca, bcf_call_t *call) { int i, nref = 0, nalt = 0; unsigned long int U = 0; for (i=0; inpos; i++) { nref += bca->ref_pos[i]; nalt += bca->alt_pos[i]; U += nref*bca->alt_pos[i]; bca->ref_pos[i] = 0; bca->alt_pos[i] = 0; } #if 0 //todo double var = 0, avg = (double)(nref+nalt)/bca->npos; for (i=0; inpos; i++) { double ediff = bca->ref_pos[i] + bca->alt_pos[i] - avg; var += ediff*ediff; bca->ref_pos[i] = 0; bca->alt_pos[i] = 0; } call->read_pos.avg = avg; call->read_pos.var = sqrt(var/bca->npos); call->read_pos.dp = nref+nalt; #endif if ( !nref || !nalt ) { call->read_pos_bias = -1; return; } if ( nref>=8 || nalt>=8 ) { // normal approximation double mean = ((double)nref*nalt+1.0)/2.0; double var2 = (double)nref*nalt*(nref+nalt+1.0)/12.0; double z = (U-mean)/sqrt(var2); call->read_pos_bias = z; //fprintf(stderr,"nref=%d nalt=%d U=%ld mean=%e var=%e zval=%e\n", nref,nalt,U,mean,sqrt(var2),call->read_pos_bias); } else { double p = mann_whitney_1947(nalt,nref,U); // biased form claimed by GATK to behave better empirically // double var2 = (1.0+1.0/(nref+nalt+1.0))*(double)nref*nalt*(nref+nalt+1.0)/12.0; double var2 = (double)nref*nalt*(nref+nalt+1.0)/12.0; double z; if ( p >= 1./sqrt(var2*2*M_PI) ) z = 0; // equal to mean else { if ( U >= nref*nalt/2. ) z = sqrt(-2*log(sqrt(var2*2*M_PI)*p)); else z = -sqrt(-2*log(sqrt(var2*2*M_PI)*p)); } call->read_pos_bias = z; //fprintf(stderr,"nref=%d nalt=%d U=%ld p=%e var2=%e zval=%e\n", nref,nalt,U, p,var2,call->read_pos_bias); } } float mean_diff_to_prob(float mdiff, int dp, int readlen) { if ( dp==2 ) { if ( mdiff==0 ) return (2.0*readlen + 4.0*(readlen-1.0))/((float)readlen*readlen); else return 8.0*(readlen - 4.0*mdiff)/((float)readlen*readlen); } // This is crude empirical approximation and is not very accurate for // shorter read lengths (<100bp). 
There certainly is a room for // improvement. const float mv[24][2] = { {0,0}, {0,0}, {0,0}, { 9.108, 4.934}, { 9.999, 3.991}, {10.273, 3.485}, {10.579, 3.160}, {10.828, 2.889}, {11.014, 2.703}, {11.028, 2.546}, {11.244, 2.391}, {11.231, 2.320}, {11.323, 2.138}, {11.403, 2.123}, {11.394, 1.994}, {11.451, 1.928}, {11.445, 1.862}, {11.516, 1.815}, {11.560, 1.761}, {11.544, 1.728}, {11.605, 1.674}, {11.592, 1.652}, {11.674, 1.613}, {11.641, 1.570} }; float m, v; if ( dp>=24 ) { m = readlen/8.; if (dp>100) dp = 100; v = 1.476/(0.182*pow(dp,0.514)); v = v*(readlen/100.); } else { m = mv[dp][0]; v = mv[dp][1]; m = m*readlen/100.; v = v*readlen/100.; v *= 1.2; // allow more variability } return 1.0/(v*sqrt(2*M_PI)) * exp(-0.5*((mdiff-m)/v)*((mdiff-m)/v)); } void calc_vdb(bcf_callaux_t *bca, bcf_call_t *call) { int i, dp = 0; float mean_pos = 0, mean_diff = 0; for (i=0; inpos; i++) { if ( !bca->alt_pos[i] ) continue; dp += bca->alt_pos[i]; int j = inpos/2 ? i : bca->npos - i; mean_pos += bca->alt_pos[i]*j; } if ( dp<2 ) { call->vdb = -1; return; } mean_pos /= dp; for (i=0; inpos; i++) { if ( !bca->alt_pos[i] ) continue; int j = inpos/2 ? 
i : bca->npos - i; mean_diff += bca->alt_pos[i] * fabs(j - mean_pos); } mean_diff /= dp; call->vdb = mean_diff_to_prob(mean_diff, dp, bca->npos); } /** * bcf_call_combine() - sets the PL array and VDB, RPB annotations, finds the top two alleles * @n: number of samples * @calls: each sample's calls * @bca: auxiliary data structure for holding temporary values * @ref_base: the reference base * @call: filled with the annotations */ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int ref_base /*4-bit*/, bcf_call_t *call) { int ref4, i, j, qsum[4]; int64_t tmp; if (ref_base >= 0) { call->ori_ref = ref4 = bam_nt16_nt4_table[ref_base]; if (ref4 > 4) ref4 = 4; } else call->ori_ref = -1, ref4 = 0; // calculate qsum memset(qsum, 0, 4 * sizeof(int)); for (i = 0; i < n; ++i) for (j = 0; j < 4; ++j) qsum[j] += calls[i].qsum[j]; int qsum_tot=0; for (j=0; j<4; j++) { qsum_tot += qsum[j]; call->qsum[j] = 0; } for (j = 0; j < 4; ++j) qsum[j] = qsum[j] << 2 | j; // find the top 2 alleles for (i = 1; i < 4; ++i) // insertion sort for (j = i; j > 0 && qsum[j] < qsum[j-1]; --j) tmp = qsum[j], qsum[j] = qsum[j-1], qsum[j-1] = tmp; // set the reference allele and alternative allele(s) for (i = 0; i < 5; ++i) call->a[i] = -1; call->unseen = -1; call->a[0] = ref4; for (i = 3, j = 1; i >= 0; --i) { if ((qsum[i]&3) != ref4) { if (qsum[i]>>2 != 0) { if ( j<4 ) call->qsum[j] = (float)(qsum[i]>>2)/qsum_tot; // ref N can make j>=4 call->a[j++] = qsum[i]&3; } else break; } else call->qsum[0] = (float)(qsum[i]>>2)/qsum_tot; } if (ref_base >= 0) { // for SNPs, find the "unseen" base if (((ref4 < 4 && j < 4) || (ref4 == 4 && j < 5)) && i >= 0) call->unseen = j, call->a[j++] = qsum[i]&3; call->n_alleles = j; } else { call->n_alleles = j; if (call->n_alleles == 1) return -1; // no reliable supporting read. 
stop doing anything } // set the PL array if (call->n < n) { call->n = n; call->PL = realloc(call->PL, 15 * n); } { int x, g[15], z; double sum_min = 0.; x = call->n_alleles * (call->n_alleles + 1) / 2; // get the possible genotypes for (i = z = 0; i < call->n_alleles; ++i) for (j = 0; j <= i; ++j) g[z++] = call->a[j] * 5 + call->a[i]; for (i = 0; i < n; ++i) { uint8_t *PL = call->PL + x * i; const bcf_callret1_t *r = calls + i; float min = 1e37; for (j = 0; j < x; ++j) if (min > r->p[g[j]]) min = r->p[g[j]]; sum_min += min; for (j = 0; j < x; ++j) { int y; y = (int)(r->p[g[j]] - min + .499); if (y > 255) y = 255; PL[j] = y; } } // if (ref_base < 0) fprintf(stderr, "%d,%d,%f,%d\n", call->n_alleles, x, sum_min, call->unseen); call->shift = (int)(sum_min + .499); } // combine annotations memset(call->anno, 0, 16 * sizeof(int)); for (i = call->depth = call->ori_depth = 0, tmp = 0; i < n; ++i) { call->depth += calls[i].depth; call->ori_depth += calls[i].ori_depth; for (j = 0; j < 16; ++j) call->anno[j] += calls[i].anno[j]; } calc_vdb(bca, call); calc_ReadPosBias(bca, call); return 0; } int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int fmt_flag, const bcf_callaux_t *bca, const char *ref) { extern double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two); kstring_t s; int i, j; b->n_smpl = bc->n; b->tid = tid; b->pos = pos; b->qual = 0; s.s = b->str; s.m = b->m_str; s.l = 0; kputc('\0', &s); if (bc->ori_ref < 0) { // an indel // write REF kputc(ref[pos], &s); for (j = 0; j < bca->indelreg; ++j) kputc(ref[pos+1+j], &s); kputc('\0', &s); // write ALT kputc(ref[pos], &s); for (i = 1; i < 4; ++i) { if (bc->a[i] < 0) break; if (i > 1) { kputc(',', &s); kputc(ref[pos], &s); } if (bca->indel_types[bc->a[i]] < 0) { // deletion for (j = -bca->indel_types[bc->a[i]]; j < bca->indelreg; ++j) kputc(ref[pos+1+j], &s); } else { // insertion; cannot be a reference unless a bug char *inscns = 
&bca->inscns[bc->a[i] * bca->maxins]; for (j = 0; j < bca->indel_types[bc->a[i]]; ++j) kputc("ACGTN"[(int)inscns[j]], &s); for (j = 0; j < bca->indelreg; ++j) kputc(ref[pos+1+j], &s); } } kputc('\0', &s); } else { // a SNP kputc("ACGTN"[bc->ori_ref], &s); kputc('\0', &s); for (i = 1; i < 5; ++i) { if (bc->a[i] < 0) break; if (i > 1) kputc(',', &s); kputc(bc->unseen == i? 'X' : "ACGT"[bc->a[i]], &s); } kputc('\0', &s); } kputc('\0', &s); // INFO if (bc->ori_ref < 0) ksprintf(&s,"INDEL;IS=%d,%f;", bca->max_support, bca->max_frac); kputs("DP=", &s); kputw(bc->ori_depth, &s); kputs(";I16=", &s); for (i = 0; i < 16; ++i) { if (i) kputc(',', &s); kputw(bc->anno[i], &s); } //ksprintf(&s,";RPS=%d,%f,%f", bc->read_pos.dp,bc->read_pos.avg,bc->read_pos.var); ksprintf(&s,";QS=%f,%f,%f,%f", bc->qsum[0],bc->qsum[1],bc->qsum[2],bc->qsum[3]); if (bc->vdb != -1) ksprintf(&s, ";VDB=%e", bc->vdb); if (bc->read_pos_bias != -1 ) ksprintf(&s, ";RPB=%e", bc->read_pos_bias); kputc('\0', &s); // FMT kputs("PL", &s); if (bcr && fmt_flag) { if (fmt_flag & B2B_FMT_DP) kputs(":DP", &s); if (fmt_flag & B2B_FMT_DV) kputs(":DV", &s); if (fmt_flag & B2B_FMT_SP) kputs(":SP", &s); } kputc('\0', &s); b->m_str = s.m; b->str = s.s; b->l_str = s.l; bcf_sync(b); memcpy(b->gi[0].data, bc->PL, b->gi[0].len * bc->n); if (bcr && fmt_flag) { uint16_t *dp = (fmt_flag & B2B_FMT_DP)? b->gi[1].data : 0; uint16_t *dv = (fmt_flag & B2B_FMT_DV)? b->gi[1 + ((fmt_flag & B2B_FMT_DP) != 0)].data : 0; int32_t *sp = (fmt_flag & B2B_FMT_SP)? b->gi[1 + ((fmt_flag & B2B_FMT_DP) != 0) + ((fmt_flag & B2B_FMT_DV) != 0)].data : 0; for (i = 0; i < bc->n; ++i) { bcf_callret1_t *p = bcr + i; if (dp) dp[i] = p->depth < 0xffff? p->depth : 0xffff; if (dv) dv[i] = p->n_supp < 0xffff? 
p->n_supp : 0xffff; if (sp) { if (p->anno[0] + p->anno[1] < 2 || p->anno[2] + p->anno[3] < 2 || p->anno[0] + p->anno[2] < 2 || p->anno[1] + p->anno[3] < 2) { sp[i] = 0; } else { double left, right, two; int x; kt_fisher_exact(p->anno[0], p->anno[1], p->anno[2], p->anno[3], &left, &right, &two); x = (int)(-4.343 * log(two) + .499); if (x > 255) x = 255; sp[i] = x; } } } } return 0; } samtools-0.1.19/bam2bcf.h000066400000000000000000000035411212162403000151020ustar00rootroot00000000000000#ifndef BAM2BCF_H #define BAM2BCF_H #include #include "errmod.h" #include "bcftools/bcf.h" #define B2B_INDEL_NULL 10000 #define B2B_FMT_DP 0x1 #define B2B_FMT_SP 0x2 #define B2B_FMT_DV 0x4 typedef struct __bcf_callaux_t { int capQ, min_baseQ; int openQ, extQ, tandemQ; // for indels int min_support, max_support; // for collecting indel candidates double min_frac, max_frac; // for collecting indel candidates int per_sample_flt; // indel filtering strategy int *ref_pos, *alt_pos, npos; // for ReadPosBias // for internal uses int max_bases; int indel_types[4]; int maxins, indelreg; int read_len; char *inscns; uint16_t *bases; errmod_t *e; void *rghash; } bcf_callaux_t; typedef struct { int depth, n_supp, ori_depth, qsum[4]; unsigned int anno[16]; float p[25]; } bcf_callret1_t; typedef struct { int a[5]; // alleles: ref, alt, alt2, alt3 float qsum[4]; int n, n_alleles, shift, ori_ref, unseen; int n_supp; // number of supporting non-reference reads unsigned int anno[16], depth, ori_depth; uint8_t *PL; float vdb; // variant distance bias float read_pos_bias; struct { float avg, var; int dp; } read_pos; } bcf_call_t; #ifdef __cplusplus extern "C" { #endif bcf_callaux_t *bcf_call_init(double theta, int min_baseQ); void bcf_call_destroy(bcf_callaux_t *bca); int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t *bca, bcf_callret1_t *r); int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int ref_base /*4-bit*/, bcf_call_t *call); int 
bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int fmt_flag, const bcf_callaux_t *bca, const char *ref); int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref, const void *rghash); #ifdef __cplusplus } #endif #endif samtools-0.1.19/bam2bcf_indel.c000066400000000000000000000436571212162403000162640ustar00rootroot00000000000000#include #include #include #include "bam.h" #include "bam2bcf.h" #include "kaln.h" #include "kprobaln.h" #include "khash.h" KHASH_SET_INIT_STR(rg) #include "ksort.h" KSORT_INIT_GENERIC(uint32_t) #define MINUS_CONST 0x10000000 #define INDEL_WINDOW_SIZE 50 void *bcf_call_add_rg(void *_hash, const char *hdtext, const char *list) { const char *s, *p, *q, *r, *t; khash_t(rg) *hash; if (list == 0 || hdtext == 0) return _hash; if (_hash == 0) _hash = kh_init(rg); hash = (khash_t(rg)*)_hash; if ((s = strstr(hdtext, "@RG\t")) == 0) return hash; do { t = strstr(s + 4, "@RG\t"); // the next @RG if ((p = strstr(s, "\tID:")) != 0) p += 4; if ((q = strstr(s, "\tPL:")) != 0) q += 4; if (p && q && (t == 0 || (p < t && q < t))) { // ID and PL are both present int lp, lq; char *x; for (r = p; *r && *r != '\t' && *r != '\n'; ++r); lp = r - p; for (r = q; *r && *r != '\t' && *r != '\n'; ++r); lq = r - q; x = calloc((lp > lq? 
lp : lq) + 1, 1); for (r = q; *r && *r != '\t' && *r != '\n'; ++r) x[r-q] = *r; if (strstr(list, x)) { // insert ID to the hash table khint_t k; int ret; for (r = p; *r && *r != '\t' && *r != '\n'; ++r) x[r-p] = *r; x[r-p] = 0; k = kh_get(rg, hash, x); if (k == kh_end(hash)) k = kh_put(rg, hash, x, &ret); else free(x); } else free(x); } s = t; } while (s); return hash; } void bcf_call_del_rghash(void *_hash) { khint_t k; khash_t(rg) *hash = (khash_t(rg)*)_hash; if (hash == 0) return; for (k = kh_begin(hash); k < kh_end(hash); ++k) if (kh_exist(hash, k)) free((char*)kh_key(hash, k)); kh_destroy(rg, hash); } static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, int32_t tpos, int is_left, int32_t *_tpos) { int k, x = c->pos, y = 0, last_y = 0; *_tpos = c->pos; for (k = 0; k < c->n_cigar; ++k) { int op = cigar[k] & BAM_CIGAR_MASK; int l = cigar[k] >> BAM_CIGAR_SHIFT; if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { if (c->pos > tpos) return y; if (x + l > tpos) { *_tpos = tpos; return y + (tpos - x); } x += l; y += l; last_y = y; } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l; else if (op == BAM_CDEL || op == BAM_CREF_SKIP) { if (x + l > tpos) { *_tpos = is_left? x : x + l; return y; } x += l; } } *_tpos = x; return last_y; } // FIXME: check if the inserted sequence is consistent with the homopolymer run // l is the relative gap length and l_run is the length of the homopolymer on the reference static inline int est_seqQ(const bcf_callaux_t *bca, int l, int l_run) { int q, qh; q = bca->openQ + bca->extQ * (abs(l) - 1); qh = l_run >= 3? (int)(bca->tandemQ * (double)abs(l) / l_run + .499) : 1000; return q < qh? q : qh; } static inline int est_indelreg(int pos, const char *ref, int l, char *ins4) { int i, j, max = 0, max_i = pos, score = 0; l = abs(l); for (i = pos + 1, j = 0; ref[i]; ++i, ++j) { if (ins4) score += (toupper(ref[i]) != "ACGTN"[(int)ins4[j%l]])? -10 : 1; else score += (toupper(ref[i]) != toupper(ref[pos+1+j%l]))? 
-10 : 1; if (score < 0) break; if (max < score) max = score, max_i = i; } return max_i - pos; } /* * @n: number of samples */ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref, const void *rghash) { int i, s, j, k, t, n_types, *types, max_rd_len, left, right, max_ins, *score1, *score2, max_ref2; int N, K, l_run, ref_type, n_alt; char *inscns = 0, *ref2, *query, **ref_sample; khash_t(rg) *hash = (khash_t(rg)*)rghash; if (ref == 0 || bca == 0) return -1; // mark filtered reads if (rghash) { N = 0; for (s = N = 0; s < n; ++s) { for (i = 0; i < n_plp[s]; ++i) { bam_pileup1_t *p = plp[s] + i; const uint8_t *rg = bam_aux_get(p->b, "RG"); p->aux = 1; // filtered by default if (rg) { khint_t k = kh_get(rg, hash, (const char*)(rg + 1)); if (k != kh_end(hash)) p->aux = 0, ++N; // not filtered } } } if (N == 0) return -1; // no reads left } // determine if there is a gap for (s = N = 0; s < n; ++s) { for (i = 0; i < n_plp[s]; ++i) if (plp[s][i].indel != 0) break; if (i < n_plp[s]) break; } if (s == n) return -1; // there is no indel at this position. 
for (s = N = 0; s < n; ++s) N += n_plp[s]; // N is the total number of reads { // find out how many types of indels are present bca->max_support = bca->max_frac = 0; int m, n_alt = 0, n_tot = 0, indel_support_ok = 0; uint32_t *aux; aux = calloc(N + 1, 4); m = max_rd_len = 0; aux[m++] = MINUS_CONST; // zero indel is always a type for (s = 0; s < n; ++s) { int na = 0, nt = 0; for (i = 0; i < n_plp[s]; ++i) { const bam_pileup1_t *p = plp[s] + i; if (rghash == 0 || p->aux == 0) { ++nt; if (p->indel != 0) { ++na; aux[m++] = MINUS_CONST + p->indel; } } j = bam_cigar2qlen(&p->b->core, bam1_cigar(p->b)); if (j > max_rd_len) max_rd_len = j; } float frac = (float)na/nt; if ( !indel_support_ok && na >= bca->min_support && frac >= bca->min_frac ) indel_support_ok = 1; if ( na > bca->max_support && frac > 0 ) bca->max_support = na, bca->max_frac = frac; n_alt += na; n_tot += nt; } // To prevent long stretches of N's to be mistaken for indels (sometimes thousands of bases), // check the number of N's in the sequence and skip places where half or more reference bases are Ns. int nN=0; for (i=pos; i-posi ) { free(aux); return -1; } ks_introsort(uint32_t, m, aux); // squeeze out identical types for (i = 1, n_types = 1; i < m; ++i) if (aux[i] != aux[i-1]) ++n_types; // Taking totals makes it hard to call rare indels if ( !bca->per_sample_flt ) indel_support_ok = ( (float)n_alt / n_tot < bca->min_frac || n_alt < bca->min_support ) ? 0 : 1; if ( n_types == 1 || !indel_support_ok ) { // then skip free(aux); return -1; } if (n_types >= 64) { free(aux); if (bam_verbose >= 2) fprintf(stderr, "[%s] excessive INDEL alleles at position %d. 
Skip the position.\n", __func__, pos + 1); return -1; } types = (int*)calloc(n_types, sizeof(int)); t = 0; types[t++] = aux[0] - MINUS_CONST; for (i = 1; i < m; ++i) if (aux[i] != aux[i-1]) types[t++] = aux[i] - MINUS_CONST; free(aux); for (t = 0; t < n_types; ++t) if (types[t] == 0) break; ref_type = t; // the index of the reference type (0) } { // calculate left and right boundary left = pos > INDEL_WINDOW_SIZE? pos - INDEL_WINDOW_SIZE : 0; right = pos + INDEL_WINDOW_SIZE; if (types[0] < 0) right -= types[0]; // in case the alignments stand out the reference for (i = pos; i < right; ++i) if (ref[i] == 0) break; right = i; } /* The following block fixes a long-existing flaw in the INDEL * calling model: the interference of nearby SNPs. However, it also * reduces the power because sometimes, substitutions caused by * indels are not distinguishable from true mutations. Multiple * sequence realignment helps to increase the power. * * Masks mismatches present in at least 70% of the reads with 'N'. */ { // construct per-sample consensus int L = right - left + 1, max_i, max2_i; uint32_t *cns, max, max2; char *ref0, *r; ref_sample = calloc(n, sizeof(void*)); cns = calloc(L, 4); ref0 = calloc(L, 1); for (i = 0; i < right - left; ++i) ref0[i] = bam_nt16_table[(int)ref[i+left]]; for (s = 0; s < n; ++s) { r = ref_sample[s] = calloc(L, 1); memset(cns, 0, sizeof(int) * L); // collect ref and non-ref counts for (i = 0; i < n_plp[s]; ++i) { bam_pileup1_t *p = plp[s] + i; bam1_t *b = p->b; uint32_t *cigar = bam1_cigar(b); uint8_t *seq = bam1_seq(b); int x = b->core.pos, y = 0; for (k = 0; k < b->core.n_cigar; ++k) { int op = cigar[k]&0xf; int j, l = cigar[k]>>4; if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { for (j = 0; j < l; ++j) if (x + j >= left && x + j < right) cns[x+j-left] += (bam1_seqi(seq, y+j) == ref0[x+j-left])? 
1 : 0x10000; x += l; y += l; } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l; else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l; } } // determine the consensus for (i = 0; i < right - left; ++i) r[i] = ref0[i]; max = max2 = 0; max_i = max2_i = -1; for (i = 0; i < right - left; ++i) { if (cns[i]>>16 >= max>>16) max2 = max, max2_i = max_i, max = cns[i], max_i = i; else if (cns[i]>>16 >= max2>>16) max2 = cns[i], max2_i = i; } if ((double)(max&0xffff) / ((max&0xffff) + (max>>16)) >= 0.7) max_i = -1; if ((double)(max2&0xffff) / ((max2&0xffff) + (max2>>16)) >= 0.7) max2_i = -1; if (max_i >= 0) r[max_i] = 15; if (max2_i >= 0) r[max2_i] = 15; //for (i = 0; i < right - left; ++i) fputc("=ACMGRSVTWYHKDBN"[(int)r[i]], stderr); fputc('\n', stderr); } free(ref0); free(cns); } { // the length of the homopolymer run around the current position int c = bam_nt16_table[(int)ref[pos + 1]]; if (c == 15) l_run = 1; else { for (i = pos + 2; ref[i]; ++i) if (bam_nt16_table[(int)ref[i]] != c) break; l_run = i; for (i = pos; i >= 0; --i) if (bam_nt16_table[(int)ref[i]] != c) break; l_run -= i + 1; } } // construct the consensus sequence max_ins = types[n_types - 1]; // max_ins is at least 0 if (max_ins > 0) { int *inscns_aux = calloc(5 * n_types * max_ins, sizeof(int)); // count the number of occurrences of each base at each position for each type of insertion for (t = 0; t < n_types; ++t) { if (types[t] > 0) { for (s = 0; s < n; ++s) { for (i = 0; i < n_plp[s]; ++i) { bam_pileup1_t *p = plp[s] + i; if (p->indel == types[t]) { uint8_t *seq = bam1_seq(p->b); for (k = 1; k <= p->indel; ++k) { int c = bam_nt16_nt4_table[bam1_seqi(seq, p->qpos + k)]; assert(c<5); ++inscns_aux[(t*max_ins+(k-1))*5 + c]; } } } } } } // use the majority rule to construct the consensus inscns = calloc(n_types * max_ins, 1); for (t = 0; t < n_types; ++t) { for (j = 0; j < types[t]; ++j) { int max = 0, max_k = -1, *ia = &inscns_aux[(t*max_ins+j)*5]; for (k = 0; k < 5; ++k) if (ia[k] > max) max = ia[k], 
max_k = k; inscns[t*max_ins + j] = max? max_k : 4; if ( max_k==4 ) { types[t] = 0; break; } // discard insertions which contain N's } } free(inscns_aux); } // compute the likelihood given each type of indel for each read max_ref2 = right - left + 2 + 2 * (max_ins > -types[0]? max_ins : -types[0]); ref2 = calloc(max_ref2, 1); query = calloc(right - left + max_rd_len + max_ins + 2, 1); score1 = calloc(N * n_types, sizeof(int)); score2 = calloc(N * n_types, sizeof(int)); bca->indelreg = 0; for (t = 0; t < n_types; ++t) { int l, ir; kpa_par_t apf1 = { 1e-4, 1e-2, 10 }, apf2 = { 1e-6, 1e-3, 10 }; apf1.bw = apf2.bw = abs(types[t]) + 3; // compute indelreg if (types[t] == 0) ir = 0; else if (types[t] > 0) ir = est_indelreg(pos, ref, types[t], &inscns[t*max_ins]); else ir = est_indelreg(pos, ref, -types[t], 0); if (ir > bca->indelreg) bca->indelreg = ir; // fprintf(stderr, "%d, %d, %d\n", pos, types[t], ir); // realignment for (s = K = 0; s < n; ++s) { // write ref2 for (k = 0, j = left; j <= pos; ++j) ref2[k++] = bam_nt16_nt4_table[(int)ref_sample[s][j-left]]; if (types[t] <= 0) j += -types[t]; else for (l = 0; l < types[t]; ++l) ref2[k++] = inscns[t*max_ins + l]; for (; j < right && ref[j]; ++j) ref2[k++] = bam_nt16_nt4_table[(int)ref_sample[s][j-left]]; for (; k < max_ref2; ++k) ref2[k] = 4; if (j < right) right = j; // align each read to ref2 for (i = 0; i < n_plp[s]; ++i, ++K) { bam_pileup1_t *p = plp[s] + i; int qbeg, qend, tbeg, tend, sc, kk; uint8_t *seq = bam1_seq(p->b); uint32_t *cigar = bam1_cigar(p->b); if (p->b->core.flag&4) continue; // unmapped reads // FIXME: the following loop should be better moved outside; nonetheless, realignment should be much slower anyway. for (kk = 0; kk < p->b->core.n_cigar; ++kk) if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP) break; if (kk < p->b->core.n_cigar) continue; // FIXME: the following skips soft clips, but using them may be more sensitive. 
// determine the start and end of sequences for alignment qbeg = tpos2qpos(&p->b->core, bam1_cigar(p->b), left, 0, &tbeg); qend = tpos2qpos(&p->b->core, bam1_cigar(p->b), right, 1, &tend); if (types[t] < 0) { int l = -types[t]; tbeg = tbeg - l > left? tbeg - l : left; } // write the query sequence for (l = qbeg; l < qend; ++l) query[l - qbeg] = bam_nt16_nt4_table[bam1_seqi(seq, l)]; { // do realignment; this is the bottleneck const uint8_t *qual = bam1_qual(p->b), *bq; uint8_t *qq; qq = calloc(qend - qbeg, 1); bq = (uint8_t*)bam_aux_get(p->b, "ZQ"); if (bq) ++bq; // skip type for (l = qbeg; l < qend; ++l) { qq[l - qbeg] = bq? qual[l] + (bq[l] - 64) : qual[l]; if (qq[l - qbeg] > 30) qq[l - qbeg] = 30; if (qq[l - qbeg] < 7) qq[l - qbeg] = 7; } sc = kpa_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]), (uint8_t*)query, qend - qbeg, qq, &apf1, 0, 0); l = (int)(100. * sc / (qend - qbeg) + .499); // used for adjusting indelQ below if (l > 255) l = 255; score1[K*n_types + t] = score2[K*n_types + t] = sc<<8 | l; if (sc > 5) { sc = kpa_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]), (uint8_t*)query, qend - qbeg, qq, &apf2, 0, 0); l = (int)(100. 
* sc / (qend - qbeg) + .499); if (l > 255) l = 255; score2[K*n_types + t] = sc<<8 | l; } free(qq); } /* for (l = 0; l < tend - tbeg + abs(types[t]); ++l) fputc("ACGTN"[(int)ref2[tbeg-left+l]], stderr); fputc('\n', stderr); for (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[(int)query[l]], stderr); fputc('\n', stderr); fprintf(stderr, "pos=%d type=%d read=%d:%d name=%s qbeg=%d tbeg=%d score=%d\n", pos, types[t], s, i, bam1_qname(p->b), qbeg, tbeg, sc); */ } } } free(ref2); free(query); { // compute indelQ int *sc, tmp, *sumq; sc = alloca(n_types * sizeof(int)); sumq = alloca(n_types * sizeof(int)); memset(sumq, 0, sizeof(int) * n_types); for (s = K = 0; s < n; ++s) { for (i = 0; i < n_plp[s]; ++i, ++K) { bam_pileup1_t *p = plp[s] + i; int *sct = &score1[K*n_types], indelQ1, indelQ2, seqQ, indelQ; for (t = 0; t < n_types; ++t) sc[t] = sct[t]<<6 | t; for (t = 1; t < n_types; ++t) // insertion sort for (j = t; j > 0 && sc[j] < sc[j-1]; --j) tmp = sc[j], sc[j] = sc[j-1], sc[j-1] = tmp; /* errmod_cal() assumes that if the call is wrong, the * likelihoods of other events are equal. This is about * right for substitutions, but is not desired for * indels. To reuse errmod_cal(), I have to make * compromise for multi-allelic indels. */ if ((sc[0]&0x3f) == ref_type) { indelQ1 = (sc[1]>>14) - (sc[0]>>14); seqQ = est_seqQ(bca, types[sc[1]&0x3f], l_run); } else { for (t = 0; t < n_types; ++t) // look for the reference type if ((sc[t]&0x3f) == ref_type) break; indelQ1 = (sc[t]>>14) - (sc[0]>>14); seqQ = est_seqQ(bca, types[sc[0]&0x3f], l_run); } tmp = sc[0]>>6 & 0xff; indelQ1 = tmp > 111? 0 : (int)((1. - tmp/111.) 
* indelQ1 + .499); // reduce indelQ sct = &score2[K*n_types]; for (t = 0; t < n_types; ++t) sc[t] = sct[t]<<6 | t; for (t = 1; t < n_types; ++t) // insertion sort for (j = t; j > 0 && sc[j] < sc[j-1]; --j) tmp = sc[j], sc[j] = sc[j-1], sc[j-1] = tmp; if ((sc[0]&0x3f) == ref_type) { indelQ2 = (sc[1]>>14) - (sc[0]>>14); } else { for (t = 0; t < n_types; ++t) // look for the reference type if ((sc[t]&0x3f) == ref_type) break; indelQ2 = (sc[t]>>14) - (sc[0]>>14); } tmp = sc[0]>>6 & 0xff; indelQ2 = tmp > 111? 0 : (int)((1. - tmp/111.) * indelQ2 + .499); // pick the smaller between indelQ1 and indelQ2 indelQ = indelQ1 < indelQ2? indelQ1 : indelQ2; if (indelQ > 255) indelQ = 255; if (seqQ > 255) seqQ = 255; p->aux = (sc[0]&0x3f)<<16 | seqQ<<8 | indelQ; // use 22 bits in total sumq[sc[0]&0x3f] += indelQ < seqQ? indelQ : seqQ; // fprintf(stderr, "pos=%d read=%d:%d name=%s call=%d indelQ=%d seqQ=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ, seqQ); } } // determine bca->indel_types[] and bca->inscns bca->maxins = max_ins; bca->inscns = realloc(bca->inscns, bca->maxins * 4); for (t = 0; t < n_types; ++t) sumq[t] = sumq[t]<<6 | t; for (t = 1; t < n_types; ++t) // insertion sort for (j = t; j > 0 && sumq[j] > sumq[j-1]; --j) tmp = sumq[j], sumq[j] = sumq[j-1], sumq[j-1] = tmp; for (t = 0; t < n_types; ++t) // look for the reference type if ((sumq[t]&0x3f) == ref_type) break; if (t) { // then move the reference type to the first tmp = sumq[t]; for (; t > 0; --t) sumq[t] = sumq[t-1]; sumq[0] = tmp; } for (t = 0; t < 4; ++t) bca->indel_types[t] = B2B_INDEL_NULL; for (t = 0; t < 4 && t < n_types; ++t) { bca->indel_types[t] = types[sumq[t]&0x3f]; memcpy(&bca->inscns[t * bca->maxins], &inscns[(sumq[t]&0x3f) * max_ins], bca->maxins); } // update p->aux for (s = n_alt = 0; s < n; ++s) { for (i = 0; i < n_plp[s]; ++i) { bam_pileup1_t *p = plp[s] + i; int x = types[p->aux>>16&0x3f]; for (j = 0; j < 4; ++j) if (x == bca->indel_types[j]) break; p->aux = j<<16 | (j == 4? 
0 : (p->aux&0xffff)); if ((p->aux>>16&0x3f) > 0) ++n_alt; // fprintf(stderr, "X pos=%d read=%d:%d name=%s call=%d type=%d q=%d seqQ=%d\n", pos, s, i, bam1_qname(p->b), p->aux>>16&63, bca->indel_types[p->aux>>16&63], p->aux&0xff, p->aux>>8&0xff); } } } free(score1); free(score2); // free for (i = 0; i < n; ++i) free(ref_sample[i]); free(ref_sample); free(types); free(inscns); return n_alt > 0? 0 : -1; } samtools-0.1.19/bam2depth.c000066400000000000000000000133241212162403000154470ustar00rootroot00000000000000/* This program demonstrates how to generate pileup from multiple BAMs * simutaneously, to achieve random access and to use the BED interface. * To compile this program separately, you may: * * gcc -g -O2 -Wall -o bam2depth -D_MAIN_BAM2DEPTH bam2depth.c -L. -lbam -lz */ #include #include #include #include #include "bam.h" typedef struct { // auxiliary data structure bamFile fp; // the file handler bam_iter_t iter; // NULL if a region not specified int min_mapQ, min_len; // mapQ filter; length filter } aux_t; void *bed_read(const char *fn); // read a BED or position list file void bed_destroy(void *_h); // destroy the BED data structure int bed_overlap(const void *_h, const char *chr, int beg, int end); // test if chr:beg-end overlaps // This function reads a BAM alignment from one BAM file. static int read_bam(void *data, bam1_t *b) // read level filters better go here to avoid pileup { aux_t *aux = (aux_t*)data; // data in fact is a pointer to an auxiliary structure int ret = aux->iter? 
bam_iter_read(aux->fp, aux->iter, b) : bam_read1(aux->fp, b); if (!(b->core.flag&BAM_FUNMAP)) { if ((int)b->core.qual < aux->min_mapQ) b->core.flag |= BAM_FUNMAP; else if (aux->min_len && bam_cigar2qlen(&b->core, bam1_cigar(b)) < aux->min_len) b->core.flag |= BAM_FUNMAP; } return ret; } int read_file_list(const char *file_list,int *n,char **argv[]); #ifdef _MAIN_BAM2DEPTH int main(int argc, char *argv[]) #else int main_depth(int argc, char *argv[]) #endif { int i, n, tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0, nfiles; const bam_pileup1_t **plp; char *reg = 0; // specified region void *bed = 0; // BED data structure char *file_list = NULL, **fn = NULL; bam_header_t *h = 0; // BAM header of the 1st input aux_t **data; bam_mplp_t mplp; // parse the command line while ((n = getopt(argc, argv, "r:b:q:Q:l:f:")) >= 0) { switch (n) { case 'l': min_len = atoi(optarg); break; // minimum query length case 'r': reg = strdup(optarg); break; // parsing a region requires a BAM header case 'b': bed = bed_read(optarg); break; // BED or position list file can be parsed now case 'q': baseQ = atoi(optarg); break; // base quality threshold case 'Q': mapQ = atoi(optarg); break; // mapping quality threshold case 'f': file_list = optarg; break; } } if (optind == argc && !file_list) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: samtools depth [options] in1.bam [in2.bam [...]]\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, " -b list of positions or regions\n"); fprintf(stderr, " -f list of input BAM filenames, one per line [null]\n"); fprintf(stderr, " -l minQLen\n"); fprintf(stderr, " -q base quality threshold\n"); fprintf(stderr, " -Q mapping quality threshold\n"); fprintf(stderr, " -r region\n"); fprintf(stderr, "\n"); return 1; } // initialize the auxiliary data structures if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; n = nfiles; argv = fn; optind = 0; } else n = argc - optind; // the number of BAMs on the command line data = calloc(n, 
sizeof(void*)); // data[i] for the i-th input beg = 0; end = 1<<30; tid = -1; // set the default region for (i = 0; i < n; ++i) { bam_header_t *htmp; data[i] = calloc(1, sizeof(aux_t)); data[i]->fp = bam_open(argv[optind+i], "r"); // open BAM data[i]->min_mapQ = mapQ; // set the mapQ filter data[i]->min_len = min_len; // set the qlen filter htmp = bam_header_read(data[i]->fp); // read the BAM header if (i == 0) { h = htmp; // keep the header of the 1st BAM if (reg) bam_parse_region(h, reg, &tid, &beg, &end); // also parse the region } else bam_header_destroy(htmp); // if not the 1st BAM, trash the header if (tid >= 0) { // if a region is specified and parsed successfully bam_index_t *idx = bam_index_load(argv[optind+i]); // load the index data[i]->iter = bam_iter_query(idx, tid, beg, end); // set the iterator bam_index_destroy(idx); // the index is not needed any more; phase out of the memory } } // the core multi-pileup loop mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM plp = calloc(n, sizeof(void*)); // plp[i] points to the array of covering reads (internal in mplp) while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0) { // come to the next covered position if (pos < beg || pos >= end) continue; // out of range; skip if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue; // not in BED; skip fputs(h->target_name[tid], stdout); printf("\t%d", pos+1); // a customized printf() would be faster for (i = 0; i < n; ++i) { // base level filters have to go here int j, m = 0; for (j = 0; j < n_plp[i]; ++j) { const bam_pileup1_t *p = plp[i] + j; // DON'T modfity plp[][] unless you really know if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos else if (bam1_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality } printf("\t%d", n_plp[i] - m); // this the depth to output } putchar('\n'); } free(n_plp); free(plp); 
bam_mplp_destroy(mplp); bam_header_destroy(h); for (i = 0; i < n; ++i) { bam_close(data[i]->fp); if (data[i]->iter) bam_iter_destroy(data[i]->iter); free(data[i]); } free(data); free(reg); if (bed) bed_destroy(bed); if ( file_list ) { for (i=0; i #include "bam.h" #include "khash.h" typedef char *str_p; KHASH_MAP_INIT_STR(s, int) KHASH_MAP_INIT_STR(r2l, str_p) void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data) { int ori_len = b->data_len; b->data_len += 3 + len; b->l_aux += 3 + len; if (b->m_data < b->data_len) { b->m_data = b->data_len; kroundup32(b->m_data); b->data = (uint8_t*)realloc(b->data, b->m_data); } b->data[ori_len] = tag[0]; b->data[ori_len + 1] = tag[1]; b->data[ori_len + 2] = type; memcpy(b->data + ori_len + 3, data, len); } uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]) { return bam_aux_get(b, tag); } #define __skip_tag(s) do { \ int type = toupper(*(s)); \ ++(s); \ if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \ else if (type == 'B') (s) += 5 + bam_aux_type2size(*(s)) * (*(int32_t*)((s)+1)); \ else (s) += bam_aux_type2size(type); \ } while(0) uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]) { uint8_t *s; int y = tag[0]<<8 | tag[1]; s = bam1_aux(b); while (s < b->data + b->data_len) { int x = (int)s[0]<<8 | s[1]; s += 2; if (x == y) return s; __skip_tag(s); } return 0; } // s MUST BE returned by bam_aux_get() int bam_aux_del(bam1_t *b, uint8_t *s) { uint8_t *p, *aux; aux = bam1_aux(b); p = s - 2; __skip_tag(s); memmove(p, s, b->l_aux - (s - aux)); b->data_len -= s - p; b->l_aux -= s - p; return 0; } int bam_aux_drop_other(bam1_t *b, uint8_t *s) { if (s) { uint8_t *p, *aux; aux = bam1_aux(b); p = s - 2; __skip_tag(s); memmove(aux, p, s - p); b->data_len -= b->l_aux - (s - p); b->l_aux = s - p; } else { b->data_len -= b->l_aux; b->l_aux = 0; } return 0; } void bam_init_header_hash(bam_header_t *header) { if (header->hash == 0) { int ret, i; khiter_t iter; khash_t(s) *h; header->hash = h 
= kh_init(s); for (i = 0; i < header->n_targets; ++i) { iter = kh_put(s, h, header->target_name[i], &ret); kh_value(h, iter) = i; } } } void bam_destroy_header_hash(bam_header_t *header) { if (header->hash) kh_destroy(s, (khash_t(s)*)header->hash); } int32_t bam_get_tid(const bam_header_t *header, const char *seq_name) { khint_t k; khash_t(s) *h = (khash_t(s)*)header->hash; k = kh_get(s, h, seq_name); return k == kh_end(h)? -1 : kh_value(h, k); } int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *beg, int *end) { char *s; int i, l, k, name_end; khiter_t iter; khash_t(s) *h; bam_init_header_hash(header); h = (khash_t(s)*)header->hash; *ref_id = *beg = *end = -1; name_end = l = strlen(str); s = (char*)malloc(l+1); // remove space for (i = k = 0; i < l; ++i) if (!isspace(str[i])) s[k++] = str[i]; s[k] = 0; l = k; // determine the sequence name for (i = l - 1; i >= 0; --i) if (s[i] == ':') break; // look for colon from the end if (i >= 0) name_end = i; if (name_end < l) { // check if this is really the end int n_hyphen = 0; for (i = name_end + 1; i < l; ++i) { if (s[i] == '-') ++n_hyphen; else if (!isdigit(s[i]) && s[i] != ',') break; } if (i < l || n_hyphen > 1) name_end = l; // malformated region string; then take str as the name s[name_end] = 0; iter = kh_get(s, h, s); if (iter == kh_end(h)) { // cannot find the sequence name iter = kh_get(s, h, str); // try str as the name if (iter == kh_end(h)) { if (bam_verbose >= 2) fprintf(stderr, "[%s] fail to determine the sequence name.\n", __func__); free(s); return -1; } else s[name_end] = ':', name_end = l; } } else iter = kh_get(s, h, str); if (iter == kh_end(h)) { free(s); return -1; } *ref_id = kh_val(h, iter); // parse the interval if (name_end < l) { for (i = k = name_end + 1; i < l; ++i) if (s[i] != ',') s[k++] = s[i]; s[k] = 0; *beg = atoi(s + name_end + 1); for (i = name_end + 1; i != k; ++i) if (s[i] == '-') break; *end = i < k? 
atoi(s + i + 1) : 1<<29; if (*beg > 0) --*beg; } else *beg = 0, *end = 1<<29; free(s); return *beg <= *end? 0 : -1; } int32_t bam_aux2i(const uint8_t *s) { int type; if (s == 0) return 0; type = *s++; if (type == 'c') return (int32_t)*(int8_t*)s; else if (type == 'C') return (int32_t)*(uint8_t*)s; else if (type == 's') return (int32_t)*(int16_t*)s; else if (type == 'S') return (int32_t)*(uint16_t*)s; else if (type == 'i' || type == 'I') return *(int32_t*)s; else return 0; } float bam_aux2f(const uint8_t *s) { int type; type = *s++; if (s == 0) return 0.0; if (type == 'f') return *(float*)s; else return 0.0; } double bam_aux2d(const uint8_t *s) { int type; type = *s++; if (s == 0) return 0.0; if (type == 'd') return *(double*)s; else return 0.0; } char bam_aux2A(const uint8_t *s) { int type; type = *s++; if (s == 0) return 0; if (type == 'A') return *(char*)s; else return 0; } char *bam_aux2Z(const uint8_t *s) { int type; type = *s++; if (s == 0) return 0; if (type == 'Z' || type == 'H') return (char*)s; else return 0; } #ifdef _WIN32 double drand48() { return (double)rand() / RAND_MAX; } #endif samtools-0.1.19/bam_cat.c000066400000000000000000000132531212162403000151700ustar00rootroot00000000000000/* bam_cat -- efficiently concatenates bam files bam_cat can be used to concatenate BAM files. Under special circumstances, it can be used as an alternative to 'samtools merge' to concatenate multiple sorted files into a single sorted file. For this to work each file must be sorted, and the sorted files must be given as command line arguments in order such that the final read in file i is less than or equal to the first read in file i+1. This code is derived from the bam_reheader function in samtools 0.1.8 and modified to perform concatenation by Chris Saunders on behalf of Illumina. ########## License: The MIT License Original SAMtools work copyright (c) 2008-2009 Genome Research Ltd. Modified SAMtools work copyright (c) 2010 Illumina, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* makefile: """ CC=gcc CFLAGS+=-g -Wall -O2 -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE -I$(SAMTOOLS_DIR) LDFLAGS+=-L$(SAMTOOLS_DIR) LDLIBS+=-lbam -lz all:bam_cat """ */ #include #include #include #include "knetfile.h" #include "bgzf.h" #include "bam.h" #define BUF_SIZE 0x10000 #define GZIPID1 31 #define GZIPID2 139 #define BGZF_EMPTY_BLOCK_SIZE 28 int bam_cat(int nfn, char * const *fn, const bam_header_t *h, const char* outbam) { BGZF *fp; FILE* fp_file; uint8_t *buf; uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE]; const int es=BGZF_EMPTY_BLOCK_SIZE; int i; fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w"); if (fp == 0) { fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outbam); return 1; } if (h) bam_header_write(fp, h); buf = (uint8_t*) malloc(BUF_SIZE); for(i = 0; i < nfn; ++i){ BGZF *in; bam_header_t *old; int len,j; in = strcmp(fn[i], "-")? 
bam_open(fn[i], "r") : bam_dopen(fileno(stdin), "r"); if (in == 0) { fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]); return -1; } if (in->is_write) return -1; old = bam_header_read(in); if (h == 0 && i == 0) bam_header_write(fp, old); if (in->block_offset < in->block_length) { bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset); bgzf_flush(fp); } j=0; #ifdef _USE_KNETFILE fp_file = fp->fp; while ((len = knet_read(in->fp, buf, BUF_SIZE)) > 0) { #else fp_file = fp->fp; while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) { #endif if(len= 0) { switch (c) { case 'h': { tamFile fph = sam_open(optarg); if (fph == 0) { fprintf(stderr, "[%s] ERROR: fail to read the header from '%s'.\n", __func__, argv[1]); return 1; } h = sam_header_read(fph); sam_close(fph); break; } case 'o': outfn = strdup(optarg); break; } } if (argc - optind < 2) { fprintf(stderr, "Usage: samtools cat [-h header.sam] [-o out.bam] [...]\n"); return 1; } ret = bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-"); free(outfn); return ret; } samtools-0.1.19/bam_color.c000066400000000000000000000064051212162403000155400ustar00rootroot00000000000000#include #include "bam.h" /*! @abstract Get the color encoding the previous and current base @param b pointer to an alignment @param i The i-th position, 0-based @return color @discussion Returns 0 no color information is found. */ char bam_aux_getCSi(bam1_t *b, int i) { uint8_t *c = bam_aux_get(b, "CS"); char *cs = NULL; // return the base if the tag was not found if(0 == c) return 0; cs = bam_aux2Z(c); // adjust for strandedness and leading adaptor if(bam1_strand(b)) { i = strlen(cs) - 1 - i; // adjust for leading hard clip uint32_t cigar = bam1_cigar(b)[0]; if((cigar & BAM_CIGAR_MASK) == BAM_CHARD_CLIP) { i -= cigar >> BAM_CIGAR_SHIFT; } } else { i++; } return cs[i]; } /*! 
@abstract Get the color quality of the color encoding the previous and current base @param b pointer to an alignment @param i The i-th position, 0-based @return color quality @discussion Returns 0 no color information is found. */ char bam_aux_getCQi(bam1_t *b, int i) { uint8_t *c = bam_aux_get(b, "CQ"); char *cq = NULL; // return the base if the tag was not found if(0 == c) return 0; cq = bam_aux2Z(c); // adjust for strandedness if(bam1_strand(b)) { i = strlen(cq) - 1 - i; // adjust for leading hard clip uint32_t cigar = bam1_cigar(b)[0]; if((cigar & BAM_CIGAR_MASK) == BAM_CHARD_CLIP) { i -= (cigar >> BAM_CIGAR_SHIFT); } } return cq[i]; } char bam_aux_nt2int(char a) { switch(toupper(a)) { case 'A': return 0; break; case 'C': return 1; break; case 'G': return 2; break; case 'T': return 3; break; default: return 4; break; } } char bam_aux_ntnt2cs(char a, char b) { a = bam_aux_nt2int(a); b = bam_aux_nt2int(b); if(4 == a || 4 == b) return '4'; return "0123"[(int)(a ^ b)]; } /*! @abstract Get the color error profile at the give position @param b pointer to an alignment @return the original color if the color was an error, '-' (dash) otherwise @discussion Returns 0 no color information is found. */ char bam_aux_getCEi(bam1_t *b, int i) { int cs_i; uint8_t *c = bam_aux_get(b, "CS"); char *cs = NULL; char prev_b, cur_b; char cur_color, cor_color; // return the base if the tag was not found if(0 == c) return 0; cs = bam_aux2Z(c); // adjust for strandedness and leading adaptor if(bam1_strand(b)) { //reverse strand cs_i = strlen(cs) - 1 - i; // adjust for leading hard clip uint32_t cigar = bam1_cigar(b)[0]; if((cigar & BAM_CIGAR_MASK) == BAM_CHARD_CLIP) { cs_i -= cigar >> BAM_CIGAR_SHIFT; } // get current color cur_color = cs[cs_i]; // get previous base. Note: must rc adaptor prev_b = (cs_i == 1) ? 
"TGCAN"[(int)bam_aux_nt2int(cs[0])] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i+1)]; // get current base cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]; } else { cs_i=i+1; // get current color cur_color = cs[cs_i]; // get previous base prev_b = (0 == i) ? cs[0] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i-1)]; // get current base cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]; } // corrected color cor_color = bam_aux_ntnt2cs(prev_b, cur_b); if(cur_color == cor_color) { return '-'; } else { return cur_color; } } samtools-0.1.19/bam_endian.h000066400000000000000000000020501212162403000156550ustar00rootroot00000000000000#ifndef BAM_ENDIAN_H #define BAM_ENDIAN_H #include static inline int bam_is_big_endian() { long one= 1; return !(*((char *)(&one))); } static inline uint16_t bam_swap_endian_2(uint16_t v) { return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); } static inline void *bam_swap_endian_2p(void *x) { *(uint16_t*)x = bam_swap_endian_2(*(uint16_t*)x); return x; } static inline uint32_t bam_swap_endian_4(uint32_t v) { v = ((v & 0x0000FFFFU) << 16) | (v >> 16); return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); } static inline void *bam_swap_endian_4p(void *x) { *(uint32_t*)x = bam_swap_endian_4(*(uint32_t*)x); return x; } static inline uint64_t bam_swap_endian_8(uint64_t v) { v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); } static inline void *bam_swap_endian_8p(void *x) { *(uint64_t*)x = bam_swap_endian_8(*(uint64_t*)x); return x; } #endif samtools-0.1.19/bam_import.c000066400000000000000000000401151212162403000157300ustar00rootroot00000000000000#include #include #include #include #include #include #include #ifdef _WIN32 #include #endif #include "kstring.h" #include "bam.h" #include "sam_header.h" #include "kseq.h" #include "khash.h" KSTREAM_INIT(gzFile, 
gzread, 16384) KHASH_MAP_INIT_STR(ref, uint64_t) void bam_init_header_hash(bam_header_t *header); void bam_destroy_header_hash(bam_header_t *header); int32_t bam_get_tid(const bam_header_t *header, const char *seq_name); unsigned char bam_nt16_table[256] = { 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15, 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15, 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15 }; unsigned short bam_char2flag_table[256] = { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,BAM_FREAD1,BAM_FREAD2,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, BAM_FPROPER_PAIR,0,BAM_FMREVERSE,0, 0,BAM_FMUNMAP,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, BAM_FDUP,0,BAM_FQCFAIL,0, 0,0,0,0, 0,0,0,0, BAM_FPAIRED,0,BAM_FREVERSE,BAM_FSECONDARY, 0,BAM_FUNMAP,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 }; char *bam_nt16_rev_table = "=ACMGRSVTWYHKDBN"; struct __tamFile_t { gzFile fp; kstream_t *ks; kstring_t *str; uint64_t n_lines; int is_first; }; char 
**__bam_get_lines(const char *fn, int *_n) // for bam_plcmd.c only { char **list = 0, *s; int n = 0, dret, m = 0; gzFile fp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(fn, "r"); kstream_t *ks; kstring_t *str; str = (kstring_t*)calloc(1, sizeof(kstring_t)); ks = ks_init(fp); while (ks_getuntil(ks, '\n', str, &dret) > 0) { if (n == m) { m = m? m << 1 : 16; list = (char**)realloc(list, m * sizeof(char*)); } if (str->s[str->l-1] == '\r') str->s[--str->l] = '\0'; s = list[n++] = (char*)calloc(str->l + 1, 1); strcpy(s, str->s); } ks_destroy(ks); gzclose(fp); free(str->s); free(str); *_n = n; return list; } static bam_header_t *hash2header(const kh_ref_t *hash) { bam_header_t *header; khiter_t k; header = bam_header_init(); header->n_targets = kh_size(hash); header->target_name = (char**)calloc(kh_size(hash), sizeof(char*)); header->target_len = (uint32_t*)calloc(kh_size(hash), 4); for (k = kh_begin(hash); k != kh_end(hash); ++k) { if (kh_exist(hash, k)) { int i = (int)kh_value(hash, k); header->target_name[i] = (char*)kh_key(hash, k); header->target_len[i] = kh_value(hash, k)>>32; } } bam_init_header_hash(header); return header; } bam_header_t *sam_header_read2(const char *fn) { bam_header_t *header; int c, dret, ret, error = 0; gzFile fp; kstream_t *ks; kstring_t *str; kh_ref_t *hash; khiter_t k; if (fn == 0) return 0; fp = (strcmp(fn, "-") == 0)? 
gzdopen(fileno(stdin), "r") : gzopen(fn, "r"); if (fp == 0) return 0; hash = kh_init(ref); ks = ks_init(fp); str = (kstring_t*)calloc(1, sizeof(kstring_t)); while (ks_getuntil(ks, 0, str, &dret) > 0) { char *s = strdup(str->s); int len, i; i = kh_size(hash); ks_getuntil(ks, 0, str, &dret); len = atoi(str->s); k = kh_put(ref, hash, s, &ret); if (ret == 0) { fprintf(stderr, "[sam_header_read2] duplicated sequence name: %s\n", s); error = 1; } kh_value(hash, k) = (uint64_t)len<<32 | i; if (dret != '\n') while ((c = ks_getc(ks)) != '\n' && c != -1); } ks_destroy(ks); gzclose(fp); free(str->s); free(str); fprintf(stderr, "[sam_header_read2] %d sequences loaded.\n", kh_size(hash)); if (error) return 0; header = hash2header(hash); kh_destroy(ref, hash); return header; } static inline uint8_t *alloc_data(bam1_t *b, int size) { if (b->m_data < size) { b->m_data = size; kroundup32(b->m_data); b->data = (uint8_t*)realloc(b->data, b->m_data); } return b->data; } static inline void parse_error(int64_t n_lines, const char * __restrict msg) { fprintf(stderr, "Parse error at line %lld: %s\n", (long long)n_lines, msg); abort(); } static inline void append_text(bam_header_t *header, kstring_t *str) { size_t x = header->l_text, y = header->l_text + str->l + 2; // 2 = 1 byte dret + 1 byte null kroundup32(x); kroundup32(y); if (x < y) { header->n_text = y; header->text = (char*)realloc(header->text, y); if ( !header->text ) { fprintf(stderr,"realloc failed to alloc %ld bytes\n", y); abort(); } } // Sanity check if ( header->l_text+str->l+1 >= header->n_text ) { fprintf(stderr,"append_text FIXME: %ld>=%ld, x=%ld,y=%ld\n", header->l_text+str->l+1,(long)header->n_text,x,y); abort(); } strncpy(header->text + header->l_text, str->s, str->l+1); // we cannot use strcpy() here. 
header->l_text += str->l + 1; header->text[header->l_text] = 0; } int sam_header_parse(bam_header_t *h) { char **tmp; int i; free(h->target_len); free(h->target_name); h->n_targets = 0; h->target_len = 0; h->target_name = 0; if (h->l_text < 3) return 0; if (h->dict == 0) h->dict = sam_header_parse2(h->text); tmp = sam_header2list(h->dict, "SQ", "SN", &h->n_targets); if (h->n_targets == 0) return 0; h->target_name = calloc(h->n_targets, sizeof(void*)); for (i = 0; i < h->n_targets; ++i) h->target_name[i] = strdup(tmp[i]); free(tmp); tmp = sam_header2list(h->dict, "SQ", "LN", &h->n_targets); h->target_len = calloc(h->n_targets, 4); for (i = 0; i < h->n_targets; ++i) h->target_len[i] = atoi(tmp[i]); free(tmp); return h->n_targets; } bam_header_t *sam_header_read(tamFile fp) { int ret, dret; bam_header_t *header = bam_header_init(); kstring_t *str = fp->str; while ((ret = ks_getuntil(fp->ks, KS_SEP_TAB, str, &dret)) >= 0 && str->s[0] == '@') { // skip header str->s[str->l] = dret; // note that str->s is NOT null terminated!! append_text(header, str); if (dret != '\n') { ret = ks_getuntil(fp->ks, '\n', str, &dret); str->s[str->l] = '\n'; // NOT null terminated!! 
append_text(header, str); } ++fp->n_lines; } sam_header_parse(header); bam_init_header_hash(header); fp->is_first = 1; return header; } int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) { int ret, doff, doff0, dret, z = 0; bam1_core_t *c = &b->core; kstring_t *str = fp->str; kstream_t *ks = fp->ks; if (fp->is_first) { fp->is_first = 0; ret = str->l; } else { do { // special consideration for empty lines ret = ks_getuntil(fp->ks, KS_SEP_TAB, str, &dret); if (ret >= 0) z += str->l + 1; } while (ret == 0); } if (ret < 0) return -1; ++fp->n_lines; doff = 0; { // name c->l_qname = strlen(str->s) + 1; memcpy(alloc_data(b, doff + c->l_qname) + doff, str->s, c->l_qname); doff += c->l_qname; } { // flag long flag; char *s; ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; flag = strtol((char*)str->s, &s, 0); if (*s) { // not the end of the string flag = 0; for (s = str->s; *s; ++s) flag |= bam_char2flag_table[(int)*s]; } c->flag = flag; } { // tid, pos, qual ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->tid = bam_get_tid(header, str->s); if (c->tid < 0 && strcmp(str->s, "*")) { if (header->n_targets == 0) { fprintf(stderr, "[sam_read1] missing header? Abort!\n"); exit(1); } else fprintf(stderr, "[sam_read1] reference '%s' is recognized as '*'.\n", str->s); } ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->pos = isdigit(str->s[0])? atoi(str->s) - 1 : -1; ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->qual = isdigit(str->s[0])? 
atoi(str->s) : 0; if (ret < 0) return -2; } { // cigar char *s, *t; int i, op; long x; c->n_cigar = 0; if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -3; z += str->l + 1; if (str->s[0] != '*') { uint32_t *cigar; for (s = str->s; *s; ++s) { if ((isalpha(*s)) || (*s=='=')) ++c->n_cigar; else if (!isdigit(*s)) parse_error(fp->n_lines, "invalid CIGAR character"); } b->data = alloc_data(b, doff + c->n_cigar * 4); cigar = bam1_cigar(b); for (i = 0, s = str->s; i != c->n_cigar; ++i) { x = strtol(s, &t, 10); op = toupper(*t); if (op == 'M') op = BAM_CMATCH; else if (op == 'I') op = BAM_CINS; else if (op == 'D') op = BAM_CDEL; else if (op == 'N') op = BAM_CREF_SKIP; else if (op == 'S') op = BAM_CSOFT_CLIP; else if (op == 'H') op = BAM_CHARD_CLIP; else if (op == 'P') op = BAM_CPAD; else if (op == '=') op = BAM_CEQUAL; else if (op == 'X') op = BAM_CDIFF; else if (op == 'B') op = BAM_CBACK; else parse_error(fp->n_lines, "invalid CIGAR operation"); s = t + 1; cigar[i] = bam_cigar_gen(x, op); } if (*s) parse_error(fp->n_lines, "unmatched CIGAR operation"); c->bin = bam_reg2bin(c->pos, bam_calend(c, cigar)); doff += c->n_cigar * 4; } else { if (!(c->flag&BAM_FUNMAP)) { fprintf(stderr, "Parse warning at line %lld: mapped sequence without CIGAR\n", (long long)fp->n_lines); c->flag |= BAM_FUNMAP; } c->bin = bam_reg2bin(c->pos, c->pos + 1); } } { // mtid, mpos, isize ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->mtid = strcmp(str->s, "=")? bam_get_tid(header, str->s) : c->tid; ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->mpos = isdigit(str->s[0])? atoi(str->s) - 1 : -1; ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->isize = (str->s[0] == '-' || isdigit(str->s[0]))? 
atoi(str->s) : 0; if (ret < 0) return -4; } { // seq and qual int i; uint8_t *p = 0; if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -5; // seq z += str->l + 1; if (strcmp(str->s, "*")) { c->l_qseq = strlen(str->s); if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b))) { fprintf(stderr, "Line %ld, sequence length %i vs %i from CIGAR\n", (long)fp->n_lines, c->l_qseq, (int32_t)bam_cigar2qlen(c, bam1_cigar(b))); parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent"); } p = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff; memset(p, 0, (c->l_qseq+1)/2); for (i = 0; i < c->l_qseq; ++i) p[i/2] |= bam_nt16_table[(int)str->s[i]] << 4*(1-i%2); } else c->l_qseq = 0; if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -6; // qual z += str->l + 1; if (strcmp(str->s, "*") && c->l_qseq != strlen(str->s)) parse_error(fp->n_lines, "sequence and quality are inconsistent"); p += (c->l_qseq+1)/2; if (strcmp(str->s, "*") == 0) for (i = 0; i < c->l_qseq; ++i) p[i] = 0xff; else for (i = 0; i < c->l_qseq; ++i) p[i] = str->s[i] - 33; doff += c->l_qseq + (c->l_qseq+1)/2; } doff0 = doff; if (dret != '\n' && dret != '\r') { // aux while (ks_getuntil(ks, KS_SEP_TAB, str, &dret) >= 0) { uint8_t *s, type, key[2]; z += str->l + 1; if (str->l < 6 || str->s[2] != ':' || str->s[4] != ':') parse_error(fp->n_lines, "missing colon in auxiliary data"); key[0] = str->s[0]; key[1] = str->s[1]; type = str->s[3]; s = alloc_data(b, doff + 3) + doff; s[0] = key[0]; s[1] = key[1]; s += 2; doff += 2; if (type == 'A' || type == 'a' || type == 'c' || type == 'C') { // c and C for backward compatibility s = alloc_data(b, doff + 2) + doff; *s++ = 'A'; *s = str->s[5]; doff += 2; } else if (type == 'I' || type == 'i') { long long x; s = alloc_data(b, doff + 5) + doff; x = (long long)atoll(str->s + 5); if (x < 0) { if (x >= -127) { *s++ = 'c'; *(int8_t*)s = (int8_t)x; s += 1; doff += 2; } else if (x >= -32767) { *s++ = 's'; *(int16_t*)s = (int16_t)x; s 
+= 2; doff += 3; } else { *s++ = 'i'; *(int32_t*)s = (int32_t)x; s += 4; doff += 5; if (x < -2147483648ll) fprintf(stderr, "Parse warning at line %lld: integer %lld is out of range.", (long long)fp->n_lines, x); } } else { if (x <= 255) { *s++ = 'C'; *s++ = (uint8_t)x; doff += 2; } else if (x <= 65535) { *s++ = 'S'; *(uint16_t*)s = (uint16_t)x; s += 2; doff += 3; } else { *s++ = 'I'; *(uint32_t*)s = (uint32_t)x; s += 4; doff += 5; if (x > 4294967295ll) fprintf(stderr, "Parse warning at line %lld: integer %lld is out of range.", (long long)fp->n_lines, x); } } } else if (type == 'f') { s = alloc_data(b, doff + 5) + doff; *s++ = 'f'; *(float*)s = (float)atof(str->s + 5); s += 4; doff += 5; } else if (type == 'd') { s = alloc_data(b, doff + 9) + doff; *s++ = 'd'; *(float*)s = (float)atof(str->s + 9); s += 8; doff += 9; } else if (type == 'Z' || type == 'H') { int size = 1 + (str->l - 5) + 1; if (type == 'H') { // check whether the hex string is valid int i; if ((str->l - 5) % 2 == 1) parse_error(fp->n_lines, "length of the hex string not even"); for (i = 0; i < str->l - 5; ++i) { int c = toupper(str->s[5 + i]); if (!((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F'))) parse_error(fp->n_lines, "invalid hex character"); } } s = alloc_data(b, doff + size) + doff; *s++ = type; memcpy(s, str->s + 5, str->l - 5); s[str->l - 5] = 0; doff += size; } else if (type == 'B') { int32_t n = 0, Bsize, k = 0, size; char *p; if (str->l < 8) parse_error(fp->n_lines, "too few values in aux type B"); Bsize = bam_aux_type2size(str->s[5]); // the size of each element for (p = (char*)str->s + 6; *p; ++p) // count the number of elements in the array if (*p == ',') ++n; p = str->s + 7; // now p points to the first number in the array size = 6 + Bsize * n; // total number of bytes allocated to this tag s = alloc_data(b, doff + 6 * Bsize * n) + doff; // allocate memory *s++ = 'B'; *s++ = str->s[5]; memcpy(s, &n, 4); s += 4; // write the number of elements if (str->s[5] == 'c') while (p < str->s 
+ str->l) ((int8_t*)s)[k++] = (int8_t)strtol(p, &p, 0), ++p; else if (str->s[5] == 'C') while (p < str->s + str->l) ((uint8_t*)s)[k++] = (uint8_t)strtol(p, &p, 0), ++p; else if (str->s[5] == 's') while (p < str->s + str->l) ((int16_t*)s)[k++] = (int16_t)strtol(p, &p, 0), ++p; // FIXME: avoid unaligned memory else if (str->s[5] == 'S') while (p < str->s + str->l) ((uint16_t*)s)[k++] = (uint16_t)strtol(p, &p, 0), ++p; else if (str->s[5] == 'i') while (p < str->s + str->l) ((int32_t*)s)[k++] = (int32_t)strtol(p, &p, 0), ++p; else if (str->s[5] == 'I') while (p < str->s + str->l) ((uint32_t*)s)[k++] = (uint32_t)strtol(p, &p, 0), ++p; else if (str->s[5] == 'f') while (p < str->s + str->l) ((float*)s)[k++] = (float)strtod(p, &p), ++p; else parse_error(fp->n_lines, "unrecognized array type"); s += Bsize * n; doff += size; } else parse_error(fp->n_lines, "unrecognized type"); if (dret == '\n' || dret == '\r') break; } } b->l_aux = doff - doff0; b->data_len = doff; if (bam_no_B) bam_remove_B(b); return z; } tamFile sam_open(const char *fn) { tamFile fp; gzFile gzfp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "rb") : gzopen(fn, "rb"); if (gzfp == 0) return 0; fp = (tamFile)calloc(1, sizeof(struct __tamFile_t)); fp->str = (kstring_t*)calloc(1, sizeof(kstring_t)); fp->fp = gzfp; fp->ks = ks_init(fp->fp); return fp; } void sam_close(tamFile fp) { if (fp) { ks_destroy(fp->ks); gzclose(fp->fp); free(fp->str->s); free(fp->str); free(fp); } } samtools-0.1.19/bam_index.c000066400000000000000000000517631212162403000155400ustar00rootroot00000000000000#include #include #include "bam.h" #include "khash.h" #include "ksort.h" #include "bam_endian.h" #ifdef _USE_KNETFILE #include "knetfile.h" #endif /*! @header Alignment indexing. Before indexing, BAM must be sorted based on the leftmost coordinate of alignments. In indexing, BAM uses two indices: a UCSC binning index and a simple linear index. 
The binning index is efficient for alignments spanning long distances, while the auxiliary linear index helps to reduce unnecessary seek calls, especially for short alignments. The UCSC binning scheme was suggested by Richard Durbin and Lincoln Stein and is explained by Kent et al. (2002). In this scheme, each bin represents a contiguous genomic region which can be fully contained in another bin; each alignment is associated with the bin which represents the smallest region containing the entire alignment. The binning scheme is essentially another representation of an R-tree. A distinct bin uniquely corresponds to a distinct internal node in an R-tree. Bin A is a child of Bin B if region A is contained in B. In BAM, each bin may span 2^29, 2^26, 2^23, 2^20, 2^17 or 2^14 bp. Bin 0 spans a 512Mbp region, bins 1-8 span 64Mbp, 9-72 8Mbp, 73-584 1Mbp, 585-4680 128Kbp and bins 4681-37449 span 16Kbp regions. If we want to find the alignments overlapping a region [rbeg,rend), we need to calculate the list of bins that may overlap the region and test the alignments in those bins to confirm the overlaps. If the specified region is short, typically only a few alignments in six bins need to be retrieved. The overlapping alignments can be quickly fetched. */ #define BAM_MIN_CHUNK_GAP 32768 // 1<<14 is the size of the minimum bin.
#define BAM_LIDX_SHIFT 14 #define BAM_MAX_BIN 37450 // =(8^6-1)/7+1 typedef struct { uint64_t u, v; } pair64_t; #define pair64_lt(a,b) ((a).u < (b).u) KSORT_INIT(off, pair64_t, pair64_lt) typedef struct { uint32_t m, n; pair64_t *list; } bam_binlist_t; typedef struct { int32_t n, m; uint64_t *offset; } bam_lidx_t; KHASH_MAP_INIT_INT(i, bam_binlist_t) struct __bam_index_t { int32_t n; uint64_t n_no_coor; // unmapped reads without coordinate khash_t(i) **index; bam_lidx_t *index2; }; // requirement: len <= LEN_MASK static inline void insert_offset(khash_t(i) *h, int bin, uint64_t beg, uint64_t end) { khint_t k; bam_binlist_t *l; int ret; k = kh_put(i, h, bin, &ret); l = &kh_value(h, k); if (ret) { // not present l->m = 1; l->n = 0; l->list = (pair64_t*)calloc(l->m, 16); } if (l->n == l->m) { l->m <<= 1; l->list = (pair64_t*)realloc(l->list, l->m * 16); } l->list[l->n].u = beg; l->list[l->n++].v = end; } static inline void insert_offset2(bam_lidx_t *index2, bam1_t *b, uint64_t offset) { int i, beg, end; beg = b->core.pos >> BAM_LIDX_SHIFT; end = (bam_calend(&b->core, bam1_cigar(b)) - 1) >> BAM_LIDX_SHIFT; if (index2->m < end + 1) { int old_m = index2->m; index2->m = end + 1; kroundup32(index2->m); index2->offset = (uint64_t*)realloc(index2->offset, index2->m * 8); memset(index2->offset + old_m, 0, 8 * (index2->m - old_m)); } if (beg == end) { if (index2->offset[beg] == 0) index2->offset[beg] = offset; } else { for (i = beg; i <= end; ++i) if (index2->offset[i] == 0) index2->offset[i] = offset; } index2->n = end + 1; } static void merge_chunks(bam_index_t *idx) { #if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16) khash_t(i) *index; int i, l, m; khint_t k; for (i = 0; i < idx->n; ++i) { index = idx->index[i]; for (k = kh_begin(index); k != kh_end(index); ++k) { bam_binlist_t *p; if (!kh_exist(index, k) || kh_key(index, k) == BAM_MAX_BIN) continue; p = &kh_value(index, k); m = 0; for (l = 1; l < p->n; ++l) { #ifdef BAM_TRUE_OFFSET if (p->list[m].v + 
BAM_MIN_CHUNK_GAP > p->list[l].u) p->list[m].v = p->list[l].v; #else if (p->list[m].v>>16 == p->list[l].u>>16) p->list[m].v = p->list[l].v; #endif else p->list[++m] = p->list[l]; } // ~for(l) p->n = m + 1; } // ~for(k) } // ~for(i) #endif // defined(BAM_TRUE_OFFSET) || defined(BAM_BGZF) } static void fill_missing(bam_index_t *idx) { int i, j; for (i = 0; i < idx->n; ++i) { bam_lidx_t *idx2 = &idx->index2[i]; for (j = 1; j < idx2->n; ++j) if (idx2->offset[j] == 0) idx2->offset[j] = idx2->offset[j-1]; } } bam_index_t *bam_index_core(bamFile fp) { bam1_t *b; bam_header_t *h; int i, ret; bam_index_t *idx; uint32_t last_bin, save_bin; int32_t last_coor, last_tid, save_tid; bam1_core_t *c; uint64_t save_off, last_off, n_mapped, n_unmapped, off_beg, off_end, n_no_coor; h = bam_header_read(fp); if(h == 0) { fprintf(stderr, "[bam_index_core] Invalid BAM header."); return NULL; } idx = (bam_index_t*)calloc(1, sizeof(bam_index_t)); b = (bam1_t*)calloc(1, sizeof(bam1_t)); c = &b->core; idx->n = h->n_targets; bam_header_destroy(h); idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*)); for (i = 0; i < idx->n; ++i) idx->index[i] = kh_init(i); idx->index2 = (bam_lidx_t*)calloc(idx->n, sizeof(bam_lidx_t)); save_bin = save_tid = last_tid = last_bin = 0xffffffffu; save_off = last_off = bam_tell(fp); last_coor = 0xffffffffu; n_mapped = n_unmapped = n_no_coor = off_end = 0; off_beg = off_end = bam_tell(fp); while ((ret = bam_read1(fp, b)) >= 0) { if (c->tid < 0) ++n_no_coor; if (last_tid < c->tid || (last_tid >= 0 && c->tid < 0)) { // change of chromosomes last_tid = c->tid; last_bin = 0xffffffffu; } else if ((uint32_t)last_tid > (uint32_t)c->tid) { fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %d-th chr > %d-th chr\n", bam1_qname(b), last_tid+1, c->tid+1); return NULL; } else if ((int32_t)c->tid >= 0 && last_coor > c->pos) { fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %u > %u in %d-th chr\n", bam1_qname(b), last_coor, c->pos, 
c->tid+1); return NULL; } if (c->tid >= 0 && !(c->flag & BAM_FUNMAP)) insert_offset2(&idx->index2[b->core.tid], b, last_off); if (c->bin != last_bin) { // then possibly write the binning index if (save_bin != 0xffffffffu) // save_bin==0xffffffffu only happens to the first record insert_offset(idx->index[save_tid], save_bin, save_off, last_off); if (last_bin == 0xffffffffu && save_tid != 0xffffffffu) { // write the meta element off_end = last_off; insert_offset(idx->index[save_tid], BAM_MAX_BIN, off_beg, off_end); insert_offset(idx->index[save_tid], BAM_MAX_BIN, n_mapped, n_unmapped); n_mapped = n_unmapped = 0; off_beg = off_end; } save_off = last_off; save_bin = last_bin = c->bin; save_tid = c->tid; if (save_tid < 0) break; } if (bam_tell(fp) <= last_off) { fprintf(stderr, "[bam_index_core] bug in BGZF/RAZF: %llx < %llx\n", (unsigned long long)bam_tell(fp), (unsigned long long)last_off); return NULL; } if (c->flag & BAM_FUNMAP) ++n_unmapped; else ++n_mapped; last_off = bam_tell(fp); last_coor = b->core.pos; } if (save_tid >= 0) { insert_offset(idx->index[save_tid], save_bin, save_off, bam_tell(fp)); insert_offset(idx->index[save_tid], BAM_MAX_BIN, off_beg, bam_tell(fp)); insert_offset(idx->index[save_tid], BAM_MAX_BIN, n_mapped, n_unmapped); } merge_chunks(idx); fill_missing(idx); if (ret >= 0) { while ((ret = bam_read1(fp, b)) >= 0) { ++n_no_coor; if (c->tid >= 0 && n_no_coor) { fprintf(stderr, "[bam_index_core] the alignment is not sorted: reads without coordinates prior to reads with coordinates.\n"); return NULL; } } } if (ret < -1) fprintf(stderr, "[bam_index_core] truncated file? Continue anyway. 
(%d)\n", ret); free(b->data); free(b); idx->n_no_coor = n_no_coor; return idx; } void bam_index_destroy(bam_index_t *idx) { khint_t k; int i; if (idx == 0) return; for (i = 0; i < idx->n; ++i) { khash_t(i) *index = idx->index[i]; bam_lidx_t *index2 = idx->index2 + i; for (k = kh_begin(index); k != kh_end(index); ++k) { if (kh_exist(index, k)) free(kh_value(index, k).list); } kh_destroy(i, index); free(index2->offset); } free(idx->index); free(idx->index2); free(idx); } void bam_index_save(const bam_index_t *idx, FILE *fp) { int32_t i, size; khint_t k; fwrite("BAI\1", 1, 4, fp); if (bam_is_be) { uint32_t x = idx->n; fwrite(bam_swap_endian_4p(&x), 4, 1, fp); } else fwrite(&idx->n, 4, 1, fp); for (i = 0; i < idx->n; ++i) { khash_t(i) *index = idx->index[i]; bam_lidx_t *index2 = idx->index2 + i; // write binning index size = kh_size(index); if (bam_is_be) { // big endian uint32_t x = size; fwrite(bam_swap_endian_4p(&x), 4, 1, fp); } else fwrite(&size, 4, 1, fp); for (k = kh_begin(index); k != kh_end(index); ++k) { if (kh_exist(index, k)) { bam_binlist_t *p = &kh_value(index, k); if (bam_is_be) { // big endian uint32_t x; x = kh_key(index, k); fwrite(bam_swap_endian_4p(&x), 4, 1, fp); x = p->n; fwrite(bam_swap_endian_4p(&x), 4, 1, fp); for (x = 0; (int)x < p->n; ++x) { bam_swap_endian_8p(&p->list[x].u); bam_swap_endian_8p(&p->list[x].v); } fwrite(p->list, 16, p->n, fp); for (x = 0; (int)x < p->n; ++x) { bam_swap_endian_8p(&p->list[x].u); bam_swap_endian_8p(&p->list[x].v); } } else { fwrite(&kh_key(index, k), 4, 1, fp); fwrite(&p->n, 4, 1, fp); fwrite(p->list, 16, p->n, fp); } } } // write linear index (index2) if (bam_is_be) { int x = index2->n; fwrite(bam_swap_endian_4p(&x), 4, 1, fp); } else fwrite(&index2->n, 4, 1, fp); if (bam_is_be) { // big endian int x; for (x = 0; (int)x < index2->n; ++x) bam_swap_endian_8p(&index2->offset[x]); fwrite(index2->offset, 8, index2->n, fp); for (x = 0; (int)x < index2->n; ++x) bam_swap_endian_8p(&index2->offset[x]); } else 
fwrite(index2->offset, 8, index2->n, fp); } { // write the number of reads coor-less records. uint64_t x = idx->n_no_coor; if (bam_is_be) bam_swap_endian_8p(&x); fwrite(&x, 8, 1, fp); } fflush(fp); }

/*
 * Read a BAI index from an already-open stream.
 *
 * fp is borrowed: this function never closes it. The caller that opened the
 * stream stays responsible for fclose() on every path; bam_index_load_local()
 * closes it unconditionally after this call, so closing it here as well would
 * be a double fclose.
 *
 * Returns a newly allocated index (release it with bam_index_destroy()), or 0
 * when fp is NULL, the 4-byte magic is missing or wrong, the index structure
 * cannot be allocated, or the reference count cannot be read or is negative.
 */
static bam_index_t *bam_index_load_core(FILE *fp)
{
	int i;
	char magic[4];
	bam_index_t *idx;
	if (fp == 0) {
		fprintf(stderr, "[bam_index_load_core] fail to load index.\n");
		return 0;
	}
	// a short read leaves magic[] partly uninitialised, so handle it like a bad magic
	if (fread(magic, 1, 4, fp) != 4 || strncmp(magic, "BAI\1", 4)) {
		fprintf(stderr, "[bam_index_load] wrong magic number.\n");
		return 0; // fp is NOT closed here: the caller owns the stream
	}
	idx = (bam_index_t*)calloc(1, sizeof(bam_index_t));
	if (idx == 0) return 0;
	if (fread(&idx->n, 4, 1, fp) != 1) {
		fprintf(stderr, "[bam_index_load] truncated index file.\n");
		free(idx);
		return 0;
	}
	if (bam_is_be) bam_swap_endian_4p(&idx->n);
	if (idx->n < 0) { // a negative count would be turned into a huge size by calloc()
		fprintf(stderr, "[bam_index_load] invalid number of reference sequences.\n");
		free(idx);
		return 0;
	}
	idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*));
	idx->index2 = (bam_lidx_t*)calloc(idx->n, sizeof(bam_lidx_t));
	// NOTE(review): the per-reference reads below are still unchecked, as in the
	// original; a truncated file leaves the counts they fill in indeterminate.
	for (i = 0; i < idx->n; ++i) {
		khash_t(i) *index;
		bam_lidx_t *index2 = idx->index2 + i;
		uint32_t key, size;
		khint_t k;
		int j, ret;
		bam_binlist_t *p;
		index = idx->index[i] = kh_init(i);
		// load binning index
		fread(&size, 4, 1, fp);
		if (bam_is_be) bam_swap_endian_4p(&size);
		for (j = 0; j < (int)size; ++j) {
			fread(&key, 4, 1, fp);
			if (bam_is_be) bam_swap_endian_4p(&key);
			k = kh_put(i, index, key, &ret);
			p = &kh_value(index, k);
			fread(&p->n, 4, 1, fp);
			if (bam_is_be) bam_swap_endian_4p(&p->n);
			p->m = p->n;
			p->list = (pair64_t*)malloc(p->m * 16);
			fread(p->list, 16, p->n, fp);
			if (bam_is_be) {
				int x;
				for (x = 0; x < p->n; ++x) {
					bam_swap_endian_8p(&p->list[x].u);
					bam_swap_endian_8p(&p->list[x].v);
				}
			}
		}
		// load linear index
		fread(&index2->n, 4, 1, fp);
		if (bam_is_be) bam_swap_endian_4p(&index2->n);
		index2->m = index2->n;
		index2->offset = (uint64_t*)calloc(index2->m, 8);
		fread(index2->offset, index2->n, 8, fp);
		if (bam_is_be)
			for (j = 0; j < index2->n; ++j) bam_swap_endian_8p(&index2->offset[j]);
	}
	// the trailing count of coordinate-less reads is optional: default to 0 when absent
	if (fread(&idx->n_no_coor, 8, 1, fp) == 0) idx->n_no_coor = 0;
	if (bam_is_be) bam_swap_endian_8p(&idx->n_no_coor);
	return idx;
}

bam_index_t *bam_index_load_local(const char *_fn) { FILE *fp; char *fnidx, *fn; if (strstr(_fn,
"ftp://") == _fn || strstr(_fn, "http://") == _fn) { const char *p; int l = strlen(_fn); for (p = _fn + l - 1; p >= _fn; --p) if (*p == '/') break; fn = strdup(p + 1); } else fn = strdup(_fn); fnidx = (char*)calloc(strlen(fn) + 5, 1); strcpy(fnidx, fn); strcat(fnidx, ".bai"); fp = fopen(fnidx, "rb"); if (fp == 0) { // try "{base}.bai" char *s = strstr(fn, "bam"); if (s == fn + strlen(fn) - 3) { strcpy(fnidx, fn); fnidx[strlen(fn)-1] = 'i'; fp = fopen(fnidx, "rb"); } } free(fnidx); free(fn); if (fp) { bam_index_t *idx = bam_index_load_core(fp); fclose(fp); return idx; } else return 0; } #ifdef _USE_KNETFILE static void download_from_remote(const char *url) { const int buf_size = 1 * 1024 * 1024; char *fn; FILE *fp; uint8_t *buf; knetFile *fp_remote; int l; if (strstr(url, "ftp://") != url && strstr(url, "http://") != url) return; l = strlen(url); for (fn = (char*)url + l - 1; fn >= url; --fn) if (*fn == '/') break; ++fn; // fn now points to the file name fp_remote = knet_open(url, "r"); if (fp_remote == 0) { fprintf(stderr, "[download_from_remote] fail to open remote file.\n"); return; } if ((fp = fopen(fn, "wb")) == 0) { fprintf(stderr, "[download_from_remote] fail to create file in the working directory.\n"); knet_close(fp_remote); return; } buf = (uint8_t*)calloc(buf_size, 1); while ((l = knet_read(fp_remote, buf, buf_size)) != 0) fwrite(buf, 1, l, fp); free(buf); fclose(fp); knet_close(fp_remote); } #else static void download_from_remote(const char *url) { return; } #endif bam_index_t *bam_index_load(const char *fn) { bam_index_t *idx; idx = bam_index_load_local(fn); if (idx == 0 && (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)) { char *fnidx = calloc(strlen(fn) + 5, 1); strcat(strcpy(fnidx, fn), ".bai"); fprintf(stderr, "[bam_index_load] attempting to download the remote index file.\n"); download_from_remote(fnidx); free(fnidx); idx = bam_index_load_local(fn); } if (idx == 0) fprintf(stderr, "[bam_index_load] fail to load BAM index.\n"); return 
idx; } int bam_index_build2(const char *fn, const char *_fnidx) { char *fnidx; FILE *fpidx; bamFile fp; bam_index_t *idx; if ((fp = bam_open(fn, "r")) == 0) { fprintf(stderr, "[bam_index_build2] fail to open the BAM file.\n"); return -1; } idx = bam_index_core(fp); bam_close(fp); if(idx == 0) { fprintf(stderr, "[bam_index_build2] fail to index the BAM file.\n"); return -1; } if (_fnidx == 0) { fnidx = (char*)calloc(strlen(fn) + 5, 1); strcpy(fnidx, fn); strcat(fnidx, ".bai"); } else fnidx = strdup(_fnidx); fpidx = fopen(fnidx, "wb"); if (fpidx == 0) { fprintf(stderr, "[bam_index_build2] fail to create the index file.\n"); free(fnidx); bam_index_destroy(idx); return -1; } bam_index_save(idx, fpidx); bam_index_destroy(idx); fclose(fpidx); free(fnidx); return 0; } int bam_index_build(const char *fn) { return bam_index_build2(fn, 0); } int bam_index(int argc, char *argv[]) { if (argc < 2) { fprintf(stderr, "Usage: samtools index [out.index]\n"); return 1; } if (argc >= 3) bam_index_build2(argv[1], argv[2]); else bam_index_build(argv[1]); return 0; } int bam_idxstats(int argc, char *argv[]) { bam_index_t *idx; bam_header_t *header; bamFile fp; int i; if (argc < 2) { fprintf(stderr, "Usage: samtools idxstats \n"); return 1; } fp = bam_open(argv[1], "r"); if (fp == 0) { fprintf(stderr, "[%s] fail to open BAM.\n", __func__); return 1; } header = bam_header_read(fp); bam_close(fp); idx = bam_index_load(argv[1]); if (idx == 0) { fprintf(stderr, "[%s] fail to load the index.\n", __func__); return 1; } for (i = 0; i < idx->n; ++i) { khint_t k; khash_t(i) *h = idx->index[i]; printf("%s\t%d", header->target_name[i], header->target_len[i]); k = kh_get(i, h, BAM_MAX_BIN); if (k != kh_end(h)) printf("\t%llu\t%llu", (long long)kh_val(h, k).list[1].u, (long long)kh_val(h, k).list[1].v); else printf("\t0\t0"); putchar('\n'); } printf("*\t0\t0\t%llu\n", (long long)idx->n_no_coor); bam_header_destroy(header); bam_index_destroy(idx); return 0; } static inline int reg2bins(uint32_t beg, 
/*
 * Return 1 when alignment b intersects the half-open interval [beg, end),
 * 0 otherwise.  The alignment is taken to start at b->core.pos and to stop
 * at bam_calend() of its CIGAR; a record without CIGAR operations is
 * treated as covering the single position b->core.pos.
 */
static inline int is_overlap(uint32_t beg, uint32_t end, const bam1_t *b)
{
	const uint32_t aln_start = b->core.pos;
	uint32_t aln_stop;

	if (b->core.n_cigar)
		aln_stop = bam_calend(&b->core, bam1_cigar(b));
	else
		aln_stop = b->core.pos + 1;

	if (aln_stop <= beg) return 0; // alignment finishes before the region begins
	return aln_start < end;
}
idx->index2[tid].offset[idx->index2[tid].n-1] : idx->index2[tid].offset[beg>>BAM_LIDX_SHIFT]; if (min_off == 0) { // improvement for index files built by tabix prior to 0.1.4 int n = beg>>BAM_LIDX_SHIFT; if (n > idx->index2[tid].n) n = idx->index2[tid].n; for (i = n - 1; i >= 0; --i) if (idx->index2[tid].offset[i] != 0) break; if (i >= 0) min_off = idx->index2[tid].offset[i]; } } else min_off = 0; // tabix 0.1.2 may produce such index files for (i = n_off = 0; i < n_bins; ++i) { if ((k = kh_get(i, index, bins[i])) != kh_end(index)) n_off += kh_value(index, k).n; } if (n_off == 0) { free(bins); return iter; } off = (pair64_t*)calloc(n_off, 16); for (i = n_off = 0; i < n_bins; ++i) { if ((k = kh_get(i, index, bins[i])) != kh_end(index)) { int j; bam_binlist_t *p = &kh_value(index, k); for (j = 0; j < p->n; ++j) if (p->list[j].v > min_off) off[n_off++] = p->list[j]; } } free(bins); if (n_off == 0) { free(off); return iter; } { bam1_t *b = (bam1_t*)calloc(1, sizeof(bam1_t)); int l; ks_introsort(off, n_off, off); // resolve completely contained adjacent blocks for (i = 1, l = 0; i < n_off; ++i) if (off[l].v < off[i].v) off[++l] = off[i]; n_off = l + 1; // resolve overlaps between adjacent blocks; this may happen due to the merge in indexing for (i = 1; i < n_off; ++i) if (off[i-1].v >= off[i].u) off[i-1].v = off[i].u; { // merge adjacent blocks #if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16) for (i = 1, l = 0; i < n_off; ++i) { #ifdef BAM_TRUE_OFFSET if (off[l].v + BAM_MIN_CHUNK_GAP > off[i].u) off[l].v = off[i].v; #else if (off[l].v>>16 == off[i].u>>16) off[l].v = off[i].v; #endif else off[++l] = off[i]; } n_off = l + 1; #endif } bam_destroy1(b); } iter->n_off = n_off; iter->off = off; return iter; } pair64_t *get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int end, int *cnt_off) { // for pysam compatibility bam_iter_t iter; pair64_t *off; iter = bam_iter_query(idx, tid, beg, end); off = iter->off; *cnt_off = iter->n_off; free(iter); 
return off; } void bam_iter_destroy(bam_iter_t iter) { if (iter) { free(iter->off); free(iter); } } int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b) { int ret; if (iter && iter->finished) return -1; if (iter == 0 || iter->from_first) { ret = bam_read1(fp, b); if (ret < 0 && iter) iter->finished = 1; return ret; } if (iter->off == 0) return -1; for (;;) { if (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk if (iter->i == iter->n_off - 1) { ret = -1; break; } // no more chunks if (iter->i >= 0) assert(iter->curr_off == iter->off[iter->i].v); // otherwise bug if (iter->i < 0 || iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek bam_seek(fp, iter->off[iter->i+1].u, SEEK_SET); iter->curr_off = bam_tell(fp); } ++iter->i; } if ((ret = bam_read1(fp, b)) >= 0) { iter->curr_off = bam_tell(fp); if (b->core.tid != iter->tid || b->core.pos >= iter->end) { // no need to proceed ret = bam_validate1(NULL, b)? -1 : -5; // determine whether end of region or error break; } else if (is_overlap(iter->beg, iter->end, b)) return ret; } else break; // end of file or error } iter->finished = 1; return ret; } int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func) { int ret; bam_iter_t iter; bam1_t *b; b = bam_init1(); iter = bam_iter_query(idx, tid, beg, end); while ((ret = bam_iter_read(fp, iter, b)) >= 0) func(b, data); bam_iter_destroy(iter); bam_destroy1(b); return (ret == -1)? 
0 : ret; } samtools-0.1.19/bam_lpileup.c000066400000000000000000000115601212162403000160720ustar00rootroot00000000000000#include #include #include #include "bam.h" #include "ksort.h" #define TV_GAP 2 typedef struct __freenode_t { uint32_t level:28, cnt:4; struct __freenode_t *next; } freenode_t, *freenode_p; #define freenode_lt(a,b) ((a)->cnt < (b)->cnt || ((a)->cnt == (b)->cnt && (a)->level < (b)->level)) KSORT_INIT(node, freenode_p, freenode_lt) /* Memory pool, similar to the one in bam_pileup.c */ typedef struct { int cnt, n, max; freenode_t **buf; } mempool_t; static mempool_t *mp_init() { return (mempool_t*)calloc(1, sizeof(mempool_t)); } static void mp_destroy(mempool_t *mp) { int k; for (k = 0; k < mp->n; ++k) free(mp->buf[k]); free(mp->buf); free(mp); } static inline freenode_t *mp_alloc(mempool_t *mp) { ++mp->cnt; if (mp->n == 0) return (freenode_t*)calloc(1, sizeof(freenode_t)); else return mp->buf[--mp->n]; } static inline void mp_free(mempool_t *mp, freenode_t *p) { --mp->cnt; p->next = 0; p->cnt = TV_GAP; if (mp->n == mp->max) { mp->max = mp->max? 
mp->max<<1 : 256; mp->buf = (freenode_t**)realloc(mp->buf, sizeof(freenode_t*) * mp->max); } mp->buf[mp->n++] = p; } /* core part */ struct __bam_lplbuf_t { int max, n_cur, n_pre; int max_level, *cur_level, *pre_level; mempool_t *mp; freenode_t **aux, *head, *tail; int n_nodes, m_aux; bam_pileup_f func; void *user_data; bam_plbuf_t *plbuf; }; void bam_lplbuf_reset(bam_lplbuf_t *buf) { freenode_t *p, *q; bam_plbuf_reset(buf->plbuf); for (p = buf->head; p->next;) { q = p->next; mp_free(buf->mp, p); p = q; } buf->head = buf->tail; buf->max_level = 0; buf->n_cur = buf->n_pre = 0; buf->n_nodes = 0; } static int tview_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) { bam_lplbuf_t *tv = (bam_lplbuf_t*)data; freenode_t *p; int i, l, max_level; // allocate memory if necessary if (tv->max < n) { // enlarge tv->max = n; kroundup32(tv->max); tv->cur_level = (int*)realloc(tv->cur_level, sizeof(int) * tv->max); tv->pre_level = (int*)realloc(tv->pre_level, sizeof(int) * tv->max); } tv->n_cur = n; // update cnt for (p = tv->head; p->next; p = p->next) if (p->cnt > 0) --p->cnt; // calculate cur_level[] max_level = 0; for (i = l = 0; i < n; ++i) { const bam_pileup1_t *p = pl + i; if (p->is_head) { if (tv->head->next && tv->head->cnt == 0) { // then take a free slot freenode_t *p = tv->head->next; tv->cur_level[i] = tv->head->level; mp_free(tv->mp, tv->head); tv->head = p; --tv->n_nodes; } else tv->cur_level[i] = ++tv->max_level; } else { tv->cur_level[i] = tv->pre_level[l++]; if (p->is_tail) { // then return a free slot tv->tail->level = tv->cur_level[i]; tv->tail->next = mp_alloc(tv->mp); tv->tail = tv->tail->next; ++tv->n_nodes; } } if (tv->cur_level[i] > max_level) max_level = tv->cur_level[i]; ((bam_pileup1_t*)p)->level = tv->cur_level[i]; } assert(l == tv->n_pre); tv->func(tid, pos, n, pl, tv->user_data); // sort the linked list if (tv->n_nodes) { freenode_t *q; if (tv->n_nodes + 1 > tv->m_aux) { // enlarge tv->m_aux = tv->n_nodes + 1; 
kroundup32(tv->m_aux); tv->aux = (freenode_t**)realloc(tv->aux, sizeof(void*) * tv->m_aux); } for (p = tv->head, i = l = 0; p->next;) { if (p->level > max_level) { // then discard this entry q = p->next; mp_free(tv->mp, p); p = q; } else { tv->aux[i++] = p; p = p->next; } } tv->aux[i] = tv->tail; // add a proper tail for the loop below tv->n_nodes = i; if (tv->n_nodes) { ks_introsort(node, tv->n_nodes, tv->aux); for (i = 0; i < tv->n_nodes; ++i) tv->aux[i]->next = tv->aux[i+1]; tv->head = tv->aux[0]; } else tv->head = tv->tail; } // clean up tv->max_level = max_level; memcpy(tv->pre_level, tv->cur_level, tv->n_cur * 4); // squeeze out terminated levels for (i = l = 0; i < n; ++i) { const bam_pileup1_t *p = pl + i; if (!p->is_tail) tv->pre_level[l++] = tv->pre_level[i]; } tv->n_pre = l; /* fprintf(stderr, "%d\t", pos+1); for (i = 0; i < n; ++i) { const bam_pileup1_t *p = pl + i; if (p->is_head) fprintf(stderr, "^"); if (p->is_tail) fprintf(stderr, "$"); fprintf(stderr, "%d,", p->level); } fprintf(stderr, "\n"); */ return 0; } bam_lplbuf_t *bam_lplbuf_init(bam_pileup_f func, void *data) { bam_lplbuf_t *tv; tv = (bam_lplbuf_t*)calloc(1, sizeof(bam_lplbuf_t)); tv->mp = mp_init(); tv->head = tv->tail = mp_alloc(tv->mp); tv->func = func; tv->user_data = data; tv->plbuf = bam_plbuf_init(tview_func, tv); return (bam_lplbuf_t*)tv; } void bam_lplbuf_destroy(bam_lplbuf_t *tv) { freenode_t *p, *q; free(tv->cur_level); free(tv->pre_level); bam_plbuf_destroy(tv->plbuf); free(tv->aux); for (p = tv->head; p->next;) { q = p->next; mp_free(tv->mp, p); p = q; } mp_free(tv->mp, p); assert(tv->mp->cnt == 0); mp_destroy(tv->mp); free(tv); } int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *tv) { return bam_plbuf_push(b, tv->plbuf); } samtools-0.1.19/bam_mate.c000066400000000000000000000107111212162403000153430ustar00rootroot00000000000000#include #include #include #include "kstring.h" #include "bam.h" void bam_template_cigar(bam1_t *b1, bam1_t *b2, kstring_t *str) { bam1_t *swap; int 
/*
 * Copy alignments from `in` to `out`, filling in mate information for
 * consecutive records that share a query name.
 *
 * The header is read from `in` and written to `out` first.  Records are then
 * read one at a time into two alternating buffers so that the current record
 * (cur) can be compared with the previously kept one (pre):
 *   - records with tid < 0 and secondary alignments are passed through
 *     untouched, or dropped when remove_reads is non-zero; they never become
 *     `pre`;
 *   - when cur and pre have the same name, mate tid/pos, isize, the
 *     mate-reverse and mate-unmapped flags are set on both, a "CT" tag is
 *     added through bam_template_cigar(), and both records are written;
 *   - otherwise pre is written as a singleton with its mate fields cleared,
 *     and cur becomes the new pre.
 *
 * currently, this function ONLY works if each read has one hit
 */
void bam_mating_core(bamFile in, bamFile out, int remove_reads)
{
	bam_header_t *header;
	bam1_t *b[2];
	int curr, has_prev, pre_end = 0, cur_end;
	kstring_t str;
	str.l = str.m = 0; str.s = 0; // scratch string reused by bam_template_cigar()
	header = bam_header_read(in);
	bam_header_write(out, header);
	b[0] = bam_init1();
	b[1] = bam_init1();
	curr = 0; has_prev = 0; // curr: buffer being filled; has_prev: b[1-curr] holds a pending record
	while (bam_read1(in, b[curr]) >= 0) {
		bam1_t *cur = b[curr], *pre = b[1-curr];
		if (cur->core.tid < 0)
		{
			// no reference: pass through or drop, without disturbing the pending record
			if ( !remove_reads ) bam_write1(out, cur);
			continue;
		}
		cur_end = bam_calend(&cur->core, bam1_cigar(cur));
		// an alignment running past the end of its reference is flagged as unmapped
		if (cur_end > (int)header->target_len[cur->core.tid]) cur->core.flag |= BAM_FUNMAP;
		if (cur->core.flag & BAM_FSECONDARY)
		{
			if ( !remove_reads ) bam_write1(out, cur);
			continue; // skip secondary alignments
		}
		if (has_prev) {
			if (strcmp(bam1_qname(cur), bam1_qname(pre)) == 0) { // identical pair name
				// cross-link the two records
				cur->core.mtid = pre->core.tid; cur->core.mpos = pre->core.pos;
				pre->core.mtid = cur->core.tid; pre->core.mpos = cur->core.pos;
				if (pre->core.tid == cur->core.tid && !(cur->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))
					&& !(pre->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))) // set TLEN/ISIZE
				{
					// the 5' coordinate is the alignment end on the reverse strand, the start otherwise
					uint32_t cur5, pre5;
					cur5 = (cur->core.flag&BAM_FREVERSE)? cur_end : cur->core.pos;
					pre5 = (pre->core.flag&BAM_FREVERSE)? pre_end : pre->core.pos;
					cur->core.isize = pre5 - cur5; pre->core.isize = cur5 - pre5;
				} else cur->core.isize = pre->core.isize = 0;
				// mirror each record's strand into the other's mate-reverse flag
				if (pre->core.flag&BAM_FREVERSE) cur->core.flag |= BAM_FMREVERSE;
				else cur->core.flag &= ~BAM_FMREVERSE;
				if (cur->core.flag&BAM_FREVERSE) pre->core.flag |= BAM_FMREVERSE;
				else pre->core.flag &= ~BAM_FMREVERSE;
				// an unmapped read marks its mate as "mate unmapped" and not properly paired
				if (cur->core.flag & BAM_FUNMAP) { pre->core.flag |= BAM_FMUNMAP; pre->core.flag &= ~BAM_FPROPER_PAIR; }
				if (pre->core.flag & BAM_FUNMAP) { cur->core.flag |= BAM_FMUNMAP; cur->core.flag &= ~BAM_FPROPER_PAIR; }
				bam_template_cigar(pre, cur, &str);
				bam_write1(out, pre);
				bam_write1(out, cur);
				has_prev = 0; // both records consumed
			} else { // unpaired or singleton
				pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
				if (pre->core.flag & BAM_FPAIRED) {
					pre->core.flag |= BAM_FMUNMAP;
					pre->core.flag &= ~BAM_FMREVERSE & ~BAM_FPROPER_PAIR;
				}
				bam_write1(out, pre);
				// has_prev stays set: cur becomes the pending record after the swap below
			}
		} else has_prev = 1;
		curr = 1 - curr; // swap buffers so the record just read is kept as `pre`
		pre_end = cur_end;
	}
	// a record still pending at end of input is written as it is
	if (has_prev) bam_write1(out, b[1-curr]);
	bam_header_destroy(header);
	bam_destroy1(b[0]);
	bam_destroy1(b[1]);
	free(str.s);
}
bam_dopen(fileno(stdout), "w") : bam_open(argv[optind+1], "w"); bam_mating_core(in, out, remove_reads); bam_close(in); bam_close(out); return 0; } samtools-0.1.19/bam_md.c000066400000000000000000000312161212162403000150200ustar00rootroot00000000000000#include #include #include #include #include #include "faidx.h" #include "sam.h" #include "kstring.h" #include "kaln.h" #include "kprobaln.h" #define USE_EQUAL 1 #define DROP_TAG 2 #define BIN_QUAL 4 #define UPDATE_NM 8 #define UPDATE_MD 16 #define HASH_QNM 32 char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 }; int bam_aux_drop_other(bam1_t *b, uint8_t *s); void bam_fillmd1_core(bam1_t *b, char *ref, int flag, int max_nm) { uint8_t *seq = bam1_seq(b); uint32_t *cigar = bam1_cigar(b); bam1_core_t *c = &b->core; int i, x, y, u = 0; kstring_t *str; int32_t old_nm_i = -1, nm = 0; str = (kstring_t*)calloc(1, sizeof(kstring_t)); for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) { int j, l = cigar[i]>>4, op = cigar[i]&0xf; if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { for (j = 0; j < l; ++j) { int z = y + j; int c1 = bam1_seqi(seq, z), c2 = bam_nt16_table[(int)ref[x+j]]; if (ref[x+j] == 0) break; // out of boundary if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match if (flag&USE_EQUAL) seq[z/2] &= (z&1)? 
0xf0 : 0x0f; ++u; } else { kputw(u, str); kputc(ref[x+j], str); u = 0; ++nm; } } if (j < l) break; x += l; y += l; } else if (op == BAM_CDEL) { kputw(u, str); kputc('^', str); for (j = 0; j < l; ++j) { if (ref[x+j] == 0) break; kputc(ref[x+j], str); } u = 0; if (j < l) break; x += l; nm += l; } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) { y += l; if (op == BAM_CINS) nm += l; } else if (op == BAM_CREF_SKIP) { x += l; } } kputw(u, str); // apply max_nm if (max_nm > 0 && nm >= max_nm) { for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) { int j, l = cigar[i]>>4, op = cigar[i]&0xf; if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { for (j = 0; j < l; ++j) { int z = y + j; int c1 = bam1_seqi(seq, z), c2 = bam_nt16_table[(int)ref[x+j]]; if (ref[x+j] == 0) break; // out of boundary if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match seq[z/2] |= (z&1)? 0x0f : 0xf0; bam1_qual(b)[z] = 0; } } if (j < l) break; x += l; y += l; } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l; else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l; } } // update NM if (flag & UPDATE_NM) { uint8_t *old_nm = bam_aux_get(b, "NM"); if (c->flag & BAM_FUNMAP) return; if (old_nm) old_nm_i = bam_aux2i(old_nm); if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm); else if (nm != old_nm_i) { fprintf(stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam1_qname(b), old_nm_i, nm); bam_aux_del(b, old_nm); bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm); } } // update MD if (flag & UPDATE_MD) { uint8_t *old_md = bam_aux_get(b, "MD"); if (c->flag & BAM_FUNMAP) return; if (!old_md) bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s); else { int is_diff = 0; if (strlen((char*)old_md+1) == str->l) { for (i = 0; i < str->l; ++i) if (toupper(old_md[i+1]) != toupper(str->s[i])) break; if (i < str->l) is_diff = 1; } else is_diff = 1; if (is_diff) { fprintf(stderr, "[bam_fillmd1] different MD for read '%s': '%s' -> '%s'\n", bam1_qname(b), old_md+1, 
str->s); bam_aux_del(b, old_md); bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s); } } } // drop all tags but RG if (flag&DROP_TAG) { uint8_t *q = bam_aux_get(b, "RG"); bam_aux_drop_other(b, q); } // reduce the resolution of base quality if (flag&BIN_QUAL) { uint8_t *qual = bam1_qual(b); for (i = 0; i < b->core.l_qseq; ++i) if (qual[i] >= 3) qual[i] = qual[i]/10*10 + 7; } free(str->s); free(str); } void bam_fillmd1(bam1_t *b, char *ref, int flag) { bam_fillmd1_core(b, ref, flag, 0); } int bam_cap_mapQ(bam1_t *b, char *ref, int thres) { uint8_t *seq = bam1_seq(b), *qual = bam1_qual(b); uint32_t *cigar = bam1_cigar(b); bam1_core_t *c = &b->core; int i, x, y, mm, q, len, clip_l, clip_q; double t; if (thres < 0) thres = 40; // set the default mm = q = len = clip_l = clip_q = 0; for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) { int j, l = cigar[i]>>4, op = cigar[i]&0xf; if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { for (j = 0; j < l; ++j) { int z = y + j; int c1 = bam1_seqi(seq, z), c2 = bam_nt16_table[(int)ref[x+j]]; if (ref[x+j] == 0) break; // out of boundary if (c2 != 15 && c1 != 15 && qual[z] >= 13) { // not ambiguous ++len; if (c1 && c1 != c2 && qual[z] >= 13) { // mismatch ++mm; q += qual[z] > 33? 
33 : qual[z]; } } } if (j < l) break; x += l; y += l; len += l; } else if (op == BAM_CDEL) { for (j = 0; j < l; ++j) if (ref[x+j] == 0) break; if (j < l) break; x += l; } else if (op == BAM_CSOFT_CLIP) { for (j = 0; j < l; ++j) clip_q += qual[y+j]; clip_l += l; y += l; } else if (op == BAM_CHARD_CLIP) { clip_q += 13 * l; clip_l += l; } else if (op == BAM_CINS) y += l; else if (op == BAM_CREF_SKIP) x += l; } for (i = 0, t = 1; i < mm; ++i) t *= (double)len / (i+1); t = q - 4.343 * log(t) + clip_q / 5.; if (t > thres) return -1; if (t < 0) t = 0; t = sqrt((thres - t) / thres) * thres; // fprintf(stderr, "%s %lf %d\n", bam1_qname(b), t, q); return (int)(t + .499); } int bam_prob_realn_core(bam1_t *b, const char *ref, int flag) { int k, i, bw, x, y, yb, ye, xb, xe, apply_baq = flag&1, extend_baq = flag>>1&1, redo_baq = flag&4; uint32_t *cigar = bam1_cigar(b); bam1_core_t *c = &b->core; kpa_par_t conf = kpa_par_def; uint8_t *bq = 0, *zq = 0, *qual = bam1_qual(b); if ((c->flag & BAM_FUNMAP) || b->core.l_qseq == 0) return -1; // do nothing // test if BQ or ZQ is present if ((bq = bam_aux_get(b, "BQ")) != 0) ++bq; if ((zq = bam_aux_get(b, "ZQ")) != 0 && *zq == 'Z') ++zq; if (bq && redo_baq) { bam_aux_del(b, bq-1); bq = 0; } if (bq && zq) { // remove the ZQ tag bam_aux_del(b, zq-1); zq = 0; } if (bq || zq) { if ((apply_baq && zq) || (!apply_baq && bq)) return -3; // in both cases, do nothing if (bq && apply_baq) { // then convert BQ to ZQ for (i = 0; i < c->l_qseq; ++i) qual[i] = qual[i] + 64 < bq[i]? 
0 : qual[i] - ((int)bq[i] - 64); *(bq - 3) = 'Z'; } else if (zq && !apply_baq) { // then convert ZQ to BQ for (i = 0; i < c->l_qseq; ++i) qual[i] += (int)zq[i] - 64; *(zq - 3) = 'B'; } return 0; } // find the start and end of the alignment x = c->pos, y = 0, yb = ye = xb = xe = -1; for (k = 0; k < c->n_cigar; ++k) { int op, l; op = cigar[k]&0xf; l = cigar[k]>>4; if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { if (yb < 0) yb = y; if (xb < 0) xb = x; ye = y + l; xe = x + l; x += l; y += l; } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l; else if (op == BAM_CDEL) x += l; else if (op == BAM_CREF_SKIP) return -1; // do nothing if there is a reference skip } // set bandwidth and the start and the end bw = 7; if (abs((xe - xb) - (ye - yb)) > bw) bw = abs((xe - xb) - (ye - yb)) + 3; conf.bw = bw; xb -= yb + bw/2; if (xb < 0) xb = 0; xe += c->l_qseq - ye + bw/2; if (xe - xb - c->l_qseq > bw) xb += (xe - xb - c->l_qseq - bw) / 2, xe -= (xe - xb - c->l_qseq - bw) / 2; { // glocal uint8_t *s, *r, *q, *seq = bam1_seq(b), *bq; int *state; bq = calloc(c->l_qseq + 1, 1); memcpy(bq, qual, c->l_qseq); s = calloc(c->l_qseq, 1); for (i = 0; i < c->l_qseq; ++i) s[i] = bam_nt16_nt4_table[bam1_seqi(seq, i)]; r = calloc(xe - xb, 1); for (i = xb; i < xe; ++i) { if (ref[i] == 0) { xe = i; break; } r[i-xb] = bam_nt16_nt4_table[bam_nt16_table[(int)ref[i]]]; } state = calloc(c->l_qseq, sizeof(int)); q = calloc(c->l_qseq, 1); kpa_glocal(r, xe-xb, s, c->l_qseq, qual, &conf, state, q); if (!extend_baq) { // in this block, bq[] is capped by base quality qual[] for (k = 0, x = c->pos, y = 0; k < c->n_cigar; ++k) { int op = cigar[k]&0xf, l = cigar[k]>>4; if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { for (i = y; i < y + l; ++i) { if ((state[i]&3) != 0 || state[i]>>2 != x - xb + (i - y)) bq[i] = 0; else bq[i] = bq[i] < q[i]? 
bq[i] : q[i]; } x += l; y += l; } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l; else if (op == BAM_CDEL) x += l; } for (i = 0; i < c->l_qseq; ++i) bq[i] = qual[i] - bq[i] + 64; // finalize BQ } else { // in this block, bq[] is BAQ that can be larger than qual[] (different from the above!) uint8_t *left, *rght; left = calloc(c->l_qseq, 1); rght = calloc(c->l_qseq, 1); for (k = 0, x = c->pos, y = 0; k < c->n_cigar; ++k) { int op = cigar[k]&0xf, l = cigar[k]>>4; if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { for (i = y; i < y + l; ++i) bq[i] = ((state[i]&3) != 0 || state[i]>>2 != x - xb + (i - y))? 0 : q[i]; for (left[y] = bq[y], i = y + 1; i < y + l; ++i) left[i] = bq[i] > left[i-1]? bq[i] : left[i-1]; for (rght[y+l-1] = bq[y+l-1], i = y + l - 2; i >= y; --i) rght[i] = bq[i] > rght[i+1]? bq[i] : rght[i+1]; for (i = y; i < y + l; ++i) bq[i] = left[i] < rght[i]? left[i] : rght[i]; x += l; y += l; } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l; else if (op == BAM_CDEL) x += l; } for (i = 0; i < c->l_qseq; ++i) bq[i] = 64 + (qual[i] <= bq[i]? 
0 : qual[i] - bq[i]); // finalize BQ free(left); free(rght); } if (apply_baq) { for (i = 0; i < c->l_qseq; ++i) qual[i] -= bq[i] - 64; // modify qual bam_aux_append(b, "ZQ", 'Z', c->l_qseq + 1, bq); } else bam_aux_append(b, "BQ", 'Z', c->l_qseq + 1, bq); free(bq); free(s); free(r); free(q); free(state); } return 0; } int bam_prob_realn(bam1_t *b, const char *ref) { return bam_prob_realn_core(b, ref, 1); } int bam_fillmd(int argc, char *argv[]) { int c, flt_flag, tid = -2, ret, len, is_bam_out, is_sam_in, is_uncompressed, max_nm, is_realn, capQ, baq_flag; samfile_t *fp, *fpout = 0; faidx_t *fai; char *ref = 0, mode_w[8], mode_r[8]; bam1_t *b; flt_flag = UPDATE_NM | UPDATE_MD; is_bam_out = is_sam_in = is_uncompressed = is_realn = max_nm = capQ = baq_flag = 0; mode_w[0] = mode_r[0] = 0; strcpy(mode_r, "r"); strcpy(mode_w, "w"); while ((c = getopt(argc, argv, "EqreuNhbSC:n:Ad")) >= 0) { switch (c) { case 'r': is_realn = 1; break; case 'e': flt_flag |= USE_EQUAL; break; case 'd': flt_flag |= DROP_TAG; break; case 'q': flt_flag |= BIN_QUAL; break; case 'h': flt_flag |= HASH_QNM; break; case 'N': flt_flag &= ~(UPDATE_MD|UPDATE_NM); break; case 'b': is_bam_out = 1; break; case 'u': is_uncompressed = is_bam_out = 1; break; case 'S': is_sam_in = 1; break; case 'n': max_nm = atoi(optarg); break; case 'C': capQ = atoi(optarg); break; case 'A': baq_flag |= 1; break; case 'E': baq_flag |= 2; break; default: fprintf(stderr, "[bam_fillmd] unrecognized option '-%c'\n", c); return 1; } } if (!is_sam_in) strcat(mode_r, "b"); if (is_bam_out) strcat(mode_w, "b"); else strcat(mode_w, "h"); if (is_uncompressed) strcat(mode_w, "u"); if (optind + 1 >= argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: samtools fillmd [-eubrS] \n\n"); fprintf(stderr, "Options: -e change identical bases to '='\n"); fprintf(stderr, " -u uncompressed BAM output (for piping)\n"); fprintf(stderr, " -b compressed BAM output\n"); fprintf(stderr, " -S the input is SAM with header\n"); fprintf(stderr, " -A 
modify the quality string\n"); fprintf(stderr, " -r compute the BQ tag (without -A) or cap baseQ by BAQ (with -A)\n"); fprintf(stderr, " -E extended BAQ for better sensitivity but lower specificity\n\n"); return 1; } fp = samopen(argv[optind], mode_r, 0); if (fp == 0) return 1; if (is_sam_in && (fp->header == 0 || fp->header->n_targets == 0)) { fprintf(stderr, "[bam_fillmd] input SAM does not have header. Abort!\n"); return 1; } fpout = samopen("-", mode_w, fp->header); fai = fai_load(argv[optind+1]); b = bam_init1(); while ((ret = samread(fp, b)) >= 0) { if (b->core.tid >= 0) { if (tid != b->core.tid) { free(ref); ref = fai_fetch(fai, fp->header->target_name[b->core.tid], &len); tid = b->core.tid; if (ref == 0) fprintf(stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n", fp->header->target_name[tid]); } if (is_realn) bam_prob_realn_core(b, ref, baq_flag); if (capQ > 10) { int q = bam_cap_mapQ(b, ref, capQ); if (b->core.qual > q) b->core.qual = q; } if (ref) bam_fillmd1_core(b, ref, flt_flag, max_nm); } samwrite(fpout, b); } bam_destroy1(b); free(ref); fai_destroy(fai); samclose(fp); samclose(fpout); return 0; } samtools-0.1.19/bam_pileup.c000066400000000000000000000312321212162403000157140ustar00rootroot00000000000000#include #include #include #include #include "sam.h" typedef struct { int k, x, y, end; } cstate_t; static cstate_t g_cstate_null = { -1, 0, 0, 0 }; typedef struct __linkbuf_t { bam1_t b; uint32_t beg, end; cstate_t s; struct __linkbuf_t *next; } lbnode_t; /* --- BEGIN: Memory pool */ typedef struct { int cnt, n, max; lbnode_t **buf; } mempool_t; static mempool_t *mp_init() { mempool_t *mp; mp = (mempool_t*)calloc(1, sizeof(mempool_t)); return mp; } static void mp_destroy(mempool_t *mp) { int k; for (k = 0; k < mp->n; ++k) { free(mp->buf[k]->b.data); free(mp->buf[k]); } free(mp->buf); free(mp); } static inline lbnode_t *mp_alloc(mempool_t *mp) { ++mp->cnt; if (mp->n == 0) return (lbnode_t*)calloc(1, sizeof(lbnode_t)); else return 
/*
 * Hand node p back to the pool: detach it from any list, decrement the
 * count of nodes in use, and park it in the pool's buffer.  The buffer
 * starts at 256 slots and doubles whenever it is full.
 */
static inline void mp_free(mempool_t *mp, lbnode_t *p)
{
	p->next = 0; // clear lbnode_t::next here
	mp->cnt -= 1;

	if (mp->n == mp->max) {
		if (mp->max == 0) mp->max = 256;
		else mp->max <<= 1;
		mp->buf = (lbnode_t**)realloc(mp->buf, mp->max * sizeof(lbnode_t*));
	}

	mp->buf[mp->n] = p;
	mp->n += 1;
}
BAM_CEQUAL || _cop(cigar[s->k]) == BAM_CDIFF) s->y += l; s->x += l; ++s->k; } else { // find the next M/D/N/=/X if (_cop(cigar[s->k]) == BAM_CMATCH|| _cop(cigar[s->k]) == BAM_CEQUAL || _cop(cigar[s->k]) == BAM_CDIFF) s->y += l; s->x += l; for (k = s->k + 1; k < c->n_cigar; ++k) { op = _cop(cigar[k]), l = _cln(cigar[k]); if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP || op == BAM_CEQUAL || op == BAM_CDIFF) break; else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) s->y += l; } s->k = k; } assert(s->k < c->n_cigar); // otherwise a bug } // else, do nothing } { // collect pileup information int op, l; op = _cop(cigar[s->k]); l = _cln(cigar[s->k]); p->is_del = p->indel = p->is_refskip = 0; if (s->x + l - 1 == pos && s->k + 1 < c->n_cigar) { // peek the next operation int op2 = _cop(cigar[s->k+1]); int l2 = _cln(cigar[s->k+1]); if (op2 == BAM_CDEL) p->indel = -(int)l2; else if (op2 == BAM_CINS) p->indel = l2; else if (op2 == BAM_CPAD && s->k + 2 < c->n_cigar) { // no working for adjacent padding int l3 = 0; for (k = s->k + 2; k < c->n_cigar; ++k) { op2 = _cop(cigar[k]); l2 = _cln(cigar[k]); if (op2 == BAM_CINS) l3 += l2; else if (op2 == BAM_CDEL || op2 == BAM_CMATCH || op2 == BAM_CREF_SKIP || op2 == BAM_CEQUAL || op2 == BAM_CDIFF) break; } if (l3 > 0) p->indel = l3; } } if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { p->qpos = s->y + (pos - s->x); } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) { p->is_del = 1; p->qpos = s->y; // FIXME: distinguish D and N!!!!! 
p->is_refskip = (op == BAM_CREF_SKIP); } // cannot be other operations; otherwise a bug p->is_head = (pos == c->pos); p->is_tail = (pos == s->end); } return 1; } /* --- END: Auxiliary functions */ /******************* * pileup iterator * *******************/ struct __bam_plp_t { mempool_t *mp; lbnode_t *head, *tail, *dummy; int32_t tid, pos, max_tid, max_pos; int is_eof, flag_mask, max_plp, error, maxcnt; bam_pileup1_t *plp; // for the "auto" interface only bam1_t *b; bam_plp_auto_f func; void *data; }; bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data) { bam_plp_t iter; iter = calloc(1, sizeof(struct __bam_plp_t)); iter->mp = mp_init(); iter->head = iter->tail = mp_alloc(iter->mp); iter->dummy = mp_alloc(iter->mp); iter->max_tid = iter->max_pos = -1; iter->flag_mask = BAM_DEF_MASK; iter->maxcnt = 8000; if (func) { iter->func = func; iter->data = data; iter->b = bam_init1(); } return iter; } void bam_plp_destroy(bam_plp_t iter) { mp_free(iter->mp, iter->dummy); mp_free(iter->mp, iter->head); if (iter->mp->cnt != 0) fprintf(stderr, "[bam_plp_destroy] memory leak: %d. 
Continue anyway.\n", iter->mp->cnt); mp_destroy(iter->mp); if (iter->b) bam_destroy1(iter->b); free(iter->plp); free(iter); } const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp) { if (iter->error) { *_n_plp = -1; return 0; } *_n_plp = 0; if (iter->is_eof && iter->head->next == 0) return 0; while (iter->is_eof || iter->max_tid > iter->tid || (iter->max_tid == iter->tid && iter->max_pos > iter->pos)) { int n_plp = 0; lbnode_t *p, *q; // write iter->plp at iter->pos iter->dummy->next = iter->head; for (p = iter->head, q = iter->dummy; p->next; q = p, p = p->next) { if (p->b.core.tid < iter->tid || (p->b.core.tid == iter->tid && p->end <= iter->pos)) { // then remove q->next = p->next; mp_free(iter->mp, p); p = q; } else if (p->b.core.tid == iter->tid && p->beg <= iter->pos) { // here: p->end > pos; then add to pileup if (n_plp == iter->max_plp) { // then double the capacity iter->max_plp = iter->max_plp? iter->max_plp<<1 : 256; iter->plp = (bam_pileup1_t*)realloc(iter->plp, sizeof(bam_pileup1_t) * iter->max_plp); } iter->plp[n_plp].b = &p->b; if (resolve_cigar2(iter->plp + n_plp, iter->pos, &p->s)) ++n_plp; // actually always true... } } iter->head = iter->dummy->next; // dummy->next may be changed *_n_plp = n_plp; *_tid = iter->tid; *_pos = iter->pos; // update iter->tid and iter->pos if (iter->head->next) { if (iter->tid > iter->head->b.core.tid) { fprintf(stderr, "[%s] unsorted input. 
Pileup aborts.\n", __func__); iter->error = 1; *_n_plp = -1; return 0; } } if (iter->tid < iter->head->b.core.tid) { // come to a new reference sequence iter->tid = iter->head->b.core.tid; iter->pos = iter->head->beg; // jump to the next reference } else if (iter->pos < iter->head->beg) { // here: tid == head->b.core.tid iter->pos = iter->head->beg; // jump to the next position } else ++iter->pos; // scan contiguously // return if (n_plp) return iter->plp; if (iter->is_eof && iter->head->next == 0) break; } return 0; } int bam_plp_push(bam_plp_t iter, const bam1_t *b) { if (iter->error) return -1; if (b) { if (b->core.tid < 0) return 0; if (b->core.flag & iter->flag_mask) return 0; if (iter->tid == b->core.tid && iter->pos == b->core.pos && iter->mp->cnt > iter->maxcnt) return 0; bam_copy1(&iter->tail->b, b); iter->tail->beg = b->core.pos; iter->tail->end = bam_calend(&b->core, bam1_cigar(b)); iter->tail->s = g_cstate_null; iter->tail->s.end = iter->tail->end - 1; // initialize cstate_t if (b->core.tid < iter->max_tid) { fprintf(stderr, "[bam_pileup_core] the input is not sorted (chromosomes out of order)\n"); iter->error = 1; return -1; } if ((b->core.tid == iter->max_tid) && (iter->tail->beg < iter->max_pos)) { fprintf(stderr, "[bam_pileup_core] the input is not sorted (reads out of order)\n"); iter->error = 1; return -1; } iter->max_tid = b->core.tid; iter->max_pos = iter->tail->beg; if (iter->tail->end > iter->pos || iter->tail->b.core.tid > iter->tid) { iter->tail->next = mp_alloc(iter->mp); iter->tail = iter->tail->next; } } else iter->is_eof = 1; return 0; } const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp) { const bam_pileup1_t *plp; if (iter->func == 0 || iter->error) { *_n_plp = -1; return 0; } if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp; else { // no pileup line can be obtained; read alignments *_n_plp = 0; if (iter->is_eof) return 0; while (iter->func(iter->data, iter->b) >= 0) { if 
(bam_plp_push(iter, iter->b) < 0) { *_n_plp = -1; return 0; } if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp; // otherwise no pileup line can be returned; read the next alignment. } bam_plp_push(iter, 0); if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp; return 0; } } void bam_plp_reset(bam_plp_t iter) { lbnode_t *p, *q; iter->max_tid = iter->max_pos = -1; iter->tid = iter->pos = 0; iter->is_eof = 0; for (p = iter->head; p->next;) { q = p->next; mp_free(iter->mp, p); p = q; } iter->head = iter->tail; } void bam_plp_set_mask(bam_plp_t iter, int mask) { iter->flag_mask = mask < 0? BAM_DEF_MASK : (BAM_FUNMAP | mask); } void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt) { iter->maxcnt = maxcnt; } /***************** * callback APIs * *****************/ int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data) { bam_plbuf_t *buf; int ret; bam1_t *b; b = bam_init1(); buf = bam_plbuf_init(func, func_data); bam_plbuf_set_mask(buf, mask); while ((ret = bam_read1(fp, b)) >= 0) bam_plbuf_push(b, buf); bam_plbuf_push(0, buf); bam_plbuf_destroy(buf); bam_destroy1(b); return 0; } void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask) { bam_plp_set_mask(buf->iter, mask); } void bam_plbuf_reset(bam_plbuf_t *buf) { bam_plp_reset(buf->iter); } bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data) { bam_plbuf_t *buf; buf = calloc(1, sizeof(bam_plbuf_t)); buf->iter = bam_plp_init(0, 0); buf->func = func; buf->data = data; return buf; } void bam_plbuf_destroy(bam_plbuf_t *buf) { bam_plp_destroy(buf->iter); free(buf); } int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf) { int ret, n_plp, tid, pos; const bam_pileup1_t *plp; ret = bam_plp_push(buf->iter, b); if (ret < 0) return ret; while ((plp = bam_plp_next(buf->iter, &tid, &pos, &n_plp)) != 0) buf->func(tid, pos, n_plp, plp, buf->data); return 0; } /*********** * mpileup * ***********/ struct __bam_mplp_t { int n; uint64_t min, *pos; bam_plp_t *iter; int *n_plp; const 
bam_pileup1_t **plp; }; bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data) { int i; bam_mplp_t iter; iter = calloc(1, sizeof(struct __bam_mplp_t)); iter->pos = calloc(n, 8); iter->n_plp = calloc(n, sizeof(int)); iter->plp = calloc(n, sizeof(void*)); iter->iter = calloc(n, sizeof(void*)); iter->n = n; iter->min = (uint64_t)-1; for (i = 0; i < n; ++i) { iter->iter[i] = bam_plp_init(func, data[i]); iter->pos[i] = iter->min; } return iter; } void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt) { int i; for (i = 0; i < iter->n; ++i) iter->iter[i]->maxcnt = maxcnt; } void bam_mplp_destroy(bam_mplp_t iter) { int i; for (i = 0; i < iter->n; ++i) bam_plp_destroy(iter->iter[i]); free(iter->iter); free(iter->pos); free(iter->n_plp); free(iter->plp); free(iter); } int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp) { int i, ret = 0; uint64_t new_min = (uint64_t)-1; for (i = 0; i < iter->n; ++i) { if (iter->pos[i] == iter->min) { int tid, pos; iter->plp[i] = bam_plp_auto(iter->iter[i], &tid, &pos, &iter->n_plp[i]); iter->pos[i] = (uint64_t)tid<<32 | pos; } if (iter->plp[i] && iter->pos[i] < new_min) new_min = iter->pos[i]; } iter->min = new_min; if (new_min == (uint64_t)-1) return 0; *_tid = new_min>>32; *_pos = (uint32_t)new_min; for (i = 0; i < iter->n; ++i) { if (iter->pos[i] == iter->min) { // FIXME: valgrind reports "uninitialised value(s) at this line" n_plp[i] = iter->n_plp[i], plp[i] = iter->plp[i]; ++ret; } else n_plp[i] = 0, plp[i] = 0; } return ret; } samtools-0.1.19/bam_plcmd.c000066400000000000000000000532371212162403000155260ustar00rootroot00000000000000#include #include #include #include #include #include #include #include #include "sam.h" #include "faidx.h" #include "kstring.h" #include "sam_header.h" static inline int printw(int c, FILE *fp) { char buf[16]; int l, x; if (c == 0) return fputc('0', fp); for (l = 0, x = c < 0? 
-c : c; x > 0; x /= 10) buf[l++] = x%10 + '0'; if (c < 0) buf[l++] = '-'; buf[l] = 0; for (x = 0; x < l/2; ++x) { int y = buf[x]; buf[x] = buf[l-1-x]; buf[l-1-x] = y; } fputs(buf, fp); return 0; } static inline void pileup_seq(const bam_pileup1_t *p, int pos, int ref_len, const char *ref) { int j; if (p->is_head) { putchar('^'); putchar(p->b->core.qual > 93? 126 : p->b->core.qual + 33); } if (!p->is_del) { int c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; if (ref) { int rb = pos < ref_len? ref[pos] : 'N'; if (c == '=' || bam_nt16_table[c] == bam_nt16_table[rb]) c = bam1_strand(p->b)? ',' : '.'; else c = bam1_strand(p->b)? tolower(c) : toupper(c); } else { if (c == '=') c = bam1_strand(p->b)? ',' : '.'; else c = bam1_strand(p->b)? tolower(c) : toupper(c); } putchar(c); } else putchar(p->is_refskip? (bam1_strand(p->b)? '<' : '>') : '*'); if (p->indel > 0) { putchar('+'); printw(p->indel, stdout); for (j = 1; j <= p->indel; ++j) { int c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)]; putchar(bam1_strand(p->b)? tolower(c) : toupper(c)); } } else if (p->indel < 0) { printw(p->indel, stdout); for (j = 1; j <= -p->indel; ++j) { int c = (ref && (int)pos+j < ref_len)? ref[pos+j] : 'N'; putchar(bam1_strand(p->b)? 
tolower(c) : toupper(c)); } } if (p->is_tail) putchar('$'); } #include #include "bam2bcf.h" #include "sample.h" #define MPLP_GLF 0x10 #define MPLP_NO_COMP 0x20 #define MPLP_NO_ORPHAN 0x40 #define MPLP_REALN 0x80 #define MPLP_NO_INDEL 0x400 #define MPLP_REDO_BAQ 0x800 #define MPLP_ILLUMINA13 0x1000 #define MPLP_IGNORE_RG 0x2000 #define MPLP_PRINT_POS 0x4000 #define MPLP_PRINT_MAPQ 0x8000 #define MPLP_PER_SAMPLE 0x10000 void *bed_read(const char *fn); void bed_destroy(void *_h); int bed_overlap(const void *_h, const char *chr, int beg, int end); typedef struct { int max_mq, min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag; int rflag_require, rflag_filter; int openQ, extQ, tandemQ, min_support; // for indels double min_frac; // for indels char *reg, *pl_list, *fai_fname; faidx_t *fai; void *bed, *rghash; } mplp_conf_t; typedef struct { bamFile fp; bam_iter_t iter; bam_header_t *h; int ref_id; char *ref; const mplp_conf_t *conf; } mplp_aux_t; typedef struct { int n; int *n_plp, *m_plp; bam_pileup1_t **plp; } mplp_pileup_t; static int mplp_func(void *data, bam1_t *b) { extern int bam_realn(bam1_t *b, const char *ref); extern int bam_prob_realn_core(bam1_t *b, const char *ref, int); extern int bam_cap_mapQ(bam1_t *b, char *ref, int thres); mplp_aux_t *ma = (mplp_aux_t*)data; int ret, skip = 0; do { int has_ref; ret = ma->iter? 
bam_iter_read(ma->fp, ma->iter, b) : bam_read1(ma->fp, b); if (ret < 0) break; if (b->core.tid < 0 || (b->core.flag&BAM_FUNMAP)) { // exclude unmapped reads skip = 1; continue; } if (ma->conf->rflag_require && !(ma->conf->rflag_require&b->core.flag)) { skip = 1; continue; } if (ma->conf->rflag_filter && ma->conf->rflag_filter&b->core.flag) { skip = 1; continue; } if (ma->conf->bed) { // test overlap skip = !bed_overlap(ma->conf->bed, ma->h->target_name[b->core.tid], b->core.pos, bam_calend(&b->core, bam1_cigar(b))); if (skip) continue; } if (ma->conf->rghash) { // exclude read groups uint8_t *rg = bam_aux_get(b, "RG"); skip = (rg && bcf_str2id(ma->conf->rghash, (const char*)(rg+1)) >= 0); if (skip) continue; } if (ma->conf->flag & MPLP_ILLUMINA13) { int i; uint8_t *qual = bam1_qual(b); for (i = 0; i < b->core.l_qseq; ++i) qual[i] = qual[i] > 31? qual[i] - 31 : 0; } has_ref = (ma->ref && ma->ref_id == b->core.tid)? 1 : 0; skip = 0; if (has_ref && (ma->conf->flag&MPLP_REALN)) bam_prob_realn_core(b, ma->ref, (ma->conf->flag & MPLP_REDO_BAQ)? 7 : 3); if (has_ref && ma->conf->capQ_thres > 10) { int q = bam_cap_mapQ(b, ma->ref, ma->conf->capQ_thres); if (q < 0) skip = 1; else if (b->core.qual > q) b->core.qual = q; } else if (b->core.qual < ma->conf->min_mq) skip = 1; else if ((ma->conf->flag&MPLP_NO_ORPHAN) && (b->core.flag&1) && !(b->core.flag&2)) skip = 1; } while (skip); return ret; } static void group_smpl(mplp_pileup_t *m, bam_sample_t *sm, kstring_t *buf, int n, char *const*fn, int *n_plp, const bam_pileup1_t **plp, int ignore_rg) { int i, j; memset(m->n_plp, 0, m->n * sizeof(int)); for (i = 0; i < n; ++i) { for (j = 0; j < n_plp[i]; ++j) { const bam_pileup1_t *p = plp[i] + j; uint8_t *q; int id = -1; q = ignore_rg? 
0 : bam_aux_get(p->b, "RG"); if (q) id = bam_smpl_rg2smid(sm, fn[i], (char*)q+1, buf); if (id < 0) id = bam_smpl_rg2smid(sm, fn[i], 0, buf); if (id < 0 || id >= m->n) { assert(q); // otherwise a bug fprintf(stderr, "[%s] Read group %s used in file %s but absent from the header or an alignment missing read group.\n", __func__, (char*)q+1, fn[i]); exit(1); } if (m->n_plp[id] == m->m_plp[id]) { m->m_plp[id] = m->m_plp[id]? m->m_plp[id]<<1 : 8; m->plp[id] = realloc(m->plp[id], sizeof(bam_pileup1_t) * m->m_plp[id]); } m->plp[id][m->n_plp[id]++] = *p; } } } static int mpileup(mplp_conf_t *conf, int n, char **fn) { extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list); extern void bcf_call_del_rghash(void *rghash); mplp_aux_t **data; int i, tid, pos, *n_plp, tid0 = -1, beg0 = 0, end0 = 1u<<29, ref_len, ref_tid = -1, max_depth, max_indel_depth; const bam_pileup1_t **plp; bam_mplp_t iter; bam_header_t *h = 0; char *ref; void *rghash = 0; bcf_callaux_t *bca = 0; bcf_callret1_t *bcr = 0; bcf_call_t bc; bcf_t *bp = 0; bcf_hdr_t *bh = 0; bam_sample_t *sm = 0; kstring_t buf; mplp_pileup_t gplp; memset(&gplp, 0, sizeof(mplp_pileup_t)); memset(&buf, 0, sizeof(kstring_t)); memset(&bc, 0, sizeof(bcf_call_t)); data = calloc(n, sizeof(void*)); plp = calloc(n, sizeof(void*)); n_plp = calloc(n, sizeof(int*)); sm = bam_smpl_init(); // read the header and initialize data for (i = 0; i < n; ++i) { bam_header_t *h_tmp; data[i] = calloc(1, sizeof(mplp_aux_t)); data[i]->fp = strcmp(fn[i], "-") == 0? bam_dopen(fileno(stdin), "r") : bam_open(fn[i], "r"); if ( !data[i]->fp ) { fprintf(stderr, "[%s] failed to open %s: %s\n", __func__, fn[i], strerror(errno)); exit(1); } data[i]->conf = conf; h_tmp = bam_header_read(data[i]->fp); if ( !h_tmp ) { fprintf(stderr,"[%s] fail to read the header of %s\n", __func__, fn[i]); exit(1); } data[i]->h = i? h : h_tmp; // for i==0, "h" has not been set yet bam_smpl_add(sm, fn[i], (conf->flag&MPLP_IGNORE_RG)? 
0 : h_tmp->text); rghash = bcf_call_add_rg(rghash, h_tmp->text, conf->pl_list); if (conf->reg) { int beg, end; bam_index_t *idx; idx = bam_index_load(fn[i]); if (idx == 0) { fprintf(stderr, "[%s] fail to load index for %s\n", __func__, fn[i]); exit(1); } if (bam_parse_region(h_tmp, conf->reg, &tid, &beg, &end) < 0) { fprintf(stderr, "[%s] malformatted region or wrong seqname for %s\n", __func__, fn[i]); exit(1); } if (i == 0) tid0 = tid, beg0 = beg, end0 = end; data[i]->iter = bam_iter_query(idx, tid, beg, end); bam_index_destroy(idx); } if (i == 0) h = h_tmp; else { // FIXME: to check consistency bam_header_destroy(h_tmp); } } gplp.n = sm->n; gplp.n_plp = calloc(sm->n, sizeof(int)); gplp.m_plp = calloc(sm->n, sizeof(int)); gplp.plp = calloc(sm->n, sizeof(void*)); fprintf(stderr, "[%s] %d samples in %d input files\n", __func__, sm->n, n); // write the VCF header if (conf->flag & MPLP_GLF) { kstring_t s; bh = calloc(1, sizeof(bcf_hdr_t)); s.l = s.m = 0; s.s = 0; bp = bcf_open("-", (conf->flag&MPLP_NO_COMP)? 
"wu" : "w"); for (i = 0; i < h->n_targets; ++i) { kputs(h->target_name[i], &s); kputc('\0', &s); } bh->l_nm = s.l; bh->name = malloc(s.l); memcpy(bh->name, s.s, s.l); s.l = 0; for (i = 0; i < sm->n; ++i) { kputs(sm->smpl[i], &s); kputc('\0', &s); } bh->l_smpl = s.l; bh->sname = malloc(s.l); memcpy(bh->sname, s.s, s.l); s.l = 0; ksprintf(&s, "##samtoolsVersion=%s\n", BAM_VERSION); if (conf->fai_fname) ksprintf(&s, "##reference=file://%s\n", conf->fai_fname); h->dict = sam_header_parse2(h->text); int nseq; const char *tags[] = {"SN","LN","UR","M5",NULL}; char **tbl = sam_header2tbl_n(h->dict, "SQ", tags, &nseq); for (i=0; i\n", &s); } if (tbl) free(tbl); bh->txt = s.s; bh->l_txt = 1 + s.l; bcf_hdr_sync(bh); bcf_hdr_write(bp, bh); bca = bcf_call_init(-1., conf->min_baseQ); bcr = calloc(sm->n, sizeof(bcf_callret1_t)); bca->rghash = rghash; bca->openQ = conf->openQ, bca->extQ = conf->extQ, bca->tandemQ = conf->tandemQ; bca->min_frac = conf->min_frac; bca->min_support = conf->min_support; bca->per_sample_flt = conf->flag & MPLP_PER_SAMPLE; } if (tid0 >= 0 && conf->fai) { // region is set ref = faidx_fetch_seq(conf->fai, h->target_name[tid0], 0, 0x7fffffff, &ref_len); ref_tid = tid0; for (i = 0; i < n; ++i) data[i]->ref = ref, data[i]->ref_id = tid0; } else ref_tid = -1, ref = 0; iter = bam_mplp_init(n, mplp_func, (void**)data); max_depth = conf->max_depth; if (max_depth * sm->n > 1<<20) fprintf(stderr, "(%s) Max depth is above 1M. 
Potential memory hog!\n", __func__); if (max_depth * sm->n < 8000) { max_depth = 8000 / sm->n; fprintf(stderr, "<%s> Set max per-file depth to %d\n", __func__, max_depth); } max_indel_depth = conf->max_indel_depth * sm->n; bam_mplp_set_maxcnt(iter, max_depth); while (bam_mplp_auto(iter, &tid, &pos, n_plp, plp) > 0) { if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, h->target_name[tid], pos, pos+1)) continue; if (tid != ref_tid) { free(ref); ref = 0; if (conf->fai) ref = faidx_fetch_seq(conf->fai, h->target_name[tid], 0, 0x7fffffff, &ref_len); for (i = 0; i < n; ++i) data[i]->ref = ref, data[i]->ref_id = tid; ref_tid = tid; } if (conf->flag & MPLP_GLF) { int total_depth, _ref0, ref16; bcf1_t *b = calloc(1, sizeof(bcf1_t)); for (i = total_depth = 0; i < n; ++i) total_depth += n_plp[i]; group_smpl(&gplp, sm, &buf, n, fn, n_plp, plp, conf->flag & MPLP_IGNORE_RG); _ref0 = (ref && pos < ref_len)? ref[pos] : 'N'; ref16 = bam_nt16_table[_ref0]; for (i = 0; i < gplp.n; ++i) bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], ref16, bca, bcr + i); bcf_call_combine(gplp.n, bcr, bca, ref16, &bc); bcf_call2bcf(tid, pos, &bc, b, bcr, conf->fmt_flag, 0, 0); bcf_write(bp, bh, b); bcf_destroy(b); // call indels if (!(conf->flag&MPLP_NO_INDEL) && total_depth < max_indel_depth && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0) { for (i = 0; i < gplp.n; ++i) bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], -1, bca, bcr + i); if (bcf_call_combine(gplp.n, bcr, bca, -1, &bc) >= 0) { b = calloc(1, sizeof(bcf1_t)); bcf_call2bcf(tid, pos, &bc, b, bcr, conf->fmt_flag, bca, ref); bcf_write(bp, bh, b); bcf_destroy(b); } } } else { printf("%s\t%d\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? 
ref[pos] : 'N'); for (i = 0; i < n; ++i) { int j, cnt; for (j = cnt = 0; j < n_plp[i]; ++j) { const bam_pileup1_t *p = plp[i] + j; if (bam1_qual(p->b)[p->qpos] >= conf->min_baseQ) ++cnt; } printf("\t%d\t", cnt); if (n_plp[i] == 0) { printf("*\t*"); // FIXME: printf() is very slow... if (conf->flag & MPLP_PRINT_POS) printf("\t*"); } else { for (j = 0; j < n_plp[i]; ++j) { const bam_pileup1_t *p = plp[i] + j; if (bam1_qual(p->b)[p->qpos] >= conf->min_baseQ) pileup_seq(plp[i] + j, pos, ref_len, ref); } putchar('\t'); for (j = 0; j < n_plp[i]; ++j) { const bam_pileup1_t *p = plp[i] + j; int c = bam1_qual(p->b)[p->qpos]; if (c >= conf->min_baseQ) { c = c + 33 < 126? c + 33 : 126; putchar(c); } } if (conf->flag & MPLP_PRINT_MAPQ) { putchar('\t'); for (j = 0; j < n_plp[i]; ++j) { int c = plp[i][j].b->core.qual + 33; if (c > 126) c = 126; putchar(c); } } if (conf->flag & MPLP_PRINT_POS) { putchar('\t'); for (j = 0; j < n_plp[i]; ++j) { if (j > 0) putchar(','); printf("%d", plp[i][j].qpos + 1); // FIXME: printf() is very slow... 
} } } } putchar('\n'); } } bcf_close(bp); bam_smpl_destroy(sm); free(buf.s); for (i = 0; i < gplp.n; ++i) free(gplp.plp[i]); free(gplp.plp); free(gplp.n_plp); free(gplp.m_plp); bcf_call_del_rghash(rghash); bcf_hdr_destroy(bh); bcf_call_destroy(bca); free(bc.PL); free(bcr); bam_mplp_destroy(iter); bam_header_destroy(h); for (i = 0; i < n; ++i) { bam_close(data[i]->fp); if (data[i]->iter) bam_iter_destroy(data[i]->iter); free(data[i]); } free(data); free(plp); free(ref); free(n_plp); return 0; } #define MAX_PATH_LEN 1024 int read_file_list(const char *file_list,int *n,char **argv[]) { char buf[MAX_PATH_LEN]; int len, nfiles = 0; char **files = NULL; struct stat sb; *n = 0; *argv = NULL; FILE *fh = fopen(file_list,"r"); if ( !fh ) { fprintf(stderr,"%s: %s\n", file_list,strerror(errno)); return 1; } files = calloc(nfiles,sizeof(char*)); nfiles = 0; while ( fgets(buf,MAX_PATH_LEN,fh) ) { // allow empty lines and trailing spaces len = strlen(buf); while ( len>0 && isspace(buf[len-1]) ) len--; if ( !len ) continue; // check sanity of the file list buf[len] = 0; if (stat(buf, &sb) != 0) { // no such file, check if it is safe to print its name int i, safe_to_print = 1; for (i=0; i= 0) { switch (c) { case 1 : mplp.rflag_require = strtol(optarg,0,0); break; case 2 : mplp.rflag_filter = strtol(optarg,0,0); break; case 'f': mplp.fai = fai_load(optarg); if (mplp.fai == 0) return 1; mplp.fai_fname = optarg; break; case 'd': mplp.max_depth = atoi(optarg); break; case 'r': mplp.reg = strdup(optarg); break; case 'l': mplp.bed = bed_read(optarg); break; case 'P': mplp.pl_list = strdup(optarg); break; case 'p': mplp.flag |= MPLP_PER_SAMPLE; break; case 'g': mplp.flag |= MPLP_GLF; break; case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_GLF; break; case 'a': mplp.flag |= MPLP_NO_ORPHAN | MPLP_REALN; break; case 'B': mplp.flag &= ~MPLP_REALN; break; case 'D': mplp.fmt_flag |= B2B_FMT_DP; break; case 'S': mplp.fmt_flag |= B2B_FMT_SP; break; case 'V': mplp.fmt_flag |= B2B_FMT_DV; break; case 
'I': mplp.flag |= MPLP_NO_INDEL; break; case 'E': mplp.flag |= MPLP_REDO_BAQ; break; case '6': mplp.flag |= MPLP_ILLUMINA13; break; case 'R': mplp.flag |= MPLP_IGNORE_RG; break; case 's': mplp.flag |= MPLP_PRINT_MAPQ; break; case 'O': mplp.flag |= MPLP_PRINT_POS; break; case 'C': mplp.capQ_thres = atoi(optarg); break; case 'M': mplp.max_mq = atoi(optarg); break; case 'q': mplp.min_mq = atoi(optarg); break; case 'Q': mplp.min_baseQ = atoi(optarg); break; case 'b': file_list = optarg; break; case 'o': mplp.openQ = atoi(optarg); break; case 'e': mplp.extQ = atoi(optarg); break; case 'h': mplp.tandemQ = atoi(optarg); break; case 'A': use_orphan = 1; break; case 'F': mplp.min_frac = atof(optarg); break; case 'm': mplp.min_support = atoi(optarg); break; case 'L': mplp.max_indel_depth = atoi(optarg); break; case 'G': { FILE *fp_rg; char buf[1024]; mplp.rghash = bcf_str2id_init(); if ((fp_rg = fopen(optarg, "r")) == 0) fprintf(stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg); while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me... 
bcf_str2id_add(mplp.rghash, strdup(buf)); fclose(fp_rg); } break; } } if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN; if (argc == 1) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: samtools mpileup [options] in1.bam [in2.bam [...]]\n\n"); fprintf(stderr, "Input options:\n\n"); fprintf(stderr, " -6 assume the quality is in the Illumina-1.3+ encoding\n"); fprintf(stderr, " -A count anomalous read pairs\n"); fprintf(stderr, " -B disable BAQ computation\n"); fprintf(stderr, " -b FILE list of input BAM filenames, one per line [null]\n"); fprintf(stderr, " -C INT parameter for adjusting mapQ; 0 to disable [0]\n"); fprintf(stderr, " -d INT max per-BAM depth to avoid excessive memory usage [%d]\n", mplp.max_depth); fprintf(stderr, " -E recalculate extended BAQ on the fly thus ignoring existing BQs\n"); fprintf(stderr, " -f FILE faidx indexed reference sequence file [null]\n"); fprintf(stderr, " -G FILE exclude read groups listed in FILE [null]\n"); fprintf(stderr, " -l FILE list of positions (chr pos) or regions (BED) [null]\n"); fprintf(stderr, " -M INT cap mapping quality at INT [%d]\n", mplp.max_mq); fprintf(stderr, " -r STR region in which pileup is generated [null]\n"); fprintf(stderr, " -R ignore RG tags\n"); fprintf(stderr, " -q INT skip alignments with mapQ smaller than INT [%d]\n", mplp.min_mq); fprintf(stderr, " -Q INT skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp.min_baseQ); fprintf(stderr, " --rf INT required flags: skip reads with mask bits unset []\n"); fprintf(stderr, " --ff INT filter flags: skip reads with mask bits set []\n"); fprintf(stderr, "\nOutput options:\n\n"); fprintf(stderr, " -D output per-sample DP in BCF (require -g/-u)\n"); fprintf(stderr, " -g generate BCF output (genotype likelihoods)\n"); fprintf(stderr, " -O output base positions on reads (disabled by -g/-u)\n"); fprintf(stderr, " -s output mapping quality (disabled by -g/-u)\n"); fprintf(stderr, " -S output per-sample strand bias P-value in BCF (require -g/-u)\n"); 
fprintf(stderr, " -u generate uncompress BCF output\n"); fprintf(stderr, "\nSNP/INDEL genotype likelihoods options (effective with `-g' or `-u'):\n\n"); fprintf(stderr, " -e INT Phred-scaled gap extension seq error probability [%d]\n", mplp.extQ); fprintf(stderr, " -F FLOAT minimum fraction of gapped reads for candidates [%g]\n", mplp.min_frac); fprintf(stderr, " -h INT coefficient for homopolymer errors [%d]\n", mplp.tandemQ); fprintf(stderr, " -I do not perform indel calling\n"); fprintf(stderr, " -L INT max per-sample depth for INDEL calling [%d]\n", mplp.max_indel_depth); fprintf(stderr, " -m INT minimum gapped reads for indel candidates [%d]\n", mplp.min_support); fprintf(stderr, " -o INT Phred-scaled gap open sequencing error probability [%d]\n", mplp.openQ); fprintf(stderr, " -p apply -m and -F per-sample to increase sensitivity\n"); fprintf(stderr, " -P STR comma separated list of platforms for indels [all]\n"); fprintf(stderr, "\n"); fprintf(stderr, "Notes: Assuming diploid individuals.\n\n"); return 1; } bam_no_B = 1; if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; mpileup(&mplp,nfiles,fn); for (c=0; c #include #include "knetfile.h" #include "bgzf.h" #include "bam.h" #define BUF_SIZE 0x10000 int bam_reheader(BGZF *in, const bam_header_t *h, int fd) { BGZF *fp; bam_header_t *old; int len; uint8_t *buf; if (in->is_write) return -1; buf = malloc(BUF_SIZE); old = bam_header_read(in); fp = bgzf_fdopen(fd, "w"); bam_header_write(fp, h); if (in->block_offset < in->block_length) { bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset); bgzf_flush(fp); } #ifdef _USE_KNETFILE while ((len = knet_read(in->fp, buf, BUF_SIZE)) > 0) fwrite(buf, 1, len, fp->fp); #else while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) fwrite(buf, 1, len, fp->file); #endif free(buf); fp->block_offset = in->block_offset = 0; bgzf_close(fp); return 0; } int main_reheader(int argc, char *argv[]) { 
bam_header_t *h; BGZF *in; if (argc != 3) { fprintf(stderr, "Usage: samtools reheader \n"); return 1; } { // read the header tamFile fph = sam_open(argv[1]); if (fph == 0) { fprintf(stderr, "[%s] fail to read the header from %s.\n", __func__, argv[1]); return 1; } h = sam_header_read(fph); sam_close(fph); } in = strcmp(argv[2], "-")? bam_open(argv[2], "r") : bam_dopen(fileno(stdin), "r"); if (in == 0) { fprintf(stderr, "[%s] fail to open file %s.\n", __func__, argv[2]); return 1; } bam_reheader(in, h, fileno(stdout)); bgzf_close(in); return 0; } samtools-0.1.19/bam_rmdup.c000066400000000000000000000127771212162403000155620ustar00rootroot00000000000000#include #include #include #include #include #include "sam.h" typedef bam1_t *bam1_p; #include "khash.h" KHASH_SET_INIT_STR(name) KHASH_MAP_INIT_INT64(pos, bam1_p) #define BUFFER_SIZE 0x40000 typedef struct { uint64_t n_checked, n_removed; khash_t(pos) *best_hash; } lib_aux_t; KHASH_MAP_INIT_STR(lib, lib_aux_t) typedef struct { int n, max; bam1_t **a; } tmp_stack_t; static inline void stack_insert(tmp_stack_t *stack, bam1_t *b) { if (stack->n == stack->max) { stack->max = stack->max? 
stack->max<<1 : 0x10000; stack->a = (bam1_t**)realloc(stack->a, sizeof(bam1_t*) * stack->max); } stack->a[stack->n++] = b; } static inline void dump_best(tmp_stack_t *stack, samfile_t *out) { int i; for (i = 0; i != stack->n; ++i) { samwrite(out, stack->a[i]); bam_destroy1(stack->a[i]); } stack->n = 0; } static void clear_del_set(khash_t(name) *del_set) { khint_t k; for (k = kh_begin(del_set); k < kh_end(del_set); ++k) if (kh_exist(del_set, k)) free((char*)kh_key(del_set, k)); kh_clear(name, del_set); } static lib_aux_t *get_aux(khash_t(lib) *aux, const char *lib) { khint_t k = kh_get(lib, aux, lib); if (k == kh_end(aux)) { int ret; char *p = strdup(lib); lib_aux_t *q; k = kh_put(lib, aux, p, &ret); q = &kh_val(aux, k); q->n_checked = q->n_removed = 0; q->best_hash = kh_init(pos); return q; } else return &kh_val(aux, k); } static void clear_best(khash_t(lib) *aux, int max) { khint_t k; for (k = kh_begin(aux); k != kh_end(aux); ++k) { if (kh_exist(aux, k)) { lib_aux_t *q = &kh_val(aux, k); if (kh_size(q->best_hash) >= max) kh_clear(pos, q->best_hash); } } } static inline int sum_qual(const bam1_t *b) { int i, q; uint8_t *qual = bam1_qual(b); for (i = q = 0; i < b->core.l_qseq; ++i) q += qual[i]; return q; } void bam_rmdup_core(samfile_t *in, samfile_t *out) { bam1_t *b; int last_tid = -1, last_pos = -1; tmp_stack_t stack; khint_t k; khash_t(lib) *aux; khash_t(name) *del_set; aux = kh_init(lib); del_set = kh_init(name); b = bam_init1(); memset(&stack, 0, sizeof(tmp_stack_t)); kh_resize(name, del_set, 4 * BUFFER_SIZE); while (samread(in, b) >= 0) { bam1_core_t *c = &b->core; if (c->tid != last_tid || last_pos != c->pos) { dump_best(&stack, out); // write the result clear_best(aux, BUFFER_SIZE); if (c->tid != last_tid) { clear_best(aux, 0); if (kh_size(del_set)) { // check fprintf(stderr, "[bam_rmdup_core] %llu unmatched pairs\n", (long long)kh_size(del_set)); clear_del_set(del_set); } if ((int)c->tid == -1) { // append unmapped reads samwrite(out, b); while 
(samread(in, b) >= 0) samwrite(out, b); break; } last_tid = c->tid; fprintf(stderr, "[bam_rmdup_core] processing reference %s...\n", in->header->target_name[c->tid]); } } if (!(c->flag&BAM_FPAIRED) || (c->flag&(BAM_FUNMAP|BAM_FMUNMAP)) || (c->mtid >= 0 && c->tid != c->mtid)) { samwrite(out, b); } else if (c->isize > 0) { // paired, head uint64_t key = (uint64_t)c->pos<<32 | c->isize; const char *lib; lib_aux_t *q; int ret; lib = bam_get_library(in->header, b); q = lib? get_aux(aux, lib) : get_aux(aux, "\t"); ++q->n_checked; k = kh_put(pos, q->best_hash, key, &ret); if (ret == 0) { // found in best_hash bam1_t *p = kh_val(q->best_hash, k); ++q->n_removed; if (sum_qual(p) < sum_qual(b)) { // the current alignment is better; this can be accelerated in principle kh_put(name, del_set, strdup(bam1_qname(p)), &ret); // p will be removed bam_copy1(p, b); // replaced as b } else kh_put(name, del_set, strdup(bam1_qname(b)), &ret); // b will be removed if (ret == 0) fprintf(stderr, "[bam_rmdup_core] inconsistent BAM file for pair '%s'. 
Continue anyway.\n", bam1_qname(b)); } else { // not found in best_hash kh_val(q->best_hash, k) = bam_dup1(b); stack_insert(&stack, kh_val(q->best_hash, k)); } } else { // paired, tail k = kh_get(name, del_set, bam1_qname(b)); if (k != kh_end(del_set)) { free((char*)kh_key(del_set, k)); kh_del(name, del_set, k); } else samwrite(out, b); } last_pos = c->pos; } for (k = kh_begin(aux); k != kh_end(aux); ++k) { if (kh_exist(aux, k)) { lib_aux_t *q = &kh_val(aux, k); dump_best(&stack, out); fprintf(stderr, "[bam_rmdup_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed, (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k)); kh_destroy(pos, q->best_hash); free((char*)kh_key(aux, k)); } } kh_destroy(lib, aux); clear_del_set(del_set); kh_destroy(name, del_set); free(stack.a); bam_destroy1(b); } void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se); int bam_rmdup(int argc, char *argv[]) { int c, is_se = 0, force_se = 0; samfile_t *in, *out; while ((c = getopt(argc, argv, "sS")) >= 0) { switch (c) { case 's': is_se = 1; break; case 'S': force_se = is_se = 1; break; } } if (optind + 2 > argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: samtools rmdup [-sS] \n\n"); fprintf(stderr, "Option: -s rmdup for SE reads\n"); fprintf(stderr, " -S treat PE reads as SE in rmdup (force -s)\n\n"); return 1; } in = samopen(argv[optind], "rb", 0); out = samopen(argv[optind+1], "wb", in->header); if (in == 0 || out == 0) { fprintf(stderr, "[bam_rmdup] fail to read/write input files\n"); return 1; } if (is_se) bam_rmdupse_core(in, out, force_se); else bam_rmdup_core(in, out); samclose(in); samclose(out); return 0; } samtools-0.1.19/bam_rmdupse.c000066400000000000000000000100371212162403000160750ustar00rootroot00000000000000#include #include "sam.h" #include "khash.h" #include "klist.h" #define QUEUE_CLEAR_SIZE 0x100000 #define MAX_POS 0x7fffffff typedef struct { int endpos; uint32_t score:31, discarded:1; bam1_t *b; } elem_t, *elem_p; 
#define __free_elem(p) bam_destroy1((p)->data.b) KLIST_INIT(q, elem_t, __free_elem) typedef klist_t(q) queue_t; KHASH_MAP_INIT_INT(best, elem_p) typedef khash_t(best) besthash_t; typedef struct { uint64_t n_checked, n_removed; besthash_t *left, *rght; } lib_aux_t; KHASH_MAP_INIT_STR(lib, lib_aux_t) static lib_aux_t *get_aux(khash_t(lib) *aux, const char *lib) { khint_t k = kh_get(lib, aux, lib); if (k == kh_end(aux)) { int ret; char *p = strdup(lib); lib_aux_t *q; k = kh_put(lib, aux, p, &ret); q = &kh_val(aux, k); q->left = kh_init(best); q->rght = kh_init(best); q->n_checked = q->n_removed = 0; return q; } else return &kh_val(aux, k); } static inline int sum_qual(const bam1_t *b) { int i, q; uint8_t *qual = bam1_qual(b); for (i = q = 0; i < b->core.l_qseq; ++i) q += qual[i]; return q; } static inline elem_t *push_queue(queue_t *queue, const bam1_t *b, int endpos, int score) { elem_t *p = kl_pushp(q, queue); p->discarded = 0; p->endpos = endpos; p->score = score; if (p->b == 0) p->b = bam_init1(); bam_copy1(p->b, b); return p; } static void clear_besthash(besthash_t *h, int32_t pos) { khint_t k; for (k = kh_begin(h); k != kh_end(h); ++k) if (kh_exist(h, k) && kh_val(h, k)->endpos <= pos) kh_del(best, h, k); } static void dump_alignment(samfile_t *out, queue_t *queue, int32_t pos, khash_t(lib) *h) { if (queue->size > QUEUE_CLEAR_SIZE || pos == MAX_POS) { khint_t k; while (1) { elem_t *q; if (queue->head == queue->tail) break; q = &kl_val(queue->head); if (q->discarded) { q->b->data_len = 0; kl_shift(q, queue, 0); continue; } if ((q->b->core.flag&BAM_FREVERSE) && q->endpos > pos) break; samwrite(out, q->b); q->b->data_len = 0; kl_shift(q, queue, 0); } for (k = kh_begin(h); k != kh_end(h); ++k) { if (kh_exist(h, k)) { clear_besthash(kh_val(h, k).left, pos); clear_besthash(kh_val(h, k).rght, pos); } } } } void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se) { bam1_t *b; queue_t *queue; khint_t k; int last_tid = -2; khash_t(lib) *aux; aux = kh_init(lib); 
b = bam_init1(); queue = kl_init(q); while (samread(in, b) >= 0) { bam1_core_t *c = &b->core; int endpos = bam_calend(c, bam1_cigar(b)); int score = sum_qual(b); if (last_tid != c->tid) { if (last_tid >= 0) dump_alignment(out, queue, MAX_POS, aux); last_tid = c->tid; } else dump_alignment(out, queue, c->pos, aux); if ((c->flag&BAM_FUNMAP) || ((c->flag&BAM_FPAIRED) && !force_se)) { push_queue(queue, b, endpos, score); } else { const char *lib; lib_aux_t *q; besthash_t *h; uint32_t key; int ret; lib = bam_get_library(in->header, b); q = lib? get_aux(aux, lib) : get_aux(aux, "\t"); ++q->n_checked; h = (c->flag&BAM_FREVERSE)? q->rght : q->left; key = (c->flag&BAM_FREVERSE)? endpos : c->pos; k = kh_put(best, h, key, &ret); if (ret == 0) { // in the hash table elem_t *p = kh_val(h, k); ++q->n_removed; if (p->score < score) { if (c->flag&BAM_FREVERSE) { // mark "discarded" and push the queue p->discarded = 1; kh_val(h, k) = push_queue(queue, b, endpos, score); } else { // replace p->score = score; p->endpos = endpos; bam_copy1(p->b, b); } } // otherwise, discard the alignment } else kh_val(h, k) = push_queue(queue, b, endpos, score); } } dump_alignment(out, queue, MAX_POS, aux); for (k = kh_begin(aux); k != kh_end(aux); ++k) { if (kh_exist(aux, k)) { lib_aux_t *q = &kh_val(aux, k); fprintf(stderr, "[bam_rmdupse_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed, (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k)); kh_destroy(best, q->left); kh_destroy(best, q->rght); free((char*)kh_key(aux, k)); } } kh_destroy(lib, aux); bam_destroy1(b); kl_destroy(q, queue); } samtools-0.1.19/bam_sort.c000066400000000000000000000443751212162403000154210ustar00rootroot00000000000000#include #include #include #include #include #include #include #include "bam.h" #include "ksort.h" static int g_is_by_qname = 0; static int strnum_cmp(const char *_a, const char *_b) { const unsigned char *a = (const unsigned char*)_a, *b = (const unsigned char*)_b; 
const unsigned char *pa = a, *pb = b; while (*pa && *pb) { if (isdigit(*pa) && isdigit(*pb)) { while (*pa == '0') ++pa; while (*pb == '0') ++pb; while (isdigit(*pa) && isdigit(*pb) && *pa == *pb) ++pa, ++pb; if (isdigit(*pa) && isdigit(*pb)) { int i = 0; while (isdigit(pa[i]) && isdigit(pb[i])) ++i; return isdigit(pa[i])? 1 : isdigit(pb[i])? -1 : (int)*pa - (int)*pb; } else if (isdigit(*pa)) return 1; else if (isdigit(*pb)) return -1; else if (pa - a != pb - b) return pa - a < pb - b? 1 : -1; } else { if (*pa != *pb) return (int)*pa - (int)*pb; ++pa; ++pb; } } return *pa? 1 : *pb? -1 : 0; } #define HEAP_EMPTY 0xffffffffffffffffull typedef struct { int i; uint64_t pos, idx; bam1_t *b; } heap1_t; #define __pos_cmp(a, b) ((a).pos > (b).pos || ((a).pos == (b).pos && ((a).i > (b).i || ((a).i == (b).i && (a).idx > (b).idx)))) static inline int heap_lt(const heap1_t a, const heap1_t b) { if (g_is_by_qname) { int t; if (a.b == 0 || b.b == 0) return a.b == 0? 1 : 0; t = strnum_cmp(bam1_qname(a.b), bam1_qname(b.b)); return (t > 0 || (t == 0 && (a.b->core.flag&0xc0) > (b.b->core.flag&0xc0))); } else return __pos_cmp(a, b); } KSORT_INIT(heap, heap1_t, heap_lt) static void swap_header_targets(bam_header_t *h1, bam_header_t *h2) { bam_header_t t; t.n_targets = h1->n_targets, h1->n_targets = h2->n_targets, h2->n_targets = t.n_targets; t.target_name = h1->target_name, h1->target_name = h2->target_name, h2->target_name = t.target_name; t.target_len = h1->target_len, h1->target_len = h2->target_len, h2->target_len = t.target_len; } static void swap_header_text(bam_header_t *h1, bam_header_t *h2) { int tempi; char *temps; tempi = h1->l_text, h1->l_text = h2->l_text, h2->l_text = tempi; temps = h1->text, h1->text = h2->text, h2->text = temps; } #define MERGE_RG 1 #define MERGE_UNCOMP 2 #define MERGE_LEVEL1 4 #define MERGE_FORCE 8 /*! @abstract Merge multiple sorted BAM. 
@param is_by_qname whether to sort by query name @param out output BAM file name @param headers name of SAM file from which to copy '@' header lines, or NULL to copy them from the first file to be merged @param n number of files to be merged @param fn names of files to be merged @discussion Padding information may NOT correctly maintained. This function is NOT thread safe. */ int bam_merge_core2(int by_qname, const char *out, const char *headers, int n, char * const *fn, int flag, const char *reg, int n_threads, int level) { bamFile fpout, *fp; heap1_t *heap; bam_header_t *hout = 0; bam_header_t *hheaders = NULL; int i, j, *RG_len = 0; uint64_t idx = 0; char **RG = 0, mode[8]; bam_iter_t *iter = 0; if (headers) { tamFile fpheaders = sam_open(headers); if (fpheaders == 0) { const char *message = strerror(errno); fprintf(stderr, "[bam_merge_core] cannot open '%s': %s\n", headers, message); return -1; } hheaders = sam_header_read(fpheaders); sam_close(fpheaders); } g_is_by_qname = by_qname; fp = (bamFile*)calloc(n, sizeof(bamFile)); heap = (heap1_t*)calloc(n, sizeof(heap1_t)); iter = (bam_iter_t*)calloc(n, sizeof(bam_iter_t)); // prepare RG tag if (flag & MERGE_RG) { RG = (char**)calloc(n, sizeof(void*)); RG_len = (int*)calloc(n, sizeof(int)); for (i = 0; i != n; ++i) { int l = strlen(fn[i]); const char *s = fn[i]; if (l > 4 && strcmp(s + l - 4, ".bam") == 0) l -= 4; for (j = l - 1; j >= 0; --j) if (s[j] == '/') break; ++j; l -= j; RG[i] = calloc(l + 1, 1); RG_len[i] = l; strncpy(RG[i], s + j, l); } } // read the first for (i = 0; i != n; ++i) { bam_header_t *hin; fp[i] = bam_open(fn[i], "r"); if (fp[i] == 0) { int j; fprintf(stderr, "[bam_merge_core] fail to open file %s\n", fn[i]); for (j = 0; j < i; ++j) bam_close(fp[j]); free(fp); free(heap); // FIXME: possible memory leak return -1; } hin = bam_header_read(fp[i]); if (i == 0) { // the first BAM hout = hin; } else { // validate multiple baf int min_n_targets = hout->n_targets; if (hin->n_targets < min_n_targets) 
min_n_targets = hin->n_targets; for (j = 0; j < min_n_targets; ++j) if (strcmp(hout->target_name[j], hin->target_name[j]) != 0) { fprintf(stderr, "[bam_merge_core] different target sequence name: '%s' != '%s' in file '%s'\n", hout->target_name[j], hin->target_name[j], fn[i]); return -1; } // If this input file has additional target reference sequences, // add them to the headers to be output if (hin->n_targets > hout->n_targets) { swap_header_targets(hout, hin); // FIXME Possibly we should also create @SQ text headers // for the newly added reference sequences } bam_header_destroy(hin); } } if (hheaders) { // If the text headers to be swapped in include any @SQ headers, // check that they are consistent with the existing binary list // of reference information. if (hheaders->n_targets > 0) { if (hout->n_targets != hheaders->n_targets) { fprintf(stderr, "[bam_merge_core] number of @SQ headers in '%s' differs from number of target sequences\n", headers); if (!reg) return -1; } for (j = 0; j < hout->n_targets; ++j) if (strcmp(hout->target_name[j], hheaders->target_name[j]) != 0) { fprintf(stderr, "[bam_merge_core] @SQ header '%s' in '%s' differs from target sequence\n", hheaders->target_name[j], headers); if (!reg) return -1; } } swap_header_text(hout, hheaders); bam_header_destroy(hheaders); } if (reg) { int tid, beg, end; if (bam_parse_region(hout, reg, &tid, &beg, &end) < 0) { fprintf(stderr, "[%s] Malformated region string or undefined reference name\n", __func__); return -1; } for (i = 0; i < n; ++i) { bam_index_t *idx; idx = bam_index_load(fn[i]); iter[i] = bam_iter_query(idx, tid, beg, end); bam_index_destroy(idx); } } for (i = 0; i < n; ++i) { heap1_t *h = heap + i; h->i = i; h->b = (bam1_t*)calloc(1, sizeof(bam1_t)); if (bam_iter_read(fp[i], iter[i], h->b) >= 0) { h->pos = ((uint64_t)h->b->core.tid<<32) | (uint32_t)((int32_t)h->b->core.pos+1)<<1 | bam1_strand(h->b); h->idx = idx++; } else h->pos = HEAP_EMPTY; } if (flag & MERGE_UNCOMP) level = 0; else if 
(flag & MERGE_LEVEL1) level = 1; strcpy(mode, "w"); if (level >= 0) sprintf(mode + 1, "%d", level < 9? level : 9); if ((fpout = strcmp(out, "-")? bam_open(out, "w") : bam_dopen(fileno(stdout), "w")) == 0) { fprintf(stderr, "[%s] fail to create the output file.\n", __func__); return -1; } bam_header_write(fpout, hout); bam_header_destroy(hout); if (!(flag & MERGE_UNCOMP)) bgzf_mt(fpout, n_threads, 256); ks_heapmake(heap, n, heap); while (heap->pos != HEAP_EMPTY) { bam1_t *b = heap->b; if (flag & MERGE_RG) { uint8_t *rg = bam_aux_get(b, "RG"); if (rg) bam_aux_del(b, rg); bam_aux_append(b, "RG", 'Z', RG_len[heap->i] + 1, (uint8_t*)RG[heap->i]); } bam_write1_core(fpout, &b->core, b->data_len, b->data); if ((j = bam_iter_read(fp[heap->i], iter[heap->i], b)) >= 0) { heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)((int)b->core.pos+1)<<1 | bam1_strand(b); heap->idx = idx++; } else if (j == -1) { heap->pos = HEAP_EMPTY; free(heap->b->data); free(heap->b); heap->b = 0; } else fprintf(stderr, "[bam_merge_core] '%s' is truncated. 
Continue anyway.\n", fn[heap->i]); ks_heapadjust(heap, 0, n, heap); } if (flag & MERGE_RG) { for (i = 0; i != n; ++i) free(RG[i]); free(RG); free(RG_len); } for (i = 0; i != n; ++i) { bam_iter_destroy(iter[i]); bam_close(fp[i]); } bam_close(fpout); free(fp); free(heap); free(iter); return 0; } int bam_merge_core(int by_qname, const char *out, const char *headers, int n, char * const *fn, int flag, const char *reg) { return bam_merge_core2(by_qname, out, headers, n, fn, flag, reg, 0, -1); } int bam_merge(int argc, char *argv[]) { int c, is_by_qname = 0, flag = 0, ret = 0, n_threads = 0, level = -1; char *fn_headers = NULL, *reg = 0; while ((c = getopt(argc, argv, "h:nru1R:f@:l:")) >= 0) { switch (c) { case 'r': flag |= MERGE_RG; break; case 'f': flag |= MERGE_FORCE; break; case 'h': fn_headers = strdup(optarg); break; case 'n': is_by_qname = 1; break; case '1': flag |= MERGE_LEVEL1; break; case 'u': flag |= MERGE_UNCOMP; break; case 'R': reg = strdup(optarg); break; case 'l': level = atoi(optarg); break; case '@': n_threads = atoi(optarg); break; } } if (optind + 2 >= argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: samtools merge [-nr] [-h inh.sam] [...]\n\n"); fprintf(stderr, "Options: -n sort by read names\n"); fprintf(stderr, " -r attach RG tag (inferred from file names)\n"); fprintf(stderr, " -u uncompressed BAM output\n"); fprintf(stderr, " -f overwrite the output BAM if exist\n"); fprintf(stderr, " -1 compress level 1\n"); fprintf(stderr, " -l INT compression level, from 0 to 9 [-1]\n"); fprintf(stderr, " -@ INT number of BAM compression threads [0]\n"); fprintf(stderr, " -R STR merge file in the specified region STR [all]\n"); fprintf(stderr, " -h FILE copy the header in FILE to [in1.bam]\n\n"); fprintf(stderr, "Note: Samtools' merge does not reconstruct the @RG dictionary in the header. 
Users\n"); fprintf(stderr, " must provide the correct header with -h, or uses Picard which properly maintains\n"); fprintf(stderr, " the header dictionary in merging.\n\n"); return 1; } if (!(flag & MERGE_FORCE) && strcmp(argv[optind], "-")) { FILE *fp = fopen(argv[optind], "rb"); if (fp != NULL) { fclose(fp); fprintf(stderr, "[%s] File '%s' exists. Please apply '-f' to overwrite. Abort.\n", __func__, argv[optind]); return 1; } } if (bam_merge_core2(is_by_qname, argv[optind], fn_headers, argc - optind - 1, argv + optind + 1, flag, reg, n_threads, level) < 0) ret = 1; free(reg); free(fn_headers); return ret; } /*************** * BAM sorting * ***************/ #include typedef bam1_t *bam1_p; static int change_SO(bam_header_t *h, const char *so) { char *p, *q, *beg = 0, *end = 0, *newtext; if (h->l_text > 3) { if (strncmp(h->text, "@HD", 3) == 0) { if ((p = strchr(h->text, '\n')) == 0) return -1; *p = '\0'; if ((q = strstr(h->text, "\tSO:")) != 0) { *p = '\n'; // change back if (strncmp(q + 4, so, p - q - 4) != 0) { beg = q; for (q += 4; *q != '\n' && *q != '\t'; ++q); end = q; } else return 0; // no need to change } else beg = end = p, *p = '\n'; } } if (beg == 0) { // no @HD h->l_text += strlen(so) + 15; newtext = malloc(h->l_text + 1); sprintf(newtext, "@HD\tVN:1.3\tSO:%s\n", so); strcat(newtext, h->text); } else { // has @HD but different or no SO h->l_text = (beg - h->text) + (4 + strlen(so)) + (h->text + h->l_text - end); newtext = malloc(h->l_text + 1); strncpy(newtext, h->text, beg - h->text); sprintf(newtext + (beg - h->text), "\tSO:%s", so); strcat(newtext, end); } free(h->text); h->text = newtext; return 0; } static inline int bam1_lt(const bam1_p a, const bam1_p b) { if (g_is_by_qname) { int t = strnum_cmp(bam1_qname(a), bam1_qname(b)); return (t < 0 || (t == 0 && (a->core.flag&0xc0) < (b->core.flag&0xc0))); } else return (((uint64_t)a->core.tid<<32|(a->core.pos+1)<<1|bam1_strand(a)) < ((uint64_t)b->core.tid<<32|(b->core.pos+1)<<1|bam1_strand(b))); } 
KSORT_INIT(sort, bam1_p, bam1_lt) typedef struct { size_t buf_len; const char *prefix; bam1_p *buf; const bam_header_t *h; int index; } worker_t; static void write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf, const bam_header_t *h, int n_threads) { size_t i; bamFile fp; fp = strcmp(fn, "-")? bam_open(fn, mode) : bam_dopen(fileno(stdout), mode); if (fp == 0) return; bam_header_write(fp, h); if (n_threads > 1) bgzf_mt(fp, n_threads, 256); for (i = 0; i < l; ++i) bam_write1_core(fp, &buf[i]->core, buf[i]->data_len, buf[i]->data); bam_close(fp); } static void *worker(void *data) { worker_t *w = (worker_t*)data; char *name; ks_mergesort(sort, w->buf_len, w->buf, 0); name = (char*)calloc(strlen(w->prefix) + 20, 1); sprintf(name, "%s.%.4d.bam", w->prefix, w->index); write_buffer(name, "w1", w->buf_len, w->buf, w->h, 0); free(name); return 0; } static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, const bam_header_t *h, int n_threads) { int i; size_t rest; bam1_p *b; pthread_t *tid; pthread_attr_t attr; worker_t *w; if (n_threads < 1) n_threads = 1; if (k < n_threads * 64) n_threads = 1; // use a single thread if we only sort a small batch of records pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); w = calloc(n_threads, sizeof(worker_t)); tid = calloc(n_threads, sizeof(pthread_t)); b = buf; rest = k; for (i = 0; i < n_threads; ++i) { w[i].buf_len = rest / (n_threads - i); w[i].buf = b; w[i].prefix = prefix; w[i].h = h; w[i].index = n_files + i; b += w[i].buf_len; rest -= w[i].buf_len; pthread_create(&tid[i], &attr, worker, &w[i]); } for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0); free(tid); free(w); return n_files + n_threads; } /*! 
@abstract Sort an unsorted BAM file based on the chromosome order and the leftmost position of an alignment @param is_by_qname whether to sort by query name @param fn name of the file to be sorted @param prefix prefix of the output and the temporary files; upon sucessess, prefix.bam will be written. @param max_mem approxiate maximum memory (very inaccurate) @param full_path the given output path is the full path and not just the prefix @discussion It may create multiple temporary subalignment files and then merge them by calling bam_merge_core(). This function is NOT thread safe. */ void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_t _max_mem, int is_stdout, int n_threads, int level, int full_path) { int ret, i, n_files = 0; size_t mem, max_k, k, max_mem; bam_header_t *header; bamFile fp; bam1_t *b, **buf; char *fnout = 0; char const *suffix = ".bam"; if (full_path) suffix += 4; if (n_threads < 2) n_threads = 1; g_is_by_qname = is_by_qname; max_k = k = 0; mem = 0; max_mem = _max_mem * n_threads; buf = 0; fp = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r"); if (fp == 0) { fprintf(stderr, "[bam_sort_core] fail to open file %s\n", fn); return; } header = bam_header_read(fp); if (is_by_qname) change_SO(header, "queryname"); else change_SO(header, "coordinate"); // write sub files for (;;) { if (k == max_k) { size_t old_max = max_k; max_k = max_k? 
max_k<<1 : 0x10000; buf = realloc(buf, max_k * sizeof(void*)); memset(buf + old_max, 0, sizeof(void*) * (max_k - old_max)); } if (buf[k] == 0) buf[k] = (bam1_t*)calloc(1, sizeof(bam1_t)); b = buf[k]; if ((ret = bam_read1(fp, b)) < 0) break; if (b->data_len < b->m_data>>2) { // shrink b->m_data = b->data_len; kroundup32(b->m_data); b->data = realloc(b->data, b->m_data); } mem += sizeof(bam1_t) + b->m_data + sizeof(void*) + sizeof(void*); // two sizeof(void*) for the data allocated to pointer arrays ++k; if (mem >= max_mem) { n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads); mem = k = 0; } } if (ret != -1) fprintf(stderr, "[bam_sort_core] truncated file. Continue anyway.\n"); // output file name fnout = calloc(strlen(prefix) + 20, 1); if (is_stdout) sprintf(fnout, "-"); else sprintf(fnout, "%s%s", prefix, suffix); // write the final output if (n_files == 0) { // a single block char mode[8]; strcpy(mode, "w"); if (level >= 0) sprintf(mode + 1, "%d", level < 9? level : 9); ks_mergesort(sort, k, buf, 0); write_buffer(fnout, mode, k, buf, header, n_threads); } else { // then merge char **fns; n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads); fprintf(stderr, "[bam_sort_core] merging from %d files...\n", n_files); fns = (char**)calloc(n_files, sizeof(char*)); for (i = 0; i < n_files; ++i) { fns[i] = (char*)calloc(strlen(prefix) + 20, 1); sprintf(fns[i], "%s.%.4d%s", prefix, i, suffix); } bam_merge_core2(is_by_qname, fnout, 0, n_files, fns, 0, 0, n_threads, level); for (i = 0; i < n_files; ++i) { unlink(fns[i]); free(fns[i]); } free(fns); } free(fnout); // free for (k = 0; k < max_k; ++k) { if (!buf[k]) continue; free(buf[k]->data); free(buf[k]); } free(buf); bam_header_destroy(header); bam_close(fp); } void bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t max_mem) { bam_sort_core_ext(is_by_qname, fn, prefix, max_mem, 0, 0, -1, 0); } int bam_sort(int argc, char *argv[]) { size_t max_mem = 768<<20; // 512MB int c, 
is_by_qname = 0, is_stdout = 0, n_threads = 0, level = -1, full_path = 0; while ((c = getopt(argc, argv, "fnom:@:l:")) >= 0) { switch (c) { case 'f': full_path = 1; break; case 'o': is_stdout = 1; break; case 'n': is_by_qname = 1; break; case 'm': { char *q; max_mem = strtol(optarg, &q, 0); if (*q == 'k' || *q == 'K') max_mem <<= 10; else if (*q == 'm' || *q == 'M') max_mem <<= 20; else if (*q == 'g' || *q == 'G') max_mem <<= 30; break; } case '@': n_threads = atoi(optarg); break; case 'l': level = atoi(optarg); break; } } if (optind + 2 > argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: samtools sort [options] \n\n"); fprintf(stderr, "Options: -n sort by read name\n"); fprintf(stderr, " -f use as full file name instead of prefix\n"); fprintf(stderr, " -o final output to stdout\n"); fprintf(stderr, " -l INT compression level, from 0 to 9 [-1]\n"); fprintf(stderr, " -@ INT number of sorting and compression threads [1]\n"); fprintf(stderr, " -m INT max memory per thread; suffix K/M/G recognized [768M]\n"); fprintf(stderr, "\n"); return 1; } bam_sort_core_ext(is_by_qname, argv[optind], argv[optind+1], max_mem, is_stdout, n_threads, level, full_path); return 0; } samtools-0.1.19/bam_stat.c000066400000000000000000000062021212162403000153700ustar00rootroot00000000000000#include #include #include "bam.h" typedef struct { long long n_reads[2], n_mapped[2], n_pair_all[2], n_pair_map[2], n_pair_good[2]; long long n_sgltn[2], n_read1[2], n_read2[2]; long long n_dup[2]; long long n_diffchr[2], n_diffhigh[2]; } bam_flagstat_t; #define flagstat_loop(s, c) do { \ int w = ((c)->flag & BAM_FQCFAIL)? 
1 : 0; \ ++(s)->n_reads[w]; \ if ((c)->flag & BAM_FPAIRED) { \ ++(s)->n_pair_all[w]; \ if ((c)->flag & BAM_FPROPER_PAIR) ++(s)->n_pair_good[w]; \ if ((c)->flag & BAM_FREAD1) ++(s)->n_read1[w]; \ if ((c)->flag & BAM_FREAD2) ++(s)->n_read2[w]; \ if (((c)->flag & BAM_FMUNMAP) && !((c)->flag & BAM_FUNMAP)) ++(s)->n_sgltn[w]; \ if (!((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FMUNMAP)) { \ ++(s)->n_pair_map[w]; \ if ((c)->mtid != (c)->tid) { \ ++(s)->n_diffchr[w]; \ if ((c)->qual >= 5) ++(s)->n_diffhigh[w]; \ } \ } \ } \ if (!((c)->flag & BAM_FUNMAP)) ++(s)->n_mapped[w]; \ if ((c)->flag & BAM_FDUP) ++(s)->n_dup[w]; \ } while (0) bam_flagstat_t *bam_flagstat_core(bamFile fp) { bam_flagstat_t *s; bam1_t *b; bam1_core_t *c; int ret; s = (bam_flagstat_t*)calloc(1, sizeof(bam_flagstat_t)); b = bam_init1(); c = &b->core; while ((ret = bam_read1(fp, b)) >= 0) flagstat_loop(s, c); bam_destroy1(b); if (ret != -1) fprintf(stderr, "[bam_flagstat_core] Truncated file? Continue anyway.\n"); return s; } int bam_flagstat(int argc, char *argv[]) { bamFile fp; bam_header_t *header; bam_flagstat_t *s; if (argc == optind) { fprintf(stderr, "Usage: samtools flagstat \n"); return 1; } fp = strcmp(argv[optind], "-")? 
bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r"); assert(fp); header = bam_header_read(fp); s = bam_flagstat_core(fp); printf("%lld + %lld in total (QC-passed reads + QC-failed reads)\n", s->n_reads[0], s->n_reads[1]); printf("%lld + %lld duplicates\n", s->n_dup[0], s->n_dup[1]); printf("%lld + %lld mapped (%.2f%%:%.2f%%)\n", s->n_mapped[0], s->n_mapped[1], (float)s->n_mapped[0] / s->n_reads[0] * 100.0, (float)s->n_mapped[1] / s->n_reads[1] * 100.0); printf("%lld + %lld paired in sequencing\n", s->n_pair_all[0], s->n_pair_all[1]); printf("%lld + %lld read1\n", s->n_read1[0], s->n_read1[1]); printf("%lld + %lld read2\n", s->n_read2[0], s->n_read2[1]); printf("%lld + %lld properly paired (%.2f%%:%.2f%%)\n", s->n_pair_good[0], s->n_pair_good[1], (float)s->n_pair_good[0] / s->n_pair_all[0] * 100.0, (float)s->n_pair_good[1] / s->n_pair_all[1] * 100.0); printf("%lld + %lld with itself and mate mapped\n", s->n_pair_map[0], s->n_pair_map[1]); printf("%lld + %lld singletons (%.2f%%:%.2f%%)\n", s->n_sgltn[0], s->n_sgltn[1], (float)s->n_sgltn[0] / s->n_pair_all[0] * 100.0, (float)s->n_sgltn[1] / s->n_pair_all[1] * 100.0); printf("%lld + %lld with mate mapped to a different chr\n", s->n_diffchr[0], s->n_diffchr[1]); printf("%lld + %lld with mate mapped to a different chr (mapQ>=5)\n", s->n_diffhigh[0], s->n_diffhigh[1]); free(s); bam_header_destroy(header); bam_close(fp); return 0; } samtools-0.1.19/bam_tview.c000066400000000000000000000261771212162403000155700ustar00rootroot00000000000000#include #include "bam_tview.h" int base_tv_init(tview_t* tv,const char *fn, const char *fn_fa, const char *samples) { assert(tv!=NULL); assert(fn!=NULL); tv->mrow = 24; tv->mcol = 80; tv->color_for = TV_COLOR_MAPQ; tv->is_dot = 1; tv->fp = bam_open(fn, "r"); if(tv->fp==0) { fprintf(stderr,"bam_open %s. 
%s\n", fn,fn_fa); exit(EXIT_FAILURE); } bgzf_set_cache_size(tv->fp, 8 * 1024 *1024); assert(tv->fp); tv->header = bam_header_read(tv->fp); if(tv->header==0) { fprintf(stderr,"Cannot read '%s'.\n", fn); exit(EXIT_FAILURE); } tv->idx = bam_index_load(fn); if (tv->idx == 0) { fprintf(stderr,"Cannot read index for '%s'.\n", fn); exit(EXIT_FAILURE); } tv->lplbuf = bam_lplbuf_init(tv_pl_func, tv); if (fn_fa) tv->fai = fai_load(fn_fa); tv->bca = bcf_call_init(0.83, 13); tv->ins = 1; if ( samples ) { if ( !tv->header->dict ) tv->header->dict = sam_header_parse2(tv->header->text); void *iter = tv->header->dict; const char *key, *val; int n = 0; tv->rg_hash = kh_init(kh_rg); while ( (iter = sam_header2key_val(iter, "RG","ID","SM", &key, &val)) ) { if ( !strcmp(samples,key) || (val && !strcmp(samples,val)) ) { khiter_t k = kh_get(kh_rg, tv->rg_hash, key); if ( k != kh_end(tv->rg_hash) ) continue; int ret; k = kh_put(kh_rg, tv->rg_hash, key, &ret); kh_value(tv->rg_hash, k) = val; n++; } } if ( !n ) { fprintf(stderr,"The sample or read group \"%s\" not present.\n", samples); exit(EXIT_FAILURE); } } return 0; } void base_tv_destroy(tview_t* tv) { bam_lplbuf_destroy(tv->lplbuf); bcf_call_destroy(tv->bca); bam_index_destroy(tv->idx); if (tv->fai) fai_destroy(tv->fai); free(tv->ref); bam_header_destroy(tv->header); bam_close(tv->fp); } int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) { extern unsigned char bam_nt16_table[256]; tview_t *tv = (tview_t*)data; int i, j, c, rb, attr, max_ins = 0; uint32_t call = 0; if (pos < tv->left_pos || tv->ccol > tv->mcol) return 0; // out of screen // print referece rb = (tv->ref && pos - tv->left_pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N'; for (i = tv->last_pos + 1; i < pos; ++i) { if (i%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", i+1); c = tv->ref? 
tv->ref[i - tv->left_pos] : 'N'; tv->my_mvaddch(tv,1, tv->ccol++, c); } if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1); { // call consensus bcf_callret1_t bcr; int qsum[4], a1, a2, tmp; double p[3], prior = 30; bcf_call_glfgen(n, pl, bam_nt16_table[rb], tv->bca, &bcr); for (i = 0; i < 4; ++i) qsum[i] = bcr.qsum[i]<<2 | i; for (i = 1; i < 4; ++i) // insertion sort for (j = i; j > 0 && qsum[j] > qsum[j-1]; --j) tmp = qsum[j], qsum[j] = qsum[j-1], qsum[j-1] = tmp; a1 = qsum[0]&3; a2 = qsum[1]&3; p[0] = bcr.p[a1*5+a1]; p[1] = bcr.p[a1*5+a2] + prior; p[2] = bcr.p[a2*5+a2]; if ("ACGT"[a1] != toupper(rb)) p[0] += prior + 3; if ("ACGT"[a2] != toupper(rb)) p[2] += prior + 3; if (p[0] < p[1] && p[0] < p[2]) call = (1<my_underline(tv); c = ",ACMGRSVTWYHKDBN"[call>>16&0xf]; i = (call&0xffff)/10+1; if (i > 4) i = 4; attr |= tv->my_colorpair(tv,i); if (c == toupper(rb)) c = '.'; tv->my_attron(tv,attr); tv->my_mvaddch(tv,2, tv->ccol, c); tv->my_attroff(tv,attr); if(tv->ins) { // calculate maximum insert for (i = 0; i < n; ++i) { const bam_pileup1_t *p = pl + i; if (p->indel > 0 && max_ins < p->indel) max_ins = p->indel; } } // core loop for (j = 0; j <= max_ins; ++j) { for (i = 0; i < n; ++i) { const bam_pileup1_t *p = pl + i; int row = TV_MIN_ALNROW + p->level - tv->row_shift; if (j == 0) { if (!p->is_del) { if (tv->base_for == TV_BASE_COLOR_SPACE && (c = bam_aux_getCSi(p->b, p->qpos))) { // assume that if we found one color, we will be able to get the color error if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos)) c = bam1_strand(p->b)? ',' : '.'; } else { if (tv->show_name) { char *name = bam1_qname(p->b); c = (p->qpos + 1 >= p->b->core.l_qname)? ' ' : name[p->qpos]; } else { c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; if (tv->is_dot && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.'; } } } else c = p->is_refskip? (bam1_strand(p->b)? 
'<' : '>') : '*'; } else { // padding if (j > p->indel) c = '*'; else { // insertion if (tv->base_for == TV_BASE_NUCL) { if (tv->show_name) { char *name = bam1_qname(p->b); c = (p->qpos + j + 1 >= p->b->core.l_qname)? ' ' : name[p->qpos + j]; } else { c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)]; if (j == 0 && tv->is_dot && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.'; } } else { c = bam_aux_getCSi(p->b, p->qpos + j); if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos + j)) c = bam1_strand(p->b)? ',' : '.'; } } } if (row > TV_MIN_ALNROW && row < tv->mrow) { int x; attr = 0; if (((p->b->core.flag&BAM_FPAIRED) && !(p->b->core.flag&BAM_FPROPER_PAIR)) || (p->b->core.flag & BAM_FSECONDARY)) attr |= tv->my_underline(tv); if (tv->color_for == TV_COLOR_BASEQ) { x = bam1_qual(p->b)[p->qpos]/10 + 1; if (x > 4) x = 4; attr |= tv->my_colorpair(tv,x); } else if (tv->color_for == TV_COLOR_MAPQ) { x = p->b->core.qual/10 + 1; if (x > 4) x = 4; attr |= tv->my_colorpair(tv,x); } else if (tv->color_for == TV_COLOR_NUCL) { x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)] + 5; attr |= tv->my_colorpair(tv,x); } else if(tv->color_for == TV_COLOR_COL) { x = 0; switch(bam_aux_getCSi(p->b, p->qpos)) { case '0': x = 0; break; case '1': x = 1; break; case '2': x = 2; break; case '3': x = 3; break; case '4': x = 4; break; default: x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; break; } x+=5; attr |= tv->my_colorpair(tv,x); } else if(tv->color_for == TV_COLOR_COLQ) { x = bam_aux_getCQi(p->b, p->qpos); if(0 == x) x = bam1_qual(p->b)[p->qpos]; x = x/10 + 1; if (x > 4) x = 4; attr |= tv->my_colorpair(tv,x); } tv->my_attron(tv,attr); tv->my_mvaddch(tv,row, tv->ccol, bam1_strand(p->b)? tolower(c) : toupper(c)); tv->my_attroff(tv,attr); } } c = j? 
'*' : rb; if (c == '*') { attr = tv->my_colorpair(tv,8); tv->my_attron(tv,attr); tv->my_mvaddch(tv,1, tv->ccol++, c); tv->my_attroff(tv,attr); } else tv->my_mvaddch(tv,1, tv->ccol++, c); } tv->last_pos = pos; return 0; } int tv_fetch_func(const bam1_t *b, void *data) { tview_t *tv = (tview_t*)data; if ( tv->rg_hash ) { const uint8_t *rg = bam_aux_get(b, "RG"); if ( !rg ) return 0; khiter_t k = kh_get(kh_rg, tv->rg_hash, (const char*)(rg + 1)); if ( k == kh_end(tv->rg_hash) ) return 0; } if (tv->no_skip) { uint32_t *cigar = bam1_cigar(b); // this is cheating... int i; for (i = 0; i core.n_cigar; ++i) { if ((cigar[i]&0xf) == BAM_CREF_SKIP) cigar[i] = cigar[i]>>4<<4 | BAM_CDEL; } } bam_lplbuf_push(b, tv->lplbuf); return 0; } int base_draw_aln(tview_t *tv, int tid, int pos) { assert(tv!=NULL); // reset tv->my_clear(tv); tv->curr_tid = tid; tv->left_pos = pos; tv->last_pos = tv->left_pos - 1; tv->ccol = 0; // print ref and consensus if (tv->fai) { char *str; if (tv->ref) free(tv->ref); assert(tv->curr_tid>=0); str = (char*)calloc(strlen(tv->header->target_name[tv->curr_tid]) + 30, 1); assert(str!=NULL); sprintf(str, "%s:%d-%d", tv->header->target_name[tv->curr_tid], tv->left_pos + 1, tv->left_pos + tv->mcol); tv->ref = fai_fetch(tv->fai, str, &tv->l_ref); free(str); } // draw aln bam_lplbuf_reset(tv->lplbuf); bam_fetch(tv->fp, tv->idx, tv->curr_tid, tv->left_pos, tv->left_pos + tv->mcol, tv, tv_fetch_func); bam_lplbuf_push(0, tv->lplbuf); while (tv->ccol < tv->mcol) { int pos = tv->last_pos + 1; if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1); tv->my_mvaddch(tv,1, tv->ccol++, (tv->ref && pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N'); ++tv->last_pos; } return 0; } static void error(const char *format, ...) 
{ if ( !format ) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bamtk tview [options] [ref.fasta]\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, " -d display output as (H)tml or (C)urses or (T)ext \n"); fprintf(stderr, " -p chr:pos go directly to this position\n"); fprintf(stderr, " -s STR display only reads from this sample or group\n"); fprintf(stderr, "\n\n"); } else { va_list ap; va_start(ap, format); vfprintf(stderr, format, ap); va_end(ap); } exit(-1); } enum dipsay_mode {display_ncurses,display_html,display_text}; extern tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples); extern tview_t* html_tv_init(const char *fn, const char *fn_fa, const char *samples); extern tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples); int bam_tview_main(int argc, char *argv[]) { int view_mode=display_ncurses; tview_t* tv=NULL; char *samples=NULL, *position=NULL; int c; while ((c = getopt(argc, argv, "s:p:d:")) >= 0) { switch (c) { case 's': samples=optarg; break; case 'p': position=optarg; break; case 'd': { switch(optarg[0]) { case 'H': case 'h': view_mode=display_html;break; case 'T': case 't': view_mode=display_text;break; case 'C': case 'c': view_mode=display_ncurses;break; default: view_mode=display_ncurses;break; } break; } default: error(NULL); } } if (argc==optind) error(NULL); switch(view_mode) { case display_ncurses: { tv = curses_tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples); break; } case display_text: { tv = text_tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples); break; } case display_html: { tv = html_tv_init(argv[optind], (optind+1>=argc)? 
0 : argv[optind+1], samples); break; } } if(tv==NULL) { error("cannot create view"); return EXIT_FAILURE; } if ( position ) { int _tid = -1, _beg, _end; bam_parse_region(tv->header, position, &_tid, &_beg, &_end); if (_tid >= 0) { tv->curr_tid = _tid; tv->left_pos = _beg; } } tv->my_drawaln(tv, tv->curr_tid, tv->left_pos); tv->my_loop(tv); tv->my_destroy(tv); return EXIT_SUCCESS; } samtools-0.1.19/bam_tview.h000066400000000000000000000035301212162403000155610ustar00rootroot00000000000000#ifndef BAM_TVIEW_H #define BAM_TVIEW_H #include #include #include #include #include #include #include "bam.h" #include "faidx.h" #include "bam2bcf.h" #include "sam_header.h" #include "khash.h" KHASH_MAP_INIT_STR(kh_rg, const char *) typedef struct AbstractTview { int mrow, mcol; bam_index_t *idx; bam_lplbuf_t *lplbuf; bam_header_t *header; bamFile fp; int curr_tid, left_pos; faidx_t *fai; bcf_callaux_t *bca; int ccol, last_pos, row_shift, base_for, color_for, is_dot, l_ref, ins, no_skip, show_name; char *ref; khash_t(kh_rg) *rg_hash; /* callbacks */ void (*my_destroy)(struct AbstractTview* ); void (*my_mvprintw)(struct AbstractTview* ,int,int,const char*,...); void (*my_mvaddch)(struct AbstractTview*,int,int,int); void (*my_attron)(struct AbstractTview*,int); void (*my_attroff)(struct AbstractTview*,int); void (*my_clear)(struct AbstractTview*); int (*my_colorpair)(struct AbstractTview*,int); int (*my_drawaln)(struct AbstractTview*,int,int); int (*my_loop)(struct AbstractTview*); int (*my_underline)(struct AbstractTview*); } tview_t; char bam_aux_getCEi(bam1_t *b, int i); char bam_aux_getCSi(bam1_t *b, int i); char bam_aux_getCQi(bam1_t *b, int i); #define TV_MIN_ALNROW 2 #define TV_MAX_GOTO 40 #define TV_LOW_MAPQ 10 #define TV_COLOR_MAPQ 0 #define TV_COLOR_BASEQ 1 #define TV_COLOR_NUCL 2 #define TV_COLOR_COL 3 #define TV_COLOR_COLQ 4 #define TV_BASE_NUCL 0 #define TV_BASE_COLOR_SPACE 1 int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data); int 
base_tv_init(tview_t*,const char *fn, const char *fn_fa, const char *samples); void base_tv_destroy(tview_t*); int base_draw_aln(tview_t *tv, int tid, int pos); typedef struct Tixel { int ch; int attributes; }tixel_t; #endif samtools-0.1.19/bam_tview_curses.c000066400000000000000000000173071212162403000171470ustar00rootroot00000000000000#undef _HAVE_CURSES #if _CURSES_LIB == 0 #elif _CURSES_LIB == 1 #include #ifndef NCURSES_VERSION #warning "_CURSES_LIB=1 but NCURSES_VERSION not defined; tview is NOT compiled" #else #define _HAVE_CURSES #endif #elif _CURSES_LIB == 2 #include #define _HAVE_CURSES #else #warning "_CURSES_LIB is not 0, 1 or 2; tview is NOT compiled" #endif #include "bam_tview.h" #ifdef _HAVE_CURSES typedef struct CursesTview { tview_t view; WINDOW *wgoto, *whelp; } curses_tview_t; #define FROM_TV(ptr) ((curses_tview_t*)ptr) static void curses_destroy(tview_t* base) { curses_tview_t* tv=(curses_tview_t*)base; delwin(tv->wgoto); delwin(tv->whelp); endwin(); base_tv_destroy(base); free(tv); } /* void (*my_mvprintw)(struct AbstractTview* ,int,int,const char*,...); void (*my_)(struct AbstractTview*,int,int,int); void (*my_attron)(struct AbstractTview*,int); void (*my_attroff)(struct AbstractTview*,int); void (*my_clear)(struct AbstractTview*); int (*my_colorpair)(struct AbstractTview*,int); */ static void curses_mvprintw(struct AbstractTview* tv,int y ,int x,const char* fmt,...) 
{ unsigned int size=tv->mcol+2; char* str=malloc(size); if(str==0) exit(EXIT_FAILURE); va_list argptr; va_start(argptr, fmt); vsnprintf(str,size, fmt, argptr); va_end(argptr); mvprintw(y,x,str); free(str); } static void curses_mvaddch(struct AbstractTview* tv,int y,int x,int ch) { mvaddch(y,x,ch); } static void curses_attron(struct AbstractTview* tv,int flag) { attron(flag); } static void curses_attroff(struct AbstractTview* tv,int flag) { attroff(flag); } static void curses_clear(struct AbstractTview* tv) { clear(); } static int curses_colorpair(struct AbstractTview* tv,int flag) { return COLOR_PAIR(flag); } static int curses_drawaln(struct AbstractTview* tv, int tid, int pos) { return base_draw_aln(tv, tid, pos); } static void tv_win_goto(curses_tview_t *tv, int *tid, int *pos) { char str[256], *p; int i, l = 0; tview_t *base=(tview_t*)tv; wborder(tv->wgoto, '|', '|', '-', '-', '+', '+', '+', '+'); mvwprintw(tv->wgoto, 1, 2, "Goto: "); for (;;) { int c = wgetch(tv->wgoto); wrefresh(tv->wgoto); if (c == KEY_BACKSPACE || c == '\010' || c == '\177') { if(l > 0) --l; } else if (c == KEY_ENTER || c == '\012' || c == '\015') { int _tid = -1, _beg, _end; if (str[0] == '=') { _beg = strtol(str+1, &p, 10) - 1; if (_beg > 0) { *pos = _beg; return; } } else { bam_parse_region(base->header, str, &_tid, &_beg, &_end); if (_tid >= 0) { *tid = _tid; *pos = _beg; return; } } } else if (isgraph(c)) { if (l < TV_MAX_GOTO) str[l++] = c; } else if (c == '\027') l = 0; else if (c == '\033') return; str[l] = '\0'; for (i = 0; i < TV_MAX_GOTO; ++i) mvwaddch(tv->wgoto, 1, 8 + i, ' '); mvwprintw(tv->wgoto, 1, 8, "%s", str); } } static void tv_win_help(curses_tview_t *tv) { int r = 1; tview_t* base=(tview_t*)base; WINDOW *win = tv->whelp; wborder(win, '|', '|', '-', '-', '+', '+', '+', '+'); mvwprintw(win, r++, 2, " -=- Help -=- "); r++; mvwprintw(win, r++, 2, "? 
This window"); mvwprintw(win, r++, 2, "Arrows Small scroll movement"); mvwprintw(win, r++, 2, "h,j,k,l Small scroll movement"); mvwprintw(win, r++, 2, "H,J,K,L Large scroll movement"); mvwprintw(win, r++, 2, "ctrl-H Scroll 1k left"); mvwprintw(win, r++, 2, "ctrl-L Scroll 1k right"); mvwprintw(win, r++, 2, "space Scroll one screen"); mvwprintw(win, r++, 2, "backspace Scroll back one screen"); mvwprintw(win, r++, 2, "g Go to specific location"); mvwprintw(win, r++, 2, "m Color for mapping qual"); mvwprintw(win, r++, 2, "n Color for nucleotide"); mvwprintw(win, r++, 2, "b Color for base quality"); mvwprintw(win, r++, 2, "c Color for cs color"); mvwprintw(win, r++, 2, "z Color for cs qual"); mvwprintw(win, r++, 2, ". Toggle on/off dot view"); mvwprintw(win, r++, 2, "s Toggle on/off ref skip"); mvwprintw(win, r++, 2, "r Toggle on/off rd name"); mvwprintw(win, r++, 2, "N Turn on nt view"); mvwprintw(win, r++, 2, "C Turn on cs view"); mvwprintw(win, r++, 2, "i Toggle on/off ins"); mvwprintw(win, r++, 2, "q Exit"); r++; mvwprintw(win, r++, 2, "Underline: Secondary or orphan"); mvwprintw(win, r++, 2, "Blue: 0-9 Green: 10-19"); mvwprintw(win, r++, 2, "Yellow: 20-29 White: >=30"); wrefresh(win); wgetch(win); } static int curses_underline(tview_t* tv) { return A_UNDERLINE; } static int curses_loop(tview_t* tv) { int tid, pos; curses_tview_t *CTV=(curses_tview_t *)tv; tid = tv->curr_tid; pos = tv->left_pos; while (1) { int c = getch(); switch (c) { case '?': tv_win_help(CTV); break; case '\033': case 'q': goto end_loop; case '/': case 'g': tv_win_goto(CTV, &tid, &pos); break; case 'm': tv->color_for = TV_COLOR_MAPQ; break; case 'b': tv->color_for = TV_COLOR_BASEQ; break; case 'n': tv->color_for = TV_COLOR_NUCL; break; case 'c': tv->color_for = TV_COLOR_COL; break; case 'z': tv->color_for = TV_COLOR_COLQ; break; case 's': tv->no_skip = !tv->no_skip; break; case 'r': tv->show_name = !tv->show_name; break; case KEY_LEFT: case 'h': --pos; break; case KEY_RIGHT: case 'l': ++pos; 
break; case KEY_SLEFT: case 'H': pos -= 20; break; case KEY_SRIGHT: case 'L': pos += 20; break; case '.': tv->is_dot = !tv->is_dot; break; case 'N': tv->base_for = TV_BASE_NUCL; break; case 'C': tv->base_for = TV_BASE_COLOR_SPACE; break; case 'i': tv->ins = !tv->ins; break; case '\010': pos -= 1000; break; case '\014': pos += 1000; break; case ' ': pos += tv->mcol; break; case KEY_UP: case 'j': --tv->row_shift; break; case KEY_DOWN: case 'k': ++tv->row_shift; break; case KEY_BACKSPACE: case '\177': pos -= tv->mcol; break; case KEY_RESIZE: getmaxyx(stdscr, tv->mrow, tv->mcol); break; default: continue; } if (pos < 0) pos = 0; if (tv->row_shift < 0) tv->row_shift = 0; tv->my_drawaln(tv, tid, pos); } end_loop: return 0; } tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples) { curses_tview_t *tv = (curses_tview_t*)calloc(1, sizeof(curses_tview_t)); tview_t* base=(tview_t*)tv; if(tv==0) { fprintf(stderr,"Calloc failed\n"); return 0; } base_tv_init(base,fn,fn_fa,samples); /* initialize callbacks */ #define SET_CALLBACK(fun) base->my_##fun=curses_##fun; SET_CALLBACK(destroy); SET_CALLBACK(mvprintw); SET_CALLBACK(mvaddch); SET_CALLBACK(attron); SET_CALLBACK(attroff); SET_CALLBACK(clear); SET_CALLBACK(colorpair); SET_CALLBACK(drawaln); SET_CALLBACK(loop); SET_CALLBACK(underline); #undef SET_CALLBACK initscr(); keypad(stdscr, TRUE); clear(); noecho(); cbreak(); getmaxyx(stdscr, base->mrow, base->mcol); tv->wgoto = newwin(3, TV_MAX_GOTO + 10, 10, 5); tv->whelp = newwin(29, 40, 5, 5); start_color(); init_pair(1, COLOR_BLUE, COLOR_BLACK); init_pair(2, COLOR_GREEN, COLOR_BLACK); init_pair(3, COLOR_YELLOW, COLOR_BLACK); init_pair(4, COLOR_WHITE, COLOR_BLACK); init_pair(5, COLOR_GREEN, COLOR_BLACK); init_pair(6, COLOR_CYAN, COLOR_BLACK); init_pair(7, COLOR_YELLOW, COLOR_BLACK); init_pair(8, COLOR_RED, COLOR_BLACK); init_pair(9, COLOR_BLUE, COLOR_BLACK); return base; } #else // #ifdef _HAVE_CURSES #include #warning "No curses library is available; tview 
with curses is disabled." extern tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples); tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples) { return text_tv_init(fn,fn_fa,samples); } #endif // #ifdef _HAVE_CURSES samtools-0.1.19/bam_tview_html.c000066400000000000000000000203261212162403000166020ustar00rootroot00000000000000#include #include "bam_tview.h" #define UNDERLINE_FLAG 10 typedef struct HtmlTview { tview_t view; int row_count; tixel_t** screen; FILE* out; int attributes;/* color... */ } html_tview_t; #define FROM_TV(ptr) ((html_tview_t*)ptr) static void html_destroy(tview_t* base) { int i; html_tview_t* tv=(html_tview_t*)base; if(tv->screen!=NULL) { for(i=0;i< tv->row_count;++i) free(tv->screen[i]); free(tv->screen); } base_tv_destroy(base); free(tv); } /* void (*my_mvprintw)(struct AbstractTview* ,int,int,const char*,...); void (*my_)(struct AbstractTview*,int,int,int); void (*my_attron)(struct AbstractTview*,int); void (*my_attroff)(struct AbstractTview*,int); void (*my_clear)(struct AbstractTview*); int (*my_colorpair)(struct AbstractTview*,int); */ static void html_mvprintw(struct AbstractTview* tv,int y ,int x,const char* fmt,...) 
{ int i,nchars=0; unsigned int size=tv->mcol+2; char* str=malloc(size); if(str==0) exit(EXIT_FAILURE); va_list argptr; va_start(argptr, fmt); nchars=vsnprintf(str,size, fmt, argptr); va_end(argptr); for(i=0;i< nchars;++i) { tv->my_mvaddch(tv,y,x+i,str[i]); } free(str); } static void html_mvaddch(struct AbstractTview* tv,int y,int x,int ch) { tixel_t* row=NULL; html_tview_t* ptr=FROM_TV(tv); if( x >= tv->mcol ) return; //out of screen while(ptr->row_count<=y) { int x; row=(tixel_t*)calloc(tv->mcol,sizeof(tixel_t)); if(row==0) exit(EXIT_FAILURE); for(x=0;xmcol;++x) {row[x].ch=' ';row[x].attributes=0;} ptr->screen=(tixel_t**)realloc(ptr->screen,sizeof(tixel_t*)*(ptr->row_count+1)); ptr->screen[ptr->row_count++]=row; } row=ptr->screen[y]; row[x].ch=ch; row[x].attributes=ptr->attributes; } static void html_attron(struct AbstractTview* tv,int flag) { html_tview_t* ptr=FROM_TV(tv); ptr->attributes |= flag; } static void html_attroff(struct AbstractTview* tv,int flag) { html_tview_t* ptr=FROM_TV(tv); ptr->attributes &= ~(flag); } static void html_clear(struct AbstractTview* tv) { html_tview_t* ptr=FROM_TV(tv); if(ptr->screen!=NULL) { int i; for(i=0;i< ptr->row_count;++i) free(ptr->screen[i]); free(ptr->screen); ptr->screen=NULL; } ptr->row_count=0; ptr->attributes=0; } static int html_colorpair(struct AbstractTview* tv,int flag) { return (1 << (flag)); } static int html_drawaln(struct AbstractTview* tv, int tid, int pos) { int y,x; html_tview_t* ptr=FROM_TV(tv); html_clear(tv); base_draw_aln(tv, tid, pos); fputs("",ptr->out); fprintf(ptr->out,"%s:%d", tv->header->target_name[tid], pos+1 ); //style fputs("",ptr->out); fputs("",ptr->out); fprintf(ptr->out,"
%s:%d
", tv->header->target_name[tid], pos+1 ); fputs("
",ptr->out);
    for(y=0;y< ptr->row_count;++y)
    	{
    	
    	for(x=0;x< tv->mcol;++x)
	    	{
	    	
		
		if(x== 0 || ptr->screen[y][x].attributes != ptr->screen[y][x-1].attributes)
	    		{
	    		int css=0;
			fprintf(ptr->out,"1) fprintf(stderr,"css=%d pow2=%d vs %d\n",css,(1 << (css)),ptr->screen[y][x].attributes);
	    			if(( (ptr->screen[y][x].attributes) & (1 << (css)))!=0)
	    				{
	    				
	    				fprintf(ptr->out," class='tviewc%s%d'",
	    					(( (ptr->screen[y][x].attributes) & (1 << (UNDERLINE_FLAG)) )!=0?"u":""),
	    					css);
	    				break;
	    				}
	    			++css;
	    			}


	    		fputs(">",ptr->out);
	    		}
		
		int ch=ptr->screen[y][x].ch;
		switch(ch)
			{
			case '<': fputs("<",ptr->out);break;
			case '>': fputs(">",ptr->out);break;
			case '&': fputs("&",ptr->out);break;
			default: fputc(ch,ptr->out); break;
			}
	    	
	    	
	    	if(x+1 == tv->mcol  || ptr->screen[y][x].attributes!=ptr->screen[y][x+1].attributes)
	    		{
	    		fputs("",ptr->out);
	    		}
	    	}
    	if(y+1 < ptr->row_count) fputs("
",ptr->out); } fputs("
",ptr->out); return 0; } #define ANSI_COLOR_RED "\x1b[31m" #define ANSI_COLOR_GREEN "\x1b[32m" #define ANSI_COLOR_YELLOW "\x1b[33m" #define ANSI_COLOR_BLUE "\x1b[34m" #define ANSI_COLOR_MAGENTA "\x1b[35m" #define ANSI_COLOR_CYAN "\x1b[36m" #define ANSI_COLOR_BLACK "\x1b[0m" #define ANSI_COLOR_RESET ANSI_COLOR_BLACK #define ANSI_UNDERLINE_SET "\033[4m" #define ANSI_UNDERLINE_UNSET "\033[0m" static int text_drawaln(struct AbstractTview* tv, int tid, int pos) { int y,x; html_tview_t* ptr=FROM_TV(tv); html_clear(tv); base_draw_aln(tv, tid, pos); int is_term= isatty(fileno(ptr->out)); for(y=0;y< ptr->row_count;++y) { for(x=0;x< tv->mcol;++x) { if(is_term) { int css=0; while(css<32) { if(( (ptr->screen[y][x].attributes) & (1 << (css)))!=0) { break; } ++css; } switch(css) { //CSS(0, "black"); case 1: fputs(ANSI_COLOR_BLUE,ptr->out); break; case 2: fputs(ANSI_COLOR_GREEN,ptr->out); break; case 3: fputs(ANSI_COLOR_YELLOW,ptr->out); break; //CSS(4, "black"); case 5: fputs(ANSI_COLOR_GREEN,ptr->out); break; case 6: fputs(ANSI_COLOR_CYAN,ptr->out); break; case 7: fputs(ANSI_COLOR_YELLOW,ptr->out); break; case 8: fputs(ANSI_COLOR_RED,ptr->out); break; case 9: fputs(ANSI_COLOR_BLUE,ptr->out); break; default:break; } if(( (ptr->screen[y][x].attributes) & (1 << (UNDERLINE_FLAG)))!=0) { fputs(ANSI_UNDERLINE_SET,ptr->out); } } int ch=ptr->screen[y][x].ch; fputc(ch,ptr->out); if(is_term) { fputs(ANSI_COLOR_RESET,ptr->out); if(( (ptr->screen[y][x].attributes) & (1 << (UNDERLINE_FLAG)))!=0) { fputs(ANSI_UNDERLINE_UNSET,ptr->out); } } } fputc('\n',ptr->out); } return 0; } static int html_loop(tview_t* tv) { //tv->my_drawaln(tv, tv->curr_tid, tv->left_pos); return 0; } static int html_underline(tview_t* tv) { return (1 << UNDERLINE_FLAG); } /* static void init_pair(html_tview_t *tv,int id_ge_1, const char* pen, const char* paper) { } */ tview_t* html_tv_init(const char *fn, const char *fn_fa, const char *samples) { char* colstr=getenv("COLUMNS"); html_tview_t *tv = 
(html_tview_t*)calloc(1, sizeof(html_tview_t)); tview_t* base=(tview_t*)tv; if(tv==0) { fprintf(stderr,"Calloc failed\n"); return 0; } tv->row_count=0; tv->screen=NULL; tv->out=stdout; tv->attributes=0; base_tv_init(base,fn,fn_fa,samples); /* initialize callbacks */ #define SET_CALLBACK(fun) base->my_##fun=html_##fun; SET_CALLBACK(destroy); SET_CALLBACK(mvprintw); SET_CALLBACK(mvaddch); SET_CALLBACK(attron); SET_CALLBACK(attroff); SET_CALLBACK(clear); SET_CALLBACK(colorpair); SET_CALLBACK(drawaln); SET_CALLBACK(loop); SET_CALLBACK(underline); #undef SET_CALLBACK if(colstr!=0) { base->mcol=atoi(colstr); if(base->mcol<10) base->mcol=80; } base->mrow=99999; /* init_pair(tv,1, "blue", "white"); init_pair(tv,2, "green", "white"); init_pair(tv,3, "yellow", "white"); init_pair(tv,4, "white", "white"); init_pair(tv,5, "green", "white"); init_pair(tv,6, "cyan", "white"); init_pair(tv,7, "yellow", "white"); init_pair(tv,8, "red", "white"); init_pair(tv,9, "blue", "white"); */ return base; } tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples) { tview_t* tv=html_tv_init(fn,fn_fa,samples); tv->my_drawaln=text_drawaln; return tv; } samtools-0.1.19/bamshuf.c000066400000000000000000000070231212162403000152250ustar00rootroot00000000000000#include #include #include #include #include #include "sam.h" #include "ksort.h" #define DEF_CLEVEL 1 static inline unsigned hash_Wang(unsigned key) { key += ~(key << 15); key ^= (key >> 10); key += (key << 3); key ^= (key >> 6); key += ~(key << 11); key ^= (key >> 16); return key; } static inline unsigned hash_X31_Wang(const char *s) { unsigned h = *s; if (h) { for (++s ; *s; ++s) h = (h << 5) - h + *s; return hash_Wang(h); } else return 0; } typedef struct { unsigned key; bam1_t *b; } elem_t; static inline int elem_lt(elem_t x, elem_t y) { if (x.key < y.key) return 1; if (x.key == y.key) { int t; t = strcmp(bam_get_qname(x.b), bam_get_qname(y.b)); if (t < 0) return 1; return (t == 0 && ((x.b->core.flag>>6&3) < 
(y.b->core.flag>>6&3))); } else return 0; } KSORT_INIT(bamshuf, elem_t, elem_lt) static void bamshuf(const char *fn, int n_files, const char *pre, int clevel, int is_stdout) { BGZF *fp, *fpw, **fpt; char **fnt, modew[8]; bam1_t *b; int i, l; bam_hdr_t *h; int64_t *cnt; // split fp = strcmp(fn, "-")? bgzf_open(fn, "r") : bgzf_dopen(fileno(stdin), "r"); assert(fp); h = bam_hdr_read(fp); fnt = (char**)calloc(n_files, sizeof(void*)); fpt = (BGZF**)calloc(n_files, sizeof(void*)); cnt = (int64_t*)calloc(n_files, 8); l = strlen(pre); for (i = 0; i < n_files; ++i) { fnt[i] = (char*)calloc(l + 10, 1); sprintf(fnt[i], "%s.%.4d.bam", pre, i); fpt[i] = bgzf_open(fnt[i], "w1"); bam_hdr_write(fpt[i], h); } b = bam_init1(); while (bam_read1(fp, b) >= 0) { uint32_t x; x = hash_X31_Wang(bam_get_qname(b)) % n_files; bam_write1(fpt[x], b); ++cnt[x]; } bam_destroy1(b); for (i = 0; i < n_files; ++i) bgzf_close(fpt[i]); free(fpt); bgzf_close(fp); // merge sprintf(modew, "w%d", (clevel >= 0 && clevel <= 9)? clevel : DEF_CLEVEL); if (!is_stdout) { // output to a file char *fnw = (char*)calloc(l + 5, 1); sprintf(fnw, "%s.bam", pre); fpw = bgzf_open(fnw, modew); free(fnw); } else fpw = bgzf_dopen(fileno(stdout), modew); // output to stdout bam_hdr_write(fpw, h); bam_hdr_destroy(h); for (i = 0; i < n_files; ++i) { int64_t j, c = cnt[i]; elem_t *a; fp = bgzf_open(fnt[i], "r"); bam_hdr_destroy(bam_hdr_read(fp)); a = (elem_t*)calloc(c, sizeof(elem_t)); for (j = 0; j < c; ++j) { a[j].b = bam_init1(); assert(bam_read1(fp, a[j].b) >= 0); a[j].key = hash_X31_Wang(bam_get_qname(a[j].b)); } bgzf_close(fp); unlink(fnt[i]); free(fnt[i]); ks_introsort(bamshuf, c, a); for (j = 0; j < c; ++j) { bam_write1(fpw, a[j].b); bam_destroy1(a[j].b); } free(a); } bgzf_close(fpw); free(fnt); free(cnt); } int main_bamshuf(int argc, char *argv[]) { int c, n_files = 64, clevel = DEF_CLEVEL, is_stdout = 0, is_un = 0; while ((c = getopt(argc, argv, "n:l:uO")) >= 0) { switch (c) { case 'n': n_files = atoi(optarg); break; 
case 'l': clevel = atoi(optarg); break; case 'u': is_un = 1; break; case 'O': is_stdout = 1; break; } } if (is_un) clevel = 0; if (optind + 2 > argc) { fprintf(stderr, "\nUsage: bamshuf [-Ou] [-n nFiles] [-c cLevel] \n\n"); fprintf(stderr, "Options: -O output to stdout\n"); fprintf(stderr, " -u uncompressed BAM output\n"); fprintf(stderr, " -l INT compression level [%d]\n", DEF_CLEVEL); fprintf(stderr, " -n INT number of temporary files [%d]\n", n_files); fprintf(stderr, "\n"); return 1; } bamshuf(argv[optind], n_files, argv[optind+1], clevel, is_stdout); return 0; } samtools-0.1.19/bamtk.c000066400000000000000000000124331212162403000146770ustar00rootroot00000000000000#include #include #include #include #include "bam.h" #ifdef _USE_KNETFILE #include "knetfile.h" #endif int bam_taf2baf(int argc, char *argv[]); int bam_mpileup(int argc, char *argv[]); int bam_merge(int argc, char *argv[]); int bam_index(int argc, char *argv[]); int bam_sort(int argc, char *argv[]); int bam_tview_main(int argc, char *argv[]); int bam_mating(int argc, char *argv[]); int bam_rmdup(int argc, char *argv[]); int bam_flagstat(int argc, char *argv[]); int bam_fillmd(int argc, char *argv[]); int bam_idxstats(int argc, char *argv[]); int main_samview(int argc, char *argv[]); int main_import(int argc, char *argv[]); int main_reheader(int argc, char *argv[]); int main_cut_target(int argc, char *argv[]); int main_phase(int argc, char *argv[]); int main_cat(int argc, char *argv[]); int main_depth(int argc, char *argv[]); int main_bam2fq(int argc, char *argv[]); int main_pad2unpad(int argc, char *argv[]); int main_bedcov(int argc, char *argv[]); int main_bamshuf(int argc, char *argv[]); int faidx_main(int argc, char *argv[]); static int usage() { fprintf(stderr, "\n"); fprintf(stderr, "Program: samtools (Tools for alignments in the SAM format)\n"); fprintf(stderr, "Version: %s\n\n", BAM_VERSION); fprintf(stderr, "Usage: samtools [options]\n\n"); fprintf(stderr, "Command: view SAM<->BAM 
conversion\n"); fprintf(stderr, " sort sort alignment file\n"); fprintf(stderr, " mpileup multi-way pileup\n"); fprintf(stderr, " depth compute the depth\n"); fprintf(stderr, " faidx index/extract FASTA\n"); #if _CURSES_LIB != 0 fprintf(stderr, " tview text alignment viewer\n"); #endif fprintf(stderr, " index index alignment\n"); fprintf(stderr, " idxstats BAM index stats (r595 or later)\n"); fprintf(stderr, " fixmate fix mate information\n"); fprintf(stderr, " flagstat simple stats\n"); fprintf(stderr, " calmd recalculate MD/NM tags and '=' bases\n"); fprintf(stderr, " merge merge sorted alignments\n"); fprintf(stderr, " rmdup remove PCR duplicates\n"); fprintf(stderr, " reheader replace BAM header\n"); fprintf(stderr, " cat concatenate BAMs\n"); fprintf(stderr, " bedcov read depth per BED region\n"); fprintf(stderr, " targetcut cut fosmid regions (for fosmid pool only)\n"); fprintf(stderr, " phase phase heterozygotes\n"); fprintf(stderr, " bamshuf shuffle and group alignments by name\n"); // fprintf(stderr, " depad convert padded BAM to unpadded BAM\n"); // not stable fprintf(stderr, "\n"); #ifdef _WIN32 fprintf(stderr, "\ Note: The Windows version of SAMtools is mainly designed for read-only\n\ operations, such as viewing the alignments and generating the pileup.\n\ Binary files generated by the Windows version may be buggy.\n\n"); #endif return 1; } int main(int argc, char *argv[]) { #ifdef _WIN32 setmode(fileno(stdout), O_BINARY); setmode(fileno(stdin), O_BINARY); #ifdef _USE_KNETFILE knet_win32_init(); #endif #endif if (argc < 2) return usage(); if (strcmp(argv[1], "view") == 0) return main_samview(argc-1, argv+1); else if (strcmp(argv[1], "import") == 0) return main_import(argc-1, argv+1); else if (strcmp(argv[1], "mpileup") == 0) return bam_mpileup(argc-1, argv+1); else if (strcmp(argv[1], "merge") == 0) return bam_merge(argc-1, argv+1); else if (strcmp(argv[1], "sort") == 0) return bam_sort(argc-1, argv+1); else if (strcmp(argv[1], "index") == 0) return 
bam_index(argc-1, argv+1); else if (strcmp(argv[1], "idxstats") == 0) return bam_idxstats(argc-1, argv+1); else if (strcmp(argv[1], "faidx") == 0) return faidx_main(argc-1, argv+1); else if (strcmp(argv[1], "fixmate") == 0) return bam_mating(argc-1, argv+1); else if (strcmp(argv[1], "rmdup") == 0) return bam_rmdup(argc-1, argv+1); else if (strcmp(argv[1], "flagstat") == 0) return bam_flagstat(argc-1, argv+1); else if (strcmp(argv[1], "calmd") == 0) return bam_fillmd(argc-1, argv+1); else if (strcmp(argv[1], "fillmd") == 0) return bam_fillmd(argc-1, argv+1); else if (strcmp(argv[1], "reheader") == 0) return main_reheader(argc-1, argv+1); else if (strcmp(argv[1], "cat") == 0) return main_cat(argc-1, argv+1); else if (strcmp(argv[1], "targetcut") == 0) return main_cut_target(argc-1, argv+1); else if (strcmp(argv[1], "phase") == 0) return main_phase(argc-1, argv+1); else if (strcmp(argv[1], "depth") == 0) return main_depth(argc-1, argv+1); else if (strcmp(argv[1], "bam2fq") == 0) return main_bam2fq(argc-1, argv+1); else if (strcmp(argv[1], "pad2unpad") == 0) return main_pad2unpad(argc-1, argv+1); else if (strcmp(argv[1], "depad") == 0) return main_pad2unpad(argc-1, argv+1); else if (strcmp(argv[1], "bedcov") == 0) return main_bedcov(argc-1, argv+1); else if (strcmp(argv[1], "bamshuf") == 0) return main_bamshuf(argc-1, argv+1); else if (strcmp(argv[1], "pileup") == 0) { fprintf(stderr, "[main] The `pileup' command has been removed. 
Please use `mpileup' instead.\n"); return 1; } #if _CURSES_LIB != 0 else if (strcmp(argv[1], "tview") == 0) return bam_tview_main(argc-1, argv+1); #endif else { fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]); return 1; } return 0; } samtools-0.1.19/bcftools/000077500000000000000000000000001212162403000152455ustar00rootroot00000000000000samtools-0.1.19/bcftools/Makefile000066400000000000000000000023201212162403000167020ustar00rootroot00000000000000CC= gcc CFLAGS= -g -Wall -O2 #-m64 #-arch ppc DFLAGS= -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE LOBJS= bcf.o vcf.o bcfutils.o prob1.o em.o kfunc.o kmin.o index.o fet.o mut.o bcf2qcall.o OMISC= .. AOBJS= call1.o main.o $(OMISC)/kstring.o $(OMISC)/bgzf.o $(OMISC)/knetfile.o $(OMISC)/bedidx.o PROG= bcftools INCLUDES= SUBDIRS= . .SUFFIXES:.c .o .c.o: $(CC) -c $(CFLAGS) $(DFLAGS) -I.. $(INCLUDES) $< -o $@ all-recur lib-recur clean-recur cleanlocal-recur install-recur: @target=`echo $@ | sed s/-recur//`; \ wdir=`pwd`; \ list='$(SUBDIRS)'; for subdir in $$list; do \ cd $$subdir; \ $(MAKE) CC="$(CC)" DFLAGS="$(DFLAGS)" CFLAGS="$(CFLAGS)" \ INCLUDES="$(INCLUDES)" LIBPATH="$(LIBPATH)" $$target || exit 1; \ cd $$wdir; \ done; all:$(PROG) lib:libbcf.a libbcf.a:$(LOBJS) $(AR) -csru $@ $(LOBJS) bcftools:lib $(AOBJS) $(CC) $(CFLAGS) -o $@ $(AOBJS) -L. $(LIBPATH) -lbcf -lm -lz -lpthread bcf.o:bcf.h vcf.o:bcf.h index.o:bcf.h bcfutils.o:bcf.h prob1.o:prob1.h bcf.h call1.o:prob1.h bcf.h bcf2qcall.o:bcf.h main.o:bcf.h bcf.pdf:bcf.tex pdflatex bcf cleanlocal: rm -fr gmon.out *.o a.out *.dSYM $(PROG) *~ *.a bcf.aux bcf.log bcf.pdf *.class libbcf.*.dylib libbcf.so* clean:cleanlocal-recur samtools-0.1.19/bcftools/README000066400000000000000000000036561212162403000161370ustar00rootroot00000000000000The view command of bcftools calls variants, tests Hardy-Weinberg equilibrium (HWE), tests allele balances and estimates allele frequency. 
This command calls a site as a potential variant if P(ref|D,F) is below 0.9 (controlled by the -p option), where D is data and F is the prior allele frequency spectrum (AFS). The view command performs two types of allele balance tests, both based on Fisher's exact test for 2x2 contingency tables with the row variable being reference allele or not. In the first table, the column variable is strand. A two-tail P-value is taken. We test if variant bases tend to come from one strand. In the second table, the column variable is whether a base appears in the first or the last 11bp of the read. A one-tail P-value is taken. We test if variant bases tend to occur towards the end of reads, which is usually an indication of misalignment. Site allele frequency is estimated in two ways. In the first way, the frequency is estimated as \argmax_f P(D|f) under the assumption of HWE. Prior AFS is not used. In the second way, the frequency is estimated as the posterior expectation of allele counts \sum_k kP(k|D,F), divided by the total number of haplotypes. HWE is not assumed, but the estimate depends on the prior AFS. The two estimates largely agree when the signal is strong, but may differ greatly on weak sites, as in this case the prior plays an important role. To test HWE, we calculate the posterior distribution of genotypes (ref-hom, het and alt-hom). A chi-square test is performed. It is worth noting that the model used here is prior dependent and assumes HWE, which is different from both models for allele frequency estimation. The new model actually yields a third estimate of site allele frequency. The estimated allele frequency spectrum is printed to stderr per 64k sites. The estimate is in fact only the first round of an EM procedure.
The second model (not the model for HWE testing) is used to estimate the AFS.samtools-0.1.19/bcftools/bcf.c000066400000000000000000000245111212162403000161460ustar00rootroot00000000000000#include #include #include #include "kstring.h" #include "bcf.h" bcf_t *bcf_open(const char *fn, const char *mode) { bcf_t *b; b = calloc(1, sizeof(bcf_t)); if (strchr(mode, 'w')) { b->fp = strcmp(fn, "-")? bgzf_open(fn, mode) : bgzf_fdopen(fileno(stdout), mode); } else { b->fp = strcmp(fn, "-")? bgzf_open(fn, mode) : bgzf_fdopen(fileno(stdin), mode); } return b; } int bcf_close(bcf_t *b) { int ret; if (b == 0) return 0; ret = bgzf_close(b->fp); free(b); return ret; } int bcf_hdr_write(bcf_t *b, const bcf_hdr_t *h) { if (b == 0 || h == 0) return -1; bgzf_write(b->fp, "BCF\4", 4); bgzf_write(b->fp, &h->l_nm, 4); bgzf_write(b->fp, h->name, h->l_nm); bgzf_write(b->fp, &h->l_smpl, 4); bgzf_write(b->fp, h->sname, h->l_smpl); bgzf_write(b->fp, &h->l_txt, 4); bgzf_write(b->fp, h->txt, h->l_txt); bgzf_flush(b->fp); return 16 + h->l_nm + h->l_smpl + h->l_txt; } bcf_hdr_t *bcf_hdr_read(bcf_t *b) { uint8_t magic[4]; bcf_hdr_t *h; if (b == 0) return 0; h = calloc(1, sizeof(bcf_hdr_t)); bgzf_read(b->fp, magic, 4); bgzf_read(b->fp, &h->l_nm, 4); h->name = malloc(h->l_nm); bgzf_read(b->fp, h->name, h->l_nm); bgzf_read(b->fp, &h->l_smpl, 4); h->sname = malloc(h->l_smpl); bgzf_read(b->fp, h->sname, h->l_smpl); bgzf_read(b->fp, &h->l_txt, 4); h->txt = malloc(h->l_txt); bgzf_read(b->fp, h->txt, h->l_txt); bcf_hdr_sync(h); return h; } void bcf_hdr_destroy(bcf_hdr_t *h) { if (h == 0) return; free(h->name); free(h->sname); free(h->txt); free(h->ns); free(h->sns); free(h); } static inline char **cnt_null(int l, char *str, int *_n) { int n = 0; char *p, **list; *_n = 0; if (l == 0 || str == 0) return 0; for (p = str; p != str + l; ++p) if (*p == 0) ++n; *_n = n; list = calloc(n, sizeof(void*)); list[0] = str; for (p = str, n = 1; p < str + l - 1; ++p) if (*p == 0) list[n++] = p + 1; return list; } int 
bcf_hdr_sync(bcf_hdr_t *b) { if (b == 0) return -1; if (b->ns) free(b->ns); if (b->sns) free(b->sns); if (b->l_nm) b->ns = cnt_null(b->l_nm, b->name, &b->n_ref); else b->ns = 0, b->n_ref = 0; b->sns = cnt_null(b->l_smpl, b->sname, &b->n_smpl); return 0; } int bcf_sync(bcf1_t *b) { char *p, *tmp[5]; int i, n, n_smpl = b->n_smpl; ks_tokaux_t aux; // set ref, alt, flt, info, fmt b->ref = b->alt = b->flt = b->info = b->fmt = 0; for (p = b->str, n = 0; p < b->str + b->l_str; ++p) { if (*p == 0 && p+1 != b->str + b->l_str) { if (n == 5) { ++n; break; } else tmp[n++] = p + 1; } } if (n != 5) { fprintf(stderr, "[%s] incorrect number of fields (%d != 5) at %d:%d\n", __func__, n, b->tid, b->pos); return -1; } b->ref = tmp[0]; b->alt = tmp[1]; b->flt = tmp[2]; b->info = tmp[3]; b->fmt = tmp[4]; // set n_alleles if (*b->alt == 0) b->n_alleles = 1; else { for (p = b->alt, n = 1; *p; ++p) if (*p == ',') ++n; b->n_alleles = n + 1; } // set n_gi and gi[i].fmt for (p = b->fmt, n = 1; *p; ++p) if (*p == ':') ++n; if (n > b->m_gi) { int old_m = b->m_gi; b->m_gi = n; kroundup32(b->m_gi); b->gi = realloc(b->gi, b->m_gi * sizeof(bcf_ginfo_t)); memset(b->gi + old_m, 0, (b->m_gi - old_m) * sizeof(bcf_ginfo_t)); } b->n_gi = n; for (p = kstrtok(b->fmt, ":", &aux), n = 0; p; p = kstrtok(0, 0, &aux)) b->gi[n++].fmt = bcf_str2int(p, aux.p - p); // set gi[i].len for (i = 0; i < b->n_gi; ++i) { if (b->gi[i].fmt == bcf_str2int("PL", 2)) { b->gi[i].len = b->n_alleles * (b->n_alleles + 1) / 2; } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("HQ", 2) || b->gi[i].fmt == bcf_str2int("DV", 2)) { b->gi[i].len = 2; } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("GT", 2)) { b->gi[i].len = 1; } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) { b->gi[i].len = 4; } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) { b->gi[i].len = b->n_alleles * (b->n_alleles + 1) / 2 * 4; } b->gi[i].data = realloc(b->gi[i].data, n_smpl * b->gi[i].len); } return 0; } 
int bcf_write(bcf_t *bp, const bcf_hdr_t *h, const bcf1_t *b) { int i, l = 0; if (b == 0) return -1; bgzf_write(bp->fp, &b->tid, 4); bgzf_write(bp->fp, &b->pos, 4); bgzf_write(bp->fp, &b->qual, 4); bgzf_write(bp->fp, &b->l_str, 4); bgzf_write(bp->fp, b->str, b->l_str); l = 12 + b->l_str; for (i = 0; i < b->n_gi; ++i) { bgzf_write(bp->fp, b->gi[i].data, b->gi[i].len * h->n_smpl); l += b->gi[i].len * h->n_smpl; } return l; } int bcf_read(bcf_t *bp, const bcf_hdr_t *h, bcf1_t *b) { int i, l = 0; if (b == 0) return -1; if (bgzf_read(bp->fp, &b->tid, 4) == 0) return -1; b->n_smpl = h->n_smpl; bgzf_read(bp->fp, &b->pos, 4); bgzf_read(bp->fp, &b->qual, 4); bgzf_read(bp->fp, &b->l_str, 4); if (b->l_str > b->m_str) { b->m_str = b->l_str; kroundup32(b->m_str); b->str = realloc(b->str, b->m_str); } bgzf_read(bp->fp, b->str, b->l_str); l = 12 + b->l_str; if (bcf_sync(b) < 0) return -2; for (i = 0; i < b->n_gi; ++i) { bgzf_read(bp->fp, b->gi[i].data, b->gi[i].len * h->n_smpl); l += b->gi[i].len * h->n_smpl; } return l; } int bcf_destroy(bcf1_t *b) { int i; if (b == 0) return -1; free(b->str); for (i = 0; i < b->m_gi; ++i) free(b->gi[i].data); free(b->gi); free(b); return 0; } static inline void fmt_str(const char *p, kstring_t *s) { if (*p == 0) kputc('.', s); else kputs(p, s); } void bcf_fmt_core(const bcf_hdr_t *h, bcf1_t *b, kstring_t *s) { int i, j, x; s->l = 0; if (h->n_ref) kputs(h->ns[b->tid], s); else kputw(b->tid, s); kputc('\t', s); kputw(b->pos + 1, s); kputc('\t', s); fmt_str(b->str, s); kputc('\t', s); fmt_str(b->ref, s); kputc('\t', s); fmt_str(b->alt, s); kputc('\t', s); ksprintf(s, "%.3g", b->qual); kputc('\t', s); fmt_str(b->flt, s); kputc('\t', s); fmt_str(b->info, s); if (b->fmt[0]) { kputc('\t', s); fmt_str(b->fmt, s); } x = b->n_alleles * (b->n_alleles + 1) / 2; if (b->n_gi == 0) return; int iPL = -1; if ( b->n_alleles > 2 ) { for (i=0; in_gi; i++) { if ( b->gi[i].fmt == bcf_str2int("PL", 2) ) { iPL = i; break; } } } for (j = 0; j < h->n_smpl; ++j) { int 
ploidy = b->ploidy ? b->ploidy[j] : 2; kputc('\t', s); for (i = 0; i < b->n_gi; ++i) { if (i) kputc(':', s); if (b->gi[i].fmt == bcf_str2int("PL", 2)) { uint8_t *d = (uint8_t*)b->gi[i].data + j * x; int k; if ( ploidy==1 ) for (k=0; kn_alleles; k++) { if (k>0) kputc(',', s); kputw(d[(k+1)*(k+2)/2-1], s); } else for (k = 0; k < x; ++k) { if (k > 0) kputc(',', s); kputw(d[k], s); } } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("DV", 2)) { kputw(((uint16_t*)b->gi[i].data)[j], s); } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) { kputw(((uint8_t*)b->gi[i].data)[j], s); } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) { kputw(((int32_t*)b->gi[i].data)[j], s); } else if (b->gi[i].fmt == bcf_str2int("GT", 2)) { int y = ((uint8_t*)b->gi[i].data)[j]; if ( ploidy==1 ) { if ( y>>7&1 ) kputc('.', s); else kputc('0' + (y>>3&7), s); } else { if ( y>>7&1 ) kputsn("./.", 3, s); else { kputc('0' + (y>>3&7), s); kputc("/|"[y>>6&1], s); kputc('0' + (y&7), s); } } } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) { float *d = (float*)b->gi[i].data + j * x; int k; //printf("- %lx\n", d); for (k = 0; k < x; ++k) { if (k > 0) kputc(',', s); ksprintf(s, "%.2f", d[k]); } } else kputc('.', s); // custom fields } } } char *bcf_fmt(const bcf_hdr_t *h, bcf1_t *b) { kstring_t s; s.l = s.m = 0; s.s = 0; bcf_fmt_core(h, b, &s); return s.s; } int bcf_append_info(bcf1_t *b, const char *info, int l) { int shift = b->fmt - b->str; int l_fmt = b->l_str - shift; char *ori = b->str; if (b->l_str + l > b->m_str) { // enlarge if necessary b->m_str = b->l_str + l; kroundup32(b->m_str); b->str = realloc(b->str, b->m_str); } memmove(b->str + shift + l, b->str + shift, l_fmt); // move the FORMAT field memcpy(b->str + shift - 1, info, l); // append to the INFO field b->str[shift + l - 1] = '\0'; b->fmt = b->str + shift + l; b->l_str += l; if (ori != b->str) bcf_sync(b); // synchronize when realloc changes the pointer return 0; } int remove_tag(char *str, const char *tag, char 
delim) { char *tmp = str, *p; int len_diff = 0, ori_len = strlen(str); while ( *tmp && (p = strstr(tmp,tag)) ) { if ( p>str ) { if ( *(p-1)!=delim ) { tmp=p+1; continue; } // shared substring p--; } char *q=p+1; while ( *q && *q!=delim ) q++; if ( p==str && *q ) q++; // the tag is first, don't move the delim char len_diff += q-p; if ( ! *q ) { *p = 0; break; } // the tag was last, no delim follows else memmove(p,q,ori_len-(int)(p-str)-(int)(q-p)); // *q==delim } if ( len_diff==ori_len ) str[0]='.', str[1]=0, len_diff--; return len_diff; } void rm_info(kstring_t *s, const char *key) { char *p = s->s; int n = 0; while ( n<4 ) { if ( !*p ) n++; p++; } char *q = p+1; while ( *q && q-s->sl ) q++; int nrm = remove_tag(p, key, ';'); if ( nrm ) memmove(q-nrm, q, s->s+s->l-q+1); s->l -= nrm; } int bcf_cpy(bcf1_t *r, const bcf1_t *b) { char *t1 = r->str; bcf_ginfo_t *t2 = r->gi; int i, t3 = r->m_str, t4 = r->m_gi; *r = *b; r->str = t1; r->gi = t2; r->m_str = t3; r->m_gi = t4; if (r->m_str < b->m_str) { r->m_str = b->m_str; r->str = realloc(r->str, r->m_str); } memcpy(r->str, b->str, r->m_str); bcf_sync(r); // calling bcf_sync() is simple but inefficient for (i = 0; i < r->n_gi; ++i) memcpy(r->gi[i].data, b->gi[i].data, r->n_smpl * r->gi[i].len); return 0; } int bcf_is_indel(const bcf1_t *b) { char *p; if (strlen(b->ref) > 1) return 1; for (p = b->alt; *p; ++p) if (*p != ',' && p[1] != ',' && p[1] != '\0') return 1; return 0; } samtools-0.1.19/bcftools/bcf.h000066400000000000000000000160721212162403000161560ustar00rootroot00000000000000/* The MIT License Copyright (c) 2010 Broad Institute Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do 
so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* Contact: Heng Li */ #ifndef BCF_H #define BCF_H #define BCF_VERSION "0.1.19-96b5f2294a" #include #include #ifndef BCF_LITE #include "bgzf.h" typedef BGZF *bcfFile; #else typedef gzFile bcfFile; #define bgzf_open(fn, mode) gzopen(fn, mode) #define bgzf_fdopen(fd, mode) gzdopen(fd, mode) #define bgzf_close(fp) gzclose(fp) #define bgzf_read(fp, buf, len) gzread(fp, buf, len) #define bgzf_write(fp, buf, len) #define bgzf_flush(fp) #endif /* A member in the structs below is said to "primary" if its content cannot be inferred from other members in any of structs below; a member is said to be "derived" if its content can be derived from other members. For example, bcf1_t::str is primary as this comes from the input data, while bcf1_t::info is derived as it can always be correctly set if we know bcf1_t::str. Derived members are for quick access to the content and must be synchronized with the primary data. */ typedef struct { uint32_t fmt; // format of the block, set by bcf_str2int(). 
int len; // length of data for each individual void *data; // concatenated data // derived info: fmt, len (<-bcf1_t::fmt) } bcf_ginfo_t; typedef struct { int32_t tid, pos; // refID and 0-based position int32_t l_str, m_str; // length and the allocated size of ->str float qual; // SNP quality char *str; // concatenated string of variable length strings in VCF (from col.2 to col.7) char *ref, *alt, *flt, *info, *fmt; // they all point to ->str; no memory allocation int n_gi, m_gi; // number and the allocated size of geno fields bcf_ginfo_t *gi; // array of geno fields int n_alleles, n_smpl; // number of alleles and samples // derived info: ref, alt, flt, info, fmt (<-str), n_gi (<-fmt), n_alleles (<-alt), n_smpl (<-bcf_hdr_t::n_smpl) uint8_t *ploidy; // ploidy of all samples; if NULL, ploidy of 2 is assumed. } bcf1_t; typedef struct { int32_t n_ref, n_smpl; // number of reference sequences and samples int32_t l_nm; // length of concatenated sequence names; 0 padded int32_t l_smpl; // length of concatenated sample names; 0 padded int32_t l_txt; // length of header text (lines started with ##) char *name, *sname, *txt; // concatenated sequence names, sample names and header text char **ns, **sns; // array of sequence and sample names; point to name and sname, respectively // derived info: n_ref (<-name), n_smpl (<-sname), ns (<-name), sns (<-sname) } bcf_hdr_t; typedef struct { int is_vcf; // if the file in operation is a VCF void *v; // auxillary data structure for VCF bcfFile fp; // file handler for BCF } bcf_t; struct __bcf_idx_t; typedef struct __bcf_idx_t bcf_idx_t; #ifdef __cplusplus extern "C" { #endif // open a BCF file; for BCF file only bcf_t *bcf_open(const char *fn, const char *mode); // close file int bcf_close(bcf_t *b); // read one record from BCF; return -1 on end-of-file, and <-1 for errors int bcf_read(bcf_t *bp, const bcf_hdr_t *h, bcf1_t *b); // call this function if b->str is changed int bcf_sync(bcf1_t *b); // write a BCF record int 
bcf_write(bcf_t *bp, const bcf_hdr_t *h, const bcf1_t *b); // read the BCF header; BCF only bcf_hdr_t *bcf_hdr_read(bcf_t *b); // write the BCF header int bcf_hdr_write(bcf_t *b, const bcf_hdr_t *h); // set bcf_hdr_t::ns and bcf_hdr_t::sns int bcf_hdr_sync(bcf_hdr_t *b); // destroy the header void bcf_hdr_destroy(bcf_hdr_t *h); // destroy a record int bcf_destroy(bcf1_t *b); // BCF->VCF conversion char *bcf_fmt(const bcf_hdr_t *h, bcf1_t *b); // append more info int bcf_append_info(bcf1_t *b, const char *info, int l); // remove tag int remove_tag(char *string, const char *tag, char delim); // remove info tag, string is the kstring holder of bcf1_t.str void rm_info(kstring_t *string, const char *key); // copy int bcf_cpy(bcf1_t *r, const bcf1_t *b); // open a VCF or BCF file if "b" is set in "mode" bcf_t *vcf_open(const char *fn, const char *mode); // close a VCF/BCF file int vcf_close(bcf_t *bp); // read the VCF/BCF header bcf_hdr_t *vcf_hdr_read(bcf_t *bp); // read the sequence dictionary from a separate file; required for VCF->BCF conversion int vcf_dictread(bcf_t *bp, bcf_hdr_t *h, const char *fn); // read a VCF/BCF record; return -1 on end-of-file and <-1 for errors int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b); // write the VCF header int vcf_hdr_write(bcf_t *bp, const bcf_hdr_t *h); // write a VCF record int vcf_write(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b); // keep the first n alleles and discard the rest int bcf_shrink_alt(bcf1_t *b, int n); // keep the masked alleles and discard the rest void bcf_fit_alt(bcf1_t *b, int mask); // convert GL to PL int bcf_gl2pl(bcf1_t *b); // if the site is an indel int bcf_is_indel(const bcf1_t *b); bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int *list); int bcf_subsam(int n_smpl, int *list, bcf1_t *b); // move GT to the first FORMAT field int bcf_fix_gt(bcf1_t *b); // update PL generated by old samtools int bcf_fix_pl(bcf1_t *b); // convert PL to GLF-like 10-likelihood GL int bcf_gl10(const 
bcf1_t *b, uint8_t *gl); // convert up to 4 INDEL alleles to GLF-like 10-likelihood GL int bcf_gl10_indel(const bcf1_t *b, uint8_t *gl); // string hash table void *bcf_build_refhash(bcf_hdr_t *h); void bcf_str2id_destroy(void *_hash); void bcf_str2id_thorough_destroy(void *_hash); int bcf_str2id_add(void *_hash, const char *str); int bcf_str2id(void *_hash, const char *str); void *bcf_str2id_init(); // indexing related functions int bcf_idx_build(const char *fn); uint64_t bcf_idx_query(const bcf_idx_t *idx, int tid, int beg); int bcf_parse_region(void *str2id, const char *str, int *tid, int *begin, int *end); bcf_idx_t *bcf_idx_load(const char *fn); void bcf_idx_destroy(bcf_idx_t *idx); #ifdef __cplusplus } #endif static inline uint32_t bcf_str2int(const char *str, int l) { int i; uint32_t x = 0; for (i = 0; i < l && i < 4; ++i) { if (str[i] == 0) return x; x = x<<8 | str[i]; } return x; } #endif samtools-0.1.19/bcftools/bcf.tex000066400000000000000000000101711212162403000165210ustar00rootroot00000000000000\documentclass[10pt,pdftex]{article} \usepackage{color} \definecolor{gray}{rgb}{0.7,0.7,0.7} \setlength{\topmargin}{0.0cm} \setlength{\textheight}{21.5cm} \setlength{\oddsidemargin}{0cm} \setlength{\textwidth}{16.5cm} \setlength{\columnsep}{0.6cm} \begin{document} \begin{center} \begin{tabular}{|l|l|l|l|l|} \hline \multicolumn{2}{|c|}{\bf Field} & \multicolumn{1}{c|}{\bf Descrption} & \multicolumn{1}{c|}{\bf Type} & \multicolumn{1}{c|}{\bf Value} \\\hline\hline \multicolumn{2}{|l|}{\sf magic} & Magic string & {\tt char[4]} & {\tt BCF\char92 4} \\\hline \multicolumn{2}{|l|}{\sf l\_seqnm} & Length of concatenated sequence names & {\tt int32\_t} & \\\hline \multicolumn{2}{|l|}{\sf seqnm} & Concatenated names, {\tt NULL} padded & {\tt char[{\sf l\_seqnm}]} & \\\hline \multicolumn{2}{|l|}{\sf l\_smpl} & Length of concatenated sample names & {\tt int32\_t} & \\\hline \multicolumn{2}{|l|}{\sf smpl} & Concatenated sample names & {\tt char[{\sf l\_smpl}]} & \\\hline 
\multicolumn{2}{|l|}{\sf l\_meta} & Length of the meta text (double-hash lines)& {\tt int32\_t} & \\\hline \multicolumn{2}{|l|}{\sf meta} & Meta text, {\tt NULL} terminated & {\tt char[{\sf l\_meta}]} & \\\hline \multicolumn{5}{|c|}{\it \color{gray}{List of records until the end of the file}}\\\cline{2-5} & {\sf seq\_id} & Reference sequence ID & {\tt int32\_t} & \\\cline{2-5} & {\sf pos} & Position & {\tt int32\_t} & \\\cline{2-5} & {\sf qual} & Variant quality & {\tt float} & \\\cline{2-5} & {\sf l\_str} & Length of {\sf str} & {\tt int32\_t} & \\\cline{2-5} & {\sf str} & {\tt ID+REF+ALT+FILTER+INFO+FORMAT}, {\tt NULL} padded & {\tt char[{\sf l\_str}]} &\\\cline{2-5} & \multicolumn{4}{c|}{Blocks of data; \#blocks and formats defined by {\tt FORMAT} (table below)}\\ \hline \end{tabular} \end{center} \begin{center} \begin{tabular}{clp{9cm}} \hline \multicolumn{1}{l}{\bf Field} & \multicolumn{1}{l}{\bf Type} & \multicolumn{1}{l}{\bf Description} \\\hline {\tt DP} & {\tt uint16\_t[n]} & Read depth \\ {\tt GL} & {\tt float[n*G]} & Log10 likelihood of data; $G=\frac{A(A+1)}{2}$, $A=\#\{alleles\}$\\ {\tt GT} & {\tt uint8\_t[n]} & {\tt missing\char60\char60 7 | phased\char60\char60 6 | allele1\char60\char60 3 | allele2} \\ {\tt \_GT} & {\tt uint8\_t+uint8\_t[n*P]} & {Generic GT; the first int equals the max ploidy $P$. If the highest bit is set, the allele is not present (e.g. 
due to different ploidy between samples).} \\ {\tt GQ} & {\tt uint8\_t[n]} & {Genotype quality}\\ {\tt HQ} & {\tt uint8\_t[n*2]} & {Haplotype quality}\\ {\tt \_HQ} & {\tt uint8\_t+uint8\_t[n*P]} & {Generic HQ}\\ {\tt IBD} & {\tt uint32\_t[n*2]} & {IBD}\\ {\tt \_IBD} & {\tt uint8\_t+uint32\_t[n*P]} & {Generic IBD}\\ {\tt PL} & {\tt uint8\_t[n*G]} & {Phred-scaled likelihood of data}\\ {\tt PS} & {\tt uint32\_t[n]} & {Phase set}\\ %{\tt SP} & {\tt uint8\_t[n]} & {Strand bias P-value (bcftools only)}\\ \emph{Integer} & {\tt int32\_t[n*X]} & {Fix-sized custom Integer; $X$ defined in the header}\\ \emph{Numeric} & {\tt double[n*X]} & {Fix-sized custom Numeric}\\ \emph{String} & {\tt uint32\_t+char*} & {\tt NULL} padded concat. strings (int equals to the length) \\ \hline \end{tabular} \end{center} \begin{itemize} \item A BCF file is in the {\tt BGZF} format. \item All multi-byte numbers are little-endian. \item In a string, a missing value `.' is an empty C string ``{\tt \char92 0}'' (not ``{\tt .\char92 0}'') \item For {\tt GL} and {\tt PL}, likelihoods of genotypes appear in the order of alleles in {\tt REF} and then {\tt ALT}. For example, if {\tt REF=C}, {\tt ALT=T,A}, likelihoods appear in the order of {\tt CC,CT,TT,CA,TA,AA} (NB: the ordering is different from the one in the original BCF proposal). \item Predefined {\tt FORMAT} fields can be missing from VCF headers, but custom {\tt FORMAT} fields are required to be explicitly defined in the headers. \item A {\tt FORMAT} field with its name starting with `{\tt \_}' is specific to BCF only. It gives an alternative binary representation of the corresponding VCF field, in case the default representation is unable to keep the genotype information, for example, when the ploidy is not 2 or there are more than 8 alleles. 
\end{itemize} \end{document} samtools-0.1.19/bcftools/bcf2qcall.c000066400000000000000000000057241212162403000172520ustar00rootroot00000000000000#include #include #include #include #include "bcf.h" static int8_t nt4_table[256] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 /*'-'*/, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, -1, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, -1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; static int read_I16(bcf1_t *b, int anno[16]) { char *p; int i; if ((p = strstr(b->info, "I16=")) == 0) return -1; p += 4; for (i = 0; i < 16; ++i) { anno[i] = strtol(p, &p, 10); if (anno[i] == 0 && (errno == EINVAL || errno == ERANGE)) return -2; ++p; } return 0; } int bcf_2qcall(bcf_hdr_t *h, bcf1_t *b) { int a[4], k, g[10], l, map[4], k1, j, i, i0, anno[16], dp, mq, d_rest; char *s; if (b->ref[1] != 0 || b->n_alleles > 4) return -1; // ref is not a single base for (i = 0; i < b->n_gi; ++i) if (b->gi[i].fmt == bcf_str2int("PL", 2)) break; if (i == b->n_gi) return -1; // no PL if (read_I16(b, anno) != 0) return -1; // no I16; FIXME: can be improved d_rest = dp = anno[0] + anno[1] + anno[2] + anno[3]; if (dp == 0) return -1; // depth is zero mq = (int)(sqrt((double)(anno[9] + anno[11]) / dp) + .499); i0 = i; a[0] = nt4_table[(int)b->ref[0]]; if (a[0] > 3) return -1; // ref is not A/C/G/T a[1] = a[2] = a[3] = -2; // -1 has a special meaning if (b->alt[0] == 0) return -1; // 
no alternate allele map[0] = map[1] = map[2] = map[3] = -2; map[a[0]] = 0; for (k = 0, s = b->alt, k1 = -1; k < 3 && *s; ++k, s += 2) { if (s[1] != ',' && s[1] != 0) return -1; // ALT is not single base a[k+1] = nt4_table[(int)*s]; if (a[k+1] >= 0) map[a[k+1]] = k+1; else k1 = k+1; if (s[1] == 0) break; } for (k = 0; k < 4; ++k) if (map[k] < 0) map[k] = k1; for (i = 0; i < h->n_smpl; ++i) { int d; uint8_t *p = b->gi[i0].data + i * b->gi[i0].len; for (j = 0; j < b->gi[i0].len; ++j) if (p[j]) break; d = (int)((double)d_rest / (h->n_smpl - i) + .499); if (d == 0) d = 1; if (j == b->gi[i0].len) d = 0; d_rest -= d; for (k = j = 0; k < 4; ++k) { for (l = k; l < 4; ++l) { int t, x = map[k], y = map[l]; if (x > y) t = x, x = y, y = t; // swap g[j++] = p[y * (y+1) / 2 + x]; } } printf("%s\t%d\t%c", h->ns[b->tid], b->pos+1, *b->ref); printf("\t%d\t%d\t0", d, mq); for (j = 0; j < 10; ++j) printf("\t%d", g[j]); printf("\t%s\n", h->sns[i]); } return 0; } samtools-0.1.19/bcftools/bcfutils.c000066400000000000000000000317031212162403000172300ustar00rootroot00000000000000#include #include #include #include "bcf.h" #include "kstring.h" #include "khash.h" KHASH_MAP_INIT_STR(str2id, int) #ifdef _WIN32 #define srand48(x) srand(x) #define drand48() ((double)rand() / RAND_MAX) #endif // FIXME: valgrind report a memory leak in this function. Probably it does not get deallocated... void *bcf_build_refhash(bcf_hdr_t *h) { khash_t(str2id) *hash; int i, ret; hash = kh_init(str2id); for (i = 0; i < h->n_ref; ++i) { khint_t k; k = kh_put(str2id, hash, h->ns[i], &ret); // FIXME: check ret kh_val(hash, k) = i; } return hash; } void *bcf_str2id_init() { return kh_init(str2id); } void bcf_str2id_destroy(void *_hash) { khash_t(str2id) *hash = (khash_t(str2id)*)_hash; if (hash) kh_destroy(str2id, hash); // Note that strings are not freed. 
} void bcf_str2id_thorough_destroy(void *_hash) { khash_t(str2id) *hash = (khash_t(str2id)*)_hash; khint_t k; if (hash == 0) return; for (k = 0; k < kh_end(hash); ++k) if (kh_exist(hash, k)) free((char*)kh_key(hash, k)); kh_destroy(str2id, hash); } int bcf_str2id(void *_hash, const char *str) { khash_t(str2id) *hash = (khash_t(str2id)*)_hash; khint_t k; if (!hash) return -1; k = kh_get(str2id, hash, str); return k == kh_end(hash)? -1 : kh_val(hash, k); } int bcf_str2id_add(void *_hash, const char *str) { khint_t k; int ret; khash_t(str2id) *hash = (khash_t(str2id)*)_hash; if (!hash) return -1; k = kh_put(str2id, hash, str, &ret); if (ret == 0) return kh_val(hash, k); kh_val(hash, k) = kh_size(hash) - 1; return kh_val(hash, k); } void bcf_fit_alt(bcf1_t *b, int mask) { mask |= 1; // REF must be always present int i,j,nals=0; for (i=0; in_alleles <= nals ) return; // update ALT, in principle any of the alleles can be removed char *p; if ( nals>1 ) { char *dst, *src; int n=0, nalts=nals-1; for (src=dst=p=b->alt, i=1; *p; p++) { if ( *p!=',' ) continue; if ( mask&1<=nalts ) { *dst=0; break; } src = p+1; } if ( nalt, *p = '\0'; p++; memmove(p, b->flt, b->str + b->l_str - b->flt); b->l_str -= b->flt - p; // update PL and GT int ipl=-1, igt=-1; for (i = 0; i < b->n_gi; ++i) { bcf_ginfo_t *g = b->gi + i; if (g->fmt == bcf_str2int("PL", 2)) ipl = i; if (g->fmt == bcf_str2int("GT", 2)) igt = i; } // .. create mapping between old and new indexes int npl = nals * (nals+1) / 2; int *map = malloc(sizeof(int)*(npl>b->n_alleles ? 
npl : b->n_alleles)); int kori=0,knew=0; for (i=0; in_alleles; i++) { for (j=0; j<=i; j++) { int skip=0; if ( i && !(mask&1<n_smpl; for (i = 0; i < b->n_gi; ++i) { bcf_ginfo_t *g = b->gi + i; if (g->fmt == bcf_str2int("PL", 2)) { g->len = npl; uint8_t *d = (uint8_t*)g->data; int ismpl, npl_ori = b->n_alleles * (b->n_alleles + 1) / 2; for (knew=ismpl=0; ismpln_alleles; i++) map[i] = mask&1<gi[igt].data)[i]; int a1 = (gt>>3)&7; int a2 = gt&7; assert( map[a1]>=0 && map[a2]>=0 ); ((uint8_t*)b->gi[igt].data)[i] = ((1<<7|1<<6)>) | map[a1]<<3 | map[a2]; } free(map); b->n_alleles = nals; bcf_sync(b); } int bcf_shrink_alt(bcf1_t *b, int n) { char *p; int i, j, k, n_smpl = b->n_smpl; if (b->n_alleles <= n) return -1; // update ALT if (n > 1) { for (p = b->alt, k = 1; *p; ++p) if (*p == ',' && ++k == n) break; *p = '\0'; } else p = b->alt, *p = '\0'; ++p; memmove(p, b->flt, b->str + b->l_str - b->flt); b->l_str -= b->flt - p; // update PL for (i = 0; i < b->n_gi; ++i) { bcf_ginfo_t *g = b->gi + i; if (g->fmt == bcf_str2int("PL", 2)) { int l, x = b->n_alleles * (b->n_alleles + 1) / 2; uint8_t *d = (uint8_t*)g->data; g->len = n * (n + 1) / 2; for (l = k = 0; l < n_smpl; ++l) { uint8_t *dl = d + l * x; for (j = 0; j < g->len; ++j) d[k++] = dl[j]; } } // FIXME: to add GL } b->n_alleles = n; bcf_sync(b); return 0; } int bcf_gl2pl(bcf1_t *b) { char *p; int i, n_smpl = b->n_smpl; bcf_ginfo_t *g; float *d0; uint8_t *d1; if (strstr(b->fmt, "PL")) return -1; if ((p = strstr(b->fmt, "GL")) == 0) return -1; *p = 'P'; for (i = 0; i < b->n_gi; ++i) if (b->gi[i].fmt == bcf_str2int("GL", 2)) break; g = b->gi + i; g->fmt = bcf_str2int("PL", 2); g->len /= 4; // 4 == sizeof(float) d0 = (float*)g->data; d1 = (uint8_t*)g->data; for (i = 0; i < n_smpl * g->len; ++i) { int x = (int)(-10. * d0[i] + .499); if (x > 255) x = 255; if (x < 0) x = 0; d1[i] = x; } return 0; } /* FIXME: this function will fail given AB:GTX:GT. BCFtools never * produces such FMT, but others may do. 
*/ int bcf_fix_gt(bcf1_t *b) { char *s; int i; uint32_t tmp; bcf_ginfo_t gt; // check the presence of the GT FMT if ((s = strstr(b->fmt, ":GT")) == 0) return 0; // no GT or GT is already the first assert(s[3] == '\0' || s[3] == ':'); // :GTX in fact tmp = bcf_str2int("GT", 2); for (i = 0; i < b->n_gi; ++i) if (b->gi[i].fmt == tmp) break; if (i == b->n_gi) return 0; // no GT in b->gi; probably a bug... gt = b->gi[i]; // move GT to the first for (; i > 0; --i) b->gi[i] = b->gi[i-1]; b->gi[0] = gt; if ( s[3]==0 ) memmove(b->fmt + 3, b->fmt, s - b->fmt); // :GT else memmove(b->fmt + 3, b->fmt, s - b->fmt + 1); // :GT: b->fmt[0] = 'G'; b->fmt[1] = 'T'; b->fmt[2] = ':'; return 0; } int bcf_fix_pl(bcf1_t *b) { int i; uint32_t tmp; uint8_t *PL, *swap; bcf_ginfo_t *gi; // pinpoint PL tmp = bcf_str2int("PL", 2); for (i = 0; i < b->n_gi; ++i) if (b->gi[i].fmt == tmp) break; if (i == b->n_gi) return 0; // prepare gi = b->gi + i; PL = (uint8_t*)gi->data; swap = alloca(gi->len); // loop through individuals for (i = 0; i < b->n_smpl; ++i) { int k, l, x; uint8_t *PLi = PL + i * gi->len; memcpy(swap, PLi, gi->len); for (k = x = 0; k < b->n_alleles; ++k) for (l = k; l < b->n_alleles; ++l) PLi[l*(l+1)/2 + k] = swap[x++]; } return 0; } int bcf_smpl_covered(const bcf1_t *b) { int i, j, n = 0; uint32_t tmp; bcf_ginfo_t *gi; // pinpoint PL tmp = bcf_str2int("PL", 2); for (i = 0; i < b->n_gi; ++i) if (b->gi[i].fmt == tmp) break; if (i == b->n_gi) return 0; // count how many samples having PL!=[0..0] gi = b->gi + i; for (i = 0; i < b->n_smpl; ++i) { uint8_t *PLi = ((uint8_t*)gi->data) + i * gi->len; for (j = 0; j < gi->len; ++j) if (PLi[j]) break; if (j < gi->len) ++n; } return n; } static void *locate_field(const bcf1_t *b, const char *fmt, int l) { int i; uint32_t tmp; tmp = bcf_str2int(fmt, l); for (i = 0; i < b->n_gi; ++i) if (b->gi[i].fmt == tmp) break; return i == b->n_gi? 
0 : b->gi[i].data; } int bcf_anno_max(bcf1_t *b) { int k, max_gq, max_sp, n_het; kstring_t str; uint8_t *gt, *gq; int32_t *sp; max_gq = max_sp = n_het = 0; gt = locate_field(b, "GT", 2); if (gt == 0) return -1; gq = locate_field(b, "GQ", 2); sp = locate_field(b, "SP", 2); if (sp) for (k = 0; k < b->n_smpl; ++k) if (gt[k]&0x3f) max_sp = max_sp > (int)sp[k]? max_sp : sp[k]; if (gq) for (k = 0; k < b->n_smpl; ++k) if (gt[k]&0x3f) max_gq = max_gq > (int)gq[k]? max_gq : gq[k]; for (k = 0; k < b->n_smpl; ++k) { int a1, a2; a1 = gt[k]&7; a2 = gt[k]>>3&7; if ((!a1 && a2) || (!a2 && a1)) { // a het if (gq == 0) ++n_het; else if (gq[k] >= 20) ++n_het; } } if (n_het) max_sp -= (int)(4.343 * log(n_het) + .499); if (max_sp < 0) max_sp = 0; memset(&str, 0, sizeof(kstring_t)); if (*b->info) kputc(';', &str); ksprintf(&str, "MXSP=%d;MXGQ=%d", max_sp, max_gq); bcf_append_info(b, str.s, str.l); free(str.s); return 0; } // FIXME: only data are shuffled; the header is NOT int bcf_shuffle(bcf1_t *b, int seed) { int i, j, *a; if (seed > 0) srand48(seed); a = malloc(b->n_smpl * sizeof(int)); for (i = 0; i < b->n_smpl; ++i) a[i] = i; for (i = b->n_smpl; i > 1; --i) { int tmp; j = (int)(drand48() * i); tmp = a[j]; a[j] = a[i-1]; a[i-1] = tmp; } for (j = 0; j < b->n_gi; ++j) { bcf_ginfo_t *gi = b->gi + j; uint8_t *swap, *data = (uint8_t*)gi->data; swap = malloc(gi->len * b->n_smpl); for (i = 0; i < b->n_smpl; ++i) memcpy(swap + gi->len * a[i], data + gi->len * i, gi->len); free(gi->data); gi->data = swap; } free(a); return 0; } bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int *list) { int i, ret, j; khint_t k; bcf_hdr_t *h; khash_t(str2id) *hash; kstring_t s; s.l = s.m = 0; s.s = 0; hash = kh_init(str2id); for (i = 0; i < h0->n_smpl; ++i) { k = kh_put(str2id, hash, h0->sns[i], &ret); kh_val(hash, k) = i; } for (i = j = 0; i < n; ++i) { k = kh_get(str2id, hash, samples[i]); if (k != kh_end(hash)) { list[j++] = kh_val(hash, k); kputs(samples[i], &s); kputc('\0', 
&s); } } if (j < n) { fprintf(stderr, "<%s> %d samples in the list but not in BCF.", __func__, n - j); exit(1); } kh_destroy(str2id, hash); h = calloc(1, sizeof(bcf_hdr_t)); *h = *h0; h->ns = 0; h->sns = 0; h->name = malloc(h->l_nm); memcpy(h->name, h0->name, h->l_nm); h->txt = calloc(1, h->l_txt + 1); memcpy(h->txt, h0->txt, h->l_txt); h->l_smpl = s.l; h->sname = s.s; bcf_hdr_sync(h); return h; } int bcf_subsam(int n_smpl, int *list, bcf1_t *b) { int i, j; for (j = 0; j < b->n_gi; ++j) { bcf_ginfo_t *gi = b->gi + j; uint8_t *swap; swap = malloc(gi->len * b->n_smpl); for (i = 0; i < n_smpl; ++i) memcpy(swap + i * gi->len, (uint8_t*)gi->data + list[i] * gi->len, gi->len); free(gi->data); gi->data = swap; } b->n_smpl = n_smpl; return 0; } static int8_t nt4_table[128] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 /*'-'*/, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, -1, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, -1, 4, 4, 4, 4, 4, 4, 4 }; int bcf_gl10(const bcf1_t *b, uint8_t *gl) { int a[4], k, l, map[4], k1, j, i; const bcf_ginfo_t *PL; char *s; if (b->ref[1] != 0 || b->n_alleles > 4) return -1; // ref is not a single base or >4 alleles for (i = 0; i < b->n_gi; ++i) if (b->gi[i].fmt == bcf_str2int("PL", 2)) break; if (i == b->n_gi) return -1; // no PL PL = b->gi + i; a[0] = nt4_table[(int)b->ref[0]]; if (a[0] > 3 || a[0] < 0) return -1; // ref is not A/C/G/T a[1] = a[2] = a[3] = -2; // -1 has a special meaning if (b->alt[0] == 0) return -1; // no alternate allele map[0] = map[1] = map[2] = map[3] = -2; map[a[0]] = 0; for (k = 0, s = b->alt, k1 = -1; k < 3 && *s; ++k, s += 2) { if (s[1] != ',' && s[1] != 0) return -1; // ALT is not single base a[k+1] = nt4_table[(int)*s]; if (a[k+1] >= 0) map[a[k+1]] = k+1; else k1 = k + 1; if (s[1] == 0) break; 
// the end of the ALT string } for (k = 0; k < 4; ++k) if (map[k] < 0) map[k] = k1; for (i = 0; i < b->n_smpl; ++i) { const uint8_t *p = PL->data + i * PL->len; // the PL for the i-th individual uint8_t *g = gl + 10 * i; for (k = j = 0; k < 4; ++k) { for (l = k; l < 4; ++l) { int t, x = map[k], y = map[l]; if (x > y) t = x, x = y, y = t; // make sure x is the smaller g[j++] = p[y * (y+1) / 2 + x]; } } } return 0; } int bcf_gl10_indel(const bcf1_t *b, uint8_t *gl) { int k, l, j, i; const bcf_ginfo_t *PL; if (b->alt[0] == 0) return -1; // no alternate allele for (i = 0; i < b->n_gi; ++i) if (b->gi[i].fmt == bcf_str2int("PL", 2)) break; if (i == b->n_gi) return -1; // no PL PL = b->gi + i; for (i = 0; i < b->n_smpl; ++i) { const uint8_t *p = PL->data + i * PL->len; // the PL for the i-th individual uint8_t *g = gl + 10 * i; for (k = j = 0; k < 4; ++k) { for (l = k; l < 4; ++l) { int t, x = k, y = l; if (x > y) t = x, x = y, y = t; // make sure x is the smaller x = y * (y+1) / 2 + x; g[j++] = x < PL->len? 
p[x] : 255; } } } return 0; } samtools-0.1.19/bcftools/call1.c000066400000000000000000000632601212162403000164140ustar00rootroot00000000000000#include #include #include #include #include #include "bcf.h" #include "prob1.h" #include "kstring.h" #include "time.h" #ifdef _WIN32 #define srand48(x) srand(x) #define lrand48() rand() #endif #include "kseq.h" KSTREAM_INIT(gzFile, gzread, 16384) #define VC_NO_GENO 2 #define VC_BCFOUT 4 #define VC_CALL 8 #define VC_VARONLY 16 #define VC_VCFIN 32 #define VC_UNCOMP 64 #define VC_KEEPALT 256 #define VC_ACGT_ONLY 512 #define VC_QCALL 1024 #define VC_CALL_GT 2048 #define VC_ADJLD 4096 #define VC_NO_INDEL 8192 #define VC_ANNO_MAX 16384 #define VC_FIX_PL 32768 #define VC_EM 0x10000 #define VC_PAIRCALL 0x20000 #define VC_QCNT 0x40000 #define VC_INDEL_ONLY 0x80000 typedef struct { int flag, prior_type, n1, n_sub, *sublist, n_perm; uint32_t *trio_aux; char *prior_file, **subsam, *fn_dict; uint8_t *ploidy; double theta, pref, indel_frac, min_perm_p, min_smpl_frac, min_lrt, min_ma_lrt; void *bed; } viewconf_t; void *bed_read(const char *fn); void bed_destroy(void *_h); int bed_overlap(const void *_h, const char *chr, int beg, int end); static double ttest(int n1, int n2, int a[4]) { extern double kf_betai(double a, double b, double x); double t, v, u1, u2; if (n1 == 0 || n2 == 0 || n1 + n2 < 3) return 1.0; u1 = (double)a[0] / n1; u2 = (double)a[2] / n2; if (u1 <= u2) return 1.; t = (u1 - u2) / sqrt(((a[1] - n1 * u1 * u1) + (a[3] - n2 * u2 * u2)) / (n1 + n2 - 2) * (1./n1 + 1./n2)); v = n1 + n2 - 2; // printf("%d,%d,%d,%d,%lf,%lf,%lf\n", a[0], a[1], a[2], a[3], t, u1, u2); return t < 0.? 1. 
: .5 * kf_betai(.5*v, .5, v/(v+t*t)); } static int test16_core(int anno[16], anno16_t *a) { extern double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two); double left, right; int i; a->p[0] = a->p[1] = a->p[2] = a->p[3] = 1.; memcpy(a->d, anno, 4 * sizeof(int)); a->depth = anno[0] + anno[1] + anno[2] + anno[3]; a->is_tested = (anno[0] + anno[1] > 0 && anno[2] + anno[3] > 0); if (a->depth == 0) return -1; a->mq = (int)(sqrt((anno[9] + anno[11]) / a->depth) + .499); kt_fisher_exact(anno[0], anno[1], anno[2], anno[3], &left, &right, &a->p[0]); for (i = 1; i < 4; ++i) a->p[i] = ttest(anno[0] + anno[1], anno[2] + anno[3], anno+4*i); return 0; } int test16(bcf1_t *b, anno16_t *a) { char *p; int i, anno[16]; a->p[0] = a->p[1] = a->p[2] = a->p[3] = 1.; a->d[0] = a->d[1] = a->d[2] = a->d[3] = 0.; a->mq = a->depth = a->is_tested = 0; if ((p = strstr(b->info, "I16=")) == 0) return -1; p += 4; for (i = 0; i < 16; ++i) { errno = 0; anno[i] = strtol(p, &p, 10); if (anno[i] == 0 && (errno == EINVAL || errno == ERANGE)) return -2; ++p; } return test16_core(anno, a); } static int update_bcf1(bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, double pref, int flag, double em[10], int cons_llr, int64_t cons_gt) { kstring_t s; int has_I16, is_var; double fq, r; anno16_t a; has_I16 = test16(b, &a) >= 0? 1 : 0; //rm_info(b, "I16="); // FIXME: probably this function has a bug. If I move it below, I16 will not be removed! 
memset(&s, 0, sizeof(kstring_t)); kputc('\0', &s); kputs(b->ref, &s); kputc('\0', &s); kputs(b->alt, &s); kputc('\0', &s); kputc('\0', &s); kputs(b->info, &s); if (b->info[0]) kputc(';', &s); { // print EM if (em[0] >= 0) ksprintf(&s, "AF1=%.4g", 1 - em[0]); if (em[4] >= 0 && em[4] <= 0.05) ksprintf(&s, ";G3=%.4g,%.4g,%.4g;HWE=%.3g", em[3], em[2], em[1], em[4]); if (em[5] >= 0 && em[6] >= 0) ksprintf(&s, ";AF2=%.4g,%.4g", 1 - em[5], 1 - em[6]); if (em[7] >= 0) ksprintf(&s, ";LRT=%.3g", em[7]); if (em[8] >= 0) ksprintf(&s, ";LRT2=%.3g", em[8]); } if (cons_llr > 0) { ksprintf(&s, ";CLR=%d", cons_llr); if (cons_gt > 0) ksprintf(&s, ";UGT=%c%c%c;CGT=%c%c%c", cons_gt&0xff, cons_gt>>8&0xff, cons_gt>>16&0xff, cons_gt>>32&0xff, cons_gt>>40&0xff, cons_gt>>48&0xff); } if (pr == 0) { // if pr is unset, return kputc('\0', &s); kputs(b->fmt, &s); kputc('\0', &s); free(b->str); b->m_str = s.m; b->l_str = s.l; b->str = s.s; bcf_sync(b); return 1; } is_var = (pr->p_ref < pref); r = is_var? pr->p_ref : pr->p_var; // ksprintf(&s, ";CI95=%.4g,%.4g", pr->cil, pr->cih); // FIXME: when EM is not used, ";" should be omitted! ksprintf(&s, ";AC1=%d", pr->ac); if (has_I16) ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq); fq = pr->p_ref_folded < 0.5? -4.343 * log(pr->p_ref_folded) : 4.343 * log(pr->p_var_folded); if (fq < -999) fq = -999; if (fq > 999) fq = 999; ksprintf(&s, ";FQ=%.3g", fq); if (pr->cmp[0] >= 0.) { // two sample groups int i, q[3]; for (i = 1; i < 3; ++i) { double x = pr->cmp[i] + pr->cmp[0]/2.; q[i] = x == 0? 
255 : (int)(-4.343 * log(x) + .499); if (q[i] > 255) q[i] = 255; } if (pr->perm_rank >= 0) ksprintf(&s, ";PR=%d", pr->perm_rank); // ksprintf(&s, ";LRT3=%.3g", pr->lrt); ksprintf(&s, ";PCHI2=%.3g;PC2=%d,%d", q[1], q[2], pr->p_chi2); } if (has_I16 && a.is_tested) ksprintf(&s, ";PV4=%.2g,%.2g,%.2g,%.2g", a.p[0], a.p[1], a.p[2], a.p[3]); kputc('\0', &s); rm_info(&s, "QS="); rm_info(&s, "I16="); kputs(b->fmt, &s); kputc('\0', &s); free(b->str); b->m_str = s.m; b->l_str = s.l; b->str = s.s; b->qual = r < 1e-100? 999 : -4.343 * log(r); if (b->qual > 999) b->qual = 999; bcf_sync(b); if (!is_var) bcf_shrink_alt(b, 1); else if (!(flag&VC_KEEPALT)) bcf_shrink_alt(b, pr->rank0 < 2? 2 : pr->rank0+1); if (is_var && (flag&VC_CALL_GT)) { // call individual genotype int i, x, old_n_gi = b->n_gi; s.m = b->m_str; s.l = b->l_str - 1; s.s = b->str; kputs(":GT:GQ", &s); kputc('\0', &s); b->m_str = s.m; b->l_str = s.l; b->str = s.s; bcf_sync(b); for (i = 0; i < b->n_smpl; ++i) { x = bcf_p1_call_gt(pa, pr->f_exp, i); ((uint8_t*)b->gi[old_n_gi].data)[i] = (x&3) == 0? 1<<3|1 : (x&3) == 1? 1 : 0; ((uint8_t*)b->gi[old_n_gi+1].data)[i] = x>>2; } } return is_var; } static char **read_samples(const char *fn, int *_n) { gzFile fp; kstream_t *ks; kstring_t s; int dret, n = 0, max = 0; char **sam = 0; *_n = 0; s.l = s.m = 0; s.s = 0; fp = gzopen(fn, "r"); if (fp == 0) { // interpret as sample names, not as a file name const char *t = fn, *p = t; while (*t) { t++; if ( *t==',' || !*t ) { sam = realloc(sam, sizeof(void*)*(n+1)); sam[n] = (char*) malloc(sizeof(char)*(t-p+2)); memcpy(sam[n], p, t-p); sam[n][t-p] = 0; sam[n][t-p+1] = 2; // assume diploid p = t+1; n++; } } *_n = n; return sam; // fail to open file } ks = ks_init(fp); while (ks_getuntil(ks, 0, &s, &dret) >= 0) { int l; if (max == n) { max = max? 
max<<1 : 4; sam = realloc(sam, sizeof(void*)*max); } l = s.l; sam[n] = malloc(s.l + 2); strcpy(sam[n], s.s); sam[n][l+1] = 2; // by default, diploid if (dret != '\n') { if (ks_getuntil(ks, 0, &s, &dret) >= 0) { // read ploidy, 1 or 2 int x = (int)s.s[0] - '0'; if (x == 1 || x == 2) sam[n][l+1] = x; else fprintf(stderr, "(%s) ploidy can only be 1 or 2; assume diploid\n", __func__); } if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret); } ++n; } ks_destroy(ks); gzclose(fp); free(s.s); *_n = n; return sam; } static void write_header(bcf_hdr_t *h) { kstring_t str; str.l = h->l_txt? h->l_txt - 1 : 0; str.m = str.l + 1; str.s = h->txt; if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); // if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); //if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##FORMAT=\n", &str); if (!strstr(str.s, "##FORMAT=\n", &str); if (!strstr(str.s, "##FORMAT=\n", &str); if (!strstr(str.s, "##FORMAT=\n", &str); if (!strstr(str.s, "##FORMAT=\n", &str); if (!strstr(str.s, "##FORMAT=\n", &str); if (!strstr(str.s, "##FORMAT=\n", &str); h->l_txt = str.l + 1; h->txt = str.s; } double 
bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]); int bcfview(int argc, char *argv[]) { extern int bcf_2qcall(bcf_hdr_t *h, bcf1_t *b); extern void bcf_p1_indel_prior(bcf_p1aux_t *ma, double x); extern int bcf_fix_gt(bcf1_t *b); extern int bcf_anno_max(bcf1_t *b); extern int bcf_shuffle(bcf1_t *b, int seed); extern uint32_t *bcf_trio_prep(int is_x, int is_son); extern int bcf_trio_call(uint32_t *prep, const bcf1_t *b, int *llr, int64_t *gt); extern int bcf_pair_call(const bcf1_t *b); extern int bcf_min_diff(const bcf1_t *b); extern int bcf_p1_get_M(bcf_p1aux_t *b); extern gzFile bcf_p1_fp_lk; bcf_t *bp, *bout = 0; bcf1_t *b, *blast; int c, *seeds = 0; uint64_t n_processed = 0, qcnt[256]; viewconf_t vc; bcf_p1aux_t *p1 = 0; bcf_hdr_t *hin, *hout; int tid, begin, end; char moder[4], modew[4]; tid = begin = end = -1; memset(&vc, 0, sizeof(viewconf_t)); vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.; vc.n_perm = 0; vc.min_perm_p = 0.01; vc.min_smpl_frac = 0; vc.min_lrt = 1; vc.min_ma_lrt = -1; memset(qcnt, 0, 8 * 256); while ((c = getopt(argc, argv, "FN1:l:cC:eHAGvbSuP:t:p:QgLi:IMs:D:U:X:d:T:Ywm:K:")) >= 0) { switch (c) { case '1': vc.n1 = atoi(optarg); break; case 'l': vc.bed = bed_read(optarg); if (!vc.bed) { fprintf(stderr,"Could not read \"%s\"\n", optarg); return 1; } break; case 'D': vc.fn_dict = strdup(optarg); break; case 'F': vc.flag |= VC_FIX_PL; break; case 'N': vc.flag |= VC_ACGT_ONLY; break; case 'G': vc.flag |= VC_NO_GENO; break; case 'A': vc.flag |= VC_KEEPALT; break; case 'b': vc.flag |= VC_BCFOUT; break; case 'S': vc.flag |= VC_VCFIN; break; case 'c': vc.flag |= VC_CALL; break; case 'e': vc.flag |= VC_EM; break; case 'v': vc.flag |= VC_VARONLY | VC_CALL; break; case 'u': vc.flag |= VC_UNCOMP | VC_BCFOUT; break; case 'g': vc.flag |= VC_CALL_GT | VC_CALL; break; case 'I': vc.flag |= VC_NO_INDEL; break; case 'w': vc.flag |= VC_INDEL_ONLY; break; case 'M': vc.flag |= VC_ANNO_MAX; break; case 'Y': vc.flag |= 
VC_QCNT; break; case 'm': vc.min_ma_lrt = atof(optarg); break; case 't': vc.theta = atof(optarg); break; case 'p': vc.pref = atof(optarg); break; case 'i': vc.indel_frac = atof(optarg); break; case 'Q': vc.flag |= VC_QCALL; break; case 'L': vc.flag |= VC_ADJLD; break; case 'U': vc.n_perm = atoi(optarg); break; case 'C': vc.min_lrt = atof(optarg); break; case 'X': vc.min_perm_p = atof(optarg); break; case 'd': vc.min_smpl_frac = atof(optarg); break; case 'K': bcf_p1_fp_lk = gzopen(optarg, "w"); break; case 's': vc.subsam = read_samples(optarg, &vc.n_sub); vc.ploidy = calloc(vc.n_sub + 1, 1); for (tid = 0; tid < vc.n_sub; ++tid) vc.ploidy[tid] = vc.subsam[tid][strlen(vc.subsam[tid]) + 1]; tid = -1; break; case 'T': if (strcmp(optarg, "trioauto") == 0) vc.trio_aux = bcf_trio_prep(0, 0); else if (strcmp(optarg, "trioxd") == 0) vc.trio_aux = bcf_trio_prep(1, 0); else if (strcmp(optarg, "trioxs") == 0) vc.trio_aux = bcf_trio_prep(1, 1); else if (strcmp(optarg, "pair") == 0) vc.flag |= VC_PAIRCALL; else { fprintf(stderr, "[%s] Option '-T' can only take value trioauto, trioxd or trioxs.\n", __func__); return 1; } break; case 'P': if (strcmp(optarg, "full") == 0) vc.prior_type = MC_PTYPE_FULL; else if (strcmp(optarg, "cond2") == 0) vc.prior_type = MC_PTYPE_COND2; else if (strcmp(optarg, "flat") == 0) vc.prior_type = MC_PTYPE_FLAT; else vc.prior_file = strdup(optarg); break; } } if (argc == optind) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bcftools view [options] [reg]\n\n"); fprintf(stderr, "Input/output options:\n\n"); fprintf(stderr, " -A keep all possible alternate alleles at variant sites\n"); fprintf(stderr, " -b output BCF instead of VCF\n"); fprintf(stderr, " -D FILE sequence dictionary for VCF->BCF conversion [null]\n"); fprintf(stderr, " -F PL generated by r921 or before (which generate old ordering)\n"); fprintf(stderr, " -G suppress all individual genotype information\n"); fprintf(stderr, " -l FILE list of sites (chr pos) or regions (BED) to output [all 
sites]\n"); fprintf(stderr, " -L calculate LD for adjacent sites\n"); fprintf(stderr, " -N skip sites where REF is not A/C/G/T\n"); fprintf(stderr, " -Q output the QCALL likelihood format\n"); fprintf(stderr, " -s FILE list of samples to use [all samples]\n"); fprintf(stderr, " -S input is VCF\n"); fprintf(stderr, " -u uncompressed BCF output (force -b)\n"); fprintf(stderr, "\nConsensus/variant calling options:\n\n"); fprintf(stderr, " -c SNP calling (force -e)\n"); fprintf(stderr, " -d FLOAT skip loci where less than FLOAT fraction of samples covered [0]\n"); fprintf(stderr, " -e likelihood based analyses\n"); fprintf(stderr, " -g call genotypes at variant sites (force -c)\n"); fprintf(stderr, " -i FLOAT indel-to-substitution ratio [%.4g]\n", vc.indel_frac); fprintf(stderr, " -I skip indels\n"); fprintf(stderr, " -m FLOAT alternative model for multiallelic and rare-variant calling, include if P(chi^2)>=FLOAT\n"); fprintf(stderr, " -p FLOAT variant if P(ref|D)BCF conversion please specify the sequence dictionary with -D\n", __func__); return 1; } if (vc.n1 <= 0) vc.n_perm = 0; // TODO: give a warning here! 
if (vc.n_perm > 0) { seeds = malloc(vc.n_perm * sizeof(int)); srand48(time(0)); for (c = 0; c < vc.n_perm; ++c) seeds[c] = lrand48(); } b = calloc(1, sizeof(bcf1_t)); blast = calloc(1, sizeof(bcf1_t)); strcpy(moder, "r"); if (!(vc.flag & VC_VCFIN)) strcat(moder, "b"); strcpy(modew, "w"); if (vc.flag & VC_BCFOUT) strcat(modew, "b"); if (vc.flag & VC_UNCOMP) strcat(modew, "u"); bp = vcf_open(argv[optind], moder); hin = hout = vcf_hdr_read(bp); if (vc.fn_dict && (vc.flag & VC_VCFIN)) vcf_dictread(bp, hin, vc.fn_dict); bout = vcf_open("-", modew); if (!(vc.flag & VC_QCALL)) { if (vc.n_sub) { vc.sublist = calloc(vc.n_sub, sizeof(int)); hout = bcf_hdr_subsam(hin, vc.n_sub, vc.subsam, vc.sublist); } write_header(hout); // always print the header vcf_hdr_write(bout, hout); } if (vc.flag & VC_CALL) { p1 = bcf_p1_init(hout->n_smpl, vc.ploidy); if (vc.prior_file) { if (bcf_p1_read_prior(p1, vc.prior_file) < 0) { fprintf(stderr, "[%s] fail to read the prior AFS.\n", __func__); return 1; } } else bcf_p1_init_prior(p1, vc.prior_type, vc.theta); if (vc.n1 > 0 && vc.min_lrt > 0.) { // set n1 bcf_p1_set_n1(p1, vc.n1); bcf_p1_init_subprior(p1, vc.prior_type, vc.theta); } if (vc.indel_frac > 0.) bcf_p1_indel_prior(p1, vc.indel_frac); // otherwise use the default indel_frac } if (optind + 1 < argc && !(vc.flag&VC_VCFIN)) { void *str2id = bcf_build_refhash(hout); if (bcf_parse_region(str2id, argv[optind+1], &tid, &begin, &end) >= 0) { bcf_idx_t *idx; idx = bcf_idx_load(argv[optind]); if (idx) { uint64_t off; off = bcf_idx_query(idx, tid, begin); if (off == 0) { fprintf(stderr, "[%s] no records in the query region.\n", __func__); return 1; // FIXME: a lot of memory leaks... 
} bgzf_seek(bp->fp, off, SEEK_SET); bcf_idx_destroy(idx); } } } if (bcf_p1_fp_lk && p1) { int32_t M = bcf_p1_get_M(p1); gzwrite(bcf_p1_fp_lk, &M, 4); } while (vcf_read(bp, hin, b) > 0) { int is_indel, cons_llr = -1; int64_t cons_gt = -1; double em[10]; if ((vc.flag & VC_VARONLY) && strcmp(b->alt, "X") == 0) continue; if ((vc.flag & VC_VARONLY) && vc.min_smpl_frac > 0.) { extern int bcf_smpl_covered(const bcf1_t *b); int n = bcf_smpl_covered(b); if ((double)n / b->n_smpl < vc.min_smpl_frac) continue; } if (vc.n_sub) bcf_subsam(vc.n_sub, vc.sublist, b); if (vc.flag & VC_FIX_PL) bcf_fix_pl(b); is_indel = bcf_is_indel(b); if ((vc.flag & VC_NO_INDEL) && is_indel) continue; if ((vc.flag & VC_INDEL_ONLY) && !is_indel) continue; if ((vc.flag & VC_ACGT_ONLY) && !is_indel) { int x; if (b->ref[0] == 0 || b->ref[1] != 0) continue; x = toupper(b->ref[0]); if (x != 'A' && x != 'C' && x != 'G' && x != 'T') continue; } if (vc.bed && !bed_overlap(vc.bed, hin->ns[b->tid], b->pos, b->pos + strlen(b->ref))) continue; if (tid >= 0) { int l = strlen(b->ref); l = b->pos + (l > 0? 
l : 1); if (b->tid != tid || b->pos >= end) break; if (!(l > begin && end > b->pos)) continue; } ++n_processed; if ((vc.flag & VC_QCNT) && !is_indel) { // summarize the difference int x = bcf_min_diff(b); if (x > 255) x = 255; if (x >= 0) ++qcnt[x]; } if (vc.flag & VC_QCALL) { // output QCALL format; STOP here bcf_2qcall(hout, b); continue; } if (vc.trio_aux) // do trio calling bcf_trio_call(vc.trio_aux, b, &cons_llr, &cons_gt); else if (vc.flag & VC_PAIRCALL) cons_llr = bcf_pair_call(b); if (vc.flag & (VC_CALL|VC_ADJLD|VC_EM)) bcf_gl2pl(b); if (vc.flag & VC_EM) bcf_em1(b, vc.n1, 0x1ff, em); else { int i; for (i = 0; i < 9; ++i) em[i] = -1.; } if ( !(vc.flag&VC_KEEPALT) && (vc.flag&VC_CALL) && vc.min_ma_lrt>=0 ) { bcf_p1_set_ploidy(b, p1); // could be improved: do this per site to allow pseudo-autosomal regions int gts = call_multiallelic_gt(b, p1, vc.min_ma_lrt, vc.flag&VC_VARONLY); if ( gts<=1 && vc.flag & VC_VARONLY ) continue; } else if (vc.flag & VC_CALL) { // call variants bcf_p1rst_t pr; int calret; gzwrite(bcf_p1_fp_lk, &b->tid, 4); gzwrite(bcf_p1_fp_lk, &b->pos, 4); gzwrite(bcf_p1_fp_lk, &em[0], sizeof(double)); calret = bcf_p1_cal(b, (em[7] >= 0 && em[7] < vc.min_lrt), p1, &pr); if (n_processed % 100000 == 0) { fprintf(stderr, "[%s] %ld sites processed.\n", __func__, (long)n_processed); bcf_p1_dump_afs(p1); } if (pr.p_ref >= vc.pref && (vc.flag & VC_VARONLY)) continue; if (vc.n_perm && vc.n1 > 0 && pr.p_chi2 < vc.min_perm_p) { // permutation test bcf_p1rst_t r; int i, n = 0; for (i = 0; i < vc.n_perm; ++i) { #ifdef BCF_PERM_LRT // LRT based permutation is much faster but less robust to artifacts double x[10]; bcf_shuffle(b, seeds[i]); bcf_em1(b, vc.n1, 1<<7, x); if (x[7] < em[7]) ++n; #else bcf_shuffle(b, seeds[i]); bcf_p1_cal(b, 1, p1, &r); if (pr.p_chi2 >= r.p_chi2) ++n; #endif } pr.perm_rank = n; } if (calret >= 0) update_bcf1(b, p1, &pr, vc.pref, vc.flag, em, cons_llr, cons_gt); } else if (vc.flag & VC_EM) update_bcf1(b, 0, 0, 0, vc.flag, em, 
cons_llr, cons_gt); if (vc.flag & VC_ADJLD) { // compute LD double f[4], r2; if ((r2 = bcf_pair_freq(blast, b, f)) >= 0) { kstring_t s; s.m = s.l = 0; s.s = 0; if (*b->info) kputc(';', &s); ksprintf(&s, "NEIR=%.3f;NEIF4=%.3f,%.3f,%.3f,%.3f", r2, f[0], f[1], f[2], f[3]); bcf_append_info(b, s.s, s.l); free(s.s); } bcf_cpy(blast, b); } if (vc.flag & VC_ANNO_MAX) bcf_anno_max(b); if (vc.flag & VC_NO_GENO) { // do not output GENO fields b->n_gi = 0; b->fmt[0] = '\0'; b->l_str = b->fmt - b->str + 1; } else bcf_fix_gt(b); vcf_write(bout, hout, b); } if (bcf_p1_fp_lk) gzclose(bcf_p1_fp_lk); if (vc.prior_file) free(vc.prior_file); if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1); if (hin != hout) bcf_hdr_destroy(hout); bcf_hdr_destroy(hin); bcf_destroy(b); bcf_destroy(blast); vcf_close(bp); vcf_close(bout); if (vc.fn_dict) free(vc.fn_dict); if (vc.ploidy) free(vc.ploidy); if (vc.trio_aux) free(vc.trio_aux); if (vc.n_sub) { int i; for (i = 0; i < vc.n_sub; ++i) free(vc.subsam[i]); free(vc.subsam); free(vc.sublist); } if (vc.bed) bed_destroy(vc.bed); if (vc.flag & VC_QCNT) for (c = 0; c < 256; ++c) fprintf(stderr, "QT\t%d\t%lld\n", c, (long long)qcnt[c]); if (seeds) free(seeds); if (p1) bcf_p1_destroy(p1); return 0; } samtools-0.1.19/bcftools/em.c000066400000000000000000000214311212162403000160130ustar00rootroot00000000000000#include #include #include #include "bcf.h" #include "kmin.h" static double g_q2p[256]; #define ITER_MAX 50 #define ITER_TRY 10 #define EPS 1e-5 extern double kf_gammaq(double, double); /* Generic routines */ // get the 3 genotype likelihoods static double *get_pdg3(const bcf1_t *b) { double *pdg; const uint8_t *PL = 0; int i, PL_len = 0; // initialize g_q2p if necessary if (g_q2p[0] == 0.) 
for (i = 0; i < 256; ++i) g_q2p[i] = pow(10., -i / 10.); // set PL and PL_len for (i = 0; i < b->n_gi; ++i) { if (b->gi[i].fmt == bcf_str2int("PL", 2)) { PL = (const uint8_t*)b->gi[i].data; PL_len = b->gi[i].len; break; } } if (i == b->n_gi) return 0; // no PL // fill pdg pdg = malloc(3 * b->n_smpl * sizeof(double)); for (i = 0; i < b->n_smpl; ++i) { const uint8_t *pi = PL + i * PL_len; double *p = pdg + i * 3; p[0] = g_q2p[pi[2]]; p[1] = g_q2p[pi[1]]; p[2] = g_q2p[pi[0]]; } return pdg; } // estimate site allele frequency in a very naive and inaccurate way static double est_freq(int n, const double *pdg) { int i, gcnt[3], tmp1; // get a rough estimate of the genotype frequency gcnt[0] = gcnt[1] = gcnt[2] = 0; for (i = 0; i < n; ++i) { const double *p = pdg + i * 3; if (p[0] != 1. || p[1] != 1. || p[2] != 1.) { int which = p[0] > p[1]? 0 : 1; which = p[which] > p[2]? which : 2; ++gcnt[which]; } } tmp1 = gcnt[0] + gcnt[1] + gcnt[2]; return (tmp1 == 0)? -1.0 : (.5 * gcnt[1] + gcnt[2]) / tmp1; } /* Single-locus EM */ typedef struct { int beg, end; const double *pdg; } minaux1_t; static double prob1(double f, void *data) { minaux1_t *a = (minaux1_t*)data; double p = 1., l = 0., f3[3]; int i; // printf("brent %lg\n", f); if (f < 0 || f > 1) return 1e300; f3[0] = (1.-f)*(1.-f); f3[1] = 2.*f*(1.-f); f3[2] = f*f; for (i = a->beg; i < a->end; ++i) { const double *pdg = a->pdg + i * 3; p *= pdg[0] * f3[0] + pdg[1] * f3[1] + pdg[2] * f3[2]; if (p < 1e-200) l -= log(p), p = 1.; } return l - log(p); } // one EM iteration for allele frequency estimate static double freq_iter(double *f, const double *_pdg, int beg, int end) { double f0 = *f, f3[3], err; int i; // printf("em %lg\n", *f); f3[0] = (1.-f0)*(1.-f0); f3[1] = 2.*f0*(1.-f0); f3[2] = f0*f0; for (i = beg, f0 = 0.; i < end; ++i) { const double *pdg = _pdg + i * 3; f0 += (pdg[1] * f3[1] + 2. 
* pdg[2] * f3[2]) / (pdg[0] * f3[0] + pdg[1] * f3[1] + pdg[2] * f3[2]); } f0 /= (end - beg) * 2; err = fabs(f0 - *f); *f = f0; return err; } /* The following function combines EM and Brent's method. When the signal from * the data is strong, EM is faster but sometimes, EM may converge very slowly. * When this happens, we switch to Brent's method. The idea is learned from * Rasmus Nielsen. */ static double freqml(double f0, int beg, int end, const double *pdg) { int i; double f; for (i = 0, f = f0; i < ITER_TRY; ++i) if (freq_iter(&f, pdg, beg, end) < EPS) break; if (i == ITER_TRY) { // haven't converged yet; try Brent's method minaux1_t a; a.beg = beg; a.end = end; a.pdg = pdg; kmin_brent(prob1, f0 == f? .5*f0 : f0, f, (void*)&a, EPS, &f); } return f; } // one EM iteration for genotype frequency estimate static double g3_iter(double g[3], const double *_pdg, int beg, int end) { double err, gg[3]; int i; gg[0] = gg[1] = gg[2] = 0.; // printf("%lg,%lg,%lg\n", g[0], g[1], g[2]); for (i = beg; i < end; ++i) { double sum, tmp[3]; const double *pdg = _pdg + i * 3; tmp[0] = pdg[0] * g[0]; tmp[1] = pdg[1] * g[1]; tmp[2] = pdg[2] * g[2]; sum = (tmp[0] + tmp[1] + tmp[2]) * (end - beg); gg[0] += tmp[0] / sum; gg[1] += tmp[1] / sum; gg[2] += tmp[2] / sum; } err = fabs(gg[0] - g[0]) > fabs(gg[1] - g[1])? fabs(gg[0] - g[0]) : fabs(gg[1] - g[1]); err = err > fabs(gg[2] - g[2])? 
err : fabs(gg[2] - g[2]); g[0] = gg[0]; g[1] = gg[1]; g[2] = gg[2]; return err; } // perform likelihood ratio test static double lk_ratio_test(int n, int n1, const double *pdg, double f3[3][3]) { double r; int i; for (i = 0, r = 1.; i < n1; ++i) { const double *p = pdg + i * 3; r *= (p[0] * f3[1][0] + p[1] * f3[1][1] + p[2] * f3[1][2]) / (p[0] * f3[0][0] + p[1] * f3[0][1] + p[2] * f3[0][2]); } for (; i < n; ++i) { const double *p = pdg + i * 3; r *= (p[0] * f3[2][0] + p[1] * f3[2][1] + p[2] * f3[2][2]) / (p[0] * f3[0][0] + p[1] * f3[0][1] + p[2] * f3[0][2]); } return r; } // x[0]: ref frequency // x[1..3]: alt-alt, alt-ref, ref-ref frequenc // x[4]: HWE P-value // x[5..6]: group1 freq, group2 freq // x[7]: 1-degree P-value // x[8]: 2-degree P-value int bcf_em1(const bcf1_t *b, int n1, int flag, double x[10]) { double *pdg; int i, n, n2; if (b->n_alleles < 2) return -1; // one allele only // initialization if (n1 < 0 || n1 > b->n_smpl) n1 = 0; if (flag & 1<<7) flag |= 7<<5; // compute group freq if LRT is required if (flag & 0xf<<1) flag |= 0xf<<1; n = b->n_smpl; n2 = n - n1; pdg = get_pdg3(b); if (pdg == 0) return -1; for (i = 0; i < 10; ++i) x[i] = -1.; // set to negative { if ((x[0] = est_freq(n, pdg)) < 0.) 
{ free(pdg); return -1; // no data } x[0] = freqml(x[0], 0, n, pdg); } if (flag & (0xf<<1|3<<8)) { // estimate the genotype frequency and test HWE double *g = x + 1, f3[3], r; f3[0] = g[0] = (1 - x[0]) * (1 - x[0]); f3[1] = g[1] = 2 * x[0] * (1 - x[0]); f3[2] = g[2] = x[0] * x[0]; for (i = 0; i < ITER_MAX; ++i) if (g3_iter(g, pdg, 0, n) < EPS) break; // Hardy-Weinberg equilibrium (HWE) for (i = 0, r = 1.; i < n; ++i) { double *p = pdg + i * 3; r *= (p[0] * g[0] + p[1] * g[1] + p[2] * g[2]) / (p[0] * f3[0] + p[1] * f3[1] + p[2] * f3[2]); } x[4] = kf_gammaq(.5, log(r)); } if ((flag & 7<<5) && n1 > 0 && n1 < n) { // group frequency x[5] = freqml(x[0], 0, n1, pdg); x[6] = freqml(x[0], n1, n, pdg); } if ((flag & 1<<7) && n1 > 0 && n1 < n) { // 1-degree P-value double f[3], f3[3][3], tmp; f[0] = x[0]; f[1] = x[5]; f[2] = x[6]; for (i = 0; i < 3; ++i) f3[i][0] = (1-f[i])*(1-f[i]), f3[i][1] = 2*f[i]*(1-f[i]), f3[i][2] = f[i]*f[i]; tmp = log(lk_ratio_test(n, n1, pdg, f3)); if (tmp < 0) tmp = 0; x[7] = kf_gammaq(.5, tmp); } if ((flag & 3<<8) && n1 > 0 && n1 < n) { // 2-degree P-value double g[3][3], tmp; for (i = 0; i < 3; ++i) memcpy(g[i], x + 1, 3 * sizeof(double)); for (i = 0; i < ITER_MAX; ++i) if (g3_iter(g[1], pdg, 0, n1) < EPS) break; for (i = 0; i < ITER_MAX; ++i) if (g3_iter(g[2], pdg, n1, n) < EPS) break; tmp = log(lk_ratio_test(n, n1, pdg, g)); if (tmp < 0) tmp = 0; x[8] = kf_gammaq(1., tmp); } // free free(pdg); return 0; } /* Two-locus EM (LD) */ #define _G1(h, k) ((h>>1&1) + (k>>1&1)) #define _G2(h, k) ((h&1) + (k&1)) // 0: the previous site; 1: the current site static int pair_freq_iter(int n, double *pdg[2], double f[4]) { double ff[4]; int i, k, h; // printf("%lf,%lf,%lf,%lf\n", f[0], f[1], f[2], f[3]); memset(ff, 0, 4 * sizeof(double)); for (i = 0; i < n; ++i) { double *p[2], sum, tmp; p[0] = pdg[0] + i * 3; p[1] = pdg[1] + i * 3; for (k = 0, sum = 0.; k < 4; ++k) for (h = 0; h < 4; ++h) sum += f[k] * f[h] * p[0][_G1(k,h)] * p[1][_G2(k,h)]; for (k = 0; k < 
4; ++k) { tmp = f[0] * (p[0][_G1(0,k)] * p[1][_G2(0,k)] + p[0][_G1(k,0)] * p[1][_G2(k,0)]) + f[1] * (p[0][_G1(1,k)] * p[1][_G2(1,k)] + p[0][_G1(k,1)] * p[1][_G2(k,1)]) + f[2] * (p[0][_G1(2,k)] * p[1][_G2(2,k)] + p[0][_G1(k,2)] * p[1][_G2(k,2)]) + f[3] * (p[0][_G1(3,k)] * p[1][_G2(3,k)] + p[0][_G1(k,3)] * p[1][_G2(k,3)]); ff[k] += f[k] * tmp / sum; } } for (k = 0; k < 4; ++k) f[k] = ff[k] / (2 * n); return 0; } double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]) { const bcf1_t *b[2]; int i, j, n_smpl; double *pdg[2], flast[4], r, f0[2]; // initialize others if (b0->n_smpl != b1->n_smpl) return -1; // different number of samples n_smpl = b0->n_smpl; b[0] = b0; b[1] = b1; f[0] = f[1] = f[2] = f[3] = -1.; if (b[0]->n_alleles < 2 || b[1]->n_alleles < 2) return -1; // one allele only pdg[0] = get_pdg3(b0); pdg[1] = get_pdg3(b1); if (pdg[0] == 0 || pdg[1] == 0) { free(pdg[0]); free(pdg[1]); return -1; } // set the initial value f0[0] = est_freq(n_smpl, pdg[0]); f0[1] = est_freq(n_smpl, pdg[1]); f[0] = (1 - f0[0]) * (1 - f0[1]); f[3] = f0[0] * f0[1]; f[1] = (1 - f0[0]) * f0[1]; f[2] = f0[0] * (1 - f0[1]); // iteration for (j = 0; j < ITER_MAX; ++j) { double eps = 0; memcpy(flast, f, 4 * sizeof(double)); pair_freq_iter(n_smpl, pdg, f); for (i = 0; i < 4; ++i) { double x = fabs(f[i] - flast[i]); if (x > eps) eps = x; } if (eps < EPS) break; } // free free(pdg[0]); free(pdg[1]); { // calculate r^2 double p[2], q[2], D; p[0] = f[0] + f[1]; q[0] = 1 - p[0]; p[1] = f[0] + f[2]; q[1] = 1 - p[1]; D = f[0] * f[3] - f[1] * f[2]; r = sqrt(D * D / (p[0] * p[1] * q[0] * q[1])); // printf("R(%lf,%lf,%lf,%lf)=%lf\n", f[0], f[1], f[2], f[3], r); if (isnan(r)) r = -1.; } return r; } samtools-0.1.19/bcftools/fet.c000066400000000000000000000060621212162403000161730ustar00rootroot00000000000000#include #include /* This program is implemented with ideas from this web page: * * http://www.langsrud.com/fisher.htm */ // log\binom{n}{k} static double lbinom(int n, int k) { if (k 
== 0 || n == k) return 0; return lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1); } // n11 n12 | n1_ // n21 n22 | n2_ //-----------+---- // n_1 n_2 | n // hypergeometric distribution static double hypergeo(int n11, int n1_, int n_1, int n) { return exp(lbinom(n1_, n11) + lbinom(n-n1_, n_1-n11) - lbinom(n, n_1)); } typedef struct { int n11, n1_, n_1, n; double p; } hgacc_t; // incremental version of hypergenometric distribution static double hypergeo_acc(int n11, int n1_, int n_1, int n, hgacc_t *aux) { if (n1_ || n_1 || n) { aux->n11 = n11; aux->n1_ = n1_; aux->n_1 = n_1; aux->n = n; } else { // then only n11 changed; the rest fixed if (n11%11 && n11 + aux->n - aux->n1_ - aux->n_1) { if (n11 == aux->n11 + 1) { // incremental aux->p *= (double)(aux->n1_ - aux->n11) / n11 * (aux->n_1 - aux->n11) / (n11 + aux->n - aux->n1_ - aux->n_1); aux->n11 = n11; return aux->p; } if (n11 == aux->n11 - 1) { // incremental aux->p *= (double)aux->n11 / (aux->n1_ - n11) * (aux->n11 + aux->n - aux->n1_ - aux->n_1) / (aux->n_1 - n11); aux->n11 = n11; return aux->p; } } aux->n11 = n11; } aux->p = hypergeo(aux->n11, aux->n1_, aux->n_1, aux->n); return aux->p; } double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two) { int i, j, max, min; double p, q, left, right; hgacc_t aux; int n1_, n_1, n; n1_ = n11 + n12; n_1 = n11 + n21; n = n11 + n12 + n21 + n22; // calculate n1_, n_1 and n max = (n_1 < n1_) ? 
n_1 : n1_; // max n11, for right tail min = n1_ + n_1 - n; if (min < 0) min = 0; // min n11, for left tail *two = *_left = *_right = 1.; if (min == max) return 1.; // no need to do test q = hypergeo_acc(n11, n1_, n_1, n, &aux); // the probability of the current table // left tail p = hypergeo_acc(min, 0, 0, 0, &aux); for (left = 0., i = min + 1; p < 0.99999999 * q; ++i) // loop until underflow left += p, p = hypergeo_acc(i, 0, 0, 0, &aux); --i; if (p < 1.00000001 * q) left += p; else --i; // right tail p = hypergeo_acc(max, 0, 0, 0, &aux); for (right = 0., j = max - 1; p < 0.99999999 * q; --j) // loop until underflow right += p, p = hypergeo_acc(j, 0, 0, 0, &aux); ++j; if (p < 1.00000001 * q) right += p; else ++j; // two-tail *two = left + right; if (*two > 1.) *two = 1.; // adjust left and right if (abs(i - n11) < abs(j - n11)) right = 1. - left + q; else left = 1.0 - right + q; *_left = left; *_right = right; return q; } #ifdef FET_MAIN #include int main(int argc, char *argv[]) { char id[1024]; int n11, n12, n21, n22; double left, right, twotail, prob; while (scanf("%s%d%d%d%d", id, &n11, &n12, &n21, &n22) == 5) { prob = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &twotail); printf("%s\t%d\t%d\t%d\t%d\t%.6g\t%.6g\t%.6g\t%.6g\n", id, n11, n12, n21, n22, prob, left, right, twotail); } return 0; } #endif samtools-0.1.19/bcftools/index.c000066400000000000000000000201001212162403000165110ustar00rootroot00000000000000#include #include #include #include "bam_endian.h" #include "kstring.h" #include "bcf.h" #ifdef _USE_KNETFILE #include "knetfile.h" #endif #define TAD_LIDX_SHIFT 13 typedef struct { int32_t n, m; uint64_t *offset; } bcf_lidx_t; struct __bcf_idx_t { int32_t n; bcf_lidx_t *index2; }; /************ * indexing * ************/ static inline void insert_offset2(bcf_lidx_t *index2, int _beg, int _end, uint64_t offset) { int i, beg, end; beg = _beg >> TAD_LIDX_SHIFT; end = (_end - 1) >> TAD_LIDX_SHIFT; if (index2->m < end + 1) { int old_m = index2->m; 
index2->m = end + 1; kroundup32(index2->m); index2->offset = (uint64_t*)realloc(index2->offset, index2->m * 8); memset(index2->offset + old_m, 0, 8 * (index2->m - old_m)); } if (beg == end) { if (index2->offset[beg] == 0) index2->offset[beg] = offset; } else { for (i = beg; i <= end; ++i) if (index2->offset[i] == 0) index2->offset[i] = offset; } if (index2->n < end + 1) index2->n = end + 1; } bcf_idx_t *bcf_idx_core(bcf_t *bp, bcf_hdr_t *h) { bcf_idx_t *idx; int32_t last_coor, last_tid; uint64_t last_off; kstring_t *str; BGZF *fp = bp->fp; bcf1_t *b; int ret; b = calloc(1, sizeof(bcf1_t)); str = calloc(1, sizeof(kstring_t)); idx = (bcf_idx_t*)calloc(1, sizeof(bcf_idx_t)); idx->n = h->n_ref; idx->index2 = calloc(h->n_ref, sizeof(bcf_lidx_t)); last_tid = 0xffffffffu; last_off = bgzf_tell(fp); last_coor = 0xffffffffu; while ((ret = bcf_read(bp, h, b)) > 0) { int end, tmp; if (last_tid != b->tid) { // change of chromosomes last_tid = b->tid; } else if (last_coor > b->pos) { fprintf(stderr, "[bcf_idx_core] the input is out of order\n"); free(str->s); free(str); free(idx); bcf_destroy(b); return 0; } tmp = strlen(b->ref); end = b->pos + (tmp > 0? 
tmp : 1); insert_offset2(&idx->index2[b->tid], b->pos, end, last_off); last_off = bgzf_tell(fp); last_coor = b->pos; } free(str->s); free(str); bcf_destroy(b); return idx; } void bcf_idx_destroy(bcf_idx_t *idx) { int i; if (idx == 0) return; for (i = 0; i < idx->n; ++i) free(idx->index2[i].offset); free(idx->index2); free(idx); } /****************** * index file I/O * ******************/ void bcf_idx_save(const bcf_idx_t *idx, BGZF *fp) { int32_t i, ti_is_be; ti_is_be = bam_is_big_endian(); bgzf_write(fp, "BCI\4", 4); if (ti_is_be) { uint32_t x = idx->n; bgzf_write(fp, bam_swap_endian_4p(&x), 4); } else bgzf_write(fp, &idx->n, 4); for (i = 0; i < idx->n; ++i) { bcf_lidx_t *index2 = idx->index2 + i; // write linear index (index2) if (ti_is_be) { int x = index2->n; bgzf_write(fp, bam_swap_endian_4p(&x), 4); } else bgzf_write(fp, &index2->n, 4); if (ti_is_be) { // big endian int x; for (x = 0; (int)x < index2->n; ++x) bam_swap_endian_8p(&index2->offset[x]); bgzf_write(fp, index2->offset, 8 * index2->n); for (x = 0; (int)x < index2->n; ++x) bam_swap_endian_8p(&index2->offset[x]); } else bgzf_write(fp, index2->offset, 8 * index2->n); } } static bcf_idx_t *bcf_idx_load_core(BGZF *fp) { int i, ti_is_be; char magic[4]; bcf_idx_t *idx; ti_is_be = bam_is_big_endian(); if (fp == 0) { fprintf(stderr, "[%s] fail to load index.\n", __func__); return 0; } bgzf_read(fp, magic, 4); if (strncmp(magic, "BCI\4", 4)) { fprintf(stderr, "[%s] wrong magic number.\n", __func__); return 0; } idx = (bcf_idx_t*)calloc(1, sizeof(bcf_idx_t)); bgzf_read(fp, &idx->n, 4); if (ti_is_be) bam_swap_endian_4p(&idx->n); idx->index2 = (bcf_lidx_t*)calloc(idx->n, sizeof(bcf_lidx_t)); for (i = 0; i < idx->n; ++i) { bcf_lidx_t *index2 = idx->index2 + i; int j; bgzf_read(fp, &index2->n, 4); if (ti_is_be) bam_swap_endian_4p(&index2->n); index2->m = index2->n; index2->offset = (uint64_t*)calloc(index2->m, 8); bgzf_read(fp, index2->offset, index2->n * 8); if (ti_is_be) for (j = 0; j < index2->n; ++j) 
bam_swap_endian_8p(&index2->offset[j]); } return idx; } bcf_idx_t *bcf_idx_load_local(const char *fnidx) { BGZF *fp; fp = bgzf_open(fnidx, "r"); if (fp) { bcf_idx_t *idx = bcf_idx_load_core(fp); bgzf_close(fp); return idx; } else return 0; } #ifdef _USE_KNETFILE static void download_from_remote(const char *url) { const int buf_size = 1 * 1024 * 1024; char *fn; FILE *fp; uint8_t *buf; knetFile *fp_remote; int l; if (strstr(url, "ftp://") != url && strstr(url, "http://") != url) return; l = strlen(url); for (fn = (char*)url + l - 1; fn >= url; --fn) if (*fn == '/') break; ++fn; // fn now points to the file name fp_remote = knet_open(url, "r"); if (fp_remote == 0) { fprintf(stderr, "[download_from_remote] fail to open remote file.\n"); return; } if ((fp = fopen(fn, "w")) == 0) { fprintf(stderr, "[download_from_remote] fail to create file in the working directory.\n"); knet_close(fp_remote); return; } buf = (uint8_t*)calloc(buf_size, 1); while ((l = knet_read(fp_remote, buf, buf_size)) != 0) fwrite(buf, 1, l, fp); free(buf); fclose(fp); knet_close(fp_remote); } #else static void download_from_remote(const char *url) { return; } #endif static char *get_local_version(const char *fn) { struct stat sbuf; char *fnidx = (char*)calloc(strlen(fn) + 5, 1); strcat(strcpy(fnidx, fn), ".bci"); if ((strstr(fnidx, "ftp://") == fnidx || strstr(fnidx, "http://") == fnidx)) { char *p, *url; int l = strlen(fnidx); for (p = fnidx + l - 1; p >= fnidx; --p) if (*p == '/') break; url = fnidx; fnidx = strdup(p + 1); if (stat(fnidx, &sbuf) == 0) { free(url); return fnidx; } fprintf(stderr, "[%s] downloading the index file...\n", __func__); download_from_remote(url); free(url); } if (stat(fnidx, &sbuf) == 0) return fnidx; free(fnidx); return 0; } bcf_idx_t *bcf_idx_load(const char *fn) { bcf_idx_t *idx; char *fname = get_local_version(fn); if (fname == 0) return 0; idx = bcf_idx_load_local(fname); free(fname); return idx; } int bcf_idx_build2(const char *fn, const char *_fnidx) { char *fnidx; 
BGZF *fpidx; bcf_t *bp; bcf_idx_t *idx; bcf_hdr_t *h; if ((bp = bcf_open(fn, "r")) == 0) { fprintf(stderr, "[bcf_idx_build2] fail to open the BAM file.\n"); return -1; } h = bcf_hdr_read(bp); idx = bcf_idx_core(bp, h); bcf_close(bp); if (_fnidx == 0) { fnidx = (char*)calloc(strlen(fn) + 5, 1); strcpy(fnidx, fn); strcat(fnidx, ".bci"); } else fnidx = strdup(_fnidx); fpidx = bgzf_open(fnidx, "w"); if (fpidx == 0) { fprintf(stderr, "[bcf_idx_build2] fail to create the index file.\n"); free(fnidx); bcf_idx_destroy(idx); return -1; } bcf_idx_save(idx, fpidx); bcf_idx_destroy(idx); bgzf_close(fpidx); free(fnidx); return 0; } int bcf_idx_build(const char *fn) { return bcf_idx_build2(fn, 0); } /******************************************** * parse a region in the format chr:beg-end * ********************************************/ int bcf_parse_region(void *str2id, const char *str, int *tid, int *begin, int *end) { char *s, *p; int i, l, k; l = strlen(str); p = s = (char*)malloc(l+1); /* squeeze out "," */ for (i = k = 0; i != l; ++i) if (str[i] != ',' && !isspace(str[i])) s[k++] = str[i]; s[k] = 0; for (i = 0; i != k; ++i) if (s[i] == ':') break; s[i] = 0; if ((*tid = bcf_str2id(str2id, s)) < 0) { free(s); return -1; } if (i == k) { /* dump the whole sequence */ *begin = 0; *end = 1<<29; free(s); return 0; } for (p = s + i + 1; i != k; ++i) if (s[i] == '-') break; *begin = atoi(p); if (i < k) { p = s + i + 1; *end = atoi(p); } else *end = 1<<29; if (*begin > 0) --*begin; free(s); if (*begin > *end) return -1; return 0; } /******************************* * retrieve a specified region * *******************************/ uint64_t bcf_idx_query(const bcf_idx_t *idx, int tid, int beg) { uint64_t min_off, *offset; int i; if (beg < 0) beg = 0; offset = idx->index2[tid].offset; for (i = beg>>TAD_LIDX_SHIFT; i < idx->index2[tid].n && offset[i] == 0; ++i); min_off = (i == idx->index2[tid].n)? 
offset[idx->index2[tid].n-1] : offset[i]; return min_off; } int bcf_main_index(int argc, char *argv[]) { if (argc == 1) { fprintf(stderr, "Usage: bcftools index \n"); return 1; } bcf_idx_build(argv[1]); return 0; } samtools-0.1.19/bcftools/kfunc.c000066400000000000000000000115401212162403000165200ustar00rootroot00000000000000#include /* Log gamma function * \log{\Gamma(z)} * AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245 */ double kf_lgamma(double z) { double x = 0; x += 0.1659470187408462e-06 / (z+7); x += 0.9934937113930748e-05 / (z+6); x -= 0.1385710331296526 / (z+5); x += 12.50734324009056 / (z+4); x -= 176.6150291498386 / (z+3); x += 771.3234287757674 / (z+2); x -= 1259.139216722289 / (z+1); x += 676.5203681218835 / z; x += 0.9999999999995183; return log(x) - 5.58106146679532777 - z + (z-0.5) * log(z+6.5); } /* complementary error function * \frac{2}{\sqrt{\pi}} \int_x^{\infty} e^{-t^2} dt * AS66, 2nd algorithm, http://lib.stat.cmu.edu/apstat/66 */ double kf_erfc(double x) { const double p0 = 220.2068679123761; const double p1 = 221.2135961699311; const double p2 = 112.0792914978709; const double p3 = 33.912866078383; const double p4 = 6.37396220353165; const double p5 = .7003830644436881; const double p6 = .03526249659989109; const double q0 = 440.4137358247522; const double q1 = 793.8265125199484; const double q2 = 637.3336333788311; const double q3 = 296.5642487796737; const double q4 = 86.78073220294608; const double q5 = 16.06417757920695; const double q6 = 1.755667163182642; const double q7 = .08838834764831844; double expntl, z, p; z = fabs(x) * M_SQRT2; if (z > 37.) return x > 0.? 0. : 2.; expntl = exp(z * z * - .5); if (z < 10. / M_SQRT2) // for small z p = expntl * ((((((p6 * z + p5) * z + p4) * z + p3) * z + p2) * z + p1) * z + p0) / (((((((q7 * z + q6) * z + q5) * z + q4) * z + q3) * z + q2) * z + q1) * z + q0); else p = expntl / 2.506628274631001 / (z + 1. / (z + 2. / (z + 3. / (z + 4. / (z + .65))))); return x > 0.? 2. * p : 2. * (1. 
- p); } /* The following computes regularized incomplete gamma functions. * Formulas are taken from Wiki, with additional input from Numerical * Recipes in C (for modified Lentz's algorithm) and AS245 * (http://lib.stat.cmu.edu/apstat/245). * * A good online calculator is available at: * * http://www.danielsoper.com/statcalc/calc23.aspx * * It calculates upper incomplete gamma function, which equals * kf_gammaq(s,z)*tgamma(s). */ #define KF_GAMMA_EPS 1e-14 #define KF_TINY 1e-290 // regularized lower incomplete gamma function, by series expansion static double _kf_gammap(double s, double z) { double sum, x; int k; for (k = 1, sum = x = 1.; k < 100; ++k) { sum += (x *= z / (s + k)); if (x / sum < KF_GAMMA_EPS) break; } return exp(s * log(z) - z - kf_lgamma(s + 1.) + log(sum)); } // regularized upper incomplete gamma function, by continued fraction static double _kf_gammaq(double s, double z) { int j; double C, D, f; f = 1. + z - s; C = f; D = 0.; // Modified Lentz's algorithm for computing continued fraction // See Numerical Recipes in C, 2nd edition, section 5.2 for (j = 1; j < 100; ++j) { double a = j * (s - j), b = (j<<1) + 1 + z - s, d; D = b + a * D; if (D < KF_TINY) D = KF_TINY; C = b + a / C; if (C < KF_TINY) C = KF_TINY; D = 1. / D; d = C * D; f *= d; if (fabs(d - 1.) < KF_GAMMA_EPS) break; } return exp(s * log(z) - z - kf_lgamma(s) - log(f)); } double kf_gammap(double s, double z) { return z <= 1. || z < s? _kf_gammap(s, z) : 1. - _kf_gammaq(s, z); } double kf_gammaq(double s, double z) { return z <= 1. || z < s? 1. - _kf_gammap(s, z) : _kf_gammaq(s, z); } /* Regularized incomplete beta function. The method is taken from * Numerical Recipe in C, 2nd edition, section 6.4. The following web * page calculates the incomplete beta function, which equals * kf_betai(a,b,x) * gamma(a) * gamma(b) / gamma(a+b): * * http://www.danielsoper.com/statcalc/calc36.aspx */ static double kf_betai_aux(double a, double b, double x) { double C, D, f; int j; if (x == 0.) 
return 0.; if (x == 1.) return 1.; f = 1.; C = f; D = 0.; // Modified Lentz's algorithm for computing continued fraction for (j = 1; j < 200; ++j) { double aa, d; int m = j>>1; aa = (j&1)? -(a + m) * (a + b + m) * x / ((a + 2*m) * (a + 2*m + 1)) : m * (b - m) * x / ((a + 2*m - 1) * (a + 2*m)); D = 1. + aa * D; if (D < KF_TINY) D = KF_TINY; C = 1. + aa / C; if (C < KF_TINY) C = KF_TINY; D = 1. / D; d = C * D; f *= d; if (fabs(d - 1.) < KF_GAMMA_EPS) break; } return exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b) + a * log(x) + b * log(1.-x)) / a / f; } double kf_betai(double a, double b, double x) { return x < (a + 1.) / (a + b + 2.)? kf_betai_aux(a, b, x) : 1. - kf_betai_aux(b, a, 1. - x); } #ifdef KF_MAIN #include int main(int argc, char *argv[]) { double x = 5.5, y = 3; double a, b; printf("erfc(%lg): %lg, %lg\n", x, erfc(x), kf_erfc(x)); printf("upper-gamma(%lg,%lg): %lg\n", x, y, kf_gammaq(y, x)*tgamma(y)); a = 2; b = 2; x = 0.5; printf("incomplete-beta(%lg,%lg,%lg): %lg\n", a, b, x, kf_betai(a, b, x) / exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b))); return 0; } #endif samtools-0.1.19/bcftools/kmin.c000066400000000000000000000161451212162403000163560ustar00rootroot00000000000000/* The MIT License Copyright (c) 2008, 2010 by Attractive Chaos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* Hooke-Jeeves algorithm for nonlinear minimization Based on the pseudocodes by Bell and Pike (CACM 9(9):684-685), and the revision by Tomlin and Smith (CACM 12(11):637-638). Both of the papers are comments on Kaupe's Algorithm 178 "Direct Search" (ACM 6(6):313-314). The original algorithm was designed by Hooke and Jeeves (ACM 8:212-229). This program is further revised according to Johnson's implementation at Netlib (opt/hooke.c). Hooke-Jeeves algorithm is very simple and it works quite well on a few examples. However, it might fail to converge due to its heuristic nature. A possible improvement, as is suggested by Johnson, may be to choose a small r at the beginning to quickly approach to the minimum and a large r at later step to hit the minimum. 
*/ #include #include #include #include "kmin.h" static double __kmin_hj_aux(kmin_f func, int n, double *x1, void *data, double fx1, double *dx, int *n_calls) { int k, j = *n_calls; double ftmp; for (k = 0; k != n; ++k) { x1[k] += dx[k]; ftmp = func(n, x1, data); ++j; if (ftmp < fx1) fx1 = ftmp; else { /* search the opposite direction */ dx[k] = 0.0 - dx[k]; x1[k] += dx[k] + dx[k]; ftmp = func(n, x1, data); ++j; if (ftmp < fx1) fx1 = ftmp; else x1[k] -= dx[k]; /* back to the original x[k] */ } } *n_calls = j; return fx1; /* here: fx1=f(n,x1) */ } double kmin_hj(kmin_f func, int n, double *x, void *data, double r, double eps, int max_calls) { double fx, fx1, *x1, *dx, radius; int k, n_calls = 0; x1 = (double*)calloc(n, sizeof(double)); dx = (double*)calloc(n, sizeof(double)); for (k = 0; k != n; ++k) { /* initial directions, based on MGJ */ dx[k] = fabs(x[k]) * r; if (dx[k] == 0) dx[k] = r; } radius = r; fx1 = fx = func(n, x, data); ++n_calls; for (;;) { memcpy(x1, x, n * sizeof(double)); /* x1 = x */ fx1 = __kmin_hj_aux(func, n, x1, data, fx, dx, &n_calls); while (fx1 < fx) { for (k = 0; k != n; ++k) { double t = x[k]; dx[k] = x1[k] > x[k]? fabs(dx[k]) : 0.0 - fabs(dx[k]); x[k] = x1[k]; x1[k] = x1[k] + x1[k] - t; } fx = fx1; if (n_calls >= max_calls) break; fx1 = func(n, x1, data); ++n_calls; fx1 = __kmin_hj_aux(func, n, x1, data, fx1, dx, &n_calls); if (fx1 >= fx) break; for (k = 0; k != n; ++k) if (fabs(x1[k] - x[k]) > .5 * fabs(dx[k])) break; if (k == n) break; } if (radius >= eps) { if (n_calls >= max_calls) break; radius *= r; for (k = 0; k != n; ++k) dx[k] *= r; } else break; /* converge */ } free(x1); free(dx); return fx1; } // I copied this function somewhere several years ago with some of my modifications, but I forgot the source. 
double kmin_brent(kmin1_f func, double a, double b, void *data, double tol, double *xmin) { double bound, u, r, q, fu, tmp, fa, fb, fc, c; const double gold1 = 1.6180339887; const double gold2 = 0.3819660113; const double tiny = 1e-20; const int max_iter = 100; double e, d, w, v, mid, tol1, tol2, p, eold, fv, fw; int iter; fa = func(a, data); fb = func(b, data); if (fb > fa) { // swap, such that f(a) > f(b) tmp = a; a = b; b = tmp; tmp = fa; fa = fb; fb = tmp; } c = b + gold1 * (b - a), fc = func(c, data); // golden section extrapolation while (fb > fc) { bound = b + 100.0 * (c - b); // the farthest point where we want to go r = (b - a) * (fb - fc); q = (b - c) * (fb - fa); if (fabs(q - r) < tiny) { // avoid 0 denominator tmp = q > r? tiny : 0.0 - tiny; } else tmp = q - r; u = b - ((b - c) * q - (b - a) * r) / (2.0 * tmp); // u is the parabolic extrapolation point if ((b > u && u > c) || (b < u && u < c)) { // u lies between b and c fu = func(u, data); if (fu < fc) { // (b,u,c) bracket the minimum a = b; b = u; fa = fb; fb = fu; break; } else if (fu > fb) { // (a,b,u) bracket the minimum c = u; fc = fu; break; } u = c + gold1 * (c - b); fu = func(u, data); // golden section extrapolation } else if ((c > u && u > bound) || (c < u && u < bound)) { // u lies between c and bound fu = func(u, data); if (fu < fc) { // fb > fc > fu b = c; c = u; u = c + gold1 * (c - b); fb = fc; fc = fu; fu = func(u, data); } else { // (b,c,u) bracket the minimum a = b; b = c; c = u; fa = fb; fb = fc; fc = fu; break; } } else if ((u > bound && bound > c) || (u < bound && bound < c)) { // u goes beyond the bound u = bound; fu = func(u, data); } else { // u goes the other way around, use golden section extrapolation u = c + gold1 * (c - b); fu = func(u, data); } a = b; b = c; c = u; fa = fb; fb = fc; fc = fu; } if (a > c) u = a, a = c, c = u; // swap // now, afb and fb tol1) { // related to parabolic interpolation r = (b - w) * (fb - fv); q = (b - v) * (fb - fw); p = (b - v) * q - (b - w) * 
r; q = 2.0 * (q - r); if (q > 0.0) p = 0.0 - p; else q = 0.0 - q; eold = e; e = d; if (fabs(p) >= fabs(0.5 * q * eold) || p <= q * (a - b) || p >= q * (c - b)) { d = gold2 * (e = (b >= mid ? a - b : c - b)); } else { d = p / q; u = b + d; // actual parabolic interpolation happens here if (u - a < tol2 || c - u < tol2) d = (mid > b)? tol1 : 0.0 - tol1; } } else d = gold2 * (e = (b >= mid ? a - b : c - b)); // golden section interpolation u = fabs(d) >= tol1 ? b + d : b + (d > 0.0? tol1 : -tol1); fu = func(u, data); if (fu <= fb) { // u is the minimum point so far if (u >= b) a = b; else c = b; v = w; w = b; b = u; fv = fw; fw = fb; fb = fu; } else { // adjust (a,c) and (u,v,w) if (u < b) a = u; else c = u; if (fu <= fw || w == b) { v = w; w = u; fv = fw; fw = fu; } else if (fu <= fv || v == b || v == w) { v = u; fv = fu; } } } *xmin = b; return fb; } samtools-0.1.19/bcftools/kmin.h000066400000000000000000000031231212162403000163530ustar00rootroot00000000000000/* Copyright (c) 2008, 2010 by Attractive Chaos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef KMIN_H #define KMIN_H #define KMIN_RADIUS 0.5 #define KMIN_EPS 1e-7 #define KMIN_MAXCALL 50000 typedef double (*kmin_f)(int, double*, void*); typedef double (*kmin1_f)(double, void*); #ifdef __cplusplus extern "C" { #endif double kmin_hj(kmin_f func, int n, double *x, void *data, double r, double eps, int max_calls); double kmin_brent(kmin1_f func, double a, double b, void *data, double tol, double *xmin); #ifdef __cplusplus } #endif #endif samtools-0.1.19/bcftools/main.c000066400000000000000000000121001212162403000163270ustar00rootroot00000000000000#include #include #include #include #include "knetfile.h" #include "bcf.h" #include "kseq.h" KSTREAM_INIT(gzFile, gzread, 0x10000) int bcfview(int argc, char *argv[]); int bcf_main_index(int argc, char *argv[]); #define BUF_SIZE 0x10000 int bcf_cat(int n, char * const *fn) { int i; bcf_t *out; uint8_t *buf; buf = malloc(BUF_SIZE); out = bcf_open("-", "w"); for (i = 0; i < n; ++i) { bcf_t *in; bcf_hdr_t *h; off_t end; struct stat s; in = bcf_open(fn[i], "r"); h = bcf_hdr_read(in); if (i == 0) bcf_hdr_write(out, h); bcf_hdr_destroy(h); #ifdef _USE_KNETFILE fstat(knet_fileno((knetFile*)in->fp->fp), &s); end = s.st_size - 28; while (knet_tell((knetFile*)in->fp->fp) < end) { int size = knet_tell((knetFile*)in->fp->fp) + BUF_SIZE < end? 
BUF_SIZE : end - knet_tell((knetFile*)in->fp->fp); knet_read(in->fp->fp, buf, size); fwrite(buf, 1, size, out->fp->fp); } #else abort(); // FIXME: not implemented #endif bcf_close(in); } bcf_close(out); free(buf); return 0; } extern double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]); int bcf_main_ldpair(int argc, char *argv[]) { bcf_t *fp; bcf_hdr_t *h; bcf1_t *b0, *b1; bcf_idx_t *idx; kstring_t str; void *str2id; gzFile fplist; kstream_t *ks; int dret, lineno = 0; if (argc < 3) { fprintf(stderr, "Usage: bcftools ldpair \n"); return 1; } fplist = gzopen(argv[2], "rb"); ks = ks_init(fplist); memset(&str, 0, sizeof(kstring_t)); fp = bcf_open(argv[1], "rb"); h = bcf_hdr_read(fp); str2id = bcf_build_refhash(h); idx = bcf_idx_load(argv[1]); if (idx == 0) { fprintf(stderr, "[%s] No bcf index is found. Abort!\n", __func__); return 1; } b0 = calloc(1, sizeof(bcf1_t)); b1 = calloc(1, sizeof(bcf1_t)); while (ks_getuntil(ks, '\n', &str, &dret) >= 0) { char *p, *q; int k; int tid0 = -1, tid1 = -1, pos0 = -1, pos1 = -1; ++lineno; for (p = q = str.s, k = 0; *p; ++p) { if (*p == ' ' || *p == '\t') { *p = '\0'; if (k == 0) tid0 = bcf_str2id(str2id, q); else if (k == 1) pos0 = atoi(q) - 1; else if (k == 2) tid1 = strcmp(q, "=")? 
bcf_str2id(str2id, q) : tid0; else if (k == 3) pos1 = atoi(q) - 1; q = p + 1; ++k; } } if (k == 3) pos1 = atoi(q) - 1; if (tid0 >= 0 && tid1 >= 0 && pos0 >= 0 && pos1 >= 0) { uint64_t off; double r, f[4]; off = bcf_idx_query(idx, tid0, pos0); bgzf_seek(fp->fp, off, SEEK_SET); while (bcf_read(fp, h, b0) >= 0 && b0->pos != pos0); off = bcf_idx_query(idx, tid1, pos1); bgzf_seek(fp->fp, off, SEEK_SET); while (bcf_read(fp, h, b1) >= 0 && b1->pos != pos1); r = bcf_pair_freq(b0, b1, f); r *= r; printf("%s\t%d\t%s\t%d\t%.4g\t%.4g\t%.4g\t%.4g\t%.4g\n", h->ns[tid0], pos0+1, h->ns[tid1], pos1+1, r, f[0], f[1], f[2], f[3]); } //else fprintf(stderr, "[%s] Parse error at line %d.\n", __func__, lineno); } bcf_destroy(b0); bcf_destroy(b1); bcf_idx_destroy(idx); bcf_str2id_destroy(str2id); bcf_hdr_destroy(h); bcf_close(fp); free(str.s); ks_destroy(ks); gzclose(fplist); return 0; } int bcf_main_ld(int argc, char *argv[]) { bcf_t *fp; bcf_hdr_t *h; bcf1_t **b, *b0; int i, j, m, n; double f[4]; if (argc == 1) { fprintf(stderr, "Usage: bcftools ld \n"); return 1; } fp = bcf_open(argv[1], "rb"); h = bcf_hdr_read(fp); // read the entire BCF m = n = 0; b = 0; b0 = calloc(1, sizeof(bcf1_t)); while (bcf_read(fp, h, b0) >= 0) { if (m == n) { m = m? 
m<<1 : 16; b = realloc(b, sizeof(void*) * m); } b[n] = calloc(1, sizeof(bcf1_t)); bcf_cpy(b[n++], b0); } bcf_destroy(b0); // compute pair-wise r^2 printf("%d\n", n); // the number of loci for (i = 0; i < n; ++i) { printf("%s:%d", h->ns[b[i]->tid], b[i]->pos + 1); for (j = 0; j < i; ++j) { double r = bcf_pair_freq(b[i], b[j], f); printf("\t%.3f", r*r); } printf("\t1.000\n"); } // free for (i = 0; i < n; ++i) bcf_destroy(b[i]); free(b); bcf_hdr_destroy(h); bcf_close(fp); return 0; } int main(int argc, char *argv[]) { if (argc == 1) { fprintf(stderr, "\n"); fprintf(stderr, "Program: bcftools (Tools for data in the VCF/BCF formats)\n"); fprintf(stderr, "Version: %s\n\n", BCF_VERSION); fprintf(stderr, "Usage: bcftools \n\n"); fprintf(stderr, "Command: view print, extract, convert and call SNPs from BCF\n"); fprintf(stderr, " index index BCF\n"); fprintf(stderr, " cat concatenate BCFs\n"); fprintf(stderr, " ld compute all-pair r^2\n"); fprintf(stderr, " ldpair compute r^2 between requested pairs\n"); fprintf(stderr, "\n"); return 1; } if (strcmp(argv[1], "view") == 0) return bcfview(argc-1, argv+1); else if (strcmp(argv[1], "index") == 0) return bcf_main_index(argc-1, argv+1); else if (strcmp(argv[1], "ld") == 0) return bcf_main_ld(argc-1, argv+1); else if (strcmp(argv[1], "ldpair") == 0) return bcf_main_ldpair(argc-1, argv+1); else if (strcmp(argv[1], "cat") == 0) return bcf_cat(argc-2, argv+2); // cat is different ... 
else { fprintf(stderr, "[main] Unrecognized command.\n"); return 1; } return 0; } samtools-0.1.19/bcftools/mut.c000066400000000000000000000073501212162403000162230ustar00rootroot00000000000000#include #include #include "bcf.h" #define MAX_GENO 359 int8_t seq_bitcnt[] = { 4, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; char *seq_nt16rev = "XACMGRSVTWYHKDBN"; uint32_t *bcf_trio_prep(int is_x, int is_son) { int i, j, k, n, map[10]; uint32_t *ret; ret = calloc(MAX_GENO, 4); for (i = 0, k = 0; i < 4; ++i) for (j = i; j < 4; ++j) map[k++] = 1<n_smpl != 3) return -1; // not a trio for (i = 0; i < b->n_gi; ++i) if (b->gi[i].fmt == bcf_str2int("PL", 2)) break; if (i == b->n_gi) return -1; // no PL gl10 = alloca(10 * b->n_smpl); if (bcf_gl10(b, gl10) < 0) { if (bcf_gl10_indel(b, gl10) < 0) return -1; } PL = b->gi + i; for (i = 0, k = 0; i < 4; ++i) for (j = i; j < 4; ++j) map[k++] = seq_nt16rev[1<data)[j * PL->len] != 0) break; if (j < 3) { // we need to go through the complex procedure uint8_t *g[3]; int minc = 1<<30, minc_j = -1, minf = 0, gtf = 0, gtc = 0; g[0] = gl10; g[1] = gl10 + 10; g[2] = gl10 + 20; for (j = 1; j <= (int)prep[0]; ++j) { // compute LK with constraint int sum = g[0][prep[j]&0xff] + g[1][prep[j]>>8&0xff] + g[2][prep[j]>>16&0xff]; if (sum < minc) minc = sum, minc_j = j; } gtc |= map[prep[minc_j]&0xff]; gtc |= map[prep[minc_j]>>8&0xff]<<8; gtc |= map[prep[minc_j]>>16]<<16; for (j = 0; j < 3; ++j) { // compute LK without constraint int min = 1<<30, min_k = -1; for (k = 0; k < 10; ++k) if (g[j][k] < min) min = g[j][k], min_k = k; gtf |= map[min_k]<<(j*8); minf += min; } *llr = minc - minf; *gt = (int64_t)gtc<<32 | gtf; } else *llr = 0, *gt = -1; return 0; } int bcf_pair_call(const bcf1_t *b) { int i, j, k; const bcf_ginfo_t *PL; if (b->n_smpl != 2) return -1; // not a pair for (i = 0; i < b->n_gi; ++i) if (b->gi[i].fmt == bcf_str2int("PL", 2)) break; if (i == b->n_gi) return -1; // no PL PL = b->gi + i; for (j = 0; j < 2; ++j) // check if ref hom is the 
most probable in all members if (((uint8_t*)PL->data)[j * PL->len] != 0) break; if (j < 2) { // we need to go through the complex procedure uint8_t *g[2]; int minc = 1<<30, minf = 0; g[0] = PL->data; g[1] = (uint8_t*)PL->data + PL->len; for (j = 0; j < PL->len; ++j) // compute LK with constraint minc = minc < g[0][j] + g[1][j]? minc : g[0][j] + g[1][j]; for (j = 0; j < 2; ++j) { // compute LK without constraint int min = 1<<30; for (k = 0; k < PL->len; ++k) min = min < g[j][k]? min : g[j][k]; minf += min; } return minc - minf; } else return 0; } int bcf_min_diff(const bcf1_t *b) { int i, min = 1<<30; const bcf_ginfo_t *PL; for (i = 0; i < b->n_gi; ++i) if (b->gi[i].fmt == bcf_str2int("PL", 2)) break; if (i == b->n_gi) return -1; // no PL PL = b->gi + i; for (i = 0; i < b->n_smpl; ++i) { int m1, m2, j; const uint8_t *p = (uint8_t*)PL->data; m1 = m2 = 1<<30; for (j = 0; j < PL->len; ++j) { if ((int)p[j] < m1) m2 = m1, m1 = p[j]; else if ((int)p[j] < m2) m2 = p[j]; } min = min < m2 - m1? min : m2 - m1; } return min; } samtools-0.1.19/bcftools/prob1.c000066400000000000000000001012251212162403000164350ustar00rootroot00000000000000#include #include #include #include #include #include #include #include #include "prob1.h" #include "kstring.h" #include "kseq.h" KSTREAM_INIT(gzFile, gzread, 16384) #define MC_MAX_EM_ITER 16 #define MC_EM_EPS 1e-5 #define MC_DEF_INDEL 0.15 gzFile bcf_p1_fp_lk; unsigned char seq_nt4_table[256] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 /*'-'*/, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; struct __bcf_p1aux_t { int n, M, n1, is_indel; uint8_t *ploidy; // haploid or diploid ONLY double *q2p, *pdg; // pdg -> P(D|g) double *phi, *phi_indel; double *z, *zswap; // aux for afs double *z1, *z2, *phi1, *phi2; // only calculated when n1 is set double **hg; // hypergeometric distribution double *lf; // log factorial double t, t1, t2; double *afs, *afs1; // afs: accumulative AFS; afs1: site posterior distribution const uint8_t *PL; // point to PL int PL_len; }; void bcf_p1_indel_prior(bcf_p1aux_t *ma, double x) { int i; for (i = 0; i < ma->M; ++i) ma->phi_indel[i] = ma->phi[i] * x; ma->phi_indel[ma->M] = 1. - ma->phi[ma->M] * x; } static void init_prior(int type, double theta, int M, double *phi) { int i; if (type == MC_PTYPE_COND2) { for (i = 0; i <= M; ++i) phi[i] = 2. * (i + 1) / (M + 1) / (M + 2); } else if (type == MC_PTYPE_FLAT) { for (i = 0; i <= M; ++i) phi[i] = 1. / (M + 1); } else { double sum; for (i = 0, sum = 0.; i < M; ++i) sum += (phi[i] = theta / (M - i)); phi[M] = 1. - sum; } } void bcf_p1_init_prior(bcf_p1aux_t *ma, int type, double theta) { init_prior(type, theta, ma->M, ma->phi); bcf_p1_indel_prior(ma, MC_DEF_INDEL); } void bcf_p1_init_subprior(bcf_p1aux_t *ma, int type, double theta) { if (ma->n1 <= 0 || ma->n1 >= ma->M) return; init_prior(type, theta, 2*ma->n1, ma->phi1); init_prior(type, theta, 2*(ma->n - ma->n1), ma->phi2); } int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn) { gzFile fp; kstring_t s; kstream_t *ks; long double sum; int dret, k; memset(&s, 0, sizeof(kstring_t)); fp = strcmp(fn, "-")? 
gzopen(fn, "r") : gzdopen(fileno(stdin), "r"); ks = ks_init(fp); memset(ma->phi, 0, sizeof(double) * (ma->M + 1)); while (ks_getuntil(ks, '\n', &s, &dret) >= 0) { if (strstr(s.s, "[afs] ") == s.s) { char *p = s.s + 6; for (k = 0; k <= ma->M; ++k) { int x; double y; x = strtol(p, &p, 10); if (x != k && (errno == EINVAL || errno == ERANGE)) return -1; ++p; y = strtod(p, &p); if (y == 0. && (errno == EINVAL || errno == ERANGE)) return -1; ma->phi[ma->M - k] += y; } } } ks_destroy(ks); gzclose(fp); free(s.s); for (sum = 0., k = 0; k <= ma->M; ++k) sum += ma->phi[k]; fprintf(stderr, "[prior]"); for (k = 0; k <= ma->M; ++k) ma->phi[k] /= sum; for (k = 0; k <= ma->M; ++k) fprintf(stderr, " %d:%.3lg", k, ma->phi[ma->M - k]); fputc('\n', stderr); for (sum = 0., k = 1; k < ma->M; ++k) sum += ma->phi[ma->M - k] * (2.* k * (ma->M - k) / ma->M / (ma->M - 1)); fprintf(stderr, "[%s] heterozygosity=%lf, ", __func__, (double)sum); for (sum = 0., k = 1; k <= ma->M; ++k) sum += k * ma->phi[ma->M - k] / ma->M; fprintf(stderr, "theta=%lf\n", (double)sum); bcf_p1_indel_prior(ma, MC_DEF_INDEL); return 0; } bcf_p1aux_t *bcf_p1_init(int n, uint8_t *ploidy) { bcf_p1aux_t *ma; int i; ma = calloc(1, sizeof(bcf_p1aux_t)); ma->n1 = -1; ma->n = n; ma->M = 2 * n; if (ploidy) { ma->ploidy = malloc(n); memcpy(ma->ploidy, ploidy, n); for (i = 0, ma->M = 0; i < n; ++i) ma->M += ploidy[i]; if (ma->M == 2 * n) { free(ma->ploidy); ma->ploidy = 0; } } ma->q2p = calloc(256, sizeof(double)); ma->pdg = calloc(3 * ma->n, sizeof(double)); ma->phi = calloc(ma->M + 1, sizeof(double)); ma->phi_indel = calloc(ma->M + 1, sizeof(double)); ma->phi1 = calloc(ma->M + 1, sizeof(double)); ma->phi2 = calloc(ma->M + 1, sizeof(double)); ma->z = calloc(ma->M + 1, sizeof(double)); ma->zswap = calloc(ma->M + 1, sizeof(double)); ma->z1 = calloc(ma->M + 1, sizeof(double)); // actually we do not need this large ma->z2 = calloc(ma->M + 1, sizeof(double)); ma->afs = calloc(ma->M + 1, sizeof(double)); ma->afs1 = calloc(ma->M + 1, 
sizeof(double)); ma->lf = calloc(ma->M + 1, sizeof(double)); for (i = 0; i < 256; ++i) ma->q2p[i] = pow(10., -i / 10.); for (i = 0; i <= ma->M; ++i) ma->lf[i] = lgamma(i + 1); bcf_p1_init_prior(ma, MC_PTYPE_FULL, 1e-3); // the simplest prior return ma; } int bcf_p1_get_M(bcf_p1aux_t *b) { return b->M; } int bcf_p1_set_n1(bcf_p1aux_t *b, int n1) { if (n1 == 0 || n1 >= b->n) return -1; if (b->M != b->n * 2) { fprintf(stderr, "[%s] unable to set `n1' when there are haploid samples.\n", __func__); return -1; } b->n1 = n1; return 0; } void bcf_p1_set_ploidy(bcf1_t *b, bcf_p1aux_t *ma) { // bcf_p1aux_t fields are not visible outside of prob1.c, hence this wrapper. // Ideally, this should set ploidy per site to allow pseudo-autosomal regions b->ploidy = ma->ploidy; } void bcf_p1_destroy(bcf_p1aux_t *ma) { if (ma) { int k; free(ma->lf); if (ma->hg && ma->n1 > 0) { for (k = 0; k <= 2*ma->n1; ++k) free(ma->hg[k]); free(ma->hg); } free(ma->ploidy); free(ma->q2p); free(ma->pdg); free(ma->phi); free(ma->phi_indel); free(ma->phi1); free(ma->phi2); free(ma->z); free(ma->zswap); free(ma->z1); free(ma->z2); free(ma->afs); free(ma->afs1); free(ma); } } extern double kf_gammap(double s, double z); int test16(bcf1_t *b, anno16_t *a); // Wigginton 2005, PMID: 15789306 // written by Jan Wigginton double calc_hwe(int obs_hom1, int obs_hom2, int obs_hets) { if (obs_hom1 + obs_hom2 + obs_hets == 0 ) return 1; assert(obs_hom1 >= 0 && obs_hom2 >= 0 && obs_hets >= 0); int obs_homc = obs_hom1 < obs_hom2 ? obs_hom2 : obs_hom1; int obs_homr = obs_hom1 < obs_hom2 ? 
obs_hom1 : obs_hom2; int rare_copies = 2 * obs_homr + obs_hets; int genotypes = obs_hets + obs_homc + obs_homr; double *het_probs = (double*) calloc(rare_copies+1, sizeof(double)); /* start at midpoint */ int mid = rare_copies * (2 * genotypes - rare_copies) / (2 * genotypes); /* check to ensure that midpoint and rare alleles have same parity */ if ((rare_copies & 1) ^ (mid & 1)) mid++; int curr_hets = mid; int curr_homr = (rare_copies - mid) / 2; int curr_homc = genotypes - curr_hets - curr_homr; het_probs[mid] = 1.0; double sum = het_probs[mid]; for (curr_hets = mid; curr_hets > 1; curr_hets -= 2) { het_probs[curr_hets - 2] = het_probs[curr_hets] * curr_hets * (curr_hets - 1.0) / (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0)); sum += het_probs[curr_hets - 2]; /* 2 fewer heterozygotes for next iteration -> add one rare, one common homozygote */ curr_homr++; curr_homc++; } curr_hets = mid; curr_homr = (rare_copies - mid) / 2; curr_homc = genotypes - curr_hets - curr_homr; for (curr_hets = mid; curr_hets <= rare_copies - 2; curr_hets += 2) { het_probs[curr_hets + 2] = het_probs[curr_hets] * 4.0 * curr_homr * curr_homc /((curr_hets + 2.0) * (curr_hets + 1.0)); sum += het_probs[curr_hets + 2]; /* add 2 heterozygotes for next iteration -> subtract one rare, one common homozygote */ curr_homr--; curr_homc--; } int i; for (i = 0; i <= rare_copies; i++) het_probs[i] /= sum; /* p-value calculation for p_hwe */ double p_hwe = 0.0; for (i = 0; i <= rare_copies; i++) { if (het_probs[i] > het_probs[obs_hets]) continue; p_hwe += het_probs[i]; } p_hwe = p_hwe > 1.0 ? 
1.0 : p_hwe; free(het_probs); return p_hwe; } static void _bcf1_set_ref(bcf1_t *b, int idp) { kstring_t s; int old_n_gi = b->n_gi; s.m = b->m_str; s.l = b->l_str - 1; s.s = b->str; kputs(":GT", &s); kputc('\0', &s); b->m_str = s.m; b->l_str = s.l; b->str = s.s; bcf_sync(b); // Call GTs int isample, an = 0; for (isample = 0; isample < b->n_smpl; isample++) { if ( idp>=0 && ((uint16_t*)b->gi[idp].data)[isample]==0 ) ((uint8_t*)b->gi[old_n_gi].data)[isample] = 1<<7; else { ((uint8_t*)b->gi[old_n_gi].data)[isample] = 0; an += b->ploidy ? b->ploidy[isample] : 2; } } bcf_fit_alt(b,1); b->qual = 999; // Prepare BCF for output: ref, alt, filter, info, format memset(&s, 0, sizeof(kstring_t)); kputc('\0', &s); kputs(b->ref, &s); kputc('\0', &s); kputs(b->alt, &s); kputc('\0', &s); kputc('\0', &s); { ksprintf(&s, "AN=%d;", an); kputs(b->info, &s); anno16_t a; int has_I16 = test16(b, &a) >= 0? 1 : 0; if (has_I16 ) { if ( a.is_tested) ksprintf(&s, ";PV4=%.2g,%.2g,%.2g,%.2g", a.p[0], a.p[1], a.p[2], a.p[3]); ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq); } kputc('\0', &s); rm_info(&s, "I16="); rm_info(&s, "QS="); } kputs(b->fmt, &s); kputc('\0', &s); free(b->str); b->m_str = s.m; b->l_str = s.l; b->str = s.s; bcf_sync(b); } int call_multiallelic_gt(bcf1_t *b, bcf_p1aux_t *ma, double threshold, int var_only) { int nals = 1; char *p; for (p=b->alt; *p; p++) { if ( *p=='X' || p[0]=='.' 
) break; if ( p[0]==',' ) nals++; } if ( b->alt[0] && !*p ) nals++; if ( nals>4 ) { if ( *b->ref=='N' ) return 0; fprintf(stderr,"Not ready for this, more than 4 alleles at %d: %s, %s\n", b->pos+1, b->ref,b->alt); exit(1); } // find PL, DV and DP FORMAT indexes uint8_t *pl = NULL; int i, npl = 0, idp = -1, idv = -1; for (i = 0; i < b->n_gi; ++i) { if (b->gi[i].fmt == bcf_str2int("PL", 2)) { pl = (uint8_t*)b->gi[i].data; npl = b->gi[i].len; } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) idp=i; else if (b->gi[i].fmt == bcf_str2int("DV", 2)) idv=i; } if ( nals==1 ) { if ( !var_only ) _bcf1_set_ref(b, idp); return 1; } if ( !pl ) return -1; assert(ma->q2p[0] == 1); // Init P(D|G) int npdg = nals*(nals+1)/2; double *pdg,*_pdg; _pdg = pdg = malloc(sizeof(double)*ma->n*npdg); for (i=0; in; i++) { int j; double sum = 0; for (j=0; jq2p[pl[j]]; sum += _pdg[j]; } if ( sum ) for (j=0; jinfo, "QS=")) == 0) { fprintf(stderr,"INFO/QS is required with -m, exiting\n"); exit(1); } double qsum[4]; if ( sscanf(p+3,"%lf,%lf,%lf,%lf",&qsum[0],&qsum[1],&qsum[2],&qsum[3])!=4 ) { fprintf(stderr,"Could not parse %s\n",p); exit(1); } // Calculate the most likely combination of alleles, remembering the most and second most likely set int ia,ib,ic, max_als=0, max_als2=0; double ref_lk = 0, max_lk = INT_MIN, max_lk2 = INT_MIN, lk_sum = INT_MIN, lk_sums[3]; for (ia=0; ian; isample++) { double *p = pdg + isample*npdg; // assert( log(p[iaa]) <= 0 ); lk_tot += log(p[iaa]); } if ( ia==0 ) ref_lk = lk_tot; if ( max_lklk_sum ? lk_tot + log(1+exp(lk_sum-lk_tot)) : lk_sum + log(1+exp(lk_tot-lk_sum)); } lk_sums[0] = lk_sum; if ( nals>1 ) { for (ia=0; ian; isample++) { double *p = pdg + isample*npdg; //assert( log(fa*p[iaa] + fb*p[ibb] + fab*p[iab]) <= 0 ); if ( b->ploidy && b->ploidy[isample]==1 ) lk_tot += log(fa*p[iaa] + fb*p[ibb]); else lk_tot += log(fa*p[iaa] + fb*p[ibb] + fab*p[iab]); } if ( max_lklk_sum ? 
lk_tot + log(1+exp(lk_sum-lk_tot)) : lk_sum + log(1+exp(lk_tot-lk_sum)); } } lk_sums[1] = lk_sum; } if ( nals>2 ) { for (ia=0; ian; isample++) { double *p = pdg + isample*npdg; //assert( log(fa*p[iaa] + fb*p[ibb] + fc*p[icc] + fab*p[iab] + fac*p[iac] + fbc*p[ibc]) <= 0 ); if ( b->ploidy && b->ploidy[isample]==1 ) lk_tot += log(fa*p[iaa] + fb*p[ibb] + fc*p[icc]); else lk_tot += log(fa*p[iaa] + fb*p[ibb] + fc*p[icc] + fab*p[iab] + fac*p[iac] + fbc*p[ibc]); } if ( max_lklk_sum ? lk_tot + log(1+exp(lk_sum-lk_tot)) : lk_sum + log(1+exp(lk_tot-lk_sum)); } } } lk_sums[2] = lk_sum; } // Should we add another allele, does it increase the likelihood significantly? int n1=0, n2=0; for (i=0; in_gi; s.m = b->m_str; s.l = b->l_str - 1; s.s = b->str; kputs(":GT:GQ", &s); kputc('\0', &s); b->m_str = s.m; b->l_str = s.l; b->str = s.s; bcf_sync(b); // Call GTs int isample, gts=0, ac[4] = {0,0,0,0}; int nRR = 0, nAA = 0, nRA = 0, max_dv = 0; for (isample = 0; isample < b->n_smpl; isample++) { int ploidy = b->ploidy ? 
b->ploidy[isample] : 2; double *p = pdg + isample*npdg; int ia, als = 0; double lk = 0, lk_s = 0; for (ia=0; ia lk ) { lk = _lk; als = ia<<3 | ia; } lk_s += _lk; } if ( ploidy==2 ) { for (ia=0; ia lk ) { lk = _lk; als = ib<<3 | ia; } lk_s += _lk; } } } lk = -log(1-lk/lk_s)/0.2302585; int dp = 0; if ( idp>=0 && (dp=((uint16_t*)b->gi[idp].data)[isample])==0 ) { // no coverage ((uint8_t*)b->gi[old_n_gi].data)[isample] = 1<<7; ((uint8_t*)b->gi[old_n_gi+1].data)[isample] = 0; continue; } if ( lk>99 ) lk = 99; ((uint8_t*)b->gi[old_n_gi].data)[isample] = als; ((uint8_t*)b->gi[old_n_gi+1].data)[isample] = (int)lk; // For MDV annotation int dv; if ( als && idv>=0 && (dv=((uint16_t*)b->gi[idv].data)[isample]) ) { if ( max_dv < dv ) max_dv = dv; } // For HWE annotation; multiple ALT alleles treated as one if ( !als ) nRR++; else if ( !(als>>3&7) || !(als&7) ) nRA++; else nAA++; gts |= 1<<(als>>3&7) | 1<<(als&7); ac[ als>>3&7 ]++; ac[ als&7 ]++; } free(pdg); bcf_fit_alt(b,max_als); // The VCF spec is ambiguous about QUAL: is it the probability of anything else // (that is QUAL(non-ref) = P(ref)+P(any non-ref other than ALT)) or is it // QUAL(non-ref)=P(ref) and QUAL(ref)=1-P(ref)? Assuming the latter. b->qual = gts>1 ? -4.343*(ref_lk - lk_sum) : -4.343*log(1-exp(ref_lk - lk_sum)); if ( b->qual>999 ) b->qual = 999; // Prepare BCF for output: ref, alt, filter, info, format memset(&s, 0, sizeof(kstring_t)); kputc('\0', &s); kputs(b->ref, &s); kputc('\0', &s); kputs(b->alt, &s); kputc('\0', &s); kputc('\0', &s); { int an=0, nalts=0; for (i=0; i0 && ac[i] ) nalts++; } ksprintf(&s, "AN=%d;", an); if ( nalts ) { kputs("AC=", &s); for (i=1; i0 ) kputc(',', &s); } kputc(';', &s); } kputs(b->info, &s); anno16_t a; int has_I16 = test16(b, &a) >= 0? 
1 : 0; if (has_I16 ) { if ( a.is_tested) ksprintf(&s, ";PV4=%.2g,%.2g,%.2g,%.2g", a.p[0], a.p[1], a.p[2], a.p[3]); ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq); ksprintf(&s, ";QBD=%e", b->qual/(a.d[0] + a.d[1] + a.d[2] + a.d[3])); if ( max_dv ) ksprintf(&s, ";MDV=%d", max_dv); } if ( nAA+nRA ) { double hwe = calc_hwe(nAA, nRR, nRA); ksprintf(&s, ";HWE=%e", hwe); } kputc('\0', &s); rm_info(&s, "I16="); rm_info(&s, "QS="); } kputs(b->fmt, &s); kputc('\0', &s); free(b->str); b->m_str = s.m; b->l_str = s.l; b->str = s.s; bcf_sync(b); return gts; } static int cal_pdg(const bcf1_t *b, bcf_p1aux_t *ma) { int i, j; long *p, tmp; p = alloca(b->n_alleles * sizeof(long)); memset(p, 0, sizeof(long) * b->n_alleles); for (j = 0; j < ma->n; ++j) { const uint8_t *pi = ma->PL + j * ma->PL_len; double *pdg = ma->pdg + j * 3; pdg[0] = ma->q2p[pi[2]]; pdg[1] = ma->q2p[pi[1]]; pdg[2] = ma->q2p[pi[0]]; for (i = 0; i < b->n_alleles; ++i) p[i] += (int)pi[(i+1)*(i+2)/2-1]; } for (i = 0; i < b->n_alleles; ++i) p[i] = p[i]<<4 | i; for (i = 1; i < b->n_alleles; ++i) // insertion sort for (j = i; j > 0 && p[j] < p[j-1]; --j) tmp = p[j], p[j] = p[j-1], p[j-1] = tmp; for (i = b->n_alleles - 1; i >= 0; --i) if ((p[i]&0xf) == 0) break; return i; } int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k) { double sum, g[3]; double max, f3[3], *pdg = ma->pdg + k * 3; int q, i, max_i, ploidy; ploidy = ma->ploidy? ma->ploidy[k] : 2; if (ploidy == 2) { f3[0] = (1.-f0)*(1.-f0); f3[1] = 2.*f0*(1.-f0); f3[2] = f0*f0; } else { f3[0] = 1. - f0; f3[1] = 0; f3[2] = f0; } for (i = 0, sum = 0.; i < 3; ++i) sum += (g[i] = pdg[i] * f3[i]); for (i = 0, max = -1., max_i = 0; i < 3; ++i) { g[i] /= sum; if (g[i] > max) max = g[i], max_i = i; } max = 1. 
- max;
    if (max < 1e-308) max = 1e-308; // keeps the log() below finite
    q = (int)(-4.343 * log(max) + .499);
    if (q > 99) q = 99; // cap the quality at 99
    return q<<2|max_i; // q in the upper bits, max_i in the low two bits
}

#define TINY 1e-20

/*
 * Fill ma->z (M+1 entries) by folding in samples beg..n-1 one at a time.
 *
 * z[0] and z[1] alternate between ma->z and ma->zswap. Each step builds the
 * next vector from the previous one and the sample's three pdg values,
 * rescales it to sum to 1 and adds the log of the removed factor to ma->t.
 * Entries below TINY at either end of the active window [_min, _max] are
 * zeroed and dropped from the window.
 *
 * beg != 0 is only accepted when M == 2*n (see the assert). In that all-diploid
 * branch the vector and ma->t reached after sample n1-1 are saved to ma->z1
 * and ma->t1. If bcf_p1_fp_lk is set, the final ma->z is written to it.
 */
static void mc_cal_y_core(bcf_p1aux_t *ma, int beg)
{
    double *z[2], *tmp, *pdg;
    int _j, last_min, last_max;
    assert(beg == 0 || ma->M == ma->n*2);
    z[0] = ma->z;
    z[1] = ma->zswap;
    pdg = ma->pdg;
    memset(z[0], 0, sizeof(double) * (ma->M + 1));
    memset(z[1], 0, sizeof(double) * (ma->M + 1));
    z[0][0] = 1.;
    last_min = last_max = 0;
    ma->t = 0.;
    if (ma->M == ma->n * 2) {
        int M = 0; /* 2 * (number of samples folded in so far) */
        for (_j = beg; _j < ma->n; ++_j) {
            int k, j = _j - beg, _min = last_min, _max = last_max, M0;
            double p[3], sum;
            M0 = M; M += 2;
            pdg = ma->pdg + _j * 3;
            p[0] = pdg[0]; p[1] = 2. * pdg[1]; p[2] = pdg[2];
            /* shrink the window past negligible entries at both ends */
            for (; _min < _max && z[0][_min] < TINY; ++_min) z[0][_min] = z[1][_min] = 0.;
            for (; _max > _min && z[0][_max] < TINY; --_max) z[0][_max] = z[1][_max] = 0.;
            _max += 2;
            /* k = 0 and k = 1 are handled apart so that z[0][k-1] / z[0][k-2] are never read below index 0 */
            if (_min == 0) k = 0, z[1][k] = (M0-k+1) * (M0-k+2) * p[0] * z[0][k];
            if (_min <= 1) k = 1, z[1][k] = (M0-k+1) * (M0-k+2) * p[0] * z[0][k] + k*(M0-k+2) * p[1] * z[0][k-1];
            for (k = _min < 2? 2 : _min; k <= _max; ++k)
                z[1][k] = (M0-k+1)*(M0-k+2) * p[0] * z[0][k] + k*(M0-k+2) * p[1] * z[0][k-1] + k*(k-1)* p[2] * z[0][k-2];
            for (k = _min, sum = 0.; k <= _max; ++k) sum += z[1][k];
            ma->t += log(sum / (M * (M - 1.)));
            for (k = _min; k <= _max; ++k) z[1][k] /= sum;
            /* clear stale values just outside the window before the buffers are swapped */
            if (_min >= 1) z[1][_min-1] = 0.;
            if (_min >= 2) z[1][_min-2] = 0.;
            if (j < ma->n - 1) z[1][_max+1] = z[1][_max+2] = 0.;
            if (_j == ma->n1 - 1) { // set pop1; ma->n1==-1 when unset
                ma->t1 = ma->t;
                memcpy(ma->z1, z[1], sizeof(double) * (ma->n1 * 2 + 1));
            }
            tmp = z[0]; z[0] = z[1]; z[1] = tmp;
            last_min = _min; last_max = _max;
        }
        //for (_j = 0; _j < last_min; ++_j) z[0][_j] = 0.; // TODO: are these necessary?
        //for (_j = last_max + 1; _j < ma->M; ++_j) z[0][_j] = 0.;
    } else { // this block is very similar to the block above; these two might be merged in future
        int j, M = 0; /* M: sum of the ploidies folded in so far */
        for (j = 0; j < ma->n; ++j) {
            int k, M0, _min = last_min, _max = last_max;
            double p[3], sum;
            pdg = ma->pdg + j * 3;
            for (; _min < _max && z[0][_min] < TINY; ++_min) z[0][_min] = z[1][_min] = 0.;
            for (; _max > _min && z[0][_max] < TINY; --_max) z[0][_max] = z[1][_max] = 0.;
            M0 = M;
            M += ma->ploidy[j];
            if (ma->ploidy[j] == 1) {
                /* haploid sample: only pdg[0] and pdg[2] are used */
                p[0] = pdg[0]; p[1] = pdg[2];
                _max++;
                if (_min == 0) k = 0, z[1][k] = (M0+1-k) * p[0] * z[0][k];
                for (k = _min < 1? 1 : _min; k <= _max; ++k)
                    z[1][k] = (M0+1-k) * p[0] * z[0][k] + k * p[1] * z[0][k-1];
                for (k = _min, sum = 0.; k <= _max; ++k) sum += z[1][k];
                ma->t += log(sum / M);
                for (k = _min; k <= _max; ++k) z[1][k] /= sum;
                if (_min >= 1) z[1][_min-1] = 0.;
                if (j < ma->n - 1) z[1][_max+1] = 0.;
            } else if (ma->ploidy[j] == 2) {
                /* diploid sample: same update as in the all-diploid branch above */
                p[0] = pdg[0]; p[1] = 2 * pdg[1]; p[2] = pdg[2];
                _max += 2;
                if (_min == 0) k = 0, z[1][k] = (M0-k+1) * (M0-k+2) * p[0] * z[0][k];
                if (_min <= 1) k = 1, z[1][k] = (M0-k+1) * (M0-k+2) * p[0] * z[0][k] + k*(M0-k+2) * p[1] * z[0][k-1];
                for (k = _min < 2? 2 : _min; k <= _max; ++k)
                    z[1][k] = (M0-k+1)*(M0-k+2) * p[0] * z[0][k] + k*(M0-k+2) * p[1] * z[0][k-1] + k*(k-1)* p[2] * z[0][k-2];
                for (k = _min, sum = 0.; k <= _max; ++k) sum += z[1][k];
                ma->t += log(sum / (M * (M - 1.)));
                for (k = _min; k <= _max; ++k) z[1][k] /= sum;
                if (_min >= 1) z[1][_min-1] = 0.;
                if (_min >= 2) z[1][_min-2] = 0.;
                if (j < ma->n - 1) z[1][_max+1] = z[1][_max+2] = 0.;
            }
            /* NOTE(review): a ploidy other than 1 or 2 leaves z[1] unchanged but still swaps the buffers - confirm such values cannot occur */
            tmp = z[0]; z[0] = z[1]; z[1] = tmp;
            last_min = _min; last_max = _max;
        }
    }
    /* the result may have ended up in the swap buffer */
    if (z[0] != ma->z) memcpy(ma->z, z[0], sizeof(double) * (ma->M + 1));
    if (bcf_p1_fp_lk)
        gzwrite(bcf_p1_fp_lk, ma->z, sizeof(double) * (ma->M + 1));
}

/*
 * Compute ma->z for all samples.
 * When n1 is set (0 < n1 < n) and M == 2*n, this also computes ma->z2 / ma->t2
 * from samples n1..n-1, and ma->z1 / ma->t1 as a side effect of the full pass.
 * ma->z is then multiplied by exp(t - (t1 + t2)).
 */
static void mc_cal_y(bcf_p1aux_t *ma)
{
    if (ma->n1 > 0 && ma->n1 < ma->n && ma->M == ma->n * 2) { // NB: ma->n1 is ineffective when there are haploid samples
        int k;
        long double x;
        memset(ma->z1, 0, sizeof(double) * (2 * ma->n1 + 1));
        memset(ma->z2, 0, sizeof(double) * (2 * (ma->n - ma->n1) + 1));
        ma->t1 = ma->t2 = 0.;
        /* first pass: samples n1..n-1 only; its result is copied out as z2/t2 */
        mc_cal_y_core(ma, ma->n1);
        ma->t2 = ma->t;
        memcpy(ma->z2, ma->z, sizeof(double) * (2 * (ma->n - ma->n1) + 1));
        /* second pass: all samples; fills z, and z1/t1 on the way */
        mc_cal_y_core(ma, 0);
        // rescale z
        x = expl(ma->t - (ma->t1 + ma->t2));
        for (k = 0; k <= ma->M; ++k) ma->z[k] *= x;
    } else mc_cal_y_core(ma, 0);
}

#define CONTRAST_TINY 1e-30

extern double kf_gammaq(double s, double z); // incomplete gamma function for chi^2 test

/* Chi-square test on the 2x2 table {a, b; c, d}; the value comes from kf_gammaq(). */
static inline double chi2_test(int a, int b, int c, int d)
{
    double x, z;
    /* product of the four marginal sums, formed in double */
    x = (double)(a+b) * (c+d) * (b+d) * (a+c);
    if (x == 0.)
return 1; // a zero margin leaves the statistic undefined; 1 is returned
    z = a * d - b * c;
    return kf_gammaq(.5, .5 * z * z * (a+b+c+d) / x);
}

// chi2=(a+b+c+d)(ad-bc)^2/[(a+b)(c+d)(a+c)(b+d)]
/*
 * Contribution of one (k1, k2) cell to contrast2().
 * p is phi[k1+k2] * z1[k1] * z2[k2] / sum * hg[k1][k2]. It is added to x[1],
 * x[2] or x[0] depending on whether k1/n1 is below, above or equal to k2/n2.
 * Returns p times the chi-square test value for the cell, or -1 (x untouched)
 * when p < CONTRAST_TINY.
 */
static inline double contrast2_aux(const bcf_p1aux_t *p1, double sum, int k1, int k2, double x[3])
{
    double p = p1->phi[k1+k2] * p1->z1[k1] * p1->z2[k2] / sum * p1->hg[k1][k2];
    int n1 = p1->n1, n2 = p1->n - p1->n1;
    if (p < CONTRAST_TINY) return -1;
    if (.5*k1/n1 < .5*k2/n2) x[1] += p;
    else if (.5*k1/n1 > .5*k2/n2) x[2] += p;
    else x[0] += p;
    return p * chi2_test(k1, k2, (n1<<1) - k1, (n2<<1) - k2);
}

/*
 * Sum contrast2_aux() over the (k1, k2) grid, k1 in 0..2*n1 and k2 in 0..2*n2.
 * ret[0..2] receive the accumulated x[0..2] of contrast2_aux(). Returns the
 * summed value, or 0 when n1 or n2 is not positive (ret is then left
 * untouched). p1->hg is allocated and filled on the first call.
 */
static double contrast2(bcf_p1aux_t *p1, double ret[3])
{
    int k, k1, k2, k10, k20, n1, n2;
    double sum;
    // get n1 and n2
    n1 = p1->n1; n2 = p1->n - p1->n1;
    if (n1 <= 0 || n2 <= 0) return 0.;
    if (p1->hg == 0) { // initialize the hypergeometric distribution
        /* NB: the hg matrix may take a lot of memory when there are many samples. There is a way to avoid precomputing this matrix, but it is slower and quite intricate. The following computation in this block can be accelerated with a similar strategy, but perhaps this is not a serious concern for now. */
        double tmp = lgamma(2*(n1+n2)+1) - (lgamma(2*n1+1) + lgamma(2*n2+1));
        /* NOTE(review): neither calloc() result below is checked */
        p1->hg = calloc(2*n1+1, sizeof(void*));
        for (k1 = 0; k1 <= 2*n1; ++k1) {
            p1->hg[k1] = calloc(2*n2+1, sizeof(double));
            for (k2 = 0; k2 <= 2*n2; ++k2)
                p1->hg[k1][k2] = exp(lgamma(k1+k2+1) + lgamma(p1->M-k1-k2+1) - (lgamma(k1+1) + lgamma(k2+1) + lgamma(2*n1-k1+1) + lgamma(2*n2-k2+1) + tmp));
        }
    }
    { // compute
        long double suml = 0;
        for (k = 0; k <= p1->M; ++k) suml += p1->phi[k] * p1->z[k];
        sum = suml;
    }
    { // get the max k1 and k2
        double max;
        int max_k;
        for (k = 0, max = 0, max_k = -1; k <= 2*n1; ++k) {
            double x = p1->phi1[k] * p1->z1[k];
            if (x > max) max = x, max_k = k;
        }
        k10 = max_k;
        for (k = 0, max = 0, max_k = -1; k <= 2*n2; ++k) {
            double x = p1->phi2[k] * p1->z2[k];
            if (x > max) max = x, max_k = k;
        }
        k20 = max_k;
    }
    { // We can do the following with one nested loop, but that is an O(N^2) thing. The following code block is much faster for large N.
        /* each scan starts at (k10, k20) and stops along a row as soon as a cell falls below CONTRAST_TINY */
        double x[3], y;
        long double z = 0., L[2];
        x[0] = x[1] = x[2] = 0; L[0] = L[1] = 0;
        for (k1 = k10; k1 >= 0; --k1) {
            for (k2 = k20; k2 >= 0; --k2) {
                if ((y = contrast2_aux(p1, sum, k1, k2, x)) < 0) break;
                else z += y;
            }
            for (k2 = k20 + 1; k2 <= 2*n2; ++k2) {
                if ((y = contrast2_aux(p1, sum, k1, k2, x)) < 0) break;
                else z += y;
            }
        }
        ret[0] = x[0]; ret[1] = x[1]; ret[2] = x[2];
        x[0] = x[1] = x[2] = 0;
        for (k1 = k10 + 1; k1 <= 2*n1; ++k1) {
            for (k2 = k20; k2 >= 0; --k2) {
                if ((y = contrast2_aux(p1, sum, k1, k2, x)) < 0) break;
                else z += y;
            }
            for (k2 = k20 + 1; k2 <= 2*n2; ++k2) {
                if ((y = contrast2_aux(p1, sum, k1, k2, x)) < 0) break;
                else z += y;
            }
        }
        ret[0] += x[0]; ret[1] += x[1]; ret[2] += x[2];
        if (ret[0] + ret[1] + ret[2] < 0.95) { // in case of bad things happened
            /* fall back to the full grid with no early stop */
            ret[0] = ret[1] = ret[2] = 0; L[0] = L[1] = 0;
            for (k1 = 0, z = 0.; k1 <= 2*n1; ++k1)
                for (k2 = 0; k2 <= 2*n2; ++k2)
                    if ((y = contrast2_aux(p1, sum, k1, k2, ret)) >= 0) z += y;
            if (ret[0] + ret[1] + ret[2] < 0.95) // It seems that this may be caused by floating point errors. I do not really understand why...
                z = 1.0, ret[0] = ret[1] = ret[2] = 1./3;
        }
        return (double)z;
    }
}

/*
 * Compute the per-site distribution ma->afs1[k] = phi[k] * z[k] / sum and add
 * it into ma->afs. phi is ma->phi_indel when ma->is_indel is set, ma->phi
 * otherwise. Also stores *p_var_folded and *p_ref_folded.
 * Returns sum(k * afs1[k]) / M, or -1 as soon as an afs1 entry is NaN or
 * infinite (the outputs are then left unset).
 */
static double mc_cal_afs(bcf_p1aux_t *ma, double *p_ref_folded, double *p_var_folded)
{
    int k;
    long double sum = 0., sum2;
    double *phi = ma->is_indel? ma->phi_indel : ma->phi;
    memset(ma->afs1, 0, sizeof(double) * (ma->M + 1));
    mc_cal_y(ma);
    // compute AFS
    for (k = 0, sum = 0.; k <= ma->M; ++k)
        sum += (long double)phi[k] * ma->z[k];
    for (k = 0; k <= ma->M; ++k) {
        ma->afs1[k] = phi[k] * ma->z[k] / sum;
        if (isnan(ma->afs1[k]) || isinf(ma->afs1[k])) return -1.;
    }
    // compute folded variant probability
    for (k = 0, sum = 0.; k <= ma->M; ++k)
        sum += (long double)(phi[k] + phi[ma->M - k]) / 2. * ma->z[k];
    for (k = 1, sum2 = 0.; k < ma->M; ++k)
        sum2 += (long double)(phi[k] + phi[ma->M - k]) / 2. * ma->z[k];
    *p_var_folded = sum2 / sum;
    /* k is left over from the loop above: it equals ma->M here whenever M >= 1, so this reads phi[M] and phi[0] */
    *p_ref_folded = (phi[k] + phi[ma->M - k]) / 2. * (ma->z[ma->M] + ma->z[0]) / sum;
    // the expected frequency
    for (k = 0, sum = 0.; k <= ma->M; ++k) {
        ma->afs[k] += ma->afs1[k];
        sum += k * ma->afs1[k];
    }
    return sum / ma->M;
}

/*
 * Fill `rst' for record `b'.
 * Returns 0 on success, or -1 when the record has no PL field or fewer than
 * two alleles. rst->perm_rank is always set to -1. rst->lrt is only computed
 * when ma->n1 > 0 (it is -1 otherwise). rst->cmp[] and rst->p_chi2 stay at -1
 * unless do_contrast is set and rst->p_var > 0.5.
 */
int bcf_p1_cal(const bcf1_t *b, int do_contrast, bcf_p1aux_t *ma, bcf_p1rst_t *rst)
{
    int i, k;
    long double sum = 0.;
    ma->is_indel = bcf_is_indel(b);
    rst->perm_rank = -1;
    // set PL and PL_len
    for (i = 0; i < b->n_gi; ++i) {
        if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
            ma->PL = (uint8_t*)b->gi[i].data;
            ma->PL_len = b->gi[i].len;
            break;
        }
    }
    if (i == b->n_gi) return -1; // no PL
    if (b->n_alleles < 2) return -1; // FIXME: find a better solution
    //
    rst->rank0 = cal_pdg(b, ma); /* cal_pdg() also fills ma->pdg, which mc_cal_afs() relies on below */
    /* NOTE(review): mc_cal_afs() returns -1 on NaN/Inf; that value is stored in f_exp without being checked */
    rst->f_exp = mc_cal_afs(ma, &rst->p_ref_folded, &rst->p_var_folded);
    rst->p_ref = ma->afs1[ma->M];
    for (k = 0, sum = 0.; k < ma->M; ++k)
        sum += ma->afs1[k];
    rst->p_var = (double)sum;
    { // compute the allele count
        double max = -1;
        rst->ac = -1;
        for (k = 0; k <= ma->M; ++k)
            if (max < ma->z[k]) max = ma->z[k], rst->ac = k;
        rst->ac = ma->M - rst->ac; /* stored as M minus the index of the largest z[k] */
    }
    // calculate f_flat and f_em
    for (k = 0, sum = 0.; k <= ma->M; ++k)
        sum += (long double)ma->z[k];
    rst->f_flat = 0.;
    for (k = 0; k <= ma->M; ++k) {
        double p = ma->z[k] / sum;
        rst->f_flat += k * p;
    }
    rst->f_flat /= ma->M;
    { // estimate equal-tail credible interval (95% level)
        int l, h;
        double p;
        /* l / h: first index from each end at which the accumulated afs1 mass would exceed 0.025 */
        for (i = 0, p = 0.; i <= ma->M; ++i)
            if (p + ma->afs1[i] > 0.025) break;
            else p += ma->afs1[i];
        l = i;
        for (i = ma->M, p = 0.; i >= 0; --i)
            if (p + ma->afs1[i] > 0.025) break;
            else p += ma->afs1[i];
        h = i;
        rst->cil = (double)(ma->M - h) / ma->M;
        rst->cih = (double)(ma->M - l) / ma->M;
    }
    if (ma->n1 > 0) { // compute LRT
        double max0, max1, max2;
        for (k = 0, max0 = -1; k <= ma->M; ++k)
            if (max0 < ma->z[k]) max0 = ma->z[k];
        for (k = 0, max1 = -1; k <= ma->n1 * 2; ++k)
            if (max1 < ma->z1[k]) max1 = ma->z1[k];
        for (k = 0, max2 = -1; k <= ma->M - ma->n1 * 2; ++k)
            if (max2 < ma->z2[k]) max2 = ma->z2[k];
        rst->lrt = log(max1 * max2 / max0);
        rst->lrt = rst->lrt < 0? 1 : kf_gammaq(.5, rst->lrt);
    } else rst->lrt = -1.0;
    rst->cmp[0] = rst->cmp[1] = rst->cmp[2] = rst->p_chi2 = -1.0;
    if (do_contrast && rst->p_var > 0.5) // skip contrast2() if the locus is a strong non-variant
        rst->p_chi2 = contrast2(ma, rst->cmp);
    return 0;
}

/* Print the accumulated ma->afs to stderr, highest index first, then reset it to zero. */
void bcf_p1_dump_afs(bcf_p1aux_t *ma)
{
    int k;
    fprintf(stderr, "[afs]");
    for (k = 0; k <= ma->M; ++k)
        fprintf(stderr, " %d:%.3lf", k, ma->afs[ma->M - k]);
    fprintf(stderr, "\n");
    memset(ma->afs, 0, sizeof(double) * (ma->M + 1));
}
samtools-0.1.19/bcftools/prob1.h000066400000000000000000000026611212162403000164460ustar00rootroot00000000000000#ifndef BCF_PROB1_H
#define BCF_PROB1_H

#include "bcf.h"

/* opaque handle; the struct is defined in prob1.c */
struct __bcf_p1aux_t;
typedef struct __bcf_p1aux_t bcf_p1aux_t;

/* per-site results filled in by bcf_p1_cal() */
typedef struct {
    int rank0, perm_rank; // NB: perm_rank is always set to -1 by bcf_p1_cal()
    int ac; // ML alternative allele count
    double f_exp, f_flat, p_ref_folded, p_ref, p_var_folded, p_var;
    double cil, cih;
    double cmp[3], p_chi2, lrt; // used by contrast2()
} bcf_p1rst_t;

/* annotation values filled in by test16(); printed as PV4 (p), DP4 (d) and MQ (mq) */
typedef struct {
    double p[4];
    int mq, depth, is_tested, d[4];
} anno16_t;

/* prior types accepted by bcf_p1_init_prior() / bcf_p1_init_subprior() */
#define MC_PTYPE_FULL 1
#define MC_PTYPE_COND2 2
#define MC_PTYPE_FLAT 3

#ifdef __cplusplus
extern "C" {
#endif

    bcf_p1aux_t *bcf_p1_init(int n, uint8_t *ploidy);
    void bcf_p1_init_prior(bcf_p1aux_t *ma, int type, double theta);
    void bcf_p1_init_subprior(bcf_p1aux_t *ma, int type, double theta);
    void bcf_p1_destroy(bcf_p1aux_t *ma);
    void bcf_p1_set_ploidy(bcf1_t *b, bcf_p1aux_t *ma);
    int bcf_p1_cal(const bcf1_t *b, int do_contrast, bcf_p1aux_t *ma, bcf_p1rst_t *rst);
    int call_multiallelic_gt(bcf1_t *b, bcf_p1aux_t *ma, double threshold, int var_only);
    int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k);
    void bcf_p1_dump_afs(bcf_p1aux_t *ma);
    int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn);
    int bcf_p1_set_n1(bcf_p1aux_t *b, int n1);
    void bcf_p1_set_folded(bcf_p1aux_t *p1a); // only effective when set_n1() is not called

    int bcf_em1(const bcf1_t *b, int n1, int flag, double x[10]);

#ifdef __cplusplus
} #endif #endif samtools-0.1.19/bcftools/vcf.c000066400000000000000000000156001212162403000161710ustar00rootroot00000000000000#include #include #include #include #include "bcf.h" #include "kstring.h" #include "kseq.h" KSTREAM_INIT(gzFile, gzread, 4096) typedef struct { gzFile fp; FILE *fpout; kstream_t *ks; void *refhash; kstring_t line; int max_ref; } vcf_t; bcf_hdr_t *vcf_hdr_read(bcf_t *bp) { kstring_t meta, smpl; int dret; vcf_t *v; bcf_hdr_t *h; if (!bp->is_vcf) return bcf_hdr_read(bp); h = calloc(1, sizeof(bcf_hdr_t)); v = (vcf_t*)bp->v; v->line.l = 0; memset(&meta, 0, sizeof(kstring_t)); memset(&smpl, 0, sizeof(kstring_t)); while (ks_getuntil(v->ks, '\n', &v->line, &dret) >= 0) { if (v->line.l < 2) continue; if (v->line.s[0] != '#') { free(meta.s); free(smpl.s); free(h); return 0; // no sample line } if (v->line.s[0] == '#' && v->line.s[1] == '#') { kputsn(v->line.s, v->line.l, &meta); kputc('\n', &meta); } else if (v->line.s[0] == '#') { int k; ks_tokaux_t aux; char *p; for (p = kstrtok(v->line.s, "\t\n", &aux), k = 0; p; p = kstrtok(0, 0, &aux), ++k) { if (k >= 9) { kputsn(p, aux.p - p, &smpl); kputc('\0', &smpl); } } break; } } kputc('\0', &meta); h->name = 0; h->sname = smpl.s; h->l_smpl = smpl.l; h->txt = meta.s; h->l_txt = meta.l; bcf_hdr_sync(h); return h; } bcf_t *vcf_open(const char *fn, const char *mode) { bcf_t *bp; vcf_t *v; if (strchr(mode, 'b')) return bcf_open(fn, mode); bp = calloc(1, sizeof(bcf_t)); v = calloc(1, sizeof(vcf_t)); bp->is_vcf = 1; bp->v = v; v->refhash = bcf_str2id_init(); if (strchr(mode, 'r')) { v->fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r"); v->ks = ks_init(v->fp); } else if (strchr(mode, 'w')) v->fpout = strcmp(fn, "-")? 
fopen(fn, "w") : stdout; return bp; } int vcf_dictread(bcf_t *bp, bcf_hdr_t *h, const char *fn) { vcf_t *v; gzFile fp; kstream_t *ks; kstring_t s, rn; int dret; if (bp == 0) return -1; if (!bp->is_vcf) return 0; s.l = s.m = 0; s.s = 0; rn.m = rn.l = h->l_nm; rn.s = h->name; v = (vcf_t*)bp->v; fp = gzopen(fn, "r"); ks = ks_init(fp); while (ks_getuntil(ks, 0, &s, &dret) >= 0) { bcf_str2id_add(v->refhash, strdup(s.s)); kputs(s.s, &rn); kputc('\0', &rn); if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret); } ks_destroy(ks); gzclose(fp); h->l_nm = rn.l; h->name = rn.s; bcf_hdr_sync(h); free(s.s); return 0; } int vcf_close(bcf_t *bp) { vcf_t *v; if (bp == 0) return -1; if (!bp->is_vcf) return bcf_close(bp); v = (vcf_t*)bp->v; if (v->fp) { ks_destroy(v->ks); gzclose(v->fp); } if (v->fpout) fclose(v->fpout); free(v->line.s); bcf_str2id_thorough_destroy(v->refhash); free(v); free(bp); return 0; } int vcf_hdr_write(bcf_t *bp, const bcf_hdr_t *h) { vcf_t *v = (vcf_t*)bp->v; int i, has_ver = 0; if (!bp->is_vcf) return bcf_hdr_write(bp, h); if (h->l_txt > 0) { if (strstr(h->txt, "##fileformat=")) has_ver = 1; if (has_ver == 0) fprintf(v->fpout, "##fileformat=VCFv4.1\n"); fwrite(h->txt, 1, h->l_txt - 1, v->fpout); } if (h->l_txt == 0) fprintf(v->fpout, "##fileformat=VCFv4.1\n"); fprintf(v->fpout, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT"); for (i = 0; i < h->n_smpl; ++i) fprintf(v->fpout, "\t%s", h->sns[i]); fputc('\n', v->fpout); return 0; } int vcf_write(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b) { vcf_t *v = (vcf_t*)bp->v; extern void bcf_fmt_core(const bcf_hdr_t *h, bcf1_t *b, kstring_t *s); if (!bp->is_vcf) return bcf_write(bp, h, b); bcf_fmt_core(h, b, &v->line); fwrite(v->line.s, 1, v->line.l, v->fpout); fputc('\n', v->fpout); return v->line.l + 1; } int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b) { int dret, k, i, sync = 0; vcf_t *v = (vcf_t*)bp->v; char *p, *q; kstring_t str, rn; ks_tokaux_t aux, a2; if (!bp->is_vcf) return bcf_read(bp, h, b); v->line.l = 0; 
str.l = 0; str.m = b->m_str; str.s = b->str; rn.l = rn.m = h->l_nm; rn.s = h->name; if (ks_getuntil(v->ks, '\n', &v->line, &dret) < 0) return -1; b->n_smpl = h->n_smpl; for (p = kstrtok(v->line.s, "\t", &aux), k = 0; p; p = kstrtok(0, 0, &aux), ++k) { *(char*)aux.p = 0; if (k == 0) { // ref int tid = bcf_str2id(v->refhash, p); if (tid < 0) { tid = bcf_str2id_add(v->refhash, strdup(p)); kputs(p, &rn); kputc('\0', &rn); sync = 1; } b->tid = tid; } else if (k == 1) { // pos b->pos = atoi(p) - 1; } else if (k == 5) { // qual b->qual = (p[0] >= '0' && p[0] <= '9')? atof(p) : 0; } else if (k <= 8) { // variable length strings kputs(p, &str); kputc('\0', &str); b->l_str = str.l; b->m_str = str.m; b->str = str.s; if (k == 8) bcf_sync(b); } else { // k > 9 if (strncmp(p, "./.", 3) == 0) { for (i = 0; i < b->n_gi; ++i) { if (b->gi[i].fmt == bcf_str2int("GT", 2)) { ((uint8_t*)b->gi[i].data)[k-9] = 1<<7; } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) { ((uint8_t*)b->gi[i].data)[k-9] = 0; } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) { ((int32_t*)b->gi[i].data)[k-9] = 0; } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("DV", 2)) { ((uint16_t*)b->gi[i].data)[k-9] = 0; } else if (b->gi[i].fmt == bcf_str2int("PL", 2)) { int y = b->n_alleles * (b->n_alleles + 1) / 2; memset((uint8_t*)b->gi[i].data + (k - 9) * y, 0, y); } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) { int y = b->n_alleles * (b->n_alleles + 1) / 2; memset((float*)b->gi[i].data + (k - 9) * y, 0, y * 4); } } goto endblock; } for (q = kstrtok(p, ":", &a2), i = 0; q && i < b->n_gi; q = kstrtok(0, 0, &a2), ++i) { if (b->gi[i].fmt == bcf_str2int("GT", 2)) { ((uint8_t*)b->gi[i].data)[k-9] = (q[0] - '0')<<3 | (q[2] - '0') | (q[1] == '/'? 
0 : 1) << 6; } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) { double _x = strtod(q, &q); int x = (int)(_x + .499); if (x > 255) x = 255; ((uint8_t*)b->gi[i].data)[k-9] = x; } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) { int x = strtol(q, &q, 10); if (x > 0xffff) x = 0xffff; ((uint32_t*)b->gi[i].data)[k-9] = x; } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("DV", 2)) { int x = strtol(q, &q, 10); if (x > 0xffff) x = 0xffff; ((uint16_t*)b->gi[i].data)[k-9] = x; } else if (b->gi[i].fmt == bcf_str2int("PL", 2)) { int x, y, j; uint8_t *data = (uint8_t*)b->gi[i].data; y = b->n_alleles * (b->n_alleles + 1) / 2; for (j = 0; j < y; ++j) { x = strtol(q, &q, 10); if (x > 255) x = 255; data[(k-9) * y + j] = x; ++q; } } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) { int j, y; float x, *data = (float*)b->gi[i].data; y = b->n_alleles * (b->n_alleles + 1) / 2; for (j = 0; j < y; ++j) { x = strtod(q, &q); data[(k-9) * y + j] = x > 0? -x/10. : x; ++q; } } } endblock: i = i; } } h->l_nm = rn.l; h->name = rn.s; if (sync) bcf_hdr_sync(h); return v->line.l + 1; } samtools-0.1.19/bcftools/vcfutils.pl000077500000000000000000000365331212162403000174560ustar00rootroot00000000000000#!/usr/bin/perl -w # Author: lh3 use strict; use warnings; use Getopt::Std; &main; exit; sub main { &usage if (@ARGV < 1); my $command = shift(@ARGV); my %func = (subsam=>\&subsam, listsam=>\&listsam, fillac=>\&fillac, qstats=>\&qstats, varFilter=>\&varFilter, hapmap2vcf=>\&hapmap2vcf, ucscsnp2vcf=>\&ucscsnp2vcf, filter4vcf=>\&varFilter, ldstats=>\&ldstats, gapstats=>\&gapstats, splitchr=>\&splitchr, vcf2fq=>\&vcf2fq); die("Unknown command \"$command\".\n") if (!defined($func{$command})); &{$func{$command}}; } sub splitchr { my %opts = (l=>5000000); getopts('l:', \%opts); my $l = $opts{l}; die(qq/Usage: vcfutils.pl splitchr [-l $opts{l}] \n/) if (@ARGV == 0 && -t STDIN); while (<>) { my @t = split; my $last = 0; for (my $i = 0; $i < $t[1];) { my $e = ($t[1] - $i) / $l < 1.1? 
$t[1] : $i + $l;
      print "$t[0]:".($i+1)."-$e\n";
      $i = $e;
    }
  }
}

# Print the VCF given as first argument, keeping only the sample columns named
# in the remaining arguments. With no matching sample the FORMAT column is
# dropped as well.
sub subsam {
  die(qq/Usage: vcfutils.pl subsam [samples]\n/) if (@ARGV == 0);
  my ($fh, %h);
  my $fn = shift(@ARGV);
  my @col;
  # NOTE(review): two-argument open; for *.gz names the file name is interpolated into a shell command
  open($fh, ($fn =~ /\.gz$/)? "gzip -dc $fn |" : $fn) || die;
  $h{$_} = 1 for (@ARGV);
  while (<$fh>) {
    if (/^##/) {
      print;
    } elsif (/^#/) {
      my @t = split;
      my @s = @t[0..8]; # all fixed fields + FORMAT
      for (9 .. $#t) {
        if ($h{$t[$_]}) {
          push(@s, $t[$_]);
          push(@col, $_);
        }
      }
      pop(@s) if (@s == 9); # no sample selected; remove the FORMAT field
      print join("\t", @s), "\n";
    } else {
      my @t = split;
      if (@col == 0) {
        print join("\t", @t[0..7]), "\n";
      } else {
        print join("\t", @t[0..8], map {$t[$_]} @col), "\n";
      }
    }
  }
  close($fh);
}

# Print the sample names of the first "#CHROM" line, one per line, then exit.
sub listsam {
  die(qq/Usage: vcfutils.pl listsam \n/) if (@ARGV == 0 && -t STDIN);
  while (<>) {
    if (/^#/ && !/^##/) {
      my @t = split;
      print join("\n", @t[9..$#t]), "\n";
      exit;
    }
  }
}

# Recompute INFO/AC and INFO/AN from the GT field of every sample and print the
# updated records. Records without a GT entry in FORMAT are printed unchanged.
sub fillac {
  die(qq/Usage: vcfutils.pl fillac \n\nNote: The GT field MUST BE present and always appear as the first field.\n/) if (@ARGV == 0 && -t STDIN);
  while (<>) {
    if (/^#/) {
      print;
    } else {
      my @t = split;
      my @c = (0, 0); # allele counts, indexed by allele number
      my $n = 0;      # number of alleles counted
      my $s = -1;     # position of GT within FORMAT
      @_ = split(":", $t[8]);
      for (0 .. $#_) {
        if ($_[$_] eq 'GT') { $s = $_; last; }
      }
      if ($s < 0) {
        print join("\t", @t), "\n";
        next;
      }
      for (9 .. $#t) {
        if ($t[$_] =~ /^0,0,0/) {
        } elsif ($t[$_] =~ /^([^\s:]+:){$s}(\d+).(\d+)/) {
          ++$c[$2]; ++$c[$3];
          $n += 2;
        }
      }
      # One count per ALT allele, comma-separated: a tab here would split the
      # INFO column. Alleles skipped over by the ++ above are undef; count them as 0.
      my $AC = "AC=" . join(",", map { defined($_)? $_ : 0 } @c[1..$#c]) . ";AN=$n";
      my $info = $t[7];
      $info =~ s/(;?)AC=[\d,]+//; # the old value may itself be a comma-separated list
      $info =~ s/(;?)AN=(\d+)//;
      $info =~ s/^;+//;           # left behind when AC/AN was the first key
      if ($info eq '.' || $info eq '') {
        $info = $AC;
      } else {
        $info .= ";$AC";
      }
      $t[7] = $info;
      print join("\t", @t), "\n";
    }
  }
}

# Summarise intervals between consecutive sites on the same chromosome whose
# NEIR value exceeds the -t cutoff: their number, their fraction among all
# sites carrying NEIR, and their total length.
sub ldstats {
  my %opts = (t=>0.9);
  getopts('t:', \%opts);
  die("Usage: vcfutils.pl ldstats [-t $opts{t}] \n") if (@ARGV == 0 && -t STDIN);
  my $cutoff = $opts{t};
  my ($last, $lastchr) = (0x7fffffff, '');
  my ($x, $y, $n) = (0, 0, 0);
  while (<>) {
    if (/^([^#\s]+)\s(\d+)/) {
      my ($chr, $pos) = ($1, $2);
      if (/NEIR=([\d\.]+)/) {
        ++$n;
        ++$y, $x += $pos - $last if ($lastchr eq $chr && $pos > $last && $1 > $cutoff);
      }
      $last = $pos; $lastchr = $chr;
    }
  }
  print "Number of SNP intervals in strong LD (r > $opts{t}): $y\n";
  # without any NEIR record $n is 0; report 0 instead of dying on the division
  print "Fraction: ", ($n? $y/$n : 0), "\n";
  print "Length: $x\n";
}

# Print cumulative statistics per QUAL threshold, highest QUAL first, one row
# roughly every -s fraction of the records. Indels are skipped. -r names a
# file of reference positions (a VCF when -v is given).
sub qstats {
  my %opts = (r=>'', s=>0.02, v=>undef);
  getopts('r:s:v', \%opts);
  die("Usage: vcfutils.pl qstats [-r ref.vcf] \n Note: This command discards indels. Output: QUAL #non-indel #SNPs #transitions #joint ts/tv #joint/#ref #joint/#non-indel \n") if (@ARGV == 0 && -t STDIN);
  my %ts = (AG=>1, GA=>1, CT=>1, TC=>1);
  my %h = ();
  my $is_vcf = defined($opts{v})? 1 : 0;
  if ($opts{r}) { # read the reference positions
    my $fh;
    open($fh, $opts{r}) || die;
    while (<$fh>) {
      next if (/^#/);
      if ($is_vcf) {
        my @t = split;
        $h{$t[0],$t[1]} = $t[4];
      } else {
        $h{$1,$2} = 1 if (/^(\S+)\s+(\d+)/);
      }
    }
    close($fh);
  }
  my $hsize = scalar(keys %h);
  my @a; # one [QUAL, is_snp, is_transition, in_reference] entry per kept record
  while (<>) {
    next if (/^#/);
    my @t = split;
    next if (length($t[3]) != 1 || uc($t[3]) eq 'N');
    $t[3] = uc($t[3]); $t[4] = uc($t[4]);
    my @s = split(',', $t[4]);
    $t[5] = 3 if ($t[5] eq '.' || $t[5] < 0);
    next if (length($s[0]) != 1);
    my $hit;
    if ($is_vcf) {
      $hit = 0;
      my $aa = $h{$t[0],$t[1]};
      if (defined($aa)) {
        my @aaa = split(",", $aa);
        for (@aaa) {
          $hit = 1 if ($_ eq $s[0]);
        }
      }
    } else {
      $hit = defined($h{$t[0],$t[1]})? 1 : 0;
    }
    push(@a, [$t[5], ($t[4] eq '.' || $t[4] eq $t[3])? 0 : 1, $ts{$t[3].$s[0]}? 1 : 0, $hit]);
  }
  # must be tested before the end marker is appended, otherwise @a is never empty
  die("[qstats] No SNP data!\n") if (@a == 0);
  push(@a, [-1, 0, 0, 0]); # end marker
  @a = sort {$b->[0]<=>$a->[0]} @a;
  my $next = $opts{s};
  my $last = $a[0];
  my @c = (0, 0, 0, 0); # running totals: records, SNPs, transitions, reference hits
  my @lc;
  $lc[1] = $lc[2] = 0;  # totals at the previously printed row
  for my $p (@a) {
    if ($p->[0] == -1 || ($p->[0] != $last && $c[0]/@a > $next)) {
      my @x;
      $x[0] = sprintf("%.4f", $c[1]-$c[2]? $c[2] / ($c[1] - $c[2]) : 100);
      $x[1] = sprintf("%.4f", $hsize? $c[3] / $hsize : 0);
      # no SNP seen yet: print 0 instead of dividing by zero
      $x[2] = sprintf("%.4f", $c[1]? $c[3] / $c[1] : 0);
      my $a = $c[1] - $lc[1];
      my $b = $c[2] - $lc[2];
      $x[3] = sprintf("%.4f", $a-$b? $b / ($a-$b) : 100);
      print join("\t", $last, @c, @x), "\n";
      $next = $c[0]/@a + $opts{s};
      $lc[1] = $c[1]; $lc[2] = $c[2];
    }
    ++$c[0]; $c[1] += $p->[1]; $c[2] += $p->[2]; $c[3] += $p->[3];
    $last = $p->[0];
  }
}

sub varFilter {
  my %opts = (d=>2, D=>10000000, a=>2, W=>10, Q=>10, w=>3, p=>undef, 1=>1e-4, 2=>1e-100, 3=>0, 4=>1e-4, G=>0, S=>1000, e=>1e-4);
  getopts('pd:D:W:Q:w:a:1:2:3:4:G:S:e:', \%opts);
  die(qq/ Usage: vcfutils.pl varFilter [options] Options: -Q INT minimum RMS mapping quality for SNPs [$opts{Q}] -d INT minimum read depth [$opts{d}] -D INT maximum read depth [$opts{D}] -a INT minimum number of alternate bases [$opts{a}] -w INT SNP within INT bp around a gap to be filtered [$opts{w}] -W INT window size for filtering adjacent gaps [$opts{W}] -1 FLOAT min P-value for strand bias (given PV4) [$opts{1}] -2 FLOAT min P-value for baseQ bias [$opts{2}] -3 FLOAT min P-value for mapQ bias [$opts{3}] -4 FLOAT min P-value for end distance bias [$opts{4}] -e FLOAT min P-value for HWE (plus F<0) [$opts{e}] -p print filtered variants Note: Some of the filters rely on annotations generated by SAMtools\/BCFtools. \n/) if (@ARGV == 0 && -t STDIN);

  # calculate the window size
  my ($ol, $ow) = ($opts{W}, $opts{w});
  my $max_dist = $ol > $ow? $ol : $ow;
  # the core loop
  my @staging; # (indel_filtering_score, flt_tag, indel_span; chr, pos, ...)
while (<>) { my @t = split; if (/^#/) { print; next; } next if ($t[4] eq '.'); # skip non-var sites next if ($t[3] eq 'N'); # skip sites with unknown ref ('N') # check if the site is a SNP my $type = 1; # SNP if (length($t[3]) > 1) { $type = 2; # MNP my @s = split(',', $t[4]); for (@s) { $type = 3 if (length != length($t[3])); } } else { my @s = split(',', $t[4]); for (@s) { $type = 3 if (length > 1); } } # clear the out-of-range elements while (@staging) { # Still on the same chromosome and the first element's window still affects this position? last if ($staging[0][3] eq $t[0] && $staging[0][4] + $staging[0][2] + $max_dist >= $t[1]); varFilter_aux(shift(@staging), $opts{p}); # calling a function is a bit slower, not much } my $flt = 0; # parse annotations my ($dp, $mq, $dp_alt) = (-1, -1, -1); if ($t[7] =~ /DP4=(\d+),(\d+),(\d+),(\d+)/i) { $dp = $1 + $2 + $3 + $4; $dp_alt = $3 + $4; } if ($t[7] =~ /DP=(\d+)/i) { $dp = $1; } $mq = $1 if ($t[7] =~ /MQ=(\d+)/i); # the depth and mapQ filter if ($dp >= 0) { if ($dp < $opts{d}) { $flt = 2; } elsif ($dp > $opts{D}) { $flt = 3; } } $flt = 4 if ($dp_alt >= 0 && $dp_alt < $opts{a}); $flt = 1 if ($flt == 0 && $mq >= 0 && $mq < $opts{Q}); $flt = 7 if ($flt == 0 && /PV4=([^,]+),([^,]+),([^,]+),([^,;\t]+)/ && ($1<$opts{1} || $2<$opts{2} || $3<$opts{3} || $4<$opts{4})); $flt = 8 if ($flt == 0 && ((/MXGQ=(\d+)/ && $1 < $opts{G}) || (/MXSP=(\d+)/ && $1 >= $opts{S}))); # HWE filter if ($t[7] =~ /G3=([^;,]+),([^;,]+),([^;,]+).*HWE=([^;,]+)/ && $4 < $opts{e}) { my $p = 2*$1 + $2; my $f = ($p > 0 && $p < 1)? 
1 - $2 / ($p * (1-$p)) : 0; $flt = 9 if ($f < 0); } my $score = $t[5] * 100 + $dp_alt; my $rlen = length($t[3]) - 1; # $indel_score<0 for SNPs if ($flt == 0) { if ($type == 3) { # an indel # filtering SNPs and MNPs for my $x (@staging) { next if (($x->[0]&3) == 3 || $x->[1] || $x->[4] + $x->[2] + $ow < $t[1]); $x->[1] = 5; } # check the staging list for indel filtering for my $x (@staging) { next if (($x->[0]&3) != 3 || $x->[1] || $x->[4] + $x->[2] + $ol < $t[1]); if ($x->[0]>>2 < $score) { $x->[1] = 6; } else { $flt = 6; last; } } } else { # SNP or MNP for my $x (@staging) { next if (($x->[0]&3) != 3 || $x->[4] + $x->[2] + $ow < $t[1]); if ($x->[4] + length($x->[7]) - 1 == $t[1] && substr($x->[7], -1, 1) eq substr($t[4], 0, 1) && length($x->[7]) - length($x->[6]) == 1) { $x->[1] = 5; } else { $flt = 5; } last; } # check MNP for my $x (@staging) { next if (($x->[0]&3) == 3 || $x->[4] + $x->[2] < $t[1]); if ($x->[0]>>2 < $score) { $x->[1] = 8; } else { $flt = 8; last; } } } } push(@staging, [$score<<2|$type, $flt, $rlen, @t]); } # output the last few elements in the staging list while (@staging) { varFilter_aux(shift @staging, $opts{p}); } } sub varFilter_aux { my ($first, $is_print) = @_; if ($first->[1] == 0) { print join("\t", @$first[3 .. @$first-1]), "\n"; } elsif ($is_print) { print STDERR join("\t", substr("UQdDaGgPMS", $first->[1], 1), @$first[3 .. @$first-1]), "\n"; } } sub gapstats { my (@c0, @c1); $c0[$_] = $c1[$_] = 0 for (0 .. 
10000); while (<>) { next if (/^#/); my @t = split; next if (length($t[3]) == 1 && $t[4] =~ /^[A-Za-z](,[A-Za-z])*$/); # not an indel my @s = split(',', $t[4]); for my $x (@s) { my $l = length($x) - length($t[3]) + 5000; if ($x =~ /^-/) { $l = -(length($x) - 1) + 5000; } elsif ($x =~ /^\+/) { $l = length($x) - 1 + 5000; } $c0[$l] += 1 / @s; } } for (my $i = 0; $i < 10000; ++$i) { next if ($c0[$i] == 0); $c1[0] += $c0[$i]; $c1[1] += $c0[$i] if (($i-5000)%3 == 0); printf("C\t%d\t%.2f\n", ($i-5000), $c0[$i]); } printf("3\t%d\t%d\t%.3f\n", $c1[0], $c1[1], $c1[1]/$c1[0]); } sub ucscsnp2vcf { die("Usage: vcfutils.pl \n") if (@ARGV == 0 && -t STDIN); print "##fileformat=VCFv4.0\n"; print join("\t", "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"), "\n"; while (<>) { my @t = split("\t"); my $indel = ($t[9] =~ /^[ACGT](\/[ACGT])+$/)? 0 : 1; my $pos = $t[2] + 1; my @alt; push(@alt, $t[7]); if ($t[6] eq '-') { $t[9] = reverse($t[9]); $t[9] =~ tr/ACGTRYMKWSNacgtrymkwsn/TGCAYRKMWSNtgcayrkmwsn/; } my @a = split("/", $t[9]); for (@a) { push(@alt, $_) if ($_ ne $alt[0]); } if ($indel) { --$pos; for (0 .. $#alt) { $alt[$_] =~ tr/-//d; $alt[$_] = "N$alt[$_]"; } } my $ref = shift(@alt); my $af = $t[13] > 0? ";AF=$t[13]" : ''; my $valid = ($t[12] eq 'unknown')? '' : ";valid=$t[12]"; my $info = "molType=$t[10];class=$t[11]$valid$af"; print join("\t", $t[1], $pos, $t[4], $ref, join(",", @alt), 0, '.', $info), "\n"; } } sub hapmap2vcf { die("Usage: vcfutils.pl \n") if (@ARGV == 0); my $fn = shift(@ARGV); # parse UCSC SNP warn("Parsing UCSC SNPs...\n"); my ($fh, %map); open($fh, ($fn =~ /\.gz$/)? 
"gzip -dc $fn |" : $fn) || die; while (<$fh>) { my @t = split; next if ($t[3] - $t[2] != 1); # not SNP @{$map{$t[4]}} = @t[1,3,7]; } close($fh); # write VCF warn("Writing VCF...\n"); print "##fileformat=VCFv4.0\n"; while (<>) { my @t = split; if ($t[0] eq 'rs#') { # the first line print join("\t", "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT", @t[11..$#t]), "\n"; } else { next unless ($map{$t[0]}); next if (length($t[1]) != 3); # skip non-SNPs my $a = \@{$map{$t[0]}}; my $ref = $a->[2]; my @u = split('/', $t[1]); if ($u[1] eq $ref) { $u[1] = $u[0]; $u[0] = $ref; } elsif ($u[0] ne $ref) { next; } my $alt = $u[1]; my %w; $w{$u[0]} = 0; $w{$u[1]} = 1; my @s = (@$a[0,1], $t[0], $ref, $alt, 0, '.', '.', 'GT'); my $is_tri = 0; for (@t[11..$#t]) { if ($_ eq 'NN') { push(@s, './.'); } else { my @a = ($w{substr($_,0,1)}, $w{substr($_,1,1)}); if (!defined($a[0]) || !defined($a[1])) { $is_tri = 1; last; } push(@s, "$a[0]/$a[1]"); } } next if ($is_tri); print join("\t", @s), "\n"; } } } sub vcf2fq { my %opts = (d=>3, D=>100000, Q=>10, l=>5); getopts('d:D:Q:l:', \%opts); die(qq/ Usage: vcfutils.pl vcf2fq [options] Options: -d INT minimum depth [$opts{d}] -D INT maximum depth [$opts{D}] -Q INT min RMS mapQ [$opts{Q}] -l INT INDEL filtering window [$opts{l}] \n/) if (@ARGV == 0 && -t STDIN); my ($last_chr, $seq, $qual, $last_pos, @gaps); my $_Q = $opts{Q}; my $_d = $opts{d}; my $_D = $opts{D}; my %het = (AC=>'M', AG=>'R', AT=>'W', CA=>'M', CG=>'S', CT=>'Y', GA=>'R', GC=>'S', GT=>'K', TA=>'W', TC=>'Y', TG=>'K'); $last_chr = ''; while (<>) { next if (/^#/); my @t = split; if ($last_chr ne $t[0]) { &v2q_post_process($last_chr, \$seq, \$qual, \@gaps, $opts{l}) if ($last_chr); ($last_chr, $last_pos) = ($t[0], 0); $seq = $qual = ''; @gaps = (); } die("[vcf2fq] unsorted input\n") if ($t[1] - $last_pos < 0); if ($t[1] - $last_pos > 1) { $seq .= 'n' x ($t[1] - $last_pos - 1); $qual .= '!' 
x ($t[1] - $last_pos - 1); } if (length($t[3]) == 1 && $t[7] !~ /INDEL/ && $t[4] =~ /^([A-Za-z.])(,[A-Za-z])*$/) { # a SNP or reference my ($ref, $alt) = ($t[3], $1); my ($b, $q); $q = $1 if ($t[7] =~ /FQ=(-?[\d\.]+)/); if ($q < 0) { $_ = ($t[7] =~ /AF1=([\d\.]+)/)? $1 : 0; $b = ($_ < .5 || $alt eq '.')? $ref : $alt; $q = -$q; } else { $b = $het{"$ref$alt"}; $b ||= 'N'; } $b = lc($b); $b = uc($b) if (($t[7] =~ /MQ=(\d+)/ && $1 >= $_Q) && ($t[7] =~ /DP=(\d+)/ && $1 >= $_d && $1 <= $_D)); $q = int($q + 33 + .499); $q = chr($q <= 126? $q : 126); $seq .= $b; $qual .= $q; } elsif ($t[4] ne '.') { # an INDEL push(@gaps, [$t[1], length($t[3])]); } $last_pos = $t[1]; } &v2q_post_process($last_chr, \$seq, \$qual, \@gaps, $opts{l}); } sub v2q_post_process { my ($chr, $seq, $qual, $gaps, $l) = @_; for my $g (@$gaps) { my $beg = $g->[0] > $l? $g->[0] - $l : 0; my $end = $g->[0] + $g->[1] + $l; $end = length($$seq) if ($end > length($$seq)); substr($$seq, $beg, $end - $beg) = lc(substr($$seq, $beg, $end - $beg)); } print "\@$chr\n"; &v2q_print_str($seq); print "+\n"; &v2q_print_str($qual); } sub v2q_print_str { my ($s) = @_; my $l = length($$s); for (my $i = 0; $i < $l; $i += 60) { print substr($$s, $i, 60), "\n"; } } sub usage { die(qq/ Usage: vcfutils.pl []\n Command: subsam get a subset of samples listsam list the samples fillac fill the allele count field qstats SNP stats stratified by QUAL hapmap2vcf convert the hapmap format to VCF ucscsnp2vcf convert UCSC SNP SQL dump to VCF varFilter filtering short variants (*) vcf2fq VCF->fastq (**) Notes: Commands with description endting with (*) may need bcftools specific annotations. 
\n/); } samtools-0.1.19/bedcov.c000066400000000000000000000061071212162403000150440ustar00rootroot00000000000000#include #include #include #include #include #include #include "kstring.h" #include "bgzf.h" #include "bam.h" #include "kseq.h" KSTREAM_INIT(gzFile, gzread, 16384) typedef struct { bamFile fp; bam_iter_t iter; int min_mapQ; } aux_t; static int read_bam(void *data, bam1_t *b) { aux_t *aux = (aux_t*)data; int ret = bam_iter_read(aux->fp, aux->iter, b); if ((int)b->core.qual < aux->min_mapQ) b->core.flag |= BAM_FUNMAP; return ret; } int main_bedcov(int argc, char *argv[]) { extern void bam_init_header_hash(bam_header_t*); gzFile fp; kstring_t str; kstream_t *ks; bam_index_t **idx; bam_header_t *h = 0; aux_t **aux; int *n_plp, dret, i, n, c, min_mapQ = 0; int64_t *cnt; const bam_pileup1_t **plp; while ((c = getopt(argc, argv, "Q:")) >= 0) { switch (c) { case 'Q': min_mapQ = atoi(optarg); break; } } if (optind + 2 > argc) { fprintf(stderr, "Usage: samtools bedcov [...]\n"); return 1; } memset(&str, 0, sizeof(kstring_t)); n = argc - optind - 1; aux = calloc(n, sizeof(void*)); idx = calloc(n, sizeof(void*)); for (i = 0; i < n; ++i) { aux[i] = calloc(1, sizeof(aux_t)); aux[i]->min_mapQ = min_mapQ; aux[i]->fp = bam_open(argv[i+optind+1], "r"); idx[i] = bam_index_load(argv[i+optind+1]); if (aux[i]->fp == 0 || idx[i] == 0) { fprintf(stderr, "ERROR: fail to open index BAM file '%s'\n", argv[i+optind+1]); return 2; } bgzf_set_cache_size(aux[i]->fp, 20); if (i == 0) h = bam_header_read(aux[0]->fp); } bam_init_header_hash(h); cnt = calloc(n, 8); fp = gzopen(argv[optind], "rb"); ks = ks_init(fp); n_plp = calloc(n, sizeof(int)); plp = calloc(n, sizeof(void*)); while (ks_getuntil(ks, KS_SEP_LINE, &str, &dret) >= 0) { char *p, *q; int tid, beg, end, pos; bam_mplp_t mplp; for (p = q = str.s; *p && *p != '\t'; ++p); if (*p != '\t') goto bed_error; *p = 0; tid = bam_get_tid(h, q); *p = '\t'; if (tid < 0) goto bed_error; for (q = p = p + 1; isdigit(*p); ++p); if (*p != '\t') 
goto bed_error; *p = 0; beg = atoi(q); *p = '\t'; for (q = p = p + 1; isdigit(*p); ++p); if (*p == '\t' || *p == 0) { int c = *p; *p = 0; end = atoi(q); *p = c; } else goto bed_error; for (i = 0; i < n; ++i) { if (aux[i]->iter) bam_iter_destroy(aux[i]->iter); aux[i]->iter = bam_iter_query(idx[i], tid, beg, end); } mplp = bam_mplp_init(n, read_bam, (void**)aux); bam_mplp_set_maxcnt(mplp, 64000); memset(cnt, 0, 8 * n); while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0) if (pos >= beg && pos < end) for (i = 0; i < n; ++i) cnt[i] += n_plp[i]; for (i = 0; i < n; ++i) { kputc('\t', &str); kputl(cnt[i], &str); } puts(str.s); bam_mplp_destroy(mplp); continue; bed_error: fprintf(stderr, "Errors in BED line '%s'\n", str.s); } free(n_plp); free(plp); ks_destroy(ks); gzclose(fp); free(cnt); for (i = 0; i < n; ++i) { if (aux[i]->iter) bam_iter_destroy(aux[i]->iter); bam_index_destroy(idx[i]); bam_close(aux[i]->fp); free(aux[i]); } bam_header_destroy(h); free(aux); free(idx); free(str.s); return 0; } samtools-0.1.19/bedidx.c000066400000000000000000000074521212162403000150450ustar00rootroot00000000000000#include #include #include #include #include #ifdef _WIN32 #define drand48() ((double)rand() / RAND_MAX) #endif #include "ksort.h" KSORT_INIT_GENERIC(uint64_t) #include "kseq.h" KSTREAM_INIT(gzFile, gzread, 8192) typedef struct { int n, m; uint64_t *a; int *idx; } bed_reglist_t; #include "khash.h" KHASH_MAP_INIT_STR(reg, bed_reglist_t) #define LIDX_SHIFT 13 typedef kh_reg_t reghash_t; int *bed_index_core(int n, uint64_t *a, int *n_idx) { int i, j, m, *idx; m = *n_idx = 0; idx = 0; for (i = 0; i < n; ++i) { int beg, end; beg = a[i]>>32 >> LIDX_SHIFT; end = ((uint32_t)a[i]) >> LIDX_SHIFT; if (m < end + 1) { int oldm = m; m = end + 1; kroundup32(m); idx = realloc(idx, m * sizeof(int)); for (j = oldm; j < m; ++j) idx[j] = -1; } if (beg == end) { if (idx[beg] < 0) idx[beg] = i; } else { for (j = beg; j <= end; ++j) if (idx[j] < 0) idx[j] = i; } *n_idx = end + 1; } return idx; } 
void bed_index(void *_h) { reghash_t *h = (reghash_t*)_h; khint_t k; for (k = 0; k < kh_end(h); ++k) { if (kh_exist(h, k)) { bed_reglist_t *p = &kh_val(h, k); if (p->idx) free(p->idx); ks_introsort(uint64_t, p->n, p->a); p->idx = bed_index_core(p->n, p->a, &p->m); } } } int bed_overlap_core(const bed_reglist_t *p, int beg, int end) { int i, min_off; if (p->n == 0) return 0; min_off = (beg>>LIDX_SHIFT >= p->n)? p->idx[p->n-1] : p->idx[beg>>LIDX_SHIFT]; if (min_off < 0) { // TODO: this block can be improved, but speed should not matter too much here int n = beg>>LIDX_SHIFT; if (n > p->n) n = p->n; for (i = n - 1; i >= 0; --i) if (p->idx[i] >= 0) break; min_off = i >= 0? p->idx[i] : 0; } for (i = min_off; i < p->n; ++i) { if ((int)(p->a[i]>>32) >= end) break; // out of range; no need to proceed if ((int32_t)p->a[i] > beg && (int32_t)(p->a[i]>>32) < end) return 1; // find the overlap; return } return 0; } int bed_overlap(const void *_h, const char *chr, int beg, int end) { const reghash_t *h = (const reghash_t*)_h; khint_t k; if (!h) return 0; k = kh_get(reg, h, chr); if (k == kh_end(h)) return 0; return bed_overlap_core(&kh_val(h, k), beg, end); } void *bed_read(const char *fn) { reghash_t *h = kh_init(reg); gzFile fp; kstream_t *ks; int dret; kstring_t *str; // read the list fp = strcmp(fn, "-")? 
gzopen(fn, "r") : gzdopen(fileno(stdin), "r"); if (fp == 0) return 0; str = calloc(1, sizeof(kstring_t)); ks = ks_init(fp); while (ks_getuntil(ks, 0, str, &dret) >= 0) { // read the chr name int beg = -1, end = -1; bed_reglist_t *p; khint_t k = kh_get(reg, h, str->s); if (k == kh_end(h)) { // absent from the hash table int ret; char *s = strdup(str->s); k = kh_put(reg, h, s, &ret); memset(&kh_val(h, k), 0, sizeof(bed_reglist_t)); } p = &kh_val(h, k); if (dret != '\n') { // if the lines has other characters if (ks_getuntil(ks, 0, str, &dret) > 0 && isdigit(str->s[0])) { beg = atoi(str->s); // begin if (dret != '\n') { if (ks_getuntil(ks, 0, str, &dret) > 0 && isdigit(str->s[0])) { end = atoi(str->s); // end if (end < beg) end = -1; } } } } if (dret != '\n') while ((dret = ks_getc(ks)) > 0 && dret != '\n'); // skip the rest of the line if (end < 0 && beg > 0) end = beg, beg = beg - 1; // if there is only one column if (beg >= 0 && end > beg) { if (p->n == p->m) { p->m = p->m? p->m<<1 : 4; p->a = realloc(p->a, p->m * 8); } p->a[p->n++] = (uint64_t)beg<<32 | end; } } ks_destroy(ks); gzclose(fp); free(str->s); free(str); bed_index(h); return h; } void bed_destroy(void *_h) { reghash_t *h = (reghash_t*)_h; khint_t k; for (k = 0; k < kh_end(h); ++k) { if (kh_exist(h, k)) { free(kh_val(h, k).a); free(kh_val(h, k).idx); free((char*)kh_key(h, k)); } } kh_destroy(reg, h); } samtools-0.1.19/bgzf.c000066400000000000000000000512071212162403000145330ustar00rootroot00000000000000/* The MIT License Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology 2011 Attractive Chaos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 
subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include "bgzf.h" #ifdef _USE_KNETFILE #include "knetfile.h" typedef knetFile *_bgzf_file_t; #define _bgzf_open(fn, mode) knet_open(fn, mode) #define _bgzf_dopen(fp, mode) knet_dopen(fp, mode) #define _bgzf_close(fp) knet_close(fp) #define _bgzf_fileno(fp) ((fp)->fd) #define _bgzf_tell(fp) knet_tell(fp) #define _bgzf_seek(fp, offset, whence) knet_seek(fp, offset, whence) #define _bgzf_read(fp, buf, len) knet_read(fp, buf, len) #define _bgzf_write(fp, buf, len) knet_write(fp, buf, len) #else // ~defined(_USE_KNETFILE) #if defined(_WIN32) || defined(_MSC_VER) #define ftello(fp) ftell(fp) #define fseeko(fp, offset, whence) fseek(fp, offset, whence) #else // ~defined(_WIN32) extern off_t ftello(FILE *stream); extern int fseeko(FILE *stream, off_t offset, int whence); #endif // ~defined(_WIN32) typedef FILE *_bgzf_file_t; #define _bgzf_open(fn, mode) fopen(fn, mode) #define _bgzf_dopen(fp, mode) fdopen(fp, mode) #define _bgzf_close(fp) fclose(fp) #define _bgzf_fileno(fp) fileno(fp) #define _bgzf_tell(fp) ftello(fp) #define _bgzf_seek(fp, offset, whence) fseeko(fp, offset, whence) #define _bgzf_read(fp, buf, len) fread(buf, 1, len, fp) #define _bgzf_write(fp, buf, len) fwrite(buf, 1, len, fp) #endif // ~define(_USE_KNETFILE) #define BLOCK_HEADER_LENGTH 18 #define BLOCK_FOOTER_LENGTH 8 /* 
BGZF/GZIP header (speciallized from RFC 1952; little endian): +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ | 31|139| 8| 4| 0| 0|255| 6| 66| 67| 2|BLK_LEN| +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ */ static const uint8_t g_magic[19] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\0\0"; #ifdef BGZF_CACHE typedef struct { int size; uint8_t *block; int64_t end_offset; } cache_t; #include "khash.h" KHASH_MAP_INIT_INT64(cache, cache_t) #endif static inline void packInt16(uint8_t *buffer, uint16_t value) { buffer[0] = value; buffer[1] = value >> 8; } static inline int unpackInt16(const uint8_t *buffer) { return buffer[0] | buffer[1] << 8; } static inline void packInt32(uint8_t *buffer, uint32_t value) { buffer[0] = value; buffer[1] = value >> 8; buffer[2] = value >> 16; buffer[3] = value >> 24; } static BGZF *bgzf_read_init() { BGZF *fp; fp = calloc(1, sizeof(BGZF)); fp->is_write = 0; fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE); fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE); #ifdef BGZF_CACHE fp->cache = kh_init(cache); #endif return fp; } static BGZF *bgzf_write_init(int compress_level) // compress_level==-1 for the default level { BGZF *fp; fp = calloc(1, sizeof(BGZF)); fp->is_write = 1; fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE); fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE); fp->compress_level = compress_level < 0? 
Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1 if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION; return fp; } // get the compress level from the mode string static int mode2level(const char *__restrict mode) { int i, compress_level = -1; for (i = 0; mode[i]; ++i) if (mode[i] >= '0' && mode[i] <= '9') break; if (mode[i]) compress_level = (int)mode[i] - '0'; if (strchr(mode, 'u')) compress_level = 0; return compress_level; } BGZF *bgzf_open(const char *path, const char *mode) { BGZF *fp = 0; assert(compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE); if (strchr(mode, 'r') || strchr(mode, 'R')) { _bgzf_file_t fpr; if ((fpr = _bgzf_open(path, "r")) == 0) return 0; fp = bgzf_read_init(); fp->fp = fpr; } else if (strchr(mode, 'w') || strchr(mode, 'W')) { FILE *fpw; if ((fpw = fopen(path, "w")) == 0) return 0; fp = bgzf_write_init(mode2level(mode)); fp->fp = fpw; } return fp; } BGZF *bgzf_dopen(int fd, const char *mode) { BGZF *fp = 0; assert(compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE); if (strchr(mode, 'r') || strchr(mode, 'R')) { _bgzf_file_t fpr; if ((fpr = _bgzf_dopen(fd, "r")) == 0) return 0; fp = bgzf_read_init(); fp->fp = fpr; } else if (strchr(mode, 'w') || strchr(mode, 'W')) { FILE *fpw; if ((fpw = fdopen(fd, "w")) == 0) return 0; fp = bgzf_write_init(mode2level(mode)); fp->fp = fpw; } return fp; } static int bgzf_compress(void *_dst, int *dlen, void *src, int slen, int level) { uint32_t crc; z_stream zs; uint8_t *dst = (uint8_t*)_dst; // compress the body zs.zalloc = NULL; zs.zfree = NULL; zs.next_in = src; zs.avail_in = slen; zs.next_out = dst + BLOCK_HEADER_LENGTH; zs.avail_out = *dlen - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH; if (deflateInit2(&zs, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) return -1; // -15 to disable zlib header/footer if (deflate(&zs, Z_FINISH) != Z_STREAM_END) return -1; if (deflateEnd(&zs) != Z_OK) return -1; *dlen = zs.total_out + BLOCK_HEADER_LENGTH + 
BLOCK_FOOTER_LENGTH; // write the header memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes // write the footer crc = crc32(crc32(0L, NULL, 0L), src, slen); packInt32((uint8_t*)&dst[*dlen - 8], crc); packInt32((uint8_t*)&dst[*dlen - 4], slen); return 0; } // Deflate the block in fp->uncompressed_block into fp->compressed_block. Also adds an extra field that stores the compressed block length. static int deflate_block(BGZF *fp, int block_length) { int comp_size = BGZF_MAX_BLOCK_SIZE; if (bgzf_compress(fp->compressed_block, &comp_size, fp->uncompressed_block, block_length, fp->compress_level) != 0) { fp->errcode |= BGZF_ERR_ZLIB; return -1; } fp->block_offset = 0; return comp_size; } // Inflate the block in fp->compressed_block into fp->uncompressed_block static int inflate_block(BGZF* fp, int block_length) { z_stream zs; zs.zalloc = NULL; zs.zfree = NULL; zs.next_in = fp->compressed_block + 18; zs.avail_in = block_length - 16; zs.next_out = fp->uncompressed_block; zs.avail_out = BGZF_MAX_BLOCK_SIZE; if (inflateInit2(&zs, -15) != Z_OK) { fp->errcode |= BGZF_ERR_ZLIB; return -1; } if (inflate(&zs, Z_FINISH) != Z_STREAM_END) { inflateEnd(&zs); fp->errcode |= BGZF_ERR_ZLIB; return -1; } if (inflateEnd(&zs) != Z_OK) { fp->errcode |= BGZF_ERR_ZLIB; return -1; } return zs.total_out; } static int check_header(const uint8_t *header) { return (header[0] == 31 && header[1] == 139 && header[2] == 8 && (header[3] & 4) != 0 && unpackInt16((uint8_t*)&header[10]) == 6 && header[12] == 'B' && header[13] == 'C' && unpackInt16((uint8_t*)&header[14]) == 2); } #ifdef BGZF_CACHE static void free_cache(BGZF *fp) { khint_t k; khash_t(cache) *h = (khash_t(cache)*)fp->cache; if (fp->is_write) return; for (k = kh_begin(h); k < kh_end(h); ++k) if (kh_exist(h, k)) free(kh_val(h, k).block); kh_destroy(cache, h); } static int load_block_from_cache(BGZF 
*fp, int64_t block_address) { khint_t k; cache_t *p; khash_t(cache) *h = (khash_t(cache)*)fp->cache; k = kh_get(cache, h, block_address); if (k == kh_end(h)) return 0; p = &kh_val(h, k); if (fp->block_length != 0) fp->block_offset = 0; fp->block_address = block_address; fp->block_length = p->size; memcpy(fp->uncompressed_block, p->block, BGZF_MAX_BLOCK_SIZE); _bgzf_seek((_bgzf_file_t)fp->fp, p->end_offset, SEEK_SET); return p->size; } static void cache_block(BGZF *fp, int size) { int ret; khint_t k; cache_t *p; khash_t(cache) *h = (khash_t(cache)*)fp->cache; if (BGZF_MAX_BLOCK_SIZE >= fp->cache_size) return; if ((kh_size(h) + 1) * BGZF_MAX_BLOCK_SIZE > fp->cache_size) { /* A better way would be to remove the oldest block in the * cache, but here we remove a random one for simplicity. This * should not have a big impact on performance. */ for (k = kh_begin(h); k < kh_end(h); ++k) if (kh_exist(h, k)) break; if (k < kh_end(h)) { free(kh_val(h, k).block); kh_del(cache, h, k); } } k = kh_put(cache, h, fp->block_address, &ret); if (ret == 0) return; // if this happens, a bug! 
p = &kh_val(h, k); p->size = fp->block_length; p->end_offset = fp->block_address + size; p->block = malloc(BGZF_MAX_BLOCK_SIZE); memcpy(kh_val(h, k).block, fp->uncompressed_block, BGZF_MAX_BLOCK_SIZE); } #else static void free_cache(BGZF *fp) {} static int load_block_from_cache(BGZF *fp, int64_t block_address) {return 0;} static void cache_block(BGZF *fp, int size) {} #endif int bgzf_read_block(BGZF *fp) { uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block; int count, size = 0, block_length, remaining; int64_t block_address; block_address = _bgzf_tell((_bgzf_file_t)fp->fp); if (fp->cache_size && load_block_from_cache(fp, block_address)) return 0; count = _bgzf_read(fp->fp, header, sizeof(header)); if (count == 0) { // no data read fp->block_length = 0; return 0; } if (count != sizeof(header) || !check_header(header)) { fp->errcode |= BGZF_ERR_HEADER; return -1; } size = count; block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1" compressed_block = (uint8_t*)fp->compressed_block; memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); remaining = block_length - BLOCK_HEADER_LENGTH; count = _bgzf_read(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining); if (count != remaining) { fp->errcode |= BGZF_ERR_IO; return -1; } size += count; if ((count = inflate_block(fp, block_length)) < 0) return -1; if (fp->block_length != 0) fp->block_offset = 0; // Do not reset offset if this read follows a seek. 
fp->block_address = block_address; fp->block_length = count; cache_block(fp, size); return 0; } ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length) { ssize_t bytes_read = 0; uint8_t *output = data; if (length <= 0) return 0; assert(fp->is_write == 0); while (bytes_read < length) { int copy_length, available = fp->block_length - fp->block_offset; uint8_t *buffer; if (available <= 0) { if (bgzf_read_block(fp) != 0) return -1; available = fp->block_length - fp->block_offset; if (available <= 0) break; } copy_length = length - bytes_read < available? length - bytes_read : available; buffer = fp->uncompressed_block; memcpy(output, buffer + fp->block_offset, copy_length); fp->block_offset += copy_length; output += copy_length; bytes_read += copy_length; } if (fp->block_offset == fp->block_length) { fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp); fp->block_offset = fp->block_length = 0; } return bytes_read; } /***** BEGIN: multi-threading *****/ typedef struct { BGZF *fp; struct mtaux_t *mt; void *buf; int i, errcode, toproc; } worker_t; typedef struct mtaux_t { int n_threads, n_blks, curr, done; volatile int proc_cnt; void **blk; int *len; worker_t *w; pthread_t *tid; pthread_mutex_t lock; pthread_cond_t cv; } mtaux_t; static int worker_aux(worker_t *w) { int i, tmp, stop = 0; // wait for condition: to process or all done pthread_mutex_lock(&w->mt->lock); while (!w->toproc && !w->mt->done) pthread_cond_wait(&w->mt->cv, &w->mt->lock); if (w->mt->done) stop = 1; w->toproc = 0; pthread_mutex_unlock(&w->mt->lock); if (stop) return 1; // to quit the thread w->errcode = 0; for (i = w->i; i < w->mt->curr; i += w->mt->n_threads) { int clen = BGZF_MAX_BLOCK_SIZE; if (bgzf_compress(w->buf, &clen, w->mt->blk[i], w->mt->len[i], w->fp->compress_level) != 0) w->errcode |= BGZF_ERR_ZLIB; memcpy(w->mt->blk[i], w->buf, clen); w->mt->len[i] = clen; } tmp = __sync_fetch_and_add(&w->mt->proc_cnt, 1); return 0; } static void *mt_worker(void *data) { while (worker_aux(data) == 0); 
return 0; } int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks) { int i; mtaux_t *mt; pthread_attr_t attr; if (!fp->is_write || fp->mt || n_threads <= 1) return -1; mt = calloc(1, sizeof(mtaux_t)); mt->n_threads = n_threads; mt->n_blks = n_threads * n_sub_blks; mt->len = calloc(mt->n_blks, sizeof(int)); mt->blk = calloc(mt->n_blks, sizeof(void*)); for (i = 0; i < mt->n_blks; ++i) mt->blk[i] = malloc(BGZF_MAX_BLOCK_SIZE); mt->tid = calloc(mt->n_threads, sizeof(pthread_t)); // tid[0] is not used, as the worker 0 is launched by the master mt->w = calloc(mt->n_threads, sizeof(worker_t)); for (i = 0; i < mt->n_threads; ++i) { mt->w[i].i = i; mt->w[i].mt = mt; mt->w[i].fp = fp; mt->w[i].buf = malloc(BGZF_MAX_BLOCK_SIZE); } pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_mutex_init(&mt->lock, 0); pthread_cond_init(&mt->cv, 0); for (i = 1; i < mt->n_threads; ++i) // worker 0 is effectively launched by the master thread pthread_create(&mt->tid[i], &attr, mt_worker, &mt->w[i]); fp->mt = mt; return 0; } static void mt_destroy(mtaux_t *mt) { int i; // signal all workers to quit pthread_mutex_lock(&mt->lock); mt->done = 1; mt->proc_cnt = 0; pthread_cond_broadcast(&mt->cv); pthread_mutex_unlock(&mt->lock); for (i = 1; i < mt->n_threads; ++i) pthread_join(mt->tid[i], 0); // worker 0 is effectively launched by the master thread // free other data allocated on heap for (i = 0; i < mt->n_blks; ++i) free(mt->blk[i]); for (i = 0; i < mt->n_threads; ++i) free(mt->w[i].buf); free(mt->blk); free(mt->len); free(mt->w); free(mt->tid); pthread_cond_destroy(&mt->cv); pthread_mutex_destroy(&mt->lock); free(mt); } static void mt_queue(BGZF *fp) { mtaux_t *mt = (mtaux_t*)fp->mt; assert(mt->curr < mt->n_blks); // guaranteed by the caller memcpy(mt->blk[mt->curr], fp->uncompressed_block, fp->block_offset); mt->len[mt->curr] = fp->block_offset; fp->block_offset = 0; ++mt->curr; } static int mt_flush(BGZF *fp) { int i; mtaux_t *mt = (mtaux_t*)fp->mt; 
if (fp->block_offset) mt_queue(fp); // guaranteed that assertion does not fail // signal all the workers to compress pthread_mutex_lock(&mt->lock); for (i = 0; i < mt->n_threads; ++i) mt->w[i].toproc = 1; mt->proc_cnt = 0; pthread_cond_broadcast(&mt->cv); pthread_mutex_unlock(&mt->lock); // worker 0 is doing things here worker_aux(&mt->w[0]); // wait for all the threads to complete while (mt->proc_cnt < mt->n_threads); // dump data to disk for (i = 0; i < mt->n_threads; ++i) fp->errcode |= mt->w[i].errcode; for (i = 0; i < mt->curr; ++i) if (fwrite(mt->blk[i], 1, mt->len[i], fp->fp) != mt->len[i]) fp->errcode |= BGZF_ERR_IO; mt->curr = 0; return 0; } static int mt_lazy_flush(BGZF *fp) { mtaux_t *mt = (mtaux_t*)fp->mt; if (fp->block_offset) mt_queue(fp); if (mt->curr == mt->n_blks) return mt_flush(fp); return -1; } static ssize_t mt_write(BGZF *fp, const void *data, ssize_t length) { const uint8_t *input = data; ssize_t rest = length; while (rest) { int copy_length = BGZF_BLOCK_SIZE - fp->block_offset < rest? 
BGZF_BLOCK_SIZE - fp->block_offset : rest; memcpy(fp->uncompressed_block + fp->block_offset, input, copy_length); fp->block_offset += copy_length; input += copy_length; rest -= copy_length; if (fp->block_offset == BGZF_BLOCK_SIZE) mt_lazy_flush(fp); } return length - rest; } /***** END: multi-threading *****/ int bgzf_flush(BGZF *fp) { if (!fp->is_write) return 0; if (fp->mt) return mt_flush(fp); while (fp->block_offset > 0) { int block_length; block_length = deflate_block(fp, fp->block_offset); if (block_length < 0) return -1; if (fwrite(fp->compressed_block, 1, block_length, fp->fp) != block_length) { fp->errcode |= BGZF_ERR_IO; // possibly truncated file return -1; } fp->block_address += block_length; } return 0; } int bgzf_flush_try(BGZF *fp, ssize_t size) { if (fp->block_offset + size > BGZF_BLOCK_SIZE) { if (fp->mt) return mt_lazy_flush(fp); else return bgzf_flush(fp); } return -1; } ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length) { const uint8_t *input = data; int block_length = BGZF_BLOCK_SIZE, bytes_written = 0; assert(fp->is_write); if (fp->mt) return mt_write(fp, data, length); while (bytes_written < length) { uint8_t* buffer = fp->uncompressed_block; int copy_length = block_length - fp->block_offset < length - bytes_written? block_length - fp->block_offset : length - bytes_written; memcpy(buffer + fp->block_offset, input, copy_length); fp->block_offset += copy_length; input += copy_length; bytes_written += copy_length; if (fp->block_offset == block_length && bgzf_flush(fp)) break; } return bytes_written; } int bgzf_close(BGZF* fp) { int ret, count, block_length; if (fp == 0) return -1; if (fp->is_write) { if (bgzf_flush(fp) != 0) return -1; fp->compress_level = -1; block_length = deflate_block(fp, 0); // write an empty block count = fwrite(fp->compressed_block, 1, block_length, fp->fp); if (fflush(fp->fp) != 0) { fp->errcode |= BGZF_ERR_IO; return -1; } if (fp->mt) mt_destroy(fp->mt); } ret = fp->is_write? 
fclose(fp->fp) : _bgzf_close(fp->fp); if (ret != 0) return -1; free(fp->uncompressed_block); free(fp->compressed_block); free_cache(fp); free(fp); return 0; } void bgzf_set_cache_size(BGZF *fp, int cache_size) { if (fp) fp->cache_size = cache_size; } int bgzf_check_EOF(BGZF *fp) { static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0"; uint8_t buf[28]; off_t offset; offset = _bgzf_tell((_bgzf_file_t)fp->fp); if (_bgzf_seek(fp->fp, -28, SEEK_END) < 0) return 0; _bgzf_read(fp->fp, buf, 28); _bgzf_seek(fp->fp, offset, SEEK_SET); return (memcmp(magic, buf, 28) == 0)? 1 : 0; } int64_t bgzf_seek(BGZF* fp, int64_t pos, int where) { int block_offset; int64_t block_address; if (fp->is_write || where != SEEK_SET) { fp->errcode |= BGZF_ERR_MISUSE; return -1; } block_offset = pos & 0xFFFF; block_address = pos >> 16; if (_bgzf_seek(fp->fp, block_address, SEEK_SET) < 0) { fp->errcode |= BGZF_ERR_IO; return -1; } fp->block_length = 0; // indicates current block has not been loaded fp->block_address = block_address; fp->block_offset = block_offset; return 0; } int bgzf_is_bgzf(const char *fn) { uint8_t buf[16]; int n; _bgzf_file_t fp; if ((fp = _bgzf_open(fn, "r")) == 0) return 0; n = _bgzf_read(fp, buf, 16); _bgzf_close(fp); if (n != 16) return 0; return memcmp(g_magic, buf, 16) == 0? 
1 : 0; } int bgzf_getc(BGZF *fp) { int c; if (fp->block_offset >= fp->block_length) { if (bgzf_read_block(fp) != 0) return -2; /* error */ if (fp->block_length == 0) return -1; /* end-of-file */ } c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; if (fp->block_offset == fp->block_length) { fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp); fp->block_offset = 0; fp->block_length = 0; } return c; } #ifndef kroundup32 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #endif int bgzf_getline(BGZF *fp, int delim, kstring_t *str) { int l, state = 0; unsigned char *buf = (unsigned char*)fp->uncompressed_block; str->l = 0; do { if (fp->block_offset >= fp->block_length) { if (bgzf_read_block(fp) != 0) { state = -2; break; } if (fp->block_length == 0) { state = -1; break; } } for (l = fp->block_offset; l < fp->block_length && buf[l] != delim; ++l); if (l < fp->block_length) state = 1; l -= fp->block_offset; if (str->l + l + 1 >= str->m) { str->m = str->l + l + 2; kroundup32(str->m); str->s = (char*)realloc(str->s, str->m); } memcpy(str->s + str->l, buf + fp->block_offset, l); str->l += l; fp->block_offset += l + 1; if (fp->block_offset >= fp->block_length) { fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp); fp->block_offset = 0; fp->block_length = 0; } } while (state == 0); if (str->l == 0 && state < 0) return state; str->s[str->l] = 0; return str->l; } samtools-0.1.19/bgzf.h000066400000000000000000000142661212162403000145440ustar00rootroot00000000000000/* The MIT License Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology 2011, 2012 Attractive Chaos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and 
to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* The BGZF library was originally written by Bob Handsaker from the Broad * Institute. It was later improved by the SAMtools developers. */ #ifndef __BGZF_H #define __BGZF_H #include #include #include #include #define BGZF_BLOCK_SIZE 0xff00 // make sure compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE #define BGZF_MAX_BLOCK_SIZE 0x10000 #define BGZF_ERR_ZLIB 1 #define BGZF_ERR_HEADER 2 #define BGZF_ERR_IO 4 #define BGZF_ERR_MISUSE 8 typedef struct { int errcode:16, is_write:2, compress_level:14; int cache_size; int block_length, block_offset; int64_t block_address; void *uncompressed_block, *compressed_block; void *cache; // a pointer to a hash table void *fp; // actual file handler; FILE* on writing; FILE* or knetFile* on reading void *mt; // only used for multi-threading } BGZF; #ifndef KSTRING_T #define KSTRING_T kstring_t typedef struct __kstring_t { size_t l, m; char *s; } kstring_t; #endif #ifdef __cplusplus extern "C" { #endif /****************** * Basic routines * ******************/ /** * Open an existing file descriptor for reading or writing. * * @param fd file descriptor * @param mode mode matching /[rwu0-9]+/: 'r' for reading, 'w' for writing and a digit specifies * the zlib compression level; if both 'r' and 'w' are present, 'w' is ignored. 
* @return BGZF file handler; 0 on error */ BGZF* bgzf_dopen(int fd, const char *mode); #define bgzf_fdopen(fd, mode) bgzf_dopen((fd), (mode)) // for backward compatibility /** * Open the specified file for reading or writing. */ BGZF* bgzf_open(const char* path, const char *mode); /** * Close the BGZF and free all associated resources. * * @param fp BGZF file handler * @return 0 on success and -1 on error */ int bgzf_close(BGZF *fp); /** * Read up to _length_ bytes from the file storing into _data_. * * @param fp BGZF file handler * @param data data array to read into * @param length size of data to read * @return number of bytes actually read; 0 on end-of-file and -1 on error */ ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length); /** * Write _length_ bytes from _data_ to the file. * * @param fp BGZF file handler * @param data data array to write * @param length size of data to write * @return number of bytes actually written; -1 on error */ ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length); /** * Write the data in the buffer to the file. */ int bgzf_flush(BGZF *fp); /** * Return a virtual file pointer to the current location in the file. * No interpetation of the value should be made, other than a subsequent * call to bgzf_seek can be used to position the file at the same point. * Return value is non-negative on success. */ #define bgzf_tell(fp) ((fp->block_address << 16) | (fp->block_offset & 0xFFFF)) /** * Set the file to read from the location specified by _pos_. 
* * @param fp BGZF file handler * @param pos virtual file offset returned by bgzf_tell() * @param whence must be SEEK_SET * @return 0 on success and -1 on error */ int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence); /** * Check if the BGZF end-of-file (EOF) marker is present * * @param fp BGZF file handler opened for reading * @return 1 if EOF is present; 0 if not or on I/O error */ int bgzf_check_EOF(BGZF *fp); /** * Check if a file is in the BGZF format * * @param fn file name * @return 1 if _fn_ is BGZF; 0 if not or on I/O error */ int bgzf_is_bgzf(const char *fn); /********************* * Advanced routines * *********************/ /** * Set the cache size. Only effective when compiled with -DBGZF_CACHE. * * @param fp BGZF file handler * @param size size of cache in bytes; 0 to disable caching (default) */ void bgzf_set_cache_size(BGZF *fp, int size); /** * Flush the file if the remaining buffer size is smaller than _size_ */ int bgzf_flush_try(BGZF *fp, ssize_t size); /** * Read one byte from a BGZF file. It is faster than bgzf_read() * @param fp BGZF file handler * @return byte read; -1 on end-of-file or error */ int bgzf_getc(BGZF *fp); /** * Read one line from a BGZF file. It is faster than bgzf_getc() * * @param fp BGZF file handler * @param delim delimitor * @param str string to write to; must be initialized * @return length of the string; 0 on end-of-file; negative on error */ int bgzf_getline(BGZF *fp, int delim, kstring_t *str); /** * Read the next BGZF block. 
*/ int bgzf_read_block(BGZF *fp); /** * Enable multi-threading (only effective on writing) * * @param fp BGZF file handler; must be opened for writing * @param n_threads #threads used for writing * @param n_sub_blks #blocks processed by each thread; a value 64-256 is recommended */ int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks); #ifdef __cplusplus } #endif #endif samtools-0.1.19/bgzip.c000066400000000000000000000134171212162403000147170ustar00rootroot00000000000000/* The MIT License Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include #include #include #include #include "bgzf.h" static const int WINDOW_SIZE = 64 * 1024; static int bgzip_main_usage() { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bgzip [options] [file] ...\n\n"); fprintf(stderr, "Options: -c write on standard output, keep original files unchanged\n"); fprintf(stderr, " -d decompress\n"); fprintf(stderr, " -f overwrite files without asking\n"); fprintf(stderr, " -b INT decompress at virtual file pointer INT\n"); fprintf(stderr, " -s INT decompress INT bytes in the uncompressed file\n"); fprintf(stderr, " -h give this help\n"); fprintf(stderr, "\n"); return 1; } static int write_open(const char *fn, int is_forced) { int fd = -1; char c; if (!is_forced) { if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0666)) < 0 && errno == EEXIST) { fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? ", fn); scanf("%c", &c); if (c != 'Y' && c != 'y') { fprintf(stderr, "[bgzip] not overwritten\n"); exit(1); } } } if (fd < 0) { if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0) { fprintf(stderr, "[bgzip] %s: Fail to write\n", fn); exit(1); } } return fd; } static void fail(BGZF* fp) { fprintf(stderr, "Error: %s\n", fp->error); exit(1); } int main(int argc, char **argv) { int c, compress, pstdout, is_forced; BGZF *fp; void *buffer; long start, end, size; compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; while((c = getopt(argc, argv, "cdhfb:s:")) >= 0){ switch(c){ case 'h': return bgzip_main_usage(); case 'd': compress = 0; break; case 'c': pstdout = 1; break; case 'b': start = atol(optarg); break; case 's': size = atol(optarg); break; case 'f': is_forced = 1; break; } } if (size >= 0) end = start + size; if (end >= 0 && end < start) { fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end); return 1; } if (compress == 1) { struct stat sbuf; int f_src = fileno(stdin); int f_dst = fileno(stdout); if ( argc>optind ) { if ( 
stat(argv[optind],&sbuf)<0 ) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } if ((f_src = open(argv[optind], O_RDONLY)) < 0) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } if (pstdout) f_dst = fileno(stdout); else { char *name = malloc(strlen(argv[optind]) + 5); strcpy(name, argv[optind]); strcat(name, ".gz"); f_dst = write_open(name, is_forced); if (f_dst < 0) return 1; free(name); } } else if (!pstdout && isatty(fileno((FILE *)stdout)) ) return bgzip_main_usage(); fp = bgzf_fdopen(f_dst, "w"); buffer = malloc(WINDOW_SIZE); while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0) if (bgzf_write(fp, buffer, c) < 0) fail(fp); // f_dst will be closed here if (bgzf_close(fp) < 0) fail(fp); if (argc > optind && !pstdout) unlink(argv[optind]); free(buffer); close(f_src); return 0; } else { struct stat sbuf; int f_dst; if ( argc>optind ) { if ( stat(argv[optind],&sbuf)<0 ) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } char *name; int len = strlen(argv[optind]); if ( strcmp(argv[optind]+len-3,".gz") ) { fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]); return 1; } fp = bgzf_open(argv[optind], "r"); if (fp == NULL) { fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]); return 1; } if (pstdout) { f_dst = fileno(stdout); } else { name = strdup(argv[optind]); name[strlen(name) - 3] = '\0'; f_dst = write_open(name, is_forced); free(name); } } else if (!pstdout && isatty(fileno((FILE *)stdin)) ) return bgzip_main_usage(); else { f_dst = fileno(stdout); fp = bgzf_fdopen(fileno(stdin), "r"); if (fp == NULL) { fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno)); return 1; } } buffer = malloc(WINDOW_SIZE); if (bgzf_seek(fp, start, SEEK_SET) < 0) fail(fp); while (1) { if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE); else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? 
WINDOW_SIZE:(end - start)); if (c == 0) break; if (c < 0) fail(fp); start += c; write(f_dst, buffer, c); if (end >= 0 && start >= end) break; } free(buffer); if (bgzf_close(fp) < 0) fail(fp); if (!pstdout) unlink(argv[optind]); return 0; } } samtools-0.1.19/cut_target.c000066400000000000000000000130431212162403000157400ustar00rootroot00000000000000#include #include #include #include "bam.h" #include "errmod.h" #include "faidx.h" #define ERR_DEP 0.83f typedef struct { int e[2][3], p[2][2]; } score_param_t; /* Note that although the two matrics have 10 parameters in total, only 4 * (probably 3) are free. Changing the scoring matrices in a sort of symmetric * way will not change the result. */ static score_param_t g_param = { {{0,0,0},{-4,1,6}}, {{0,-14000}, {0,0}} }; typedef struct { int min_baseQ, tid, max_bases; uint16_t *bases; bamFile fp; bam_header_t *h; char *ref; faidx_t *fai; errmod_t *em; } ct_t; static uint16_t gencns(ct_t *g, int n, const bam_pileup1_t *plp) { int i, j, ret, tmp, k, sum[4], qual; float q[16]; if (n > g->max_bases) { // enlarge g->bases g->max_bases = n; kroundup32(g->max_bases); g->bases = realloc(g->bases, g->max_bases * 2); } for (i = k = 0; i < n; ++i) { const bam_pileup1_t *p = plp + i; uint8_t *seq; int q, baseQ, b; if (p->is_refskip || p->is_del) continue; baseQ = bam1_qual(p->b)[p->qpos]; if (baseQ < g->min_baseQ) continue; seq = bam1_seq(p->b); b = bam_nt16_nt4_table[bam1_seqi(seq, p->qpos)]; if (b > 3) continue; q = baseQ < p->b->core.qual? baseQ : p->b->core.qual; if (q < 4) q = 4; if (q > 63) q = 63; g->bases[k++] = q<<5 | bam1_strand(p->b)<<4 | b; } if (k == 0) return 0; errmod_cal(g->em, k, 4, g->bases, q); for (i = 0; i < 4; ++i) sum[i] = (int)(q[i<<2|i] + .499) << 2 | i; for (i = 1; i < 4; ++i) // insertion sort for (j = i; j > 0 && sum[j] < sum[j-1]; --j) tmp = sum[j], sum[j] = sum[j-1], sum[j-1] = tmp; qual = (sum[1]>>2) - (sum[0]>>2); k = k < 256? k : 255; ret = (qual < 63? 
qual : 63) << 2 | (sum[0]&3); return ret<<8|k; } static void process_cns(bam_header_t *h, int tid, int l, uint16_t *cns) { int i, f[2][2], *prev, *curr, *swap_tmp, s; uint8_t *b; // backtrack array b = calloc(l, 1); f[0][0] = f[0][1] = 0; prev = f[0]; curr = f[1]; // fill the backtrack matrix for (i = 0; i < l; ++i) { int c = (cns[i] == 0)? 0 : (cns[i]>>8 == 0)? 1 : 2; int tmp0, tmp1; // compute f[0] tmp0 = prev[0] + g_param.e[0][c] + g_param.p[0][0]; // (s[i+1],s[i])=(0,0) tmp1 = prev[1] + g_param.e[0][c] + g_param.p[1][0]; // (0,1) if (tmp0 > tmp1) curr[0] = tmp0, b[i] = 0; else curr[0] = tmp1, b[i] = 1; // compute f[1] tmp0 = prev[0] + g_param.e[1][c] + g_param.p[0][1]; // (s[i+1],s[i])=(1,0) tmp1 = prev[1] + g_param.e[1][c] + g_param.p[1][1]; // (1,1) if (tmp0 > tmp1) curr[1] = tmp0, b[i] |= 0<<1; else curr[1] = tmp1, b[i] |= 1<<1; // swap swap_tmp = prev; prev = curr; curr = swap_tmp; } // backtrack s = prev[0] > prev[1]? 0 : 1; for (i = l - 1; i > 0; --i) { b[i] |= s<<2; s = b[i]>>s&1; } // print for (i = 0, s = -1; i <= l; ++i) { if (i == l || ((b[i]>>2&3) == 0 && s >= 0)) { if (s >= 0) { int j; printf("%s:%d-%d\t0\t%s\t%d\t60\t%dM\t*\t0\t0\t", h->target_name[tid], s+1, i, h->target_name[tid], s+1, i-s); for (j = s; j < i; ++j) { int c = cns[j]>>8; if (c == 0) putchar('N'); else putchar("ACGT"[c&3]); } putchar('\t'); for (j = s; j < i; ++j) putchar(33 + (cns[j]>>8>>2)); putchar('\n'); } //if (s >= 0) printf("%s\t%d\t%d\t%d\n", h->target_name[tid], s, i, i - s); s = -1; } else if ((b[i]>>2&3) && s < 0) s = i; } free(b); } static int read_aln(void *data, bam1_t *b) { extern int bam_prob_realn_core(bam1_t *b, const char *ref, int flag); ct_t *g = (ct_t*)data; int ret, len; ret = bam_read1(g->fp, b); if (ret >= 0 && g->fai && b->core.tid >= 0 && (b->core.flag&4) == 0) { if (b->core.tid != g->tid) { // then load the sequence free(g->ref); g->ref = fai_fetch(g->fai, g->h->target_name[b->core.tid], &len); g->tid = b->core.tid; } bam_prob_realn_core(b, g->ref, 
1<<1|1); } return ret; } int main_cut_target(int argc, char *argv[]) { int c, tid, pos, n, lasttid = -1, lastpos = -1, l, max_l; const bam_pileup1_t *p; bam_plp_t plp; uint16_t *cns; ct_t g; memset(&g, 0, sizeof(ct_t)); g.min_baseQ = 13; g.tid = -1; while ((c = getopt(argc, argv, "f:Q:i:o:0:1:2:")) >= 0) { switch (c) { case 'Q': g.min_baseQ = atoi(optarg); break; // quality cutoff case 'i': g_param.p[0][1] = -atoi(optarg); break; // 0->1 transition (in) PENALTY case '0': g_param.e[1][0] = atoi(optarg); break; // emission SCORE case '1': g_param.e[1][1] = atoi(optarg); break; case '2': g_param.e[1][2] = atoi(optarg); break; case 'f': g.fai = fai_load(optarg); if (g.fai == 0) fprintf(stderr, "[%s] fail to load the fasta index.\n", __func__); break; } } if (argc == optind) { fprintf(stderr, "Usage: samtools targetcut [-Q minQ] [-i inPen] [-0 em0] [-1 em1] [-2 em2] [-f ref] \n"); return 1; } l = max_l = 0; cns = 0; g.fp = strcmp(argv[optind], "-")? bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r"); g.h = bam_header_read(g.fp); g.em = errmod_init(1 - ERR_DEP); plp = bam_plp_init(read_aln, &g); while ((p = bam_plp_auto(plp, &tid, &pos, &n)) != 0) { if (tid < 0) break; if (tid != lasttid) { // change of chromosome if (cns) process_cns(g.h, lasttid, l, cns); if (max_l < g.h->target_len[tid]) { max_l = g.h->target_len[tid]; kroundup32(max_l); cns = realloc(cns, max_l * 2); } l = g.h->target_len[tid]; memset(cns, 0, max_l * 2); lasttid = tid; } cns[pos] = gencns(&g, n, p); lastpos = pos; } process_cns(g.h, lasttid, l, cns); free(cns); bam_header_destroy(g.h); bam_plp_destroy(plp); bam_close(g.fp); if (g.fai) { fai_destroy(g.fai); free(g.ref); } errmod_destroy(g.em); free(g.bases); return 0; } samtools-0.1.19/errmod.c000066400000000000000000000066351212162403000151000ustar00rootroot00000000000000#include #include "errmod.h" #include "ksort.h" KSORT_INIT_GENERIC(uint16_t) typedef struct __errmod_coef_t { double *fk, *beta, *lhet; } errmod_coef_t; typedef struct { 
double fsum[16], bsum[16]; uint32_t c[16]; } call_aux_t; static errmod_coef_t *cal_coef(double depcorr, double eta) { int k, n, q; long double sum, sum1; double *lC; errmod_coef_t *ec; ec = calloc(1, sizeof(errmod_coef_t)); // initialize ->fk ec->fk = (double*)calloc(256, sizeof(double)); ec->fk[0] = 1.0; for (n = 1; n != 256; ++n) ec->fk[n] = pow(1. - depcorr, n) * (1.0 - eta) + eta; // initialize ->coef ec->beta = (double*)calloc(256 * 256 * 64, sizeof(double)); lC = (double*)calloc(256 * 256, sizeof(double)); for (n = 1; n != 256; ++n) { double lgn = lgamma(n+1); for (k = 1; k <= n; ++k) lC[n<<8|k] = lgn - lgamma(k+1) - lgamma(n-k+1); } for (q = 1; q != 64; ++q) { double e = pow(10.0, -q/10.0); double le = log(e); double le1 = log(1.0 - e); for (n = 1; n <= 255; ++n) { double *beta = ec->beta + (q<<16|n<<8); sum1 = sum = 0.0; for (k = n; k >= 0; --k, sum1 = sum) { sum = sum1 + expl(lC[n<<8|k] + k*le + (n-k)*le1); beta[k] = -10. / M_LN10 * logl(sum1 / sum); } } } // initialize ->lhet ec->lhet = (double*)calloc(256 * 256, sizeof(double)); for (n = 0; n < 256; ++n) for (k = 0; k < 256; ++k) ec->lhet[n<<8|k] = lC[n<<8|k] - M_LN2 * n; free(lC); return ec; } errmod_t *errmod_init(float depcorr) { errmod_t *em; em = (errmod_t*)calloc(1, sizeof(errmod_t)); em->depcorr = depcorr; em->coef = cal_coef(depcorr, 0.03); return em; } void errmod_destroy(errmod_t *em) { if (em == 0) return; free(em->coef->lhet); free(em->coef->fk); free(em->coef->beta); free(em->coef); free(em); } // qual:6, strand:1, base:4 int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q) { call_aux_t aux; int i, j, k, w[32]; if (m > m) return -1; memset(q, 0, m * m * sizeof(float)); if (n == 0) return 0; // calculate aux.esum and aux.fsum if (n > 255) { // then sample 255 bases ks_shuffle(uint16_t, n, bases); n = 255; } ks_introsort(uint16_t, n, bases); memset(w, 0, 32 * sizeof(int)); memset(&aux, 0, sizeof(call_aux_t)); for (j = n - 1; j >= 0; --j) { // calculate esum and fsum 
uint16_t b = bases[j]; int q = b>>5 < 4? 4 : b>>5; if (q > 63) q = 63; k = b&0x1f; aux.fsum[k&0xf] += em->coef->fk[w[k]]; aux.bsum[k&0xf] += em->coef->fk[w[k]] * em->coef->beta[q<<16|n<<8|aux.c[k&0xf]]; ++aux.c[k&0xf]; ++w[k]; } // generate likelihood for (j = 0; j != m; ++j) { float tmp1, tmp3; int tmp2, bar_e; // homozygous for (k = 0, tmp1 = tmp3 = 0.0, tmp2 = 0; k != m; ++k) { if (k == j) continue; tmp1 += aux.bsum[k]; tmp2 += aux.c[k]; tmp3 += aux.fsum[k]; } if (tmp2) { bar_e = (int)(tmp1 / tmp3 + 0.499); if (bar_e > 63) bar_e = 63; q[j*m+j] = tmp1; } // heterozygous for (k = j + 1; k < m; ++k) { int cjk = aux.c[j] + aux.c[k]; for (i = 0, tmp2 = 0, tmp1 = tmp3 = 0.0; i < m; ++i) { if (i == j || i == k) continue; tmp1 += aux.bsum[i]; tmp2 += aux.c[i]; tmp3 += aux.fsum[i]; } if (tmp2) { bar_e = (int)(tmp1 / tmp3 + 0.499); if (bar_e > 63) bar_e = 63; q[j*m+k] = q[k*m+j] = -4.343 * em->coef->lhet[cjk<<8|aux.c[k]] + tmp1; } else q[j*m+k] = q[k*m+j] = -4.343 * em->coef->lhet[cjk<<8|aux.c[k]]; // all the bases are either j or k } for (k = 0; k != m; ++k) if (q[j*m+k] < 0.0) q[j*m+k] = 0.0; } return 0; } samtools-0.1.19/errmod.h000066400000000000000000000006721212162403000151000ustar00rootroot00000000000000#ifndef ERRMOD_H #define ERRMOD_H #include struct __errmod_coef_t; typedef struct { double depcorr; struct __errmod_coef_t *coef; } errmod_t; errmod_t *errmod_init(float depcorr); void errmod_destroy(errmod_t *em); /* n: number of bases m: maximum base bases[i]: qual:6, strand:1, base:4 q[i*m+j]: phred-scaled likelihood of (i,j) */ int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q); #endif samtools-0.1.19/examples/000077500000000000000000000000001212162403000152505ustar00rootroot00000000000000samtools-0.1.19/examples/00README.txt000066400000000000000000000015661212162403000171160ustar00rootroot00000000000000File ex1.fa contains two sequences cut from the human genome build36. 
They were extracted with command: samtools faidx human_b36.fa 2:2043966-2045540 20:67967-69550 Sequence names were changed manually for simplicity. File ex1.sam.gz contains MAQ alignments extracted with: (samtools view NA18507_maq.bam 2:2044001-2045500; samtools view NA18507_maq.bam 20:68001-69500) and processed with `samtools fixmate' to make it self-consistent as a standalone alignment. To try samtools, you may run the following commands: samtools faidx ex1.fa # index the reference FASTA samtools import ex1.fa.fai ex1.sam.gz ex1.bam # SAM->BAM samtools index ex1.bam # index BAM samtools tview ex1.bam ex1.fa # view alignment samtools pileup -cf ex1.fa ex1.bam # pileup and consensus samtools pileup -cf ex1.fa -t ex1.fa.fai ex1.sam.gz samtools-0.1.19/examples/Makefile000066400000000000000000000036551212162403000167210ustar00rootroot00000000000000all:../libbam.a ../samtools ../bcftools/bcftools \ ex1.glf ex1.pileup.gz ex1.bam.bai ex1f-rmduppe.bam ex1f-rmdupse.bam ex1.glfview.gz ex1.bcf calDepth @echo; echo \# You can now launch the viewer with: \'samtools tview ex1.bam ex1.fa\'; echo; ex1.fa.fai:ex1.fa ../samtools faidx ex1.fa ex1.bam:ex1.sam.gz ex1.fa.fai ../samtools import ex1.fa.fai ex1.sam.gz ex1.bam ex1.bam.bai:ex1.bam ../samtools index ex1.bam ex1.pileup.gz:ex1.bam ex1.fa ../samtools pileup -cf ex1.fa ex1.bam | gzip > ex1.pileup.gz ex1.glf:ex1.bam ex1.fa ../samtools pileup -gf ex1.fa ex1.bam > ex1.glf ex1.glfview.gz:ex1.glf ../samtools glfview ex1.glf | gzip > ex1.glfview.gz ex1a.bam:ex1.bam ../samtools view -h ex1.bam | awk 'BEGIN{FS=OFS="\t"}{if(/^@/)print;else{$$1=$$1"a";print}}' | ../samtools view -bS - > $@ ex1b.bam:ex1.bam ../samtools view -h ex1.bam | awk 'BEGIN{FS=OFS="\t"}{if(/^@/)print;else{$$1=$$1"b";print}}' | ../samtools view -bS - > $@ ex1f.rg: (echo "@RG ID:ex1 LB:ex1 SM:ex1"; echo "@RG ID:ex1a LB:ex1 SM:ex1"; echo "@RG ID:ex1b LB:ex1b SM:ex1b") > $@ ex1f.bam:ex1.bam ex1a.bam ex1b.bam ex1f.rg ../samtools merge -rh ex1f.rg $@ ex1.bam ex1a.bam 
ex1b.bam ex1f-rmduppe.bam:ex1f.bam ../samtools rmdup ex1f.bam $@ ex1f-rmdupse.bam:ex1f.bam ../samtools rmdup -S ex1f.bam $@ ex1.bcf:ex1.bam ex1.fa.fai ../samtools mpileup -gf ex1.fa ex1.bam > $@ ../bcftools/bcftools: (cd ../bcftools; make bcftools) ../samtools: (cd ..; make samtools) ../libbam.a: (cd ..; make libbam.a) calDepth:../libbam.a calDepth.c gcc -g -Wall -O2 -I.. calDepth.c -o $@ -L.. -lbam -lm -lz clean: rm -fr *.bam *.bai *.glf* *.fai *.pileup* *~ calDepth *.dSYM ex1*.rg ex1.bcf # ../samtools pileup ex1.bam|perl -ape '$_=$F[4];s/(\d+)(??{".{$1}"})|\^.//g;@_=(tr/A-Z//,tr/a-z//);$_=join("\t",@F[0,1],@_)."\n"' # ../samtools pileup -cf ex1.fa ex1.bam|perl -ape '$_=$F[8];s/\^.//g;s/(\d+)(??{".{$1}"})|\^.//g;@_=(tr/A-Za-z//,tr/,.//);$_=join("\t",@F[0,1],@_)."\n"' samtools-0.1.19/examples/bam2bed.c000066400000000000000000000026241212162403000167140ustar00rootroot00000000000000#include #include "sam.h" static int fetch_func(const bam1_t *b, void *data) { samfile_t *fp = (samfile_t*)data; uint32_t *cigar = bam1_cigar(b); const bam1_core_t *c = &b->core; int i, l; if (b->core.tid < 0) return 0; for (i = l = 0; i < c->n_cigar; ++i) { int op = cigar[i]&0xf; if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP) l += cigar[i]>>4; } printf("%s\t%d\t%d\t%s\t%d\t%c\n", fp->header->target_name[c->tid], c->pos, c->pos + l, bam1_qname(b), c->qual, (c->flag&BAM_FREVERSE)? 
'-' : '+'); return 0; } int main(int argc, char *argv[]) { samfile_t *fp; if (argc == 1) { fprintf(stderr, "Usage: bam2bed [region]\n"); return 1; } if ((fp = samopen(argv[1], "rb", 0)) == 0) { fprintf(stderr, "bam2bed: Fail to open BAM file %s\n", argv[1]); return 1; } if (argc == 2) { /* if a region is not specified */ bam1_t *b = bam_init1(); while (samread(fp, b) >= 0) fetch_func(b, fp); bam_destroy1(b); } else { int ref, beg, end; bam_index_t *idx; if ((idx = bam_index_load(argv[1])) == 0) { fprintf(stderr, "bam2bed: BAM indexing file is not available.\n"); return 1; } bam_parse_region(fp->header, argv[2], &ref, &beg, &end); if (ref < 0) { fprintf(stderr, "bam2bed: Invalid region %s\n", argv[2]); return 1; } bam_fetch(fp->x.bam, idx, ref, beg, end, fp, fetch_func); bam_index_destroy(idx); } samclose(fp); return 0; } samtools-0.1.19/examples/calDepth.c000066400000000000000000000031311212162403000171360ustar00rootroot00000000000000#include #include "sam.h" typedef struct { int beg, end; samfile_t *in; } tmpstruct_t; // callback for bam_fetch() static int fetch_func(const bam1_t *b, void *data) { bam_plbuf_t *buf = (bam_plbuf_t*)data; bam_plbuf_push(b, buf); return 0; } // callback for bam_plbuf_init() static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) { tmpstruct_t *tmp = (tmpstruct_t*)data; if ((int)pos >= tmp->beg && (int)pos < tmp->end) printf("%s\t%d\t%d\n", tmp->in->header->target_name[tid], pos + 1, n); return 0; } int main(int argc, char *argv[]) { tmpstruct_t tmp; if (argc == 1) { fprintf(stderr, "Usage: calDepth [region]\n"); return 1; } tmp.beg = 0; tmp.end = 0x7fffffff; tmp.in = samopen(argv[1], "rb", 0); if (tmp.in == 0) { fprintf(stderr, "Fail to open BAM file %s\n", argv[1]); return 1; } if (argc == 2) { // if a region is not specified sampileup(tmp.in, -1, pileup_func, &tmp); } else { int ref; bam_index_t *idx; bam_plbuf_t *buf; idx = bam_index_load(argv[1]); // load BAM index if (idx == 0) { 
fprintf(stderr, "BAM indexing file is not available.\n"); return 1; } bam_parse_region(tmp.in->header, argv[2], &ref, &tmp.beg, &tmp.end); // parse the region if (ref < 0) { fprintf(stderr, "Invalid region %s\n", argv[2]); return 1; } buf = bam_plbuf_init(pileup_func, &tmp); // initialize pileup bam_fetch(tmp.in->x.bam, idx, ref, tmp.beg, tmp.end, buf, fetch_func); bam_plbuf_push(0, buf); // finalize pileup bam_index_destroy(idx); bam_plbuf_destroy(buf); } samclose(tmp.in); return 0; } samtools-0.1.19/examples/chk_indel.c000066400000000000000000000045771212162403000173510ustar00rootroot00000000000000/* To compile, copy this file to the samtools source code directory and compile with: gcc -g -O2 -Wall chk_indel_rg.c -o chk_indel_rg -Wall -I. -L. -lbam -lz */ #include #include "bam.h" typedef struct { long cnt[4]; // short:ins, short:del, long:ins, long:del } rgcnt_t; #include "khash.h" KHASH_MAP_INIT_STR(rgcnt, rgcnt_t) #define MAX_LEN 127 #define Q_THRES 10 #define L_THRES 6 // short: <=L_THRES; otherwise long int main(int argc, char *argv[]) { bamFile fp; bam1_t *b; int i, x; khash_t(rgcnt) *h; khint_t k; if (argc == 1) { fprintf(stderr, "Usage: chk_indel_rg \n\n"); fprintf(stderr, "Output: filename, RG, #ins-in-short-homopolymer, #del-in-short, #ins-in-long, #del-in-long\n"); return 1; } h = kh_init(rgcnt); fp = bam_open(argv[1], "r"); bam_header_destroy(bam_header_read(fp)); // we do not need the header b = bam_init1(); while (bam_read1(fp, b) >= 0) { if (b->core.n_cigar >= 3 && b->core.qual >= Q_THRES) { const uint8_t *seq; const uint32_t *cigar = bam1_cigar(b); char *rg; for (i = 0; i < b->core.n_cigar; ++i) // check if there are 1bp indels if (bam_cigar_oplen(cigar[i]) == 1 && (bam_cigar_op(cigar[i]) == BAM_CDEL || bam_cigar_op(cigar[i]) == BAM_CINS)) break; if (i == b->core.n_cigar) continue; // no 1bp ins or del if ((rg = (char*)bam_aux_get(b, "RG")) == 0) continue; // no RG tag seq = bam1_seq(b); for (i = x = 0; i < b->core.n_cigar; ++i) { int op = 
bam_cigar_op(cigar[i]); if (bam_cigar_oplen(cigar[i]) == 1 && (op == BAM_CDEL || op == BAM_CINS)) { int c, j, hrun, which; c = bam1_seqi(seq, x); for (j = x + 1, hrun = 0; j < b->core.l_qseq; ++j, ++hrun) // calculate the hompolymer run length if (bam1_seqi(seq, j) != c) break; k = kh_get(rgcnt, h, rg + 1); if (k == kh_end(h)) { // absent char *key = strdup(rg + 1); k = kh_put(rgcnt, h, key, &c); memset(&kh_val(h, k), 0, sizeof(rgcnt_t)); } which = (hrun <= L_THRES? 0 : 1)<<1 | (op == BAM_CINS? 0 : 1); ++kh_val(h, k).cnt[which]; } if (bam_cigar_type(op)&1) ++x; } } } for (k = 0; k != kh_end(h); ++k) { if (!kh_exist(h, k)) continue; printf("%s\t%s", argv[1], kh_key(h, k)); for (i = 0; i < 4; ++i) printf("\t%ld", kh_val(h, k).cnt[i]); putchar('\n'); free((char*)kh_key(h, k)); } bam_destroy1(b); bam_close(fp); kh_destroy(rgcnt, h); return 0; } samtools-0.1.19/examples/ex1.fa000066400000000000000000000062311212162403000162570ustar00rootroot00000000000000>seq1 CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCGTCCATGGCCCAGCATTAGGGAGCT GTGGACCCTGCAGCCTGGCTGTGGGGGCCGCAGTGGCTGAGGGGTGCAGAGCCGAGTCAC GGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAG TCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAATGAAAACTATATTTATGCTATTC AGTTCTAAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAA CAACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGTTACC AAATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCT CTTCTTCCAAAGATGAAACGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCA ATGGACCTGTGATATCTGGATTCTGGGAAATTCTTCATCCTGGACCCTGAGAGATTCTGC AGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAGCCATCACAATGAAC AACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCAACCTCATACACACAC ATGGTTTAGGGGTATAATACCTCTACATGGCTGATTATGAAAACAATGTTCCCCAGATAC CATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTCT TTTGGCATTTGCCTTCAGACCCTACACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTT TCCCATCATGAAGCACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAGGTGCACTAAT GCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAAT 
ACATGAGATTATTAGGAAATGCTTTACTGTCATAACTATGAAGAGACTATTGCCAGATGA ACCACACATTAATACTATGTTTCTTATCTGCACATTACTACCCTGCAATTAATATAATTG TGTCCATGTACACACGCTGTCCTATGTACTTATCATGACTCTATCCCAAATTCCCAATTA CGTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAG TCTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGC TTGGGCTGTAATGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCATCTCTTGTAATCTC TCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTCTG TTTCTTTGTTTGATTTGGTGGAAGACATAATCCCACGCTTCCTATGGAAAGGTTGTTGGG AGATTTTTAATGATTCCTCAATGTTAAAATGTCTATTTTTGTCTTGACACCCAACTAATA TTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTC TCCCTCGTCTTCTTA >seq2 TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAAGAAATTACAAAATATAGTTGAAAG CTCTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCT TATGAATTAACCCAGTCAGACAAAAATAAAGAAAAAAATTTTAAAAATGAACAGAGCTTT CAAGAAGTATGAGATTATGTAAAGTAACTGAACCTATGAGTCACAGGTATTCCTGAGGAA AAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT AGTCTTGCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATAC ATTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACTATCTAAAGTCAACATGAAG GAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGGAAATCCCAT CAGAATAACAATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAAGAGATTGGATCTAATT TTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGTTATGCCCTGCTAAACTA AGCATCATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGATA ATTCATCATCACTAAACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAAT TAAAGTTCAATACTCACCATCATAAATACACACAAAAGTACAAAACTCACAGGTTTTATA AAACAATTGAGACTACAGAGCAACTAGGTAAAAAATTAACATTACAACAGGAACAAAACC TCATATATCAATATTAACTTTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAGATATA GATTGGCAGAACAGATTTAAAAACATGAACTAACTATATGCTGTTTACAAGAAACTCATT AATAAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAAACCA AATGAGAGAAGGAGTAGCTATACTTATATCAGATAAAGCACACTTTAAATCAACAACAGT AAAATAAAACAAAGGAGGTCATCATACAATGATAAAAAGATCAATTCAGCAAGAAGATAT AACCATCCTACTAAATACATATGCACCTAACACAAGACTACCCAGATTCATAAAACAAAT ACTACTAGACCTAAGAGGGATGAGAAATTACCTAATTGGTACAATGTACAATATTCTGAT GATGGTTACACTAAAAGCCCATACTTTACTGCTACTCAATATATCCATGTAACAAATCTG 
CGCTTGTACTTCTAAATCTATAAAAAAATTAAAATTTAACAAAAGTAAATAAAACACATA GCTAAAACTAAAAAAGCAAAAACAAAAACTATGCTAAGTATTGGTAAAGATGTGGGGAAA AAAGTAAACTCTCAAATATTGCTAGTGGGAGTATAAATTGTTTTCCACTTTGGAAAACAA TTTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTTTGCATGC CAGAAAAAAATATTTACAGTAACT samtools-0.1.19/examples/ex1.sam.gz000066400000000000000000003376051212162403000171040ustar00rootroot00000000000000‹º±ÜI¬\ÉŽãJv]ÓßÑ©L%¥,ÆlÒKõÑ‘l?ºñ£g³ð^2ƒqÃIÙtèþyJÉ8ÙNj5‘cX£÷?dÐBF yÀó¡C°†¯ôºühu¹¤KL1¥x¹ÐsÂ=?æwùmAhuAØìñ8>îP7n{\e)gƲÏsc‰ø ­M!¥ƒ pB‡Ö<ñ9SUäIªH.ÛDƒÜ>€L“ ÆB§¹"®[tçþ{¸+`Ì ˆROmeÞDGQ>ݶuÛ ‚HzQìp:p‹2Ji¢m<Ä9zégÖ² ®{þ²H~¶ÅRײ®?ÞLø˜Y ü0_jý\LEÁ´•°#Ä0‡¸*BRƒ)D{)«L¢n¡GïXê™mØ¡á ⟅Zz9r1·è €ßW1µ¤ÚW-ZSpÑQh…HD^ÛÐVÁg€¸ƒ }¼®àç cÅ»àÒþⵆߩ/ærù4î$ks8™zfÕÊ”r1JFIb:3^EMÆö’t•ÓP‰•—ÒUKr¼1œ÷l7¾“"èÎY´5Âá!EcL2çOç~1ê6#áØ,b48£›PR0õ¬SiÀ´ÕsA ÞÞŠ^ãÄK •²5VÒgw>Ïôr‡R´æ%£‰ÚÈaÑuèÉ …¨VáåÏ€2 Q0ár©w“ôW“£¬õ²zâÌ ¦'fâ ãÁ+[rDžQ–ø…ltµ…²wæ£@dL_Dr .^¿dý‘æg´Ý³>ÜÇrÑ»y¹ä>¸•½‹\K­¨¼,Úà]µŠòz•ÔŸ½l¥—*‘íHº¼÷§Ïù&Ïr SŠIÎ^ÊsF†@W¨Ü(Nnò&­ymÙs a‘”Y¯”J­ÿð'qúË_fîÓnr¸Ž #GpM ðå£<'ÑÈf«_E9¸&Æ<»GÝ~|´o©ãÔëFDF/±ð ›=œÙËÔ1 xC:º"JZõè4Û®êÒ!YE´X¾·çy~Ö Tuú¹ ‘¯wDÝÕ\€V³˜ä&†„Ô(˜êù‚Ó'×™õl›Ò‚IV¿9+çÎÞì«]ùè܉4#«ýÔœ, û9Äk&%yÍoÅšŠ¹ ú›€8ßßët~ÇC:Úó¹ž_´ž_ô£!.9¢K¤ òL˜ÜǽÒsæ`E¿¨†ðSô2 á¿€…ºjf'ªšn¾ ˆ±qÉ'§©õÜB7;“%ß¹z ‡†äx¬‰É=ž¨Ä´‰òibL>!xzÂ,/ÞQˆGÈ@ðpzøQÂ1”vD©µ(-^‡ÈÛkOФvÞ.ØdóyÝÁèàjàš]ôš¯ºÍWR•6€Öm¡ü^Æ9½ôaj7Ûñ££uHÍsjìñ¿2 œJlµs5ÐáRzt[=)íÀš»L”ËJw­JÁWÝd2¿ ñÆi“ê%ˆHAØ´íéÍËQ®³v¨%Dé`DUè€2#R,Ð-˜‹½¢¹üžz6ÄoHI„j T=-O;}·[ÄVôS¹¤&ñmk?›Êõ~±€ØÓ½¯ó+öw_^_ußœ¶P0†}ÀFÛŽÝ| u¿…tãúÿB:$»—õþ4$‹¯/êM§$ÞtãŽ;¸¯é²æ ÷Ln/÷¦˜7Qž{ð™ rFòy&©!K-á«›R=›ÏгæT3˺3w00S¿ å_,ˆ€‘Ú¡hInÛQm`m5³äx10P€ËvéÔ ÿ¢þš¹¿Ç9·ïhÖqê %w{6WÕ7QNÜ•2GŽ.!Æ_–糟×ö5)ªjeà'§ã0=`Új!}0Ù/{)ÿ{7"9?Ï•6‡rJ¬#—CåJ yLœ†;gPРj!5Nï÷±„1ÂL¶“nâ··v9ŠJ’ ˜4º1!bO©xÆ$âë}òáDƒW¼X-¨åÓSp¥ºâÕ"ÈÜQÍJA£)iˆ®ÆÏ¹)Îùƒb³:~>?¥›­¼s Mоæ @n Ix–ÎÛÉҋܪx.É ’Ö¾êJ¿^ôâLÅø%‹T¡ š¸m–CÍ8n‰|†X®d0'+šrÑr%Üç|´rbZXxý¤¨¾>iâ°ðJÂ=ÑÔ“a|ÌŽSfv» sÞœ*—ÏfsÌÁÃËì⤮¹ :,½çµÁ»J7¹² óiy$Ô%DGóªk*¥ÁU"·¿¹oâ] …XE6ÿåµ ü~ˆÞÃdžMÇ6†$mRayÜ­ïûØåvŠ¡OLPò Æ¶¤pÜ’¨¨tÕ£¼.É’ŠQ÷‡¥/ž³éã)Æåß9QézêGË ¢2¤«Ü99nL¹²îªµE/@³Hã`P_š4ý®~PÒucê¨Òîi­¡ÒòÎÌU´L.¯£¼“Í* 
H¼œR²º¯†6ͧhµõ,ï6Û<Ùä>M¿ €õ OiO½s€ØÈΙ-Å¡ÿËC. Ïä¹éCÁFùPE¦vGLiêÅ4ÎSÅecžk®ºÚúã¥mkµOé}N›6é4Ô c–ˆöZR=äPãŠD÷]»ˆ™ø×fÇ@QRÝÌžHܤ¶´Ï÷ Mü‚CJ/ø½xº^.b•©æ24cI´,0y64“½“·Õ6̧ÐÙ|½\ÏÈd–ä-ñ…FQ­œ†& ^ˆ~ævÚ-׌r2Š%¯i“‚:Ôá|®›ã¡Vð<ο”R±mì¸â]½~áûåðýžæ•.S™®Vg"úž‹>އcc­—l­ÕŸv>=*+= žwšh¬ Ál$<#›Î¢LœyEhœÕÊ ^I`xϸϮWKSeD˜ö HX…ÎëË8]¡ê/ூ8P¦nBÿ¾Ô!êæ1©û€ c›h4cêÁ0GclŽ17±†ñ:åAémyœ!·‡9$Ó:ê®®Í)œM/O—iФÕd‡G(cõt•¨S«æ¼sPÏ\„~zžRV9RH'hf}Åé“ëBÒËõÜ ~I=»áAICÂÑé$ã%rÆÖÝeuÝù½\á•äÈ1—õ³—§zÚ‚ BG éš—X;‘"h«XŸU.;ïèìØsæhdQЛ&z5¦æ4G2¢ô¡ÔöWÞFCâ&‡ORû«]h@ìJÑU—ãó^$&N;å@™Vnâ¼=ð_~å^Ä‹Êç+è®óàÛ3 ¾#ê/]rõÇò‹—ëG9;7ytÁ 0a¡ØÊîéKëi äcݰ}xë—«CÊK[“”‡ó²\ (5wýk@³N{“zÈ󸝮üÊ ¶_‚ ’r&·n˜Âhöýõry»ŸuEöغŒ“H>½BÃÏ4Dº ô¾ÐêVþï©y0©í©ÜŸ`r_æ¶Šc£ØÔ gÕâo¢|ÚÕ+•èOŽt;O{Å–’F±­ ewÕ&FDP­½ö¬Ž¿¬½\þÛKü«ýƒ@Š?þx\v¹]<ÌOG‰LRZ¼*1re:NDäÑ–=0C¢*U;Ž2÷^‚-Zà#û­m“3”ž¡,EyºÑ(m›£ºÛµl3 â"oÂÔ¦‘òxl ˆÇæX»¨ëe=‘27Ë(ºmI¬ Çrß(xñÊ…Ì·…t­úm¤iœb¥ñEœ/™ý†ed †`#abÀ“à-sÙråbºí:bYÜc|ý`€Ü¡²y°Eiúô¨Q¹ŽùÐÄ wp6TXîe–QL8WÙ¼ ¤y'ÝÁfàÛ07JBm暯Æî¾.@ûúÀÖƒãÑNÕ“èû˜—§× °¯oMò¥hI™9x°pB0 ~Æ.©7¹sAžýäCì¥ù Ïõ:ËÒ¶¹Ñsó€ÝTgšÙÆx­hýO}ˆŒ\ÁqyÛ܆±C’Èw} š'–’'¾4)¶aNgêÛîÆ^Ó%Ú׊rö‚ ÞiÔ|¤IJÁr´æÊ"`ñ¼\¹]ªîÊ•e\x(6‰ %«WjA»hDµéÔ²ôöÕ¬¢SP‘û掩š¹%0 0©ùHºé:ëa3Dá±\¹o—þdkßP¬”ã6ý íŸ;w­” wªul_/ Š-íìÌÃ4’`ú—aG[Ì›/¹ÏÅ=ÇËòÔH$ò®ã=@ÇTœ5|Ü*Ðm¢ÂXz:¸†X'™„t•ó•nŠ‚º²]òçuŠó;ï±ø®Zp6Õ 2÷Þº&ñAf"&D™ÁÛœ’Ì`YœIXØ„ÌÄËb“$^Ym‰–ÜT–èÅé˜X…[îÆ6äåi g÷¦6ioÚ6Ìke~­ú: UuFëÆ˜F}p®±ÆZëæ½é=œÄQ°§:Åâ&NŠ…Qê±æ-J;p>’¸Ïü ¸\\š·¦wN‰ÎFð çp0‚Á9Œœ·çu çÁ‚súÐ&ëZ­g>޹ý劬|R‘}²urªX¼û;S-y[ž%ïMu±ù´œ©¿ºu²ëO šç)¡Ê/uªI©SçÁ™=HÙTf‘R9Ï_å-ýzú0 ¼“ÞSV õ4°%Åè(dÖy>8W’6`NÂýÄßg×zÀË×ËÛ_‰Ò8 |ªd#hœEž~Î"GÚc]y ù]yvÐÞ™<7†ºÙ —OåÉsœ¼¯$/™Ìö·A²w×"êɯO[?ž02šËåµ"{·Õ&ÈÂ=MŸ!u·ÝùP)ÍR¥y²´C9N½bIræO¢ð§uœß•gý–N®óÞÖrè|m“ü(P–}ZUÔ6ÌÜÒ¢S;RèN™è²zŽuy“ü¦ÙXܧЊŽ9PêÆ–òÈ —gwÚ%5wKÛyH¡OˆI0&ë¨eD»=†¢ BË ¢7Í y¼¢Ñ½@{B?™SÊ|„×KÜ«å¹'ˆ=CÜèòÓm~u“r´ ™RëãÛñýü^ŸNmcíéxlBSǃö¶A¨6‡Cæ}ͬT'“·p‡¨w¹p‡ÄÍ—YT;„×û¡0¯²·ôý)K,ï­pu[͉ӫé¦$ɧ󋿤so6×øI²ÙÛzlÍClר&1cêR£Õs2¯y[ ¡rUíÐÄÕV›r¡ä÷Z‚TZB²iéð?ÖºÔv¬¸ë|tT.*®ØJÙÚç:ïãèD¸³A>êÖÕª~›i¥ž¶‚íSG¤!{Þ ¼t£¹'Rݾ¹-¨·Û‚'*—ÉwåÔox¢ît ªÖRßX&QnI²Ì뮢Ì]˜{Å"ON 
¿}Hú¬Óçák6§mìäTéŸVnh‡—ŽÊ4H¦y¶ÑAµƒ1ÑV¯Mf-ånÐÕ¦len½yYØ.~/}3ÆuÍìž:CTvÚ…D‚¿Ñ9LA–Í$3«zýðI‰‘ËuPß3Õãò›I?Põ´sæê^+×q‘ùN’m-]8 c Àéû–Ï…&³ 'y Ÿyü©H—ÃØ¶¦á¬–$£.6·‚AõB°é#Ò¡Lyjï2Ü–Aªý¤}‚vòEçô˜†m‚Ü…sîßû/µ¡¸ú[i{Þ{L¤SiÊ‘r'J'fï´seó±j»@0?SÆNÏäÒ ÞiéL™Ñâ9Ò²ör'Òø¨’üš#•S¤fé4!âq4­ i¿ñd^¬7¬¢é,>¸QóZ`ÞòYH=Lê) ùÆñtÐ$Iwpð¡Çã—J.Ð&ßžÀõõµÐÎå‹…S§;¨>Ê|Üí”çU~ˆ˜FD¥ º&Ð|òæ“m/³kæž¶Ë/—Ÿow¢ýɹr³‰“Ž‹LÓthéŽãKŽiœGZ5ìŠÜWÒ_”xñI#qOÊùxÎ )ËéÔ•swÒÊ:N˜”ýÕ&¯Ç”x¾AëA¤;ÜTÉŒTfùBÑiÊœ>Ûlü%•ëÕtèõ6´R:ð}ööÕÃÌé³V5”m^Ý&ÒK‡ÉZÀhŸ¥Š¨êŽŽ\\µîS¹¾ºTõxE~ußÛØ¯óðXAÒç1Xv«Ö!7Â÷ÈõÙæyºè0%ÚO:ß\²ãÀÚ;,bˆºa0]>ÄXø?Í¿ ™±Zû KNÿ^6nõpWç§ ;xÖÿ£íÏ–#ב­aðš±­¿Œ”8`$©Ý&“õ/ë‹®+ÝŸ÷‹Ær€¤ 0ê|±3%eÖ9Ukcpø¸V6=¶3¬¨.ºEu[Îþàíj— ²ËvðþÔ_Ó¯ä¬â9ûùjnw5+ä@݃߹KìVòÙ:Gxþ}lÿæ^ƒç­lÞ³ ¯Hù$CžU‹$€órøsÿÊd°Âš`ËŽâéÂên—,ûAÍi’!èÃlÒ};Ñ%Ìåê\<¦1Ëá8WŽ.g]ߺÔÙˆi~y§hÖÏJ7¿G­”I†¸÷¶ÁèmŒKHD.î6•`:Ûz¿šª9ä1.Ç‹’ÎÑÎ!à–áù )9¿5ã¥àÇ'Gõ®—åÍýƒÏm†ŸuNŠ÷>hE£AOÖ=±" ˜ªéå# š8ƒpÕ# JÓÂù×ÎoU‚×È Ïù‚±µ¥£Î/~ëÕbšÜ×Þ_z÷y½Þbí£+åÞ2·ë 6+9F@1µÐë* |õ¶Ûd&„…˜ÇË*9ƬõÒ[–£{Ö1f•`Ò#jYÕ)û¹:azB¹¸áíxà‚v#¢\Ư¸ôèZç}À Þx`©Ë‰ƒ„Jwï—"øþ‡¹?á›Zªî–Öô õh&g·õ®‘¸¢€oÁoÔ£ÑÄ])©XÆßò\•ñ |EŒÇ“ôyÏ®eÕ <"-Hf9LÉ­“Ÿ`(£\îÒiàza@ÐI,¯Z'LÓ•a¦wž}ˆ i¯îºO¨@¯mÔîõ8Ä9,|'pFß7‚Š.=• @(Ÿ v¡ßÅœ~«ŠÏ0®ô:vÓó.:-þìyÂL&8ÃÞj7^k©jžE¤´ ð—,qúYÅëfXfwûa•þSÄùÛüóz¹oŸåç——g÷óËËýéå%‰Gãn Ÿ†g%ù `=<¸³¼óͨÆx ˆCd !Å4¤P9væ£.·e8F·p‰ã(µ”Ö ¥Þ‡ûô´ì²Þïý¥]d#‰D¤3à?`ÚE-OõbþSè%”«5žèÀùŸÖ-å¼P5µZMZÞo×KfA;m@µE¾ß!…CÛ]@º3LËý_ǃ”“17æºùàÃd·~ðse¤.Ç‹Yéªg‰úÊÜ¥òÔAÛðÕÀɿɹo‘”.Á<.CjáGxY¸‡×aÌ»îKÏf†–ù$Ý'ƒhÊ`‹•vÞuuT¸h ˜ú\ü˜F.ÉBsT¹V¤ðyçðÔì\=“’VÈ¿…'¤Û½Àl5#œ~¢­çéçòuÊÒ ow‰¤‚*÷Í×¢N~ê=CclûJþq—üµ—þùÄxcÒ8ã;zR¨I³ˆó`A'€×£Ð“Ãuä(%fëÉË;C·¢Î0"Jzz¶Ü6! 
ßR½Ìì)9dFº w¾ó4.]ÛR‰G±õŒè@>si1wl[¬Lj™ƒÊ4GUÅÉ yéââ |Bô\o0¢e˜ \,~®RFeÈKÙÎŧV5E˜ä’àVß—ÞáÌ uÝÚghA7¤=¿ó.Zzóƒmç0}¿ó¸N^T¡.‡fè!RþÒ;:$FK@áRoÙFÅÏkiÕ\0’â½²”Ó»L|®’hÐ=É èÓ»µíÇέëGÿ1ËøË<<ýŸg¢›j󩃲®ÜíecÖ„8Z:-š$AâOžG¿ Ðßÿ†›î~ýÕðWÝÿ:/çåÏŸ´#QYöÙÆ )Ûx~™”ÀÆãÖŸl9ÑÚ:¤Ýø1¿(O_·|¥p~6½—?8Oà¦Spði¾Þ…É·;$Ñôb”~}G¡òÖ@N_BÃá4×õHx¸G—™4)L^q&~~ÝìP~§(©%r»ê˜WþóêVõFÚ[Á§zÒçKÚícPOd‰©Û¶uŽ3Ÿ&¤‹B”„´¢û÷øOq=¿šÞÏ>÷÷§”U»*¯D–XJôEB3çž IômR¬„r»òÚ·˜y•W¥æN¾|XûâþS™ÊJ=¸˜hu»K|º¥óswPú-इiœÌ•,^9I’#]Ü!Užü®õˆªAêk‹c¸ìª k9)¾ª;·ýþ.Ó²Íç±R7xù¡"Ì<¹2d™ݵJ)‡*&ÚÖ¶8÷~´9ŒÉ2FJæix )}‚Se M”3V‘øìHÀWG¾é*Ã#C¯a§{C•~‡`ý8Ò¨;[Ó(‘å¡Ì(9iŸiÏ.¥žœi’ˆð”dëÉÚµeïàË wL›o?MÂýɺŸR6ýŠœc(ƒ@#]9¿D©~‡Tø3ÚÓ(sÅr]õ;õo¿ï/}MSùޱzÛvÍ­SëÑŠmŒ…Ê’@ÑçÉé'îö_š!ZeƉ%ãd}t ¨ý3÷Ïý{?úR}Ÿ±ùчÚÿ,ôºž|ætnD^P<ðñ\èf \$¦ùCÚ…ìz–y sœêy Ô¬‡.è  ˆÝ§1d|ëóù4Am¼$¨«lÀTðÀI"¨’èEÝ’²Ï~4+wD«û¸:ÁvFìh¢úE ßˆš“3ürŽ,´Ï4@VЇ[¯•/„IÃW´‹_%Ô¾kp†ä¢%#BÀlÀ*ø,z‹¿6—©å–ô½3îÖ£ÙIí¼Ä{hΊ.¤‚³?µæ–?¤Ç¼Å¾"âü{÷â Á’'œæÎT.èþcV¸î§»¼|D3¤êÛ­çåféÇ+Á\&Âq‹hHxÖñ pæ.Õiæ$"/yœ’†­eNt MkÊ.3Á|½M{I;¡—•Ú.²M‘§iÔ© 3›µ§£ÙÓÅ×rÞqJVÔ|϶XK‡×²óÉa¾ááA˜l-±ÄiðY,.츭·MçO¼E¤d@ÿp‘ 6 EÍÿ^2ÜŒš=ä­sï¹&u”œÐ¶çî}‰™æ ‰|f-ôŽ¡¤*JR#ÜO·žîMR|ÓM"oY±”` 6¦9Ð̾1¿w˜.KV.ÌÖýà¶ÞEsR²æ¶AF¼öÍ%˜«•ß7`¢õZ(8*Ý¥Ó¹¹`²Áyw£6FºŽ ;—Êó#ý^rVš»?Œs™þ¢¯UOŸyo9¼Gy—œÊ‡É­(?(m_½¤ô°¶‹=H˜'‘‘îâ³*—òBŒ„ÔH b\¥›"â‡q4åý>Ý»4‰SÅDGÃön5qßÛJµ¡”5(O¬'ÊKÐqs!òÕŒýÖˆEÙ»QÑùä ŠY2èQT.({ÐÙŠBÒIOs{ù9ÊyšN0Ö¼!3VÔ—ÿˆ—³ŸÃΊr¢jQèfD˽‰WTlSXÔ~î_£bï)„><Ö/²µÂ Ñ©T2©|¥vŠ[fF}ÏJïâÕàm&ÚéóÏU{šÓvÚN/.jTº*¨ÍÌ¢â* 5ÚN©ZÀ“åœÎû6·ŠмÙçŠ×çà¼e–éŽ3ZD)ÔaõコSª;'e½Eìè‡î”ê0(ê|<©ÆÈèsï ÒT¦¯YÍ|§’äFJ;‡j¸3ø¢(zq³fb˜xõ7B'Hí?»C¯$Â3¤öë†+öÐc¯)g_ÆùÕ¬Ïû„whÃÌÀ?@žEƒkV9œ^æ}]OÈŠîQe˜ê×Éx¯é3xPVwŸCê8ÕdFÒàØ‘q`ú%ƒêù¾÷+qó9ÌcKO>¾æ½hgmeÁÙMÖf¤ÆØ-â×ÔÉI­l¶‰«H·jñDÎvÈ3›ÐV¿ì{®•è‘æ¶0^©6û¾ ©ÖV»¨Ýbþ3”»Æežrš2²EÂx­•Í»Ó;{ÔVNqºsúé¬|†³àdM+[r"ý¯uE5O…Û /Ãt÷éCOËãt:¶^í1-mB¸·Þ°*pŠõ2µ^5†ù•…™ÿ¨{4)kMü5ºà€ÚQ`°Îð¾ Í_&ÕÓ¨by=W;ªçà„:WO"w¯gI~Ó[Ú€Y+¤ƒ¢g¡Àübe%‰­€gB<ÈߤOÑZ>un9ßà0kZØgMÀ)BÊÞLýö휗–*+b¬f ‘˜©ìTS^Ðm8¹fïÆw⺭oíÜßçç“L©–•V|îÖMì¾ÁÅ|®€+…YúØ 0{™CôÜÍéÓT%°”*Ô1Û¤7‰:Š<Îyú‡å÷†’x þ¼È–&V°×¯xCÍÇ+„uÊ&/“ÌŒýúoçèR TÓ†–4B0IÝÑ$õé‚~çÊ󛿬ÓÕ»ŒN¹ì½ÌÓšžZˆœ'R{¤hÅ+=éž©þõ×-´É§‹ì¸Lè†m¾å±¾ÖßY˜?Mà>ΘÑõ“åO%FU"yr”¼Ӕד,ÔpPF>ZÙ~ À­O¨i÷ 
÷¾ŒÓÏ|_›³©TgqöýM–hÿ6ûÄ"±¿t^—p² 4ÔJài3ûÙ†a~¿ôÔ¯ªÖEõhÉx¥Î09IR½TJ.€ŒG“ ÉUºi*xO³U»y Š2ChÍÀ1u¡ò½ô\¥YbhmN~/H³¶i¢Ëôl;¬m:ëŸLÆŠîD£¶ãÙó{$CçÐwÅz>µ>ºÂV¼º:†™µð91ÔæÑŒEr7ÛéìY–öIyÛÈr'ʈÏcÿr½Sp/M·Y%#øjv »´–ßßî@«O5méÏ1ñ‹_NˆûXiÁƒ{G¶ÞN§â~¨¡—³óû˜ ;ül´¹FA›UÚN'ÏÙö]²(¢¬cù»V[®Q,Ç”"õ;ß·ÕHbè_.z:¶2åp–cÏ…‘Î8TI´ì&iV û£í" ¿Kú§M኿OÄ™F7ßÅÑ©÷TŠš²š´ìVñ·¾•~¬|ÂDõ®Ãåì TµgìΠºÿÖ0¿Tõÿ¸ú~<ƒ¨ÉÄŽp¤ ûúo€òBã~Ъª >4ø8+Å`£gt¡ùLQþ¦G5ƒÞ)zh#§ÏhŒd³··|>Àbã{Û”s]OÞõpH‘v-ƒ(|ÞCÜMÂqJíµ-k`îeyÖªŠuÞà¹ïÔh¡(Âiçc yíuýz†Iò™Ù?QΗœ ýx½å~á"u^ö.r¶¼ïŦ©qg¢Ê@ih–lòŽ—t”—¸BRG ÷¬>½Äùr(÷DEðξœîèˆVq;§z7ì2q7jèXpW@ªµÐ/ÄAzŸ¦±Ÿô4vî}Ñ댜Lí×9­#“[RŒXш]R|Fd´2[“Ï<›)³ùåÁµT1pC©¸s4ZŠ¿¾Xgæ£$î}{ÁÕ% w¹ÔsÈ–¡ w´JÒð£Õqࡺ€´¸´H='[Ÿñ¥.鱪G­[\ðú÷ÜNÙ$6iESÆ™ãMŠ~žìޤ *SšHÜ- Š‘5$ÅkOÂ0­\òd ÙÊ’_Í.g5|ª4ŒC‡‡ŸEz‚SÒÀêC^ äïω:ðçd¤œöwªŠç%Uc[­¾àŒ4dõyàs _žçõÓXB7 § ]¾_&NÊ( 1¤¼ïDÝ"¯@¿Ž€~5 oœPcxz^›‰ÂS§ôàΧšWÛ€ªÝ)®¨[R‹šè*çÒažz÷KMK³æ#a>ºÝØñ Í¶:¥¢å¬`%!¶ÉȃÁ‹å¾/"r—lSˆDq‡¤[|÷êKÍpò¤ :Š ’P%”‡m%ê†úƒôšú¤òɦm×¹¥”‹€¥/ ε¿mô…¹¤h[•…"é¸N+rK%{G!xÇüÖsÍqb|­¥1ÕÍÎïÝ“GyF¹,Êvå9ÅDRˆD£ W^Í£úp¿0%À¦‡FY–À4i-‰£ UgÔZÚý¡­ÂIÄ÷F ÌÑâˆ]^÷ªd\¦a0½bœÿ |ƒ?ø@W -;DDçþêÛ˯~“ÂÀå°‚ÖÒëÄ-m«˜óëd; 6ƒ,8’tÎò‹åu:µ9ØÚaÏ-U¡[H&²IèvÎ;s‡b Ê'ì ç2¤è`wÞÒ¹ŠJUŽT0Š]þÜÈÖÇme¤‹$ªù`háò¼BªTºÉEâàE’¾€ñÇ˯… |DÝsy@)ø—ÎÅ% 6Á*rT-5;íag¼ï“ qç@‹ÆcçÅÎÏI_CÝ8M*¾–®§OÒ{Î&Q³ž œ°²ßC2%ûß0 e4vÞxv„š_œ4 e£ á±ßV©ÄCÒ&èþ–bT\Ê@tQå‡ÔyÄå%ýe—Ÿò\†<ÿ·Ž)L"w*·£¼³Át”ý¬Á928Âýs9ZJ^B Oƒêk¶‘Y[[…tᚃ´ zlg‹FÁÑý¨Ý Ÿ”jC*™¤!Á¼­$»ô}›Rný´ß)FŒÔ?ôáuêYÑå€öô4A-ˆÕ:DÏ•—ZMù"ίT´[ƒ6ŸðFQöãr´æé.{D”¤ ÁPrK"o1Mäî„Æ3j#’ž)ÿöÚžMÍÎ0œ«pIO\8È#UíÑff{Ò1¥ázçM^ËÝ&^7”*ܦ"TÒñÏlŸÃ9g\ãË“òÔÀªF´‡b.Í0cñuáì¢x\DéïÒF>ùB\àʘü÷«ÔYÝu󯉅’µÔÅAoÆXm ýß^dc^~HóåBlh»ê ²6ÃX#¨ª÷Ò`{¤?;¤{Ë´pN,—*iµ]X'– rf`!$†0ñÙtÝrH‡a‰Øè§Ò€¦° ãèÄÿGpÍ»(:×Ì]~Ë:NAÙ;ó¿ïÎßú¿?,äV%‰rÛùÉùnâõï0«é¹vŽæü†Æ¯Ißõ,ÁcÈçâ’À6<7}J¼·ÎKÄ:ÔÛ]Ä6]ŽÞЬ»ùå~%» ?ãímj•š&b³ëAcvŸûÉï3ñÜE 5Nk½m·Ä-§Xêº÷™"Œ—Ó’zhüÜ}{¤ß¤ÿÙüóÏô2 ª¬?R}#çm_À[o‘LÃúª@ÌÔïA¯CÇÕk·Ò(cnÇP­Ç¤æ}oÿz3IçÁÝÞæóOœ8ÈØÎ¡jt»žCÕxÝÑc›B+ÖŒãàÙõÈØ„ÆñW¿¸,h…r‡üo«QHçf…÷F[åg"Š07·‡%þ|G÷H8œz»ev ªðJCcLƒ9sMW9œº_WSÓj–QNÓX … ^ ;Ê«=´š1HÃ@.g?%V/—·r·#k±&)j‘ÚXëâ3GDèè|„<‡s¼¥æ‡þ1$b>& 
stès¶Cÿà69÷s‰CqÒûƒ¿z›­Ðb9svæyåÐýD%§’å˲]njʉê­vW´Ì552Ðàqþ6²yÓç¤}gè´¯Í=Vè¦Qt¤jÁÒIBzhßhM@_ÜŽ;Œ%„'ÛþcC ˆ[‘+—è`sv®Ÿ¬Mm‹ûÔSßmâ—·sÝÜà“î†v¯ó˜Ž|Æâo]F|ïÀ‡å®P§ íc¤Ê‡wçH¿½£ç3ä‘z‡™¦1êë|­t"SW!c’ÓÖ6¹%…Î69ÏuHé3v‹\×Ö^‘šÞùZð Y3k²¤Åö€FŽy×~SqFs:ÐÉ’OK©ê%õ|)ên•óÜæ›{é´S¦¬õ‰y„à!QÜüXΉEÒ-¯@Üܬ˜ÅͼC ØÛvt¡ýHDNóKèƒBÐ *ãå†,ô\¡š¥m¦ÉK>Í4·¨øÈ‚­NC" Eú¿¶åöŠ“:©N‘DPÒÓ6V&“`u ±¦òz^%‰V‡%ÔÒ+Å£Jþ#\G²&YKðÈÜ·³TbìqR "ø’F´O@ùZZZÝ#›ZJ…=ãC"î³ÿ’ò”Sµu+ºœGãmùæDú0i;·ö~9Ç#<_'ØHÐŽa w£" ²¥Ñ¥ ÿFŸi¦oóL?»hûcšº«ë™QÜÎ(/7Zåkòe˜Þ+:êAƒc )ˆÆ/.wøeg˜0W§‰óš9êV“—…­eœ$d2†‘ÿ|1çª÷œ…iÌeÐOûüÎ.ÐhÆPlšó l-C†g/A¹m¨Øm(¥# ëôÛLÏ`áZ—(Y*™ÙК§1ÕñÍã4Æ÷ÕàĤl:æsb«¯œKŒ™dp•yáâ7Õüp9âÏ1Äïu¤SP*=ØÆïìß—Y¼¹¿·Ù0§™(‘I-.Äõ oh‰¡å`yUAöÔQYXNÁFlwp×?MîIÿ˜jZ{n­Jø¤d•ÍIÅ»7_œR†»Eõ,ým´šþLˆ1Ü) …'=¥!YÔJnp„™fp‡À²ž:N¤áÅG ¿XÕ‹!sXGšöÂØ¬¶i°QÃÂJ^&|!ƒôJÏ ·=[(¬MpŒ‹8Ñ $ýñL剎îRÕ|´sÞGaF÷r·ÜÃtVË&…D¨z•PúÔ©nûEohF'¥v^†X­S±^ %=CpdY1Ï‘*éÔ½R­ÌmãYxÝL2¢e¤$õµ%ü/¬èšúŠu|7 &y•ÂU*àt@í´ïO€2©9MÇ%§Ú YPÄè{¯Æ¥ÿjƒ¨OPVqm£!<œ¢|]]!Ê}ù _Ô¹»½§/UÄÒ¤\µ~3“íÞ®<'?ê= e– 8žó=xé9 Tqr_ÁU…ÍÝÞN- v•qþ4KÆ\ÏKÏ‚ˆó‘Õu¾ØŒœ'[Ò~wHû:¤ë 5ÙyÞ]?zyI³ÂŽ * .â¶u¾h©Ûý-VÿuEI«Û=\PBA‚üH×2eÎ(DÑ(V† ‹oCé} ¼S(rã+²Š—ùÁ×Y Ìi…ê!:§<[7H_’*ýúŠRa&¡³ËÏ^#`e.9Ì6?z›† —YúÛ¨VÙ U²cfÛG:è{Cm£†Aå)eÍÚr}—` çy»ØŠF—sÔ…%éyBú£g•f&A|V³¢è·›ÆãùÃÝŠ–½æŒÖø†Ó¦^‰×/¬ç7Þqo ™þàKIúµIзÏCŠÝ$NÖ£œ©¢ž‘"HÚöÜÓDu!È<6"¯ÞPkß²óiØŒœ‚ø¼­ÁÙ¬­þ³ý„k%>ìÇõÉè…oÙ`p˜ˆïrðN³¢ôá¥FECòM¶-’©W™e1-G âÀv`CÑÑ–û¤C¯+÷üYÌ´‘JíOfŃ„zPœUb”‘ßd"¿IxÎ2Nê³õç’Ó!d Ö' é@Hzölœ‹¥”C«ªpîWÑ£GÒzÄñÙ¦ž˜¶{«Qæä·½OÞMãY…Îq~/4ÖàŠ }@§õÛ4ÉP§ïfMâ8Æ]M':í—H"¹‹Dæ9HgâÅŸûݘéo¾oÌx¸ï}Ì—Ñvƒéùª&}LÉò œø¨·ÖŸÎï0wˆ‘êÅÛY¼û]n_4?eº´Û©l äUž@¡Ð»ÌÍ’q§rÙu¥ÂøGæ6m¸`[ôå]gñêOÚ ,O—s}1´ƒTbËòLœGF™Þs@þ§m)'_šÐÆÅʽë®Ûvó“ ëIŒ‹(—â '8Z·{iåIÛ–ËÕÌœ öf?-ä=[C_ƒµA‰uR#ÅÆJðw~mJR¡5[¤©{ *ˆèAâ8{"êp Çy—þ<´óå°“š·ˆØ"áÐmÂ/‚3)#긺ç—)>°˜ã¾ 3T³:ãž#ôî1Š;Áyd0ÏKÔÀ%” =KšÆ™Çû¾y©®‚Ms~½ówA’n] døWY³!tãÛ «ÞKlå'nÆi°Î8‰öò]JÜö[@°¨Øê$zﺦæþ­ì#iPÍq&”’`äÒ³­çÞHlAÈTÂùµh9î>ÓšÝ wÖp¥R¸‹i’œ:*®^O¡’J4îŠk/‡»Ëª™ `_nv§ñ)Ml<ÎeüÐho‚²S P1Ç©›ðÒÛ# •Ši;ÍûÕ8ÅX± ‡”± !3Œ€ÌF£âý‘¶†\®{Y¯ø2v'ñ½å@QgÈ€JÒQ)c¤ú1 ŸrTr†×E<À7¾ÏÖ¾JOÏTªíXäÅò£Ûaå,>Ðø #ÅàÙ›òhç;ªA½Ì-M%¸5•(Óýñv·ªæ ‚K€b,¼óÒ»øÖ,S{¨òêÚg|OÞÓ­º@ 
gT‡¿´÷_¿Íûôþ>LæÝP*bò¤M›–7êM—íi(8@ªb@W–ä~iÏëÊÖÇÎ5Kº Zo”œG˜aü;ëþïóË”džîj•Û{âX'7ßÅÍLZEºµµíš3AÅÖYÕÒb_ŸÛúK­×ygoPñøtî)UCÇ”ÉÌ+4EQƒã5Ëåß(ÿÍsµóþ̤L—¶ºUäLmÿ?É*éQ!6ÜMéÅ΢šCÌ_luEÂgÛ1ó |­B6RQ¿†²‚:·ƒ 7b•ô’4í[DúKdû—›´¾cÆ{YãéêP†ºtëtŸH”œu[F¤cг² êùqÍA-+’f½Ùþ+þt[ßø9P·¨Á&ˆé]GêZŸ= PÊhMT "7jÊ,Þ­ÂÓb’w £[šF6’¿ý â‹UÔ]“?§ïw%®Iwn9ý“N©[Ç(ƒs £);vx¢Ç_'zû ÿÓ$IžØßË>¨•;ê úGûQ³¼™gœ f“ªjÙ¤þì²güÚãò½b²v' R—Ié|Ò9U˜ÇiYº ÄÁí mqiIHsšcÝ{fqËf5#¼Pº­m[ºóYŠ2Ðð^mUÈmr~ûéò1ðXÚѨër¤‘c Ë@ý˜Ä©êçõëŸ*Ü-çZu£U¬(1°GUwšbÕòQýi‚\)¾Ð¯ŸÝÚŠ ÇqL€ºVtæBL»E]t3©cÞ‡’YÐ÷.Ýj»›ð4Lsè1t×*MòŸ‡Õ9ÙÏõ6 ¬ÍšK˜r¼ç"ªéÚ´XåÞ-úN4*».Fµ¬ ÊÕ_Ur,cmiäF„Ù¦÷ùî,«˜Ä±›2TÛÕX ])fYÏñÑÕ`…ÌÉw£>Ì8¾Ø—áå/†„Gû2/ü|œ.°=b'€è.Ä´9_¢tÖÔêår9ëEíeÀ ±YÜ].zsŸM{9n—‹­GI Îõn1‘iÎo•_Øæß?ÿý÷þé¾PÒììÁ¿ Hýp×êCºXó­¿Üÿ4ázH¨»XаQa·ÂÌZõmЄ+ã̳fðOZ—¬!A7‚;±¨ôóRo„³ ýÚåƒzªJ6ó€ Xº;º¾CÞwT\ P ž¥V}²*®¨{ý§µýµÎZUsäÆÂ䫱Šbüš„É=Ò¯3¤á‘ßh{Úª t9»ߦ®?´«Š&Å¡dd³wÿ­Ï©—”U}vQêØ´î}i ‚Ž*ï‹•Î3Q›ýËæSÇéŠó'ƉÕnnw4–»aºß¬ÅŠN¢æ^ùU~u¡kŠÝû„«4Ö~fü¤¬«PF“lA%7P¾O®hô$0"ê çªËÙ?´ì;ã$I•^[£Š(R±žQõç?%œ`(õ$ˆ[šb§±›o˜JLþž$ÀÁìH²#¹Ò *OPk&‹Hw½]¹¼ÕÕɬ5ñãüi„·¥¼$ÅÖâejK—éaÿ1f××U˜‹NŒ‹ý ÂZ·#”8ºä\F_þ‘HuÒ,ZÅŠÑަsz3ŒàCØÞzNꥉªoÊ8‰'A?=ß»©³oWû϶iQ´%©v>9Uê•$²F@¿Î€Âáo½„îL!3ôÀR¯útüE=öH¢ÈÁH6Ö­çTZ ^5Åõä%,èk`%“¸D•(MŠ£“Ï-&òZõ@{㧯Ý(›|§>Z§)2 y/ê<–½ôÒýÜ­–iðy ̟ߘßß ©Ó·ûíöqûøÐƒø©­sôqj’_Ƴ¢L›¼JkcóG‰¤'?Ý&žè‹É€ZÏÑrþN÷¬“OÓôŠî~‚ÞÞó³èç[{úèÜkPC„Ô:ø¨ñ+gŸÜ+j9PÍÃgKã­çÛþBð×–ëá~| =Èû«”Ók:?Z¡x” ×±kÏ )dìÞh„ð7°yâünþþÕ?¦¿ÝŸV"Ø ew^<ñŽÊ\{»y™¡· …à¼çÖ½KÐÖ4¬ÂË‘š¿æWiçÏîs¾M·LJô™²Ž jŠ˜dïbË Qé@ƒ¤R.®èO#Æ··[ëö‰®»ö[ÓÜÕCê {“z]KÙ⨼\i PŒAHuÔ^ï,·÷åÆSvÆ]BdËï¼lœ­ Érrô<ÏlÀø“Lj¸þïüwv/‡»ô‹Bõ¾Ã'ÿ4ÅN~\{ò j[gé5)}oG”ÅõÚùbD£y¾”Ä9zzÛá–¤buO}ªH¾­(êmP’*ã9ª™H0r;éŠ^÷ñ÷Z¡Y Ð ¥t>uûÿuô6è÷á6 _¼Ooy_ŸÍ2üŒJÎJ†¯1¥G´må—Ô(ïæÕ,© ají§3=MÎwÛ§Ÿväe.¨•\ òÄ=šºx0‰õ4‘«Q¹áæÉ! 
Gaz`Ê $ñ;1¾!iãåtuƒí}¿qçO3S7}šÏi¼“ÅŸnD}}¢)Qˆö”ÈU°ùš{Î*Åw¬BV0öHïó»Ûú÷÷Ù=Miò©0AúÎzßlÒ»àNóèN§/}74e”Qd8¦Ç ɇÉá2P­¾§ÊUè{eK¶ù¿øf)Aý‚ÅÐ˔ؓ€~ú)ÁZvð}n1H1Š–÷ðp¶:-Ūqó+hò­ÈQ¾ÑÛ'kí{ê•Tr[ /ÈGø‚ò.nÓ‡G´ˆó§yR÷yº+æ:g¶ڥ˛X Ô»ˆVq³£œRo°AªãÄÒ˜_ÍŸû§Rîúçè¾t³ôW½;ºõIý&Ç&7‚Œ|㣜Žõ‘]ÎõÅ|¶Ú½ìn¿T;÷A¿ ú>#'—ò;W‰¯BÛ *–àtW‹Ã”ž; ò8e– µö•®(‘>º+¯À ¶¹NNÕù“œñæúGƒÄh®lWAóK¹<„³Àvðø3¢ÓCã¤Ê0ÎKû~#JµÛ¶ŒGר¢h·1Þî;ç Ô4E˜ÎØ»“8έ1¨Øl…'@«8¥Ÿ}3$Ì)üí„r20­¼n}ç–]ŠËÀèb‘AArÐ}>TB U¦5ñ&Œ¾¹ÃI”ŸÛ` éS£õ0“Ëão†õ§y¸ “ìç}¼ÑBÞ°Ö"#R·‰hõóëæ÷oóôùÑÝ?oãçm¥ŠIÙbò/Òâ2÷‡©Qšt7ÆE¡ë‚4ÑÓ©ˆÑ½¸žùö‡(ï$ÇkAH˜ÔP¸ð Z†·À~5ŸGÒª¦ˆñ7/·¼Àç ÃzÕ„f¤lÙ•g/„m×`©µ‘·fºg/óNh@ANJak“* ðFfÕZÃe©} R³¦ÓÂ@ßà C}”‰L)—Ê®ý"(êåNZÒÈR¤¾¡ÌR[ƒ”~>|D ¼7[#ˆo®€¥€×Ûls ‡´Œ—aUá÷ä¾Ú·Ï%yúGrO”w”0ûÔ-€‚Æ T¿U@)FƯu¤,×ÍïoÓ|OY*øëèZ¡ùÉ~ &H0sF@ç`y ‚ò8MRæìbV'$É<;˜jÑL̃PN ˆ¸¾3ª©9¦9úÏydrlÆ8—þ¹WŠE0KØ;§‹/('°Ãü^ü ¹Ž¼ÓªŽg´[DíÞ c  8ûÔJùPäÒµg5ð•éä± †_„1Ë‹sú:MäK²)ƒ<0NÓjûÝ+:&U«IIGTBëlºè€q(ÊÊ ×·½’=¢ `dA·‡@Õê;KÆ™½ ýZþFH]¬ù$>IÿÐ#‚P¯gúAߦ)ƒ´Ç‡»\gÁ2ðŠöL©°¶òhQ-ØòûÇw?’ ¼7-ÝŽÈ–€.Fò»9{}õ2L*0®WçÍýÁùËD»5IÍÔhÅÉŠ¾l=äf ×K>öŽŽ(ÛÚüãyÖ:œ‘í^Ã9¥:p†ºóõü˜¨=+^ÑÉèþh9—päØ)õÆÞPMÄö"V“œÄN+ï“–÷=ŽÄ›.÷Ò«EêBO”ß¹ÇÞ¾Œl¯ˆô­·Me”?M‰Êèêñd$A°¢Õ›(pâH5Qí©J ´lË:.82Û¿ÈŒ0 ½¤·ID£ø6-2Á©CºnuèÜ­ï#¹§D |»J¼çi°žÖ¦ ó‘yÑš¦i)¥‘)EDµlëmâÚ/9ÜPz!ŸÙ»NƒcŸÚ˜ÞH9Hù÷²#%P;D¢42!…óäìß½¤4EžŽŠÔ"± äKGÁ%aUË«v."Áøe'ç÷ÓËÔ;¶ïÊ™–T<$èbxo{Tô2 µW 5%óôÀûipFõ€A¦Q˜ÍiVœbOëÀ¥|’¡¹}ýCã<窵µ´+Š˜Á d±‡Láw]Q‘¡ 3ó$MÓ}ãƒÛ'žÊƒ×«^— €Nñɰx5mO-D®füÙ­))ƒÀÌ}Ñp–c˜¼dÆ¥ ÇÈïUÇãà£æ/T…gl ÈäuRóÓÉïÝÕÝamzj@ïJ]ž¶Z µ ×3¦£ò’YWTóË®ýhPè·ûÿ¢ž;âhÍ®ÀV½L´\ízXôÏ\8Á‘è¶ëÄxvº eñ|>¢uI×Hþ€4?žLôÒ"Ô”SÀø•ƒÉ(gŸÊISŒ•µƒ°W{݃AÀ‰×½#šÊÌÓ„˜„îvÃ65÷: ¨í¡?°%™gô×}¨Á™d˜×Ò¢–ØezÚ­’ UT¾-_Ð(Ö:ÍÖ3LZ{¸`ÔM—“óèJ’ÈèZ ®HÖµ³M «W)çëé·}²œxã±”Cü]Kókß~ÉWU4A½§€òøQÒøÓ¡w(,uÙvkÂT‰¶„Ä·K8¿ÝÚ»°Ê/œ×$Œ/D²¢½¦ª1[ß r” Y-[ÏÈ©×^”³€ñëןÒiË‚rÖŠÉ_ýt€©šŠ¾\TçL)Úì·óÙG÷Þ7¸•q>bí«ämò-—RœTMq:é/—@~…‰êé,šp¢Ú !G*µqN…ªP«G_„º!ãïãÔ ™K¨¢`5U‹j-_Ñ~³kÊ(¿R[§ÆðÏ÷§Oû)ãðsÚ¾kiÕPS’4ƒ·~{Å9Õ Àg¾’þù]…¢Ï4#]xç¡ àοÊ#aȲÃÎX¾´Ìâ‹!ˆ…W`n&9M7^™VJ•ü ¨ˆ>‡fJ;§”ˆ·Ãªv§€ZÚK‹Û¨7cÜÕd&ã„R(*kDKJOý¾q°NÜCáur;CœjÜîsº*!;^+NAÓãä´ƒ±ý |º]^…¾“ìÄr÷™Àú* çÉë¿|ÿ×pVÕ™ÕV+nú£7 Í T*_«MŽ÷cùAáIÛoð¹:½ˆ.òö`C öV Qœ÷I[è¿ KI'¼«;Ò¯j1g¬Œ”£`ü1Löt}¦;¨ï>1 
=Vò¡ufÛ«s$îXº[j¿œb‰îPZ*ã,œNkÕÓ%”aËõH´é¾‹hƒÈÞ&¦¥âJ$È:_ne÷O“D¿‹–hÊàÈõ“éõ:Týýú)®¨Ç«{•Š$Uª& hgœÃ§,+Ø)ÎL&ˆ™¦¥ ËÞ¦×ÂþµÚ“Ò*XÔ6ØÕµþH‘žF,1Ïà.¾ad?PÓ…¨©gæ¢3ó¬iÚ£`×»›„áZÍÈ“"œÄZbûÇpnœXáûþÖW±Ò¤"²ëó9°ð€~¼|N†öÊ'š±M™ôá¤ÛëÄç¡jµÔ@ËP±îš'«'LÌ2-5ÑãÆ¼šD®³¢(Þ´ ¨à¹{š®%…¤ókÿã6¿g3PfÙÒW~úÌ^ÀzÕ˜zžÕº;¯©R»¡D0ºÔèe Î^@þ¿;fKUï¶w[~Ÿï·ø) ÷)iÐqƒFL™v¤…È¡n Š)`(ù”×ó×ùxƹyJ‘¤äHŽsArªJ3˜TbPîú±)gÍ2¢)£\.½fô^ÎeöÜÙV9óçžÿÇRe±üúfî9gÄ}[{èWó§›ì(ÏhÍÚCàÙ>unçk’%þY¢öViÑkÊ€ò;/û%d*ÀtKŠÙ%µ2i·Éwå¬DO—ͽKT‹Ö9¤–§ó„J|”Š8Ò%˜U +ÓŽÙ»~ÔýÄð{€Sãß²¢à¤®Ñ›[M‰6"½³Mu,ô=Q^’Qp>GÅ)³Ì`‚¼df.ñ˜LVïÕ|Ê J“Ê“¨€+¯gÅEÎñ_aHeJ“¸­4¶a­›óïÑ”eÌÖ÷áôWi*NÀ3ù6¼šÆÁŠòMGyGáþo;¸£ìÊK&é ˆ'•…K0‘&I[0$Hd–þár‡Q꼆÷ˆÑþíŒÊ(5A£·õ Ó~~޳ûí¢f{9¤÷Í×ÃH’ÇXWæŒr˜Xl*/VÀä—ÝÿÆœÀH¸ÞõK bu³Êh#q¿+:¸¨©ï˜}£—†Qü¡mjvݸÇ2ô  ‡{JxƒA;Œ†al›— àÉ盚5<ÛàÁG‡OXEÛoÖÆØ€j}¯ùõàùYgOÅ:Íp)Oyù¯sÌPC˜AÀh»ÞyO†_ŽÔr^`%ÐDì‚øxHÎV Ôšó–Hq˜¿-©¤"‚­BªÜö˜ÑÝV=ØqºÛqôîåÖ”ýÒ) V­4“-Zd¾ºn·¦!±Ð¢ÆYuIðŸÐÂ.(~"ÉN“Ȥ*sŸ:>£qG„Wì \ø,HGœa·‘‰”ühê©€›˜È[b*Ýú~v×'»X (šÃßÀ˜Tƒt óüzW–9ºX•‰:ç¹;;àœ£aà…$NƒGÎ;2Þ é'І»ó:BÒp ¤ÂÓ1ÐútaÇÞ!–&2‘×Òdbù˜¦œL–uÔìõÒqÀÙÓó8øFËuE9šð,S4DuŽóç«yñW逻'·õåÇ''eA]µÌ!lîuYÊP›¿Ïé㿽ü}~yþë~=??ß_Ý÷¿/¯ bË3µG,>±l(ÃÊ¢LˆˆRn©4ŸKŸƒËîQ÷?Hц}‰À‰_)«¸³¾\)ãÛÁ’oÀüÌï_*­ghžî÷?î÷ûóëýõþrw_^_ß^_^î¯o··×ûÛým!j]¹™Ã’ê 2©0GgÁØ ÆVqr$£zO‚y¾ž¿?‡ct“ mΠÚËaQNõj}UÓ€œÔ©^RÇø¯óå»¶²bœj¯Âœiͨʋìy7È–?¡5¼qCœLJ5s`kôN•¦aJŒÕ!…Dzàœ|È`œÃþå…ÍqžOk!™x]R‡¾Öµ">ŠÁXqoKG©P ¯CÚ›Þl„3ãH_§èóH»^šgè1÷)9Ò4ÇŠŠ³ ¶Wp!N¨q[j _þ;¤ôõ:‚èáfU3]^AT¨XÓªKE1ü§¼(ù¹ÿ\ åý B%çWý&x¢tšd$®œ ˜ËÑÜ®Ôî}„Ç þ?Ð GëÉqZíý©zœ[\©ØùxHÝ€ÏÝ%½.§h†…Wh›âïâZL“öÁBO7ûþÞMê&f-åÔMÙèx‘cÈr§» á1ýôKj¿¤pG±CŠF|ûÁ/w>µ >mÿ©û÷Ï»|ŧmmÿôôÞßžnZýç} ×–Z„*áEÄ¹í¤¤q>åí÷$v¾h~–}üb‹ôK‹ôûÛÜéóžžðˆ³Ïýœ8Gñ;žHÖ²&eÜaŒMñœ¡fl†è„Hùýô¸ýDl:˜n_$’ÄgëÂÍû8§Ïxœ¡)^MÁº¬6wƒ3õLHRøV€ùýÂ7…P­÷Í_Óç»<Éûç ¸êr¬áû€ #É€kÞ ÈbüÜ÷ÐW ýx7ÂôÖN· üÓÂ5›6®QU×äjB ß¼ßþ4Mêý€"vqP—‰ƒ‚Jqg\ÙhNŧ̰Ûx™…/§ Úß§·x:CÁ—?¢Ô¡ÿK¸à²mGÌL0 ¼·Æv‹TÂéü‹Iò6Ov§V­²Ëé÷Õ³t7j]Üжvy–P‡…äcfëcœ‹–ÒÒÿá¾ëù¯oš©6ˆôƒ^Ú3¢Øœ”îf¡xº’aŒ·°¨à®§ö…³1îäÞ—Û)i”³§ÙC‘Ò–gì#¤]çµjÚ~¬U9˜ÔàôЈ)•sÝJà >O*R^‰äóJ¿çYΓT?9§Y•½ î>õJºÑmR|ß[–«+ýzÏá;Z¾MuZ†TóHÐP+ý&Îu÷jý2¦Šo#³æú XZÿ;¢ké]HÒÄÆ…NHå²4-oƒD‚þxAŸí«XÐàϵ9èò(Š÷ò 
OFtëÏZN§æ,áÙ¦3éVSy˜KCˆ¬‚äö{$®³ÕV·Ü€†ÑÊï#”»:m_'ßb“æŽ+3Áv5øFogè9‹µæ(~ûÚáéJþ¦v~"5ÒEä}ÊÒ9€)s}y5UÐrvä²k8PÞ4`»ÆS •p:yfsÞˆÏ`U}¿ t/C¹^ø¦lÎSÔ=žµ¼7±H.ƒÊK[bãÑæÏ&t›ö…¡™t×å¿™,ÔÔ6¥§âóëïXÿ„è}êél9Ié×SÊ€"õà¶ÝìƒfYÖ ‘G/ëª £CºYRÆ„·Š¦<ŠË‰f‘‰_¡ƒÏÕ‘©Ð%ØC„ÖE"†ÏjN6áíS'š¤Ÿês ‚ý´CðñŠö^á䓾K  “®ýß“Å|À’n»/Fë)ØÙ¶½ï{´ —©Œó»ñœ7g-m×åó>F´_`äC²ÑP]Q·~è÷’tÞúý×ê&sM;é”yYIJ¾'žIï¼|‹$É(GÊʶ¶í—<Ééz‰„ æg”ônMëÆySåñmû¹-%6œŽTh‹H•3ìôsà²ÓÉfWŠGÍ4—B%¤_ †Ž'xûÈ8Z=gºÿ••3RîC«Å‡.uHµ™Ì$4ÕzLήû?Ó¬çc‚”Ä5–!X«(ÐãH‘;%±dôë(_»·š«/*2Kâ’äî¼NL]s´ ôçÿüç›Ù–qÓ©½ûoªÕí›}jŸìüÏ?ò¹M(Naß!zò#œ"D¤ðPÀ‚Ũ[§^ß§Ö—GÐûþù:Ùùïæþç6~ÜŸ„úú¦»j†ðþToܥܽO‹‘q_InB@=|SÞ ª9ß„m5 Ïè÷Þ/Ýe‘ê&kS´Z*óÈÐt+ÔPò –üÐ)hRúk•Q/);¦5ÒíÚ¼Yií‹0¿­Ü«nÎŒ(5´îO7¥Ö¯Æý£TÞâkû” J8©0Ú{„Ôž«#¤ßéüöôtïSoìüöæö 3“¾ý2J+ÛW4êˆIµü˜JþØ! U@éÒiq>ùéT³UÃ?°»œ1Ù+¿±åÙR¢q/J PåͧsõÌ»ƒ÷¼Þýísµj§T¶]%æãC ì ÏR¤Û÷MÓsZ¾Œsâ•^Íêù‡ƒïn½t‘“à–T»xi½Km˜”H¿W¤¿{¤óüçïŸy~_ýP™ÖHòÍZ!rrüdŸÐƒóé.TÇ“P.Z ¢¶o‘:Bº_Ò¯o4W—4#’½OÁ§XYâÏQÒØ;ƒ£óHæçMæì"úÐG8À4A8q‚m;÷F;Er¼ˆñ7(y=»êè‹ÿãËËËpñ¶g·µä¾ˆ6>?Vsò¼T_°žÕ"\î-]Î{«ê‚.wYF÷†²Ô-—.™gâÑm, >1•¾8#V´ž˜ï ¦ÿFûnýìýíõcþXlÔí6ÿŸé%Ý÷XìÍ\÷D.—]wÞ@jD :,.¨ ç§Ñ³×ëüÚoýƒª©)ÖÐBj´ï©ÀêþÊ~CvЈnSx…ªPWMÀ)%¹ìž›|^jeS§š °±Kâû5¦Ðo”,nù—©4Ý+R²uåéqRwF'Ì/M‰œ"½OZÿqáÈm$Íi|Trò~ÞÉšÕø=ÀêÏ€ÜVõë«™•z ¢_k·N örçÆ¦À Ç…ÉL•O: oS•íË'µ¹= Ã_ÕNoïinäexs[ÞbåÜÏáÈbÞѧóh$¹sH\,ÊãPÕíl?qÆïSêÑ—¯R­ò“w˜q᡽â>Ô`©àTº¹z¼ÉîÈMyûaÿB "DÌ=¦bp0¹FV‚%Ê0Õ%ÚȯŸƒDï‚k§ËIçœ<ázFUtñÃeú%FŠœÔ±s›¹þByïûb힢&¨Ø £„x‘en”bU1_"¤(,çÒY¥ÔÒO’ŸÙÚwJ%ÏmOåРÁ”n j’¤ö¾¨9¯0gô=ovë# Æ{Ï5@QÛ"Ï ¯b>4TÛ`rÂÕ*š"HŒ5a‡ûP os(¯oüâïip²ÚQžG›ë¢jåÐøfgõç×ï· |¸`DÛ逾‹Ù¾‹ûûýï œ³ó}h7èÛå^ý';ÿI(SöXr:mš~Êi¼²%í6‘Wiƒð­èÏÉŠâ.!x„YjñŸzïý‡A¢|X\S‘iËÎI­—)0;Ð 9#Ï?RÚúïæµ ýû ‰~lë÷BÙl=‡$$!©ìHÓ½í,¦Ý¦ ¾˜¬#ûX—r[Ï©õîs Ò‰ÓƒoïÑ´ýñ±þÜXc‰-) í¡¸DÔÖeœ¤¸xíÓåé»À>ÔõÃh­q·%ótÝž4÷úsòü±î·žŽÂûªwžd«,è÷äŽgEÍ.§¡¹™&ίEMz=‹@~›ƒA¦“_É7Ú“6¥ËTV4gW°èÚÒ@Yɲ͞¢äsÛß ݧdAkÜ&?(d<͘íYšŒ³€ÀSЦŒÒ]#?\·hµ«ó¬Zø&âC¥­ÃeF¼½f Ûu¬no^µ¦ ÔlfÓŸÒq Güú€„A¢¡¹^#ÃâNp.ÐÙ,ƒt0Û)¨+$ïЉ¯´Ÿ±˜w •¹"}Ô¦r‘›W&¤Á‚ÎFuï "Zk]çþr•"%âl;†úµ@½Ó%qˆ… @…ßÔüÜž—Á2îòÒ˜å–S_Ù¬='V°Rx¥×ŠÍwH^¼`Íj¡^fiâç¥3ókŸªO³UiL‡ž—"N´á¿Ëw·í«è´ˆŽé´?¤Uô¢;å¯í”r¤= ´õUHMwHG(/à°úü(˜ñÈÙKКyTD“-©æ³ ÖEœPOÅàDóaCï×ÑÇHë­ºšjΨ2 Œ€Á]p~gqRÉ¥»›Á®{CHÿ?ÿ6oI\ê…ëŽ&­n• 
qkN Œ¶©õ‡XimÑ’­¥¡„}º UÿU ^ê>c4”7Øyå@[ê€)ПP ‘ó4†j^Dü”*â<~¢`Ómé'®œ}°¢­^rãåE9ÄD7¾´ª“p'ûº®¨åù§>t–qÒØHés5p žé€‡ Œqh¹³H}bèë*¬¨˜Í¡^Àu²ÖŒضùœ¶Â`dUË@¿5CÚ}¶­ø¤Õ„ Ý 1`9gŽ-ÔXR„Y£øû'ó¤bÕ×4+>Ûdûô=å[q9æ½¹o;•x6ç(­SažvZ;NîDCD•d«Š0œ}{{¸"ðt·Ÿî{…Ï—“&eG”' zª‹ôMéOAZºãëµw|/ùÉpÊÝËd›2̟ƽùz“l÷)L” â'˜¸מã4Ü?iCw^$f^uÞž]¦ËS·p¤Ìè¾hÑè&™4¸ïuÌ…z´æCJ;y ²óW©(Ýsú8]ívÄ;ÿÿ|-jõD zäóÅp·#ßVÁµ¡îû4¾9„[Sõ"-‚Ò;Ò´Ìâó91L[0ÓPƳá`’žäüùi(ó¨íççû§ü+óT镲&ë}2-3ùHN¼¹p¨~Ž×ò3¢oÓýþ¬æsŸnómi, J{ŠþtÚ¢—ë.Ê)´,;€_R“{ÛvHI¡ïã8ÊÉ¢üpsQé[¾ßàrŽ/ÄR Àt3ëÓ4íÀ_}³IÊœãÍ¿úãèÆ¼¨I¥bϵŸÀ¥’Г°ýPÓù’ø#@Épš˜ËêéK"f4­`(yR„áâæ¯‰Hû™4ëÉÏÉzíµ[þæ7xîMEÂÀÙc½%1Pë…y*N)eøÔ_L2¯n=saf£æájR¢ïÛÿ¡m·àï:y´žÎóCÁéæ7‡y`ìûõÌ ÍF:Êz4N„l3‚#ÓØ‚²äI×b<|Àœ`ñÚï:žM8žj£ûüÓ&˜ëÚ›)åT0Ȧú.÷S»„¡'gœfù6?M»{T'¬˜Jj®¦4*”o…¨Jê[KªLóª¸y¨ð0Àn©í±¶Ú˜é¸a²KûS fBRæ¿…Ñðíïø(+Ø=÷J•lë¹—/[ï=—~5­ãóŸ³ö·ËÞsF@}ÛzÁ^Q:£$ ^ »Ÿ¦—æcœµdÝäË¡5fz2L4¨6d‹0J²# «ß¶MX.ÈB“”tFå“ô­ï2™Á’OgG€{¦CðL+Vµ±­ƒhA²î‰šÄçÈ gþëÕõäŽô@ &D Õ²eåõ\°6òØc å?{ غþ µmI”¥³†=SåÆßYTÿšÃz 2$§ÿ¤z|u iŒ5»HjP4,η çÚ±´´¤\zžæ ež«¾%+Žwh{Ía‚Aw{N1Q@”@G0¿=Llþ4¿;ÛwgLí<Úî.„~·¯­¸¹ÿž§×tf°<)¼zRÐývWªWhŸ˜ïñ¯æ¡ò3>øëÄÞüWÒ¶ÿt©ñ™è¤´óí‰tòÙýÒ2A€ñî„B˜^mâVýº’ùù4ŽUù醦 Iâ,%Ó쪪eD`È©wÆÔ2~gÉ‹¼Ã׉z¤ìãýÑ›{aÇùYO¨ø\¯–í„××”“¬(Ò7õ/‡Ñª‰6ò±%3Qb/…²#z¬»,’)Þ'ÎBYê,,âünÐЉ§xæzýéÚМ¨FjØØsÍ #[?ÅÊ)À¯ÐÅ‘`JÚÉÍ0^D3êÛÆóÇ^ÏZSÆIÞu’  ]ïÚ"ý]w…ºXÑÐ ñÓi=Ÿ{ä/ BòfIJ¨fÖï(’OZ߯?s*e;Ä­¹"žïyvo¦ì=M] up;¥àâz–`OC¼bùNÃe»G¿[…´Íà Nå7ér¥(›wénÒý>Íæ}º{þŠ\t&íÍŽ™:“atEÔ¡ÜQá‰=Á¯¼ UzÑ×ÓÑέš ó:ˆšoæYÿ]sv­6d,g½Þ¥¨§©ê¢ø`>¥„]¦™ç##ZE§Gô¹r´ƒpƉqҢ눿J©¸”qØÒýš>аGJH– ½a‘b’žÓt̨aUN ‘'ŠVr1pYÄ=âÝ”qž]{¼U£;»©@BYƒˆ³ÁÒR熰, " iÿ[ G{_ŵ°“bØ´† ‘ã[ÞÔŸ<ÔŸf4†™ÔÜEE²ìNžÉ€?’gÃ˹ÝÏyø¡'NkRëƒBÛÿ(³ p_uÌ®é×ôg îäq»!$"ç×óÌÄœvk"V[ߌo‹’Ò×ó {OåÑñsœ"ï®èéUШ&"xlïy7 )ƒ¢)ªDŠè~4¹9—2òÚé¶ŽxiF™N‚ጠî½ïi¢¹¢–Ëgævž>#Q(§±HE,ë‘"QH“Dn g†èZÑ-IòSŒì|z³Éjâ“¥˜é}2ÕuëF¢—Û¶.åý€0ˆ“K(Ý•7‚,(&IØS½óÏ#Dõ ]·¤’ODXÝ•7¡4ZõÀ~ŽD£ûð²ÑψA³uå]›Äþ†tTË› §¬„í„U;A$†”Û'Ú6Ë@š§'kÌýF¼*­ ôJãP£äiž%¢Ø «Až ¢ˆ^±{ùù}G~å/6çÙ¢!éàŽ«]¡gguJî^Ç›­|?ŒU¤†¶âT±¥üÃTyÑ™ 󤉜üÞ¿Dµ~}—8NEšP­¯ÀI wp£ò'´º”ód áË¢jH<´?àüZqz—4±HTu ©qMmЧ ¥9 ¨@¦×ã¥Ø}}ŒÔ»zåÝ9Ï$­®f3âZ3)eR›_å“büâÈÏ:ªèuv$Þ¿ЬSê3eÓìçÇvÞsUo—˜ÚL“âF”F²ÐQ^ˆ²¨3ÉèÒŒ¨Êþ7Ž(©•ã* š™&ù¢ 
YiuÎý2ÎáÑrÇSOãNÏ¡¼ï”,ˆÁ`p+Kí#Û¾›È+ñ:}Eœq¡a%R£ç~‚æ]ì½’Ù®DNÐä`ª…à"ÌʯnÍf*}yœ4*ÚõÕ8OØ28«Ša©¸»Jfw•êVÔ9Ç#õ?üKžò¿øç_üþô?¹Ïå:Cª†µ­'{ãµgZ š-±%ƒtžÛžàÌù£YZϤù­Ïu¿eôFØ‚r¨ÆrŒ¬‚Úƒmþ¯¼M€NÒ·íàaÂHÑ/}ÒþGʤäŽÀxbL6šuhqòŠ®…®Ü@ZÜçÛž^%Êâû½÷eÿžºwådÞ^®m[Pâm€Öý˜„ÆSê’|WQ)`Ât ’d‚çtǃ‡]SF¹µ–hÏï;i= ó­}C3¹{컿ÝJsªš™&îñAÊzòwXÃÞK*g±~¯X¿~¼?âŸ#NštPîû8Ë»sNÞúù£?oêmv¼‰úózðW(É29 J[Ÿ#©XÔæýïç»BzdbI’iíÕÊs]ÒEÙp2· qô›×E)ì<¦Fú*vzÚ½yÚÓ–TôiíDƒÙe’éŠìÃO çwóôg¢8gº7xh¥‚ðÈžÉ.P^dƒOçFvˆì˜ŸPù#­/×3óЇðŽâN·ÀÖ>”Ìq(ÛÎÅrì¹Û¤£Ø¢rƒl6ˆ?9ˆ?žúÉšŒÆcÿFª‘ÇSUHwÉÚõcÝ?8òª+ r|AuLzÆ7¾¤fëÍQ³}fmüñ’îÞë°´íóú·×â¾I££a1Ó¡hÏQ=L¹Ù”×ô¤&âç\Ä”^ûñuÄÉC -¯Œ°áŒr9ªóÝ$E˜>áxuFHé£,¨!Ü«úd’5­ (³àÎìДÛ'Ѽ‰¬¨ö Ù%Œß¥)Á)×íZ“wðc—îí´àÉ—,PŽBíÖÑÿèˆ8ÿz¼¦ÉÍS£mW”tâõ9ÐïC Á¨Žcw{êÆñ\I2ÇIH’ |ëH¡3’åM—TùrHÅ’®æè& TEð£„ÿýúüùñœºNeú’½¬Ôú0qH7‘.oæocB>\›ûü ‡ôsê÷û\CrŠ=›ïd GŠÑ`c›"NtÌ!1zœ}d®Äp@‹ï¶ZÇÛmŠ’9àþëtÐ()ÕÄIì‘zȨÆ@*Ç #m¹›gl’vBa¹ ´¦ÿá¡|Nªr½žR>n •I*ÛaºGt_ç×àß/ &I+ÙåÉ«•j¢˜â3M©’›…jiIkÒ6£æ¥ÃÀ•? /þgáÌÕËå)£)¶-)ôãÁª)ãünFAµE8Íæ€æõòØ­póbŒg/Ø–tØ&|ª„¦pJ@¿¨{Ïý¢Í¹ªÚŒX\í^¦ŒiŒãh盜¦{2; j’&Jáh8|˜oã7ËF‰|‹ôŽm* '7+0nåænî®PÛ‹i3›Pè4{ ñ  Ïã³}¯ÆdˆîY¥œÙW=þe¬¹6Sr®w! 
•î|¶þÏW jÛÈ.ãy¨KÊ>ª×›ÑÛ¥'4¾£yÇ4QðMJôbÑ’v`KƒT!9^™ÚLÏdˆd žŠÙp*éS™tÏD0¡¯÷ðy}½¿ÞÜW|Oâ¦XŠ»³Gg¬r»]#8zÁ(.§{á•™œ/æY§MþÂ?B\¢(²ãnqÁÏgdæi4d_ dæi¾Áy£È’ßùuú˜ípïóî]RfL)Ê–‰Ë— éøuêâdOO¥Uýþz$z*¤ëõtî‚ÿžª·Ç@¿Ðßf²¬m¸[òÎøtÓç<|îØ“S2½O~†Œz3T7Tî ½Žº¸¤¿(ˆŸeLr§´ªÏ]x»ïΩ†ÜTÇ5N™9Ë`DÒ‰Ù0$°~B¯Ö4õw-@r|½€›Ð'³å•ÄYu‹ëôûs†ò÷«yû¸Ë·÷Ûm”÷ÑöŸ*ª=ä £,’ñ™l8Q.H~L£üŽñ¤E…ÕüÍM]î>Ð-øº¨ ÚrWSÚ·IÄDÌ:EœÇ#á.¢Ÿ]T2õ»ûTÖÈIn[Ïqjšlj‡*œî(*ÆK¹Þ«àWɽ…*GËYÃÔ]ÇÅ”†9Ö Ã}úƒfvç„Þ§Á'N'©ý̘ÿ¸:ìí“°î”Ú¹{}ZÇ£Òý±óÛ"¿?@MéCP‹þ<ç?«Cœ!¸»ŒÓ†Ó¹ÕŧùõP„Zß° ’J…“_yD‹w~—4øÏ¨4cæ’R$ìÆG¥FÐèÛ®)£<‚I9‡OzýSz÷º9 È¡X’j¤¡Ë çÀúŠ:\G?YåOR¿]§¿+.n–J³9µ ¿= ˆð[±ÇÊ’ŽÐ(`¥¶Çí7þ„ÐS¡×yžsK[ÜU¶#;›%wžŽ4zêAá=ýš~‘®\ uŸñD)yîõ¿µ¥þwH%¦.uSEnž¦°¾Œr{?÷½¤Øõ>ÃF^|–<ÿ‡‹ï03ଽé¶ÚËIr _‡@7,õŠfÿ¹ÑðêNìy#L—ËQi¬·ÔõØu’=KLmF‹ÀõY^Oj,BÝ^ðË´û\îÛØK2³§^§O=Ia¡6–J8¡—LûþûÙú7ÉfÖ^ûŽzaÜ ϘôÇ‚•dñkF‹ç3ùጎÒý$iÇF¤¡X–¬â{«zh óG¾ûí¼g‡Ü<玩»ä×)î+S (TË8Êj²LŠ~ÀóÍ­¥znïóôÔ&@ÏW4Ü&œQ¢Ü[™Á®’åiR4^=õq{îÇò¨‹Ò#l£)à }»ƒºØ§ÖÏ •¡†’8uæªÕoRÚýizêÝB?Ù4´«!ø„WÎÁS~m0û¦¢g)ò—­çv“‚ìH¸è·éF¹Üécº„!#M9é¦ÕˆÃeJooé©QcÔÖkl0ãW¾'Žšs XÙ‡óÃ÷>ÚaÆ?G“¶ÇM¹þ•G±Žº±Ñàlíà)Þ"œß»ÅÌàÜR}”Æ;êvÍ[o'gÒ"³ìúþ9Ùñ/H`6ãøwz3Ï!@îÂ’FŸÓ­ü†T:èoìÕä0;H ¢wøü`z˜ ]¢ÖÒ-²4,¯‡òËH‹ÆK„-{^=š2ë€z5ŒÉø9}7ÔH4Mî"9ã©ub”d¹'7§Á¼&Þä¬úP°+uVþ]è©&9‹v<›£}Ý,êþÆ—Sô¾w(ÒöÒ­¨`­PêÝê[U4-%Ú͵0©;©®„šóÚžÂOcYÓ›Åý×ëeR¾FýÊýr›â>ó½íÝ|¿9`ïó–¢qÓËOŸ0ûQþîÿÉí}×–wÑßYß“[\Ò]ïðȾ…yûÔ)OÖ¯<î=‰²†úlÊ£ä®÷äØ%”??Kç8 ²È8m;Q—¦ü¼ 4§kÍ6Ÿ÷f*ã{sJ0ÕYâ|Ç1=îͩˑĚæëµ%>¨SbiÎ;ÇÉóf¹ß‡Ýheæ^áS箓á?")é:ßWš>¡ÛB=þ¨æ1-.WjM }ȸ3Ú=WŒ¶2:¤®GÚ©ó˜Òó+øŸ.{ömkÀ®ÒÖš5¨›ä׃ú: šäée²/wœ××ùuA´mrß-Øù" ‹´µ*U¶Þâdh ÚÑ ´h¶‡‹*´§¤«_Ô4›³&ô'Ó=.¹#Š"-MDÛÕûà9¸zãjIâ]µQ]A“à A­ÃIL„šFØÞC*6K°sI+…‰Œ˜‚7l9u´ñƒ§/`üYgC)üüX.’_J¢§™žwcwcM;}‰ÍâKæëAÄB»2ίfœ¤¼ÁKˆ(w¥Û«ëé r³GÏ=B;ËBe¹ÏîðÒlKÒÌñ™WåŽBÊTQU·I„ºØ’r§Ôv‹üa ç/KŠ{̹%­%«@ù(û DÛ)Umâ˜PGvènIã—t^¯§F¡4ÒÄ@;*Î6mUôˆBL´G#n æ‚SûW (IÍù¶ú¡©à½˜l„4Êç ßt èÉ\ÓrPwc¬• oèníáòñnq«â×^íKfÎÜOÑw›jöÔ GGHÛeâ@¡ˆDªU@_»¢#|›?nÔ¤žÞÁ35h'ã+= ÎXù§¾„ó”Ý‘¼Ç]ÇpÕø•Âu®“»T/ƒ*™Q¯S€IÁý0›E‰“‡y0 w5íìþy~Ó}Úò–L‡fzIÖ‘†šR#|Ž#¤.¥×â‚îÏèrB%‘kczÙ:ùæ,šh»›×—)bÖ蔟 >Ç:Eß™Ìé!²“ÄYð>õ©RWM†¢rHëœ'ç™ôŠWí"ˆŽØžº¦ ”åï§Ü¾9ÿù:ýlxC èÝÜ£¯X‚TGfTI2øe˜çT©ïø•rzÖó=õ-Hó0*À¯½Ž ©ôú2e ‹Á_« îƒî!g n3TFÔ˜òcWŒÞ-M¹BxÞD–,Ñ‘yRÚ?¡E˜…T® 
÷÷qè%¾HZd3ûœmF^•µˆö«ñ½xW$+S&è&BDê€f¹R¥ò…ðÆ"Î4©ýh¡w¾ZxŸ²² Û‚Bª¡]”K,ôcWˆa ž¯æbÖò=ßÿ±¶uàÈRóü‘_Ð¥=k©|‘š{âÿsŠöq>Õb8±N·-œ®&Aýɮ鴵ßê!W;Hs²wѺÆñḛߙuÅ›ù /hÝAMúrs„ { $´ç7J{F­³Eý‹ÚuæÒàr"gñï莙»•Óts‘Ô¿ïiL‘Ï&a$kS¨M=mª¥&ÄlMã ªmBòÈ„’v˜kÔÛ/¿û¯Óôzÿ;ß?îÖþÿæ/”8nÛ«Èno¾Ý½ùHí–V4ìþ±êeæ”–s'¾ø’OßL:´ÌÛ·qe|q æzLù­ç}p#ìÂ#Ή¡q1·ùÚ_ù)Í~Kã-eœß{ê‚ùùssû‰Üî&ÑÝN³Â‰ÙL-Ê8sa© ܤáOÓŸþÔ‡ÊÑÂd·ãî7zcxÞÌFž6ıS\ÐL/Ô~ù£€Ø«2löõû¶oáéóC!•š,TÒ„³hùÉ„Cºæ¬õ÷*×ëãdõîÚ릌ó«Óô0eÙ`v×¾ÀV¶ vuÞÏQׂLo»ô"$Í8Êï¥GoC‰\„È'Ñï`̽¿¼O/·YX¡ç›Ðâöb_­ËE°k ‚!2çœØ1³¤¬ödˆaé¨Ò‚:O¿Dä¿_Ò²ZX`Ð#Cã­ÙÖ&¯“gÚEK;¤&w ^-)%û_ÜŠd +é|íDæuÚKˆn§”/izÌj–ô9˜øq¢ã7/iYBt'ɺÀ$ÖU’ú¬oh‡ÚÁüMa~Á£.vsVÓ¹hoRý™ŒºVŸêIÓå‰$kf=3h (O™@UŽZbJËIo“Þ”÷U&¯ÍëG¢ñ iä— ¢ñE îŒvs§0/@ñ|”à T¥NdÄj§ëÅO°öƒßþ*¬°ë¦3§¤R2ꪑ{„øÐi4`Wã-QjäE!Á‘ãÐ¥@ŽÞWpj‹L—¾»_ΨÚ'ÜÕ‹1iFZŽí?ÏB Jš‚W¯„ó§éwꪈ1¯==PŒàd¿`*䥲i×™Ö·ïT =µøS†ºŒtißé@ñ­ÐĵUvÒÈ;±­gþ,M%2>ˆï÷ÔœÖ0©Ò¤ "R´FtÑŠž4³ Rm›"Êßß´”ŸO®úQ¾X¦‰é(Æþè FO)Ø¿|Uï"Ò Åj6r×׉õH›o½ôKšñîc˜|ëCŠøV‚‘òùós‹L†'£)¸]|Žƒo^º©´ ¿ Ro•æ>ÔIÎßü2½ûrz ã·ªøšöiXâö¾ˆ³JTòšÃ—Uefï§ZH³'*àDWTw_w þ'y´¦P²òT¶HD»HJF5Êì#é Î×ô´XºÄVx(^ å2ã¼T}É=â÷¯‚êU×ý«4EŸqÒ.8KßÒy9•ªŸò¾¦žJ¹×œ¡á¤ÇùPÛã^$ÍÐ ëž4þ"q¶³«ëW;šùÉ€«Ð—Ë`+8ý Œtÿ/(—€RÙò# ’·ƪŒôôíÔïi)¢ŠBŸä;’mìMd«¢NŒ“[Ûa%y™]Ý W¹Ó©À$³ÿìñ7ĵV‡MæaÓƒ…* •s©:;ÃÉcÓÁy¬ ˜Ø’ÌeQ.±üÌŽ†2B»ßûÈG1–æqk~z« “/ÒBNx%´ÂrÒIÐW—ô Iç¤8§RöÜóK úÌDÝ¢ž~ì¾ï¬f®h§Ñ½m`^k$ÔDPƒ^£%‘%*`ÆOåb²]S×Ù¶÷yE¬þ)ö¿€2©íÐÜý÷ù–Êu꘼ê€#$‘e6Ÿ3Ä@e”½âjbf\¹GyœŽ>ã.Å[¡7¥^Ôu`F£ÚË(4Q^Hüãïq¯)öþæƒU³—­$[Ä ${gIû.68(½h@cZ6ŒjÅÝÅôÑ“ï0ßÝ¦Š‘2í»b-ŠÏ²ãgTE~”ôÄ.éìSÓ7¿°Ì•¤¢²ü”2¤ÐnõÑH³9Eßð1Ÿ!¯v}A;r¢ÝûäÂS+ù­È"Ð…@¿€‘8í˜*AüÓ´xRmŸJ²ø¦Ï5Θv{¬Kd*¼gRÆ›¨ðýH.@p«aYK5ÏÙösÆH Ó°{yQA¹¤'1iŠU¡‚ò±IÙ‰ %H÷HR>zP|M£È„HDSÆÉõ’Ð=mçÔý¤õÑ¥S¯Y>*íiJHZŒ?öÜ‹†Ô‹êmÒrÍä!‰îDù¨¼vgÐe°9±¥G@EÅt çZbŠá-Ѥw EŠå}22ùŸ$¯•®•Ç0³7ÝAªlæ{ÿjRkÚÇeÝ\+OF©Ùp¬é’ê=ÖtAÝ›oFôCM#ø7&1÷U.ëhï+üüDl’ÁdÌùI|óm\%”¿ËÖÏ2\lu•{%4¢µ¸Q*j›ÑÓÚk?¹[Ĺæ%ÇÀ¼êמú‘¼£”´±Lf™“¤cÀa]Eé PGGuÅJÆÄÙå¿ë­·ÛÍô·VÜo7a¦á6Ü^œ…0²oóÛ’ Zzæ–èO…û?úÃ*¨MÊR¥EuAšÅðœº(ü>Ê÷Ìqx„–m§â¾Ýÿ©^Bÿ ¤ÊÝ#gMæ—.Öëë r·ìʸkì¬Èö‡™¡©WÖ˜(Rµ9¨J)Òïé÷†”ìè}íßRóôñán’yQô¤Z“rô„Ý_´j2Ó†KG·Fí´…zjŸbe«!^ðšUÍìwT–Þ1ÊÔD€§/—Zº<ò¬¥â†ª7DÙPÀxÊÐ4ƒcL(·Ù¦Yßr&ÝóðVµ;&‡5¬)¦Ž†¶ˆõç—eT2¯•ž§îïÎTU ’bûA&%…{$¯IÆH;C’:5HÕеkñÛ 
³ËÏ£jO|¤›2Îïfœ‡ñííÖˆ$SÌ—ÌTN‘Ž›SöôCùË0SDy2º}¹Ê¿SôÞÖSEQUïû%K Q<»½¿¿@ ¹[ó’ò]z‚>h&Ž2ÕT©$‹¥ú®ìÝïŽwxDä7=QÊ U@ÓÕch&‚7£p9¶ùœê:oÔ)Åô¦ÖJu>pñ+ÓÌÊgîëÕ$ø8eü]”¢<ƒ` Õ˜èÙGy"õ›C¥ øiÍ›OD+"ç}tNS”\¿tFÙ›U•^}âÚ/¬©óPµÍ7 _ýJj3ÚÑ8O5~¡”ŽNêàû‹0(êéŸÈZõS:ÌSÉv§¼DK}݇HÁÚ@…óÒÄçÓïZ §/)ñI9 €3£‡è˜òHŠØ?ÄŠ ç?,é·»~ïV߬°ööÖ½XóÒY÷1¾¾Û›Õi¯ôÒÕ¿ð6 ‡)•X5yÖçT @¡ôÙªþz°¾ÁwlÇQtÃènÿ8¶ô^¥¬‹â'<ÚnŸ :ç5_D42ˆqutlthÍ’R9?²ß¾.¸)&X‚ÖÜwmŸ’ËU£H‹¤*Ú'F‹~_¥«9Ô`l%Ô1=³çv ‚£1£GÏ rJ$hÁ’`)Ñb8€Ê~Ó§û±ÌâPC{ w¥Ôw,JFŒH=:;²Y‡81–~ÖÎkhHê‰tB¶ŸŸ·Ïûðù–ºI¤¢œêœ])s‚TRRŸ" 7k³Y¾Ö¿ x¾:$YŒ±wu9§B$xŠâT#qµø{¥»4¢îU5R€ô©ÕŠ2eUžj'ÆÏ)ƒ !«7ˆUA5†I>Ö@­Ð®’~jB;+/©é>½ù~@º€ò§qK‰ ó.8¹Û=F%Ñ ÕºsB»Ò(˜&:¬J ÙÏë úuGÒYž’=ÐSÍ/+ÄU}Ã×E´õ‘J%T÷Àb¼Ë¨¨*â#¥%޹3¤ß»,%N¤¯î·ú`÷Ô³.j,³ü@úW"Ð'Xˆ-• XNV5`Õ/+\màýi`–RÍOzxÊç©V›<þk‡’´¦CbQ]Ю•Á·ù,)~Ƀ©ùz¨¤Ç zÝßœÓrŸ?þä‘ë?ôGC“ššgå`ˆYt #7¤¨‰²pR=< º˜¦ÁþQia5®­ãÄÊšìRÉÔ¤B,fó7Ú{·¨R0è€ïiì-æI±—]¿¥'ΪQIO²ŽZÁß EÀÂÝÒÏÁ.ýÇýcC7¾¾}Þ?ßRY›âõ îZa´ÛE,$^È*Þ§b¥w­k°ÎZ©*§+(²–Tûq»óWâk-«ô £•PgF+GŸ¿óçìn»‹¯íœr¹TdVö¢[ì¤*~RQO3> <Ç\_ù­[ÓÑ­D¸’ÁÔ&ÕRüxß9JPàNA &ûùw¿õ•au$XÊ©Ù]}ÔÿK@×?Y-½J”2#upHË­©ž ±íFÓ;ë-(£4ÍöȦŒò·i§öo»a…30 ÁU5˜u”À€ëê(ˆÏ£å£&ªùIÏ‹TFºO© x6ŠÉ=ùÓ4š *B—k~>QÑ+Ôúϵ~˜8Žêè--ã<"Û‰ªÙ¢,RØÚÙEI_Ð6¹G^}bó5H³ß‘Î+28ãt«ßMÍ4Ö€w)q2DÛ’¨²mj ¾êkèÓwûµÝŸÔ Ù ñÉ”C:¸ðTðnï€T„UmƒÒâCÛ¿¸'Θ†×¼wZÌQƒ’ Îð3Á=TE¯‘*–”ØåôDý î«»@o/û¦¬ýp¿´HÇQʃH9)ufN9Á$”ÕýZéÏÒ£vö¹Î6EÑ©¢y~HÈð?"mÄì/Q”pž !{õ˜Ä*U§ gbÁg%¤Ê³8ýHý2»ë?˜«I”EyÍ)J©‘7¢—Ž´÷Š@’éã|'ú뻞_ºkš‹+s_çìE Ã}¨V®ã”¿M÷þGJ£õÝÜ1­û­ï¸]èÎuãmJT•aIZY¡ÒꄉsþÖWzKË™>¥áÑ_ÿäžÂiN«U3ýØzT|œ0‘Šp‚ûÜ]¦Ë8ñégCJ|fB×›üqé¥Ñ+3ö2™Ý¾›¦ ò·‘7LpÝ­œô_?Îý¾,¬ )—§;(uêÛÁ½Âš&9Ôpç‡*¨|-wù3èH< Ì@ó’Ò‘F¸uUQŤ´EËP5Ð K2¯Í¾éç±V_÷ރà =´QŽ”z{F¡$‚Aò'Bùµ üB]*PxÌ·»pãË|·b²7ÑM­ù“Dù˄ǒ7ÃüÈ’2 S¶¨–?£ƒò\òçë ¤УÔ)sàÁÇZÇ´²|¿$ôÙ(s^Dš{žä<½ÎoÄE2) hLB·o+¶ßzn¼:¤“šÛÞÏÏÙ51e‘óN¯ª…Š:§©3Åù«Šs#ZË‘Jðÿf€~E@¿›ÛdµÛé›C÷ºŒýܬÕp”šZžÕÜ䤤‘àÅh’éÊóõN¸Ë"JyY¿Ïؾ²¶³(S5>Ѥ_7(Ò zîtm˯UFg‹H¿›w9¾*¥ægw•0ô¥‘/N]×ôG@y©wz˧#¨Î¬*_–¨€zx¥Üqxñ'÷ú€?é_€àAi±_Ïj'»U²)¢Ä³ºÐKŽÀËz½Úë}@Ýìí‚]Õ¥­oH¯Àzf«^òH«¥àP»§Š¡û·]øʫ }[÷^Q}W†XJƒ„T¾¶¯(ö¦ÕÓ®œò¥ÙivdVµÔ T¾ª&´y—¡îŽgügt~^ZÕpR®HÐG0=¬_íùR"~@•îЦ t¦¡TwñU§ÔM=ßU÷ü¬„º«ÛóíÏíMˆççîo'ÄB…¿Œ£ô©n•݇+kŠ éÿ{~ûesaE^Õa¹Y#ù‚×ÃâÇt_„Uo"ƒÊƼAÅ4R¦OPåõM×ršz€‰‚êgà$ 
¦”Ô`ûû%‹^„й¹û6/R?d¼V‚Ü«Yto¦ü¹ Z¢Ï‹[Ô¨Ò§|2­nIoÈ«¸{ÿ>Éù¯ƒù®Õ¼€ ÐBa*ÔÏ*(Öý ¤àG4rüÀ?DÓ=%”»&Ë¿¡JaÌu¹P‹î[d¹Sµxª6›15¾Åãêo U;ßúý}tÖôeÔïz¾ïT¯â®‰l+ *Ñä¨è MTäICd)g+úM0³·~jý–¿Ý Ô™ 3VÈ -\^`†ë7ÄžwQáéòæ;gªõ³ññôäED“Ïe§´¤ñŒ»ÕôuœÄÕÊ0ƒXÆù½¶!Î{ö<к!$4N÷`‰Â™oČ뷨†‚iE›7åT/‹K¬˜—ìGçÎi‚´¥òÙcHÙS.5YÔ¾v…̾; Î›Š/UŒ+®mÖ<› ¿ O|#J‡©^b¾ÿð¤šû²@’EíÅ+ʰŸã !Óñê—ÖYÅÝIMµº|ò$u9àí|¾ˆp˜übw),iF±¥_~ ‡Êf^ýŠŽtÌQ÷9é!>a–ŠC¥Ä/õv×@åþ —h=4UÓîy’40Ý—$⌱vb™ó(cu‘ÎçøÎÇÑ ó·oó@Ïìà.—;ÒŠãÖtè7ú !’*¿4ƒº÷WÀ¬ê¡F%³ÍMª{P»ÍP 0£õü„9/ó¸¦'¦]Χ¯TR½ëL«Qè—Ñ1å¿hÃÔd5Ö­®s‹ZÁÞ3V?wN;ÎæHRXkòà ¢a¶’½R ÂÄ!x¡“@$d¡=iAì+}C*8.0Q,a+ÊIG­²ÕP§“U}”|•~(5AMÌÆHu÷©*¤ônžß¨Ë] ¹{õQA¼) Ü>½¯![Dú án¶°Ë'þ€Êêê N£ý“4x«šþ9b7.€Ûb-‡ìâÀ­-Éx£Z¸ñz+0Ÿìë´œ:=ÐïÛGrbË%ªõé@B%éhdª‹©R‘õi©‡¦naO>H–\/R+ðÂlilFòË­• ¡J &étÎã˜i¡:)û”ïU~ÐÂMAäÅÖ”³zº?î:…‰áÏ%“&æÖÒaíÀþãýÖÏæÔU-êõ 8ÎU…·-)§ ´rIúž/'¸³›áî§#OúÒðI„™axš¨´hÊ@Ã’jNŽ^BZ5Úå¹’úäøÜùxvÊ3Pš¦ˆòg{«NæÐœï‡r$oA˜:°vä«WÝcUƒµ}ðd”'¸QÛÏôRî!‘!è¢hNâÞ œ'k :ç®)£<@šÜ®#ˆÞ†˜=tÐDè5 Kò›Ÿ–Щ¿¿ µ‘ÆlÓ+³Ûø¬Ê8Qðr~Æ—IB[ÄßÜêÛhëÑ7K”>-é8ª…‰Î]4u9íÂ~¨  å•'“EjÕ"‚ø ÒÿÆH…˜Ïy©`rV ýh'¢wÄS[Eúäò¨î šçõÍÓ»±.šÒóû‰¹aH¨Êu>¢D•S½f†;| RçE¹³Q…4ý(lü› —Å¡æ:÷,5Ï3¾tk;°a™§Ur©ó„rô Îé}î¹È,dj^Èr)1™áôLxRL.7e‘EuÖ^¤&8ÎÈ1ÑAù´¸œ_Íô‚usžS¯µÍ³]¿ôPÁÁйði4Í[|:Îâiµ íhE¤¿~öC¦*ÿD {¨•òñ=®“ ø‘ì·‘%xëœ4~ò¼ Õ}`C™Ý=õÂÁ¿JñšÚ/Ó;èîGk4MèW³ºMö0’¾ŒÔçÐaEíhhy$-têD÷˜å-!ýBó,‚ÐiÍI†"J`wÏ@­K÷÷a‰¦UdPy­ohµK‹HO’}au§*[7Ú‘*ž³£Êù1½ÃÁcôç¨{öÕô†}"xfî »]ÙUM,U†|x¡Gì¥rgÖš)Ϥx.·Î/ê×ñŠþþþ4OÓpŸoîž/lS±ÃÿáÌX¾í¬É³û¯Œ‹ùœ›"ޱÚõ)íþoBb|…𤜘Xø‡àD )GÒ•ØÞTÏìò>IÑ9UBúãß±Æó"øªY¨OÂЂËùåÏ,çáþnÆyY|"ä}ˆÎ·mî$ ”Þ4€¡ÅKJë:F+<=ڙؓœG7wTÁú=LJ¨YÚ³i›•öi}v*ˆR—v_÷dY÷"tܼJ•º,=•ÓJg qŽó¡ÙÊÙ}aCRȘò².”¼…:‚JQfj úÊŽÚŠ<“'$˜¤ÿyz@ ;”(Ñík¡‡ÜGw+BÚw× (#=_ÏvÞ‘ÏÖŽO¹ŠyéÞ}é£êŽseZ«‚Í*Û€¢•rš‚ڽѕìükƒ_~Ry¤JRpŽqþ®¶ª@é°ZC˜ñ9¥ôÔ˜ªTT ”v÷fÙ¦lUšyZzç+*FSÂ¥‚Ð(tÜ¢fÿiÈúÖ ì6Š2éMzhãöH)½*ÝО¤¡>Bª³T!…9%eŒ>:ú¨×¿ªD©‘Iï!‹j1æÍ£+ùV}L+…ñ_OêÝ—ÒÄùî×LMM®¦ŠšÀòF#4 Vëo V0z¦¾Ggõ˜Ù³‚ÖC¶ ky*]¥žq;a‚Îÿø*åw¿Ž2<Мj!”ETÝ.µô2Ôñ>Ñi÷°†fR¿X…KOÿJ‘)Ü>¡vÀ»ÊÞ¨AùÞ”Èr–jßGSS¦:÷–*”|:ÞD¥˜/åÞREše˜»Æéì'mø*æÓ° ~vÒÅÔ" TŠèë5>Pñ@ó@ h3+1K{sïüä~|ÖÏïʸÇ,YZts—Î"1;bŸ!Éõ 
uÓ÷¤-Ï(OTèÞ{ÒuKÊ¢þikþ8¾MU’D“<@]Z“&¾NªÑ*zK=‘€ú}õ§yS½PÓd5¹ÔV/iîÏvz¾Mƒ>ïJí³´cžÌË…&ƒ¥{Õ™åÒ·¸¦êLލ\Û–V5„}g@¿¿š~’””¯_ú“'³Ý³ÃþeYUBè¿€ Ç¢és¬$;Ô/ d•g Ö´vdZ¼ê¨71┟óªÂƉÃ{Ôžt~ý·Ih¨+Z~—™TS{㜪Hx¦‹(=Ûnð£¾e¬Î«å?îÁºÖä)Ro•%µÄAgh„æèÏWóö†Xe"r¬ƒSðôeje‰èçjkƒÏR jÖ{_µÂ D½¹4‹IϨ‘Zs=—ÔnIzžÀtË?æ»òXo±Á¿½|L/mrXc•ÔÜÐGÐHsæŸ}zNAØEÄ~V:ï‚þ`$uœKÁÊå®z°àZcÎA`TüçPÝ­ê—äz«‡©B¼*·\ª:ÂZ~‚ž“¢‰QEiʈۯõ:¾=í'˜òßåÔÏ£qÛ­{Ï”¥Iöãi­ —¤ÀjèHá‘/k ¶SAv¼ìü1OÁ³5MFÏËüÜd†°åì¯oüF4Ô¡Ð*e¸®]HÔ@ÅlÒ8³ýþ/-cѬ+WzG¬‰ƒ;ûD¢Rš:‚1_¦ ºûÇճꫫèNs®5<ìèZq&B·¨&Ñ‘~7ê㼊˜“ã> •ʃيžšUÔÜO˜(U­D |:…I•Šã { >P“ªô ÄàÒqQ+ˆù²Úô¬:B·®HYåb˜âï_ý2ß0ë‰tkü¼Óõ2»DÀ }.—^ˆ 4h{ûñeµéaE·RaMiîƒG)Ò゙ïh{q{ØßzT‹I aHÒChÞYcÛtûÁ¡Q„¹®2 W¯ÿòXuØyâNŒ\@?*HzT`}¶sÛ?G؈瓦€mΪVe¬”ÏU¢õŸ§-£zp8© ¤mŠ(…£)ʃU-g€ÿÇ+¥÷h±!vo¾ýQã4ƒ§Ì­AºûLÖû†“´}J«$ag¥l¢–@ÕÝj¨JP¹iaÖ;ý0~ÎöÛï|í”’°R)rYšØÉ†¸¹"b&k‡6Øÿ"T8+î<>SÛ×;Ý©»L¯ÕCM•˜£¦aŠ$h£ kýY-u/ÕíÝíó0juMprÉrHkÈUEIO*wS,÷ÿÛ>(ú•aþþ4TpÅøŒêü¶«^î õø~¦wqµåÕ_åµ%ÞÔ¨ó²€Á»ÆÏˆÝ±¶í<¹%Q(4~‡5u¶ß3ETO³†TgÃ’rqRIo)ë”.¦²ê¦ÜcªÇþ™_Ç Ã j;¦zÖSš±^x E¶¢O)1 òíÖ­io¯ç¼­é÷Šö7F í„æÙÜžm«Ÿß&{{u—Wln©{%äÞ|GxžøaQùÊ5Y.Ùkêÿ7PMå¬oÛ4oA‰¶ÂI@„¥”êÕŽ¤›»hü­úóõ$ ÑeKÓ ¤›²žÛ>âý´ð³J% ÇŸÔÞ ä†¬G¢MôÔº¼ž†>âÒiEúɈðŽ ãmA*hµïv|»Ïc÷FâN¢¿S'ã4®ã47Xn»\æ—@SïÎ&8ëøÙí¢,BÑ#^6²Ñ»ö9.Ý6ôtc¡FÓBíä%|a£,Æ0,ª¿%¤?úó™@åëKPûHÃA_è1]¹§‚r× šÔ=ŸÙ%‹R¢úâ’f‡lͬ}Ì­t¯u:T¨­ZèêÜ£`e-éÀÏê„tË@Ýs ŸœYuÿU³éæóöæ¥ÿ,Lítʵ Öê™Py2´ˆö¬åf+7— YØF½‘35Ö“BnK•åPlÑâo¬h{_ȪXÔ¬‰e-,Ó!ëªF°W @€àPG@ã‡ËR¶¥ŒsùçµËÎL"—l«–þèÑÌ%Ùn¿STg¦‘©ªýê½»[¤¼ÈH½íÒÞßßßµ»Wi«neTéù­ŠŸ+å¥ K@ḺýôZcíÜM ¹é© õÆÙ?i€öCtNOWx ñôµ'¹VÿÙÙþ ÞšE@\APs·¥¼VÛ7~êª4“Zóì¾¥EJ±GA2² eV*Bê3ƒ¨‚ fªWŸ^ 7W%~dÈ~QÑ”4&&œ/ÐééëJp*®Ó¨»2¬CÔy£eJËúó³1”ÿFmEúæé¼)&²Bý©”å†'."BÑÁ§îš2ÌÓ…C'둞Ò¦„þ×àlë0!%ï¿ý¯ÂtIB*R¦…ª;©½nwÊaÞÄ^ý)µ²ÙÃüI`ÒºƒªçWúéþ¡C_ûÒߜϲ%–¨›•&˜ÌhGÓ’TQ•R¥} CyEÑË(§ÄUã4††°ó\{eÖŠ6ß72u’k¨ÆXEïÜ¿¡êÞ,ÑçNŠz£Ù¯i±öRmÀ³h§Âg­ú˜hÃz¨Ljf>sS¬wZp©=)æíQæùµ¨è u\:oÚý0~4¦f«½êSù5…Â;UZ g€ï‡Èò+ëé2ÐßF9«gÍR/:%þs VRW¨Üiis°\L;œh@1¬¿9¬ß֭͛‹Â!vý4öKˆûòá8d̶¯-˜+­![Æ­€ìÓÔJë ÉÒq›b¯:¯U³A©D=_W> LÞª©/ E[7új5¥¦WçO ­Ëì„ØÊò·ã8o$´PFÛXÍ{ëó¾åbòƒ¢Sø7xhewâj|iyªZk”,/¬ ¯h8€ÆîêŽAm iì²eÕLb·õ¤ ˜)…ËòH+|ûêÄ|ÂðI¢«¾–Ñ,ÖeX£L€ õª2Ôo„,ÇYëˆÄì¿XÕÐ+àà’TôD/¬Ÿ÷S¢ç8±¨/bŠ@x¯¯U«Yæ=ôغ[ÜÁRQx 
jØÖ[¬s¨?+…1>÷y~üŒX6&ÀÞóïVB³Ÿesñè¨i›ñcZTÁmEç0‰–æ`þn0sƒ,´ž£ ¦'ÓÞÓ‡`á/LC†)€œAt\SyµëØ([¯âUøDPiIýH»Í 5ãï–ôÕcŒv©Öv- C¼ªlû…ï)n~ÜÀ.”õ}+x š—íï<¬ü µµ(·ñnxÛ 2ÐïFƒý^'Qï&0O©÷rÆZåjüC88Îæ©KÝ€:–@¿‡æŠrvKGk:r®”$ñÚ2Òÿ+$9Å­íDt!ßzóŠ[绿@õ1à{ÔÛêKBPFƒÁƨx1pÕ5hg*.ê~YýÜÝ`ÄAI«º ͫ߻ËäSÛ¿žþüù3›7AŒázÎ Šç×5”.‡tR0œTI£VH¹·< 4Œ©ƒn(ñ™Ô“ðe?ÑÒ¦Wªfô:1ã‹Ú13ðÓKõC`¿ýšzõ­ls{~~îßn¢o¬¿ooPµxó½V£ž•Ü/jw¾¨< ØtÖ9V%å"e•5¤ë¢ÂÌ.Ló1«© ¨ŽXbÕÕÍ\ÅX=©iJ€°>Y:¥Ó–ýÓÑ·&…©%¨§)q´Zwmwµ%Æ8÷Ô@ýóùù¡o&ÔÕÐp£>Ï‹måªàš Æ è ãø:b !gÒ›Å÷סqñ±/¶ÖrVœÇÚº'ÀùUb8Á oÅ®h³+b=—³ÊØ+½W"}¯V²é;XÀÊiy`¢6L1ÛÝøâH³qà°ÒsæÚÂRw%Mæd×ÙY5‘ °~¬tP±¦§:v¹5­£ŠN% Òs…´eTx¯.>T }°¤)ÅÖcÒ¢å=¡VáBíÚUõÛì AÓòÝWƒÆ·I[eÓ9› å=Œ-ô ½UÏ›-¡MxY‰ÐÆ6¥ëtxýýg ­¯ûÕ¡ÓÊyà ‹ˆ)¦kCW`qIâ´¯j´…^”Ã>;42°eµÎŒªÈ¨¶^"2úóÃ~7««·~q®ƒà<»ö=‰ñÝ¥Ðe •o_±1*‚ÀÌIúˆ‚&rVAlh›"ÊÅèÁhl¿EH\)?‡3Í/œH9?<¬>H öb{¯eWªWcöPi¤… ~y¨÷þFÿøOë~»Ÿ[L½-¿T´œÒý Ù ñs•¬ÿ!-Y01âÉr-ƒÚ·Ñ+Ð _qM¿›]€BŸÛú7Ãø ã€Ä€p†@Sˆ:Á:Õq ÖøP¦mb¹ìË®UÐÛ&Ò×S—>uÁk’½ë†8°î£Äa%çºõ—H.`þ^-T¼ÓB|3šû޽²ŒÕQâ ‹b1ú.u ˆì Œº‘Öø¹±ép‚ørg{è@Íл§VõûŽìÖP5 îj…ª”¥Ù çqÖ*6ô}ôÆRÁ•,Aé 4ñ›­ §¹™¬Ë=-kñœ, éom´¨=;µ’yÛ¬ðÀØÊ2&í,àßcúcÆ£S{œÎô‰!…$–˜ÉŠÖ7öaz?–WqÍÜFø†©·ÎjÉrSPlâB hoqƶW7îM ç+"Ýí³wO©DÊH‘"†[LÂw<ôQZОF¶l¹šq:.Û¾œãÆåÊ‹'æí¹²†ˆ:ø“ ÄÌ@×·_J.çÉ#ájÀÎå›÷·Ë¬ì>+ˆ$¸wFÛ¯(h°mÝŠž˜)R¸Ju™UmçoWì6º—è(?ZÎ`ÙX£/÷¹Ì"'¼z‡Â›r¦5ö²vIáαþfúbÄÓøtÔ1¬T::¾…†ps~È☠ZXïg‰ºu݃ôn¡ sÄzmÕÍíÔ»M~a¡ý†óZãºLS È~rµ×r;þ"Í Z/Õg"Âc´;ѬsÕýœ&ù9¾ÂÊTF­j#FqÜ;‹w^êãžaŒ:‹*¤{`»ÂK‚´¬/š/0’×kô¸q´¥£F{Òó Ç5ÿÑûÜ@EvXÏ$õ‰ipôEŒIÃBHŒq¦Íøvͯðž/……Ûq‹9 ”á£WuˆcÂŽBí"ÌC—•Þ•^+Þ¬gçü”e!É:Û‡!Æö8•ßK0*„Sé&õ/;×>ÒVp¨@ï¼*¹Õù¤„xöû›Ó)h ã=ÕÈ  @xŸw‹št‰Gñ•;§­;¤wJBm}£b]O–u=—ç[ˆtçøe‡(xØ®ý¢iü+û¯ºÉ÷M@¸Y¹ß骢‘Õ¾a\·nm•9FÚyñ‡O*}|»ßýª,6Ø0„¹V^½â†X®~&ßú8þ»oh—Ÿ¨ÐÒº· 6Š¿ûÎnaå®*[ºã6;~¡rg´(2,Ý0‚½âhA;hc®,èÁG<ä£"Lá?¿øˆÆž¿{Üï"N:¢ëíÆšŠ‰Ýøwüù:/_°¥ÅKÑ*´³B•½ïc®ƒš|èt.ììàd6DS[9À8'Ec°!Î&€½OýàükL ÛÓozšˆ¨î†!v§?ÚXÆùÛØ4í³¿^— OÉ™-» þ yÇþƒJ#UÓWA=\Jú¸PåõQQž^¡Úö¸ÈCtâOùk héGWèÁM¹*<@è¦óܪÌUß,]·§@"#ÀŸçR#PfŽ3°Ú½úŠõ9‡ˆê¼}è/„üùJo?q6>rN³\÷U.êààf¸+%Ú(˜î©Î^óÌþ“Ó7Círsxh^¹šYcoJ¤e+_¶8ÇšR1µÉÇ<÷ÓôçÏÒAè ÈèG®}·ŠhÑ›rˆ´Ç¬ ªÚý]l:=»/H¨ŸŸzšï)ÒZöpç  ô‡cVL4š#…r¹pjëN)]¥À!¬è&¡R5þ]Ò”j¡ TÂã¬Q”ÞÁt×ál›òÛkÈ’PóïÚøõóõ|¶·iyô«ú;C…åíîÙ£ÿÁÇ̯a1m¿¥ÈBÔ 
°¨MÄu÷WZ6‡QÑÙEÁRý™.k}O=ä„,CYˆD¾€¦¹3?îìî"Ç„ðÖp]ñ>ŸÞhнé©èð‹õÀÜÃ.ÌhuT€ŠðÊ@µ_…×Ýåáþî‹$n!v‚OeÄ ë÷qWá-I¨• ¸Êî *çÜñÞ©ˆ‚!ÞT–`õLøÏ?hDñrÕ|¡*‚v¥³•mdÓcgΆdn çÁð\«“Ø|P Ì9¨E'¾÷‘ç1t~¨º”ùH÷­BæéÌÝcKßûëôÕdУk÷E”ë»°÷OÎ ó—‹ìlûùù2¡ñkú|ÕÂ…áòÛ£ËÊbA=i¨Ù29‚ê]ÑbýaXÇQ¢çØøµœØX?°âÃxŠïò2P_š`ÔO@&.çŠx ç;Ð?0¬§Ä_‡Òºµ\AÉQØ)bsX «yhN½w÷ž[ΊÞ^i¸ XáIÓñ{/Û4”o××ühÞ@ùnTí‰Á©ƒ2÷‰`S%•ÿ6G$ÏŠvZù†Ùâz"g²ï±Z°ìs’'Ö·Þm?üãŸÑaßŸÑ œMr@ýu»ûÝÇr¦©‘ ¹’€RûÈXˆhãó‚Þ j–VÓãŒgO—c À/'B-D DO%£ ß!¢~§,ÃÂÿŠ3Î2D¼¯tp&äC­&Šô¤rþ^4Ï!Ur“¨¼T€I+:ͽ%æg¬£û.ž\ùžŽ¡±¹ïl(Ís ¯×@]ƒtÄ õ¤ªæââ’ë~.Mú,CKjz£‰Ò!ªMí¨A9üt= ÷XOLÀDÝwsG =yŽŸz‘ªÀ8¢hõÈ)4c¨ô)…õ$¬Z ­'A}»Â£ª²ïç2cóU<˜‘C˜©»è>©ØsÖ~@®€1û4-çÒÔ!{Îøž=¦= R(Ñ“B{ìfG7¬Ë[¾íÌ´ÛðæðÞ·˜Éø¹ˆÙÞSçiK.:… pa®“ŠZ l„4Jë™Í.•îRè‡ÂpdoWny Ö,=ç`?}:a'ü÷óS»ßößOí~þ¿°5³´ù¶#»±n•$H•¼™¼Œô·™^ÇI޳:áQ¿šñ HÚF¢Ž¿¤'µD×´UJ gz£8yÎê|»Í·ü|̱˜Ú2Û!lFÛÒT9Iß¶Yv?LÇŽŸ³;}8ýèC“ô-­«/)âÌÒû¨I¢ÑuïOqÆ]Sº¸·7çD%¼¹eyÂж‰pÔùzDœ·Cº4mö’$_Ï-)ýðõóõ‡KÏíÿë|{1]×iƒa2*sðV à¢HM ßÅ=Q„Òë¡=_àì$ú8/5§ûô&ƧËAô2*ãVWtЫ8sÎñh£æ²SE¬D›¢v¯ë8¹ ¤×TÑoÿ®öV¥s=•Ü”˜”‚ü‡Å_†˜HÛ«öG)´ÎuöV«Jý[Óð©‘šÄÕ¶s1S{7€˜¨ 0O†yF5Ão±»šxMƒiuð®„óX"=¥jGRUP+®ÀpdÕjˆA?4ÆpŒ>ÁÎk°ÆÌÁ˜·™º5L¨ÝÀY5Y¬å6ãEJA'àòÔdDùÞO¤[ºYFëCaª{r(rQÝå÷umÝîz”#P¾¬"rzOœSÆúÛ¸‹¯À›Ó„™ªb ·ürQ˜ë•+â–5Â*ßh^5s¥–n³îíð@m,ç Ϊ·´ÖF#'ÚFÖJ{•’s˜_Ä@ñ·ƒ–ØßYËùýÅuß\Tí~²Ï\tIö§<‚¸Ô'4 ,CD§)È1VZü‰ìÿ÷~øùúÉ€tGõïëóË­ŸÿÆý;¿ENÖIìfKNò¹?`ÍK`s©8yŒ¡Ãã+N$Rá R©ççÇÏ~7ÿü£ÔßÛýãö×Üû¿»û_Õuo u¿ÝÝÏQÅiõG íMÜ-ñ¦zzäà‘ôÄ:ÚˆßN÷å+·Rß~Èð÷«¹ëû‡ûõçO÷W݇û½›FT<åüÜoË3U Ò,"—¤Ëñâx ‰ùBû£³a{8#FW†óð®»vº~®ö<#2A]QÑTω±cSgÄÇ{%œ..n…¾ÛD1ûh  »&’l –\Ë_#tç€j (ijߔìþO•‹ªjx݉£‡äiMUì§A5E Îq¢þ«,✠NùoûÈWÕ<;jwÃL<¤Aí‰*½ÓUPïx)ßÞH™Xx®!7;߯jA¢"šèe EÁ¯UD㆔¹#ü9Åéþ£ŸÆ~x9·)ôçf8¾OóûY࣒Ž$ð ÑÂF`iš¬íšÂ¢ØWÙ=Ñ\f@–áÈ{¤ E~þààjJNòu¬À°È(‘’fÆ)vø·ñ«æj'Aå}Z, ãRüžÕ¹€ÒWK­[QµaÛ[ØË³$¾£ ,éý€Þw¶û¢"c5xµ„è÷!Prœhêå^«ž+ò ô˜*;jBÀ3ÛcÊĘ£$:º%.--2*ˆ“æ©Ýhg0øtºº5ÎÞNK};]D ´^OQ:g ?ðó8w¬pûŒ;¨U4.¦ó£æüH²?FJýš}ÒÉ]3ã8}ºï[ù|"­‡ÌS kgJœ`@‘dxX"º½Á¢øC„¿Á lÄ$ïj;®vÚû •8”ê<áädS~ï»¶)ÃÄ’^©C¤UÌH±Î+Ãêpz…@)iÑ‘CŠ4$Ó<Ò_”ÜîZ¼´óßwç™Oý÷¯úÀ ~§*EB:ÓEnë0düý½|ævT£I=(TR’útE©¥†‚èÀ7±hf¡ ÍLê•Á¬Ö/ê#¬­¿Ve¨_¤Ÿù²¼ZcvþrÀZ|Ï;´Í™è˜ èÛ¦ 
•ˆ¼JGö±fH´lÛ1ˆ‘BõªWK.„¾´žš$t†`Pƒ˜Ú鳕—Î[<¬xÇ:©E„6ö[È͆6yi]=ƒ|zž¦£Þe}ªh}S9ší¼FãЦyƒÍ)HüRÔ9€QÞ'•O«ïc2lYc¬º÷sÛ5X-‚ûiíµÌÉpéƒÉ4A"ºu–ƒ›h ­?e¤¿Íû4žþ÷W«¢õc'öz„T)¯ŸVƒ4-¡F¹ìºfDIùˆ¬uT´¢)"ý!¢‘)^Ö1uµ/ß«Œ„Þ¶¬"Š´ñ´˜pš1ºüS.‹ñHsZ*OΖ5jŸ%g „ÚçHÉoñÓÜÆ¡ë‘*Yyt#UÐã@9¿@O5 ÒÐ+Àt‡¹±aFãøÝÚMCÖHïó9Úè´Bí”ùe¬¬K­6Ôª¥ŠHr9Tþ`3÷hr‹H·BU=F…ãšhh2  ¥o£k…ÔÐ)Dä +øñÜŠv~7Ú{¬ ÷´³e°m2¶Ý—‡"3òI «âúIPS"Ë tùX¿>µ¾¢Dð…êߟ7ç¬ÍãÇM¨éãeš_Œpìï|{Y¸“SÞ9ñÎé,¯cFï7†›¼®Èež.j€ëvÊtúP¥–ê÷ߌÁƒŒ~±àßf]c¬¾Ã¦­Â:Ëy `’Þ)¡öûùÚywð¢Ä_Øè) úOKݪ¬¿ùäœçÅzÐäT 8V‚.詀 Rÿg†^d:r±*v/P|„zÅÔ`uQa¸FžÅï#,æŸÃu­ê]JEÊÙy¡¶^ç£jñó×ÌNõ¯kÂÒm%¨¬{!˜¦AkjëiRŸ¡ý¾´Z^íÏép¬ñÃ%ýy-"¥tæóvÿ†”[KÙ6ÓÏžÚ˜IS£UÅîŸH}A…ž•´ª î–Ô"<2ý•Òû'XûØ ‚ÿ2Öû,[eïÞ7ý?½ +í°öþ@*ýȆVÅ%¤V9…I,‰9—å|]+ÈèSmuHm+MHgþ&H½2‹“uÇÏV`.÷霢̤0vb?GH¡ýóæ9çjÖÔL“— 9p Ó¾¥ª^ 8ÙfZ5ç£ö¯+<— þì›ìþ7HÞ³j:)ÚÐ6àÏ@%ÚäWMÕ7K‡õãž×~¸ž(LÕtøi0ѹíÃTP¯/' ¯®¼(G›ÙÁW ª×wXâNqƒ{’ÏxPI¡ÔëªCK¡ö«1Ê…2$S§½^Ýú1“̾µµº¥1áïvv=Öåìö‹oXÆŠÞÌ0Žc M°ggâ/1%T>\a÷ŸQ¥£f…=œaå'ßÕæïxßé;[_Ô`¨ý sìËÛ?"=+]D¢¼¬ôU²¬ªÒVßHÔUaÕ«C%çѨèrÉlÖ¥Œ"†“»ƒy[` '¶™¼è-ÂÁýßà¥a`#a \|ßÔ`E»5ÚÓšm5yCÁ#äD1ÕëÐ^zêÌ ¹Ä¢ª—ŒÜ¹}„˜!RnÙ^5¯¯»øúög¶÷qa„ð™EZCHŒÔ‹¤¬”¯6Ên›¸£=3Miu¿Ÿˆ‹jó_}æà`U+æ1öjÀlUå.”iÒ9“xÿoïÔ^¬†/ª"µšå ´Te¤ß~pL÷‘Ùðû‘Ó]‘0Ü+l³E 1Vx–ï¤Îö…|!©;Sxw?¢n°ºÕÁhË5Ž×dUÙµÒ^«¬«Þ'ˆ>Ç­q$“&ªQYNdµ8R½s°Ü—2ÐߦÓÝÓKÛ¾¶Ót›ô§ÖŸð«ž¦n\µÓ6—š£š²è±ý0ªî¯j~ Â­(•5Tebåj†3.k¨ÀF_„錿~~¾…÷IÏm#š+ªíšòê3SåÕ2áÈÀ¼“Ò#ý]R~Œô§‘æE¾û‹Ww“^^fÛ¾¼„@ä.¿ˆ{\bj Cµ‡+¿Rï".¯êOã®ó°é‘ÌaA#gõü”:Cjw¯?!=G‰RR3N+kš\_ÑS¤ü²ËoynÜ´>;X ¨÷€tò#¹ºáåÜ¡üÓórfÃðpæfÒÎ T#èI­&.P‘j²Ú{Hv"‹ÕšP1 8¿ŽÖ–e^Þ¶óz¾¸‹Ã²ô’öûŒV†y‹/k”ăŸn,ZÖÉsýMÜY©9.'1özªÝß/Û”‘Ò²AYq:h"yˆž2R«Z÷_¶»Ðšz‰‹8¿ê›ó‡º%½ë~ᯪ6³äÿÑŸºÌ’ÖðR§”ôÛ1MwßøtvÝîëþ½uÇôY…ûŸš×K»Ÿ%¥gkå*†Ö{eœ.h—G*Nýè# õR/j£{=DªC†­xJ¿ÓNæij??Í=Ô\…M¼ìÓ A®9?§WÊ÷?K‘Q±”—•jw±Î÷ÌŠæö¿Øêž­Ü­kð–ñŒ ¨.`µº„<Í^Ö¸.Z™“¨bhû(ÏÚ/£…E°¿^ ÚN ¬Ó[¸[ñöƒ™÷X¡öi*@7K‰@­PwÁ#-D1O=?¸‘‡EݹÎT¬®C:ºØzÍ}ÎÛW÷ùgVŽ9Çr kv #[ùü¹ÏÏ«ÎzÞ¸:¬••Sa'@§I’S©Àúž"“»/³Ð` .–:IU4³)£é*p÷@ÎbºP[ÛÙ·Ô[Ùð뚀ÚV5Fꇟ*‘.~ßç~z µ^@šå«ækåÚ©ç ð%œD&4¯wÿðsñ,¯ekÚGO.¶iƒV!dƒOÆãŒiÕwPE06xYE¨öNŸ¦7¶æ¨(³$r edü1õ ¹š_´sê7¾´îIC RUH‹Ÿ‡ˆÄR=5†5J_ÐÕ§øº}f²¨ ³´ÐÚN| 
¨$‰5vùõ;€.U­'»¼T¨K`Eº*©Ü9eZ‹ËJX¸wRœÖ­É¤l—‡P½†½ðP"¨ß)T÷øÛéïÇ8‡Ñwç»Üäx›Í+)ëÜvP‡˜Y¿ë2MC’Ûc°m~­Y×Þ¹Õg6à‘úeÊΑF®Šá©*áL{†v~KiŰ—b6Àì½/´ŸõR•üÂg‰kÈúØ¢FÚÕàë{5S¨{UQ¸:hr^¤/„;ÊpŽ4î´0~ûË@¿aWÍ)@&ŠËí㽊mûûèù'ŸÊy‡@gÿ0PÀ{?ÃY-¬”Ý1¨±`ýö‘ÒËÊqÌd—C•ci%~âHP®#Äîr©)r™s}°¾ ¿L,Ì7z¸ÔÜ×2ÜíŒY†¾Ìâž[„,ó!_Ú©Öm+‘.è`µXêâ°C¬ª›-+vÂVvW BÖ­â4  zð_ž=H†ürY{OÛÊ—6*ùS«šc¤_ˉ½n ’º[v~/xrˆÕ“bجëíj0¯ ÒÛñóðs½˜µ—<ÙNÀ{z…Z0˜.Fo«uζ=²°µî$aÙºFª >1QX`ý>Ãꌖž&}{›|e[-†wì1rŽó«–n¡Òºâ¼–fáá™Hå8Ôa÷ÎÒìS ©óa;ÈWŽKw?÷C˜ì¸F.­e¤¿‚ Ø%.¿ê1-Έ͗5ò ¼æêД¡~7nƒ6Tã’a=™×¬ÉcᬊÑ4´ArÍ=^K¶%ȉ`¤-)Mí¢«fö h†¢ýøsÔ4B?¦ÝU¼ˆ0¨ˆp„6û£¼€1–<áÇ“B|IXÅEÍ•<"ƒyP‡Ë¡RÞ=ÎïèXyŸ´FÆ¢ øyÐÎZx?{²ñ͖®+Ø'+xa“ëïKM[\<øNR¢·^Ê` ݧb±Ë.ó[ý:[ÐO7Û_íÿÇ…F cwB÷.‚^$#úF%‹‹K¸ŸVÊ{&Ø@Àc­ó?e·®¡jÔã}SŠìq;&¯·5ÎîãÃ#¬ÖMôyhŒÑ útAˆ‚!|d Ñ‹D7‚Œ¢Vý´rY©IuæRêiåèH¶ÎÖÛëÄáCM ~\Wò^8zÌÓa´ß ¶N®rcbwãê9\Ë r”ƒõh“ˆ—ñ"í÷Õ:Ã’Z@_”DQ©ÿ‹iøûÒ£ªu(Ö`j¤Zl )ºÿú$Ž“Ù™YÝR#b´ÅCýE+шÿMˆ9¾Ç ‹ˆ˜ú ”ól@V çvÁØ%¾C aªœŽb…ªxI5…¢üh,óÀÒǹˆCLrp÷²£ÖÚT—½œé\"ÚÁ¹‰ÎIÔ.ì]!&†»Ͱ‡âËR>Ð÷¾P)f·Qlêbz£‹ÃiÜ56--¯p"pq¹[ÅÖù8-Ãhv^Nâ¹a…ÍÆ:yßáf­mÇ–±ñ¥>÷Cðcˆ!0ôýy“¸z©ý}1îÿõ6KÓï«/ë|EC, üjŒÇ´Ä)f^àѸ(ôñ±þÌv¯tˆ=5‰“§C]Â+ÄÔÑ)"ü‰¤¶Ça) # ¾·Ñ¤TMgå ¥çVa§1 Aê·EÚÄ.ž£ûþ=’ãuŸ–5÷FWµ× oA„ Ö#v‡!µŒçÑf¹Žƒg?Ÿ ‡±cétŸBëRÿÈ•Ü`5<ŒøËŸ$D °q=pAju¹¶î.{ã~£me@µì;KP÷$Yoš ¸q ¢&›ù~Ù ù>Žºª$ÿý4Ç©¿‚’÷üK/õ‚¯¶›/ITk¸™s†‹ûží¹à«)è.•až8œ4bEçô’«Â\´{k¨oµŠaär«šý-B<«èéñÍÁÒH·Â ]^`‹µ¨êi‘Ç õzOl}'Üe°óWqzŸÃï#Ïžï¸â;®É•A~'ŽÈæÌ-Vi©•]¦…žD*¦îZ+Ž€B¼¿ôôs h(‹`" gæ½ä¦©ç*¤“Ë»^À¸] žb3ÞãCÕrh™ }Ù]¸×{ L´Œ`Ëé@ËÕºKoÝëF]¨ÓÖ0ëþúº«¼°,*ç èЯ+Ї~LÇ ¢óYB‰ö³©÷Ƈšûk”¬µÃõõ\èá- ¢:ÏŽ§ä¾ÒËi«p²[¤·áž6Ü¥þé¡ã cP§ô²%>’ eÏmRë=ºÆØAžh"݅ò›Œœž{l¤i–ð¬lÒE î¦ ŠÖ™ç)ù;4À*™¦ˆrÿY¶¯5<1ëd׫åváy¸ £ÅÅ^#/]ïû¢ÈGÅNï4ÓäÁj3õ¼ÚzwÿQ˵p~So¹\· íFÒPU4¥^Ëõ^q¹n6íÅúú¦]9@TçÚ%In“^Ízë9J÷ßä»ù+Qž®îÿzN]¬9`JŽ=žÒ$îRKÚ¥­?Oû úžxI# ÕÄãÙ±Þ}×NK8‹§³¦#rL–3îéC ×ïxŒÆ¹m‡CÒmn²X»e+pÖµn¦*oFû8ã)‘0Æ 9ÎÈ“élÕå™”:x *6‚Yû&bPk¯ÁÜ÷½ 4Ñ•ªT×ÀDŠ›JjÈ/¦l)†t19àû}¸€R,vSÒl¼íhnkÅ(ã(³³}S†˜w>§e!}Þ¦ã°Ë+¹ð½Š«lÃeò¼·öÌiÞý4ù"ÿxæ"SE#æè c¦J‚jÊ,Â,š÷q’W¶œ÷¢€%ÁòÍCÅÍ»[[¼PEGYºä¼/•}O¿œåg…3ñCÏ'‡9 ”¢ý¯aæ$¿*`RrüóÈwRGö‚’+‘“K×¢ú{ŠfÞ§•߆o`Ï“]ÿ>mn­àA\$ÐZ„ä¼ð8yBI)2œe˜™²ЦàãAAãj« /¤ûîË®m1ðÀòs:õ‘(Ö<ùu¸í– D™©8B´L;¹`X`D³•0Ußxá´=Ìï ˜ØÑÜÎ%õu@'³Ñ£»Ç\ŠžžÊuÓ™ 
!µÛ¶)®d‡wµ`á)… ¦f1]úñ ¹ÉN»c²¹ÅœK֟Ͷ)ƒŒÏf d%øžØÍŠÜ1u½ZM˜,z3érÿµE¯åŠYMã$þ$QfÕ(Ž$\Qÿw±ùqœó9ðË81„o§ÀtåskÜ?—Öê]˜YÇt‚ã Ãd”\8LmV 1›ÚÃüÏ×þóó?ÌÝ:þ3ÿóÏ?ô}ù˜‹8ŽW]F8ÛÝ»ŽøÕkkØ-Ϲ.{wð^³\‰Ù_õ:å"?ï@nÒ÷¤B¼Pn“Ü!òƒíœÛ`;¦Vº@ɬœÑ„å}üø˜õsZ4°‘M2G4ðˆs ¤ì{”ðëá%W³°œ¹!Á ±ê~Ü%’2 ì2Üt¢qÅꦈñ+}0¹Uþ4÷WM§ç2Q4¶‚{¤<—å´Üx ík0E˜ü!òÃjN>ê!6YA“…q0;ö`F8q‹z}çÁÆOi©\Ã&¢@¤¾Æ–ä¥NnåmGïfe¡%’zì¯w„,Ѝ¢mïÞNÑÀÔH ×ÁŒi†iÖPë™ÄqúÇjˆi¼†ë6z‹Z*¹–aæ¦à¢1ûr'qh™CYØo—ˆG˜FR ± ò‘Á€* ôšg`ä¡[„S÷>-[³P¹ÿò#3ÀÔ®$:ðcEÍPøJ تz¹eIÕ-(Cjƒ¦ÝQYöfZÅí¼¥7³Œó´îºÖGšk<1ä@“Jmá0çZí8ÄÏ*í`Võža…ÌYK|U+Ì4¹€båñr2{õ _w©‹3Û‘FæÖï8 $Ás ’xLü¥˜Sïœï|ŸiJíƒfÝôœœÑòOî§[QÜ(C¡1*“'ÉëÙpæû¿ÖI±\M þ<‘˜ÃUªR ~œàç¹…j„ºlž|}K€ÿÃ]ü–nýrHÐPÞÒŠ^¥  Ô3’†©œ½¥fo.ó•‘R´ Ê·’¯¬¦z±ZÒ ðòþS·ùç^þÄ@»Ø¹Ïð‚.R‡îuêZ‚ZË–6²Sšªð+»­êR=\Ëè<šÇ½criè¿C=N€kSÈènq{¥CÃ@źžžX¾[âa.&éÊ{+­¢|}Åyõ@åü’c(ñŸK†uÕçí 5P3à¶ûœ 3<åù… =µv#ØÆ¹¦ÕbÆhíG«º¾{z|%$žB£ åñÇ»¦—)—Î1¥3UF°4^Mw`~ç´ÑêÌ»^«Z½H_9¤ ¶š‘¯çðƒõ§ò%~KØIïúi«¥€ò#z‘fóL:ÐSuë ²ÔSyñë´¬M†_õCRÖ>+(È(‡¡F! ²"5CÚ5FFHI"°ô«™&¶Ã³½üê¾gxÝY\o¶Þ‹k KżgôÜ­}L €†/ú•š®)™ú.YÞüšžñøEíèÉ'äÚ ë°L¦ãÖûnÕâšzÍÅ1 h²Eh·òÝWz·ªÙíW¡ «Tj²Š™uŠpZ³:'%œsè³ðÍ»ØÉjk/áÜZ¡iÝ‹Qt ¤  kå ½¦Æ°ŒÿùÏŠqóõôgö/HàM­ÜeíÛˆ–0“¼¥[ß»ÈÞXJ܃%m»h´âNiÛ!e«ù“‰™'ö“Þ{ÏÙåç ¶æ_4×j(*Y~å%J‰ËÃ)|ÃwdÎ8ƒÖÆ“‘¦Ç²(m°M2òC P„fP¶˜ÂPåëÿ×ÿ{¢¤*xg8&ÒÔñPö!Ç÷ý{‰ˆlÍÛg}¦v÷|BGR¶ºM Juh)Ä«ƒºï‰CwP³/h»HÑA|çL©á³3{R]Œ Þ¡#¼Ôvgš"ÚoúÏâp/!% côV)òl`‘/þ>†³åÒž£œß¼~?+Y€\<v÷ØÈȌĒ?†ÐÜÄÏ.ï¾ÐjM•åÒ²žŸÛÏèÜ&mmGoUˆõ0‡/G« °´Õn ©šïç9Áç9y{Ó–ÊŸ–Óêÿp]z™Ê8š8tð>ózh§:îžúÒCÒÅI ±^Âö'Ü_?P\¦RŽ{J•³–YØ€>l{㉳O·ÔÿVCkÅÊ¥Ÿ{¼|5YF@GÁð_áÉP[Îeªƒ0<×¹çʹ|ÑÀ{±p‰ ûYĘë#ñe§~[Ó˃I ‘@–K¡< Qè`CÓØ1Ðß# ãH>¯PCsR¾S2{Û÷Ažpl  Ï^°ðIin>ÑÝfšâz†}÷÷{â~ßì))§é>¥ç³Øðâ»h^÷SûŠ»¥î°®~yX~\öe±yyڟϲ!í &¡Êód:5I†&ÒÎ!Æ6~'EþââA¥zg?Qå§SóÓ©|Á©€ñëlºSû™«víXUmc‚:˜ê…´Gì?sõ÷Avt>K8(lz¡Ç@_UTÚQÃ.lRU5'_ë=9‡xŠŒ+}ƒcɧ"NM–?—x•¶ä“uÿR†8g×kdz¾œpñ|½þ"®»ÇÒ|ød==Çó# ÚR†ì cÒ™Iìú^기ѩ![ÊçmMDhü"«„‘ŸÂ_Žb¸û¨í»2F÷¶ÿxï­u¾Â‡¼¨áã–xK]x݃L®È"¬)q£‰«3?+û’Uøy“V?›‹ˆ—ÿç7Ûœ#–G5K9¾_äöËt熲m^Vˆ?>%JýÖ?DtDâßþÅÿ…–Só.ïú¹µóóóííæþ+­æ·÷Ûíöüô4ÝÝ_,ÖÈö¹Ar³þo33#éâÃØ›ë¼ }.›ßz&S;JHj6³· õ¡.Józ¨Aêìõ¿ó¿Ó¿÷ÿý˜þý÷ÿŒÿº¿"‹8=ÏŸæ²”¡9üµ=Õ‡¶5%úâ®)㄃[µ?%·J"]E}•è 
Gi³ì}Ï%9½z$T-J0¡Àçíß+¾çeX¡´qFjf.&ïP{ݶ@õZ7€Jw/ª€f€1õðiOGd”aïT%&Š _Í(Ôµë¸æéZ7…£i”&bjEbcš­ÕÊ/HÕb=‘)¬@úÕPæEªi羚F ¥.«$ÚåŸûCgXÇ6^S}Ø—5Ð ˆ _†™]Ï…åâï«~MoQ-‹€…Q7tp®mVÚj;SÓÖüVè›§lQ*]˜Öl¼DawÈ`¶= þ¡÷+kÃ(âmî$æ«ïÃZ„WàÍôŸîr v¡þG“¥´lY-·÷-9™‹ZlQŸS©Øqm­«É¥KIPÅ—^K›ßнÉÜ&öW—¸¶¶hèÖY´~3?‰ ³Ø{J°ÁŽè~Ôcƒ¬F?I“Z¨2ɉ‡)»Ñ¶blÇQRp@I­U «»G™­Æ]êg•#^ªk®Äd‚¥kÿžè£]…= ¾ƒ¡€ô4U,}=ómw@—w~1ïM Ávà‘|Ú£÷r ÁDiMÕ¡ò¢’wuôà›åE…Lôˆ¶„«D'{&¦í‘‚ÔÂÅ$ÖGŠÐÂmºù¯ƒuûóæ_ùQ©?;èùãz¬ûh:—tðÏé¶´2Î|ø‡¿¸°ù¾°v‘‚›ôܦy¹º`$áŒ2çâFy)À¾ ç¸hø¨N@¹¢j=ϽRÊÈJR‚ìb3ź«*Ãbh*Îi´ž”…½¥"k¥å´¥)»ùžãÆyRTôÂö:9˜bõJÁgW˜?µ0ç[‚;ÿøŸ³²d:¼v€¢+-´ E£«Ïó±pó[¯RW¸öÙ–:£áë®Î^aPsô1ìΛÍ1UKú¡lŸüUÒ[4—»Y×Iæ½²n§í¨0)Û±–€žK€ŠÖÇõeœ»ÍOÅ€÷ý¿u4 ÔcçB'È2tŒø8×»šÂTNßXŽÊ­Énœ„õ‚F›”l¾|åCÉíÔ˜­ØUâ2¥í@‚ Z; jÖ¯DûMeìHR¦Úí|F8¡D®Ú œöm©jÙ#ñßK»žãºa(YåM!1jû¦ ÑÝò¢kÞ þ_Ý¡¥Ù¡±Ä¥’¿œ}ÏšmK‘þ)ÒoôÓF1§ìÃǺï7ü ÏLhNØmOQ›_Prï;Qµ ‹¤¯óŽ’]_ÿ“«áÒ E±rp;/¤[wù G¹ñ«§Ô;x opwÉDɧž„ BÂêÛ;d?g ñ,á‚cÌ^H0õ*\~Zf1ÛSÿÎfPbX•A¦ª­îb¨lAßÄP\ÏtD~ ,«”–ãä¹Z®^¨,ɪÉUØù¾¨wèäxWCºåýc7?ÇS¼ád%wÒ$'žâLêÞSÑŸÐré&Ø…)Éaîª`.Œ•ãq‰éÌ,%䆒…2–AþRVìΙi1ÿW)!=ãXúQ+wJûÈÚó8ÉúYé ÇÝ*Ý’/y,ë„Ù®ÑZ$ssFË?tšþ¢Œ2r—ãø(ب!­2”Ýå Û¶ïÌ­' z«š2ÌJâ¾½íB¸ãÓYÎ%ÕY”Þ$U¢´îŸ·tV&ºú9ô‘T†ÒpwŠÈÔeQ‰µì&³(Òw|™ÌÚÄ”Ò)×èî!³0•ð’Ø50ç½eÿ¯¶<Ï«› õé&^"ê<=GJÓ¼MÓp‹¶žþðáþ8À¬Ï‹7$«b»‘¬ªÁûòÒ:_tšÔ‰ró>‡[sNSæÒÍåÚÍ8žw ¸ªM íÑóÏ#ÎýŽø;Fº”¾eëo~ Rõ¦ù¹ÃÌ›EëÏj–4[Q\k"ênÃë»ÉáôGÔVáÄZfãx–Ó>&iˆ £yKGŽèê’ <# ¶ Í•PþnŸsê’Ìw¡½‡ú˜_Q„²Ê{gŽé€–a¦™ÆP]²zbsù&eøé·}g8½ô¹©Âywäo|™ŸÜ·c»Pפ KŒq hŒ#v¶Ë Ê.ýÆ:ð`*3R .†Ô!ÊÇHÅÉ«ºœÂ ô°r­wºïƒÿÉN¯:Æ^ •öú ë¾sE,Ù{7¯ ÒùOÓªÕ´pü'ªmi"§Šêmx¶…sK ãaŒ`ŠÖ,$ÿe˜ûýN>©7ZL.gèªÓÕKž1L•¶|áç°»éÍüZÖ=F©ŽÏ†’çlÒëøÖòʳº6QbCÀe·Hƒ³&'®1%Z œÉ¦ˆò‡2õ´Ù+!¼ HÑ"ãó]¤{3!·Š®œžÝsÁZ‡¨… íš"ƃª;©hq«ãRßW¾ä$œ{¤!i+ ŠÒc^KzòÝ­iKUZa¼@cêÜ<+"xUÖí qŸTà|Pä¬õÚÌü„Þ4÷ƒýøÐ³ìíG¢Ó6T¦oÂêí"1'D‡‚|ÝΣ‘Ñ. 
ª§…ŽÍ¯/^ù«N]FzdsB8Nʇõu8)²¸rá+ÄgÈKFþÆ™zŒóp'YðÌÕ‘U.Â<Ó¦r÷]Œs²ñÌ–ÞU¶  ¨{¦Þ<ój P»b;ˆ=¯6 fèvN–1Ú'Äv0R˜“é‘?©È{!ç(£qH s¤çâpÏa¦Þ!TÞkŠQn=MÆV¯æ•{Ôû™¼2Lçâ%ôïѶû§³ÿï€Ns @ºìÎg Ë·›Ô¥–ÉçJ‰”wgeÝv[Ëë8TCNõ‚«fa±¢ÖÝ!K©Pþ&ÉÈ B§%˜?넞àˆÎM™ÇtF¿‚d›(În¼ê¤»H̹“ܹC~4˜úS”?^¨7¢½s@ß…¹,[Ÿcþg–InÜÿ†ÒµÍ¿N‰-;M!ÓnhwNeÔs'uâ-w¸J”'ÜvÛö_~ã—§Ó"»ˆ†¦Ø8™2Žî<ªŠ8 èö –š9Ôàg{7Ê\ ?:V“uæS(‹n!†2𣍕½ ò”‹%øË—¹–³´Ð›ebôÿVËTúÝŒqëZÖq~ª{6AHcå’Ï‚ÄKÚz†‡–t±R¦á2ü:ü5x‚õ.þâlABøiºò‚~³á/›”;,Ñï)òí7)ÓîºñªßˆvÁez-K(¿›¥­~ÖŒi3©}\ÿÊ(EH£¸³ok€zuÎMÛv‹@¦˜…™Ô•Vº×íWªå»>¤».›šÕ<¸ðî¶Tp[^f_Úk¢m‹ÉN*!‘&Z$57žñ$Pz¤i eWßNÔ†fõ: K}|×VðìL–l0?œe”‹ zHUœ9œH:mÛÎa¾àQëg½Ú˜2˜íaÖMÎîÔÛò ª´¯EZü\&…Ë)?d—ÔÊų+µZ[>êÀ\:CyZ®2åí²òƒïY–Ô¹j×û蹸  Ò-pÿ0fÒµãò ÒÆ"W‘«¹Fˆ8;g> >Àž›OþºKÏú]†¹Rì±LÃ$¡ŸA îsk¯g•i˜Â sQ0è§ óü8¸yKs6Þ“­Á^M) *¯g¨l#6RA?ø”W˜ŠSUµbä?ÿL£oO³5ÉJØCú+ Ãè¹C@ÿùùÍ|À÷ýÏò›ÿ?ûÏë?¯wHÄ’Ù;WÍðʪJ<ô¥•ªr#b²)£5QðpÊqfL¿L©§™ÍÀœò\·Âå’ú"'Õ*ß5)ÕÊWÉ#½Ry»?´MdA\FÐ]¹ÈW“¹òÕx`/+Äÿ”?M春ŸèÌåT¢ ûß_€–jÖ©¹-’jS¸ž¥½,“ö‘ä¿{.o²  éiÏèU³›Q·Nø>ŸŸêsU½\&ßKƒm‹É-KNÍДQþ6¹‰ëÓ¬oE%Tø¨Aü±ñzF%hÖ=½dn ±SÕ­mAûÄ¥AÓfæÿßËûÜV˜Åæ¶3:+C§ †NÙ”a&N͸/îî˜n«Ø+Ðå.¨`ì8°ô´Ž¦1:ϨI!Ë+Í߽ϥ;ý:¹Cƒð¥Kéj*~Ãù5˜Åè¹ë¥Eê"ÒÐeå¹íö5é˲ÇB6Ûeq!Dm|`XDûG¥4§½´½Ýš‰/A‰‰¬})H\¿~›ñåe5æ/Ï‹“Ü,òÈ0&Lƒ÷,ZÐ¥ZGiË ¼…µµÓd“[I‰HÜñ£6ý(¸þkŒs “ÿ N?õèœñÔ”ò·(z‰!"œÐ Peiâa ÑXsʵP·ï‘L;©†{™a° âœ6DSÀŠç³Šer/ùœÇJ•¾ª1Ú˜éu߯~ ´ÇUð$wWO^'B,›©²Ûì5ÄN|ãDùò7$µ"¦I¼é§ÿ=/-*—ÉߨÜÛÖUë\©–»'&*£J"­«0QùN)ÿÝÙÿî.ÞÌewp¢S³1ßB“…‡ë0÷×ÞJòÒsÊk*hÖ·¬à“­Aæ]q ÿà‰ÂSjFC¢{ÒÖÊ>èXjs»Lœ¤Ùê%}Yƒtšæón©Ë÷žQl9Ÿƒ£œjÇ#õ©_TUƒßÉU9Nµ†ëµ—·ª  ¾)wù­s¬|³Ñ9ˆJŠˆä+Ÿ 2ÌÆìæd$ðNöÔ¨P¨Zj—ÛŸ‚Zá÷HZÛäÔR²]Wâ\»c×/ÉÊ^öû÷Ël=eGõˆOеñ9öÿë™hÃn+ª¸7M +‹Wj¡V¤òšòYM1mó/{¨õÔŸ\m“­©æHµŸ¯ºû¯ÒšWJ!èso”¡ag~éyÂ]zJÝ2ÊM.,¬¡Ö·éÍÎVÏZÍÄÃ2¤ Zë ¤-,ÓˆùL·û N眴„ó¥i›PQ=…øÛh¡¥óÄâ/éç)üéÙÿ]Í`?ªü’Žð àÕb©v›¸%Ì+ƒÌÇ¥PàAAÀÎw>0“kinÜçHœð·]4Qã~fÞ̯ sFϦî=¢ägç,gk©ÉäpRºlÆy~j„µw2ÐYœ„~óì¹5@‹ïÑuó¹WÜã@õæß‹­F]·¢,µð¯Ž gäªT-^Þ…?˜ÓÙ/h«7‚»Ùç;Ùñ}Ú(…¼óД@îZ||!5ê7Ói·Š±Qïá4C¦ÀíÞó„åc0 õ0‰[šeRôT¯~¾Œ>IbÅXÑ¢º$iÎ…ŽÒ{–·JJß*YõP@"˜¨}“l-yMIj=›…Ý`†ú‘ê Ž.4šã&è.ߣeÜ­ýWïžÍ¨ÇǪÄÒ£W±éLÇ_§ð£ kÚO³¼º”9 m“à =í7/T:õ¬†hdcûùS¯HSÑÅvê(÷¼]xžƒfH‡î„Ì4¤Ÿ’ïîó¡./hF©vó>]ä§rH«¶ô«ÑÚ‚»]e:úòY§:P~¹IÃ> Ì«³«v™¿Ee„Ûbõ0꽺 
gcÜvÛ~”¼3Öòº»n=5@-J@¤@Þ³èõö>ÏÏn³å¬ß{q5àȉh³-·›Š6$µß‚Œv©ÑÓ`þÏ<|~$£%ö ¼x*£çD#íëÚ&P ­ÀŠÏš ]¾Gý§I+çù˹öHvÄçMôtÎ 8Pž†©ˆÒ # ›áL·åj|aê192¶…¡êvÂÓŽ—  œ‚<·žY!e¶˜b#O„¬²§%,ÂünÆqœV†¿1ú–ÇYG^Ñ’•wïº{’x€ ‰•o«pn†)»å{ݲ¾ìÖým9€ó@™ýÐòÇIR"›/`D;tÚô ¸|6²#×ÕhÚq>çÔÍ—“%µ–¾ èåãYXΜl:;zÓM÷ÁQg›2JÐPvš¤ñ¶bœ,}-YèÇAXwÞ^M`u›‚ÜrÈ…TÀÄTT3~ŒñÈÓA¿Ÿ]eGX‡TÊ5}Ûuì r§‰Ä&†Ä×÷¿<ß¼å"{£I –ŸSž #u˜§òþ¢›BOçŽvºN]¦§°ÓÙ| FJÃ4j¨uÿA –Ý/h(»PÞ:Yö(u-8…§K­8¤‡Ë¹A•×çÚµžŽ¦ï14Þ³bÍéah_Q¬ÛÄ d¶’¹Å¥aˆM€œúÌAJÙ®!çà.•Ö´¤K ´æÊÏ ¹†Ú%×þjÐIEO¼JP"†Xséw»£À¸Œ‘2aÔ<9Þ§&©ZC>ètSÛ|@®\o |#ÒÌ*¤¢´ä‚ÜiY“çþª³ÌYþ0…ݱž¬ÁÙ •åO@J c:E˜PjÉ Šû¿r_§OuåoVÔd‘Œ°l9Uê'º)‚üþndÚÕv´ñ\ÐT„zCÉK2ĦkÊ Å#«â«ÃU%®‰PG™øG]¸MäÛŽëMñS¶‘¸ïAþç??ßÿùÏÈÍØjß…÷Ï?óüÏ?ÿ¸ßPg 8‡6$Â%’Êv—^2~"¢­eæÈy!.à–‹þUZÉæî‚ß‘_—…‰ôÔ-2Vß®НÁ]t·ïz8Ønx@*·Û¨Ù¤xž¨»Ì±POR, ÅÓrÎÙ3%"£YùC]7VSOÐxs±p;b6TP\¬¢XøJ„HÅ1ÊÚrë8Qwç9ú‡¦Ð¿æºXtOzÊÑÆ:$ѵ`=‚IDyá©€y™(¯†ÖrÈ–ªÑr Ï!ðtûXCµ 1f åïW£¨eë¥×‡ÏrNÀ)Æ ž‹5”³IÜ)vP•YßµÌc•òÚQ ÛÉ=”ýD=“}‘2‚­IuSg=æ×¡t¡»çÖb v~ç$×áÌ|8SMêÁUq·B’¥Î´·ÌqïYF‚“$§‹»^ …É0=4gO:°.Î¥;ÃÀ² }´wK1»ŒtÇÔ:¯f>tµ\N|/ FTÑb,|{ƒÜ5 ÇsyƒJaÞ_èÆ Šì)ä‹vÓ‹q`trHyF£ w ÌÖ ÒG2ܸߣ‚–nǃ )É·Ú˜¦ ò·±éêiúC;Ïïíü2jýš†ÖöÔaÌ5vj§î˜y7KÞæ½ cÁe¨^È{2:pRÈu¼,:­Y‰“ªFeK’[Ú“$‚-o}ØÁÒÍö °î1ú—-©ÿ|Ægôj4´ó£Tt"ŒÝEŒ ×­ÔÓÛ²zë411†cE®Ó÷Ö`Æ £Á.ÂdÏD·öŸÂž7úóÓù¾ý'œ_Šþ5Îþì?ÿu?¾ºþ5W×q)]“øŠÑ[ó4®ó„©Õuº å–KŒ‡NÈõ,<ч m+¼&< áôygâ+bñþ,F)3ܲnû¸ô¤¯\n=¸ˆ}ÌÆ8Ù[i:Ìjœt¥ Ý“A¯BªBôЬÛ,|XQfá‹(—g’E¿»ÍO®O9áÑùgý kE(Ä!ª«ñѧýW!R|Ñ#%#°œ’S QÉš5ºNó«´ßÓ0ÝT;9½|.Ö]àdˆ¾_\܆2qŠ×Ötk£RÄ3hšê“9q¢ÿ~mµ-²p‡øÃ bõ‰Ûÿmo¶;Žd >ûgäZu<Žäˆ ©è«¥{û¬~¸ù¤÷üÿ¿hlH8TTyDèHʪ̆ɆmÛÒÀÇœK-u*±zV_E«Çªë¨þüdñhlÒÈÌ0¥5“|_ü!¥^k¯ùìéÞ‰]‚œ;±°çâÿP'¶545ÉÐGª™[`‘‰•©;óŠ¡<¬2 Hþr*ÃZ¸mj«¤i Ñ™4NƗб—G·hÒq©ë@ÿ º ÚÅôv'´Ë«óî3Q7M| 1žÆ/ü=¥ M–°®ã\Œ¸þL1ãÕƒœnKm¹ óÜD’…b`Ê&”~LÔ­Y Ò…©ÇñeðV÷«¼â”ù×Ä–°F`È;Û |HLúÀU” u¾p±Ûan¦™â¨HZŽÒ/~šŽ7Ö¼ò„,ññ1Ý=׈º)þŽû¾GcàrˆÂ*áFbõU˜Ÿ·'Ô$ž‘‚` ÃÂkl¦‹²•åçÐeY ÍN ^ö–u¹è|{ߤÞöœçkNqÚE{¦(—b1îᬎÔÊ9¯Þe–ÌÐošÛFD»jto©¸ïFëÆÇ[÷K?klaO}Â…Ú]ŒÆ0˜`q/ø0aI·Üßêýº=(Áâ)y­n’/—ÀNê¥Î*žfàzKú†x*æÊç‰õiœÒD___0¿ob1à\ô£ñ;K¿Å9åZuÂiKùjV¨ µ÷¾Ï”“§žßÍ$tW³fâ—í¨ÆìWD# 3övß«lè^%¹Hæ^§_ÆO™[ji>Àùµ˜sW¾îñáãÃlÆO¹ÿ—ºï´ˆlèCÇýÍgÉî†IJc~ŸÑ9Òºê^«;ñ³1¬’f[6(W^ l‘Jé·s 
)'ögrïçˆê‘OšBdÐmßûè:ÙÀr­Å“F€Ôažg…“øÎå¤pb?{° …4¤ÄVž?ðx†DäªÀÜW4%¼ÃŸH{¬òµ…”(4Ø)øÞkµš‡ùu)ðišJ,ÅÁßûÄ5ZèrïíÌ€GEUÕ5ÍÞД;RöÖ÷v‚rú\í *%M(£ ]“"c¬¿’´m ܾŒxÑÁLdÉ0åcýÚª ãÐ0¨,åµá%!S OR e} ¤  ç-àI¾~²ôk[åByYÁÃ+ˆqcr­u¼MÕ°Qo3·<Óó=ëöá)ë“.¸R¼¬@ }òLZ çƒ1Ô bÀ”´\Ã’BD„¾öm“Ö%òH½Š·©Á¨’‰2’;'&µºµœ~ÈÓµŸûºž…¨1Äcˆ‡ƒ4_üFñ§KBUŒë®¬uÅMÓ/ÔƒzI¥KMe¬ÂzÑ%áëHO»1§:öõüCî×Pð¥ÑYÌVžS‘¼Oã¥ë8SÖ‹|ýõè´®ü"a±jï2žî4\Áƃ€&@N©€Œö|Ž×OqY¨a·ÞSN'´=Bÿ¥mšóùh+‹×Z„+;fLž“PžÚsÛÔÙ§6%œ¶%+Õ%á/õÌxøI´sX¤‰Õ×Öñ‡&ÓµPåEݸâè…qʲUç¤ißM¯MÅš»îe’ßpñÏ´ø]úù‡~)&/ ¥‹Ú³¨_/¼¹ÕaÖ|¨~gw6t’”²õqÍÑ Ã¬É%˜t*« £5_^†Ñ+z€Kï 5¼÷ÄŸMs( µš.)OéÄ ŸÆX´M„þüÍo~Ÿ2|ºßˆÕ’”2øüª®£Aè N.õm,^Ñp«Ã<YZ¬é·ÁH|Ae1ÙSç¸íaò_N”î¦ážU OñÌ@³*Ÿ"¶àóEºô¸~—‚-‰ËIÄsĪˮH?…‰VPù]ÖÀû”n¦%?é¡ë¶uÕ×hõ‡Í)ð.9u€Óv‰Vàü³“ܼ)Qvïåäô /${ô:÷¤E­<’¦ù"A‰~²…טÚ^ØÔ)Q³ægâiİóTNï:© ‘>ñhcö4|Ý]Öx¯¡œSOsçŽÍßd†²|?é…J­ÎAC€ƒä•-I껊q_‘r’"ýL{?¸•ÊEßär0d%ºtHŠón“lië03$Óx8'.{tSI “\~-O‹r °e’…iAêçQJÔU lùíæl˜«4õ¸;´åØ•õê9VÂAÃx¦œ ñß¼Ç`<¶ë–®Ü"*Ýß©y<Œ<¼ó§‰ãº˜C«¸Ò³A1É’VL[Kë„=  ƒ¼Ó™ÂZ§ˆõ´˜–;%";%u˜Ÿ·áißg¾ïµÿ$鬺‰µ*zøÅ»@S*WÚ6 ö ®?š¤×rGQ31˜éhAqÜ'‰Ï©6sn\é­i›ž­{zí×!]K£ÉÕÅ{*D°V‰è¬ …qª¸€n)E ²£Ôá± œpžîJ8=»ö'ß©†s³Îó4Å¡‡'êwõ \'«âAdÇå¼01&¥kZ÷Û‚0ã¼ç¾”xõ?€ò:L¢¿˜>fåÊ&Ъœ=’¤DKÛêöÃïáéííC½‰7WÝÇ…5Û«Äøî½$PÉry‚¨Õ”b‹L“—+gTÈ&¤›Ó“²Kc7hòG7ä¼úó”RšCë-åq´ÀiSvôŸø~üé3šÏH¨ºt&h¾ôj}ƒ*d¨àüs³S¾1 MàÌ~‰kÂù=k#ÖW—9¿K»¾6É:KN~"zèП)¶Cõ* àêTpnÒš‚ª˜MˆQé_û+?u¿{P¤@/”ˆÇJwÜÛK²ÞÙ‡Jßߪ6ý¤Šþç­Ç”ˆxq>,É\ “§Ç¥h;ß´-Â…ü ˜zi Zè0Ô…)¦+®š=ðЬIàoÙ=ø¬8[»ùü Ô.ᓥýTgK˜jÞ¯*‰EµÀ“}àîh ¦Ø=Wæ¬üŠI¼BǨe:»&ñ®ç®Oº pAèİÌ#èã]ï5º¾ ïÄ/F56é/µ@Ý%@š>ÿ©?ê±Bzv©ÎH‰CÄ×øü3 zàôr^|™ôž[€zû%ÆÈ~Ô³¥ZˆŸ‰0›ÅßÍ•©|Y©Bz-Ø<ÏGk_`]6ªÈ: ­ëïXõ¶:–ø«/εè5¨0<ïìÃÒ«„êÓkTZ½§âÁ>ºû3å™m×} »Û´°éôôcè+&ÿŠÂÙg\=Ì×¥ÂhÎݳ^–H/à,R BƇÊ ¡›šzóCÕ™[˵_ ó#›N-ÇáN{·çô jÞ‚ƒzƒ×årÂi‹S/¯â,×{@ÃÈÛ«8çʈ€œŒÅD6è|ìI2ªSHÍØÝäG½¦ŒYH§l=)átås££§ßj:‡ôZiÓ¹ÕÁ%~ÉE›=]H=$fFD&Bq›MI¦ØÓyèX<”ÀRIšµ½ÕB¦Ø†w¬QÜ&Sœ‘º¥µ‰g6:>Hì0œÑ?q‚Júcåçò6…9bòh?gŸÕÛ´›‰ÐÅ•?‰ÙyðžÑ-ȵe Io ×DÚ[éç-ÇÊÃ<{7ë×Ëñjšmª¸M‘2qYv`9O<)ó@ò*Ìïï›"Æð©¯w‰º1ËB¹£‹ž ÷ô5ª&JNBÚŸèw ¿Ž@¢tÆÿŠPþkÂä&͉°ôÐïûÎ9ÿh œ¢à¿‘Çï@3aù®Nn²:yÝžSm §YÁá=iÛѯ‡@w :·²R¥Q„}¼xžB›ñ?¬ÃüÞã“ Åþ WpŠ¥iž:]„eÇy^Ìáâß´{¥Ô^i¼IádŸ¤'Òßžöxíw¡ª‰Cަäèà£J²tbi¨T»ÙÉ““ïTÁ˜|';/þЧ±+\»4 O<=—wg<ü¼PR EBT 
ÐUØLdòuº7© h6'Z­£oÓÔe<ðs'I¾Ë[ãç÷MöÞù™·5ù¤ñO{à·U^Oœ¾Ÿ…:6(Äe<»ë5þF™ü55_Ö‘zíƒN?û̬ûES“¨E&_ðjø9Ò鮯ÂÌ©ÒÑÁçjOc꾄“§©tÏUGh7/½LK߈s|–i´˜~‹{9Ý·QwOãÓ`^ßݺ ¯~…f9ê`Ž=Ÿ€§¹0ÚGgô%iP(¯NG'l Ú ´šÊá1Z hºæÝ(ÕÄ:Ù†³úé¯Þõ¤>‚+>º‚ŠmP~ÑË~ÖaFœÃ˱fííIo’H¾]Ÿ ³H·tá7ÝØoõcÿtùñÌ| è²Ñ|œâQâ5y˜l г2ܸ4âЧðöVm ýÎÅë¾{â5ש–Úä¶*LhFG'DÛÖþ49Ú-¼ å i\d5Ô/©‘©Îu ·á};<~攵ÃÛgI!AqÄû…ߪ…X¹¡Èø öœÙ¶¹n—ªå .¸pyÞt”Þ%Èš#œxBNfÆtðs‡5ã]mH^“¡éœòŽ$Á_y Aâ®ÂЗµZ×F¼ëÂþÌ O#h•¬x/’yÞÝê0¿–BS?³`†“d^}~|ò£›¯R çY°&¬iºO»& §Çég›©”Gž‡¡4ÇMË& Ñˆž¥‹^Ö×Èí7à¿©¸K5?IŠ· ŸOg(áä”ã¼)ÝÊ}z¶³™]©þš×IÙ¸w>Æöž…¡Š_¡.k*ÕŒ‰|cŽ@us(Ò`ÏT®>~\xÃ{Þ^ ‡ ôÜ e…vXExÄ2º¾ðĀΣÌ;Í JZáÿŠÿ-S„ç¦,ÙõéÄÙa®:MÔØ?NÝžUc4BPœª¸Ì- Iaã;£;¶êw‚¹[ –J“NÀýþõt@x_¤.Í6ÀÙç8ÕgÒþ:]ühRhåUB\½ &‡T(߇¸O¹fQ”žÐF g_ü`È<·B*‡îÕ#ƒ&MÙvœ¢‡“ŒÞ4  "{¨|[ å=åK’0s(üþ>ª§YTø-a>&4MHé.í¢cÒɸ’/=C7s¢ê5Ýåáã>R;f¿~›êBe¹Ìd t`„áÞSaPC,=ÙhPÚŒuæ KOÄìš¡ecRJ§³=Â< ‘Ÿ=x•N§ŠCeÜ¡í ¿X¸¼ô4Bª7©§§%%-ׄÓ.yÆPBMÉýQ]VúÄ%J}M¾ë-F`«À‘r·9¾L1lÊJ÷g8‰ù2qÆ'¦S…Þ^)/ÏãÄMtFŒ ªã²î…GT"s«ƒŒ(_³mº†py®BÑÊ&{Òì ªŒÄ«”KÕHQ-ËD¢:УOZz·¨Cg&6ø$†’:GÍm#-@CüøiÙ3À¿KÈW&_Tôè ]:öÌs%Zéó¼·*Ê¥ >Œ|sž‘ðëÇœwTµõ!ɾšÞ\Áy´ú˜$qYMiRKƬfx#<þ,  m…(2q0§O·ñõ®ÖgÝDœ£ÐÁÃço’f3ß,z.•?Aú9!¥ÔÓŠW@X}ßßßâ7hK\oRUU.`U1xÑ%AâD¸-à¼T~D'ÀöÀ¯½=Ðo9E¥ð¼ÞÂM¹GŒÕŠ7*^¨`Ù¡â©\¯Ò–úȹ’}ǃCUõKRÈ -bHÙë"Áu“A„Ì^çÑuÚ'Azh­Eˆë§j’„Ðà–¨xfPǘÃÙ+׈ӕ“£ØsΓš²\—ÚDŠ@'ÃäYè–Xݱkʱ3EU-@ù ü ù{½uÕ¤SÅ!à%Ž ŽëÒ}küœ/iZý¡OÁÝ€)ëZÓÇJY!°ï¹Ãƒ*´-¡®<}uq£gö®>SÉ1XgTš±,:éÜÆ~eõ§£½©…fä¶0^©\eFìä‘ÞQÅÉçH£êýòÏ€ÞŒËÛtæâcøp׉^³! 
ške[蓉p«Âœ£ÑùJ£fèG°_Õˆ6ß«_–®@k(V¿ˆI̺@‚|nçgü?SÎcòF_õjË^–'K35¡õæ@0à5æg©æÐ‚“’£‹s_ç´Í÷%‹b&J°T[ž×qdªã4áÌŸ0Yðæ9é?H@9šºf!ž_{<6ÅC›*c ô¿ äû–û˜vÓUbɤ˜KzJ¨Ø3<E»=²øuc~ßâÝ1˜xzâo‘~BB‡heô:çÑS‹uµøÀÙp”6ÏQ=¦# (¾™^a`x‚‹âç=ªou 1ú|R£;ôiB†g‡¾>5;)–`—zÄŸMÁÓÃ"•l•ªBIÉ>™' ˆ¾9ü([8ø"Í8­ï€ÛÜΰhät¼¼~Ñk–{]ªÊ§ ÕèlAèÄP®1o1j¢ ·†}úƒ…o™Ú‚^ T™â²L|Ë(Õ©L¯¦ï\hê/p5Zn}‹ÂÓhИzõŠ‹¬åñ‡Ô Üšœ×4Ø–&t¹ÔjÝL_W/¶0?¯ÁÜ5fÛ\ê‹×)jŒÆñUw|ÕíäæókÉ‹½6mc¹š$÷㢛4o¶ä@­ÑyHS+NÎÅ cš=ô i¾mD¼›Êa(0"çäãë˜SRÀ”þvÑœ;ÚY¤‘ŽQ§—¼I"Y¡Û²„‡æ”ý¾çü™ÆJ…„¾ß†[9/h´ŒÞStK;~Žüú¸#Ã\ƒy ñUütMÊùÿ,ÓoœÏjTÒq?¯Ðçôt·:Òeà™=ºA×\’&Ú0 ‹¼ q£×B¾=„,LSÅùu{ÏøðÛ§ÙÛä1yzn{ä÷ · |$rÀ…*î²E9N Òø_ÛóÀŠôØ{o/Ëá'õx;ÉPÊQiW¸!š’Nuœ”]oË…°A3úÖ3oZ5‡-½_±I 4u…6=ý ã¦Xפ†uÌwtqv‡:î/™¹5è埃1æË¡Ò£Ö§™±×Ñ0 Ñü ¨£;Éâ®4¯Df?Ô€~¢Ü:W,ýöðŸV”wßùT°ÁÍÿs¤Fù&-²‚Únª0¿XcXb¶Î=-øÖBo8yefTî m –`I}7 ¢±û‰_¤ÁM9ÇÃ홀Âwòjœš«>?iW4ßë¯eÏ“£¨êtÖOÚúÉ,µ¹ø®Û1ÏS—;`Õè õ¤xn›/RÅ;(Ð ¶I¡å<•i•DêYñ”_@ù™æudÉ—ñŸ¡aô÷}ƒNM"SB'4á°KÄÒÙ7ÑVŠ;¤žÝQJÚD©®}Kì;A~È‚È?ì þ4yP–d}%8ZÑ#åÅo®“¬¤^ÞÎpÆÛô/¤žÈÑÃÀŽW7Ìiúnãê­¸/»G'¤¨tc=»O9R‹fðLÓ©Ý¿Gù»‹d¹Št;¡ïJñÚgcmÜ¡^]öÏ/6¶C‡i­Ÿ£×FaâÑkÞÌiõF¨C:®oYôœ1 ‹õ5pMgŠšH©¤fQȤrêÈúwàýüäk±:P³À›t)¸ ü*õl²mzžDw«.üʉòßÎÙ¿ˆê»T¬•¾X~ÎÍÂ$¾ŽJw§‹èîMš©²÷c·vNšÎ}š4€A=˜¬Ž€bœ(O?ÊŸ|?>üšH¨;‚1j€ˆ™²Å Å“¤‘­v‘n¿XéÝ×ôòÚ'›&Êc<ýyXŽS‘„ðY¢fPº¡˜0 N–©lR¹Üg J»°‡ÁwŠžiš$¶\¤<5©ý¢fLá~ÜÎR’Im\ÅMêzH²µ÷“š&¬ƒ†;­ú`¿êÊw2œaeЉ˜ÝC§èMÁSžw6@8OÐ,Æó¥?Ò*!cÎrŸ—{0&½qi¢‡’žd̨K¢˜8P7é'Ô$âêÞÀw™¦s¶Q[nÓÔŠ1á¬E¿ªIèÏBí NtÕ¨°êÎlÛiøñ÷ÿ/ê&fž»@ûº0p§™s =_=»Q2ÍkêœÓâw´iz0à ,HÝOLCÐV&Ý?úYB›¥Ú=ÔE ¬ãùFO«S…I¡ß_ÁùçëVêQ­ Üa¬DÜ˰TšBáÑ1oßG˜4]ˆ1ãªÏâØÖš^ÙŸ‹ôuZ•±(IT‰a+¢¶2ïP&ŽMêiD"E @—ØY§+ªfM õR RÅsn\¾î+»ažôÛ W$ºë‰nƒ”̤fÑñ¶Ðñ¦f¡šEw’ùåã´jÛzÃõÆM¼ôµNŒ‹EÎ’3¯ôe•wî' µ£Kè¤<_úÐ.@|}LY½]¦ )©SûôÉRIäç÷ÍÝÿëq!Ángçøî $)ÇÙ¯2c¿ˆp’ѯh_¦&Ì›tn¼#=8|¬vß~7‡øÊÏj©§Ëï®[pˆ7çs#¢Öv–L>ô~ŠG@l;[•3ã‰rÓÈ¥Dš|G‹~fKìPîcôK 5k1þQìΡºEKVQXªõ†š-bæ‚¶Ø,¥B²huÙq®2y;&m°¨Ë•Œ !¨ #@}ñJÛŸ©J»ô€VžÊäÑ®â©[ŠˆâÁ>ªå­îþþ†¥rI'WîI®ôÒsÝ:†tì®ÊReCšæ€lJo‡øo_è«áÏ ²Ž|¢0´•厘GažîÝw1þrêîF1ªè©ƒ(ãÞRÓ¿ÁFo£œÕXÞY~)E¡qÓ:!¯Æ§Íé¼w)á#»ãîõ:DUØÑ%+Vêî5MD’Ü¥‚èáXsœbéÊ!‡*©ä×6îi<å-ê«wUžfÐät·jRŠž•¶¬KÜ@3{qT\ŠS?^··çþ¯û×,9Ÿ?Ýþ¹R¡Xy¹Óæ6;Óð§<8Ð’GS;Oð±P7 ÔØ\ÅŠŽ+jf–¯ˆXúx |ïð¹,þ—U«$¦rÇX:Øb“r†)úç<ïùé?^|ŒôÁé·k9 
¦òªQñUõÎDwÅá i5\­ÿ#LŽT8ñàïizÏæ±8ó; ›€¾H†6A4³˜Ã¾Ô®"~»s“R7¸(“S‡ëäM—%•g÷Sƒ®å–®”ˆ°1ÎÇFHÿ„<"i}á»â‚v§7O˜‚óq bKÏ©ÚÑM‰§nÐSVÔþÈÆ& ¤E£‹Àü–ð…P,=Ïôù4¯­ s 4-|˜ØhÓØAâ­C:•&0ƒ¹3—t‹ª0ΰxÏ ôñ\!5†àÙœðH ZtçH÷»œ"=ªša溤ªäÔy­âÅê3Š„Œt‚­+¥¿Õq¤x™ÂÎV\©E8›Xå sØÞzR®wgôïjWM9%í$ýtÿu®¤·ÃϨ£A²ÈžÄ›>8VÓ1…h¶´ìœ_ÎD^Ëlpá@eåèIÊMc‚Æ3ÜöJsœš¯zzã+æÄPŸƒ,³ûòrõ¦ÏÑsôC%¦Èt‘0GT0½Ec¥¤¥ô9Ìè1»±6Éá¨kõxî±¢"JF)¸MÙëéâCþàºI+ XKò˯']¡è¿ ]üÇ8v”ø‹ÿÔ£@Z¹Ý¡ÃœD‰ ©º­ƒ=—Æäé”[@ ÏDN$‘•JPMЛqmá›Ê£¸éèPÞÒ‹4û"ñ¿fõ†D¥#ÖHÿì }¼ËwðGïèz%¼…m‚,´Ú÷EŽ%Aæ‘b !2Ĉƒ.‘γñS¶´nSr”.™ôü®g´Ì…P7(«:¥œ™Ô·:Jô…û3Õ‚] Í’É2 ü@õ‘gu$§l1ôU¤;²„‰rfsöÔ_~£D9M?s(9v }£€©4UDÎÊæƒÖ[ˆc„‘ÜјÑm ¦[tÎä{Ë@yý} _Ÿ7ÏNO(FõwÜü™‡q—¾Ç(ñU†¤$n‰2VbMŽd 1dHnRNÛRSÓPÕ¢‹ORÆL#ÿõåû‰ší)ç£?Ú±¢S‰S‹Y¨,ãü<Æ)Ò ¯éRêÇ5koƒsEØÜ»ž(?ùq0‹1‰™çG |ðM¨Øól0¿þv\³Š›d`&i%› m4=Q~-üª“\ƒkÓÄ>zŵB±ï“ÖÄç©ÿ.º'†]ù’ó¶H¦ó·:ȼK½÷‹ZÙ0+gî®~[ß} áh{€YNNˆQY;µ‚ñóð4^PMBÄ.µŒziW>ɧ€~Iæ6—3gGã&èíÎzï5Upõ4k‘Æ8Ìnž Ü•ÔKP‡¹ÁÉ$È›O]œ”ŠÀÕgF¨K3úª0·8ñeр鷭— ¸)僺Þ"›xZT²¤šx Ò£§þã#øT%]Ç Mî=òÌ. ÖxÉTu — V–TuòÄËcÈLܺNwY͵£?âÏdR ub{ä«G)Õ—<&¢häð¥g©.A *ou˜üe\¸YûñÒ:EÒ$P…ö`øN¨ìX¹QYnÏ.ùNu˜ßyÚP°_ ÞÆ¦Ã­ÉwBj “x]¼Lƒ+v(ïxS+^ çyu)ù+ uE9}=<P+F/)‚ÐA3{.]ßøÃž ÷¹Es \gt8õð>ÙAò›ƒ„áÛ5”ñnòf %÷í ÞésùÄÏt÷ÿåûC¤$îkÚNŸp¨Ÿxuå§×Œwƒ¶k£ÙÂóWIN¯R fe€CªÝýТåzçÒH±]œë°A[p¦ŽFú‚oŸúÞg¹ô§—X啼ì5åáWèqA W‚1\Õ9Ù3á<‡‰7i£?8ý&nÝø¹>±gRNE&O£C\ó’²îxBG' ¨_7ï juã ^‰š?빦¨ãf]¨EâÂN“Läì¤/CÄ8ÝuüX™xØc@U¦œ&„Õ½·Ž½K “E½HãÌ'”Ÿ‡(©q(šóuîK/VÅ¢»Ô¢²Ë…Dé¢s0Ïœ_N\Ï[9›g²V­‰lÎk?t¯ u †²E¨ \w´ÄC¯ žç ƒ.½dPÛtì[E÷ ™°*¡‹jÞ7d4OÞ+C£«(¿¸ã4ðä=áìÈ=ú«^&=EŸXþ(#% ™©E' åÏ:R°¡)“û5Oèc>··MßüPhSÅ"lPÞÙ •“d×¥x¤ð÷­š+#ɺÍ[Uwû¨h" Öâ!cS±ŒÞø§TË©"¾Áß)³[´ºñ? 
Aýƒ˜Î,1A¾ˆ9RŠö-@gH“›å7ÉÒY˜ÔïäIdýÁ‹AÝÍZ~$íùŠp»¿>=½>Ç[þ} ñðEW°·`¯)/É×Hj"ŠžªdNŠŽNŠŸ¯Cš¼å\íîËœ}ÂËà×ôtBç\jtPË· »” Lá¡äyÃQÚÃÈ|é—í¹oÉ’Z¢=#´G¤×1uß(t7 SØtozdå¯t7tˆÑë€ )rdH=³WßÎIN;×aF×Ô¶¤ÐùtîD›A‰O “ÄXÖ×lL‘ ¥A¢7W€¦[´Rº½L)™˜Ï$@È‘bKÏ:­è=mzŽ‘ôö~Rm¢”E[sÜŽ»RF­<šr]_þÀr%RC'Óò¾Ž£ºè¡Ì m´åXÐKù%‰¨N¨¡Žókæ×*½C]Íxç—}Èw¾_·Ž®¨oaå¢ÎE—:݈SÎ y\MÕA{E¥¡3熭æN†Q®%8êùÒ)2ÕhtB¾Üñ •EÍh Ð¥åžþxÓmý©çô«¿®–0TRR¯“WÕÉòù†wâìT• z§(¿nüNŠ›3y&v1èöð7dKñ0õRD*ºÓKDq…R'Ã-Ÿ§sŒ% ¦¸©’›úþnש½&rG¥½Ë@scºu%ìE˜îÈ5½´æ\c é²@²Ùü‚*š¬dri ÁB©ØX6»\NððžKj6_ºŽYÖ¯C}%nu¸ó%ÕóÊí©eëcç«_#±/V/¸e¹ô éRKu«E°ß#·VkН.ý˜:±¹ø˜Òk ›r¤I©T6!ÍhÔócÔïóÑêÒd‡ôÝÊ“ª#͇ /¢=Ö’‡6­¹mT….'M3Îc˜i¯öÃ#Ko4ˆ¬D—×ý˜áe‹þ´“éâŽÒ·š5ù‘¢£? ëÍál½Æ›ýîWôÄK7¿œv½Ç}$mbP7h2s¬8·'[÷Ì1©/ûÔL¹¨=ÊÉåuO=#¤ònÁÙrŒL*ÇÝ=ü÷Î8ÿlp~bRÒ÷¿oqÙû¤—O-#ˆæû{èïqgÝíýù_û|-“pZ±uJ¦!. j‚QjOš&ub7S3)úìø+ Î£.bƒÍõÔp”:sèÃuÿpœ› DåËéfŒìÉ‹'ûeÆ5‘ Æ]]¨ÆË ¹3áOÌnãnGêT·¦Ðå3hÚ§šSââ÷«Žöú¨±‰>ަ{‹ N6™ÔËI!äÒ£Ïõ[4Ñvâ& Zö\ ÐÄGsV3pCqÒe޲sØ<ø-ZôžJdè¾ò¡˜.h¸$±Jï¾ è@ }²å¶ÿkx)^Ò«YèT÷=¨ehåµQ%’xn2(¿65þÆ%I^>ǨîçµQŒ?ë=ø–u4ÅåwsêÉ Mn^c1‘PŠa!ùøì¿íW‘µ”›¶ÂöbJL°J{dš/^¦‰OŒZ™êµÆÔã(çÄ`À$⧺…w¯Ói Ôú~ª¦Œ|{|ú­±\$‚|gliuÜÈg÷¼æ¹³ñG¥2²co¡b£ {òŸ”UMÎ={Ι rÿVY]ÙJ‰ÑÔ‘ã5ˆï¢°hPó{ŸÁZºTo^ŽTº8ðÒ¦-Jªª"¾Q”Œàï“_½O>Ü.€C}êiÙãSõÓ<ŒR¸ûù­ê™š…¹ˆÿ3œëÏõŠ¶Ðø–áŸËí@D{©;Jé¦$µû[0ä͸1êîº7©Aè$©*,´JMá䳸^ ‘û\šnþ¢ÎPW\QÕz2pžâÓO}`ë߸=OöTû±IÊÊ $¨POcOßÜh߯î~謹«;@)J¥Ómfu5%É;AOÆbP—Ä 2ƒ8Ϥ®vnNÈAx¢å_Ù¤²þô“lú5HfŽwä@Ÿz1ªIFmjÃC!úÞýèŸßG÷nÂ\´BÜ·a£X{šJ9h±}§ûb@«)ä©OSOk01¾mÎ:ôÓ×ÚpœjB/ ñBà ýÙñ$Ç©I­°kù]è ΋¦tž‰æŒAJA2¤v:•žz¢èç ÐhQ”–ú{j)n©såª]ý"&þ‰žFÊè‹¿¼ôÀiæ—ÞgÏ©fÏj ¯€ëó2=;:£t(®új?á«$Iús”K±±Z>Ó[Wí[ãzvçÆt²e/n{›r×aâýtŠ®ÌPÂý‡TbzÜeühaPÎ)ÅÀÞ ›€î‘W¬½•A[Õ4 T@úƒëÈpL Œ\QÑ„s5•Ÿ¢4MÁ_¦=/ R š£ä„UØ-º~êI÷µÁ¦ñ¿Æx?ì|æ_^M“Í-­Òfª¨8’ô³v~Ó>}RÓIZߤW÷销’ÐR&¥*¾O¹Î»Ã<èêUW}AÑ0?É ½â?«}ÚÐx~1êÆä„ÂÍãÚé:iç‡[æŸ[¾áÞ±7.±!V)ü1Þò((¤4b)ÞÄ>#]Ûn^ô¨M·¥×&¾9 ŠØ¢©²ì>O®ý¼ykÙ¿SU3W·×>ñe¤”ž#PÄ´/ž¿dfD!˜S…¹ ƒÖ £ÇðÃ×¢ê¬õ“+à#ÍM¬« Ï—Yfæn˜ žêÅÔLk£Ï.\(.¨ÀØäµuW!ÃÚoú“+Hó5*û@S aÉ ÖÖ·c„Iyp/ôͱOñBŽaänð…ËRBÕÇBÄ‹i6›ª0šŽ{RëV·”ßÊècœ ZÝ;B-Ã$(‹$2©Ó®<®øg~é»ïú›xþj>)0È£ÿýÝ,ô´ Ãénƒ•&©úËžÍø¥k;>5]ü“µsÓóÉp>dz5”'Ó §=èÖ1âY pµ”È´aÊ/4Íõâ…Å 8_ˆøjW5ŒuzBÙ ÛÁ¸8a„ëyFi»ˆ1n­xŠ±ÍªÜ„kÇ{êB 
ÎñH_ä7%¥@ï?˜,'…(‹âýª‘Èû¿“³ï>ÔǬ÷>| ~þé2msšÔ-PwŒ+ÿ€ºÔ¢×ô°st<ª•·µÙÄ+4ZIç³>${…M‰[d¥f½õa‰¡Ž:î?¦f»ŽÛ3ËÓ·è+§(LŸ=âVã,y$ôQÑAk³§`É]²'©ÁUPV¯DLF¾¼ìT'Iª+*>Å.°$Åͳ,ü$sŽrO]ïŸ.|vâñÁ¨ÙÔ8¶èÜ놶¦#V!~ahÆ,GšÕ&;Îý.³ ÓîĬîŽt¶”H•Ï÷RR9Ll­axg{Ò𳡨œª9HLÐ@d)øºÛ%ÖwNôF Ðëë^-;çY’¾––]ïø1âl’Q5·:ÈZNbOw£¹ö ”è@R³uw|Ý)„ m@ß&P‡åÑëª@4c–ž†¥(u”bmÙ¥åûÁè‰= ªN„¢ ª¡µã¬M ¨} R¦Zó?_ú= ­$3š‚-Ežã¶œ$ÏHf¾£aƒuœ{§Û)‰N¿@£ê:”m;8e]TÜЃÃ Š»Ÿµ¾³¶ü–Ñ^©Œ¼á­¸I“$,Œ¹îdüÿ`·ª(ý™¨_5ð.«FA£xí»Þ ½ƒ%éRŽö+ÄÚ€^Ô+­wç¤gߤhá¡8q4½S®èÓÀUØÇîmT1£›Ukš”g;¾öb‘aÑÊæµo8[-‹Ù7åÇŠ|iÌPBßÓŠo ŒÒw-€øüÔÏM¯~4ëòœ~_F‡ æxÿÛsßmÒ`²Û”ëÔ@'È!q¿âwoHU¿½MM·m1d–ÊŽž'³;°tXâÏÆkžg*/Cvj0—ïÌýφÝãu7||ˆôKä’ü›æÒÔ¿| ÒýRSÈBc¡_ËëÔ+b³ÂkH*eFq“ª›u,箆óOÅ¢ÝË/ýûñ²Jû— «åñY9Õ¥¡¢’o°\ÍÝÇý™^ž ίråw/t]x$ƒ>Mн6kºÑ(FaNà%ªµÏÒÕ=J ÿŒT>Öø~{Ueaò—PdŒ~²Àqò{ÕÆNC:ë@ë¹Êë:ŸYu^ /"à{tì0±I(z¤˜x¼…ùYÂü\ÕÁ1VÒ¢™%pDyPÞÙåV]ŒERuš€FÔ<#7oÙ¥vñì<Íз Ò?+ƒâÛšÂèÒ6ˆ.Ü{ú¹ÔãbŒ|„¿þ²ö´Þãyò›wÉYaž"& @6[ÌëMpÌ*KUŠÁúm7N“ØD†ÀmÅoT¦–DtSdŻϩƒ@%þÔTCƒ|•)ò#©·<Möü® ]j[6o­;DWç|» ”N„| mêºøüóé·Àªé½IVý÷9VzþµûZ‰7ÞçÕ;¥ËamG' YQHèZ4;ð4‰(ÂåÆžÒ¯s¤Ð€ŸvWÊ”¬nÕ’¼á^TM#2rÍ¢xQ9ù£Å0W¶}ŸæÌS±Kã…û>¼oB¤!T%I b¿ˆ  E•žþŸ¨Évä“[¸”€Ð˜y/;‹B:Ê+©AÞ² yç׉*qߘ;•ÒÄ)Œç 3Ðâ0"Çð·Ÿ“ I2¾-PCtìÒG~H—ËßÇ—èöý­?þþ{užZ;0==TÝ‘IéΗM8ÏîRÚ³ýe¡ç™”€ÙœÁRÕf±(Ÿê Íœ½¯MSÐþÎj%ä°ÈM"ÿòq"iŒCê} ñìÌ¢|½ü‰úZù“„Cý4Ñs¢ž$å7t2‹ žqiBÊ9ÒÏ„´Ç]Bß÷OC|“ô,œŽ“ôðbB‹;õŸ$þoReràåt½ “pMÕ {bäãòÍ8¼~l$J[ß{h —ËS>}Oð¦ê ÿ¬_ûéÓǧþy¸ã:]y% ¸YÌ¥QS˜à쩸5M’v¿‚s}ßÓßa0kí·zú¦Ì4Foß:pR«ßp´Z›ôÜÿ¸ÇÇ©ÅÙKƒ¦ˆH8NjáQ€A•“7ô]kòfߢôu“¼Ùûß_ò6’Uㆺ•Žn~éÿ·ýÕÚ·ßšÄÚâÅYÄÄnÖËiÁ9OÔ¬I2#»ËY‰çP3ø¼…B0§ 5$nÚ€“ w×u´q<¨‡S$’:¦Õ¿Ç5Ks&›ª¨$>-’@ªÝ}Œ0íR´ò—pÙªI:E’¶À{Î4r­ätM­¦†:ÒR&e—r•®™]ÍøŠ[Ð {T‡™QÍæÞ1MH‡©º1kPUzÖš$2 £k üpF½¶ 1ÒdØÆs_ƒÜ#ñ£z¬;’‹>:ơآœÊbÝ<\ýf´¦›Y¯Ø˜/n:ö‡©°&…<a :i <¦,³gQÕô6ÖQîÜä‹Ôô´ü?’öé¨÷±zô208Òt?UzêŒP¯n§Ÿ Ž‚ô¿Ò`&¯«ÂoŸËE5œ_·5Îê‘o` LÑ%˜" '÷†K­@u˜ð†r£¤^%–ª Ç8›t©f¬BÊÕh>å̪"_K‚áVà ¨Ý_ý›ÃÐÔ†Ì_+<äË·(±&éõPG-rJ )R÷Jæwíßì˳µ¯_uš&ƒ,´^ œ<§duV‡ùyó:ƃƒ(÷éîª*ͤ3Œw.<{> œÆ¥G©çàð|ÎÍýýŽQvßcd9f!Ü,Ž,J#Mãý¦¦ªÏs¤wïüû8é|ì<õûùYÆ–;W©%2y\|Õ V‹ïSb¡Å¨¿´&‰¼]¿• „=¼çÂÍ‹ÎþÜ›ž.Ò¸øu˜_·>üµp[.Òv=do ê…÷iyCÓ8™\ùê–µ]*(‰ó½¤•JªPÙÆ8Äëæ2ß25>H̹‹wˆ—†ÅNz¦°>Ǹ©»5À­ bOfE}(eÓð8*Ï GÕ°‰W–Ïñ] T]ƒU' 
nÁU~†µ9FLbè`4AÀ[á9]Ó†³2Sâz:9•¼R§ãêkVH*qZj­¼`Ïã_Ç"mrIàCs¤ æÂ ª–ÏÇ} ÏK§©4"£²ŽkeñI¯¬,Ž¡lZá†!åµy @™ þ ñš•!›¨àdR¡š~†y¥Úñ˜á"›žÂ$28òÈïiÖ¿VÌj«—[êrÛòK-MŒôÑMñŠåL4wõ‰G hÔ]é4\iâÉ ?ì2€TI•æ@¸µ–×gt>-ê5 çKï·@ÛFn+rN0XÀ/ÍßÖ°á\¸F)ë]ÅÈ/|W|—»H{(\^ïÀô‹øŽÆ»ÞAŸóGâYœ5›u#0È[ P7<İ\ôéÀ—/êtçH=Fw^C {Ùž| Åö(.ÖQÆØ~öGvé ø.;ùÔw„.H¥)Çâ;Ãɤ<‰ÿÚ*ÊÏÛ} ŽþBóÏG÷!©>‡žŸ®ÛM®ªí3 ‰E‘+¢ ¹9õÆœÄ ®Àü¼=OçíÁ/ãÐm_ÓËÏ|VEt‚ˆm†½°|èÇØ Où± ʳ2‚Ù‡ÙFÀƒÏ Mi—¤pÙ)â1¨Ïmuœ‰Ñ8Ïåæs™W;q†ÐSÑA —3 £ ™®KI²¤© Kbé$½£%Ø)(íùMð³ñÄ è¤Æ7ªÈëñÚe¦}âÛkÚgÖ*Kò¨}ãž2í§¸¡Wöåý›t¢·x’ÂöM8”SžÛJ(¯¥e´|yÆJó¬ÚÅG4hoòóú mñàãŠßÇ£ÌÉ«˜Ñ.þÉ_QË_QL¢šCÃ>=8ý©lÒo|½z[{–ó¿уv\+ HçÖ]/£_´½jHÏ}ÒaoéÏ^û¢€U7@åi®nHƒÌÓ°õºA ICØ“¿J›\=J‰ÖЉq¸ô1ᔵqÍî´GÑ´RÅù}ûkìü»‡Ûü>)ôF5§ÎVñjä5a^(Ýtô£8SØtFO+ïÓÊWP~µxk ¢¡Ø”U~¢ƒ/¼§f öèG¤~Êíy‚ÐÞÕ9ëœLVsý$=6¦FŲ|«â†¦ºƒv#9¹MÙsêI³™ 9œ'6²½Qôkª4R ‹Éýp!È­¤I èçí&œ†yŒá¸3òîrZLêh>¸32nUY" ÷:˜T!iAº#¯²þ\Mì!•èPÅðÍ€ñüÎwëµ7M8k‡i¼>ÐzN@!¥«BŠÓÄæ$‡Œ4y&¹|:Ì-J~ǶbSƒû”Z?Ñœ¤ÅÊãƒÔ¬žW¾KåðJñžéú<é§ÜHâÖOqÅ,µüzÕKSÂ4óQi6| ÌAL,¼Q—¡Áaf=U’a*¨ŸCÊwÐùÂ×AæyPOµz ÃŽdÞONÒ4=Hh \5=Ôd—[Ôq®˜ín9Æ«ÀÜóI¦'o(¦í^îžÌ)ÒxÜ!…ñA7Ú½ž¿¡Dƒ©à<ÏŽ,}SeŒú—„‹»4ÞRš?LŽ?L¤é‡ln 'º*L–Ýû¤D=>¨`· ;kàÚùˬŠë1™ì*Òy溾k|ž˜²šMi.5›J5”ô›šŠU¾†Ì/Ÿ»…!] 
É&_€b¸lUHPçjðÂev{tæ½­ÚÐ(ˆŽ’h¼ç‘Šd¦å•-hªm ÎËg¿!Gžn)Ká:a{.Þs¡I³ÃÔ”×=Ý i¿¤ÏÖó{É¢¶'9dDT±ö†I¤vÝ"KÑŽôeo§Û mǦ:À1¡Å˜”B§Ô7è¡kz¹”†Ez¨¡ïþŽé|pEMC“å¾vöoÓ+ÿX37†#œMãYUHÒ„ðrÚÍ嚀Îp~ÙóúäUŠœÀ‚‹†À-ZÍ~Ä×~™ºÂήZRC»zÖ¨EÏ‚æÃìÉa ™ü&˜ ̽ÙÀÜ_ø†¹qÄvmG£äÄŒéYÙ«4~ùçdsáWoÓH:Ê3oR9D‰ÈÔÚi©Ì>úøHæ¢2â÷pÙ¥lIPÌ~~=º˜vŽRkpoPptqá1ª~9IR´Rã$µ!}éí¾„Õí!­7©ÎÓ–Q¿AYL1??p¦– $“U‡¹¼J§iÀoúEØ´ª“JâN³—Tq‰ÅWÞ§¦ß:ÌïÎäã¤OìwW¾‰ð(=ñsdÒŠ_MœQfÓ„ u¢ÖOz¾'g­ô˜×ñ>œÂ¦Ítˆ›¢’†aI–™”Ø?®ßN .ž|ÔǤg•æÒ<0²iqä›:Üê_®‰CCÃKGÓ ç› /ërþæ×“Ädu£Îiñ!eÈYŒ¯ì›º¾ø¹Ò,T\xä"x{cÔˆnš`ßr¦ú~êt8 m]ïœÆÇ…^¢ ¡—+Õu&Ú¯UZ1“X‰µ˜˜¥ù®ežîÖVÌÔ'}ƒq®ïîäúâ_““E,Ôú47sáe ¢5 ¾‘}pʆô}ð©ý…òlîG¤X©Wr/ØÖåHƒ Æ¢&¤W·nÃ%;mš „.ƒ­IóÎuy ^Û&h)ôa›lz [Re)Õ9z4Ë8®’l4ij†[à„ö£Û´—þ¤+Bç)Z¸4ªs>b‚»/2äÆˆ–áííi:WÐ%|yÜ~i[Ã6d  fÊêÀLÊ«(ÂL}Pç«?IyÓX"—?”RG÷ÉQÇVS÷³EÞ_AF×õÞ0ÞDÔcF»Í8‹mÉþØ/¢´ÕN„ò=zÞ!êÏ Ê/W’'Ý×NüE—ëÒ·Iæämý<™ð(Òn-÷b«ö óÙ…S|»òØÐèé`U1GëÊgHbK–·\•)¨ÑW»Pª”Å6`…ŠÝ/ µê¶=s_ò÷×ÖDò” rx˜›Å•õ Ö™[è߫ë鲕µÏ‘«ëQP.ãëÎÕêŒÉiõ†š£ì¹ÏL./ÓPA­fÄ)g%¬é` G¦*þŽÍºÀ†òÞÛåø™×X§<ú7¹yù+`ó$ä–kBuÃ#k틵ü€ÜëèïW¨K°G À©»IAªŽ3Æªï ½k-{ò#ÍT @ñ]ÛÁè#‹ªé]m@ŠÖâ!Õà°÷ÿÜ¢)aÑ¢* úÎäñÚËI²aíÁ—r)ým|±K©å )T>BÅ@¢¨Š/|®xÕŸÌ›î^PÇ6mJ Pƒ9¦Ÿ{\ªŽE…Y™ËN±©¬Ÿþxû£. 
-f®üškïáýñx~ÿkÅžÙ‡Ù?ú¿÷ññWü[ÞIOUŽõôþ»ÒœÌë®L% {˜â™Gï¦ïÊlU¡’’r‹=÷Ø(Ë篇©EëŠpFÕ*k™šˆS…»çSèæ÷÷Ùš/ÒÔá±ÿ€NªôòP–G£-θ®l|OÔØIñyéŸÂ ä’ØÞŽ÷žUþßJÀûeÿéjR[öÔÔºéÁ”ïÀî^V>n•ytJ€0IÔÖ š¥BÝj•ιÁýŽ_ßÔ_1ê^Üïý„Oƹ7à!=JޤB=$ùT —8ùU"½ÈhÑÊýü¬îË^ßÏ~šú÷ææ0òªÌÏ£Bûç8ã5zï/£ËÙ>ñók¼«ëÉÖĦq²lüGy”g%œÏAtÅžóœ FÑLŸá£gþèòÝ¥.‰\¤¶^t›¡uÄžé݈Sc¶d#¨>g†]’¸Ú¸‰h”ºNPó‹wÒ{~…”Åyñâh€zÃH—Ê@šë¦8IÚæÙÕ¼D¥üÒ]©WÔ~p~r“þë<¬«/š¶€Ê†ßÉ%1àIÇï{+øå>ž£VXÒå©.þãCÊ»y“û~eR¹KCÝÈx„!Ñaö4ù–mSž2&¹÷Uœu±ïMt[w±GÒòù†zÍÕüeòrží^ù}ë~}¨_4>úcÌfôÐâT0ªÝ3¨Ý»ñ·¾ft$ŸsšuñÖ§–£º9룄bÏTE5¾×††Á+Tr 2‹2U`¢Ãt@…§§9^}Ô°Ó£ÅTe]ü_¹\F€J"Ñ4--§Cr\áæšI ”d.É1A»>«wûÞ¦f(3 O€8üŽ_¿÷ö4ÕÐã+Ÿ^{W:¤rëNBÿ‚„ÁCÆÒe—òº‘é ­Ûôû6er†æ6ŽÆÙs>MÝCtlñ5_|•eÃj0P†ªD"9)û@J¡¬ÅÐiþ*’[·Àoòñìnw‘;ÂXC£yEuhe!(Á¹(N¦›©Ž³_#ÎI!v÷Ñÿ‰„Rˆ3á]²öÀ ÖL-1u nÔñ¯ Џêë(Ò¬Û$8õ@42O]9£¹€Ky'róðæ¼Mhî1k•´Ô4pÊi$ó½Óqá84ô¢×¨ ³Vvú×M¼\— ´7¥AVÜ÷ž ­O8õ\“*±;ÏÞ¦‘hÇ«ƒß2Í f1”ÅaZ`òX/²ÄU՜ɞ}Z}R6›‹ÌýÐÆF}Òc`uÅ×)Èé’z"‘®kFš~?l»$vLZo+÷ö} 82 ¥¹y1£ÔS»I dJÞ)Å|ã=J”á, QÓË„/½«Ûvü577GK–†Ç»f˜Šd9Bn.tœ…7 ¦P 8iÞ”­‚Å|A~ÜYâ>ˆFjTa6ÑLÖH§ ?gŒ:zšèíì-$O¿C‹7”¤"Í­ ê0um ăX>n©,EÖÞqESæÉ 6€ºá8É$Bu&[•BPj,­CV7Ì™úò³Ï+Çy?AºKr\˜‡ é­*áúYñ†Z7¡&ò]·¬½¿<«ÁÖá·qÒý{ý†`Ö–Í»UeËîd“æ.4ãb‡ÚË–OvÍ¡½MnT‹Y‡¹é½¦nyÁªYù ÞUAËžSr&D——1µ¹swöߣ[3ul.M›3Î]ÿd‹ÓÒ lö]Ç’¹ñÖšeöƒ i|_gNã«Ó\þè®cZäˉˉµ/xF€:ç¥ÍJ2u¨lwäFFܘ¡îFÍÝÆÙGß è>—*ÒãàAý_ýÿ •àJ¢JŸáüÁ®S5_ÃúØO»m Eó…ÑÇŸ*ëo1`õâöCš« öÏÍ æ4gº=Xûßúº¢2†QÐ:…‡ Àžpòmx³!׳!ªëöÃðˆá ﮟº=`2:€–„ƒ5Ÿ‹îbfº\Ú» S§ùjU¸q7”Õ^r½yw³;öï°×–F×± !@†›Á :'&[ЖŸuøØîÝýªî-y­TΦöZÈŠ2´Ÿ ò'h盀Û5ZS½hÜò纷1lEÝÑÜ4¢%ßÀ’.J¼ÇêhémØañœ^¹Õ­0©NÐ@8ôψn´/ƒk|V©ÁýŽpF˜MA«â ‚u›kwM™0‡›A¢êc]ÜÖm]˜Wq¼ N µÚb݃½0ݹHo¬­»K™Þæ+1}%þ»©”ü/66к´Ätq¸Ÿ»pí‡s‹Xç½e3˜2Pð{®—OšÒ`ÎÛ¸%xã'ƒœÄÌ Yþ‰yÙUá×÷ný¬íyŸ°xµhUX|‚†Þ¯ÛøÖuoojvT¼r»„O¿Z®ï‡©þ/Ðÿ 6eéáDÄÔÄ4#$ã+›ˆ:M$¥Á×_Š$2îI%©l%Ea_ zض“Š{ƒ}·;`ý^\÷ʦ§’8ª}i\IÆU€ëÓí /u°µ´á0†7i{¦>¥ÁRfy£x¹­ÚŒ;U'´ôù"¢ÕíçÖܽÁžook2ȾqËÊÛŠ£HŒì‰»Ò4Àc1°/Dt²d`ã¨=¬f]0.P$ \>oÐñçôƒòþîÖ¯Åùv˜ó2`´\1 2@†·œ‡mëòQk› %_q»Øh[&A=aïÀ9W­ mp’Š ¶Ý¤I aܹÆv ëwXã^èTï£×€Y ì!öPÌ_Ð¥ú:ܱçf}Þµ­n ØSûBt}ÒƒZl‹>æ) Ž[dãµÁø•~™;HoæÝ c¢ÁŒo÷;þïOæI< 7þZ]åjÄŽÃHö6 ¾˜+̶lXt¾ M"¥éÔš )d“aÅ$”oòÊ+®kò?×R* ÈÓËn©>¬¤;\õz ,Á¯Bb9ÞžÕÌ«¯©:Ó„ts¢¦Öà4Bgøaþ 
}â2†‘*nÃFGlžæH«TÜê8Ú­4õ…rê_¡9`ß±”bMÍ+6iʤôZ8FßZb?rB™:v·ÁŠÊœO–[gÂg=Ì$€`§xFj ¤VÛÝZÐû0ýdá5·M®2öë<ø2xŒ2Kx·xæƒ#%ß›2v ÔÏC¨Óæ1áåüd­xSb¹¼¬ÄLoK¦A,š˜=¯zâ\¡ÛªÁ¦G±={äîñ½I)¦Çgcis3âÿ'R«ãbêu*-¨ƒIÑ •–xÈí‹ð·Ej°½îÖ ÷yxgPÿCÛT‰Rûšå<ëoi&nõPÝ{«]j•ïë.çN;a@¿í×3àb¥Á„¤³W=øEürð\3ZDÌ 1¯áæä•:+uu -”û3}D²6ÀaUèÚ`Íh`W¢ ë÷÷¦!tÉ·Tþø+c“ŠxK!£Í+-‹·ø6°˜G$;Ó¶úg]绿Î{©wÒß9Ç …õgH M85 ¨¿O7Õ¸ÌÇ>ÀÙ2+hƒqù1³ rÍZÛM³»v«§L¼œ-}AÔl=@ï ³Ám_|?—Ö}~ž>q>¿F/z|ˆ4䣟,qNšµù:•Ýö…ZZdÐl§ã‰[48‘®Óÿ†ãï~ ªL‚ †pÆHÕ{žg)pZ“ÚC.àd·išêG*4üç siŒŽ´ÄëäùµÏûë§ÀôÈÏíýt[ßö÷äæßš#ǤY¼Mªãª;BÊPr‚¸‘¤­Òp‰nê0p¢îìjú÷«MkÙ{r!¾L¸óÙYç’ºVNâõ«Ä Ó?]i¦F›Ij¸ž8Ë¢@…ÏM€-@é¶œ$~É'üáb 49vIL<ÁD+W€Í·|b_HÄÖ¸ùûS*¯<øô¡‹ÚÒo–{mªSV5:Î.îR¡yù8¨…6fI¦$ZœôségÜÜ¡_Ñvç&“øÅ´žœ$òò0Ä-Ô%ùèíûù$Å+ µ˜š'’ܼLôKþšÿJH®¼æº‚“”v¨2hýfë®ùI’iƒ¶ÀQpXB<>IM2:(®(Ñ‘d¡fó2  tÉ“:E‹9Ï>ÔÆpçÔà`Ò@cñ{nO•|&•Ý£IßS‚÷¿G«0i¿Yš?¢4à¬eåaˆÍkUœµºÚ°mªwúO"u*†|2.;ÜÃúqOÓ…+(ÿ܉Ñb¤G eãwi(nþˆýÃ^“І°ïM››×ÝwÅ‹$R:·’ΑoÍè5÷‚ÌSáÎwªôb4§Ÿ(î éŽóÁ‘â×_7õÚ÷¯n}Hi‡è…¾ôÑ'{úœ/§ìééE*ö:•Ac€ƒ7@¾„i+úNpë†i‚c2"ûãÉ»Ö-‘îôƒå°£OW?Ë–Pq¬à@nƒ¯„T¦m Sßâèh›ÆŸèÏ_¯¸þ÷ÛjO:×tš2‹y)ÔÆHq”' &­ÒÊný¬´©{9½QUr›}<\*zÍüµ÷^_¨‚f#Ö¶êmpcàì/}ƒG:Õ#=È—Hì©…T Míڀ¿ËeÇüu6> S½®¶ØíÌ•gåOTÜDPh9ûñp³z^C SS‹zÖÍŠw„¯™ÙÓððS§€© æÎ§ÿG0yà 6)¦N‚œÀê=Sµ]H Ü:\àw¶Ošº¢‚·å@¥©)E]*?°ƒ_„õD‰‘Pù]ÑÑø@‡í gKˆ‡´¶LŠ¿=‚I©ü'0ûä%ç ó’òãHäÄ_$ý(ìÁrÝ­Ž³î™nó¹uÏ4o•*ÞRÆ‘¨# ˆHfyÚtÀåµÊŒ¶u°w±Èäaˆ¼ßÃiun§oǵû¹äš.Õ&ß°ˆ|»G+6AHÔž:Êe<š.î%ƶ@›Úé)× fZˆ¾Ÿ1Ì¢[T¦nÔ[®…uaLŠžÃ”Ä=AÔðˆfjfû !•âÔóü¤=ƒmú?q˜ ¤ bK}ÀÒ,Ѩçê³¢ë@6q·–¥ÏxV™Û‰Pksv·*ÆÝNú¿×¿¸¬÷0i‘{±AÜÃùrJ@s”§¦~°:RRЃ®··t9ù||Ö~†Ì —T¦'Óˆ<.&·oÛx¦Ìt¦~`Üqí¦®ÎTóûd°þ®Wš?øÅ& ýqõSœóÇ¥ûÿjæ$ ›44²;}i%;Rzsé»[øådì=¯3œlÖ:šê!•îø¥Ï‘ߥ>«& {Ÿ0?»¤(WÖ\¨{P‚‡$ žbž[ýH5¼N?UqðI†UàÇßnÖ>ÜÎþ Mê”Ç™¥FíüewíëMõ©C¾4>?=4—CϳõËÚÈ5œçÝÁyºôrºönS4*Wœx·^wòôji5ý±l’ÄTñÞÆØ.dž›Ÿ›+}*ßÕïûÏÛÿÃ>ã¿ü/Â÷¿~fÎ’T¢|(>l@ƒ/{l‰iBZšôq̤¾?âêÔUPÕ1|b‰>«b' 4Éä q«¢ŒWÓo(bއ\² Kk÷ • ¥¶§Œnkï=T{¹1eùr‚¤™Š#g¿7ûsÝê·S‘g©Ésù[3Ï=3¦ï9ßJ–"&ß„s€x†%`i¾€Û4Ö®4 vϑʕ;_¤NÒ/a;nRS&IR·ggMÇè/=I²ÐA6†T°u§˜I BµJrY @¿‘g»s?SzAèÅLÑr|“BŒZ d“çkž«5Ão¦{©²°gáÓeMOúé‚Êâ[Dw—¤º—u·ÅQÙï›sÁÙï}Ûo¬YÇ™ÓÍ^“±pô‹Ê$ò¡ÏM¨“Ÿÿ²ƒÃgu˜v‘æ»Iš…ín¨wÚK£ÛÅ+À©ùØ›t쿦Zý!Ð?ñ؃G2çhpDEÙÏi ›:挀AÚ[ ­4ÆËÒ™éjIMÓu“NEÑÄsÝoù_ß÷gÇÞ,p©×pE 
‰‡vïlÓ¤.íoUœ¹ƒ€²4ƭŦgï§à¢¸P6ÍŠ›‚ÕE”¸ÅóÙ‡ôW 9ƒ ñ©/’$ (¥cã=„õÀ¦}¬y§†%`JžS¼÷ÁòìúˆÔ‘¹OhI7pºq$ä¨Ýæg^ó,¡“jŒZc‚'7(.L“±‚»ÕAVGŒ¥ÔU¯85µdwœ`8“¾S¾ôIô7ÀП:„1X+l¼çÂŒiìÚÁŸ©Ã¦§a‡ñ¦†ì¹|«õ]b»RÛð)FToã_Î=<}÷Ð5cÚRÚÍfUo"P*5 ~‘jÖ†RG/h͘Hõ}}}¤É– }4qò'ÄBšÌ„î6l€ ™uy߀µ‰`R3-Qž×-wÕ­ZÎkØö*š|)Úãç¿Àš{¬{íDÎ*†±6Îè"•yLlÅÐù«¦ À¶!]zG´—åG‰W E_Ó«x§‚FĶlàéš[Ã~½­ ¡øhV)Ù$÷T(‰¶!C4Àh×e ýŒX¢½4œÿM ÿ@þûß;‡êÖÿ‹Ê8ÿ¹1ý¿þÕ¿9×÷Jyï­¢dìZæÀ”½¢gÂXzèÌkvž¸žo^yšX;þ·ìÜó‰¯ëNš`¿)¯€(çA›h•+ê‹”#>Àƒú Ç~øx?Mª8ãî³ßPkžÆŠ €k›ð逊¸${~Ê&‘ Bu õûöüü|yyyü~~yž>ç§çßñÏøÏïßÏO¿[b袭u%}iðÞuÇK†=6 »¡òX³ê75Üz¤k×êö¶j™Ø“Úñ‘“Œo—Åé KÜ/:?©o €'²‘_WžíGïÈ!ìW[A6 ¿ZOCgÖe·f£¿’È_Rç}Iådóñô÷šxPò§ jtªFƒÃgŸ*:ÒÖ0ŸªañoþÙæ¯ÑѰ£ÄhÝë8#!óYúT¶eÙ1;|½èÜD«–í±ÅM¥’Dv ÐG˜ÓQy¸PmÖ{K'nô¦,Êþ 5)A%‘ô¦µ=Ÿ‚mèrD+VÑŒ¾´æ Dýõ|UAÉ/F0 @÷#fœÃÅÐø¸ºªö¤Á|(択é" KJ‹Tzä@¿v€’KjÀì‚Ä|âH o×ãÍïá÷jF»*ÛڅܹR' Mb!Pº²óQ’ñü–è_mºØ-Ý­½¦©&®²7ƒ~è«{u¾RM‡i'^«Â n)ƒšî6Í«nʼnhƒ—ì@|JMXòâœM‰#€…_ìPÎÁ2É>㤯7´ý££¡Q¿áµP·Ý¡«i›ríoä¾exÅ…L+g+ï n³Îœáúѯ‰Û°IZ&€[Òû:¡ë½a)&ŽÓ¡‘‘&ª·àäs†\M,?W“ái‡º$NH9\–hp‚ßM>Õ¿®Ò[0}®Õ6Žâ9=Jl̪$—XA2šÏbò®ð£¨’Æ2œÞø ãiê[ã²_æÔ‹)"šgm  ­?d¸¢¦/êåŸZÁ. ;”']ï·%›:šf'q¤Ø÷£‚Oõ¥¬·—å%®Sô–m¾Lça›ŽÐ¶‘êëÁÚ *D%6ªÏ¤ñŠU‘ê¹ÃŠfF¤¿Ò úærÏÕ”d¢1lљ⩻49(t¢Rޱ ô‹.T¸¦½¿cÆ—³þObx(Æ‹óÀ µ,›Ö÷ªYò­ŠøDFF3ŽïÃûþPM7Hå£!V¼l/–ó咾Џ5ÙÖ>Ü#ÆÅs×õ#þ:LKa“Å­w M0+Mêp,öKX§½àR¾±+•Èâóxz5 ¥V¹‡Ý½Ðê^£Í)ÞP­íx{FÊIRº0Û¶ sŒ°¬}ØhÓ Óè<¯SÍ–¥®×cÄHƒB£ûÀK¾}!߆–-qoê3pë·¡is¼oáÄÄ‹L³¨Õ+ž(ó)jm±+þ_|³ ÐÃÜb{’Z£¯ö\®0ZÔm·êá…°làiFˆD2•˜Ù¥ cé¯ âþs¤;›´!A6dS%ÐøØÊ(† Öc„M Giñèõ;¶5’€Å½ovþ‹4¾ËÓ9÷@~³ƒ?鬯¡–[öqNEçÖ™öó‰ ,õŒùöS™ìôŠÂ÷«ÔóÀ“`›æÜ’‹D<éd|êxŸF|¥XЪó(ƒ¨;F?©l²]ZtÁÒF±B ¿t+¤«Û´kCúE›BÞ…ñÅ„ð2ÆW:¢] @³-ãYã¢{bÅ“%»CœNäTÐ%‹jx¥J‰%fê! 
kÈ8Ðð„TâÙô˜kò ñßÝŒuYóu³ãvŸÖ=í‰'c-Iªkψ‘¹,Ѩò–R«U ß¤t°Sy÷Fô•Ö=ù•×fœJŒ¾¥ö"¾ 4·"|}Ú€öÆ¡¥¦Âq¼‘Šz Uç5¬è½:Aùe&ëZ99ë•^°kÈDîê.¨›4¥Y‚ÇØ ˆÛp›™@Ye®pŽò{cÒBTÄ_¹f‘X‡P†Cb_ØDÙ©#8êjDÚOcVvésty"^£‘žò˜ªoLLúa;‘…u ’ŠIäèh)çìÌ!éû’S¢~@(™Î õ»xøÌGª8RѺmoñ ~égZ™XYnÏLEscNN*ß+…îÁù æÞ²ËD–õ÷%=“ÓÜÀ¬7 'æ#:š»h~Ý!¸A57èÄ+®c½I7H@¥`Cq_î³ÉÚš§Â©—}ˆÞ¨`}£+Å»4õ-65Þä'²Ñ¤ÍyIÐ_@Ö£±_’Aeì7G:Ò€/QAúçëûO —û;¼äçq3æIŸ‚.yÅæÀº¸¸ødÈ ñ9{¡+*è>)hW­;T;Ë¡®S;-9wš†;D÷Y1ãrGDM­uÓÞ>üÇðüa>>†aK%ÔE3æ'zKp Úˆ3ÃOVÀ𬠨H@ÿP9:FôÇ[à&FýË‹ñí-ÝR‰bܽÑ~ø4öÃû‰[´7‰"˜¤k¢xi(eŒB"k5ìÔzjãÕŸ4§K<(¥ÆA$d™]¢ñ•M}OÿeÞ²kO¹¡Mýƒ6ùô»À€Ê¨KÕc_¤H=Ç7$ö”ßù ÓÕ’4¼%43³•/$&%£*Òm¢lFêé©Ò_}öIh =OCKç#ÏW\Ò1"ý…s”_¤ôŽq¥ü~:wé/(.Õk¼QBInRÃÜ¿xšâêë¨ÿÆÅ°¿W‡qñÿüø/¡Ü?qª¼£A|F\ƒÅ ~¥uÈ:ú-ÖíÂý¡„?6IÎ_ ª¯çˆ/6;whŠxÐp´j8Yñlòª`M»üªïîcwš0-_Ói¬®’.Ö›â‹`8-eJöa~}}‘9?©Ù™Ê9ýF“€øb?cxþ¥‡gSâ§N8õÁìbI’kÈˆWÑ•eu|ÒÖ®-ýgÊêt«‰)e¯v¼ì¥õŸggbõ5êV°K€)-:3{¨ã̧*; ²QOe*Ô_IP_{ÅMŠ@Ll¤‹ž~†'%OÛòEH´”„ø¦úøô‹R_<ýB\B:¬ðMŸn“*?óùf¤n¡‡Æ'Ëÿ'¦ƒ{ó- sºõ´‡ºEg¥EÖ»¦n‚¢n©=MâK‘îùÒ›F²†µŸg:jE¢q3.ç):yz²©³ÄóH»´vœ†4Í­OäÓ!õê:&Èòu°DšÄœ Ø/ì¾ ×/þ'Rù&¤•ó¶Ê[g‘Ÿ˜<*I4L 2q›²+JÊEAÀ¡3}9U˜DÕ÷ëIÔëpµÅõ“)ØÒGßwq›ò—´‹ ’tµë@k"v;¢µ ù ô"ɤÕ<ÁN¾¤È?ùòŸ¦…o5éVXY“I¥Â ¼Â¤fsëÃI­à¬ îL j0 Ke'¡Z×_3‹âF!Íü¨Ã<‘þÖGñsSkfRºpàŸÆ›¢ÊH½Á%ªTÐÃݹœ©‹¤Þœ:ÁÀÀ‹ñŒÇ fômÈ‚ @A8r†ª#€öºyu˜êwþ¤I)[–Lº í’"C«IO‡ãý ˆ“lS‡‘î:X@õËtd´"½¦*~(]úŒ{ÚÔåÞÄ7@ɳœ†mù;q@gH|­»µàTžI(÷s“W_Ñ™PÌü<Ï«ŠbêâÊݪ(!kÆAaõÝaŽïG 2¸òã‘·ºfºD:+…9—%Õ[BÝ@åÒX‚º±ëØœG9ä¢âe¹0‚Å/ó@ªÒ¸øÛ@ÄÑ´—ô{õ3)ž4/MR:ºç‡)bf‡iªïTa~Þæ·r(´xøF¸|˜ÈƒB§ ÔÔ£ Í1!ÆÉs›aÊò¡n_Å9ßùŒÃx‘>7üÚ½óOFüä1 àniKs<ÙÂëu’¯ó·*Ì?;.éP˜s.f#3¿@ïÜÆ3Å5A`ŠÎ“IÞS d]YwØNÅl;ô·¨¶XuŠðT^~"duƒÒoF0 p£* pßûwïÆ÷u.²*fö¦ ™ª(ñ*¿ŽGhz iujÚ6mÚ2Ÿ µ™A{Mc*¶ã%Å¢ÑôÞƒ]úyÓA~È-?þÏ{ç¾… áH*̤ATNñóÄÑhpA²ï,ꉞ9&ƒƒäßü¦Rbë¡(ÅMpðñÎC'†/½î6»sÓjöüsK¦ÛºPCv†~ÝqÒ6xÐ$âK¤K²Ã쟠é׌ôÏן=¤´Â(ˆ÷£Ž' Ì×Ô½G†–ÃZo±+”bÔA ’zŒ ‚'2+˜h±¾¤âZ7)µpäfDÎŒ~ú!ÿØ6åê7î…ULOAé"¼“¨•ø[áw}Tb„Û¯;¡›F)Œ§À}jÑ»Ëý§ˆt~C)A Ú‚4Ú2‘ˆ£?ú¶÷'}\£ .~} ¶C™æ^ íÁ”©¡üS™¡’P›ñsMÃ¥©6F#pó•ÛBž{›„—êPS϶˜øáÑ÷pUt×­e"šbQjßB2Dª¨?•H•LoSÒÔ •º¶òs…¼ÿ°}¿¡ê6ÌÏðÔ¬ Þ‰‡e¥>¶©"eÐ&›®yo}áç›Í6m*èRÓ®‰_|£xмPRÞÒLäSŸ¹™ÀÑŒn$GÑ»KÅËÞÁAyÿ5¼¯©QÕYŽLÆçÀ-ã& ©>âçúHí0R0Kã‰ãrà4?£)*»lÈK0å+šÔ€ÛªÂYÎßúï”z¿¦ 
—o|šðí/’èɼö¦(å“*õ韣äê™Ý<ÿ1Äßôa#_xî>íŽk_n|ŽƒŸ^'¡Õ*ÎsÛNoÓËoS®àJM”w%/Ê u?‡¡aêØd0ã«¿…ùçfúGß ÏÏ÷ÇØuw²4£¤ŽœÑ«œÓˆ‚‰³§_7©•Ûø@¡–S$È*¶(š‹WPw-º~î‹ÕSŸ›3?L—8zH”©â‚*‘êibÒU¤ÛÏÇÈþ“'åavo´(ãDÎ3ô*kAVQF˜t˜G™Ì9›Œ)ÛÊMTS¾÷˜˜ç96BÏi›¦Œ^ê0Œ;­Yý1Ô6M@’ÿךœ|ÉÇ÷S8¥B‘“_:Ëÿ{èñL΃ÌëìFþ¿MÎPßsÜ¡÷RÚÔ2§”„„à”VF›þ‚€Ð…+ªµqT¢IᔚÂ)• oƒ½Uƒ¹—ÙY}ÔäH¹LÑ…_Nžœf‹Rû»nAzë³H=Û¢ç­ Ë›êMº'uµˆÓò·¾tHaÐгfÏí·ª*b‚ "f]QŽ61ø)¨¨÷x\C‹dÿ!pCÁ3ÕÈÓ,€K§)'õ ü¼cÜÒÅÂóo“ÎY}ƒ®[1[,ÚÒÛœÈú] Tn‹7´pÑÞ¨eèçš]2¤wÓÒÍÚÛ×þ!›®‚º¨ò¬Ë#»xð ÊdÙ&-jE)ˆ“ž˜2Qã×ÒÿõM:üÐHêYÝRhÓY¤ô!=XüÖg´˜ˆ<&ËÜŸônº[$¾¿M•ü©÷·ìõ$ézÂôSs˜n*3ÖPNmm xoÙ”ÐÃi§Vq‰r6„l…TÌN~)QÝ=Hî1“/Ò|øøôIµ6¾ú—ÒdsdO¾P6Dç¤+€êå%ÍÀÀ€~Ÿ›”>;½·{&¦ºMWjÛÖž„,Õ‘Ž–Ý¥Ü¨.ØÔ©Ó¼üGœâÝå¯g ˆ¬­£‹ßE6nW>õ!pÍHtöŸ«@Selê¹?x®ž§$t…ÐI"eâ=+58½›&i´á¬|®†#“Z Èï8÷G0ƒ›~ ókf<™ã»½wºÇ¨Á'J9‘i‰M¼Þ¤%;Ûïõf€¨…±*zQE»›µ<©ƒ®[I ýŠ1ááhØÛSVÞ6Ã1s2/ƒ2_AÐlyÞ궃òÂpL6½õ¨I¬Ç ‡ì§ås¹{h*à@¾ôGÉ_úBÑ”âzjâHÿ˜”Š«4½Wq³Ê5šWúEèEDÄ`ÅP¥(`ªÛsw¨ß0CDªÜ\÷ïÿC'I‰èÞ jc ÏÛÆ¼Îªv-8káçÑ«TõGÈ닊½Ýx¢)\ª üÚÝŸ!þ-0£Ñ×"ô ÛÌu•:Ï÷`å°§ ¤bËq~àœöd|4ò¼É,Ę{žÔz¶ÓJÈr•ÊãDgAxÒ®q¯<#§Ì“M&MÙ{c–“SYùSJ6ï²á* •ÅÂvù§ö!•¦`7`/ÕŸ=ÎkXNWR3êq2.ozï%…ÍÆôšo ÐtK¢†÷˜õšjøa ײ%ÿ' –פ¶ %#æès ¾ YøÇ@OvA“ÿºùO |Š÷cJ¡ÉƒBGv ô{»nkoçÞ:¿øíŽÿLjpñ.툸¡4{ô9P§W¼´ [õ&m)ßÒÒ^E>‚½ùqƒ»Ù Î§€¤áDáN÷ÑÜ*¼Lv9|óërË´ôX"æ£ê›Ð•™|›Jõƒß`ÏUÍ¡^ÆIGžÊ8Z˜>ðù#Á¡¨I©Ç¦ j˜5Õ6tØ]ø&ÝUš™¿Æ0ÓR˜E‹ü84Ì;d 8ÎÏœŸ·Üû„/Ë—YýÞ?Î_©µÂÚÜ‚÷Mš6žu¸8»zMi&vÍœŸeÈì¿ÀCü‡Óç1ú”Ï«•oK–@8$󋚸6,eKÒ«_ƒ¹.¯{ÏM…Æõèî} 2»ù-@yML øŒ ­MØSòú¸Aâ9ÒÔÚ%Gꨭ½è²îØ•>7ŠLîþûèôJ¶X’ÇΡ GAn¹pô]˜)‘ߎtßvã†Õ(‡Ô0Ôk׋Ÿå7(A®D Ò$cü4ôiøT¿Läb5ýtaˆ©–£h´†#“Hå|üýæâ'±õ^2­;zðÍ8Ø#¥ˆ3˜HìI‚µ¹À˜Q2‚IDˆìÞbÔ–¦±+sK¦6L ‰Q÷jÐÒø^MöthÂ7-HO€Çs³Vý×gA¾XxZ`à(±6æUHA «©èðÏÍ©úÁ¬Û›©Z­—´aØÊ¯â’’5¤ r-HMÖ rì7a¼ÿrëA¾¡àm„#¤¾1€¤cN>ê…NýMM&ÝOå÷94ˆìO|w ³1(á4=_p^¡Ë´ázZÓKwЏêýþX%ó[Š·ù!u6ºªGt±¨ŒïçlÑ…ZT½Fá5÷ë·s,œèËGžäÑ”QɈšYòY3ƒº[LfËuÑá>Ž/ÃðûêQJC%i~Æ„Ä –u7«ô±²+ ?¿öÓ;çT“8!—˜£cûPpß|á’B0(þXúW¯‰“ø:4¼;ôy¾èŒ”©‚xîX²)GúH?¿¦_|ÑmOÔ’®ïýøjû±³R¿>÷× ^_üã÷}&çNSQm)·aK/Ÿî©xg{ûÃ#2_Vp$&€Ô U7·`ô‹ˆ?³´¹Øä¡ê—Snn=ÓÄ£dŒ*€–¥[jrh:x#&é¢9\*™ä×A¢ú==ƒ)Kê™p‰Oã ü­C⬰ËúÕÕI$'ÈÑw‡Û©K3ÖH—< rd HohfX]øçêþ§ö,Õ+P/êwlš‘fmµ¤¡ 
âCzð¶:¢èk2ôÀ8á5‚žçKz¥óhŠúÒÿ‰g‰ÏKv…^¯wÛgšP²"Á„ï$¢ïÔ€”a,¾Æ4õaí;5úÌq‹zEúŠR1§9dgãR!§sëŽå÷ô÷OΓ҂5¨5º# NÝÒ¬öË@׸·úî >#%hšÊr™nF¨ˆ2™»#ñ?Möˆ¨ÐP®‘,UÊäcHíU£~Þööèr¬†ÁoÂû&¨Ä‚Ñè-MÊ´U½R³I+0ÿL9=Cé<»¾~zô']]…7?>Uºè E›0I¦Ë°‰ŸËtuã÷&þE5-ø¹ïk~nkµÙ¢»MP{›¶<$ …k ¶^&˜4ØuþôÅ7¨s N¯ƒæf‘*‘JAêù°ü•²‹nö9ÐéR¯ã:Î@ø‰$õ8¡ý:Æ¥¶Xþ"e}Åè§Ôq¦‰ânÏsο´?𡳢KŠôý9Ò2ÿ# ÀÐÊÚSPºŸ(¯Å£GÊvñæ•8É¢æΔ¿Óù•CÐз01ê½JxŸ¹¯ñ޲Ä×òG@Ḧrã?3èîm‘/±”{?€©âÌÙ@íÔ“ã1Ù…º0€þá@Ã0ÝæÑøàÖá¨ðeµq·Dâ¢A¡§,¢Ëçye<„ÍÊ£•½fϳœÞ4rMÑ­OwÖ©¯Qje·†õáq ~ž?дÇîL½·MóêC2ÈáS“x€ª@µµC˜¦$OŸäT» Íï¨A]Ô£˜ãUñè6Eè áϨ>~d(EéWõ—û\¼ÿv©W“é ÿ¿ìRÑuÝæ·:Ѓsß3ã®y…õÕŸèèÂs >ÏkO:À¬@fSLZ‡‰Î!=Ï綬ó-Mjv6^fª0ËÒG¤ò¶ŠI! Qúys²ÕnûñÕ-¾I úèw6TÅÅâ²#. 4&IñP¯„*Ð"Þ õðŠr@M³§~’3‹‘‰"8‰Þ ·‡5Ws‚MZºu¬;uÀk:¤¤ÏFV±¡ŠO‡ \MÝèÓ[ulXèÆP}ü‚a8|›19 ’ÐFù…¸k sN"LÅmJo”¸ÕPÆ_ŸÉ*¦éˆ÷mó˜,^Ó]½ù¥§bž,ø{„Õ³÷T§¹ƒ[¬ÿ>XþeµÿÊß$_:¸}¯”§^³K%(qÞcìPà÷§šHÊAa«¶I¿n(Øu' ?µ W)‘ØÖäSÒJ¬TЃÊV ÖÁ¼M)às›n̶QÔ7 ¯&(c^XÕ›Ùª.!oAê¼r6îÉ@yšHm‡æÃ×ß\T¢~ûÓÑw*ª:ÿÕ®¸þ ¿þUºþëH‰°ý2ÇM_žÎë !44¿ uå#IZ‘¨"ØCZ / f½ñ¡ß¾ü­ü°ŒâéW ž¾üŒ áµÍeÒ:T¡yëhNà^N€ÿ,B…@ÀMÏô¡tR]e*òt»VXwñ6;¶®·Fü(HTãŸâX…›™šœqàh8QƒUo "WüqhÍŠ®ÅÒ’ÝSJ¤W<¨¢ôTD6g}ñ+›uiSÁ”´-@‰oj1w Õ'Žï¨ ²"5½US_Óa×cTæÈñ~ŒT¢};~¬â›åŸ½29/U™o€y°£¶ç¥Ñ щÞð§?|°ËYê| Îb:âŒvª§Øô÷šyÞE:w? Œ(0 šŠ›0‹I-©˜E¨Ï7š:—@~– ª»ùy3æõ5þwÛ)cþËäÏ›qθÿrø~zO‘¥Wª¬ëš²I«›’çÿï—ŠÆiø‡Ð]¯¥(¶jQ3§Y úvÉ®­ QR=b@d¥¸[X°ó« ‰ en[˜ßÌ\—ݦÈSüâò¡JÅhEοÂìU¸U²¼«ü,¬}hÏÏYiù4Iì6ªÛˆjv <„bý£ igõR ”2âÏ©÷Ã!‚((+m‚ﺯÖÊ»üaF%.|t©ŒímþðôtmP¹gºJ¬ !˜­¯ªÛu 8H”§TìºZAUÔjÔfÕ«§ªžJÍås…i ñp%¸Uõ:С ²†ô,ë›:x6^Ê© é=°£=-‘T‘ì  ÜåÑ«8¿¦Ùãy€ï'7Àé^ÕSügI• £ W%cô¸1^µ@­h3 [m†} *UKW„¦TZKÐìR•œ7cM.Mçç™I#RØñ×…ªÎÔ‹—qîÔ_+¤~u¹S™ÖIÞ©òVÚôÏ.Öý¿?üÇG÷ÿ"<Þä›ý+þæ\íÖRTñKU*MÊvÊ Œ¡ÊB‘‡ª'a»Ó}úu8~vÚCx}]©°‰Syýyþ‡Å¹Š¾Šr‡U|ð<µ“X‡:H\_¥*Áb@C±iÀâ­R|ü¸ÈÓª@ÓT°ƒ˜5¸ 4huÑGY«ÜN-‘‹ø‰D»™AÝ×Ë-æyÓJ*ª5`¾ÎüØOe¤B/᪡žSÙÑ jZ-³+@ÊðôÔ¬» u¹sw¦ÈÝl@¾µ„e¾€êIŒMœÞ¯Á.~Kg( h»“­ÊjbžÄÄ@Ô*Áî>[XÏ­1Ê@Aâ6ð‡Kñtõi°{ÖMbÁûjrÿ„5ÉðJê=S OAï ⳊÊjzcM–“j8\_©ÆÂ¨~{BÒl¢l‘”4ª¼ xäW0=?¼ÊÏuâ_’ݨ¨Ó4 fS&φـôÄ6˜ô3^ÇËIjëæ>õyß9”l¼« 2BÅfuKç9˜!Ô. 
îÏϤÂ}´ÞŸãEøëí÷½…r0÷vzdÕI„{§“7+Aµ+ôȃÚÉw@(v@\V·ë¾—=5H\Ø’ÉÞi8Y”cýÇsàjEza« ãú;¦Â:_CnèL¿]Ál츌õ}À­ÚiµD ®[bAMwj ÐÚq6…‹Öéï6㘭¯]­U/X5O´`¥ïGï\Xƒ qëÔÚYs-?QZb€QÎ\Ó‰‚ƒUŠ ,2×øÆÔ»&¾˜lKuöø`‘˜\_<¯Z­.Wš¾RÇ|»'¥ Óß cß½þ=Ç·ûï0þíPÞZk$œúÚbjO ŒF-ùŽ5lÔš$µãpk0ìÑŽÍùÁ1‡—EL!Af€")¨ú 1f‘¿øa&Ó »œyTÈs†÷îâÃ?õ,û`õ®ÖÃlOCwâÉÒ +Ï‘r:½›F•W÷i¼ÂûòF¹qO^neѳ$‹\ˆŠ)pq&nVÑ­Î/•AàBƒbêH?´á ùX4â7nâ) ±©²®ý„4±2º7˜¨Ä é¸ú0Ík±©UÙ§êÁªpÓ‘Oý¾Wv©\²A®Whš‹îŸ,oÕ°  ×à.«]ëtú„žªôVm£•úê¡Þ¦t &~ò‹·_¦1°-Hù£´ý4ÇUiJª Ü2v…t±i‡{w éîÇý:Vm³)%ƒb°4hkü¥2(Wð8…´dÿ|&·ï«Äù5áü¼éx[jñì´{ŽÛ4ÄÍŠ7ËÉážÚ©»ÏÓ4ºÌ¯3B-B))Ù^–ûÔjR˜V¿KU‹3‹(óýåÊšt¤~ãÏ—•MK¨ÛYýiò’OÕê¤òêr¡’Š48êµÅÿœfF€¦ZL±ã·«4å4S=*^EχfÙ©FÒ+•­ª Í‹©#ýAÖºâO3¾*gQ †ŸþhaµxªÊçw¿‚s>S!Yñ Ñu¸# 4„ ”ê‚íÌU¤Y ˜¾ëhº.ß©¼E“Î&eÑs §=©vϦf¤¨b ˆ,kQZ#ˆ¦dRv{®W}ŠZ.ó*èFuhHöp«Byú9msꩺVÃy§†þ²ïOT -:Êý˜²h8§^¢CMaøV éOô¦êƒ¢Èí³IsHKùÙ®°hêø«‚¬Ëdüää§|*ø”Oãë‘j‘:éZ‘–‘T aê!‡Ãðúƒ;ÊÓü¥·ÃtHËî}Œ^tÅkŸ’¯4ò矛eÿ_(¨*?ïÊ&“æ'JTVH/A£C)”LU4|›‰[!u|L…¹Õ-ºêS˜:ÐÚ}ÏŠ—q¸rò>*PÕâ¹ÆÕ¦ ×Ùâ=Չ؀öö:þ²jB8Ž›n:÷ãzÆÄëýz½¦zEèòè∦jØ?¾c$Ýß{Òóq–6®÷?û ÇÛ)ô஢O]ɵ}gõ ŸYö-ö}š#R=ýý®1?­q`xytƒâ±?6qW\­3øJ˜ á'·ímMZºßçí€'ú±ßuW³À‹Ü›8g€<2ëà[ ]b]*Wɳ’¬ûû€q­Ö§mα炗zVÙý…ÎÊøôH¡±ÝË[ƒÈí ÷ò̆útÓ© Sx;g{£ j@•$§³;XƒjÁNî”ùè펯%÷שQ/Ô1¯EpSVHVìö¢Fðs 4˜ÍŇÃK Q‚ÐC¥"€jMaO&?DHÒØõúuÕc]>W¨ËÏ&ª¥€Ó sW¸W¨õå n›¨oè>wúúò¶ÿÙ¯VMb‰b‡hI G$’ô,^Y®<_Ù”\¯Z´R®úäzh›‚k‰Ž ´ø]œùØ›&œwzBEû±oÇŽ÷|£ç¢¹#èäÚe%í‡ÌsÏÚ¢í‡z¸2q×! 
Ÿí?N j®I¥húE¢}|{1Kã>noì¶"ºM4¯ÇÔè¢X¦RÙBË[ä[²é?x~uõÃ{ëã+¥ –@¹1ÅáÒ›ÃešÀî„£ìz;9ã.“JÛ´œÆŒÈ5±íëÛ ®G¸é`3M­¶†Ô´¢û±j[ð­\™º0dÔ°‡X™êãOôj U06^+g‡Ç÷Î-Ì•¬Eû¬šöÛåoRNÕÞ¢Iþ)Ϲ\p2‹B>ÏÕ?_ý­^MËÐË›s=Éé"kU¨{J›ÇÅW惘´Å¦1¨®<žKÖñ~§Á—ÒwÇP!TÔ¡²êÇA\uSó=e@`!P’•/@%x>P©ºRGº^þ¾ÖvùÎÜö Y¢ã–g½\oRAj5§?÷ŽýfAZ—¢íËÎô$ý…a§ÿ ÜU‰Hç«ä¦&¡Ç-Ò?¤[?ú^ü´ï¦©¾‚ì]ˆw>úìuiS»Ô©¡UL“M…Cl÷H…© ÐmIüAìKç\|ë.^P3< ee%Z×Ù)^¨Ð›Ûô'Mk³%ézssÏ=é:£ ZÃx~=e®ÍOšÐŸ¥èRCŒº`yöâCW“†6Õ‘þÉ%Õ9S1ìÒb«°B¿º&ÍÜn 6¨.Âj{#oi×WUÿ"ÔÕõ ì?¨O~„Êö©ñ™«T‡Z¢òÞü§ßBmÔ²ÄR%“ÆÅ “zlëËÿ}S¡‹ ôOü˜©{%àÆÛ]Žûˆ n(ácÐM ¶üì) “èci ûÝïõÞQÒ»Ó¹/+Qˆ*¡¦e RTUÙû•kšjU HOÿÒʰ°iâ‰2h/˜ž“ë¨ZQ æÇì„æÖµ4Púø@5t×Â…†~*ñ>¥+¨‚Ná“üxÐÓÞr¿[ÆÛ“âé}¡»ŠÙ3Æ¥%=Z•fž‚üG}ºI_éfrƒÝjÐ_«™ôÉ ù|ÍlŽ•ùQd ¨³²êHOû0¿¨?¹÷IYE`ê-Áe‹¸®ŠtuV7 ýº…LFËtŠi˜8tÿ&âòª¹¶AŸ2&ЀQ/•æi .Odž~YGJõ÷èïcXS?OT‹+Eñ3F ê®÷eX!¤(]â(4"Ý|ÎÓ¦)Òô˜BX`‚sXcµž´Ý[°âÍŸßýÒ?|Ò®¼dÕyæL¼¡‚…f÷O¸ÞcHºJÝ­Ž¾ïûqûÙ-¥]-žÌ º ºJñÙ7b…´È ƒ¢Ð‚t»¡¿$S²ò÷ö¯)ÙÔšú*Èž½^(·$Oò‰¢ç@ãÚ¿ùw¹™|{¤¦×6 P÷BÔÔhйàt—fw­p~nq~ÒÀÎdK?ïw«ˆñ=©»Ãðù˜’ŒîO*…q˜éÀ‹'‚Ë©åMJcO ‡?>–x/gïisù_êJPKîÔH”E—ŸøÔM-TA©Ô—D¡^JI}Œ÷G´ßK’vÏàÄK¾äáÂO¥Š¢š3ý¨FwßĪ8NÁ rSRÕž¤E>KÞ™Gö3Ñ Õ¨< ´€ÝìY}k@|ƒšêž¢ÎÏ)–Ijh\U¾ Ûi4Ÿ8¿ –=c'O¿½ìU%ŦœO‡ª(ÞÇý™”[ š‚á3 <ý{*kM±4X^[Œï‡P óMPOr?X~ÖCe¨Æg0…Š×ÎDÄÅzHR Ä©êÁ:x­Š>õÓç¬ÂïKÚgô¯HüGI~¦Ø@lˆ¾‚óÝr É&:­ýLÛab]OÿM´j0ª $BàæeâZñê²iêCóõÖ_qV™v -‰¯7TÔø{€@æbkòuå~]e§—û+Ãiæ‹€lÚ ‡ž¼’&„X§T1˜¨¾ ö;?ØçáÒ:ÍïzšÖÒ†«•¢VÔÐ$MÊ…@EiÔ¹‹:eª©“¶uø1˜¸CGûñ!ÖÇ/?Xsß¿¦ds "Rîb“Ç·7lÕxÛ軄~m°ãð6 )yzô×EõA¡È6…ÿÚ©âôóŒªêÍm¹ÿ+×éö½jœEHÒº†œŒëej‘û ¦˜Mõs¤;µe®gp”@ûOüŽQŠHã ¥ç#å—êp~M8¿W8á¢nªÍãéûÛh¬Wƒ×H² JRûufM¬(ÿjKùOQ *)½GŽÍ”—Áô %Ô«&­°SÃØ‡p‰¡ — 9V DšPÜSÜ44œ*Üê@±üC.õº6©Ê†Åì£xH•BIï¬ZÎ$®WÖÊGþƒ‡*"PU+¬x\Õåy” çç!θþªzR’PuÜ›zõ¸õ¡*~ÄžXé4ÖWFçÏb<™5GX•KùÕªM¿1äó1Þ_TˆX'˜D£ºqTÛ«ªÂ ËŽ d,TðªNÉŠ¿øûŠŒU%蓯1ŽBEe8ê¡ÚîÕ:y6‡+1€H…à@¹“B’z9ªªá4ñz¦hMëÍpÓ6e,©ò‹;Õ@þSó°JYÀI¨¶ŠGjŠM.®;äû`;UÓ´‚¸Q%”Ñê5þpYÕ qª”oAK¢ooGÒ9{†m+ŸÚþ1 Í>Š÷þ•P•O¥Ê¨ÀÎû(.½ÿ™¡"Ò¸·è±ðYBòŠjUÕ·úêG‹vÃÚ7Í,•#˜MEj¢Pâä{VŠ"¯R`Õ"wy5`õ^9ð)ö&UäÏ¥c•å¾$4h*aÑ=%$§Qi*­xÒ)=‰œÚ îTþëÓ°"¿ó¨î³¼Oª$q´xÌáôJ¤jDOª˜H÷u NÿzšrÖþ»%Qt)g¬S %lˆç]ÅŠfû`Vm­È´®:CÙ„”Ddúâà晴©S¹ð­¿o ®óm?ê  ÄP„”òߪc9+æVåÛÆ;ëñÌɨ/˜©ÓÍì±^Iu¶nô[â{¥ýb뺪¤AÜï*Þà0ª±Âà/8Ø|n²ìÑ4¶ÈXp Ùk @—"Ðû¡ö» 
õ¦ª¯zOdÅ#¨*ä¹5MPûe´ZË”ZÛ4 Ôo¢þÓõEŠU–˯k}³Þ>îI ýhÃLJWMjÛæ´êä_”-@èIT·ã(¹„_AWªßVa:ô`)RŒµ¡~ÿ¨•ÒgMs”CQ Vj•«DéòG÷ê|œð/ðŠØúêÊ'åŸ4Æ‘wÍÚ>…*Z½Bjõª_Qêÿ°cÓÆž´¸ô1úƒšªæ#„rÅÛª“¤JçÖ¤Ãö‡ŸÐHL™˜h§¶åeê¸ð'Õ5a=‘ÒD5ØÛàdT•S@ÑaáÑŠ*ü)H ªódñ—¸åRªj¦)¢Õh ­*®L\žSZÇÿ}fôÆ ‰¥˜²WtužZrjäþ»žjJ¯ VʃOþ;@¿Ðo̪Ó=ieÛc|ˆñ×èî÷ðêÄ(Ü“ßW¾”R%_%ì4Ð[ ºÿHªÓð¿#›&åêÚ¦³Jb ¢…ä»”sé©––»“ª@ó'…©Ùû_‚€xK­ -dêííåÒüE{ÐÎ0ÿÀ$j•sãðbÆ.UêÔí›ôD>!IR@>!)T!i–¿ÀêLb+~UL ¬B¡-£ÿ× x|ÂçXåaX_(/©"¢Öj³Mà ã”Îa~ÞzŸrgŽ­º/½©ýGÿ8ú›&½C–uá€ú\K©ÛÙôQ›]úS õdZtã¢S }[¬}Qö )òoÚS#Åt}†º(}TÊû[÷/Ìxðê´Ö눚ԓj@¿öh*fØ"mlïG÷ÅÿºH¦hžLI:¿(OÖp~߆YÊ}'?}Öt#PÈ}Es*=>îHéB† äa: HO î/~[³'ÔU¯ È’'ŠÛÔºY;¹Ž_óM?)©õÇHÒ) ©•^ÅàDê©™&”¶ EmòL–¶É“Õ”JS4ÂÅG })|›r~šËí³ ÔÎÏÛ/ó*‡»-ÒiÍŠOŽ…ŒB’V!”.*~èu )×,Zø¦“Üÿ°€ÝÉN¶µÍÃçóC´â¿š<¶©J剤¿´öå´Ïûú¸ªB1 ÏÑðN£GÅûÒJ“*‘&|\4éÎg‡ù]¯L§&i¡îÝј†´ö G+í­´¡0Ù¯¿.òK&Õ‰è_~k‹;ʯŸR©ou _™œÈ­¸IO¯6(åˆiÔ·èAçà›4°bŸ ‘¾3Æz2õÉÎå©G5qÖ”áC&2ÀÕo”ä‰B‰×,ʧ HÖ~¡†1\½õ'<ðg)Í#ýŽQ³¿¯C½j?̳ñcú Âç§ý$m¦…6Ä[ŠË9e­çùà›¤íu÷Âm|+ˆÏó3nT‰\ÚýYÞ÷þ~s¢QãÓñê·}(’üjð6Ó'¶é.±ëô<Õ館™™¤Ñéó]ñ@q!ò¸O5%Oê@«¿ÃæŽjà¤Ò4ÙÁh£w ^t|õÛ ®ƒôAKÁ_Nœ ‰:¯#âœÂt§"ÇË¡btL ÷ j3¿6Èü¶”Kˆ<ö ª§+lªY¥Ÿrgð¤*(©ÉûhÙÓ×ÄO¾üœÒ¡Bøß§®Tv ²×T„¨Õ²dè×±I£œ§%zºgEqÍôàÉ•rIÜ£fÓߣŒÞÓÓ¸j };Zý©Çw'$hhûćJžé)Š\…jYü>zû>‚âù˜ÙgÛ´ä—€"„ö½Òè÷+z“"ÖYÖ)ž~Aň¬Ó–$BªçQ‹2Å+ï`ùý$Åïg‰DJõH ÕÈ›T©7éèÖA-2ghMÞtÐ5Ñ‚ª!:§]t¿x^”ÕICHê@ƒèG‚ý“õŸ’pQˆ¼(]œ)NH 2UÍš Ø-ÑcmÒÝ­ÚR5Ëc^}ôh0嵸¦ ÇçRBúæTŠ [tÿàL¥ÕÓP'ëÆ6-?$zƒm‚:ˆñ¯¿´ôÓBò·Ã TuAŽ@Йr޳‘ }%mn÷«=‰ùú¤Í„$×Ó\ZJQ6V¿¼SijB ÖáiÐêÍtƒ"M7MšnáÕ½ ÏñOkŸÝJ6¿R‘˜ IÑ•RÐJçªåŠ)4€ÉÓs„øõÍï}¿4"Ì_×¾M‡FÓŠ;UB9‡kPE×iìSÓ„õY[á`I1 . 
ïîþ¢I-ËÍ¢—±Òd{bŒ ½4>Ô”/Uæ_)ZçÞ§«sØíìú©ãïÑâ\ܰ–3¹8zŠøu–y8Åù}{<4îyºé=(Éiõå!P«KÖ¼>LKZÄ(‚¸sÚÊ#°Á¤°¿nÔï›ç”$ieÛ²—?måÅ  áh\ÿi¸CR×ÎævÊ(?¢BY?F}z½þÅ{Ú%Z}ù?o¿üøñH•²÷¼ÖWÀ~ewÄ;ó§dŸæC&l»‹T`¢:²Òu‹®îþ"-EhmÿåQ’ÊC‚B@Ûɨ•I­-‚>i¦†©S Zÿ ߢ'ýžn7ŽøL9Eî¸ý“&[:¤c‘H+–Ÿ®)›ŒŠ¡“ÙIé;Æö>^þºæ0u#ƒ5!höHÅÓïyjüwE/zhêDI.?Ã(=É9ö©½k¥óR½ü§D¦yÙKLqå;µ ¢€TÂ9Ô'¨sçœ7›’hîcø×è¶Éþ²È{Æ— ÐÕMãP]áPi›þ5›Ò¡â.ê|øšïJÓÃOyš³³+½4ôz°&¥ y«¡Œ *?]äóðÕ^ºR3ÔT‘t:^­%_jÕL³š .'ˆÄ‘ßø…µWíiʤZR÷ÃÃ(î¦D¤sªè óƒêiÔ ’Ä·?Øâà³fö³è²`x( CºI½)»ûœ*^Ô©¹ê/”öçG·ùôPíÛ9¹r÷J5¹·×*…Ë )*NF‹ßi»\ÿ4ɩլgÍr{[õlùÃ\IÑSÅW®ù^3³vʳ±ªPÉ®ñ9ùx!VÿâYt#5Ñ!=ß“}»¢e¾ÐŸ§«Jú6¤õOK¢.T(¨4 AB+C÷\Ž?ÝmRÅcUJëŸIsô¥ÈVÆ ðÉ==µ<ÿ©6“Sýÿ~©N‘Äs "åÉ*_¤U„MªÉm€ºÐLÛ2Ék$G¤eQNóEêŸæ$ HQƒyt¯òG ã£sÔ´õ(¢%–]=Þ±rJ×ùÔ‹^Gú},}¦xà$l– !ô麸Ì éâVw¤œfRéŸ[×?ñ3º¤§7d²·º¼üËËj‰•ŠLµ7Ý î¢ðB“2•m²kx˜q@x-c`6Ïç®#œÏ“YW,Û š¬h¦Go°]Ë"eË·ìbg®ÁªŸ·Ð-ËýHöd[Â’Ïú“¦>OÌyá *;ýœœ29©eùíÔ|–OçPïîÔ6¹|ìÔèVÛ.þ±kÓ¥ ‚ PÓFí†~ÿÃüVöj]¢™XÞÔà½VëMêÒ‚ «¼äžÆÇOÒÔ¨R0>ÁõÅâ‡âê—ÓdßÏê.½$RÀIŽñ˜˜5ǽèÚUK‘ª)°ë Ÿ\Üÿaã$YÒØÍÕÏÈžù?èí›æfûªAlͪoUœ)ÙOs˜ÿ]õ—¿ÆÆÍm¡¦‘ ª *äÀKú¸ü}ÀŸPΖ kµZ¡:\"ºÎäXCšD!› ºx ä:•åS{p¨ÆÞgïWUê$Z¯Lõ¼¢ÂÇPD7AäÚoè'ó¨úI5m}¬Â•º[= è ö€r¢.úæ‚Úîs«cýÚ¯®ªI–ZS¾sRUQV e­RNÝÒ5¤ß·éMMhÖ xùf¥j…¢ ¨h›, bJ};ÒõM5ÌÔ46{µµ}®J A¹Á»C£J=­+Ôžç|ÏÚ¦ð’¤ÆÅ7•§WX­[°ºI?Ÿ°þ9Åj´{O÷»{`´››ö:º*¬í $€Ä‹§Y.õ]¡ŠÌ¦9REͦßëºÚ €gC6€zãtjù°èîuïUAT E½2þ»§yYu¨›­é&çj޳¯6RLmiAUŒ8C½Ä‰¬{z®àœXÃtgzX×ë_?T“Ì †|ìþ'¤Y䥛ôó â—ßÔŽŽWÔdÂFà³rºêuKPA”ó=ö$í‹KKæY uÌôøE‘´U7Ò l# Mô­ZæÉ$–½òÖbÙ:|ƒ´álQ4€ºèÇ][Lv—]Q¶´© ëmŽ£(ÿ×2ÅYÃè@4H ðY¡¤¥K£\u@@Ü®ÚÔ–¦LçmS lä 'R$½á`þäNQ œå©}À¬Íuú¿ëƬõVD]D°×¾`A]*¡vICÿ’]û5@ú¸½ ID Ö Púí¥5…QUñj¬®ãlþX…­×Õp‚uqµª›ÓËÕ*¦FÀ:Ö}4(™Ö“Ч@š©Eß]8W–ëTÄ 0À[ét±þši½e¨D^¨ ý„Ž–_/ûêsÁ[™ç’HÀhƲÁZ#\É)S»[ ãôxNþLßÄ_½¢<$£Y_Wýªâ,¬¶K/@ ¬T‰{WŒ(ÿG«åø+ V¡~ÍP§táBÝIõö}z¿¿?=Ù×5ËFfçÈ*ÞªRD¨¯ùF•ÝÍøåô‹<•ºfSHþ¢jéèËìL K-ócí­È³»` VPlºøXA  0+ÀÚÅ[UD´jÂʪ«@3}ÿN\á®±Qª.I¿@¤¨àÙI®ù. ‡‚piî®®H\¸¦z–†~E·Ê ô.sÞ‚”ÇF‡)_ut©]NU ]Ô—*B3%ä5nð”öæliÓ$N³þèMÝ¢% "`¼Ó¢Ë›6> Hy\J ¹€4ÇzÃöqqÅàÞ×Òä­¾ æQ!o-ГÌZ´.AªU yñO$ßô–d¬æª´ 㿕Å. 
ËP"êiIrïUœß7o~=*R„££u5]•2+ý•2Ú…²†äBßqõáý¹¤6H×"ÿz|íLJ=Fz MXT‚B+±˜GÖ{¾K qzšHVµç&üŸ?s³]\zû6iþ ¨oÅ–yÕBàµw©E Øz®úâáŸUÞ#Z &˜ànŠ’K+X¼£4¹)Í&¥ Ð÷‚ @ÁbÂOüYÈà‰Ëäºtð¡«ÐƒøÝ« J¤«J˜v¤GŸ„÷²òS*W ß{øËaª¦»¥c_CÉ`öS‹j ˆõº›¦~%Ü ˆM êjü‚R¼ ¨s&ÍNm@úy{¥òù8Ï£±LLoÿà›³‡´òˆæëÒ¨Ëeª‚›úÕZŒŠŸB’ëãÛð‚àpÔËÛꆪg(§¾êÎÅ(*þã‹GµþئMV-[«ö?lÊ’Ôú©>^«3à÷v¤ì/DÒ¸Ô†úcŠýr)´J ÷ÓĬg‹o˜¥‚¤DJ}—Ù)9Ì•_"ÙÉqrô÷Ý+êdIªþ‚SÅGô‚‰":U|„rgÕÔ­\‹ *"î{’,(÷A½¬¡Ôÿ'ËÑ*€B¬`/Ói»4ë÷À¾OtôÏÔ@®–V¦ø_"ªBmÍw|ÛëÎçŠuß Vä¹u jø/O_’à3®,nS^èln\oØ7¦Qpd×"¢’ÐÑN`1”t…ÔeÉš¤¤OÁû?·¶½^¬ ':(r•¾h³+Áj™ +mVeŸ sý…{‰é|Wo-bØ©dV³J! µÝi’‚i¸nxª<öj— I‰ë÷‡mtªÆ!z+¤¦Q°N’ 8¶¢hTµ$ÕŒNÁÊŸ?Ä]ΞÿŸëŸë›úÕ?ÆñyxƦ£x~y&³‚nç;ãc¹Y³&€*s@%É&Ä¿µ&â{ø$%:„¯M÷ëç©à¼gû¡ï×Jõõn ª[ Ĩ²øO0……›ûÑM#@ßþúËþ²Ä°˜ŸÛ§ÓÛ µ‹6ˆµ†‘:^¶sÞ²³4¦ŒªìUãÞ‚yÐe¦æõá¯Ù°._eWØ,¸â,ù…ÂZF Ú2tEI°å¥Ý]þ¹Gœzš/çØSüb ¸"0³‚T”—-±º¥~érýòg`7Ÿ ÃéÐÔ¯}A^ÒeìªS`ÃØ&?Ôû×qPêÒû–™ª–*ñK1žª„š_Y{k£Q<—™¬P èC‘»Ö’ù±W)àj¸´nÿÎõ¬„áôñ‘…@Pá»D¨vI^y9 YÕ¡¼1—I:ýücXSBšŒ 1pšª"³ìÛ«E¼¥lÚ«A´éW¶×HjØÞ^Š·çĵ#bxDŠJ [þ‚î¦ÂU(æÔ‡a¢Y À;ËßÀ Ÿ’¬ZÁ¨½×æìDdo›<¾ƒ!öqô0\mdLf•Wb…-GZT¼Ïõà*Pþ”„¡Fsx_¸æQÕTŽñ¡s˜V0¦ jýü_{Xg±]ŒþBá‚kJm©˜ó¬ñÙ_¸Žr¦ñ(`j¡AK_<þ‰€Ñ„P±F>v¼²–7U«$¹ÕŽtÿ“¸îò“Jd%ÐlÛE…W ‘ub- ë^… Å•$U¿âXŸ41_Ð…ÄhB‡[•OT'¬s0 HÅÐwMXÎÅËJötÝÿÆu?Î÷¿ÛžýÊ“š’àƒ‚ ²PÙî¼Î ,@ù}‚2MVqî_ÿr¹ ý8É=œ¾°)FùèPq†¥)Rm~’:·æWü·¸©W÷ìÔ0 Ý4DkºñõwâV÷Cè£Y׼ůÁÏ"•xø¡c$SÚ•…x5EW¸£v~1¤D¯`š€“}ú)­áq¤²]Ã- aUT¶ô§ªpT c-@\CMàܨ_ÛÙ:äï{Ĭ}ôQ7/U8T^ Û\ªSIšà|·F·u ±)3o€êŸ õû‹†Õ}“io^þB“ý+JÏÏÄ]õãÓ»|û÷ÝÝZØZîØ݈0D÷D_²M™aéR…õܦôäJ ùZýYË5Ôö0U;pézà´ „F•IUJä•‚c;|¬?CØò+ÊAÕÒîlU* (â‚8Ü]aå÷?îÁ\þgF=¸ÿ›Æi¢Ó®Gƒ¸EL]æ*"<¨d‹:Ö¯[Üþ}OÒ;{ª(ò¿Áì%ZsàÙª-:pùfµœg RX7U ßÇ£kÒÝ%¡Å»I³Ý­ %s´! ýUîXµN\'¨œßÑ¥þ0ë·³Û½:}.gY1T3¾«#ÀJÁYª{*€ºVÿ0÷÷9êŸø Ó“ðS´ÄÓo‘ 4ò*²j¸Z Jç~aàÓjáUC c†ªâ `%ÆðÕçHEÒ[n3*¥~­Zþ/ʱƒ ˆ¶ ¼ùüHÆW@séø1]æó>}™ØÕwš /†þ×)d ˜Ž¿Hâ 1¬¶\Èj‚j—àUݨ;SËú)U…2­áºŒq ª4Œ*mº ’ÛÄé⸆•>[¢œ`x™À€±z«†dà[•Ë ÒÛI½‹ç0A\ c8ݨ;PÏK‹<(².Þ©Š[ úÛ2÷™~hA9iÁtrí=©C¿?Â{ðÏc°ö]½gq{5¼R»1’œƒ|^ ~ý!/äOލ¢óŒ¿¾VüR/÷a°ðò…ž†á©á-ôOoOñ‰}»«ßÙŽAZOÑŒïÎj&‰,µ ©…ºÁV°“ŸÃøIîaЙ£=ÓËÍ(³û!! 
A#½‹¡‰%ØŽÄIºÐ„Ö<¿õo† }ôìœôr»ýZÂú@Ht¤!PÁ i…6±Ž°Aª¨¢œh“sµ¾.âÞlçêcçG *ú¸Šâƒã~R²ŒNÆúu‚5:užh^a@„´¨¨û%Ì;Ø'úD\K3´zˆ»ö!u"íšW'¾_³ySu¬_½ÛSjÔi×{Lýò…Ï„™jnyÞM–ÓÔ¯3¨_·w*”>ÉÁÚáQr öãåßN…ìIëwh_Lc›1÷½§ž„~•2sEqG+z†êF]æ¥NŠ/¯”Žp#f\W~JY¸"Bih™c¨ô¼wMP‡|SŸ¾sñ³Þ·ŒÓˆl‚œèÊ*„$í÷*ÎJ2›ûj%: wGt§{íPâ)·*×){5ü#¨Ÿ'Pq¬ÞžÌs|—@C_̨Tÿš›ÿó…{u«NÕ=Ì|Óôì /™šÊ hÙ§jU ›­;mN¿Þ™˜ü{CY‰¯%‹Ú¹(Ó‘Û\GºÉEå*é¥ v—UjÒ Ã¥ŽDU-Ÿû´šÉþ×.×À!Û§^Ý%bš‚žš°¬ éOŒªHTÿV¦ú»’DÑA(ÚhTARµÆ“<ߪ®ðD¢SÖ‘BU;ÞM]ŠòöFgo ÚñG§xE¹Q _P%_°Ž³Öí‹‹uÍšiš¥f¨-%`ˆçâÿ%Ð䯓6¤Ï§`²ú$Q†QjPýÁíª £=V&Qí*RzU7k¶$‹…z jýªÖÊIý]’†Z„E(à ¯Ê)*˜×ž,ÿ4aaCPohKÍÃI!öÓièéP‹÷ó ;ûVAz¶SQIÛÓ¦©gÌ&N¸]Þ£ïƒïÕ5“Òâù¥ªcýDiå–ØcVSê·•”ý]Û”“©Ô©€ïxöÄËUB% ˆO/‚ahµ“:GÐɈ %ܶª¸Z3S½Á²7tÊÑ_–T<)ž¶³“M—Z)OpµZšÿþ*â»P]þ¯è¯¼ ÑídÙ¯%K/K•ý'Í€‰nÍÒR~_!YA³´ë@£oýˆ@lÑ Ò†Ø16t {*÷–+pw•´”¼!°ôéɘ§§‡¼G_5†Ô/5nºöÝUU$…=byZ¤.pÀ™f®u)ìód ZÁ*ÏÒ}Pž?zÙqí'°£ÿT‹È®uØ/ÁµÖT:ñB®¾ gE5­~½Iåú‰ÊtàúƒðJÚ$6€^N¿ÏÃj°ŠW-úQ˜LŸ3a˦_•ùê±Å¤ %Ì~-šSãý¹4ÒkRU©†”¹Ö Ä—ôˆàoÃÛ°šÒÈED(((Öòõ]±þ6eØZ€úx› njùØmO¸JñÀÕ‘JC­tE&°PTN©hÔwª0¿o!ÆúK³GÅ£ÕWMCµ:èEÑOWhÿ GUU‘ÆXE ú÷}=®;j`S5u|¢‰!€)â”B¢V$=e{k(ã«ÿîûY‚èÔ,Œs]4Ñ—Ë_†ÿz:ú5¨ñ‘ú=°”uÖS]¹PlRýG¿$¢¥:¾PIO½‚3nÔs~«úÚÀ™O®j¼ü£Ó§!V^tÐ"¥Hc¡§ªê×ç©ïçÓ?ð`5z"w[Ô¦ŒðÎ{5'°ô?`d]¼¤ý”eJ+°!Sòˆ»ám©c “ù/'„§±…NÇ»#nQðÒï÷*÷{´€Í<ªv™š¦‚ éÿQ­|[Ü)ê@ŽJÓ-Péÿ i|]ŸKCLôPá ‚‘‚ªt¼Â:æ ªBª4]2þNa~&nÚ¦¯ò̦m¢zÏA„ê52W²&»X?uMH½‹‘¤þ)Ÿ}Eçë ­6yÔs ¹<™¾ý®nÿªÊòdº‘Ð0MÜ9JôÌ—g^ØéFB¹Âûkáw»7Òd°ì `¦Æp97• ½ÒÇÈ´§›'|T¡FJµ¿+\E¼'ð8Ì<´ñÃ}l²hM¹)IÝ}1tÖ˜£èWPýär%§ØFHFÕù©âW1*zçyžßÏ9tOàÍxÿýX…¤®ˆô¤:Ì÷ Úí*ã~ü*â}Þ©Þ§»[Ëv¥¤Ÿ”CÃR¢\tS‹;j…z5öeO“4Õ~,'òô^ö(©B>•biR¥l:cº‡ÊÃj3è³ ´A“2•~ÐÓeâÃïBXcµ‹ëÒ„’Þç—½hR%74÷|yÓ‰âxq¾Œ!u 5l„¤ëéEðçúy E¬OfÕ4XùЬI?Ucý\°Æ¨t½öÃêÛŸ*(£æ •Õ1ÞßTVMÙ¿® +fè=&\û‰”KÀÜ.+ÁSŠ~µëX_¯’\—PzJ4juù¸øš¢1ÊMkçxýðH‰4¨V¼V²È¢(Ÿ´žÐÆÿ7¶„a¡,¸ÿ¬û…pFܱJñþ¾«‰P]ÿšˆ×aÃUhš¥eBFS °Ñ•”SºïùÖÅ¿êg—zý¢N´`?cŒã%ÅxðÑ}àè(2&»ÿ…JÒ9 û´´à¦ì6KßÕ/ªxúi(U'h:1©ñmZĪ$ìù¤s›NŒš×øî'_uØ=S—yª9¨Šn@tªÁS.±:«ZϤg¨V&Mœ­½êßfÂûK?BH|ì©låõn•X§ÉSs§ØkîL bj¤}‹–)%Ë Lc”ŽrVõ0<7LÿðøO:$’FëÞž*•iQ†bJe|ÒÄq˜ÿJcJÇ:å­¯¶wåAõY•øVIžîW…Ú«HšÔîVµéçŸÛ öÔ±U›<+jïu þ€\U8«J²Ò„&QÕ*ý¬Ï%ØVQÅÒU*ÄMÐiîНùX!qYGšÿãz‡WÀIÇÂ$¡†zÆâÊâPõƯÂueñ'R¥b{s²dóúïõ"§n ˆQ¼¬’k»((è-ÙŠ¤C¥nçÅ)piœêÒ¯Æ# ð 
ÖÉV_¾Q¬Xr!†× vµiú£ï¡Z;ôô* k àZ›Úï‡à€¢Rí%õ÷ ³`õS‡ò²1¿&_tpŸ·ßA܇ç_Ýøüûù>>›—÷ÇørGϤå«?ù"Ÿ)›PZµ½ÿñéõèýu+U¨ŠIêﳺøS^ýìSÔÎ;UTŸttܱXüC¬:O¯oÔÏ[·øýý2ã§?ÁÚ€øq”´ä€..*oyþïcæÔÒ$²°ÀÛ<ç•ÜUKÅé´ðZ¯ÊD9Tq*‹PU`’¸ ŽPz«ã°Ê« éúZåæÍZ$ÄÉ}ŸÝ¾í2–†^ª4M»¶Aäñ² ÙTîÏÔÞïíWºpV]ÇGRœ[5BQ„)`õw_¬KÎj^ÿ<™ÒÙ’R¯²_ܧ” \Ý&‡zà0Ïk@Myuióúšê,»|¥’”ú‚õóëmü[ÇÏð÷ß÷¿_ÿ‡AÇ?ãçEÇŸ^Fñ÷Ê«v{7Uñ¤¦r §FP»bQÁŽP›êF÷vn֯Ϭ÷R—÷Ê‚?}KˆµíY©c i »Æ\ú1ÀjØ ¸ëøÊ_÷ó| •Tþt¼=ÝzÏÎïêO ®>ñÉù©jJLC@ÇTÊ•éŠpü•‡Yý\,ùɱjH›Ü î¡GÀ•hT¡š%H„.]Uœ è15ißS­Ë"Y‰€Ñü²2e­ u•oÕS¤õÕ䓨ê À)¨gCðÛ¾øeH¥æ å @O×~£AÓ¨ì’ &—£´ˆ{eµ°r9Å“ìæÙob|£ô<…‡®SÑö ”nžòh ñMºnÿªëÇÙÙMÜdÞœ£yÄœí¯Œ»¹%6ñ*“¨+ ÉëÏÎlRν§(¿yÂ[ÅQ-=_”K—È£*(‚s¤…\R®Åþ\´gsEb\þØËú·õ&ÒêMÕž×'pª–LZ’ž ·Ã—~úyëCôá*âù¿&t|DÅisòÞóIGI‚d|׃T"eÉ ŠM\Ò§4Èwbù…?Ô±œSGù ?Í£ª†Œ—~Ǻ§T¡;}<ªNT@&ÝÙÂÃoáÐ×K>D¢ÆT¿ø:Ä­ÊÝT!ƒj/•|«8aRH¶u‰JçÜm:¬µÝ.‡&4‹´?O“ï]š‚ÿÊÃ/r u 5‰9ÿJ§ŸLjWÒž«Ê5ù¾Ì®<:‘©ãŸ&½T±2L[”»; qA|IeôIã߆)~(Ëå\QN5ÿÎw”íÜuÒ­…Ç[»=øt_fÒP˜T&Õá*N¼Qtu¾¥ö¹dÏ·ö«[5U§-¦åÅÛ4F|œ›ääúî§ùÞ˜³MßqƒÞ]iR|ÄiC}oj6ÃjYtJ•^þÏSœôË[úõ=ñ¨¶›`Ï®«c¥ëizÈ*Ùªä}DÀl¿BºÀ£Ý³Š÷@Ñ?íWM$E…1¯<íSŠӯܭŽóó†.4ŸZÑŽÜþËN"ÓV‹aÄé8Â곬g V<ùæušê—vÀùhœ>õarV1€¿+¬Ô‘à›°>ÒáÂg}ü÷±6kº—C¾»ÍôteatK=ÀùMÁtÊü¾LÀvªû…¿U#íV.µHÌ *ÖF‰w5+бÏÐxgý–þU$Ïw¸-/¹¾¢Tú²ò¬Ëž_¹v®–À_U8×^^K˜/ØÃÅOX÷&ŸþЮçDe‡TËöõÇ—+yWRÎë!ÊĨ8¾Ç‚:û9Š)9µ#—œŠ)TEùÿ‚¦â‹”*‰]1*%TîÓ ð˜ y ¶šRÍ<Ÿ Bô´….ìêŠõGéOŠ[ç|®Æ“¹´?” ,†ç.P‹vZ™D*’Óz“ÏМpšñ¯Ñ¾³ 厰^cÄÊGþ±Õ|V šè“Uš¼–LþXñÓæ¯÷¤CsrŒ^gж_Iâ4åù3 Ï:úI@Ï›Øk²¾=ÿ9ñ‡‹ÇÐîÇ˾E3-újˆ¢P³'ÝSÃ}}ŽÎuIZ„_6Ó>¹M÷)“8MK8J)áe"qJû)1:"X;<ò1¤wà´ci ®J’:˜¿àæhÔ+ú[0‹Çê²Q+%…*…’TÏþ96«hò¯7Ó²¸ºè DÇOc¦žÏO@’ØtËù¿²«RI³¸ j>évÐíC­ÌÒÎm5ñ]M¥‹GK6Õ7kW6Mšº§0ã±2˜ù2Þ_¦ðºÆUnS×^Ìa6-î*cÓd¿*Д²òxõS eêJ5PIESw)[ˆx’öOaS·]|è©Öp¦ ë GygñëüïͼœÅ¢²+,ê&¥s”4/A,ßÞëª;Ó“Ö"XQ9X©ECÅ›ƒèÓKùúMµ7ˆŠíY8+&Ï ¬cý¼é×èÀ¢º_¦SÃIQSëe`o$ñkæÿ$Ç u¬ß”a‹OK¿ÖßÛ—ï×½±l;UŒD°¶·iÄóàqèú_cƼƒSŸÕÐ%¸ˆ VÃ3WÙö½dûŽÀ6³pA‡Þ6õXÔ,›hëáPWë§vÝŽÏ]¬ZpÁ1 –’ÂUœb<|²ú[¤õñ¹{ÓSÔâ5Ðj‚Zú‡uÎ`n¡6\²{³^ø(˜Vç±Tu°néTMj+C?̺+ãQ®¥)(\k×Y6M{íš,«Ñœ2榕¡7ÄYrKúzéÝWåv-^Ú4ì7Bm0«]xËmÅ¡¦o$)l ž­å¹ÖT `âùjÚËNm„Z'îŒû9‚jMJ`´@]ÁÊwsµáØ |bH}t·R:e…‡<8‰¹2G¢µ ©–r9ZÜL‚ÊîL ÷.ÿÖ·ªKȰêâ©êÒ¬¿:Ôïï[v§øV{; ^>¶Ã÷Bq ¤Jâýƒæ`—{s#ÈÆ‰ýQk-ÇÁæoC?î’qu¬X÷ùuC#OO'ßûvC7¼^;#B–]QPÄ¥›Twë˜o:ü•Púá(´½¾ƒ·Òû 
jám{:löÖ`ÜÛV“­þ¹Uw‡„0¬jcV‘åwÎ÷]¶§!íO‡„“Wض5v Õܪûg¬]?ÉíáÝËD‹Åª¶Œ·B.jlA~Î ¿SRû>¾'!#—jD5¾¦Ÿ¬º®ÁÉí˰£ÅÈ ò›KM ,™@¢Ž­*<{ºö-{ù\mæÅ±õ·Û;V©&¤ƒrvèhÃæû:0ró¶õ¶®º¿3&d±i1Ð 3C^“ªÝÈ©?0/ 5\¯î©®3¨Ž³…ÂDl™`~îÂÄŸ·¾·ÖŒÏÿ5<Ïq`YŒ?saö–oÕ÷ÐrŸº|©žš“N׬¾7É®°mêwmZgáí q²Z°ÅÒ™Šg®Þ€ò¤][ÐðVa_ŒóœÕÄeâ8N®§ˆÕ@6­ Äê˜NîíÏÃÓß8¯¼ çh‹ª&‘qÁm¬bÅýÿ4¿T¾EÒ¨žÝ‘_\öjÁjJ{/U gDj/ LZ†Eåð‡©–R%„›µ8X2¤Ü`+ô—|H"|iÏÎçºNÈFÑh1kÒìWUP&³vÎZ*eÇ›—³»ªMÑh­jƬ fÓ¢`-Ë ”P?K¨_·_iáãóo3i´_dzûyÌÂTbçfݵgvU<ÿNKÙ[ݬ_ 0V¿é“2ÄIñègÌüR˜™vL—9ݱ2L­˜GÆM›áíãííñÀpqÜÙ&ao3L.ÖÄÈ]‹[ìjÜ2¤¼m<Ýðª†ÅÓ¨8Ч´n1kÃ{°–8\¶-€£6£þ¯øßR²½ÿëÍ:¡%)iJ9U%eNüsAžãÞa޹¸ vIÝÃ5ãÂ/¤vš‡rÔóš{Z—óV0ÙEãMf­«c˜´iÇ,{?+Å´h]ÊM3‘%ÇÞL!È‚à›ë±ÊŸ\ ð›„íG»ä ïqõ±_æ Š]M¶‰>^ÊqYðœbüGTiÕÒ“»A¹1c‚?óñ_›í¡úþ¸ÿ¾ßŸåÝßï÷§û.ÊÌp‘áX4L¡%³Kuã£èV .?ì°E0þ®ãMðxŒ¿ï3]`ß’Úî`ÜÓ``§2̳ƒÒYÞ| t²cù¹õæ/ X§sôÉ]ß×÷@Cþ5úyÛ›ƒÖqŽZ-3ìÑHNIíÈ·GQ‚»ãðþþÏ æ¼&fÙ÷Ⱥx±±&¶kV9$ Ÿóú§)›õ/ÑÿÒ˜?>¢s;¬uNÊ`Î$îԱСç8C•Yövv;Mhß|_R2j%Ø]£†Ù¦sô¢ q|â«Ï-ª±ú–þͲo^[%§ã9u»ë‚»ÿºëy Wßo,º?‡Y+iè÷tG¹bšI†9éè5Ò][ÆÇ)È÷ÁD/ðé1š'ëÆ÷0’´yƒ4/á<f"«8}‰óyAù Á¯œ^C ýç+þ&Mºøƒ ûÏÑã·êïOf 2£zê_G£ÆÇýþúxÂhöûÌeÈÃLþCT0)±¨pðwÔœ,èׯLIž0¥xIbÑùÁ±•çW¥*óeÊq Ø$¬†|i¨"ü“òs»ËæGò750¿.I_A(õ¾JB:©©S‚_¹3’Û™§•삌Ÿ§¸¹žÔ{ß9Š€âÿœ"]Ì÷ŒÚã}Ò8 4Ç(u¶u©¿>NDÆ÷æÆðׯè =ɾ ÎNniGaxÆY*IcéPÜ?´ãgq>µ¾—÷åöàhlºã÷ûø’é^^Îì¸ÇŸ·uíïìøLŸîǯ-ÄÂŒbx[0ËÞGO,Bî‹fT]yM;Š@ñréU˜Ñ.5½y;·4ŠaôƒWÌËïÇãqß¼?ºÎ{ß ÃûÙ™z'Þêz²7Ï0?"qPýöúÙJþMHoõ¢†®LŠ·•œ‹[zRx‘Ññ•è›êø‚Ru`Ì­±œìû8Èáþ:¾Œ©þL¥ÒSC–ñ˜[Rß–fãZoÚ¢ |Ï“*z:PüÎaÃgzTÄýËÅgè_Ñϳ¯ý¿^…U¿_ůÆ-$Å#²‡ ºb;k™1|ÿ™OÄþ¡ø¾=¿ý2ê­0(Ü \0¦s¥Ìðv7âÉì>·«'×¾«‰ #½xE“`¿ÏŽÅ´œóÿÓcG\'O¯z^žB|îoz=ˆ ö÷T¹‡ÍO†6‰GÚr2ô˜u„5*¯4BðA¿™{!šN†(ü—M¶ë¢$•c±kØ5ØtQOç_Ü"¹q]ãºjÓî?ÃöÈoý“¶è6Â"–k¢>Nî_ñ`0€SeãÔXe­Ü²A©ƒ}Of ro{\ÚdÅ)þÇ‘è°êÿµ«Ævà‹•5!Ë•d°p$4XÎ̳öü¨û”§8]éï”§È6{{ûÝzÀîQ/Àm˜ºÆéKœÏ åJOýYrdÑÅ“ –gºý†~ì&XñjË‚MýÚ³&ІâB°|17y’Š‘âÿ~7†·Ñ¼™ô r_½!'6$óí.æi0O~ktlO³ªMñr„ÅH!%J–óð5?!þ"á¯øìºÔïç\J;ÎpUØÃøn ³¾Nídéólï"þñ°ÚNŸ»œ¾;ÛMáh7¡ºL]ˆ¤DñOVr¢ƒWìw\4!âý–7íÒ²’²¸Üp¯ÅKrÞ–s0r煮}”U?×tÁ 
ƒº«„xƒËlŒF~3¾4nç÷J`!?ñ…{rvÆß÷œ×jã…³Çåü¿4:©PóS9Á¥O²rìó'¿gT:¼ïÒ®çár½KDß9€E}æÌÄ(UúU&6ŸÜ1)£„†ÃðBOƦ×lo_J_K;©ØdN¸øHÅveG29ÈõN—òŸ¯™ªïãfôI!íÊA:98u¿?­øÊ}æM4nFß"£Å®Ô¥ú÷®38Ÿ©I°y–ìÉü‘yÉßÞžž¢ëusjW¸®ÒÍ/Ò¹>ºÃ¤„¾6äåçîÁI$¬“Oï¿t¿~Ÿ¢”;ö¾Æ;>žQ‰!8•KCßàÍÉA´2±Û^ò°ÖƒÏ»EdO[ñ¸i+ÞÆ|W˜wJ’ˆÝá<á &QÆýõå„]?<¢»°3d~W¾m•_Z–_IK/FnÚ¢,¼lŽ2 Ÿ7õòò’ ï(À‰Güz\†—û£fç°þÿUW…B>‰samtools-0.1.19/examples/toy.fa000066400000000000000000000001421212162403000163700ustar00rootroot00000000000000>ref AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT >ref2 aggttttataaaacaattaagtctacagagcaactacgcg samtools-0.1.19/examples/toy.sam000066400000000000000000000014221212162403000165640ustar00rootroot00000000000000@SQ SN:ref LN:45 @SQ SN:ref2 LN:40 r001 163 ref 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 r002 0 ref 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * r003 0 ref 9 30 5H6M * 0 0 AGCTAA * r004 0 ref 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * r003 16 ref 29 30 6H5M * 0 0 TAGGC * r001 83 ref 37 30 9M = 7 -39 CAGCGCCAT * x1 0 ref2 1 30 20M * 0 0 aggttttataaaacaaataa ???????????????????? x2 0 ref2 2 30 21M * 0 0 ggttttataaaacaaataatt ????????????????????? x3 0 ref2 6 30 9M4I13M * 0 0 ttataaaacAAATaattaagtctaca ?????????????????????????? x4 0 ref2 10 30 25M * 0 0 CaaaTaattaagtctacagagcaac ????????????????????????? x5 0 ref2 12 30 24M * 0 0 aaTaattaagtctacagagcaact ???????????????????????? x6 0 ref2 14 30 23M * 0 0 Taattaagtctacagagcaacta ??????????????????????? 
samtools-0.1.19/faidx.c000066400000000000000000000252661212162403000147040ustar00rootroot00000000000000#include #include #include #include #include #include "faidx.h" #include "khash.h" typedef struct { int32_t line_len, line_blen; int64_t len; uint64_t offset; } faidx1_t; KHASH_MAP_INIT_STR(s, faidx1_t) #ifndef _NO_RAZF #include "razf.h" #else #ifdef _WIN32 #define ftello(fp) ftell(fp) #define fseeko(fp, offset, whence) fseek(fp, offset, whence) #else extern off_t ftello(FILE *stream); extern int fseeko(FILE *stream, off_t offset, int whence); #endif #define RAZF FILE #define razf_read(fp, buf, size) fread(buf, 1, size, fp) #define razf_open(fn, mode) fopen(fn, mode) #define razf_close(fp) fclose(fp) #define razf_seek(fp, offset, whence) fseeko(fp, offset, whence) #define razf_tell(fp) ftello(fp) #endif #ifdef _USE_KNETFILE #include "knetfile.h" #endif struct __faidx_t { RAZF *rz; int n, m; char **name; khash_t(s) *hash; }; #ifndef kroundup32 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #endif static inline void fai_insert_index(faidx_t *idx, const char *name, int len, int line_len, int line_blen, uint64_t offset) { khint_t k; int ret; faidx1_t t; if (idx->n == idx->m) { idx->m = idx->m? 
idx->m<<1 : 16; idx->name = (char**)realloc(idx->name, sizeof(void*) * idx->m); } idx->name[idx->n] = strdup(name); k = kh_put(s, idx->hash, idx->name[idx->n], &ret); t.len = len; t.line_len = line_len; t.line_blen = line_blen; t.offset = offset; kh_value(idx->hash, k) = t; ++idx->n; } faidx_t *fai_build_core(RAZF *rz) { char c, *name; int l_name, m_name, ret; int line_len, line_blen, state; int l1, l2; faidx_t *idx; uint64_t offset; int64_t len; idx = (faidx_t*)calloc(1, sizeof(faidx_t)); idx->hash = kh_init(s); name = 0; l_name = m_name = 0; len = line_len = line_blen = -1; state = 0; l1 = l2 = -1; offset = 0; while (razf_read(rz, &c, 1)) { if (c == '\n') { // an empty line if (state == 1) { offset = razf_tell(rz); continue; } else if ((state == 0 && len < 0) || state == 2) continue; } if (c == '>') { // fasta header if (len >= 0) fai_insert_index(idx, name, len, line_len, line_blen, offset); l_name = 0; while ((ret = razf_read(rz, &c, 1)) != 0 && !isspace(c)) { if (m_name < l_name + 2) { m_name = l_name + 2; kroundup32(m_name); name = (char*)realloc(name, m_name); } name[l_name++] = c; } name[l_name] = '\0'; if (ret == 0) { fprintf(stderr, "[fai_build_core] the last entry has no sequence\n"); free(name); fai_destroy(idx); return 0; } if (c != '\n') while (razf_read(rz, &c, 1) && c != '\n'); state = 1; len = 0; offset = razf_tell(rz); } else { if (state == 3) { fprintf(stderr, "[fai_build_core] inlined empty line is not allowed in sequence '%s'.\n", name); free(name); fai_destroy(idx); return 0; } if (state == 2) state = 3; l1 = l2 = 0; do { ++l1; if (isgraph(c)) ++l2; } while ((ret = razf_read(rz, &c, 1)) && c != '\n'); if (state == 3 && l2) { fprintf(stderr, "[fai_build_core] different line length in sequence '%s'.\n", name); free(name); fai_destroy(idx); return 0; } ++l1; len += l2; if (state == 1) line_len = l1, line_blen = l2, state = 0; else if (state == 0) { if (l1 != line_len || l2 != line_blen) state = 2; } } } fai_insert_index(idx, name, len, line_len, 
line_blen, offset); free(name); return idx; } void fai_save(const faidx_t *fai, FILE *fp) { khint_t k; int i; for (i = 0; i < fai->n; ++i) { faidx1_t x; k = kh_get(s, fai->hash, fai->name[i]); x = kh_value(fai->hash, k); #ifdef _WIN32 fprintf(fp, "%s\t%d\t%ld\t%d\t%d\n", fai->name[i], (int)x.len, (long)x.offset, (int)x.line_blen, (int)x.line_len); #else fprintf(fp, "%s\t%d\t%lld\t%d\t%d\n", fai->name[i], (int)x.len, (long long)x.offset, (int)x.line_blen, (int)x.line_len); #endif } } faidx_t *fai_read(FILE *fp) { faidx_t *fai; char *buf, *p; int len, line_len, line_blen; #ifdef _WIN32 long offset; #else long long offset; #endif fai = (faidx_t*)calloc(1, sizeof(faidx_t)); fai->hash = kh_init(s); buf = (char*)calloc(0x10000, 1); while (!feof(fp) && fgets(buf, 0x10000, fp)) { for (p = buf; *p && isgraph(*p); ++p); *p = 0; ++p; #ifdef _WIN32 sscanf(p, "%d%ld%d%d", &len, &offset, &line_blen, &line_len); #else sscanf(p, "%d%lld%d%d", &len, &offset, &line_blen, &line_len); #endif fai_insert_index(fai, buf, len, line_len, line_blen, offset); } free(buf); return fai; } void fai_destroy(faidx_t *fai) { int i; for (i = 0; i < fai->n; ++i) free(fai->name[i]); free(fai->name); kh_destroy(s, fai->hash); if (fai->rz) razf_close(fai->rz); free(fai); } int fai_build(const char *fn) { char *str; RAZF *rz; FILE *fp; faidx_t *fai; str = (char*)calloc(strlen(fn) + 5, 1); sprintf(str, "%s.fai", fn); rz = razf_open(fn, "r"); if (rz == 0) { fprintf(stderr, "[fai_build] fail to open the FASTA file %s\n",fn); free(str); return -1; } fai = fai_build_core(rz); razf_close(rz); fp = fopen(str, "wb"); if (fp == 0) { fprintf(stderr, "[fai_build] fail to write FASTA index %s\n",str); fai_destroy(fai); free(str); return -1; } fai_save(fai, fp); fclose(fp); free(str); fai_destroy(fai); return 0; } #ifdef _USE_KNETFILE FILE *download_and_open(const char *fn) { const int buf_size = 1 * 1024 * 1024; uint8_t *buf; FILE *fp; knetFile *fp_remote; const char *url = fn; const char *p; int l = strlen(fn); for 
(p = fn + l - 1; p >= fn; --p) if (*p == '/') break; fn = p + 1; // First try to open a local copy fp = fopen(fn, "r"); if (fp) return fp; // If failed, download from remote and open fp_remote = knet_open(url, "rb"); if (fp_remote == 0) { fprintf(stderr, "[download_from_remote] fail to open remote file %s\n",url); return NULL; } if ((fp = fopen(fn, "wb")) == 0) { fprintf(stderr, "[download_from_remote] fail to create file in the working directory %s\n",fn); knet_close(fp_remote); return NULL; } buf = (uint8_t*)calloc(buf_size, 1); while ((l = knet_read(fp_remote, buf, buf_size)) != 0) fwrite(buf, 1, l, fp); free(buf); fclose(fp); knet_close(fp_remote); return fopen(fn, "r"); } #endif faidx_t *fai_load(const char *fn) { char *str; FILE *fp; faidx_t *fai; str = (char*)calloc(strlen(fn) + 5, 1); sprintf(str, "%s.fai", fn); #ifdef _USE_KNETFILE if (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn) { fp = download_and_open(str); if ( !fp ) { fprintf(stderr, "[fai_load] failed to open remote FASTA index %s\n", str); free(str); return 0; } } else #endif fp = fopen(str, "rb"); if (fp == 0) { fprintf(stderr, "[fai_load] build FASTA index.\n"); fai_build(fn); fp = fopen(str, "rb"); if (fp == 0) { fprintf(stderr, "[fai_load] fail to open FASTA index.\n"); free(str); return 0; } } fai = fai_read(fp); fclose(fp); fai->rz = razf_open(fn, "rb"); free(str); if (fai->rz == 0) { fprintf(stderr, "[fai_load] fail to open FASTA file.\n"); return 0; } return fai; } char *fai_fetch(const faidx_t *fai, const char *str, int *len) { char *s, c; int i, l, k, name_end; khiter_t iter; faidx1_t val; khash_t(s) *h; int beg, end; beg = end = -1; h = fai->hash; name_end = l = strlen(str); s = (char*)malloc(l+1); // remove space for (i = k = 0; i < l; ++i) if (!isspace(str[i])) s[k++] = str[i]; s[k] = 0; l = k; // determine the sequence name for (i = l - 1; i >= 0; --i) if (s[i] == ':') break; // look for colon from the end if (i >= 0) name_end = i; if (name_end < l) { // check if this is 
really the end int n_hyphen = 0; for (i = name_end + 1; i < l; ++i) { if (s[i] == '-') ++n_hyphen; else if (!isdigit(s[i]) && s[i] != ',') break; } if (i < l || n_hyphen > 1) name_end = l; // malformated region string; then take str as the name s[name_end] = 0; iter = kh_get(s, h, s); if (iter == kh_end(h)) { // cannot find the sequence name iter = kh_get(s, h, str); // try str as the name if (iter == kh_end(h)) { *len = 0; free(s); return 0; } else s[name_end] = ':', name_end = l; } } else iter = kh_get(s, h, str); if(iter == kh_end(h)) { fprintf(stderr, "[fai_fetch] Warning - Reference %s not found in FASTA file, returning empty sequence\n", str); free(s); return 0; }; val = kh_value(h, iter); // parse the interval if (name_end < l) { for (i = k = name_end + 1; i < l; ++i) if (s[i] != ',') s[k++] = s[i]; s[k] = 0; beg = atoi(s + name_end + 1); for (i = name_end + 1; i != k; ++i) if (s[i] == '-') break; end = i < k? atoi(s + i + 1) : val.len; if (beg > 0) --beg; } else beg = 0, end = val.len; if (beg >= val.len) beg = val.len; if (end >= val.len) end = val.len; if (beg > end) beg = end; free(s); // now retrieve the sequence l = 0; s = (char*)malloc(end - beg + 2); razf_seek(fai->rz, val.offset + beg / val.line_blen * val.line_len + beg % val.line_blen, SEEK_SET); while (razf_read(fai->rz, &c, 1) == 1 && l < end - beg && !fai->rz->z_err) if (isgraph(c)) s[l++] = c; s[l] = '\0'; *len = l; return s; } int faidx_main(int argc, char *argv[]) { if (argc == 1) { fprintf(stderr, "Usage: faidx [ [...]]\n"); return 1; } else { if (argc == 2) fai_build(argv[1]); else { int i, j, k, l; char *s; faidx_t *fai; fai = fai_load(argv[1]); if (fai == 0) return 1; for (i = 2; i != argc; ++i) { printf(">%s\n", argv[i]); s = fai_fetch(fai, argv[i], &l); for (j = 0; j < l; j += 60) { for (k = 0; k < 60 && k < l - j; ++k) putchar(s[j + k]); putchar('\n'); } free(s); } fai_destroy(fai); } } return 0; } int faidx_fetch_nseq(const faidx_t *fai) { return fai->n; } char *faidx_fetch_seq(const 
faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len) { int l; char c; khiter_t iter; faidx1_t val; char *seq=NULL; // Adjust position iter = kh_get(s, fai->hash, c_name); if(iter == kh_end(fai->hash)) return 0; val = kh_value(fai->hash, iter); if(p_end_i < p_beg_i) p_beg_i = p_end_i; if(p_beg_i < 0) p_beg_i = 0; else if(val.len <= p_beg_i) p_beg_i = val.len - 1; if(p_end_i < 0) p_end_i = 0; else if(val.len <= p_end_i) p_end_i = val.len - 1; // Now retrieve the sequence l = 0; seq = (char*)malloc(p_end_i - p_beg_i + 2); razf_seek(fai->rz, val.offset + p_beg_i / val.line_blen * val.line_len + p_beg_i % val.line_blen, SEEK_SET); while (razf_read(fai->rz, &c, 1) == 1 && l < p_end_i - p_beg_i + 1) if (isgraph(c)) seq[l++] = c; seq[l] = '\0'; *len = l; return seq; } #ifdef FAIDX_MAIN int main(int argc, char *argv[]) { return faidx_main(argc, argv); } #endif samtools-0.1.19/faidx.h000066400000000000000000000061641212162403000147050ustar00rootroot00000000000000/* The MIT License Copyright (c) 2008 Genome Research Ltd (GRL). Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* Contact: Heng Li */ #ifndef FAIDX_H #define FAIDX_H /*! @header Index FASTA files and extract subsequence. @copyright The Wellcome Trust Sanger Institute. */ struct __faidx_t; typedef struct __faidx_t faidx_t; #ifdef __cplusplus extern "C" { #endif /*! @abstract Build index for a FASTA or razip compressed FASTA file. @param fn FASTA file name @return 0 on success; or -1 on failure @discussion File "fn.fai" will be generated. */ int fai_build(const char *fn); /*! @abstract Destroy a faidx_t struct. @param fai Pointer to the struct to be destroyed */ void fai_destroy(faidx_t *fai); /*! @abstract Load index from "fn.fai". @param fn File name of the FASTA file */ faidx_t *fai_load(const char *fn); /*! @abstract Fetch the sequence in a region. @param fai Pointer to the faidx_t struct @param reg Region in the format "chr2:20,000-30,000" @param len Length of the region @return Pointer to the sequence; null on failure @discussion The returned sequence is allocated by malloc family and should be destroyed by end users by calling free() on it. */ char *fai_fetch(const faidx_t *fai, const char *reg, int *len); /*! @abstract Fetch the number of sequences. @param fai Pointer to the faidx_t struct @return The number of sequences */ int faidx_fetch_nseq(const faidx_t *fai); /*! @abstract Fetch the sequence in a region. @param fai Pointer to the faidx_t struct @param c_name Region name @param p_beg_i Beginning position number (zero-based) @param p_end_i End position number (zero-based) @param len Length of the region @return Pointer to the sequence; null on failure @discussion The returned sequence is allocated by malloc family and should be destroyed by end users by calling free() on it.
*/ char *faidx_fetch_seq(const faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len); #ifdef __cplusplus } #endif #endif samtools-0.1.19/kaln.c000066400000000000000000000355761212162403000145430ustar00rootroot00000000000000/* The MIT License Copyright (c) 2003-2006, 2008, 2009, by Heng Li Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/
#include
#include
#include
#include
#include "kaln.h"

/* Traceback codes: which DP state (match, insertion, deletion) a cell's
 * score came from. Stored in dpcell_t and in path_t.ctype, and used as the
 * low bits of each packed CIGAR element built by ka_path2cigar32(). */
#define FROM_M 0
#define FROM_I 1
#define FROM_D 2

/* One step of a traceback path: cell coordinates and the state at that cell. */
typedef struct {
	int i, j;
	unsigned char ctype;
} path_t;

/* 22x22 substitution matrix, row-major, in the residue order given below
 * (used by ka_param_aa2aa with row = 22). */
int aln_sm_blosum62[] = {
/*	 A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  *  X */
	 4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0,-4, 0,
	-1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3,-4,-1,
	-2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3,-4,-1,
	-2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3,-4,-1,
	 0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1,-4,-2,
	-1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2,-4,-1,
	-1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 1,-2,-3,-1, 0,-1,-3,-2,-2,-4,-1,
	 0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3,-4,-1,
	-2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3,-4,-1,
	-1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3,-4,-1,
	-1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1,-4,-1,
	-1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2,-4,-1,
	-1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1,-4,-1,
	-2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1,-4,-1,
	-1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2,-4,-2,
	 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2,-4, 0,
	 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0,-4, 0,
	-3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3,-4,-2,
	-2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1,-4,-1,
	 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,-4,-1,
	-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 1,-4,
	 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-4,-1
};

/* 5x5 score matrices, row-major (used with row = 5 by the presets below). */
int aln_sm_blast[] = {
	1, -3, -3, -3, -2,
	-3, 1, -3, -3, -2,
	-3, -3, 1, -3, -2,
	-3, -3, -3, 1, -2,
	-2, -2, -2, -2, -2
};

int aln_sm_qual[] = {
	  0, -23, -23, -23, 0,
	-23,   0, -23, -23, 0,
	-23, -23,   0, -23, 0,
	-23, -23, -23,   0, 0,
	  0,   0,   0,   0, 0
};

/* Parameter presets. Field order for ka_param_t: gap_open, gap_ext,
 * gap_end_open, gap_end_ext, matrix, row, band_width; for ka_param2_t:
 * iio, iie, ido, ide, eio, eie, edo, ede, matrix, row, band_width. */
ka_param_t ka_param_blast = { 5, 2, 5, 2, aln_sm_blast, 5, 50 };
ka_param_t ka_param_aa2aa = { 10, 2, 10, 2, aln_sm_blosum62, 22, 50 };

ka_param2_t ka_param2_qual  = { 37, 11, 37, 11, 37, 11, 0, 0, aln_sm_qual, 5, 50 };

/*
 * Run-length encode a traceback path into a packed CIGAR array.
 * The path is walked from its last element to its first, so the result is in
 * the reverse of path order. Each element holds the run length in the bits
 * above bit 3 and the state code (FROM_M/FROM_I/FROM_D) in the low 4 bits.
 * The number of elements is stored in *n_cigar. Returns NULL with
 * *n_cigar = 0 for an empty path.
 * NOTE(review): the calloc() result is used without a NULL check.
 */
static uint32_t *ka_path2cigar32(const path_t *path, int path_len, int *n_cigar)
{
	int i, n;
	uint32_t *cigar;
	unsigned char last_type;

	if (path_len == 0 || path == 0) {
		*n_cigar = 0;
		return 0;
	}

	/* first pass: count the runs of identical state */
	last_type = path->ctype;
	for (i = n = 1; i < path_len; ++i) {
		if (last_type != path[i].ctype) ++n;
		last_type = path[i].ctype;
	}
	*n_cigar = n;
	cigar = (uint32_t*)calloc(*n_cigar, 4);

	/* second pass: fill the runs, starting from the end of the path */
	cigar[0] = 1u << 4 | path[path_len-1].ctype;
	last_type = path[path_len-1].ctype;
	for (i = path_len - 2, n = 0; i >= 0; --i) {
		if (path[i].ctype == last_type) cigar[n] += 1u << 4;
		else {
			cigar[++n] = 1u << 4 | path[i].ctype;
			last_type = path[i].ctype;
		}
	}

	return cigar;
}

/***************************/
/* START OF common_align.c */
/***************************/

/* The set_* macros below expand in ka_global_core() and rely on its local
 * variables gap_open, gap_ext, gap_end_open and gap_end_ext. Each one writes
 * the new score of one state and records in `cur` which state of the
 * predecessor cell `p` it came from. */

#define SET_INF(s) (s).M = (s).I = (s).D = MINOR_INF;

/* MM = best of p's three states plus the substitution score sc */
#define set_M(MM, cur, p, sc) \
{ \
	if ((p)->M >= (p)->I) { \
		if ((p)->M >= (p)->D) { \
			(MM) = (p)->M + (sc); (cur)->Mt = FROM_M; \
		} else { \
			(MM) = (p)->D + (sc); (cur)->Mt = FROM_D; \
		} \
	} else { \
		if ((p)->I > (p)->D) { \
			(MM) = (p)->I + (sc); (cur)->Mt = FROM_I; \
		} else { \
			(MM) = (p)->D + (sc); (cur)->Mt = FROM_D; \
		} \
	} \
}
/* II = open a gap from p->M or extend p->I */
#define set_I(II, cur, p) \
{ \
	if ((p)->M - gap_open > (p)->I) { \
		(cur)->It = FROM_M; \
		(II) = (p)->M - gap_open - gap_ext; \
	} else { \
		(cur)->It = FROM_I; \
		(II) = (p)->I - gap_ext; \
	} \
}
/* like set_I() but with the end-gap penalties; falls back to set_I() when
 * gap_end_ext is negative */
#define set_end_I(II, cur, p) \
{ \
	if (gap_end_ext >= 0) { \
		if ((p)->M - gap_end_open > (p)->I) { \
			(cur)->It = FROM_M; \
			(II) = (p)->M - gap_end_open - gap_end_ext; \
		} else { \
			(cur)->It = FROM_I; \
			(II) = (p)->I - gap_end_ext; \
		} \
	} else set_I(II, cur, p); \
}
/* DD = open a gap from p->M or extend p->D */
#define set_D(DD, cur, p) \
{ \
	if ((p)->M - gap_open > (p)->D) { \
		(cur)->Dt = FROM_M; \
		(DD) = (p)->M - gap_open - gap_ext; \
	} else { \
		(cur)->Dt = FROM_D; \
		(DD) = (p)->D - gap_ext; \
	} \
}
/* like set_D() but with the end-gap penalties; falls back to set_D() when
 * gap_end_ext is negative */
#define set_end_D(DD, cur, p) \
{ \
	if (gap_end_ext >= 0) { \
		if ((p)->M - gap_end_open > (p)->D) { \
			(cur)->Dt = FROM_M; \
			(DD) = (p)->M - gap_end_open - gap_end_ext; \
		} else { \
			(cur)->Dt = FROM_D; \
			(DD) = (p)->D - gap_end_ext; \
		} \
	} else set_D(DD, cur, p); \
}

/* Traceback record of one DP cell: the origin of its M, I and D scores. */
typedef struct {
	uint8_t Mt:3, It:2, Dt:3;
} dpcell_t;

/* Scores of the three states of one DP cell. */
typedef struct {
	int M, I, D;
} dpscore_t;

/***************************
 * banded global alignment *
 ***************************/
/*
 * Align seq1 (len1 symbols) against seq2 (len2 symbols) inside a band derived
 * from ap->band_width and the length difference. Symbols index ap->matrix as
 * matrix[seq2[j] * ap->row + seq1[i]].
 * Stores last[len1].M (the M state of the final cell) in *_score. When
 * n_cigar is non-NULL, also traces back and returns a packed CIGAR array (see
 * ka_path2cigar32) with its length in *n_cigar; otherwise returns NULL.
 * If either length is 0 the function returns NULL immediately and *_score is
 * left untouched.
 * NOTE(review): none of the malloc() results are checked.
 */
uint32_t *ka_global_core(uint8_t *seq1, int len1, uint8_t *seq2, int len2, const ka_param_t *ap, int *_score, int *n_cigar)
{
	int i, j;
	dpcell_t **dpcell, *q;
	dpscore_t *curr, *last, *s;
	int b1, b2, tmp_end;
	int *mat, end, max = 0;
	uint8_t type, ctype;
	uint32_t *cigar = 0;

	int gap_open, gap_ext, gap_end_open, gap_end_ext, b;
	int *score_matrix, N_MATRIX_ROW;

	/* initialize some align-related parameters. just for compatibility */
	gap_open = ap->gap_open;
	gap_ext = ap->gap_ext;
	gap_end_open = ap->gap_end_open;
	gap_end_ext = ap->gap_end_ext;
	b = ap->band_width;
	score_matrix = ap->matrix;
	N_MATRIX_ROW = ap->row;

	if (n_cigar) *n_cigar = 0;
	if (len1 == 0 || len2 == 0) return 0;

	/* calculate b1 and b2 */
	if (len1 > len2) {
		b1 = len1 - len2 + b;
		b2 = b;
	} else {
		b1 = b;
		b2 = len2 - len1 + b;
	}
	if (b1 > len1) b1 = len1;
	if (b2 > len2) b2 = len2;
	/* shift both pointers so that the sequences are indexed from 1 below */
	--seq1; --seq2;

	/* allocate memory */
	end = (b1 + b2 <= len1)? (b1 + b2 + 1) : (len1 + 1);
	dpcell = (dpcell_t**)malloc(sizeof(dpcell_t*) * (len2 + 1));
	for (j = 0; j <= len2; ++j)
		dpcell[j] = (dpcell_t*)malloc(sizeof(dpcell_t) * end);
	/* rows below the band start are biased so that dpcell[j] can be indexed
	 * by the absolute column i; the bias is undone before free() */
	for (j = b2 + 1; j <= len2; ++j)
		dpcell[j] -= j - b2;
	curr = (dpscore_t*)malloc(sizeof(dpscore_t) * (len1 + 1));
	last = (dpscore_t*)malloc(sizeof(dpscore_t) * (len1 + 1));

	/* set first row */
	SET_INF(*curr); curr->M = 0;
	for (i = 1, s = curr + 1; i < b1; ++i, ++s) {
		SET_INF(*s);
		set_end_D(s->D, dpcell[0] + i, s - 1);
	}
	s = curr; curr = last; last = s;

	/* core dynamic programming, part 1 */
	tmp_end = (b2 < len2)? b2 : len2 - 1;
	for (j = 1; j <= tmp_end; ++j) {
		q = dpcell[j]; s = curr; SET_INF(*s);
		set_end_I(s->I, q, last);
		end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;
		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
		++s; ++q;
		for (i = 1; i != end; ++i, ++s, ++q) {
			set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */
			set_I(s->I, q, last + i);
			set_D(s->D, q, s - 1);
		}
		set_M(s->M, q, last + i - 1, mat[seq1[i]]);
		set_D(s->D, q, s - 1);
		if (j + b1 - 1 > len1) { /* bug fixed, 040227 */
			set_end_I(s->I, q, last + i);
		} else s->I = MINOR_INF;
		s = curr; curr = last; last = s;
	}
	/* last row for part 1, use set_end_D() instead of set_D() */
	if (j == len2 && b2 != len2 - 1) {
		q = dpcell[j]; s = curr; SET_INF(*s);
		set_end_I(s->I, q, last);
		end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;
		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
		++s; ++q;
		for (i = 1; i != end; ++i, ++s, ++q) {
			set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */
			set_I(s->I, q, last + i);
			set_end_D(s->D, q, s - 1);
		}
		set_M(s->M, q, last + i - 1, mat[seq1[i]]);
		set_end_D(s->D, q, s - 1);
		if (j + b1 - 1 > len1) { /* bug fixed, 040227 */
			set_end_I(s->I, q, last + i);
		} else s->I = MINOR_INF;
		s = curr; curr = last; last = s;
		++j;
	}

	/* core dynamic programming, part 2 */
	for (; j <= len2 - b2 + 1; ++j) {
		SET_INF(curr[j - b2]);
		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
		end = j + b1 - 1;
		for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i != end; ++i, ++s, ++q) {
			set_M(s->M, q, last + i - 1, mat[seq1[i]]);
			set_I(s->I, q, last + i);
			set_D(s->D, q, s - 1);
		}
		set_M(s->M, q, last + i - 1, mat[seq1[i]]);
		set_D(s->D, q, s - 1);
		s->I = MINOR_INF;
		s = curr; curr = last; last = s;
	}

	/* core dynamic programming, part 3 */
	for (; j < len2; ++j) {
		SET_INF(curr[j - b2]);
		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
		for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {
			set_M(s->M, q, last + i - 1, mat[seq1[i]]);
			set_I(s->I, q, last + i);
			set_D(s->D, q, s - 1);
		}
		set_M(s->M, q, last + len1 - 1, mat[seq1[i]]);
		set_end_I(s->I, q, last + i);
		set_D(s->D, q, s - 1);
		s = curr; curr = last; last = s;
	}
	/* last row */
	if (j == len2) {
		SET_INF(curr[j - b2]);
		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
		for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {
			set_M(s->M, q, last + i - 1, mat[seq1[i]]);
			set_I(s->I, q, last + i);
			set_end_D(s->D, q, s - 1);
		}
		set_M(s->M, q, last + len1 - 1, mat[seq1[i]]);
		set_end_I(s->I, q, last + i);
		set_end_D(s->D, q, s - 1);
		s = curr; curr = last; last = s;
	}

	/* after the final swap `last` holds the row computed last */
	*_score = last[len1].M;

	if (n_cigar) { /* backtrace */
		path_t *p, *path = (path_t*)malloc(sizeof(path_t) * (len1 + len2 + 2));
		i = len1; j = len2;
		q = dpcell[j] + i;
		s = last + len1;
		/* start from the best of the three states of the final cell */
		max = s->M; type = q->Mt; ctype = FROM_M;
		if (s->I > max) { max = s->I; type = q->It; ctype = FROM_I; }
		if (s->D > max) { max = s->D; type = q->Dt; ctype = FROM_D; }

		p = path;
		p->ctype = ctype; p->i = i; p->j = j; /* bug fixed 040408 */
		++p;
		do {
			/* step to the predecessor cell of the current state ... */
			switch (ctype) {
			case FROM_M: --i; --j; break;
			case FROM_I: --j; break;
			case FROM_D: --i; break;
			}
			q = dpcell[j] + i;
			/* ... and look up where that predecessor's score came from */
			ctype = type;
			switch (type) {
			case FROM_M: type = q->Mt; break;
			case FROM_I: type = q->It; break;
			case FROM_D: type = q->Dt; break;
			}
			p->ctype = ctype; p->i = i; p->j = j;
			++p;
		} while (i || j);
		/* the element recorded for cell (0,0) is not part of the alignment */
		cigar = ka_path2cigar32(path, p - path - 1, n_cigar);
		free(path);
	}

	/* free memory */
	for (j = b2 + 1; j <= len2; ++j)
		dpcell[j] += j - b2;
	for (j = 0; j <= len2; ++j)
		free(dpcell[j]);
	free(dpcell);
	free(curr); free(last);

	return cigar;
}

/* Scores of the three states of one cell in ka_global_score(). */
typedef struct {
	int M, I, D;
} score_aux_t;

#define MINUS_INF -0x40000000

/*
 * Score-only banded global alignment: returns the largest of the M, I and D
 * scores of the final cell; no traceback is kept. The penalties ap->iio/iie/
 * ido/ide are used in the interior of the matrix and ap->eio/eie/edo/ede
 * along its edges (row 0 and the last row for D, column 0 and the last
 * column for I).
 * NOTE(review): the calloc() results are not checked, and len2 == 0 is not
 * special-cased.
 */
// matrix: len2 rows and len1 columns
int ka_global_score(const uint8_t *_seq1, int len1, const uint8_t *_seq2, int len2, const ka_param2_t *ap)
{

/* Fill cell _p from the previous row: _q0 is the diagonal predecessor,
 * _q0 + 1 the cell directly above, and _p - 1 the cell to the left. */
#define __score_aux(_p, _q0, _sc, _io, _ie, _do, _de) { \
		int t1, t2; \
		score_aux_t *_q; \
		_q = _q0; \
		_p->M = _q->M >= _q->I? _q->M : _q->I; \
		_p->M = _p->M >= _q->D? _p->M : _q->D; \
		_p->M += (_sc); \
		++_q; t1 = _q->M - _io - _ie; t2 = _q->I - _ie; _p->I = t1 >= t2? t1 : t2; \
		_q = _p-1; t1 = _q->M - _do - _de; t2 = _q->D - _de; _p->D = t1 >= t2? t1 : t2; \
	}

	int i, j, bw, scmat_size = ap->row, *scmat = ap->matrix, ret;
	const uint8_t *seq1, *seq2;
	score_aux_t *curr, *last, *swap;
	bw = abs(len1 - len2) + ap->band_width;
	i = len1 > len2? len1 : len2;
	if (bw > i + 1) bw = i + 1;
	/* shift both pointers so that the sequences are indexed from 1 below */
	seq1 = _seq1 - 1; seq2 = _seq2 - 1;
	curr = calloc(len1 + 2, sizeof(score_aux_t));
	last = calloc(len1 + 2, sizeof(score_aux_t));
	{ // the zero-th row
		int x, end = len1;
		score_aux_t *p;
		j = 0;
		x = j + bw; end = len1 < x? len1 : x; // band end
		p = curr;
		p->M = 0; p->I = p->D = MINUS_INF;
		for (i = 1, p = &curr[1]; i <= end; ++i, ++p)
			p->M = p->I = MINUS_INF, p->D = -(ap->edo + ap->ede * i);
		p->M = p->I = p->D = MINUS_INF;
		swap = curr; curr = last; last = swap;
	}
	for (j = 1; j < len2; ++j) {
		int x, beg = 0, end = len1, *scrow, col_end;
		score_aux_t *p;
		x = j - bw; beg = 0 > x? 0 : x; // band start
		x = j + bw; end = len1 < x? len1 : x; // band end
		if (beg == 0) { // from zero-th column
			p = curr;
			p->M = p->D = MINUS_INF; p->I = -(ap->eio + ap->eie * j);
			++beg; // then beg = 1
		}
		scrow = scmat + seq2[j] * scmat_size;
		/* when the band reaches the last column, that column is handled
		 * separately so that it gets the edge insertion penalties */
		if (end == len1) col_end = 1, --end;
		else col_end = 0;
		for (i = beg, p = &curr[beg]; i <= end; ++i, ++p)
			__score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->iio, ap->iie, ap->ido, ap->ide);
		if (col_end) {
			__score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->eio, ap->eie, ap->ido, ap->ide);
			++p;
		}
		/* sentinel just past the band */
		p->M = p->I = p->D = MINUS_INF;
//		for (i = 0; i <= len1; ++i) printf("(%d,%d,%d) ", curr[i].M, curr[i].I, curr[i].D); putchar('\n');
		swap = curr; curr = last; last = swap;
	}
	{ // the last row
		int x, beg = 0, *scrow;
		score_aux_t *p;
		j = len2;
		x = j - bw; beg = 0 > x? 0 : x; // band start
		if (beg == 0) { // from zero-th column
			p = curr;
			p->M = p->D = MINUS_INF; p->I = -(ap->eio + ap->eie * j);
			++beg; // then beg = 1
		}
		scrow = scmat + seq2[j] * scmat_size;
		for (i = beg, p = &curr[beg]; i < len1; ++i, ++p)
			__score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->iio, ap->iie, ap->edo, ap->ede);
		__score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->eio, ap->eie, ap->edo, ap->ede);
//		for (i = 0; i <= len1; ++i) printf("(%d,%d,%d) ", curr[i].M, curr[i].I, curr[i].D); putchar('\n');
	}
	ret = curr[len1].M >= curr[len1].I? curr[len1].M : curr[len1].I;
	ret = ret >= curr[len1].D? ret : curr[len1].D;
	free(curr); free(last);
	return ret;
}

#ifdef _MAIN
/* Small self-test: prints the score of two 4-symbol sequences. */
int main(int argc, char *argv[])
{
//	int len1 = 35, len2 = 35;
//	uint8_t *seq1 = (uint8_t*)"\0\0\3\3\2\0\0\0\1\0\2\1\2\1\3\2\3\3\3\0\2\3\2\1\1\3\3\3\2\3\3\1\0\0\1";
//	uint8_t *seq2 = (uint8_t*)"\0\0\3\3\2\0\0\0\1\0\2\1\2\1\3\2\3\3\3\0\2\3\2\1\1\3\3\3\2\3\3\1\0\1\0";
	int len1 = 4, len2 = 4;
	uint8_t *seq1 = (uint8_t*)"\1\0\0\1";
	uint8_t *seq2 = (uint8_t*)"\1\0\1\0";
	int sc;
//	ka_global_core(seq1, 2, seq2, 1, &ka_param_qual, &sc, 0);
	sc = ka_global_score(seq1, len1, seq2, len2, &ka_param2_qual);
	printf("%d\n", sc);
	return 0;
}
#endif
samtools-0.1.19/kaln.h000066400000000000000000000040301212162403000145250ustar00rootroot00000000000000/* The MIT License Copyright (c) 2003-2006, 2008, 2009 by Heng Li Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef LH3_KALN_H_ #define LH3_KALN_H_ #include #define MINOR_INF -1073741823 typedef struct { int gap_open; int gap_ext; int gap_end_open; int gap_end_ext; int *matrix; int row; int band_width; } ka_param_t; typedef struct { int iio, iie, ido, ide; int eio, eie, edo, ede; int *matrix; int row; int band_width; } ka_param2_t; #ifdef __cplusplus extern "C" { #endif uint32_t *ka_global_core(uint8_t *seq1, int len1, uint8_t *seq2, int len2, const ka_param_t *ap, int *_score, int *n_cigar); int ka_global_score(const uint8_t *_seq1, int len1, const uint8_t *_seq2, int len2, const ka_param2_t *ap); #ifdef __cplusplus } #endif extern ka_param_t ka_param_blast; /* = { 5, 2, 5, 2, aln_sm_blast, 5, 50 }; */ extern ka_param_t ka_param_qual; // only use this for global alignment!!! extern ka_param2_t ka_param2_qual; // only use this for global alignment!!! 
#endif samtools-0.1.19/khash.h000066400000000000000000000426271212162403000147140ustar00rootroot00000000000000/* The MIT License Copyright (c) 2008, 2009, 2011 by Attractive Chaos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* An example: #include "khash.h" KHASH_MAP_INIT_INT(32, char) int main() { int ret, is_missing; khiter_t k; khash_t(32) *h = kh_init(32); k = kh_put(32, h, 5, &ret); if (!ret) kh_del(32, h, k); kh_value(h, k) = 10; k = kh_get(32, h, 10); is_missing = (k == kh_end(h)); k = kh_get(32, h, 5); kh_del(32, h, k); for (k = kh_begin(h); k != kh_end(h); ++k) if (kh_exist(h, k)) kh_value(h, k) = 1; kh_destroy(32, h); return 0; } */ /* 2011-02-14 (0.2.5): * Allow to declare global functions. 
2009-09-26 (0.2.4): * Improve portability 2008-09-19 (0.2.3): * Corrected the example * Improved interfaces 2008-09-11 (0.2.2): * Improved speed a little in kh_put() 2008-09-10 (0.2.1): * Added kh_clear() * Fixed a compiling error 2008-09-02 (0.2.0): * Changed to token concatenation which increases flexibility. 2008-08-31 (0.1.2): * Fixed a bug in kh_get(), which has not been tested previously. 2008-08-31 (0.1.1): * Added destructor */ #ifndef __AC_KHASH_H #define __AC_KHASH_H /*! @header Generic hash table library. @copyright Heng Li */ #define AC_VERSION_KHASH_H "0.2.5" #include #include #include /* compiler specific configuration */ #if UINT_MAX == 0xffffffffu typedef unsigned int khint32_t; #elif ULONG_MAX == 0xffffffffu typedef unsigned long khint32_t; #endif #if ULONG_MAX == ULLONG_MAX typedef unsigned long khint64_t; #else typedef unsigned long long khint64_t; #endif #ifdef _MSC_VER #define inline __inline #endif typedef khint32_t khint_t; typedef khint_t khiter_t; #define __ac_HASH_PRIME_SIZE 32 static const khint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] = { 0ul, 3ul, 11ul, 23ul, 53ul, 97ul, 193ul, 389ul, 769ul, 1543ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul }; #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) static const double __ac_HASH_UPPER = 0.77; #define KHASH_DECLARE(name, khkey_t, khval_t) \ typedef struct { \ khint_t
n_buckets, size, n_occupied, upper_bound; \ khint32_t *flags; \ khkey_t *keys; \ khval_t *vals; \ } kh_##name##_t; \ extern kh_##name##_t *kh_init_##name(); \ extern void kh_destroy_##name(kh_##name##_t *h); \ extern void kh_clear_##name(kh_##name##_t *h); \ extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ extern void kh_del_##name(kh_##name##_t *h, khint_t x); #define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ typedef struct { \ khint_t n_buckets, size, n_occupied, upper_bound; \ khint32_t *flags; \ khkey_t *keys; \ khval_t *vals; \ } kh_##name##_t; \ SCOPE kh_##name##_t *kh_init_##name() { \ return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \ } \ SCOPE void kh_destroy_##name(kh_##name##_t *h) \ { \ if (h) { \ free(h->keys); free(h->flags); \ free(h->vals); \ free(h); \ } \ } \ SCOPE void kh_clear_##name(kh_##name##_t *h) \ { \ if (h && h->flags) { \ memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(khint32_t)); \ h->size = h->n_occupied = 0; \ } \ } \ SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ { \ if (h->n_buckets) { \ khint_t inc, k, i, last; \ k = __hash_func(key); i = k % h->n_buckets; \ inc = 1 + k % (h->n_buckets - 1); last = i; \ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \ else i += inc; \ if (i == last) return h->n_buckets; \ } \ return __ac_iseither(h->flags, i)? 
h->n_buckets : i; \ } else return 0; \ } \ SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ { \ khint32_t *new_flags = 0; \ khint_t j = 1; \ { \ khint_t t = __ac_HASH_PRIME_SIZE - 1; \ while (__ac_prime_list[t] > new_n_buckets) --t; \ new_n_buckets = __ac_prime_list[t+1]; \ if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \ else { \ new_flags = (khint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \ memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \ if (h->n_buckets < new_n_buckets) { \ h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ if (kh_is_map) \ h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ } \ } \ } \ if (j) { \ for (j = 0; j != h->n_buckets; ++j) { \ if (__ac_iseither(h->flags, j) == 0) { \ khkey_t key = h->keys[j]; \ khval_t val; \ if (kh_is_map) val = h->vals[j]; \ __ac_set_isdel_true(h->flags, j); \ while (1) { \ khint_t inc, k, i; \ k = __hash_func(key); \ i = k % new_n_buckets; \ inc = 1 + k % (new_n_buckets - 1); \ while (!__ac_isempty(new_flags, i)) { \ if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \ else i += inc; \ } \ __ac_set_isempty_false(new_flags, i); \ if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \ { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ __ac_set_isdel_true(h->flags, i); \ } else { \ h->keys[i] = key; \ if (kh_is_map) h->vals[i] = val; \ break; \ } \ } \ } \ } \ if (h->n_buckets > new_n_buckets) { \ h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ if (kh_is_map) \ h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ } \ free(h->flags); \ h->flags = new_flags; \ h->n_buckets = new_n_buckets; \ h->n_occupied = h->size; \ h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ } \ } \ SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t 
key, int *ret) \ { \ khint_t x; \ if (h->n_occupied >= h->upper_bound) { \ if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \ else kh_resize_##name(h, h->n_buckets + 1); \ } \ { \ khint_t inc, k, i, site, last; \ x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \ if (__ac_isempty(h->flags, i)) x = i; \ else { \ inc = 1 + k % (h->n_buckets - 1); last = i; \ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ if (__ac_isdel(h->flags, i)) site = i; \ if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \ else i += inc; \ if (i == last) { x = site; break; } \ } \ if (x == h->n_buckets) { \ if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ else x = i; \ } \ } \ } \ if (__ac_isempty(h->flags, x)) { \ h->keys[x] = key; \ __ac_set_isboth_false(h->flags, x); \ ++h->size; ++h->n_occupied; \ *ret = 1; \ } else if (__ac_isdel(h->flags, x)) { \ h->keys[x] = key; \ __ac_set_isboth_false(h->flags, x); \ ++h->size; \ *ret = 2; \ } else *ret = 0; \ return x; \ } \ SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ { \ if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ __ac_set_isdel_true(h->flags, x); \ --h->size; \ } \ } #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ KHASH_INIT2(name, static inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) /* --- BEGIN OF HASH FUNCTIONS --- */ /*! @function @abstract Integer hash function @param key The integer [khint32_t] @return The hash value [khint_t] */ #define kh_int_hash_func(key) (khint32_t)(key) /*! @function @abstract Integer comparison function */ #define kh_int_hash_equal(a, b) ((a) == (b)) /*! @function @abstract 64-bit integer hash function @param key The integer [khint64_t] @return The hash value [khint_t] */ #define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) /*! 
/*! @function
  @abstract     64-bit integer comparison function
 */
#define kh_int64_hash_equal(a, b) ((a) == (b))
/*! @function
  @abstract     const char* hash function
  @param  s     Pointer to a null terminated string
  @return       The hash value
  @discussion   The hash starts as the first character; every following
                character c updates it as h = h * 31 + c (spelled
                (h << 5) - h + c). An empty string hashes to 0.
                NOTE(review): *s is a plain char, so bytes >= 0x80 presumably
                contribute differently where char is signed -- confirm before
                relying on hash values across platforms.
 */
static inline khint_t __ac_X31_hash_string(const char *s)
{
	khint_t h = *s;
	if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
	return h;
}
/*! @function
  @abstract     Another interface to const char* hash function
  @param  key   Pointer to a null terminated string [const char*]
  @return       The hash value [khint_t]
 */
#define kh_str_hash_func(key) __ac_X31_hash_string(key)
/*! @function
  @abstract     Const char* comparison function (equality by strcmp())
 */
#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)

/* --- END OF HASH FUNCTIONS --- */

/* Other necessary macros... */

/*!
  @abstract Type of the hash table.
  @param  name  Name of the hash table [symbol]
 */
#define khash_t(name) kh_##name##_t

/*! @function
  @abstract     Initiate a hash table.
  @param  name  Name of the hash table [symbol]
  @return       Pointer to the hash table [khash_t(name)*]
 */
#define kh_init(name) kh_init_##name()

/*! @function
  @abstract     Destroy a hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
 */
#define kh_destroy(name, h) kh_destroy_##name(h)

/*! @function
  @abstract     Reset a hash table without deallocating memory.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
 */
#define kh_clear(name, h) kh_clear_##name(h)

/*! @function
  @abstract     Resize a hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  s     New size [khint_t]
 */
#define kh_resize(name, h, s) kh_resize_##name(h, s)

/*! @function
  @abstract     Insert a key to the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @param  r     Extra return code: 0 if the key is present in the hash table;
                1 if the bucket is empty (never used); 2 if the element in
                the bucket has been deleted [int*]
  @return       Iterator to the inserted element [khint_t]
 */
#define kh_put(name, h, k, r) kh_put_##name(h, k, r)

/*! @function
  @abstract     Retrieve a key from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @return       Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
 */
#define kh_get(name, h, k) kh_get_##name(h, k)

/*! @function
  @abstract     Remove a key from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Iterator to the element to be deleted [khint_t]
 */
#define kh_del(name, h, k) kh_del_##name(h, k)

/*! @function
  @abstract     Test whether a bucket contains data.
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       1 if containing data; 0 otherwise [int]
 */
#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))

/*! @function
  @abstract     Get key given an iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       Key [type of keys]
 */
#define kh_key(h, x) ((h)->keys[x])

/*! @function
  @abstract     Get value given an iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       Value [type of values]
  @discussion   For hash sets, calling this results in segfault.
 */
#define kh_val(h, x) ((h)->vals[x])

/*! @function
  @abstract     Alias of kh_val()
 */
#define kh_value(h, x) ((h)->vals[x])

/*! @function
  @abstract     Get the start iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The start iterator [khint_t]
 */
#define kh_begin(h) (khint_t)(0)

/*! @function
  @abstract     Get the end iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The end iterator [khint_t]
 */
#define kh_end(h) ((h)->n_buckets)

/*! @function
  @abstract     Get the number of elements in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of elements in the hash table [khint_t]
 */
#define kh_size(h) ((h)->size)

/*! @function
  @abstract     Get the number of buckets in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of buckets in the hash table [khint_t]
 */
#define kh_n_buckets(h) ((h)->n_buckets)

/* More convenient interfaces */

/*! @function
  @abstract     Instantiate a hash set containing integer keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_INT(name) \
	KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing integer keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT(name, khval_t) \
	KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract     Instantiate a hash set containing 64-bit integer keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_INT64(name) \
	KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing 64-bit integer keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT64(name, khval_t) \
	KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)

typedef const char *kh_cstr_t;
/*! @function
  @abstract     Instantiate a hash set containing const char* keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_STR(name) \
	KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)

/*!
@function @abstract Instantiate a hash map containing const char* keys @param name Name of the hash table [symbol] @param khval_t Type of values [type] */ #define KHASH_MAP_INIT_STR(name, khval_t) \ KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) #endif /* __AC_KHASH_H */ samtools-0.1.19/klist.h000066400000000000000000000066201212162403000147350ustar00rootroot00000000000000#ifndef _LH3_KLIST_H #define _LH3_KLIST_H #include #define KMEMPOOL_INIT(name, kmptype_t, kmpfree_f) \ typedef struct { \ size_t cnt, n, max; \ kmptype_t **buf; \ } kmp_##name##_t; \ static inline kmp_##name##_t *kmp_init_##name() { \ return calloc(1, sizeof(kmp_##name##_t)); \ } \ static inline void kmp_destroy_##name(kmp_##name##_t *mp) { \ size_t k; \ for (k = 0; k < mp->n; ++k) { \ kmpfree_f(mp->buf[k]); free(mp->buf[k]); \ } \ free(mp->buf); free(mp); \ } \ static inline kmptype_t *kmp_alloc_##name(kmp_##name##_t *mp) { \ ++mp->cnt; \ if (mp->n == 0) return calloc(1, sizeof(kmptype_t)); \ return mp->buf[--mp->n]; \ } \ static inline void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \ --mp->cnt; \ if (mp->n == mp->max) { \ mp->max = mp->max? 
mp->max<<1 : 16; \ mp->buf = realloc(mp->buf, sizeof(void*) * mp->max); \ } \ mp->buf[mp->n++] = p; \ } #define kmempool_t(name) kmp_##name##_t #define kmp_init(name) kmp_init_##name() #define kmp_destroy(name, mp) kmp_destroy_##name(mp) #define kmp_alloc(name, mp) kmp_alloc_##name(mp) #define kmp_free(name, mp, p) kmp_free_##name(mp, p) #define KLIST_INIT(name, kltype_t, kmpfree_t) \ struct __kl1_##name { \ kltype_t data; \ struct __kl1_##name *next; \ }; \ typedef struct __kl1_##name kl1_##name; \ KMEMPOOL_INIT(name, kl1_##name, kmpfree_t) \ typedef struct { \ kl1_##name *head, *tail; \ kmp_##name##_t *mp; \ size_t size; \ } kl_##name##_t; \ static inline kl_##name##_t *kl_init_##name() { \ kl_##name##_t *kl = calloc(1, sizeof(kl_##name##_t)); \ kl->mp = kmp_init(name); \ kl->head = kl->tail = kmp_alloc(name, kl->mp); \ kl->head->next = 0; \ return kl; \ } \ static inline void kl_destroy_##name(kl_##name##_t *kl) { \ kl1_##name *p; \ for (p = kl->head; p != kl->tail; p = p->next) \ kmp_free(name, kl->mp, p); \ kmp_free(name, kl->mp, p); \ kmp_destroy(name, kl->mp); \ free(kl); \ } \ static inline kltype_t *kl_pushp_##name(kl_##name##_t *kl) { \ kl1_##name *q, *p = kmp_alloc(name, kl->mp); \ q = kl->tail; p->next = 0; kl->tail->next = p; kl->tail = p; \ ++kl->size; \ return &q->data; \ } \ static inline int kl_shift_##name(kl_##name##_t *kl, kltype_t *d) { \ kl1_##name *p; \ if (kl->head->next == 0) return -1; \ --kl->size; \ p = kl->head; kl->head = kl->head->next; \ if (d) *d = p->data; \ kmp_free(name, kl->mp, p); \ return 0; \ } #define kliter_t(name) kl1_##name #define klist_t(name) kl_##name##_t #define kl_val(iter) ((iter)->data) #define kl_next(iter) ((iter)->next) #define kl_begin(kl) ((kl)->head) #define kl_end(kl) ((kl)->tail) #define kl_init(name) kl_init_##name() #define kl_destroy(name, kl) kl_destroy_##name(kl) #define kl_pushp(name, kl) kl_pushp_##name(kl) #define kl_shift(name, kl, d) kl_shift_##name(kl, d) #endif 
samtools-0.1.19/knetfile.c000066400000000000000000000436341212162403000154110ustar00rootroot00000000000000/* The MIT License Copyright (c) 2008 by Genome Research Ltd (GRL). 2010 by Attractive Chaos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* Probably I will not do socket programming in the next few years and therefore I decide to heavily annotate this file, for Linux and Windows as well. -ac */ #include #include #include #include #include #include #include #include #ifndef _WIN32 #include #include #include #endif #include "knetfile.h" /* In winsock.h, the type of a socket is SOCKET, which is: "typedef * u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed * integer -1. In knetfile.c, I use "int" for socket type * throughout. This should be improved to avoid confusion. * * In Linux/Mac, recv() and read() do almost the same thing. You can see * in the header file that netread() is simply an alias of read(). In * Windows, however, they are different and using recv() is mandatory. 
/* This function tests if the file handler is ready for reading (or
 * writing if is_read==0). It waits for at most 5 seconds and returns the
 * result of select(): positive if the descriptor is ready, 0 on time-out
 * and -1 on error. */
static int socket_wait(int fd, int is_read)
{
	fd_set fds, *fdr = 0, *fdw = 0;
	struct timeval tv;
	int ret;
#ifndef _WIN32
	/* FD_SET() on a descriptor outside [0, FD_SETSIZE) is undefined
	 * behaviour; report it the same way select() reports a bad descriptor. */
	if (fd < 0 || fd >= FD_SETSIZE) {
		errno = EBADF;
		perror("select");
		return -1;
	}
#endif
	tv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out
	FD_ZERO(&fds);
	FD_SET(fd, &fds);
	if (is_read) fdr = &fds;
	else fdw = &fds;
	ret = select(fd+1, fdr, fdw, 0, &tv);
#ifndef _WIN32
	if (ret == -1) perror("select");
#else
	if (ret == 0)
		fprintf(stderr, "select time-out\n");
	else if (ret == SOCKET_ERROR)
		fprintf(stderr, "select: %d\n", WSAGetLastError());
#endif
	return ret;
}

#ifndef _WIN32
/* This function does not work with Windows due to the lack of
 * getaddrinfo() in winsock. It is adapted from an example in "Beej's
 * Guide to Network Programming" (http://beej.us/guide/bgnet/).
 *
 * Opens a TCP connection to host:port and returns the connected
 * descriptor, or -1 on failure (a diagnostic is printed to stderr). Every
 * address returned by getaddrinfo() is tried in turn, and a socket that
 * could not be configured or connected is closed before moving on, so no
 * descriptor is leaked on failure. */
static int socket_connect(const char *host, const char *port)
{
	int on = 1, fd = -1, gai_ret, saved_errno = 0;
	const char *failed = "socket"; // name of the call that failed last
	struct linger lng = { 0, 0 };
	struct addrinfo hints, *res = 0, *ai;

	memset(&hints, 0, sizeof(struct addrinfo));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	/* In Unix/Mac, getaddrinfo() is the most convenient way to get
	 * server information. It reports errors through its return value, not
	 * errno, hence gai_strerror() instead of perror(); on failure there is
	 * no list to free. */
	if ((gai_ret = getaddrinfo(host, port, &hints, &res)) != 0) {
		fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(gai_ret));
		return -1;
	}
	for (ai = res; ai != 0; ai = ai->ai_next) {
		if ((fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) == -1) {
			failed = "socket"; saved_errno = errno;
			continue;
		}
		/* The following two setsockopt() are used by ftplib
		 * (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they are
		 * necessary. */
		if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1
			|| setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1)
			failed = "setsockopt";
		else if (connect(fd, ai->ai_addr, ai->ai_addrlen) != 0)
			failed = "connect";
		else break; // connected
		saved_errno = errno; // close() may clobber errno
		close(fd);
		fd = -1;
	}
	if (fd == -1) {
		errno = saved_errno;
		perror(failed);
	}
	freeaddrinfo(res);
	return fd;
}
#else
/* MinGW's printf has problem with "%lld" */
/* Writes the decimal representation of x to buf (which must hold at least
 * 21 characters) and returns buf. Negative values get a leading '-'. */
char *int64tostr(char *buf, int64_t x)
{
	int cnt, i = 0, is_neg = x < 0;
	do {
		int digit = (int)(x % 10); // negative when x is negative
		buf[i++] = '0' + (digit < 0? -digit : digit);
		x /= 10;
	} while (x);
	if (is_neg) buf[i++] = '-';
	buf[i] = 0;
	// the digits were produced least significant first: reverse in place
	for (cnt = i, i = 0; i < cnt/2; ++i) {
		int c = buf[i]; buf[i] = buf[cnt-i-1]; buf[cnt-i-1] = c;
	}
	return buf;
}

/* Parses the run of decimal digits at the start of buf; parsing stops at
 * the first character that is not a digit. */
int64_t strtoint64(const char *buf)
{
	int64_t x;
	for (x = 0; *buf >= '0' && *buf <= '9'; ++buf)
		x = x * 10 + ((int64_t) *buf - 48);
	return x;
}
/* In windows, the first thing is to establish the TCP connection. */
int knet_win32_init()
{
	WSADATA wsaData;
	return WSAStartup(MAKEWORD(2, 2), &wsaData);
}
void knet_win32_destroy()
{
	WSACleanup();
}
/* A slightly modified version of the following function also works on
 * Mac (and presumably Linux). However, this function is not stable on
 * my Mac. It sometimes works fine but sometimes does not. Therefore for
 * non-Windows OS, I do not use this one.
*/ static SOCKET socket_connect(const char *host, const char *port) { #define __err_connect(func) \ do { \ fprintf(stderr, "%s: %d\n", func, WSAGetLastError()); \ return -1; \ } while (0) int on = 1; SOCKET fd; struct linger lng = { 0, 0 }; struct sockaddr_in server; struct hostent *hp = 0; // open socket if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == INVALID_SOCKET) __err_connect("socket"); if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&on, sizeof(on)) == -1) __err_connect("setsockopt"); if (setsockopt(fd, SOL_SOCKET, SO_LINGER, (char*)&lng, sizeof(lng)) == -1) __err_connect("setsockopt"); // get host info if (isalpha(host[0])) hp = gethostbyname(host); else { struct in_addr addr; addr.s_addr = inet_addr(host); hp = gethostbyaddr((char*)&addr, 4, AF_INET); } if (hp == 0) __err_connect("gethost"); // connect server.sin_addr.s_addr = *((unsigned long*)hp->h_addr); server.sin_family= AF_INET; server.sin_port = htons(atoi(port)); if (connect(fd, (struct sockaddr*)&server, sizeof(server)) != 0) __err_connect("connect"); // freehostent(hp); // strangely in MSDN, hp is NOT freed (memory leak?!) return fd; } #endif static off_t my_netread(int fd, void *buf, off_t len) { off_t rest = len, curr, l = 0; /* recv() and read() may not read the required length of data with * one call. They have to be called repeatedly. */ while (rest) { if (socket_wait(fd, 1) <= 0) break; // socket is not ready for reading curr = netread(fd, buf + l, rest); /* According to the glibc manual, section 13.2, a zero returned * value indicates end-of-file (EOF), which should mean that * read() will not return zero if EOF has not been met but data * are not immediately available. 
*/ if (curr == 0) break; l += curr; rest -= curr; } return l; } /************************* * FTP specific routines * *************************/ static int kftp_get_response(knetFile *ftp) { #ifndef _WIN32 unsigned char c; #else char c; #endif int n = 0; char *p; if (socket_wait(ftp->ctrl_fd, 1) <= 0) return 0; while (netread(ftp->ctrl_fd, &c, 1)) { // FIXME: this is *VERY BAD* for unbuffered I/O //fputc(c, stderr); if (n >= ftp->max_response) { ftp->max_response = ftp->max_response? ftp->max_response<<1 : 256; ftp->response = realloc(ftp->response, ftp->max_response); } ftp->response[n++] = c; if (c == '\n') { if (n >= 4 && isdigit(ftp->response[0]) && isdigit(ftp->response[1]) && isdigit(ftp->response[2]) && ftp->response[3] != '-') break; n = 0; continue; } } if (n < 2) return -1; ftp->response[n-2] = 0; return strtol(ftp->response, &p, 0); } static int kftp_send_cmd(knetFile *ftp, const char *cmd, int is_get) { if (socket_wait(ftp->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing netwrite(ftp->ctrl_fd, cmd, strlen(cmd)); return is_get? 
kftp_get_response(ftp) : 0; } static int kftp_pasv_prep(knetFile *ftp) { char *p; int v[6]; kftp_send_cmd(ftp, "PASV\r\n", 1); for (p = ftp->response; *p && *p != '('; ++p); if (*p != '(') return -1; ++p; sscanf(p, "%d,%d,%d,%d,%d,%d", &v[0], &v[1], &v[2], &v[3], &v[4], &v[5]); memcpy(ftp->pasv_ip, v, 4 * sizeof(int)); ftp->pasv_port = (v[4]<<8&0xff00) + v[5]; return 0; } static int kftp_pasv_connect(knetFile *ftp) { char host[80], port[10]; if (ftp->pasv_port == 0) { fprintf(stderr, "[kftp_pasv_connect] kftp_pasv_prep() is not called before hand.\n"); return -1; } sprintf(host, "%d.%d.%d.%d", ftp->pasv_ip[0], ftp->pasv_ip[1], ftp->pasv_ip[2], ftp->pasv_ip[3]); sprintf(port, "%d", ftp->pasv_port); ftp->fd = socket_connect(host, port); if (ftp->fd == -1) return -1; return 0; } int kftp_connect(knetFile *ftp) { ftp->ctrl_fd = socket_connect(ftp->host, ftp->port); if (ftp->ctrl_fd == -1) return -1; kftp_get_response(ftp); kftp_send_cmd(ftp, "USER anonymous\r\n", 1); kftp_send_cmd(ftp, "PASS kftp@\r\n", 1); kftp_send_cmd(ftp, "TYPE I\r\n", 1); return 0; } int kftp_reconnect(knetFile *ftp) { if (ftp->ctrl_fd != -1) { netclose(ftp->ctrl_fd); ftp->ctrl_fd = -1; } netclose(ftp->fd); ftp->fd = -1; return kftp_connect(ftp); } // initialize ->type, ->host, ->retr and ->size knetFile *kftp_parse_url(const char *fn, const char *mode) { knetFile *fp; char *p; int l; if (strstr(fn, "ftp://") != fn) return 0; for (p = (char*)fn + 6; *p && *p != '/'; ++p); if (*p != '/') return 0; l = p - fn - 6; fp = calloc(1, sizeof(knetFile)); fp->type = KNF_TYPE_FTP; fp->fd = -1; /* the Linux/Mac version of socket_connect() also recognizes a port * like "ftp", but the Windows version does not. 
*/ fp->port = strdup("21"); fp->host = calloc(l + 1, 1); if (strchr(mode, 'c')) fp->no_reconnect = 1; strncpy(fp->host, fn + 6, l); fp->retr = calloc(strlen(p) + 8, 1); sprintf(fp->retr, "RETR %s\r\n", p); fp->size_cmd = calloc(strlen(p) + 8, 1); sprintf(fp->size_cmd, "SIZE %s\r\n", p); fp->seek_offset = 0; return fp; } // place ->fd at offset off int kftp_connect_file(knetFile *fp) { int ret; long long file_size; if (fp->fd != -1) { netclose(fp->fd); if (fp->no_reconnect) kftp_get_response(fp); } kftp_pasv_prep(fp); kftp_send_cmd(fp, fp->size_cmd, 1); #ifndef _WIN32 if ( sscanf(fp->response,"%*d %lld", &file_size) != 1 ) { fprintf(stderr,"[kftp_connect_file] %s\n", fp->response); return -1; } #else const char *p = fp->response; while (*p != ' ') ++p; while (*p < '0' || *p > '9') ++p; file_size = strtoint64(p); #endif fp->file_size = file_size; if (fp->offset>=0) { char tmp[32]; #ifndef _WIN32 sprintf(tmp, "REST %lld\r\n", (long long)fp->offset); #else strcpy(tmp, "REST "); int64tostr(tmp + 5, fp->offset); strcat(tmp, "\r\n"); #endif kftp_send_cmd(fp, tmp, 1); } kftp_send_cmd(fp, fp->retr, 0); kftp_pasv_connect(fp); ret = kftp_get_response(fp); if (ret != 150) { fprintf(stderr, "[kftp_connect_file] %s\n", fp->response); netclose(fp->fd); fp->fd = -1; return -1; } fp->is_ready = 1; return 0; } /************************** * HTTP specific routines * **************************/ knetFile *khttp_parse_url(const char *fn, const char *mode) { knetFile *fp; char *p, *proxy, *q; int l; if (strstr(fn, "http://") != fn) return 0; // set ->http_host for (p = (char*)fn + 7; *p && *p != '/'; ++p); l = p - fn - 7; fp = calloc(1, sizeof(knetFile)); fp->http_host = calloc(l + 1, 1); strncpy(fp->http_host, fn + 7, l); fp->http_host[l] = 0; for (q = fp->http_host; *q && *q != ':'; ++q); if (*q == ':') *q++ = 0; // get http_proxy proxy = getenv("http_proxy"); // set ->host, ->port and ->path if (proxy == 0) { fp->host = strdup(fp->http_host); // when there is no proxy, server name is 
identical to http_host name. fp->port = strdup(*q? q : "80"); fp->path = strdup(*p? p : "/"); } else { fp->host = (strstr(proxy, "http://") == proxy)? strdup(proxy + 7) : strdup(proxy); for (q = fp->host; *q && *q != ':'; ++q); if (*q == ':') *q++ = 0; fp->port = strdup(*q? q : "80"); fp->path = strdup(fn); } fp->type = KNF_TYPE_HTTP; fp->ctrl_fd = fp->fd = -1; fp->seek_offset = 0; return fp; } int khttp_connect_file(knetFile *fp) { int ret, l = 0; char *buf, *p; if (fp->fd != -1) netclose(fp->fd); fp->fd = socket_connect(fp->host, fp->port); buf = calloc(0x10000, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough. l += sprintf(buf + l, "GET %s HTTP/1.0\r\nHost: %s\r\n", fp->path, fp->http_host); l += sprintf(buf + l, "Range: bytes=%lld-\r\n", (long long)fp->offset); l += sprintf(buf + l, "\r\n"); netwrite(fp->fd, buf, l); l = 0; while (netread(fp->fd, buf + l, 1)) { // read HTTP header; FIXME: bad efficiency if (buf[l] == '\n' && l >= 3) if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break; ++l; } buf[l] = 0; if (l < 14) { // prematured header netclose(fp->fd); fp->fd = -1; return -1; } ret = strtol(buf + 8, &p, 0); // HTTP return code if (ret == 200 && fp->offset>0) { // 200 (complete result); then skip beginning of the file off_t rest = fp->offset; while (rest) { off_t l = rest < 0x10000? 
rest : 0x10000; rest -= my_netread(fp->fd, buf, l); } } else if (ret != 206 && ret != 200) { free(buf); fprintf(stderr, "[khttp_connect_file] fail to open file (HTTP code: %d).\n", ret); netclose(fp->fd); fp->fd = -1; return -1; } free(buf); fp->is_ready = 1; return 0; } /******************** * Generic routines * ********************/ knetFile *knet_open(const char *fn, const char *mode) { knetFile *fp = 0; if (mode[0] != 'r') { fprintf(stderr, "[kftp_open] only mode \"r\" is supported.\n"); return 0; } if (strstr(fn, "ftp://") == fn) { fp = kftp_parse_url(fn, mode); if (fp == 0) return 0; if (kftp_connect(fp) == -1) { knet_close(fp); return 0; } kftp_connect_file(fp); } else if (strstr(fn, "http://") == fn) { fp = khttp_parse_url(fn, mode); if (fp == 0) return 0; khttp_connect_file(fp); } else { // local file #ifdef _WIN32 /* In windows, O_BINARY is necessary. In Linux/Mac, O_BINARY may * be undefined on some systems, although it is defined on my * Mac and the Linux I have tested on. */ int fd = open(fn, O_RDONLY | O_BINARY); #else int fd = open(fn, O_RDONLY); #endif if (fd == -1) { perror("open"); return 0; } fp = (knetFile*)calloc(1, sizeof(knetFile)); fp->type = KNF_TYPE_LOCAL; fp->fd = fd; fp->ctrl_fd = -1; } if (fp && fp->fd == -1) { knet_close(fp); return 0; } return fp; } knetFile *knet_dopen(int fd, const char *mode) { knetFile *fp = (knetFile*)calloc(1, sizeof(knetFile)); fp->type = KNF_TYPE_LOCAL; fp->fd = fd; return fp; } off_t knet_read(knetFile *fp, void *buf, off_t len) { off_t l = 0; if (fp->fd == -1) return 0; if (fp->type == KNF_TYPE_FTP) { if (fp->is_ready == 0) { if (!fp->no_reconnect) kftp_reconnect(fp); kftp_connect_file(fp); } } else if (fp->type == KNF_TYPE_HTTP) { if (fp->is_ready == 0) khttp_connect_file(fp); } if (fp->type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX off_t rest = len, curr; while (rest) { do { curr = read(fp->fd, buf + l, rest); } while (curr < 0 && EINTR == errno); if (curr < 0) 
return -1; if (curr == 0) break; l += curr; rest -= curr; } } else l = my_netread(fp->fd, buf, len); fp->offset += l; return l; } off_t knet_seek(knetFile *fp, int64_t off, int whence) { if (whence == SEEK_SET && off == fp->offset) return 0; if (fp->type == KNF_TYPE_LOCAL) { /* Be aware that lseek() returns the offset after seeking, * while fseek() returns zero on success. */ off_t offset = lseek(fp->fd, off, whence); if (offset == -1) { // Be silent, it is OK for knet_seek to fail when the file is streamed // fprintf(stderr,"[knet_seek] %s\n", strerror(errno)); return -1; } fp->offset = offset; return 0; } else if (fp->type == KNF_TYPE_FTP) { if (whence==SEEK_CUR) fp->offset += off; else if (whence==SEEK_SET) fp->offset = off; else if ( whence==SEEK_END) fp->offset = fp->file_size+off; fp->is_ready = 0; return 0; } else if (fp->type == KNF_TYPE_HTTP) { if (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future? fprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\n"); errno = ESPIPE; return -1; } if (whence==SEEK_CUR) fp->offset += off; else if (whence==SEEK_SET) fp->offset = off; fp->is_ready = 0; return 0; } errno = EINVAL; fprintf(stderr,"[knet_seek] %s\n", strerror(errno)); return -1; } int knet_close(knetFile *fp) { if (fp == 0) return 0; if (fp->ctrl_fd != -1) netclose(fp->ctrl_fd); // FTP specific if (fp->fd != -1) { /* On Linux/Mac, netclose() is an alias of close(), but on * Windows, it is an alias of closesocket(). 
*/ if (fp->type == KNF_TYPE_LOCAL) close(fp->fd); else netclose(fp->fd); } free(fp->host); free(fp->port); free(fp->response); free(fp->retr); // FTP specific free(fp->path); free(fp->http_host); // HTTP specific free(fp); return 0; } #ifdef KNETFILE_MAIN int main(void) { char *buf; knetFile *fp; int type = 4, l; #ifdef _WIN32 knet_win32_init(); #endif buf = calloc(0x100000, 1); if (type == 0) { fp = knet_open("knetfile.c", "r"); knet_seek(fp, 1000, SEEK_SET); } else if (type == 1) { // NCBI FTP, large file fp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r"); knet_seek(fp, 2500000000ll, SEEK_SET); l = knet_read(fp, buf, 255); } else if (type == 2) { fp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r"); knet_seek(fp, 1000, SEEK_SET); } else if (type == 3) { fp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r"); knet_seek(fp, 1000, SEEK_SET); } else if (type == 4) { fp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r"); knet_read(fp, buf, 10000); knet_seek(fp, 20000, SEEK_SET); knet_seek(fp, 10000, SEEK_SET); l = knet_read(fp, buf+10000, 10000000) + 10000; } if (type != 4 && type != 1) { knet_read(fp, buf, 255); buf[255] = 0; printf("%s\n", buf); } else write(fileno(stdout), buf, l); knet_close(fp); free(buf); return 0; } #endif samtools-0.1.19/knetfile.h000066400000000000000000000031131212162403000154020ustar00rootroot00000000000000#ifndef KNETFILE_H #define KNETFILE_H #include #include #ifndef _WIN32 #define netread(fd, ptr, len) read(fd, ptr, len) #define netwrite(fd, ptr, len) write(fd, ptr, len) #define netclose(fd) close(fd) #else #include #define netread(fd, ptr, len) recv(fd, ptr, len, 0) #define netwrite(fd, ptr, len) send(fd, ptr, len, 0) #define netclose(fd) closesocket(fd) #endif // FIXME: currently I/O is unbuffered #define KNF_TYPE_LOCAL 1 #define KNF_TYPE_FTP 2 #define KNF_TYPE_HTTP 3 typedef struct knetFile_s { int type, fd; 
int64_t offset; char *host, *port; // the following are for FTP only int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; char *response, *retr, *size_cmd; int64_t seek_offset; // for lazy seek int64_t file_size; // the following are for HTTP only char *path, *http_host; } knetFile; #define knet_tell(fp) ((fp)->offset) #define knet_fileno(fp) ((fp)->fd) #ifdef __cplusplus extern "C" { #endif #ifdef _WIN32 int knet_win32_init(); void knet_win32_destroy(); #endif knetFile *knet_open(const char *fn, const char *mode); /* This only works with local files. */ knetFile *knet_dopen(int fd, const char *mode); /* If ->is_ready==0, this routine updates ->fd; otherwise, it simply reads from ->fd. */ off_t knet_read(knetFile *fp, void *buf, off_t len); /* This routine only sets ->offset and ->is_ready=0. It does not communicate with the FTP server. */ off_t knet_seek(knetFile *fp, int64_t off, int whence); int knet_close(knetFile *fp); #ifdef __cplusplus } #endif #endif samtools-0.1.19/kprobaln.c000066400000000000000000000246631212162403000154210ustar00rootroot00000000000000/* The MIT License Copyright (c) 2003-2006, 2008-2010, by Heng Li Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include "kprobaln.h" /***************************************** * Probabilistic banded glocal alignment * *****************************************/ #define EI .25 #define EM .33333333333 static float g_qual2prob[256]; #define set_u(u, b, i, k) { int x=(i)-(b); x=x>0?x:0; (u)=((k)-x+1)*3; } kpa_par_t kpa_par_def = { 0.001, 0.1, 10 }; kpa_par_t kpa_par_alt = { 0.0001, 0.01, 10 }; /* The topology of the profile HMM: /\ /\ /\ /\ I[1] I[k-1] I[k] I[L] ^ \ \ ^ \ ^ \ \ ^ | \ \ | \ | \ \ | M[0] M[1] -> ... -> M[k-1] -> M[k] -> ... -> M[L] M[L+1] \ \/ \/ \/ / \ /\ /\ /\ / -> D[k-1] -> D[k] -> M[0] points to every {M,I}[k] and every {M,I}[k] points M[L+1]. On input, _ref is the reference sequence and _query is the query sequence. Both are sequences of 0/1/2/3/4 where 4 stands for an ambiguous residue. iqual is the base quality. c sets the gap open probability, gap extension probability and band width. On output, state and q are arrays of length l_query. The higher 30 bits give the reference position the query base is matched to and the lower two bits can be 0 (an alignment match) or 1 (an insertion). q[i] gives the phred scaled posterior probability of state[i] being wrong. */ int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_query, const uint8_t *iqual, const kpa_par_t *c, int *state, uint8_t *q) { double **f, **b = 0, *s, m[9], sI, sM, bI, bM, pb; float *qual, *_qual; const uint8_t *ref, *query; int bw, bw2, i, k, is_diff = 0, is_backward = 1, Pr; if ( l_ref<=0 || l_query<=0 ) return 0; // FIXME: this may not be an ideal fix, just prevents sefgault /*** initialization ***/ is_backward = state && q? 
1 : 0; ref = _ref - 1; query = _query - 1; // change to 1-based coordinate bw = l_ref > l_query? l_ref : l_query; if (bw > c->bw) bw = c->bw; if (bw < abs(l_ref - l_query)) bw = abs(l_ref - l_query); bw2 = bw * 2 + 1; // allocate the forward and backward matrices f[][] and b[][] and the scaling array s[] f = calloc(l_query+1, sizeof(void*)); if (is_backward) b = calloc(l_query+1, sizeof(void*)); for (i = 0; i <= l_query; ++i) { // FIXME: this will lead in segfault for l_query==0 f[i] = calloc(bw2 * 3 + 6, sizeof(double)); // FIXME: this is over-allocated for very short seqs if (is_backward) b[i] = calloc(bw2 * 3 + 6, sizeof(double)); } s = calloc(l_query+2, sizeof(double)); // s[] is the scaling factor to avoid underflow // initialize qual _qual = calloc(l_query, sizeof(float)); if (g_qual2prob[0] == 0) for (i = 0; i < 256; ++i) g_qual2prob[i] = pow(10, -i/10.); for (i = 0; i < l_query; ++i) _qual[i] = g_qual2prob[iqual? iqual[i] : 30]; qual = _qual - 1; // initialize transition probability sM = sI = 1. / (2 * l_query + 2); // the value here seems not to affect results; FIXME: need proof m[0*3+0] = (1 - c->d - c->d) * (1 - sM); m[0*3+1] = m[0*3+2] = c->d * (1 - sM); m[1*3+0] = (1 - c->e) * (1 - sI); m[1*3+1] = c->e * (1 - sI); m[1*3+2] = 0.; m[2*3+0] = 1 - c->e; m[2*3+1] = 0.; m[2*3+2] = c->e; bM = (1 - c->d) / l_ref; bI = c->d / l_ref; // (bM+bI)*l_ref==1 /*** forward ***/ // f[0] set_u(k, bw, 0, 0); f[0][k] = s[0] = 1.; { // f[1] double *fi = f[1], sum; int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1, _beg, _end; for (k = beg, sum = 0.; k <= end; ++k) { int u; double e = (ref[k] > 3 || query[1] > 3)? 1. : ref[k] == query[1]? 1. 
- qual[1] : qual[1] * EM; set_u(u, bw, 1, k); fi[u+0] = e * bM; fi[u+1] = EI * bI; sum += fi[u] + fi[u+1]; } // rescale s[1] = sum; set_u(_beg, bw, 1, beg); set_u(_end, bw, 1, end); _end += 2; for (k = _beg; k <= _end; ++k) fi[k] /= sum; } // f[2..l_query] for (i = 2; i <= l_query; ++i) { double *fi = f[i], *fi1 = f[i-1], sum, qli = qual[i]; int beg = 1, end = l_ref, x, _beg, _end; uint8_t qyi = query[i]; x = i - bw; beg = beg > x? beg : x; // band start x = i + bw; end = end < x? end : x; // band end for (k = beg, sum = 0.; k <= end; ++k) { int u, v11, v01, v10; double e; e = (ref[k] > 3 || qyi > 3)? 1. : ref[k] == qyi? 1. - qli : qli * EM; set_u(u, bw, i, k); set_u(v11, bw, i-1, k-1); set_u(v10, bw, i-1, k); set_u(v01, bw, i, k-1); fi[u+0] = e * (m[0] * fi1[v11+0] + m[3] * fi1[v11+1] + m[6] * fi1[v11+2]); fi[u+1] = EI * (m[1] * fi1[v10+0] + m[4] * fi1[v10+1]); fi[u+2] = m[2] * fi[v01+0] + m[8] * fi[v01+2]; sum += fi[u] + fi[u+1] + fi[u+2]; // fprintf(stderr, "F (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, fi[u], fi[u+1], fi[u+2]); // DEBUG } // rescale s[i] = sum; set_u(_beg, bw, i, beg); set_u(_end, bw, i, end); _end += 2; for (k = _beg, sum = 1./sum; k <= _end; ++k) fi[k] *= sum; } { // f[l_query+1] double sum; for (k = 1, sum = 0.; k <= l_ref; ++k) { int u; set_u(u, bw, l_query, k); if (u < 3 || u >= bw2*3+3) continue; sum += f[l_query][u+0] * sM + f[l_query][u+1] * sI; } s[l_query+1] = sum; // the last scaling factor } { // compute likelihood double p = 1., Pr1 = 0.; for (i = 0; i <= l_query + 1; ++i) { p *= s[i]; if (p < 1e-100) Pr1 += -4.343 * log(p), p = 1.; } Pr1 += -4.343 * log(p * l_ref * l_query); Pr = (int)(Pr1 + .499); if (!is_backward) { // skip backward and MAP for (i = 0; i <= l_query; ++i) free(f[i]); free(f); free(s); free(_qual); return Pr; } } /*** backward ***/ // b[l_query] (b[l_query+1][0]=1 and thus \tilde{b}[][]=1/s[l_query+1]; this is where s[l_query+1] comes from) for (k = 1; k <= l_ref; ++k) { int u; double *bi = b[l_query]; set_u(u, bw, 
l_query, k); if (u < 3 || u >= bw2*3+3) continue; bi[u+0] = sM / s[l_query] / s[l_query+1]; bi[u+1] = sI / s[l_query] / s[l_query+1]; } // b[l_query-1..1] for (i = l_query - 1; i >= 1; --i) { int beg = 1, end = l_ref, x, _beg, _end; double *bi = b[i], *bi1 = b[i+1], y = (i > 1), qli1 = qual[i+1]; uint8_t qyi1 = query[i+1]; x = i - bw; beg = beg > x? beg : x; x = i + bw; end = end < x? end : x; for (k = end; k >= beg; --k) { int u, v11, v01, v10; double e; set_u(u, bw, i, k); set_u(v11, bw, i+1, k+1); set_u(v10, bw, i+1, k); set_u(v01, bw, i, k+1); e = (k >= l_ref? 0 : (ref[k+1] > 3 || qyi1 > 3)? 1. : ref[k+1] == qyi1? 1. - qli1 : qli1 * EM) * bi1[v11]; bi[u+0] = e * m[0] + EI * m[1] * bi1[v10+1] + m[2] * bi[v01+2]; // bi1[v11] has been foled into e. bi[u+1] = e * m[3] + EI * m[4] * bi1[v10+1]; bi[u+2] = (e * m[6] + m[8] * bi[v01+2]) * y; // fprintf(stderr, "B (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, bi[u], bi[u+1], bi[u+2]); // DEBUG } // rescale set_u(_beg, bw, i, beg); set_u(_end, bw, i, end); _end += 2; for (k = _beg, y = 1./s[i]; k <= _end; ++k) bi[k] *= y; } { // b[0] int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1; double sum = 0.; for (k = end; k >= beg; --k) { int u; double e = (ref[k] > 3 || query[1] > 3)? 1. : ref[k] == query[1]? 1. - qual[1] : qual[1] * EM; set_u(u, bw, 1, k); if (u < 3 || u >= bw2*3+3) continue; sum += e * b[1][u+0] * bM + EI * b[1][u+1] * bI; } set_u(k, bw, 0, 0); pb = b[0][k] = sum / s[0]; // if everything works as is expected, pb == 1.0 } is_diff = fabs(pb - 1.) > 1e-7? 1 : 0; /*** MAP ***/ for (i = 1; i <= l_query; ++i) { double sum = 0., *fi = f[i], *bi = b[i], max = 0.; int beg = 1, end = l_ref, x, max_k = -1; x = i - bw; beg = beg > x? beg : x; x = i + bw; end = end < x? 
end : x; for (k = beg; k <= end; ++k) { int u; double z; set_u(u, bw, i, k); z = fi[u+0] * bi[u+0]; if (z > max) max = z, max_k = (k-1)<<2 | 0; sum += z; z = fi[u+1] * bi[u+1]; if (z > max) max = z, max_k = (k-1)<<2 | 1; sum += z; } max /= sum; sum *= s[i]; // if everything works as is expected, sum == 1.0 if (state) state[i-1] = max_k; if (q) k = (int)(-4.343 * log(1. - max) + .499), q[i-1] = k > 100? 99 : k; #ifdef _MAIN fprintf(stderr, "(%.10lg,%.10lg) (%d,%d:%c,%c:%d) %lg\n", pb, sum, i-1, max_k>>2, "ACGT"[query[i]], "ACGT"[ref[(max_k>>2)+1]], max_k&3, max); // DEBUG #endif } /*** free ***/ for (i = 0; i <= l_query; ++i) { free(f[i]); free(b[i]); } free(f); free(b); free(s); free(_qual); return Pr; } #ifdef _MAIN #include int main(int argc, char *argv[]) { uint8_t conv[256], *iqual, *ref, *query; int c, l_ref, l_query, i, q = 30, b = 10, P; while ((c = getopt(argc, argv, "b:q:")) >= 0) { switch (c) { case 'b': b = atoi(optarg); break; case 'q': q = atoi(optarg); break; } } if (optind + 2 > argc) { fprintf(stderr, "Usage: %s [-q %d] [-b %d] \n", argv[0], q, b); // example: acttc attc return 1; } memset(conv, 4, 256); conv['a'] = conv['A'] = 0; conv['c'] = conv['C'] = 1; conv['g'] = conv['G'] = 2; conv['t'] = conv['T'] = 3; ref = (uint8_t*)argv[optind]; query = (uint8_t*)argv[optind+1]; l_ref = strlen((char*)ref); l_query = strlen((char*)query); for (i = 0; i < l_ref; ++i) ref[i] = conv[ref[i]]; for (i = 0; i < l_query; ++i) query[i] = conv[query[i]]; iqual = malloc(l_query); memset(iqual, q, l_query); kpa_par_def.bw = b; P = kpa_glocal(ref, l_ref, query, l_query, iqual, &kpa_par_alt, 0, 0); fprintf(stderr, "%d\n", P); free(iqual); return 0; } #endif samtools-0.1.19/kprobaln.h000066400000000000000000000030401212162403000154100ustar00rootroot00000000000000/* The MIT License Copyright (c) 2003-2006, 2008, 2009 by Heng Li Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), 
to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef LH3_KPROBALN_H_ #define LH3_KPROBALN_H_ #include typedef struct { float d, e; int bw; } kpa_par_t; #ifdef __cplusplus extern "C" { #endif int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_query, const uint8_t *iqual, const kpa_par_t *c, int *state, uint8_t *q); #ifdef __cplusplus } #endif extern kpa_par_t kpa_par_def, kpa_par_alt; #endif samtools-0.1.19/kseq.h000066400000000000000000000211531212162403000145500ustar00rootroot00000000000000/* The MIT License Copyright (c) 2008, 2009, 2011 Attractive Chaos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* Last Modified: 05MAR2012 */ #ifndef AC_KSEQ_H #define AC_KSEQ_H #include #include #include #define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r #define KS_SEP_TAB 1 // isspace() && !' ' #define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows) #define KS_SEP_MAX 2 #define __KS_TYPE(type_t) \ typedef struct __kstream_t { \ unsigned char *buf; \ int begin, end, is_eof; \ type_t f; \ } kstream_t; #define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end) #define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0) #define __KS_BASIC(type_t, __bufsize) \ static inline kstream_t *ks_init(type_t f) \ { \ kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \ ks->f = f; \ ks->buf = (unsigned char*)malloc(__bufsize); \ return ks; \ } \ static inline void ks_destroy(kstream_t *ks) \ { \ if (ks) { \ free(ks->buf); \ free(ks); \ } \ } #define __KS_GETC(__read, __bufsize) \ static inline int ks_getc(kstream_t *ks) \ { \ if (ks->is_eof && ks->begin >= ks->end) return -1; \ if (ks->begin >= ks->end) { \ ks->begin = 0; \ ks->end = __read(ks->f, ks->buf, __bufsize); \ if (ks->end < __bufsize) ks->is_eof = 1; \ if (ks->end == 0) return -1; \ } \ return (int)ks->buf[ks->begin++]; \ } #ifndef KSTRING_T #define KSTRING_T kstring_t typedef struct __kstring_t { size_t l, m; char *s; } kstring_t; #endif #ifndef kroundup32 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #endif #define __KS_GETUNTIL(__read, __bufsize) \ static int 
ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \ { \ if (dret) *dret = 0; \ str->l = append? str->l : 0; \ if (ks->begin >= ks->end && ks->is_eof) return -1; \ for (;;) { \ int i; \ if (ks->begin >= ks->end) { \ if (!ks->is_eof) { \ ks->begin = 0; \ ks->end = __read(ks->f, ks->buf, __bufsize); \ if (ks->end < __bufsize) ks->is_eof = 1; \ if (ks->end == 0) break; \ } else break; \ } \ if (delimiter == KS_SEP_LINE) { \ for (i = ks->begin; i < ks->end; ++i) \ if (ks->buf[i] == '\n') break; \ } else if (delimiter > KS_SEP_MAX) { \ for (i = ks->begin; i < ks->end; ++i) \ if (ks->buf[i] == delimiter) break; \ } else if (delimiter == KS_SEP_SPACE) { \ for (i = ks->begin; i < ks->end; ++i) \ if (isspace(ks->buf[i])) break; \ } else if (delimiter == KS_SEP_TAB) { \ for (i = ks->begin; i < ks->end; ++i) \ if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \ } else i = 0; /* never come to here! */ \ if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \ str->m = str->l + (i - ks->begin) + 1; \ kroundup32(str->m); \ str->s = (char*)realloc(str->s, str->m); \ } \ memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \ str->l = str->l + (i - ks->begin); \ ks->begin = i + 1; \ if (i < ks->end) { \ if (dret) *dret = ks->buf[i]; \ break; \ } \ } \ if (str->s == 0) { \ str->m = 1; \ str->s = (char*)calloc(1, 1); \ } else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \ str->s[str->l] = '\0'; \ return str->l; \ } \ static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \ { return ks_getuntil2(ks, delimiter, str, dret, 0); } #define KSTREAM_INIT(type_t, __read, __bufsize) \ __KS_TYPE(type_t) \ __KS_BASIC(type_t, __bufsize) \ __KS_GETC(__read, __bufsize) \ __KS_GETUNTIL(__read, __bufsize) #define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0) #define __KSEQ_BASIC(SCOPE, type_t) \ SCOPE kseq_t *kseq_init(type_t fd) \ { \ kseq_t *s = 
(kseq_t*)calloc(1, sizeof(kseq_t)); \ s->f = ks_init(fd); \ return s; \ } \ SCOPE void kseq_destroy(kseq_t *ks) \ { \ if (!ks) return; \ free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \ ks_destroy(ks->f); \ free(ks); \ } /* Return value: >=0 length of the sequence (normal) -1 end-of-file -2 truncated quality string */ #define __KSEQ_READ(SCOPE) \ SCOPE int kseq_read(kseq_t *seq) \ { \ int c; \ kstream_t *ks = seq->f; \ if (seq->last_char == 0) { /* then jump to the next header line */ \ while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \ if (c == -1) return -1; /* end of file */ \ seq->last_char = c; \ } /* else: the first header char has been read in the previous call */ \ seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \ if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \ if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \ if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \ seq->seq.m = 256; \ seq->seq.s = (char*)malloc(seq->seq.m); \ } \ while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \ if (c == '\n') continue; /* skip empty lines */ \ seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \ ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \ } \ if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \ if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \ seq->seq.m = seq->seq.l + 2; \ kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \ seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \ } \ seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \ if (c != '+') return seq->seq.l; /* FASTA */ \ if (seq->qual.m < seq->seq.m) { /* allocate memory for qual in case insufficient */ \ seq->qual.m = seq->seq.m; \ seq->qual.s = 
(char*)realloc(seq->qual.s, seq->qual.m); \ } \ while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \ if (c == -1) return -2; /* error: no quality string */ \ while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \ seq->last_char = 0; /* we have not come to the next header line */ \ if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \ return seq->seq.l; \ } #define __KSEQ_TYPE(type_t) \ typedef struct { \ kstring_t name, comment, seq, qual; \ int last_char; \ kstream_t *f; \ } kseq_t; #define KSEQ_INIT2(SCOPE, type_t, __read) \ KSTREAM_INIT(type_t, __read, 16384) \ __KSEQ_TYPE(type_t) \ __KSEQ_BASIC(SCOPE, type_t) \ __KSEQ_READ(SCOPE) #define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read) #define KSEQ_DECLARE(type_t) \ __KS_TYPE(type_t) \ __KSEQ_TYPE(type_t) \ extern kseq_t *kseq_init(type_t fd); \ void kseq_destroy(kseq_t *ks); \ int kseq_read(kseq_t *seq); #endif samtools-0.1.19/ksort.h000066400000000000000000000236151212162403000147540ustar00rootroot00000000000000/* The MIT License Copyright (c) 2008 Genome Research Ltd (GRL). Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* Contact: Heng Li */ /* 2012-12-11 (0.1.4): * Defined __ks_insertsort_##name as static to compile with C99. 2008-11-16 (0.1.4): * Fixed a bug in introsort() that happens in rare cases. 2008-11-05 (0.1.3): * Fixed a bug in introsort() for complex comparisons. * Fixed a bug in mergesort(). The previous version is not stable. 2008-09-15 (0.1.2): * Accelerated introsort. On my Mac (not on another Linux machine), my implementation is as fast as std::sort on random input. * Added combsort and in introsort, switch to combsort if the recursion is too deep. 2008-09-13 (0.1.1): * Added k-small algorithm 2008-09-05 (0.1.0): * Initial version */ #ifndef AC_KSORT_H #define AC_KSORT_H #include #include typedef struct { void *left, *right; int depth; } ks_isort_stack_t; #define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; } #define KSORT_INIT(name, type_t, __sort_lt) \ void ks_mergesort_##name(size_t n, type_t array[], type_t temp[]) \ { \ type_t *a2[2], *a, *b; \ int curr, shift; \ \ a2[0] = array; \ a2[1] = temp? 
temp : (type_t*)malloc(sizeof(type_t) * n); \ for (curr = 0, shift = 0; (1ul<> 1) - 1; i != (size_t)(-1); --i) \ ks_heapadjust_##name(i, lsize, l); \ } \ void ks_heapsort_##name(size_t lsize, type_t l[]) \ { \ size_t i; \ for (i = lsize - 1; i > 0; --i) { \ type_t tmp; \ tmp = *l; *l = l[i]; l[i] = tmp; ks_heapadjust_##name(0, i, l); \ } \ } \ static inline void __ks_insertsort_##name(type_t *s, type_t *t) \ { \ type_t *i, *j, swap_tmp; \ for (i = s + 1; i < t; ++i) \ for (j = i; j > s && __sort_lt(*j, *(j-1)); --j) { \ swap_tmp = *j; *j = *(j-1); *(j-1) = swap_tmp; \ } \ } \ void ks_combsort_##name(size_t n, type_t a[]) \ { \ const double shrink_factor = 1.2473309501039786540366528676643; \ int do_swap; \ size_t gap = n; \ type_t tmp, *i, *j; \ do { \ if (gap > 2) { \ gap = (size_t)(gap / shrink_factor); \ if (gap == 9 || gap == 10) gap = 11; \ } \ do_swap = 0; \ for (i = a; i < a + n - gap; ++i) { \ j = i + gap; \ if (__sort_lt(*j, *i)) { \ tmp = *i; *i = *j; *j = tmp; \ do_swap = 1; \ } \ } \ } while (do_swap || gap > 2); \ if (gap != 1) __ks_insertsort_##name(a, a + n); \ } \ void ks_introsort_##name(size_t n, type_t a[]) \ { \ int d; \ ks_isort_stack_t *top, *stack; \ type_t rp, swap_tmp; \ type_t *s, *t, *i, *j, *k; \ \ if (n < 1) return; \ else if (n == 2) { \ if (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \ return; \ } \ for (d = 2; 1ul<>1) + 1; \ if (__sort_lt(*k, *i)) { \ if (__sort_lt(*k, *j)) k = j; \ } else k = __sort_lt(*j, *i)? i : j; \ rp = *k; \ if (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; } \ for (;;) { \ do ++i; while (__sort_lt(*i, rp)); \ do --j; while (i <= j && __sort_lt(rp, *j)); \ if (j <= i) break; \ swap_tmp = *i; *i = *j; *j = swap_tmp; \ } \ swap_tmp = *i; *i = *t; *t = swap_tmp; \ if (i-s > t-i) { \ if (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \ s = t-i > 16? i+1 : t; \ } else { \ if (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \ t = i-s > 16? 
i-1 : s; \ } \ } else { \ if (top == stack) { \ free(stack); \ __ks_insertsort_##name(a, a+n); \ return; \ } else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \ } \ } \ } \ /* This function is adapted from: http://ndevilla.free.fr/median/ */ \ /* 0 <= kk < n */ \ type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk) \ { \ type_t *low, *high, *k, *ll, *hh, *mid; \ low = arr; high = arr + n - 1; k = arr + kk; \ for (;;) { \ if (high <= low) return *k; \ if (high == low + 1) { \ if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \ return *k; \ } \ mid = low + (high - low) / 2; \ if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \ if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \ if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low); \ KSORT_SWAP(type_t, *mid, *(low+1)); \ ll = low + 1; hh = high; \ for (;;) { \ do ++ll; while (__sort_lt(*ll, *low)); \ do --hh; while (__sort_lt(*low, *hh)); \ if (hh < ll) break; \ KSORT_SWAP(type_t, *ll, *hh); \ } \ KSORT_SWAP(type_t, *low, *hh); \ if (hh <= k) low = ll; \ if (hh >= k) high = hh - 1; \ } \ } \ void ks_shuffle_##name(size_t n, type_t a[]) \ { \ int i, j; \ for (i = n; i > 1; --i) { \ type_t tmp; \ j = (int)(drand48() * i); \ tmp = a[j]; a[j] = a[i-1]; a[i-1] = tmp; \ } \ } #define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t) #define ks_introsort(name, n, a) ks_introsort_##name(n, a) #define ks_combsort(name, n, a) ks_combsort_##name(n, a) #define ks_heapsort(name, n, a) ks_heapsort_##name(n, a) #define ks_heapmake(name, n, a) ks_heapmake_##name(n, a) #define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a) #define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k) #define ks_shuffle(name, n, a) ks_shuffle_##name(n, a) #define ks_lt_generic(a, b) ((a) < (b)) #define ks_lt_str(a, b) (strcmp((a), (b)) < 0) typedef const char *ksstr_t; #define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic) #define 
KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str) #endif samtools-0.1.19/kstring.c000066400000000000000000000124141212162403000152610ustar00rootroot00000000000000#include #include #include #include #include #include "kstring.h" int ksprintf(kstring_t *s, const char *fmt, ...) { va_list ap; int l; va_start(ap, fmt); l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); // This line does not work with glibc 2.0. See `man snprintf'. va_end(ap); if (l + 1 > s->m - s->l) { s->m = s->l + l + 2; kroundup32(s->m); s->s = (char*)realloc(s->s, s->m); va_start(ap, fmt); l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); } va_end(ap); s->l += l; return l; } char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux) { const char *p, *start; if (sep) { // set up the table if (str == 0 && (aux->tab[0]&1)) return 0; // no need to set up if we have finished aux->finished = 0; if (sep[1]) { aux->sep = -1; aux->tab[0] = aux->tab[1] = aux->tab[2] = aux->tab[3] = 0; for (p = sep; *p; ++p) aux->tab[*p>>6] |= 1ull<<(*p&0x3f); } else aux->sep = sep[0]; } if (aux->finished) return 0; else if (str) aux->p = str - 1, aux->finished = 0; if (aux->sep < 0) { for (p = start = aux->p + 1; *p; ++p) if (aux->tab[*p>>6]>>(*p&0x3f)&1) break; } else { for (p = start = aux->p + 1; *p; ++p) if (*p == aux->sep) break; } aux->p = p; // end of token if (*p == 0) aux->finished = 1; // no more tokens return (char*)start; } // s MUST BE a null terminated string; l = strlen(s) int ksplit_core(char *s, int delimiter, int *_max, int **_offsets) { int i, n, max, last_char, last_start, *offsets, l; n = 0; max = *_max; offsets = *_offsets; l = strlen(s); #define __ksplit_aux do { \ if (_offsets) { \ s[i] = 0; \ if (n == max) { \ max = max? 
max<<1 : 2; \ offsets = (int*)realloc(offsets, sizeof(int) * max); \ } \ offsets[n++] = last_start; \ } else ++n; \ } while (0) for (i = 0, last_char = last_start = 0; i <= l; ++i) { if (delimiter == 0) { if (isspace(s[i]) || s[i] == 0) { if (isgraph(last_char)) __ksplit_aux; // the end of a field } else { if (isspace(last_char) || last_char == 0) last_start = i; } } else { if (s[i] == delimiter || s[i] == 0) { if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field } else { if (last_char == delimiter || last_char == 0) last_start = i; } } last_char = s[i]; } *_max = max; *_offsets = offsets; return n; } /********************** * Boyer-Moore search * **********************/ typedef unsigned char ubyte_t; // reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html static int *ksBM_prep(const ubyte_t *pat, int m) { int i, *suff, *prep, *bmGs, *bmBc; prep = (int*)calloc(m + 256, sizeof(int)); bmGs = prep; bmBc = prep + m; { // preBmBc() for (i = 0; i < 256; ++i) bmBc[i] = m; for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1; } suff = (int*)calloc(m, sizeof(int)); { // suffixes() int f = 0, g; suff[m - 1] = m; g = m - 1; for (i = m - 2; i >= 0; --i) { if (i > g && suff[i + m - 1 - f] < i - g) suff[i] = suff[i + m - 1 - f]; else { if (i < g) g = i; f = i; while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g; suff[i] = f - g; } } } { // preBmGs() int j = 0; for (i = 0; i < m; ++i) bmGs[i] = m; for (i = m - 1; i >= 0; --i) if (suff[i] == i + 1) for (; j < m - 1 - i; ++j) if (bmGs[j] == m) bmGs[j] = m - 1 - i; for (i = 0; i <= m - 2; ++i) bmGs[m - 1 - suff[i]] = m - 1 - i; } free(suff); return prep; } void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep) { int i, j, *prep = 0, *bmGs, *bmBc; const ubyte_t *str, *pat; str = (const ubyte_t*)_str; pat = (const ubyte_t*)_pat; prep = (_prep == 0 || *_prep == 0)? 
ksBM_prep(pat, m) : *_prep; if (_prep && *_prep == 0) *_prep = prep; bmGs = prep; bmBc = prep + m; j = 0; while (j <= n - m) { for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i); if (i >= 0) { int max = bmBc[str[i+j]] - m + 1 + i; if (max < bmGs[i]) max = bmGs[i]; j += max; } else return (void*)(str + j); } if (_prep == 0) free(prep); return 0; } char *kstrstr(const char *str, const char *pat, int **_prep) { return (char*)kmemmem(str, strlen(str), pat, strlen(pat), _prep); } char *kstrnstr(const char *str, const char *pat, int n, int **_prep) { return (char*)kmemmem(str, n, pat, strlen(pat), _prep); } /*********************** * The main() function * ***********************/ #ifdef KSTRING_MAIN #include int main() { kstring_t *s; int *fields, n, i; ks_tokaux_t aux; char *p; s = (kstring_t*)calloc(1, sizeof(kstring_t)); // test ksprintf() ksprintf(s, " abcdefg: %d ", 100); printf("'%s'\n", s->s); // test ksplit() fields = ksplit(s, 0, &n); for (i = 0; i < n; ++i) printf("field[%d] = '%s'\n", i, s->s + fields[i]); // test kstrtok() s->l = 0; for (p = kstrtok("ab:cde:fg/hij::k", ":/", &aux); p; p = kstrtok(0, 0, &aux)) { kputsn(p, aux.p - p, s); kputc('\n', s); } printf("%s", s->s); // free free(s->s); free(s); free(fields); { static char *str = "abcdefgcdgcagtcakcdcd"; static char *pat = "cd"; char *ret, *s = str; int *prep = 0; while ((ret = kstrstr(s, pat, &prep)) != 0) { printf("match: %s\n", ret); s = ret + prep[0]; } free(prep); } return 0; } #endif samtools-0.1.19/kstring.h000066400000000000000000000105531212162403000152700ustar00rootroot00000000000000/* The MIT License Copyright (c) by Attractive Chaos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the 
Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef KSTRING_H #define KSTRING_H #include #include #include #ifndef kroundup32 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #endif #ifndef KSTRING_T #define KSTRING_T kstring_t typedef struct __kstring_t { size_t l, m; char *s; } kstring_t; #endif typedef struct { uint64_t tab[4]; int sep, finished; const char *p; // end of the current token } ks_tokaux_t; #ifdef __cplusplus extern "C" { #endif int ksprintf(kstring_t *s, const char *fmt, ...); int ksplit_core(char *s, int delimiter, int *_max, int **_offsets); char *kstrstr(const char *str, const char *pat, int **_prep); char *kstrnstr(const char *str, const char *pat, int n, int **_prep); void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep); /* kstrtok() is similar to strtok_r() except that str is not * modified and both str and sep can be NULL. For efficiency, it is * actually recommended to set both to NULL in the subsequent calls * if sep is not changed. 
*/ char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux); #ifdef __cplusplus } #endif static inline void ks_resize(kstring_t *s, size_t size) { if (s->m < size) { s->m = size; kroundup32(s->m); s->s = (char*)realloc(s->s, s->m); } } static inline int kputsn(const char *p, int l, kstring_t *s) { if (s->l + l + 1 >= s->m) { s->m = s->l + l + 2; kroundup32(s->m); s->s = (char*)realloc(s->s, s->m); } memcpy(s->s + s->l, p, l); s->l += l; s->s[s->l] = 0; return l; } static inline int kputs(const char *p, kstring_t *s) { return kputsn(p, strlen(p), s); } static inline int kputc(int c, kstring_t *s) { if (s->l + 1 >= s->m) { s->m = s->l + 2; kroundup32(s->m); s->s = (char*)realloc(s->s, s->m); } s->s[s->l++] = c; s->s[s->l] = 0; return c; } static inline int kputw(int c, kstring_t *s) { char buf[16]; int l, x; if (c == 0) return kputc('0', s); if(c < 0) for (l = 0, x = c; x < 0; x /= 10) buf[l++] = '0' - (x%10); else for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0'; if (c < 0) buf[l++] = '-'; if (s->l + l + 1 >= s->m) { s->m = s->l + l + 2; kroundup32(s->m); s->s = (char*)realloc(s->s, s->m); } for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x]; s->s[s->l] = 0; return 0; } static inline int kputuw(unsigned c, kstring_t *s) { char buf[16]; int l, i; unsigned x; if (c == 0) return kputc('0', s); for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0'; if (s->l + l + 1 >= s->m) { s->m = s->l + l + 2; kroundup32(s->m); s->s = (char*)realloc(s->s, s->m); } for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; s->s[s->l] = 0; return 0; } static inline int kputl(long c, kstring_t *s) { char buf[32]; long l, x; if (c == 0) return kputc('0', s); for (l = 0, x = c < 0? 
-c : c; x > 0; x /= 10) buf[l++] = x%10 + '0'; if (c < 0) buf[l++] = '-'; if (s->l + l + 1 >= s->m) { s->m = s->l + l + 2; kroundup32(s->m); s->s = (char*)realloc(s->s, s->m); } for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x]; s->s[s->l] = 0; return 0; } static inline int *ksplit(kstring_t *s, int delimiter, int *n) { int max = 0, *offsets = 0; *n = ksplit_core(s->s, delimiter, &max, &offsets); return offsets; } #endif samtools-0.1.19/misc/000077500000000000000000000000001212162403000143655ustar00rootroot00000000000000samtools-0.1.19/misc/HmmGlocal.java000066400000000000000000000150741212162403000171020ustar00rootroot00000000000000import java.io.*; import java.lang.*; public class HmmGlocal { private double[] qual2prob; private double cd, ce; // gap open probility [1e-3], gap extension probability [0.1] private int cb; // band width [7] public HmmGlocal(final double d, final double e, final int b) { cd = d; ce = e; cb = b; qual2prob = new double[256]; for (int i = 0; i < 256; ++i) qual2prob[i] = Math.pow(10, -i/10.); } private static int set_u(final int b, final int i, final int k) { int x = i - b; x = x > 0? x : 0; return (k + 1 - x) * 3; } public int hmm_glocal(final byte[] _ref, final byte[] _query, final byte[] _iqual, int[] state, byte[] q) { int i, k; /*** initialization ***/ // change coordinates int l_ref = _ref.length; byte[] ref = new byte[l_ref+1]; for (i = 0; i < l_ref; ++i) ref[i+1] = _ref[i]; // FIXME: this is silly... int l_query = _query.length; byte[] query = new byte[l_query+1]; double[] qual = new double[l_query+1]; for (i = 0; i < l_query; ++i) { query[i+1] = _query[i]; qual[i+1] = qual2prob[_iqual[i]]; } // set band width int bw2, bw = l_ref > l_query? 
l_ref : l_query; if (bw > cb) bw = cb; if (bw < Math.abs(l_ref - l_query)) bw = Math.abs(l_ref - l_query); bw2 = bw * 2 + 1; // allocate the forward and backward matrices f[][] and b[][] and the scaling array s[] double[][] f = new double[l_query+1][bw2*3 + 6]; double[][] b = new double[l_query+1][bw2*3 + 6]; double[] s = new double[l_query+2]; // initialize transition probabilities double sM, sI, bM, bI; sM = sI = 1. / (2 * l_query + 2); bM = (1 - cd) / l_query; bI = cd / l_query; // (bM+bI)*l_query==1 double[] m = new double[9]; m[0*3+0] = (1 - cd - cd) * (1 - sM); m[0*3+1] = m[0*3+2] = cd * (1 - sM); m[1*3+0] = (1 - ce) * (1 - sI); m[1*3+1] = ce * (1 - sI); m[1*3+2] = 0.; m[2*3+0] = 1 - ce; m[2*3+1] = 0.; m[2*3+2] = ce; /*** forward ***/ // f[0] f[0][set_u(bw, 0, 0)] = s[0] = 1.; { // f[1] double[] fi = f[1]; double sum; int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1, _beg, _end; for (k = beg, sum = 0.; k <= end; ++k) { int u; double e = (ref[k] > 3 || query[1] > 3)? 1. : ref[k] == query[1]? 1. - qual[1] : qual[1] / 3.; u = set_u(bw, 1, k); fi[u+0] = e * bM; fi[u+1] = .25 * bI; sum += fi[u] + fi[u+1]; } // rescale s[1] = sum; _beg = set_u(bw, 1, beg); _end = set_u(bw, 1, end); _end += 2; for (k = _beg; k <= _end; ++k) fi[k] /= sum; } // f[2..l_query] for (i = 2; i <= l_query; ++i) { double[] fi = f[i], fi1 = f[i-1]; double sum, qli = qual[i]; int beg = 1, end = l_ref, x, _beg, _end; byte qyi = query[i]; x = i - bw; beg = beg > x? beg : x; // band start x = i + bw; end = end < x? end : x; // band end for (k = beg, sum = 0.; k <= end; ++k) { int u, v11, v01, v10; double e; e = (ref[k] > 3 || qyi > 3)? 1. : ref[k] == qyi? 1. 
- qli : qli / 3.; u = set_u(bw, i, k); v11 = set_u(bw, i-1, k-1); v10 = set_u(bw, i-1, k); v01 = set_u(bw, i, k-1); fi[u+0] = e * (m[0] * fi1[v11+0] + m[3] * fi1[v11+1] + m[6] * fi1[v11+2]); fi[u+1] = .25 * (m[1] * fi1[v10+0] + m[4] * fi1[v10+1]); fi[u+2] = m[2] * fi[v01+0] + m[8] * fi[v01+2]; sum += fi[u] + fi[u+1] + fi[u+2]; //System.out.println("("+i+","+k+";"+u+"): "+fi[u]+","+fi[u+1]+","+fi[u+2]); } // rescale s[i] = sum; _beg = set_u(bw, i, beg); _end = set_u(bw, i, end); _end += 2; for (k = _beg, sum = 1./sum; k <= _end; ++k) fi[k] *= sum; } { // f[l_query+1] double sum; for (k = 1, sum = 0.; k <= l_ref; ++k) { int u = set_u(bw, l_query, k); if (u < 3 || u >= bw2*3+3) continue; sum += f[l_query][u+0] * sM + f[l_query][u+1] * sI; } s[l_query+1] = sum; // the last scaling factor } /*** backward ***/ // b[l_query] (b[l_query+1][0]=1 and thus \tilde{b}[][]=1/s[l_query+1]; this is where s[l_query+1] comes from) for (k = 1; k <= l_ref; ++k) { int u = set_u(bw, l_query, k); double[] bi = b[l_query]; if (u < 3 || u >= bw2*3+3) continue; bi[u+0] = sM / s[l_query] / s[l_query+1]; bi[u+1] = sI / s[l_query] / s[l_query+1]; } // b[l_query-1..1] for (i = l_query - 1; i >= 1; --i) { int beg = 1, end = l_ref, x, _beg, _end; double[] bi = b[i], bi1 = b[i+1]; double y = (i > 1)? 1. : 0., qli1 = qual[i+1]; byte qyi1 = query[i+1]; x = i - bw; beg = beg > x? beg : x; x = i + bw; end = end < x? end : x; for (k = end; k >= beg; --k) { int u, v11, v01, v10; double e; u = set_u(bw, i, k); v11 = set_u(bw, i+1, k+1); v10 = set_u(bw, i+1, k); v01 = set_u(bw, i, k+1); e = (k >= l_ref? 0 : (ref[k+1] > 3 || qyi1 > 3)? 1. : ref[k+1] == qyi1? 1. - qli1 : qli1 / 3.) * bi1[v11]; bi[u+0] = e * m[0] + .25 * m[1] * bi1[v10+1] + m[2] * bi[v01+2]; // bi1[v11] has been foled into e. 
bi[u+1] = e * m[3] + .25 * m[4] * bi1[v10+1]; bi[u+2] = (e * m[6] + m[8] * bi[v01+2]) * y; } // rescale _beg = set_u(bw, i, beg); _end = set_u(bw, i, end); _end += 2; for (k = _beg, y = 1./s[i]; k <= _end; ++k) bi[k] *= y; } double pb; { // b[0] int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1; double sum = 0.; for (k = end; k >= beg; --k) { int u = set_u(bw, 1, k); double e = (ref[k] > 3 || query[1] > 3)? 1. : ref[k] == query[1]? 1. - qual[1] : qual[1] / 3.; if (u < 3 || u >= bw2*3+3) continue; sum += e * b[1][u+0] * bM + .25 * b[1][u+1] * bI; } pb = b[0][set_u(bw, 0, 0)] = sum / s[0]; // if everything works as is expected, pb == 1.0 } int is_diff = Math.abs(pb - 1.) > 1e-7? 1 : 0; /*** MAP ***/ for (i = 1; i <= l_query; ++i) { double sum = 0., max = 0.; double[] fi = f[i], bi = b[i]; int beg = 1, end = l_ref, x, max_k = -1; x = i - bw; beg = beg > x? beg : x; x = i + bw; end = end < x? end : x; for (k = beg; k <= end; ++k) { int u = set_u(bw, i, k); double z; sum += (z = fi[u+0] * bi[u+0]); if (z > max) { max = z; max_k = (k-1)<<2 | 0; } sum += (z = fi[u+1] * bi[u+1]); if (z > max) { max = z; max_k = (k-1)<<2 | 1; } } max /= sum; sum *= s[i]; // if everything works as is expected, sum == 1.0 if (state != null) state[i-1] = max_k; if (q != null) { k = (int)(-4.343 * Math.log(1. - max) + .499); q[i-1] = (byte)(k > 100? 
99 : k); } //System.out.println("("+pb+","+sum+")"+" ("+(i-1)+","+(max_k>>2)+","+(max_k&3)+","+max+")"); } return 0; } public static void main(String[] args) { byte[] ref = {'\0', '\1', '\3', '\3', '\1'}; byte[] query = {'\0', '\3', '\3', '\1'}; byte[] qual = new byte[4]; qual[0] = qual[1] = qual[2] = qual[3] = (byte)20; HmmGlocal hg = new HmmGlocal(1e-3, 0.1, 7); hg.hmm_glocal(ref, query, qual, null, null); } }samtools-0.1.19/misc/Makefile000066400000000000000000000031111212162403000160210ustar00rootroot00000000000000CC= gcc CXX= g++ CFLAGS= -g -Wall -O2 #-m64 #-arch ppc CXXFLAGS= $(CFLAGS) DFLAGS= -D_FILE_OFFSET_BITS=64 OBJS= PROG= md5sum-lite md5fa maq2sam-short maq2sam-long ace2sam wgsim bamcheck INCLUDES= -I.. SUBDIRS= . .SUFFIXES:.c .o .c.o: $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@ all:$(PROG) lib-recur all-recur clean-recur cleanlocal-recur install-recur: @target=`echo $@ | sed s/-recur//`; \ wdir=`pwd`; \ list='$(SUBDIRS)'; for subdir in $$list; do \ cd $$subdir; \ $(MAKE) CC="$(CC)" DFLAGS="$(DFLAGS)" CFLAGS="$(CFLAGS)" \ INCLUDES="$(INCLUDES)" $$target || exit 1; \ cd $$wdir; \ done; lib: bamcheck:bamcheck.o $(CC) $(CFLAGS) -o $@ bamcheck.o -L.. -lm -lbam -lpthread -lz bamcheck.o:bamcheck.c ../faidx.h ../khash.h ../sam.h ../razf.h $(CC) $(CFLAGS) -c -I.. -o $@ bamcheck.c ace2sam:ace2sam.o $(CC) $(CFLAGS) -o $@ ace2sam.o -lz wgsim:wgsim.o $(CC) $(CFLAGS) -o $@ wgsim.o -lm -lz md5fa:md5.o md5fa.o md5.h ../kseq.h $(CC) $(CFLAGS) -o $@ md5.o md5fa.o -lz md5sum-lite:md5sum-lite.o $(CC) $(CFLAGS) -o $@ md5sum-lite.o md5sum-lite.o:md5.c md5.h $(CC) -c $(CFLAGS) -DMD5SUM_MAIN -o $@ md5.c maq2sam-short:maq2sam.c $(CC) $(CFLAGS) -o $@ maq2sam.c -lz maq2sam-long:maq2sam.c $(CC) $(CFLAGS) -DMAQ_LONGREADS -o $@ maq2sam.c -lz md5fa.o:md5.h md5fa.c $(CC) $(CFLAGS) -c -I.. -o $@ md5fa.c wgsim.o:wgsim.c ../kseq.h $(CC) $(CFLAGS) -c -I.. -o $@ wgsim.c ace2sam.o:ace2sam.c ../kstring.h ../kseq.h $(CC) $(CFLAGS) -c -I.. 
-o $@ ace2sam.c cleanlocal: rm -fr gmon.out *.o a.out *.exe *.dSYM $(PROG) *~ *.a clean:cleanlocal-recur samtools-0.1.19/misc/ace2sam.c000066400000000000000000000233411212162403000160470ustar00rootroot00000000000000/* The MIT License Copyright (c) 2011 Heng Li Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include "kstring.h" #include "kseq.h" KSTREAM_INIT(gzFile, gzread, 16384) #define N_TMPSTR 5 #define LINE_LEN 60 // append a CIGAR operation plus length #define write_cigar(_c, _n, _m, _v) do { \ if (_n == _m) { \ _m = _m? 
_m<<1 : 4; \ _c = realloc(_c, _m * sizeof(unsigned)); \ } \ _c[_n++] = (_v); \ } while (0) // a fatal error static void fatal(const char *msg) { fprintf(stderr, "E %s\n", msg); exit(1); } // remove pads static void remove_pads(const kstring_t *src, kstring_t *dst) { int i, j; dst->l = 0; kputsn(src->s, src->l, dst); for (i = j = 0; i < dst->l; ++i) if (dst->s[i] != '*') dst->s[j++] = dst->s[i]; dst->s[j] = 0; dst->l = j; } int main(int argc, char *argv[]) { gzFile fp; kstream_t *ks; kstring_t s, t[N_TMPSTR]; int dret, i, k, af_n, af_max, af_i, c, is_padded = 0, write_cns = 0, *p2u = 0; long m_cigar = 0, n_cigar = 0; unsigned *af, *cigar = 0; while ((c = getopt(argc, argv, "pc")) >= 0) { switch (c) { case 'p': is_padded = 1; break; case 'c': write_cns = 1; break; } } if (argc == optind) { fprintf(stderr, "\nUsage: ace2sam [-pc] \n\n"); fprintf(stderr, "Options: -p output padded SAM\n"); fprintf(stderr, " -c write the contig sequence in SAM\n\n"); fprintf(stderr, "Notes: 1. Fields must appear in the following order: (CO->[BQ]->(AF)->(RD->QA))\n"); fprintf(stderr, " 2. The order of reads in AF and in RD must be identical\n"); fprintf(stderr, " 3. Except in BQ, words and numbers must be separated by a single SPACE or TAB\n"); fprintf(stderr, " 4. This program writes the headerless SAM to stdout and header to stderr\n\n"); return 1; } s.l = s.m = 0; s.s = 0; af_n = af_max = af_i = 0; af = 0; for (i = 0; i < N_TMPSTR; ++i) t[i].l = t[i].m = 0, t[i].s = 0; fp = strcmp(argv[1], "-")? 
gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r"); ks = ks_init(fp); while (ks_getuntil(ks, 0, &s, &dret) >= 0) { if (strcmp(s.s, "CO") == 0) { // contig sequence kstring_t *cns; t[0].l = t[1].l = t[2].l = t[3].l = t[4].l = 0; // 0: name; 1: padded ctg; 2: unpadded ctg/padded read; 3: unpadded read; 4: SAM line af_n = af_i = 0; // reset the af array ks_getuntil(ks, 0, &s, &dret); kputs(s.s, &t[0]); // contig name ks_getuntil(ks, '\n', &s, &dret); // read the whole line while (ks_getuntil(ks, '\n', &s, &dret) >= 0 && s.l > 0) kputsn(s.s, s.l, &t[1]); // read the padded consensus sequence remove_pads(&t[1], &t[2]); // construct the unpadded sequence // compute the array for mapping padded positions to unpadded positions p2u = realloc(p2u, t[1].m * sizeof(int)); for (i = k = 0; i < t[1].l; ++i) { p2u[i] = k; if (t[1].s[i] != '*') ++k; } // write out the SAM header and contig sequences fprintf(stderr, "H @SQ\tSN:%s\tLN:%ld\n", t[0].s, t[is_padded?1:2].l); // The SAM header line cns = &t[is_padded?1:2]; fprintf(stderr, "S >%s\n", t[0].s); for (i = 0; i < cns->l; i += LINE_LEN) { fputs("S ", stderr); for (k = 0; k < LINE_LEN && i + k < cns->l; ++k) fputc(cns->s[i + k], stderr); fputc('\n', stderr); } #define __padded2cigar(sp) do { \ int i, l_M = 0, l_D = 0; \ for (i = 0; i < sp.l; ++i) { \ if (sp.s[i] == '*') { \ if (l_M) write_cigar(cigar, n_cigar, m_cigar, l_M<<4); \ ++l_D; l_M = 0; \ } else { \ if (l_D) write_cigar(cigar, n_cigar, m_cigar, l_D<<4 | 2); \ ++l_M; l_D = 0; \ } \ } \ if (l_M) write_cigar(cigar, n_cigar, m_cigar, l_M<<4); \ else write_cigar(cigar, n_cigar, m_cigar, l_D<<4 | 2); \ } while (0) if (write_cns) { // write the consensus SAM line (dummy read) n_cigar = 0; if (is_padded) __padded2cigar(t[1]); else write_cigar(cigar, n_cigar, m_cigar, t[2].l<<4); kputsn(t[0].s, t[0].l, &t[4]); kputs("\t516\t", &t[4]); kputsn(t[0].s, t[0].l, &t[4]); kputs("\t1\t60\t", &t[4]); for (i = 0; i < n_cigar; ++i) { kputw(cigar[i]>>4, &t[4]); 
kputc("MIDNSHP=X"[cigar[i]&0xf], &t[4]); } kputs("\t*\t0\t0\t", &t[4]); kputsn(t[2].s, t[2].l, &t[4]); kputs("\t*", &t[4]); } } else if (strcmp(s.s, "BQ") == 0) { // contig quality if (t[0].l == 0) fatal("come to 'BQ' before reading 'CO'"); if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret); // read the entire "BQ" line if (write_cns) t[4].s[--t[4].l] = 0; // remove the trailing "*" for (i = 0; i < t[2].l; ++i) { // read the consensus quality int q; if (ks_getuntil(ks, 0, &s, &dret) < 0) fprintf(stderr, "E truncated contig quality\n"); if (s.l) { q = atoi(s.s) + 33; if (q > 126) q = 126; if (write_cns) kputc(q, &t[4]); } else --i; } if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret); ks_getuntil(ks, '\n', &s, &dret); // skip the empty line if (write_cns) puts(t[4].s); t[4].l = 0; } else if (strcmp(s.s, "AF") == 0) { // padded read position int reversed, neg, pos; if (t[0].l == 0) fatal("come to 'AF' before reading 'CO'"); if (write_cns) { if (t[4].l) puts(t[4].s); t[4].l = 0; } ks_getuntil(ks, 0, &s, &dret); // read name ks_getuntil(ks, 0, &s, &dret); reversed = s.s[0] == 'C'? 1 : 0; // strand ks_getuntil(ks, 0, &s, &dret); pos = atoi(s.s); neg = pos < 0? 1 : 0; pos = pos < 0? -pos : pos; // position if (af_n == af_max) { // double the af array af_max = af_max? 
af_max<<1 : 4; af = realloc(af, af_max * sizeof(unsigned)); } af[af_n++] = pos << 2 | neg << 1 | reversed; // keep the placement information } else if (strcmp(s.s, "RD") == 0) { // read sequence if (af_i >= af_n) fatal("more 'RD' records than 'AF'"); t[2].l = t[3].l = t[4].l = 0; ks_getuntil(ks, 0, &t[4], &dret); // QNAME if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret); // read the entire RD line while (ks_getuntil(ks, '\n', &s, &dret) >= 0 && s.l > 0) kputs(s.s, &t[2]); // read the read sequence } else if (strcmp(s.s, "QA") == 0) { // clipping if (af_i >= af_n) fatal("more 'QA' records than 'AF'"); int beg, end, pos, op; ks_getuntil(ks, 0, &s, &dret); ks_getuntil(ks, 0, &s, &dret); // skip quality clipping ks_getuntil(ks, 0, &s, &dret); beg = atoi(s.s) - 1; // align clipping start ks_getuntil(ks, 0, &s, &dret); end = atoi(s.s); // clipping end if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret); // compute 1-based POS pos = af[af_i]>>2; // retrieve the position information if (af[af_i]>>1&1) pos = -pos; pos += beg; // now pos is the true padded position // generate CIGAR remove_pads(&t[2], &t[3]); // backup the unpadded read sequence n_cigar = 0; if (beg) write_cigar(cigar, n_cigar, m_cigar, beg<<4|4); if (is_padded) { __padded2cigar(t[2]); if (beg && n_cigar > 1) cigar[1] -= beg<<4; // fix the left-hand CIGAR if (end < t[2].l && n_cigar) cigar[n_cigar-1] -= (t[2].l - end)<<4; // fix the right-hand CIGAR } else { // generate flattened CIGAR string for (i = beg, k = pos - 1; i < end; ++i, ++k) t[2].s[i] = t[2].s[i] != '*'? (t[1].s[k] != '*'? 0 : 1) : (t[1].s[k] != '*'? 
2 : 6); // generate the proper CIGAR for (i = beg + 1, k = 1, op = t[2].s[beg]; i < end; ++i) { if (op != t[2].s[i]) { write_cigar(cigar, n_cigar, m_cigar, k<<4|op); op = t[2].s[i]; k = 1; } else ++k; } write_cigar(cigar, n_cigar, m_cigar, k<<4|op); // remove unnecessary "P" and possibly merge adjacent operations for (i = 2; i < n_cigar; ++i) { if ((cigar[i]&0xf) != 1 && (cigar[i-1]&0xf) == 6 && (cigar[i-2]&0xf) != 1) { cigar[i-1] = 0; if ((cigar[i]&0xf) == (cigar[i-2]&0xf)) // merge operations cigar[i] += cigar[i-2], cigar[i-2] = 0; } } for (i = k = 0; i < n_cigar; ++i) // squeeze out dumb operations if (cigar[i]) cigar[k++] = cigar[i]; n_cigar = k; } if (end < t[2].l) write_cigar(cigar, n_cigar, m_cigar, (t[2].l - end)<<4|4); // write the SAM line for the read kputc('\t', &t[4]); // QNAME has already been written kputw((af[af_i]&1)? 16 : 0, &t[4]); kputc('\t', &t[4]); // FLAG kputsn(t[0].s, t[0].l, &t[4]); kputc('\t', &t[4]); // RNAME kputw(is_padded? pos : p2u[pos-1]+1, &t[4]); // POS kputs("\t60\t", &t[4]); // MAPQ for (i = 0; i < n_cigar; ++i) { // CIGAR kputw(cigar[i]>>4, &t[4]); kputc("MIDNSHP=X"[cigar[i]&0xf], &t[4]); } kputs("\t*\t0\t0\t", &t[4]); // empty MRNM, MPOS and TLEN kputsn(t[3].s, t[3].l, &t[4]); // unpadded SEQ kputs("\t*", &t[4]); // QUAL puts(t[4].s); // print to stdout ++af_i; } else if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret); } ks_destroy(ks); gzclose(fp); free(af); free(s.s); free(cigar); free(p2u); for (i = 0; i < N_TMPSTR; ++i) free(t[i].s); return 0; } samtools-0.1.19/misc/bamcheck.c000066400000000000000000001634711212162403000163020ustar00rootroot00000000000000/* Author: petr.danecek@sanger gcc -Wall -Winline -g -O2 -I ~/git/samtools bamcheck.c -o bamcheck -lm -lz -L ~/git/samtools -lbam -lpthread Assumptions, approximations and other issues: - GC-depth graph does not split reads, the starting position determines which bin is incremented. There are small overlaps between bins (max readlen-1). However, the bins are big (20k). 
- coverage distribution ignores softclips and deletions - some stats require sorted BAMs - GC content graph can have an untidy, step-like pattern when BAM contains multiple read lengths. - 'bases mapped' (stats->nbases_mapped) is calculated from read lengths given by BAM (core.l_qseq) - With the -t option, the whole reads are used. Except for the number of mapped bases (cigar) counts, no splicing is done, no indels or soft clips are considered, even small overlap is good enough to include the read in the stats. */ #define BAMCHECK_VERSION "2012-09-04" #define _ISOC99_SOURCE #include #include #include #include #include #include #include #include #include #include "faidx.h" #include "khash.h" #include "sam.h" #include "sam_header.h" #include "razf.h" #define BWA_MIN_RDLEN 35 #define IS_PAIRED(bam) ((bam)->core.flag&BAM_FPAIRED && !((bam)->core.flag&BAM_FUNMAP) && !((bam)->core.flag&BAM_FMUNMAP)) #define IS_UNMAPPED(bam) ((bam)->core.flag&BAM_FUNMAP) #define IS_REVERSE(bam) ((bam)->core.flag&BAM_FREVERSE) #define IS_MATE_REVERSE(bam) ((bam)->core.flag&BAM_FMREVERSE) #define IS_READ1(bam) ((bam)->core.flag&BAM_FREAD1) #define IS_READ2(bam) ((bam)->core.flag&BAM_FREAD2) #define IS_DUP(bam) ((bam)->core.flag&BAM_FDUP) typedef struct { int32_t line_len, line_blen; int64_t len; uint64_t offset; } faidx1_t; KHASH_MAP_INIT_STR(kh_faidx, faidx1_t) KHASH_MAP_INIT_STR(kh_bam_tid, int) KHASH_MAP_INIT_STR(kh_rg, const char *) struct __faidx_t { RAZF *rz; int n, m; char **name; khash_t(kh_faidx) *hash; }; typedef struct { float gc; uint32_t depth; } gc_depth_t; // For coverage distribution, a simple pileup typedef struct { int64_t pos; int size, start; int *buffer; } round_buffer_t; typedef struct { uint32_t from, to; } pos_t; typedef struct { int npos,mpos,cpos; pos_t *pos; } regions_t; typedef struct { // Parameters int trim_qual; // bwa trim quality // Dimensions of the quality histogram holder (quals_1st,quals_2nd), GC content holder (gc_1st,gc_2nd), // insert size histogram 
holder int nquals; // The number of quality bins int nbases; // The maximum sequence length the allocated array can hold int nisize; // The maximum insert size that the allocated array can hold int ngc; // The size of gc_1st and gc_2nd int nindels; // The maximum indel length for indel distribution // Arrays for the histogram data uint64_t *quals_1st, *quals_2nd; uint64_t *gc_1st, *gc_2nd; uint64_t *isize_inward, *isize_outward, *isize_other; uint64_t *acgt_cycles; uint64_t *read_lengths; uint64_t *insertions, *deletions; uint64_t *ins_cycles_1st, *ins_cycles_2nd, *del_cycles_1st, *del_cycles_2nd; // The extremes encountered int max_len; // Maximum read length int max_qual; // Maximum quality float isize_main_bulk; // There are always some unrealistically big insert sizes, report only the main part int is_sorted; // Summary numbers uint64_t total_len; uint64_t total_len_dup; uint64_t nreads_1st; uint64_t nreads_2nd; uint64_t nreads_filtered; uint64_t nreads_dup; uint64_t nreads_unmapped; uint64_t nreads_unpaired; uint64_t nreads_paired; uint64_t nreads_anomalous; uint64_t nreads_mq0; uint64_t nbases_mapped; uint64_t nbases_mapped_cigar; uint64_t nbases_trimmed; // bwa trimmed bases uint64_t nmismatches; uint64_t nreads_QCfailed, nreads_secondary; // GC-depth related data uint32_t ngcd, igcd; // The maximum number of GC depth bins and index of the current bin gc_depth_t *gcd; // The GC-depth bins holder int gcd_bin_size; // The size of GC-depth bin uint32_t gcd_ref_size; // The approximate size of the genome int32_t tid, gcd_pos; // Position of the current bin int32_t pos; // Position of the last read // Coverage distribution related data int ncov; // The number of coverage bins uint64_t *cov; // The coverage frequencies int cov_min,cov_max,cov_step; // Minimum, maximum coverage and size of the coverage bins round_buffer_t cov_rbuf; // Pileup round buffer // Mismatches by read cycle uint8_t *rseq_buf; // A buffer for reference sequence to check the mismatches 
against int mrseq_buf; // The size of the buffer int32_t rseq_pos; // The coordinate of the first base in the buffer int32_t nrseq_buf; // The used part of the buffer uint64_t *mpc_buf; // Mismatches per cycle // Filters int filter_readlen; // Target regions int nregions, reg_from,reg_to; regions_t *regions; // Auxiliary data int flag_require, flag_filter; double sum_qual; // For calculating average quality value samfile_t *sam; khash_t(kh_rg) *rg_hash; // Read groups to include, the array is null-terminated faidx_t *fai; // Reference sequence for GC-depth graph int argc; // Command line arguments to be printed on the output char **argv; } stats_t; void error(const char *format, ...); void bam_init_header_hash(bam_header_t *header); int is_in_regions(bam1_t *bam_line, stats_t *stats); // Coverage distribution methods inline int coverage_idx(int min, int max, int n, int step, int depth) { if ( depth < min ) return 0; if ( depth > max ) return n-1; return 1 + (depth - min) / step; } inline int round_buffer_lidx2ridx(int offset, int size, int64_t refpos, int64_t pos) { return (offset + (pos-refpos) % size) % size; } void round_buffer_flush(stats_t *stats, int64_t pos) { int ibuf,idp; if ( pos==stats->cov_rbuf.pos ) return; int64_t new_pos = pos; if ( pos==-1 || pos - stats->cov_rbuf.pos >= stats->cov_rbuf.size ) { // Flush the whole buffer, but in sequential order, pos = stats->cov_rbuf.pos + stats->cov_rbuf.size - 1; } if ( pos < stats->cov_rbuf.pos ) error("Expected coordinates in ascending order, got %ld after %ld\n", pos,stats->cov_rbuf.pos); int ifrom = stats->cov_rbuf.start; int ito = round_buffer_lidx2ridx(stats->cov_rbuf.start,stats->cov_rbuf.size,stats->cov_rbuf.pos,pos-1); if ( ifrom>ito ) { for (ibuf=ifrom; ibufcov_rbuf.size; ibuf++) { if ( !stats->cov_rbuf.buffer[ibuf] ) continue; idp = coverage_idx(stats->cov_min,stats->cov_max,stats->ncov,stats->cov_step,stats->cov_rbuf.buffer[ibuf]); stats->cov[idp]++; stats->cov_rbuf.buffer[ibuf] = 0; } ifrom = 0; } 
for (ibuf=ifrom; ibuf<=ito; ibuf++) { if ( !stats->cov_rbuf.buffer[ibuf] ) continue; idp = coverage_idx(stats->cov_min,stats->cov_max,stats->ncov,stats->cov_step,stats->cov_rbuf.buffer[ibuf]); stats->cov[idp]++; stats->cov_rbuf.buffer[ibuf] = 0; } stats->cov_rbuf.start = (new_pos==-1) ? 0 : round_buffer_lidx2ridx(stats->cov_rbuf.start,stats->cov_rbuf.size,stats->cov_rbuf.pos,pos); stats->cov_rbuf.pos = new_pos; } void round_buffer_insert_read(round_buffer_t *rbuf, int64_t from, int64_t to) { if ( to-from >= rbuf->size ) error("The read length too big (%d), please increase the buffer length (currently %d)\n", to-from+1,rbuf->size); if ( from < rbuf->pos ) error("The reads are not sorted (%ld comes after %ld).\n", from,rbuf->pos); int ifrom,ito,ibuf; ifrom = round_buffer_lidx2ridx(rbuf->start,rbuf->size,rbuf->pos,from); ito = round_buffer_lidx2ridx(rbuf->start,rbuf->size,rbuf->pos,to); if ( ifrom>ito ) { for (ibuf=ifrom; ibufsize; ibuf++) rbuf->buffer[ibuf]++; ifrom = 0; } for (ibuf=ifrom; ibuf<=ito; ibuf++) rbuf->buffer[ibuf]++; } // Calculate the number of bases in the read trimmed by BWA int bwa_trim_read(int trim_qual, uint8_t *quals, int len, int reverse) { if ( lenmax_sum ) { max_sum = sum; // This is the correct way, but bwa clips from some reason one base less // max_l = l+1; max_l = l; } } return max_l; } void count_indels(stats_t *stats,bam1_t *bam_line) { int is_fwd = IS_REVERSE(bam_line) ? 0 : 1; int is_1st = IS_READ1(bam_line) ? 1 : 0; int icig; int icycle = 0; int read_len = bam_line->core.l_qseq; for (icig=0; icigcore.n_cigar; icig++) { // Conversion from uint32_t to MIDNSHP // 0123456 // MIDNSHP int cig = bam1_cigar(bam_line)[icig] & BAM_CIGAR_MASK; int ncig = bam1_cigar(bam_line)[icig] >> BAM_CIGAR_SHIFT; if ( cig==1 ) { int idx = is_fwd ? 
icycle : read_len-icycle-ncig; if ( idx<0 ) error("FIXME: read_len=%d vs icycle=%d\n", read_len,icycle); if ( idx >= stats->nbases || idx<0 ) error("FIXME: %d vs %d, %s:%d %s\n", idx,stats->nbases, stats->sam->header->target_name[bam_line->core.tid],bam_line->core.pos+1,bam1_qname(bam_line)); if ( is_1st ) stats->ins_cycles_1st[idx]++; else stats->ins_cycles_2nd[idx]++; icycle += ncig; if ( ncig<=stats->nindels ) stats->insertions[ncig-1]++; continue; } if ( cig==2 ) { int idx = is_fwd ? icycle-1 : read_len-icycle-1; if ( idx<0 ) continue; // discard meaningless deletions if ( idx >= stats->nbases ) error("FIXME: %d vs %d\n", idx,stats->nbases); if ( is_1st ) stats->del_cycles_1st[idx]++; else stats->del_cycles_2nd[idx]++; if ( ncig<=stats->nindels ) stats->deletions[ncig-1]++; continue; } if ( cig!=3 && cig!=5 ) icycle += ncig; } } void count_mismatches_per_cycle(stats_t *stats,bam1_t *bam_line) { int is_fwd = IS_REVERSE(bam_line) ? 0 : 1; int icig,iread=0,icycle=0; int iref = bam_line->core.pos - stats->rseq_pos; int read_len = bam_line->core.l_qseq; uint8_t *read = bam1_seq(bam_line); uint8_t *quals = bam1_qual(bam_line); uint64_t *mpc_buf = stats->mpc_buf; for (icig=0; icigcore.n_cigar; icig++) { // Conversion from uint32_t to MIDNSHP // 0123456 // MIDNSHP int cig = bam1_cigar(bam_line)[icig] & BAM_CIGAR_MASK; int ncig = bam1_cigar(bam_line)[icig] >> BAM_CIGAR_SHIFT; if ( cig==1 ) { iread += ncig; icycle += ncig; continue; } if ( cig==2 ) { iref += ncig; continue; } if ( cig==4 ) { icycle += ncig; // Soft-clips are present in the sequence, but the position of the read marks a start of non-clipped sequence // iref += ncig; iread += ncig; continue; } if ( cig==5 ) { icycle += ncig; continue; } // Ignore H and N CIGARs. The letter are inserted e.g. by TopHat and often require very large // chunk of refseq in memory. Not very frequent and not noticable in the stats. 
if ( cig==3 || cig==5 ) continue; if ( cig!=0 ) error("TODO: cigar %d, %s:%d %s\n", cig,stats->sam->header->target_name[bam_line->core.tid],bam_line->core.pos+1,bam1_qname(bam_line)); if ( ncig+iref > stats->nrseq_buf ) error("FIXME: %d+%d > %d, %s, %s:%d\n",ncig,iref,stats->nrseq_buf, bam1_qname(bam_line),stats->sam->header->target_name[bam_line->core.tid],bam_line->core.pos+1); int im; for (im=0; imrseq_buf[iref]; // ---------------15 // =ACMGRSVTWYHKDBN if ( cread==15 ) { int idx = is_fwd ? icycle : read_len-icycle-1; if ( idx>stats->max_len ) error("mpc: %d>%d\n",idx,stats->max_len); idx = idx*stats->nquals; if ( idx>=stats->nquals*stats->nbases ) error("FIXME: mpc_buf overflow\n"); mpc_buf[idx]++; } else if ( cref && cread && cref!=cread ) { uint8_t qual = quals[iread] + 1; if ( qual>=stats->nquals ) error("TODO: quality too high %d>=%d (%s %d %s)\n", qual,stats->nquals, stats->sam->header->target_name[bam_line->core.tid],bam_line->core.pos+1,bam1_qname(bam_line)); int idx = is_fwd ? icycle : read_len-icycle-1; if ( idx>stats->max_len ) error("mpc: %d>%d\n",idx,stats->max_len); idx = idx*stats->nquals + qual; if ( idx>=stats->nquals*stats->nbases ) error("FIXME: mpc_buf overflow\n"); mpc_buf[idx]++; } iref++; iread++; icycle++; } } } void read_ref_seq(stats_t *stats,int32_t tid,int32_t pos) { khash_t(kh_faidx) *h; khiter_t iter; faidx1_t val; char *chr, c; faidx_t *fai = stats->fai; h = fai->hash; chr = stats->sam->header->target_name[tid]; // ID of the sequence name iter = kh_get(kh_faidx, h, chr); if (iter == kh_end(h)) error("No such reference sequence [%s]?\n", chr); val = kh_value(h, iter); // Check the boundaries if (pos >= val.len) error("Was the bam file mapped with the reference sequence supplied?" " A read mapped beyond the end of the chromosome (%s:%d, chromosome length %d).\n", chr,pos,val.len); int size = stats->mrseq_buf; // The buffer extends beyond the chromosome end. Later the rest will be filled with N's. 
if (size+pos > val.len) size = val.len-pos; // Position the razf reader razf_seek(fai->rz, val.offset + pos / val.line_blen * val.line_len + pos % val.line_blen, SEEK_SET); uint8_t *ptr = stats->rseq_buf; int nread = 0; while ( nreadrz,&c,1) && !fai->rz->z_err ) { if ( !isgraph(c) ) continue; // Conversion between uint8_t coding and ACGT // -12-4---8------- // =ACMGRSVTWYHKDBN if ( c=='A' || c=='a' ) *ptr = 1; else if ( c=='C' || c=='c' ) *ptr = 2; else if ( c=='G' || c=='g' ) *ptr = 4; else if ( c=='T' || c=='t' ) *ptr = 8; else *ptr = 0; ptr++; nread++; } if ( nread < stats->mrseq_buf ) { memset(ptr,0, stats->mrseq_buf - nread); nread = stats->mrseq_buf; } stats->nrseq_buf = nread; stats->rseq_pos = pos; stats->tid = tid; } float fai_gc_content(stats_t *stats, int pos, int len) { uint32_t gc,count,c; int i = pos - stats->rseq_pos, ito = i + len; assert( i>=0 && ito<=stats->nrseq_buf ); // Count GC content gc = count = 0; for (; irseq_buf[i]; if ( c==2 || c==4 ) { gc++; count++; } else if ( c==1 || c==8 ) count++; } return count ? (float)gc/count : 0; } void realloc_rseq_buffer(stats_t *stats) { int n = stats->nbases*10; if ( stats->gcd_bin_size > n ) n = stats->gcd_bin_size; if ( stats->mrseq_bufrseq_buf = realloc(stats->rseq_buf,sizeof(uint8_t)*n); stats->mrseq_buf = n; } } void realloc_gcd_buffer(stats_t *stats, int seq_len) { if ( seq_len >= stats->gcd_bin_size ) error("The --GC-depth bin size (%d) is set too low for the read length %d\n", stats->gcd_bin_size, seq_len); int n = 1 + stats->gcd_ref_size / (stats->gcd_bin_size - seq_len); if ( n <= stats->igcd ) error("The --GC-depth bin size is too small or reference genome too big; please decrease the bin size or increase the reference length\n"); if ( n > stats->ngcd ) { stats->gcd = realloc(stats->gcd, n*sizeof(gc_depth_t)); if ( !stats->gcd ) error("Could not realloc GCD buffer, too many chromosomes or the genome too long?? 
[%u %u]\n", stats->ngcd,n); memset(&(stats->gcd[stats->ngcd]),0,(n-stats->ngcd)*sizeof(gc_depth_t)); stats->ngcd = n; } realloc_rseq_buffer(stats); } void realloc_buffers(stats_t *stats, int seq_len) { int n = 2*(1 + seq_len - stats->nbases) + stats->nbases; stats->quals_1st = realloc(stats->quals_1st, n*stats->nquals*sizeof(uint64_t)); if ( !stats->quals_1st ) error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,n*stats->nquals*sizeof(uint64_t)); memset(stats->quals_1st + stats->nbases*stats->nquals, 0, (n-stats->nbases)*stats->nquals*sizeof(uint64_t)); stats->quals_2nd = realloc(stats->quals_2nd, n*stats->nquals*sizeof(uint64_t)); if ( !stats->quals_2nd ) error("Could not realloc buffers, the sequence too long: %d (2x%ld)\n", seq_len,n*stats->nquals*sizeof(uint64_t)); memset(stats->quals_2nd + stats->nbases*stats->nquals, 0, (n-stats->nbases)*stats->nquals*sizeof(uint64_t)); if ( stats->mpc_buf ) { stats->mpc_buf = realloc(stats->mpc_buf, n*stats->nquals*sizeof(uint64_t)); if ( !stats->mpc_buf ) error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,n*stats->nquals*sizeof(uint64_t)); memset(stats->mpc_buf + stats->nbases*stats->nquals, 0, (n-stats->nbases)*stats->nquals*sizeof(uint64_t)); } stats->acgt_cycles = realloc(stats->acgt_cycles, n*4*sizeof(uint64_t)); if ( !stats->acgt_cycles ) error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,n*4*sizeof(uint64_t)); memset(stats->acgt_cycles + stats->nbases*4, 0, (n-stats->nbases)*4*sizeof(uint64_t)); stats->read_lengths = realloc(stats->read_lengths, n*sizeof(uint64_t)); if ( !stats->read_lengths ) error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,n*sizeof(uint64_t)); memset(stats->read_lengths + stats->nbases, 0, (n-stats->nbases)*sizeof(uint64_t)); stats->insertions = realloc(stats->insertions, n*sizeof(uint64_t)); if ( !stats->insertions ) error("Could not realloc buffers, the sequence too long: %d (%ld)\n", 
seq_len,n*sizeof(uint64_t)); memset(stats->insertions + stats->nbases, 0, (n-stats->nbases)*sizeof(uint64_t)); stats->deletions = realloc(stats->deletions, n*sizeof(uint64_t)); if ( !stats->deletions ) error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,n*sizeof(uint64_t)); memset(stats->deletions + stats->nbases, 0, (n-stats->nbases)*sizeof(uint64_t)); stats->ins_cycles_1st = realloc(stats->ins_cycles_1st, (n+1)*sizeof(uint64_t)); if ( !stats->ins_cycles_1st ) error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,(n+1)*sizeof(uint64_t)); memset(stats->ins_cycles_1st + stats->nbases + 1, 0, (n-stats->nbases)*sizeof(uint64_t)); stats->ins_cycles_2nd = realloc(stats->ins_cycles_2nd, (n+1)*sizeof(uint64_t)); if ( !stats->ins_cycles_2nd ) error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,(n+1)*sizeof(uint64_t)); memset(stats->ins_cycles_2nd + stats->nbases + 1, 0, (n-stats->nbases)*sizeof(uint64_t)); stats->del_cycles_1st = realloc(stats->del_cycles_1st, (n+1)*sizeof(uint64_t)); if ( !stats->del_cycles_1st ) error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,(n+1)*sizeof(uint64_t)); memset(stats->del_cycles_1st + stats->nbases + 1, 0, (n-stats->nbases)*sizeof(uint64_t)); stats->del_cycles_2nd = realloc(stats->del_cycles_2nd, (n+1)*sizeof(uint64_t)); if ( !stats->del_cycles_2nd ) error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,(n+1)*sizeof(uint64_t)); memset(stats->del_cycles_2nd + stats->nbases + 1, 0, (n-stats->nbases)*sizeof(uint64_t)); stats->nbases = n; // Realloc the coverage distribution buffer int *rbuffer = calloc(sizeof(int),seq_len*5); n = stats->cov_rbuf.size-stats->cov_rbuf.start; memcpy(rbuffer,stats->cov_rbuf.buffer+stats->cov_rbuf.start,n); if ( stats->cov_rbuf.start>1 ) memcpy(rbuffer+n,stats->cov_rbuf.buffer,stats->cov_rbuf.start); stats->cov_rbuf.start = 0; free(stats->cov_rbuf.buffer); stats->cov_rbuf.buffer = 
rbuffer; stats->cov_rbuf.size = seq_len*5; realloc_rseq_buffer(stats); } void collect_stats(bam1_t *bam_line, stats_t *stats) { if ( stats->rg_hash ) { const uint8_t *rg = bam_aux_get(bam_line, "RG"); if ( !rg ) return; khiter_t k = kh_get(kh_rg, stats->rg_hash, (const char*)(rg + 1)); if ( k == kh_end(stats->rg_hash) ) return; } if ( stats->flag_require && (bam_line->core.flag & stats->flag_require)!=stats->flag_require ) { stats->nreads_filtered++; return; } if ( stats->flag_filter && (bam_line->core.flag & stats->flag_filter) ) { stats->nreads_filtered++; return; } if ( !is_in_regions(bam_line,stats) ) return; if ( stats->filter_readlen!=-1 && bam_line->core.l_qseq!=stats->filter_readlen ) return; if ( bam_line->core.flag & BAM_FQCFAIL ) stats->nreads_QCfailed++; if ( bam_line->core.flag & BAM_FSECONDARY ) stats->nreads_secondary++; int seq_len = bam_line->core.l_qseq; if ( !seq_len ) return; if ( seq_len >= stats->nbases ) realloc_buffers(stats,seq_len); if ( stats->max_lenmax_len = seq_len; stats->read_lengths[seq_len]++; // Count GC and ACGT per cycle uint8_t base, *seq = bam1_seq(bam_line); int gc_count = 0; int i; int reverse = IS_REVERSE(bam_line); for (i=0; i2 ) base=3; if ( 4*(reverse ? seq_len-i-1 : i) + base >= stats->nbases*4 ) error("FIXME: acgt_cycles\n"); stats->acgt_cycles[ 4*(reverse ? 
seq_len-i-1 : i) + base ]++; } int gc_idx_min = gc_count*(stats->ngc-1)/seq_len; int gc_idx_max = (gc_count+1)*(stats->ngc-1)/seq_len; if ( gc_idx_max >= stats->ngc ) gc_idx_max = stats->ngc - 1; // Determine which array (1st or 2nd read) will these stats go to, // trim low quality bases from end the same way BWA does, // fill GC histogram uint64_t *quals; uint8_t *bam_quals = bam1_qual(bam_line); if ( bam_line->core.flag&BAM_FREAD2 ) { quals = stats->quals_2nd; stats->nreads_2nd++; for (i=gc_idx_min; igc_2nd[i]++; } else { quals = stats->quals_1st; stats->nreads_1st++; for (i=gc_idx_min; igc_1st[i]++; } if ( stats->trim_qual>0 ) stats->nbases_trimmed += bwa_trim_read(stats->trim_qual, bam_quals, seq_len, reverse); // Quality histogram and average quality for (i=0; i=stats->nquals ) error("TODO: quality too high %d>=%d (%s %d %s)\n", qual,stats->nquals,stats->sam->header->target_name[bam_line->core.tid],bam_line->core.pos+1,bam1_qname(bam_line)); if ( qual>stats->max_qual ) stats->max_qual = qual; quals[ i*stats->nquals+qual ]++; stats->sum_qual += qual; } // Look at the flags and increment appropriate counters (mapped, paired, etc) if ( IS_UNMAPPED(bam_line) ) stats->nreads_unmapped++; else { if ( !bam_line->core.qual ) stats->nreads_mq0++; count_indels(stats,bam_line); if ( !IS_PAIRED(bam_line) ) stats->nreads_unpaired++; else { stats->nreads_paired++; if ( bam_line->core.tid!=bam_line->core.mtid ) stats->nreads_anomalous++; // The insert size is tricky, because for long inserts the libraries are // prepared differently and the pairs point in other direction. BWA does // not set the paired flag for them. Similar thing is true also for 454 // reads. Mates mapped to different chromosomes have isize==0. 
int32_t isize = bam_line->core.isize; if ( isize<0 ) isize = -isize; if ( isize >= stats->nisize ) isize = stats->nisize-1; if ( isize>0 || bam_line->core.tid==bam_line->core.mtid ) { int pos_fst = bam_line->core.mpos - bam_line->core.pos; int is_fst = IS_READ1(bam_line) ? 1 : -1; int is_fwd = IS_REVERSE(bam_line) ? -1 : 1; int is_mfwd = IS_MATE_REVERSE(bam_line) ? -1 : 1; if ( is_fwd*is_mfwd>0 ) stats->isize_other[isize]++; else if ( is_fst*pos_fst>0 ) { if ( is_fst*is_fwd>0 ) stats->isize_inward[isize]++; else stats->isize_outward[isize]++; } else if ( is_fst*pos_fst<0 ) { if ( is_fst*is_fwd>0 ) stats->isize_outward[isize]++; else stats->isize_inward[isize]++; } } } // Number of mismatches uint8_t *nm = bam_aux_get(bam_line,"NM"); if (nm) stats->nmismatches += bam_aux2i(nm); // Number of mapped bases from cigar // Conversion from uint32_t to MIDNSHP // 012-4-- // MIDNSHP if ( bam_line->core.n_cigar == 0) error("FIXME: mapped read with no cigar?\n"); int readlen=seq_len; if ( stats->regions ) { // Count only on-target bases int iref = bam_line->core.pos + 1; for (i=0; icore.n_cigar; i++) { int cig = bam1_cigar(bam_line)[i]&BAM_CIGAR_MASK; int ncig = bam1_cigar(bam_line)[i]>>BAM_CIGAR_SHIFT; if ( cig==2 ) readlen += ncig; else if ( cig==0 ) { if ( iref < stats->reg_from ) ncig -= stats->reg_from-iref; else if ( iref+ncig-1 > stats->reg_to ) ncig -= iref+ncig-1 - stats->reg_to; if ( ncig<0 ) ncig = 0; stats->nbases_mapped_cigar += ncig; iref += bam1_cigar(bam_line)[i]>>BAM_CIGAR_SHIFT; } else if ( cig==1 ) { iref += ncig; if ( iref>=stats->reg_from && iref<=stats->reg_to ) stats->nbases_mapped_cigar += ncig; } } } else { // Count the whole read for (i=0; icore.n_cigar; i++) { if ( (bam1_cigar(bam_line)[i]&BAM_CIGAR_MASK)==0 || (bam1_cigar(bam_line)[i]&BAM_CIGAR_MASK)==1 ) stats->nbases_mapped_cigar += bam1_cigar(bam_line)[i]>>BAM_CIGAR_SHIFT; if ( (bam1_cigar(bam_line)[i]&BAM_CIGAR_MASK)==2 ) readlen += bam1_cigar(bam_line)[i]>>BAM_CIGAR_SHIFT; } } 
stats->nbases_mapped += seq_len; if ( stats->tid==bam_line->core.tid && bam_line->core.pospos ) stats->is_sorted = 0; stats->pos = bam_line->core.pos; if ( stats->is_sorted ) { if ( stats->tid==-1 || stats->tid!=bam_line->core.tid ) round_buffer_flush(stats,-1); // Mismatches per cycle and GC-depth graph. For simplicity, reads overlapping GCD bins // are not splitted which results in up to seq_len-1 overlaps. The default bin size is // 20kbp, so the effect is negligible. if ( stats->fai ) { int inc_ref = 0, inc_gcd = 0; // First pass or new chromosome if ( stats->rseq_pos==-1 || stats->tid != bam_line->core.tid ) { inc_ref=1; inc_gcd=1; } // Read goes beyond the end of the rseq buffer else if ( stats->rseq_pos+stats->nrseq_buf < bam_line->core.pos+readlen ) { inc_ref=1; inc_gcd=1; } // Read overlaps the next gcd bin else if ( stats->gcd_pos+stats->gcd_bin_size < bam_line->core.pos+readlen ) { inc_gcd = 1; if ( stats->rseq_pos+stats->nrseq_buf < bam_line->core.pos+stats->gcd_bin_size ) inc_ref = 1; } if ( inc_gcd ) { stats->igcd++; if ( stats->igcd >= stats->ngcd ) realloc_gcd_buffer(stats, readlen); if ( inc_ref ) read_ref_seq(stats,bam_line->core.tid,bam_line->core.pos); stats->gcd_pos = bam_line->core.pos; stats->gcd[ stats->igcd ].gc = fai_gc_content(stats, stats->gcd_pos, stats->gcd_bin_size); } count_mismatches_per_cycle(stats,bam_line); } // No reference and first pass, new chromosome or sequence going beyond the end of the gcd bin else if ( stats->gcd_pos==-1 || stats->tid != bam_line->core.tid || bam_line->core.pos - stats->gcd_pos > stats->gcd_bin_size ) { // First pass or a new chromosome stats->tid = bam_line->core.tid; stats->gcd_pos = bam_line->core.pos; stats->igcd++; if ( stats->igcd >= stats->ngcd ) realloc_gcd_buffer(stats, readlen); } stats->gcd[ stats->igcd ].depth++; // When no reference sequence is given, approximate the GC from the read (much shorter window, but otherwise OK) if ( !stats->fai ) stats->gcd[ stats->igcd ].gc += (float) gc_count 
/ seq_len; // Coverage distribution graph round_buffer_flush(stats,bam_line->core.pos); round_buffer_insert_read(&(stats->cov_rbuf),bam_line->core.pos,bam_line->core.pos+seq_len-1); } } stats->total_len += seq_len; if ( IS_DUP(bam_line) ) { stats->total_len_dup += seq_len; stats->nreads_dup++; } } // Sort by GC and depth #define GCD_t(x) ((gc_depth_t *)x) static int gcd_cmp(const void *a, const void *b) { if ( GCD_t(a)->gc < GCD_t(b)->gc ) return -1; if ( GCD_t(a)->gc > GCD_t(b)->gc ) return 1; if ( GCD_t(a)->depth < GCD_t(b)->depth ) return -1; if ( GCD_t(a)->depth > GCD_t(b)->depth ) return 1; return 0; } #undef GCD_t float gcd_percentile(gc_depth_t *gcd, int N, int p) { float n,d; int k; n = p*(N+1)/100; k = n; if ( k<=0 ) return gcd[0].depth; if ( k>=N ) return gcd[N-1].depth; d = n - k; return gcd[k-1].depth + d*(gcd[k].depth - gcd[k-1].depth); } void output_stats(stats_t *stats) { // Calculate average insert size and standard deviation (from the main bulk data only) int isize, ibulk=0; uint64_t nisize=0, nisize_inward=0, nisize_outward=0, nisize_other=0; for (isize=0; isizenisize; isize++) { // Each pair was counted twice stats->isize_inward[isize] *= 0.5; stats->isize_outward[isize] *= 0.5; stats->isize_other[isize] *= 0.5; nisize_inward += stats->isize_inward[isize]; nisize_outward += stats->isize_outward[isize]; nisize_other += stats->isize_other[isize]; nisize += stats->isize_inward[isize] + stats->isize_outward[isize] + stats->isize_other[isize]; } double bulk=0, avg_isize=0, sd_isize=0; for (isize=0; isizenisize; isize++) { bulk += stats->isize_inward[isize] + stats->isize_outward[isize] + stats->isize_other[isize]; avg_isize += isize * (stats->isize_inward[isize] + stats->isize_outward[isize] + stats->isize_other[isize]); if ( bulk/nisize > stats->isize_main_bulk ) { ibulk = isize+1; nisize = bulk; break; } } avg_isize /= nisize ? 
nisize : 1; for (isize=1; isizeisize_inward[isize] + stats->isize_outward[isize] + stats->isize_other[isize]) * (isize-avg_isize)*(isize-avg_isize) / nisize; sd_isize = sqrt(sd_isize); printf("# This file was produced by bamcheck (%s)\n",BAMCHECK_VERSION); printf("# The command line was: %s",stats->argv[0]); int i; for (i=1; iargc; i++) printf(" %s",stats->argv[i]); printf("\n"); printf("# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.\n"); printf("SN\traw total sequences:\t%ld\n", (long)(stats->nreads_filtered+stats->nreads_1st+stats->nreads_2nd)); printf("SN\tfiltered sequences:\t%ld\n", (long)stats->nreads_filtered); printf("SN\tsequences:\t%ld\n", (long)(stats->nreads_1st+stats->nreads_2nd)); printf("SN\tis paired:\t%d\n", stats->nreads_1st&&stats->nreads_2nd ? 1 : 0); printf("SN\tis sorted:\t%d\n", stats->is_sorted ? 1 : 0); printf("SN\t1st fragments:\t%ld\n", (long)stats->nreads_1st); printf("SN\tlast fragments:\t%ld\n", (long)stats->nreads_2nd); printf("SN\treads mapped:\t%ld\n", (long)(stats->nreads_paired+stats->nreads_unpaired)); printf("SN\treads unmapped:\t%ld\n", (long)stats->nreads_unmapped); printf("SN\treads unpaired:\t%ld\n", (long)stats->nreads_unpaired); printf("SN\treads paired:\t%ld\n", (long)stats->nreads_paired); printf("SN\treads duplicated:\t%ld\n", (long)stats->nreads_dup); printf("SN\treads MQ0:\t%ld\n", (long)stats->nreads_mq0); printf("SN\treads QC failed:\t%ld\n", (long)stats->nreads_QCfailed); printf("SN\tnon-primary alignments:\t%ld\n", (long)stats->nreads_secondary); printf("SN\ttotal length:\t%ld\n", (long)stats->total_len); printf("SN\tbases mapped:\t%ld\n", (long)stats->nbases_mapped); printf("SN\tbases mapped (cigar):\t%ld\n", (long)stats->nbases_mapped_cigar); printf("SN\tbases trimmed:\t%ld\n", (long)stats->nbases_trimmed); printf("SN\tbases duplicated:\t%ld\n", (long)stats->total_len_dup); printf("SN\tmismatches:\t%ld\n", (long)stats->nmismatches); printf("SN\terror rate:\t%e\n", 
(float)stats->nmismatches/stats->nbases_mapped_cigar); float avg_read_length = (stats->nreads_1st+stats->nreads_2nd)?stats->total_len/(stats->nreads_1st+stats->nreads_2nd):0; printf("SN\taverage length:\t%.0f\n", avg_read_length); printf("SN\tmaximum length:\t%d\n", stats->max_len); printf("SN\taverage quality:\t%.1f\n", stats->total_len?stats->sum_qual/stats->total_len:0); printf("SN\tinsert size average:\t%.1f\n", avg_isize); printf("SN\tinsert size standard deviation:\t%.1f\n", sd_isize); printf("SN\tinward oriented pairs:\t%ld\n", (long)nisize_inward); printf("SN\toutward oriented pairs:\t%ld\n", (long)nisize_outward); printf("SN\tpairs with other orientation:\t%ld\n", (long)nisize_other); printf("SN\tpairs on different chromosomes:\t%ld\n", (long)stats->nreads_anomalous/2); int ibase,iqual; if ( stats->max_lennbases ) stats->max_len++; if ( stats->max_qual+1nquals ) stats->max_qual++; printf("# First Fragment Qualitites. Use `grep ^FFQ | cut -f 2-` to extract this part.\n"); printf("# Columns correspond to qualities and rows to cycles. First column is the cycle number.\n"); for (ibase=0; ibasemax_len; ibase++) { printf("FFQ\t%d",ibase+1); for (iqual=0; iqual<=stats->max_qual; iqual++) { printf("\t%ld", (long)stats->quals_1st[ibase*stats->nquals+iqual]); } printf("\n"); } printf("# Last Fragment Qualitites. Use `grep ^LFQ | cut -f 2-` to extract this part.\n"); printf("# Columns correspond to qualities and rows to cycles. First column is the cycle number.\n"); for (ibase=0; ibasemax_len; ibase++) { printf("LFQ\t%d",ibase+1); for (iqual=0; iqual<=stats->max_qual; iqual++) { printf("\t%ld", (long)stats->quals_2nd[ibase*stats->nquals+iqual]); } printf("\n"); } if ( stats->mpc_buf ) { printf("# Mismatches per cycle and quality. Use `grep ^MPC | cut -f 2-` to extract this part.\n"); printf("# Columns correspond to qualities, rows to cycles. 
First column is the cycle number, second\n"); printf("# is the number of N's and the rest is the number of mismatches\n"); for (ibase=0; ibasemax_len; ibase++) { printf("MPC\t%d",ibase+1); for (iqual=0; iqual<=stats->max_qual; iqual++) { printf("\t%ld", (long)stats->mpc_buf[ibase*stats->nquals+iqual]); } printf("\n"); } } printf("# GC Content of first fragments. Use `grep ^GCF | cut -f 2-` to extract this part.\n"); int ibase_prev = 0; for (ibase=0; ibasengc; ibase++) { if ( stats->gc_1st[ibase]==stats->gc_1st[ibase_prev] ) continue; printf("GCF\t%.2f\t%ld\n", (ibase+ibase_prev)*0.5*100./(stats->ngc-1), (long)stats->gc_1st[ibase_prev]); ibase_prev = ibase; } printf("# GC Content of last fragments. Use `grep ^GCL | cut -f 2-` to extract this part.\n"); ibase_prev = 0; for (ibase=0; ibasengc; ibase++) { if ( stats->gc_2nd[ibase]==stats->gc_2nd[ibase_prev] ) continue; printf("GCL\t%.2f\t%ld\n", (ibase+ibase_prev)*0.5*100./(stats->ngc-1), (long)stats->gc_2nd[ibase_prev]); ibase_prev = ibase; } printf("# ACGT content per cycle. Use `grep ^GCC | cut -f 2-` to extract this part. The columns are: cycle, and A,C,G,T counts [%%]\n"); for (ibase=0; ibasemax_len; ibase++) { uint64_t *ptr = &(stats->acgt_cycles[ibase*4]); uint64_t sum = ptr[0]+ptr[1]+ptr[2]+ptr[3]; if ( ! sum ) continue; printf("GCC\t%d\t%.2f\t%.2f\t%.2f\t%.2f\n", ibase,100.*ptr[0]/sum,100.*ptr[1]/sum,100.*ptr[2]/sum,100.*ptr[3]/sum); } printf("# Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: pairs total, inward oriented pairs, outward oriented pairs, other pairs\n"); for (isize=0; isizeisize_inward[isize]+stats->isize_outward[isize]+stats->isize_other[isize]), (long)stats->isize_inward[isize], (long)stats->isize_outward[isize], (long)stats->isize_other[isize]); printf("# Read lengths. Use `grep ^RL | cut -f 2-` to extract this part. 
The columns are: read length, count\n"); int ilen; for (ilen=0; ilenmax_len; ilen++) { if ( stats->read_lengths[ilen]>0 ) printf("RL\t%d\t%ld\n", ilen, (long)stats->read_lengths[ilen]); } printf("# Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions\n"); for (ilen=0; ilennindels; ilen++) { if ( stats->insertions[ilen]>0 || stats->deletions[ilen]>0 ) printf("ID\t%d\t%ld\t%ld\n", ilen+1, (long)stats->insertions[ilen], (long)stats->deletions[ilen]); } printf("# Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev)\n"); for (ilen=0; ilen<=stats->nbases; ilen++) { // For deletions we print the index of the cycle before the deleted base (1-based) and for insertions // the index of the cycle of the first inserted base (also 1-based) if ( stats->ins_cycles_1st[ilen]>0 || stats->ins_cycles_2nd[ilen]>0 || stats->del_cycles_1st[ilen]>0 || stats->del_cycles_2nd[ilen]>0 ) printf("IC\t%d\t%ld\t%ld\t%ld\t%ld\n", ilen+1, (long)stats->ins_cycles_1st[ilen], (long)stats->ins_cycles_2nd[ilen], (long)stats->del_cycles_1st[ilen], (long)stats->del_cycles_2nd[ilen]); } printf("# Coverage distribution. Use `grep ^COV | cut -f 2-` to extract this part.\n"); if ( stats->cov[0] ) printf("COV\t[<%d]\t%d\t%ld\n",stats->cov_min,stats->cov_min-1, (long)stats->cov[0]); int icov; for (icov=1; icovncov-1; icov++) if ( stats->cov[icov] ) printf("COV\t[%d-%d]\t%d\t%ld\n",stats->cov_min + (icov-1)*stats->cov_step, stats->cov_min + icov*stats->cov_step-1,stats->cov_min + icov*stats->cov_step-1, (long)stats->cov[icov]); if ( stats->cov[stats->ncov-1] ) printf("COV\t[%d<]\t%d\t%ld\n",stats->cov_min + (stats->ncov-2)*stats->cov_step-1,stats->cov_min + (stats->ncov-2)*stats->cov_step-1, (long)stats->cov[stats->ncov-1]); // Calculate average GC content, then sort by GC and depth printf("# GC-depth. 
Use `grep ^GCD | cut -f 2-` to extract this part. The columns are: GC%%, unique sequence percentiles, 10th, 25th, 50th, 75th and 90th depth percentile\n"); uint32_t igcd; for (igcd=0; igcdigcd; igcd++) { if ( stats->fai ) stats->gcd[igcd].gc = round(100. * stats->gcd[igcd].gc); else if ( stats->gcd[igcd].depth ) stats->gcd[igcd].gc = round(100. * stats->gcd[igcd].gc / stats->gcd[igcd].depth); } qsort(stats->gcd, stats->igcd+1, sizeof(gc_depth_t), gcd_cmp); igcd = 0; while ( igcd < stats->igcd ) { // Calculate percentiles (10,25,50,75,90th) for the current GC content and print uint32_t nbins=0, itmp=igcd; float gc = stats->gcd[igcd].gc; while ( itmpigcd && fabs(stats->gcd[itmp].gc-gc)<0.1 ) { nbins++; itmp++; } printf("GCD\t%.1f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\n", gc, (igcd+nbins+1)*100./(stats->igcd+1), gcd_percentile(&(stats->gcd[igcd]),nbins,10) *avg_read_length/stats->gcd_bin_size, gcd_percentile(&(stats->gcd[igcd]),nbins,25) *avg_read_length/stats->gcd_bin_size, gcd_percentile(&(stats->gcd[igcd]),nbins,50) *avg_read_length/stats->gcd_bin_size, gcd_percentile(&(stats->gcd[igcd]),nbins,75) *avg_read_length/stats->gcd_bin_size, gcd_percentile(&(stats->gcd[igcd]),nbins,90) *avg_read_length/stats->gcd_bin_size ); igcd += nbins; } } size_t mygetline(char **line, size_t *n, FILE *fp) { if (line == NULL || n == NULL || fp == NULL) { errno = EINVAL; return -1; } if (*n==0 || !*line) { *line = NULL; *n = 0; } size_t nread=0; int c; while ((c=getc(fp))!= EOF && c!='\n') { if ( ++nread>=*n ) { *n += 255; *line = realloc(*line, sizeof(char)*(*n)); } (*line)[nread-1] = c; } if ( nread>=*n ) { *n += 255; *line = realloc(*line, sizeof(char)*(*n)); } (*line)[nread] = 0; return nread>0 ? 
nread : -1; } void init_regions(stats_t *stats, char *file) { khiter_t iter; khash_t(kh_bam_tid) *header_hash; bam_init_header_hash(stats->sam->header); header_hash = (khash_t(kh_bam_tid)*)stats->sam->header->hash; FILE *fp = fopen(file,"r"); if ( !fp ) error("%s: %s\n",file,strerror(errno)); char *line = NULL; size_t len = 0; ssize_t nread; int warned = 0; int prev_tid=-1, prev_pos=-1; while ((nread = mygetline(&line, &len, fp)) != -1) { if ( line[0] == '#' ) continue; int i = 0; while ( i=nread ) error("Could not parse the file: %s [%s]\n", file,line); line[i] = 0; iter = kh_get(kh_bam_tid, header_hash, line); int tid = kh_val(header_hash, iter); if ( iter == kh_end(header_hash) ) { if ( !warned ) fprintf(stderr,"Warning: Some sequences not present in the BAM, e.g. \"%s\". This message is printed only once.\n", line); warned = 1; continue; } if ( tid >= stats->nregions ) { stats->regions = realloc(stats->regions,sizeof(regions_t)*(stats->nregions+100)); int j; for (j=stats->nregions; jnregions+100; j++) { stats->regions[j].npos = stats->regions[j].mpos = stats->regions[j].cpos = 0; stats->regions[j].pos = NULL; } stats->nregions += 100; } int npos = stats->regions[tid].npos; if ( npos >= stats->regions[tid].mpos ) { stats->regions[tid].mpos += 1000; stats->regions[tid].pos = realloc(stats->regions[tid].pos,sizeof(pos_t)*stats->regions[tid].mpos); } if ( (sscanf(line+i+1,"%d %d",&stats->regions[tid].pos[npos].from,&stats->regions[tid].pos[npos].to))!=2 ) error("Could not parse the region [%s]\n"); if ( prev_tid==-1 || prev_tid!=tid ) { prev_tid = tid; prev_pos = stats->regions[tid].pos[npos].from; } if ( prev_pos>stats->regions[tid].pos[npos].from ) error("The positions are not in chromosomal order (%s:%d comes after %d)\n", line,stats->regions[tid].pos[npos].from,prev_pos); stats->regions[tid].npos++; } if (line) free(line); if ( !stats->regions ) error("Unable to map the -t sequences to the BAM sequences.\n"); fclose(fp); } void destroy_regions(stats_t *stats) { 
int i; for (i=0; inregions; i++) { if ( !stats->regions[i].mpos ) continue; free(stats->regions[i].pos); } if ( stats->regions ) free(stats->regions); } static int fetch_read(const bam1_t *bam_line, void *data) { collect_stats((bam1_t*)bam_line,(stats_t*)data); return 1; } void reset_regions(stats_t *stats) { int i; for (i=0; inregions; i++) stats->regions[i].cpos = 0; } int is_in_regions(bam1_t *bam_line, stats_t *stats) { if ( !stats->regions ) return 1; if ( bam_line->core.tid >= stats->nregions || bam_line->core.tid<0 ) return 0; if ( !stats->is_sorted ) error("The BAM must be sorted in order for -t to work.\n"); regions_t *reg = &stats->regions[bam_line->core.tid]; if ( reg->cpos==reg->npos ) return 0; // done for this chr // Find a matching interval or skip this read. No splicing of reads is done, no indels or soft clips considered, // even small overlap is enough to include the read in the stats. int i = reg->cpos; while ( inpos && reg->pos[i].to<=bam_line->core.pos ) i++; if ( i>=reg->npos ) { reg->cpos = reg->npos; return 0; } if ( bam_line->core.pos + bam_line->core.l_qseq + 1 < reg->pos[i].from ) return 0; reg->cpos = i; stats->reg_from = reg->pos[i].from; stats->reg_to = reg->pos[i].to; return 1; } void init_group_id(stats_t *stats, char *id) { if ( !stats->sam->header->dict ) stats->sam->header->dict = sam_header_parse2(stats->sam->header->text); void *iter = stats->sam->header->dict; const char *key, *val; int n = 0; stats->rg_hash = kh_init(kh_rg); while ( (iter = sam_header2key_val(iter, "RG","ID","SM", &key, &val)) ) { if ( !strcmp(id,key) || (val && !strcmp(id,val)) ) { khiter_t k = kh_get(kh_rg, stats->rg_hash, key); if ( k != kh_end(stats->rg_hash) ) fprintf(stderr, "[init_group_id] The group ID not unique: \"%s\"\n", key); int ret; k = kh_put(kh_rg, stats->rg_hash, key, &ret); kh_value(stats->rg_hash, k) = val; n++; } } if ( !n ) error("The sample or read group \"%s\" not present.\n", id); } void error(const char *format, ...) 
{ if ( !format ) { printf("Version: %s\n", BAMCHECK_VERSION); printf("About: The program collects statistics from BAM files. The output can be visualized using plot-bamcheck.\n"); printf("Usage: bamcheck [OPTIONS] file.bam\n"); printf(" bamcheck [OPTIONS] file.bam chr:from-to\n"); printf("Options:\n"); printf(" -c, --coverage ,, Coverage distribution min,max,step [1,1000,1]\n"); printf(" -d, --remove-dups Exlude from statistics reads marked as duplicates\n"); printf(" -f, --required-flag Required flag, 0 for unset [0]\n"); printf(" -F, --filtering-flag Filtering flag, 0 for unset [0]\n"); printf(" --GC-depth Bin size for GC-depth graph and the maximum reference length [2e4,4.2e9]\n"); printf(" -h, --help This help message\n"); printf(" -i, --insert-size Maximum insert size [8000]\n"); printf(" -I, --id Include only listed read group or sample name\n"); printf(" -l, --read-length Include in the statistics only reads with the given read length []\n"); printf(" -m, --most-inserts Report only the main part of inserts [0.99]\n"); printf(" -q, --trim-quality The BWA trimming parameter [0]\n"); printf(" -r, --ref-seq Reference sequence (required for GC-depth calculation).\n"); printf(" -t, --target-regions Do stats in these regions only. 
Tab-delimited file chr,from,to, 1-based, inclusive.\n"); printf(" -s, --sam Input is SAM\n"); printf("\n"); } else { va_list ap; va_start(ap, format); vfprintf(stderr, format, ap); va_end(ap); } exit(-1); } int main(int argc, char *argv[]) { char *targets = NULL; char *bam_fname = NULL; char *group_id = NULL; samfile_t *sam = NULL; char in_mode[5]; stats_t *stats = calloc(1,sizeof(stats_t)); stats->ngc = 200; stats->nquals = 256; stats->nbases = 300; stats->nisize = 8000; stats->max_len = 30; stats->max_qual = 40; stats->isize_main_bulk = 0.99; // There are always outliers at the far end stats->gcd_bin_size = 20e3; stats->gcd_ref_size = 4.2e9; stats->rseq_pos = -1; stats->tid = stats->gcd_pos = -1; stats->igcd = 0; stats->is_sorted = 1; stats->cov_min = 1; stats->cov_max = 1000; stats->cov_step = 1; stats->argc = argc; stats->argv = argv; stats->filter_readlen = -1; stats->nindels = stats->nbases; strcpy(in_mode, "rb"); static struct option loptions[] = { {"help",0,0,'h'}, {"remove-dups",0,0,'d'}, {"sam",0,0,'s'}, {"ref-seq",1,0,'r'}, {"coverage",1,0,'c'}, {"read-length",1,0,'l'}, {"insert-size",1,0,'i'}, {"most-inserts",1,0,'m'}, {"trim-quality",1,0,'q'}, {"target-regions",0,0,'t'}, {"required-flag",1,0,'f'}, {"filtering-flag",0,0,'F'}, {"id",1,0,'I'}, {"GC-depth",1,0,1}, {0,0,0,0} }; int opt; while ( (opt=getopt_long(argc,argv,"?hdsr:c:l:i:t:m:q:f:F:I:1:",loptions,NULL))>0 ) { switch (opt) { case 'f': stats->flag_require=strtol(optarg,0,0); break; case 'F': stats->flag_filter=strtol(optarg,0,0); break; case 'd': stats->flag_filter|=BAM_FDUP; break; case 's': strcpy(in_mode, "r"); break; case 'r': stats->fai = fai_load(optarg); if (stats->fai==0) error("Could not load faidx: %s\n", optarg); break; case 1 : { float flen,fbin; if ( sscanf(optarg,"%f,%f",&fbin,&flen)!= 2 ) error("Unable to parse --GC-depth %s\n", optarg); stats->gcd_bin_size = fbin; stats->gcd_ref_size = flen; } break; case 'c': if ( 
sscanf(optarg,"%d,%d,%d",&stats->cov_min,&stats->cov_max,&stats->cov_step)!= 3 ) error("Unable to parse -c %s\n", optarg); break; case 'l': stats->filter_readlen = atoi(optarg); break; case 'i': stats->nisize = atoi(optarg); break; case 'm': stats->isize_main_bulk = atof(optarg); break; case 'q': stats->trim_qual = atoi(optarg); break; case 't': targets = optarg; break; case 'I': group_id = optarg; break; case '?': case 'h': error(NULL); default: error("Unknown argument: %s\n", optarg); } } if ( optindcov_step > stats->cov_max - stats->cov_min + 1 ) { stats->cov_step = stats->cov_max - stats->cov_min; if ( stats->cov_step <= 0 ) stats->cov_step = 1; } stats->ncov = 3 + (stats->cov_max-stats->cov_min) / stats->cov_step; stats->cov_max = stats->cov_min + ((stats->cov_max-stats->cov_min)/stats->cov_step +1)*stats->cov_step - 1; stats->cov = calloc(sizeof(uint64_t),stats->ncov); stats->cov_rbuf.size = stats->nbases*5; stats->cov_rbuf.buffer = calloc(sizeof(int32_t),stats->cov_rbuf.size); // .. bam if ((sam = samopen(bam_fname, in_mode, NULL)) == 0) error("Failed to open: %s\n", bam_fname); stats->sam = sam; if ( group_id ) init_group_id(stats, group_id); bam1_t *bam_line = bam_init1(); // .. arrays stats->quals_1st = calloc(stats->nquals*stats->nbases,sizeof(uint64_t)); stats->quals_2nd = calloc(stats->nquals*stats->nbases,sizeof(uint64_t)); stats->gc_1st = calloc(stats->ngc,sizeof(uint64_t)); stats->gc_2nd = calloc(stats->ngc,sizeof(uint64_t)); stats->isize_inward = calloc(stats->nisize,sizeof(uint64_t)); stats->isize_outward = calloc(stats->nisize,sizeof(uint64_t)); stats->isize_other = calloc(stats->nisize,sizeof(uint64_t)); stats->gcd = calloc(stats->ngcd,sizeof(gc_depth_t)); stats->mpc_buf = stats->fai ? 
calloc(stats->nquals*stats->nbases,sizeof(uint64_t)) : NULL; stats->acgt_cycles = calloc(4*stats->nbases,sizeof(uint64_t)); stats->read_lengths = calloc(stats->nbases,sizeof(uint64_t)); stats->insertions = calloc(stats->nbases,sizeof(uint64_t)); stats->deletions = calloc(stats->nbases,sizeof(uint64_t)); stats->ins_cycles_1st = calloc(stats->nbases+1,sizeof(uint64_t)); stats->ins_cycles_2nd = calloc(stats->nbases+1,sizeof(uint64_t)); stats->del_cycles_1st = calloc(stats->nbases+1,sizeof(uint64_t)); stats->del_cycles_2nd = calloc(stats->nbases+1,sizeof(uint64_t)); realloc_rseq_buffer(stats); if ( targets ) init_regions(stats, targets); // Collect statistics if ( optindsam->header, argv[i], &tid, &beg, &end); if ( tid < 0 ) continue; reset_regions(stats); bam_fetch(stats->sam->x.bam, bam_idx, tid, beg, end, stats, fetch_read); } bam_index_destroy(bam_idx); } else { // Stream through the entire BAM ignoring off-target regions if -t is given while (samread(sam,bam_line) >= 0) collect_stats(bam_line,stats); } round_buffer_flush(stats,-1); output_stats(stats); bam_destroy1(bam_line); samclose(stats->sam); if (stats->fai) fai_destroy(stats->fai); free(stats->cov_rbuf.buffer); free(stats->cov); free(stats->quals_1st); free(stats->quals_2nd); free(stats->gc_1st); free(stats->gc_2nd); free(stats->isize_inward); free(stats->isize_outward); free(stats->isize_other); free(stats->gcd); free(stats->rseq_buf); free(stats->mpc_buf); free(stats->acgt_cycles); free(stats->read_lengths); free(stats->insertions); free(stats->deletions); free(stats->ins_cycles_1st); free(stats->ins_cycles_2nd); free(stats->del_cycles_1st); free(stats->del_cycles_2nd); destroy_regions(stats); free(stats); if ( stats->rg_hash ) kh_destroy(kh_rg, stats->rg_hash); return 0; } samtools-0.1.19/misc/blast2sam.pl000077500000000000000000000045571212162403000166300ustar00rootroot00000000000000#!/usr/bin/perl -w use strict; use warnings; use Getopt::Std; &blast2sam; sub blast2sam { my %opts = (); getopts('s', 
\%opts); die("Usage: blast2sam.pl \n") if (-t STDIN && @ARGV == 0); my ($qlen, $slen, $q, $s, $qbeg, $qend, @sam, @cigar, @cmaux, $show_seq); $show_seq = defined($opts{s}); @sam = (); @sam[0,4,6..8,10] = ('', 255, '*', 0, 0, '*'); while (<>) { if (@cigar && (/^Query=/ || /Score =.*bits.*Expect/)) { # print &blast_print_sam(\@sam, \@cigar, \@cmaux, $qlen - $qend); @cigar = (); } if (/^Query= (\S+)/) { $sam[0] = $1; } elsif (/\((\S+)\s+letters\)/) { $qlen = $1; $qlen =~ s/,//g; } elsif (/^>(\S+)/) { $sam[2] = $1; } elsif (/Length = (\d+)/) { $slen = $1; } elsif (/Score =\s+(\S+) bits.+Expect(\(\d+\))? = (\S+)/) { # the start of an alignment block my ($as, $ev) = (int($1 + .499), $3); $ev = "1$ev" if ($ev =~ /^e/); @sam[1,3,9,11,12] = (0, 0, '', "AS:i:$as", "EV:Z:$ev"); @cigar = (); $qbeg = 0; @cmaux = (0, 0, 0, ''); } elsif (/Strand = (\S+) \/ (\S+)/) { $sam[1] |= 0x10 if ($2 eq 'Minus'); } elsif (/Query\:\s(\d+)\s*(\S+)\s(\d+)/) { $q = $2; unless ($qbeg) { $qbeg = $1; push(@cigar, ($1-1) . "H") if ($1 > 1); } $qend = $3; if ($show_seq) { my $x = $q; $x =~ s/-//g; $sam[9] .= $x; } } elsif (/Sbjct\:\s(\d+)\s*(\S+)\s(\d+)/) { $s = $2; if ($sam[1] & 0x10) { $sam[3] = $3; } else { $sam[3] = $1 unless ($sam[3]); } &aln2cm(\@cigar, \$q, \$s, \@cmaux); } } &blast_print_sam(\@sam, \@cigar, \@cmaux, $qlen - $qend); } sub blast_print_sam { my ($sam, $cigar, $cmaux, $qrest) = @_; push(@$cigar, $cmaux->[1] . substr("MDI", $cmaux->[0], 1)); push(@$cigar, $qrest . 
'H') if ($qrest); if ($sam->[1] & 0x10) { @$cigar = reverse(@$cigar); $sam->[9] = reverse($sam->[9]); $sam->[9] =~ tr/atgcrymkswATGCRYMKSW/tacgyrkmswTACGYRKMSW/; } $sam->[9] = '*' if (!$sam->[9]); $sam->[5] = join('', @$cigar); print join("\t", @$sam), "\n"; } sub aln2cm { my ($cigar, $q, $s, $cmaux) = @_; my $l = length($$q); for (my $i = 0; $i < $l; ++$i) { my $op; # set $op if (substr($$q, $i, 1) eq '-') { $op = 2; } elsif (substr($$s, $i, 1) eq '-') { $op = 1; } else { $op = 0; } # for CIGAR if ($cmaux->[0] == $op) { ++$cmaux->[1]; } else { push(@$cigar, $cmaux->[1] . substr("MDI", $cmaux->[0], 1)); $cmaux->[0] = $op; $cmaux->[1] = 1; } } } samtools-0.1.19/misc/bowtie2sam.pl000077500000000000000000000041001212162403000167740ustar00rootroot00000000000000#!/usr/bin/perl -w # Contact: lh3 # Version: 0.1.1 use strict; use warnings; use Getopt::Std; &bowtie2sam; exit; sub bowtie2sam { my %opts = (); die("Usage: bowtie2sam.pl \n") if (@ARGV == 0 && -t STDIN); # core loop my (@s, $last, @staging, $k, $best_s, $subbest_s, $best_k); $last = ''; while (<>) { my ($name, $nm) = &bowtie2sam_aux($_, \@s); # read_name, number of mismatches if ($name eq $last) { # I do not know whether the multiple hits are ordered on the # number of mismatches. I assume they are not and so I have to # keep all these multiple hits in memory. 
# Convert one line of Bowtie's default tab-separated output into SAM fields.
#
#   $line - one Bowtie output record
#   $s    - array ref that is cleared and filled with the SAM columns
#           (0..10 mandatory fields, then NM / X<n> / MD optional fields)
#
# Returns ($read_name, $nm): the untrimmed read name and the number of
# mismatches, which the caller uses to rank multiple hits of the same read.
#
# Columns read from the Bowtie record: 0 name, 1 strand, 2 reference,
# 3 0-based offset, 4 sequence, 5 qualities, 6 count of other alignments,
# 7 comma-separated mismatch descriptors ("offset:X>Y").
sub bowtie2sam_aux {
  my ($line, $s) = @_;
  chomp($line);
  my @t = split("\t", $line);
  my $ret;
  @$s = ();
  # read name
  $s->[0] = $ret = $t[0];
  $s->[0] =~ s/\/[12]$//g;
  # initial flag (will be updated later)
  $s->[1] = 0;
  # read & quality
  $s->[9] = $t[4]; $s->[10] = $t[5];
  # cigar
  $s->[5] = length($s->[9]) . "M";
  # coor
  $s->[2] = $t[2]; $s->[3] = $t[3] + 1;
  $s->[1] |= 0x10 if ($t[1] eq '-');
  # mapQ
  $s->[4] = $t[6] == 0? 25 : 0;
  # mate coordinate
  $s->[6] = '*'; $s->[7] = $s->[8] = 0;
  # Parse the mismatch descriptors once, into a lexical, instead of assigning
  # to the global $_ (which the caller's while(<>) loop also uses).
  my @mm = ();
  if ($t[7]) {
	my $desc = $t[7];
	while ($desc =~ /(\d+):[ACGTN]>([ACGTN])/gi) {
	  push(@mm, [$1, $2]);
	}
  }
  # aux
  # NM is the number of mismatch descriptors.  It used to be computed as
  # (@t - 7), i.e. the number of extra columns, which can only be 0 or 1 and
  # under-reported every read with two or more mismatches.
  my $nm = scalar(@mm);
  push(@$s, "NM:i:$nm");
  # X<n> = number of hits in the <n>-mismatch stratum.  SAM tag names are
  # exactly two characters, so the tag is only emitted for n <= 9.
  push(@$s, "X$nm:i:" . ($t[6]+1)) if ($nm <= 9);
  # MD: run lengths between mismatches, followed by the trailing run.
  # NOTE(review): the base taken from each descriptor is the one after '>',
  # and offsets are used as-is on both strands -- confirm against the Bowtie
  # output specification before relying on MD for reverse-strand hits.
  my $md = '';
  my $a = 0; # number of read positions already accounted for
  foreach my $m (@mm) {
	my ($y, $z) = @$m;
	$md .= (int($y)-$a) . $z;
	$a = $y + 1;
  }
  $md .= length($s->[9]) - $a;
  push(@$s, "MD:Z:$md");
  return ($ret, $nm);
}
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # # # ########## ChangeLog: # # Version: 2.3.1 (18MAR2011) # # - Restore file '-' as stdin input. # # Version: 2.3.0 (24JAN2011) # # - Add support for export reserved chromosome name "CONTROL", # which is translated to optional field "XC:Z:CONTROL". # - Check for ".gz" file extension on export files and open # these as gzip pipes when the extension is found. # # Version: 2.2.0 (16NOV2010) # # - Remove any leading zeros in export fields: RUNNO,LANE,TILE,X,Y # - For export records with reserved chromosome name identifiers # "QC" and "RM", add the optional field "XC:Z:QC" or "XC:Z:RM" # to the SAM record, so that these cases can be distinguished # from other unmatched reads. # # Version: 2.1.0 (21SEP2010) # # - Additional export record error checking. # - Convert export records with chromosome value of "RM" to unmapped # SAM records. 
# # Version: 2.0.0 (15FEB2010) # # Script updated by Illumina in conjunction with CASAVA 1.7.0 # release. # # Major changes are as follows: # - The CIGAR string has been updated to include all gaps from # ELANDv2 alignments. # - The ELAND single read alignment score is always stored in the # optional "SM" field and the ELAND paired read alignment score # is stored in the optional "AS" field when it exists. # - The MAPQ value is set to the higher of the two alignment scores, # but no greater than 254, i.e. min(254,max(SM,AS)) # - The SAM "proper pair" bit (0x0002) is now set for read pairs # meeting ELAND's expected orientation and insert size criteria. # - The default quality score translation is set for export files # which contain Phred+64 quality values. An option, # "--qlogodds", has been added to translate quality values from # the Solexa+64 format used in export files prior to Pipeline # 1.3 # - The export match descriptor is now reverse-complemented when # necessary such that it always corresponds to the forward # strand of the reference, to be consistent with other # information in the SAM record. It is now written to the # optional 'XD' field (rather than 'MD') to acknowledge its # minor differences from the samtools match descriptor (see # additional detail below). # - An option, "--nofilter", has been added to include reads which # have failed primary analysis quality filtration. Such reads # will have the corresponding SAM flag bit (0x0200) set. # - Labels in the export 'contig' field are preserved by setting # RNAME to "$export_chromosome/$export_contig" when the contig # label exists. # # # Contact: lh3 # Version: 0.1.2 (03JAN2009) # # # ########## Known Conversion Limitations: # # - Export records for reads that map to a position < 1 (allowed # in export format), are converted to unmapped reads in the SAM # record. # - Export records contain the reserved chromosome names: "NM", # "QC","RM" and "CONTROL". 
"NM" indicates that the aligner could # not map the read to the reference sequence set. "QC" means that # the aligner did not attempt to map the read due to some # technical limitation. "RM" means that the read mapped to a set # of 'contaminant' sequences specified in GERALD's RNA-seq # workflow. "CONTROL" means that the read is a control. All of # these alignment types are collapsed to the single unmapped # alignment state in the SAM record, but the optional SAM "XC" # field is used to record the original reserved chromosome name of # the read for all but the "NM" case. # - The export match descriptor is slightly different than the # samtools match descriptor. For this reason it is stored in the # optional SAM field 'XD' (and not 'MD'). Note that the export # match descriptor differs from the samtools version in two # respects: (1) indels are explicitly closed with the '$' # character and (2) insertions must be enumerated in the match # descriptor. For example a 35-base read with a two-base insertion # is described as: 20^2$14 # # # my $version = "2.3.1"; use strict; use warnings; use Getopt::Long; use File::Spec; use List::Util qw(min max); use constant { EXPORT_MACHINE => 0, EXPORT_RUNNO => 1, EXPORT_LANE => 2, EXPORT_TILE => 3, EXPORT_X => 4, EXPORT_Y => 5, EXPORT_INDEX => 6, EXPORT_READNO => 7, EXPORT_READ => 8, EXPORT_QUAL => 9, EXPORT_CHROM => 10, EXPORT_CONTIG => 11, EXPORT_POS => 12, EXPORT_STRAND => 13, EXPORT_MD => 14, EXPORT_SEMAP => 15, EXPORT_PEMAP => 16, EXPORT_PASSFILT => 21, EXPORT_SIZE => 22, }; use constant { SAM_QNAME => 0, SAM_FLAG => 1, SAM_RNAME => 2, SAM_POS => 3, SAM_MAPQ => 4, SAM_CIGAR => 5, SAM_MRNM => 6, SAM_MPOS => 7, SAM_ISIZE => 8, SAM_SEQ => 9, SAM_QUAL => 10, }; # function prototypes for Richard's code sub match_desc_to_cigar($); sub match_desc_frag_length($); sub reverse_compl_match_descriptor($); sub write_header($;$;$); &export2sam; exit; sub export2sam { my $cmdline = $0 . " " . 
join(" ",@ARGV); my $arg_count = scalar @ARGV; my $progname = (File::Spec->splitpath($0))[2]; my $is_logodds_qvals = 0; # if true, assume files contain logodds (i.e. "solexa") quality values my $is_nofilter = 0; my $read1file; my $read2file; my $print_version = 0; my $help = 0; my $result = GetOptions( "qlogodds" => \$is_logodds_qvals, "nofilter" => \$is_nofilter, "read1=s" => \$read1file, "read2=s" => \$read2file, "version" => \$print_version, "help" => \$help ); my $usage = <) { $export_line_count++; my (@s1, @s2); &export2sam_aux($_, $export_line_count, \@s1, \@conv_table, $is_paired, 1, $is_nofilter); if ($is_paired) { my $read2line = <$fh2>; if(not $read2line){ die("\nERROR: read1 and read2 export files do not contain the same number of reads.\n Extra reads observed in read1 file at line no: $export_line_count.\n\n"); } &export2sam_aux($read2line, $export_line_count, \@s2, \@conv_table, $is_paired, 2, $is_nofilter); if (@s1 && @s2) { # then set mate coordinate if($s1[SAM_QNAME] ne $s2[SAM_QNAME]){ die("\nERROR: Non-paired reads in export files on line: $export_line_count.\n Read1: $_ Read2: $read2line\n"); } my $isize = 0; if ($s1[SAM_RNAME] ne '*' && $s1[SAM_RNAME] eq $s2[SAM_RNAME]) { # then calculate $isize my $x1 = ($s1[SAM_FLAG] & 0x10)? $s1[SAM_POS] + length($s1[SAM_SEQ]) : $s1[SAM_POS]; my $x2 = ($s2[SAM_FLAG] & 0x10)? $s2[SAM_POS] + length($s2[SAM_SEQ]) : $s2[SAM_POS]; $isize = $x2 - $x1; } foreach ([\@s1,\@s2,$isize],[\@s2,\@s1,-$isize]){ my ($sa,$sb,$is) = @{$_}; if ($sb->[SAM_RNAME] ne '*') { $sa->[SAM_MRNM] = ($sb->[SAM_RNAME] eq $sa->[SAM_RNAME]) ? 
"=" : $sb->[SAM_RNAME]; $sa->[SAM_MPOS] = $sb->[SAM_POS]; $sa->[SAM_ISIZE] = $is; $sa->[SAM_FLAG] |= 0x20 if ($sb->[SAM_FLAG] & 0x10); } else { $sa->[SAM_FLAG] |= 0x8; } } } } print join("\t", @s1), "\n" if (@s1); print join("\t", @s2), "\n" if (@s2 && $is_paired); } close($fh1); if($is_paired) { while(my $read2line = <$fh2>){ $export_line_count++; die("\nERROR: read1 and read2 export files do not contain the same number of reads.\n Extra reads observed in read2 file at line no: $export_line_count.\n\n"); } close($fh2); } } sub export2sam_aux { my ($line, $line_no, $s, $ct, $is_paired, $read_no, $is_nofilter) = @_; chomp($line); my @t = split("\t", $line); if(scalar(@t) < EXPORT_SIZE) { my $msg="\nERROR: Unexpected number of fields in export record on line $line_no of read$read_no export file. Found " . scalar(@t) . " fields but expected " . EXPORT_SIZE . ".\n"; $msg.="\t...erroneous export record:\n" . $line . "\n\n"; die($msg); } @$s = (); my $isPassFilt = ($t[EXPORT_PASSFILT] eq 'Y'); return if(not ($isPassFilt or $is_nofilter)); # read name my $samQnamePrefix = $t[EXPORT_MACHINE] . (($t[EXPORT_RUNNO] ne "") ? "_" . int($t[EXPORT_RUNNO]) : ""); $s->[SAM_QNAME] = join(':', $samQnamePrefix, int($t[EXPORT_LANE]), int($t[EXPORT_TILE]), int($t[EXPORT_X]), int($t[EXPORT_Y])); # initial flag (will be updated later) $s->[SAM_FLAG] = 0; if($is_paired) { if($t[EXPORT_READNO] != $read_no){ die("\nERROR: read$read_no export file contains record with read number: " .$t[EXPORT_READNO] . 
" on line: $line_no\n\n"); } $s->[SAM_FLAG] |= 1 | 1<<(5 + $read_no); } $s->[SAM_FLAG] |= 0x200 if (not $isPassFilt); # read & quality my $is_export_rev = ($t[EXPORT_STRAND] eq 'R'); if ($is_export_rev) { # then reverse the sequence and quality $s->[SAM_SEQ] = reverse($t[EXPORT_READ]); $s->[SAM_SEQ] =~ tr/ACGTacgt/TGCAtgca/; $s->[SAM_QUAL] = reverse($t[EXPORT_QUAL]); } else { $s->[SAM_SEQ] = $t[EXPORT_READ]; $s->[SAM_QUAL] = $t[EXPORT_QUAL]; } my @convqual = (); foreach (unpack('C*', $s->[SAM_QUAL])){ my $val=$ct->[$_]; if(not defined $val){ my $msg="\nERROR: can't interpret export quality value: " . $_ . " in read$read_no export file, line: $line_no\n"; if( $_ < 64 ) { $msg .= " Use --qlogodds flag to translate logodds (solexa) quality values.\n"; } die($msg . "\n"); } push @convqual,$val; } $s->[SAM_QUAL] = pack('C*',@convqual); # change coding # coor my $has_coor = 0; $s->[SAM_RNAME] = "*"; if (($t[EXPORT_CHROM] eq 'NM') or ($t[EXPORT_CHROM] eq 'QC') or ($t[EXPORT_CHROM] eq 'RM') or ($t[EXPORT_CHROM] eq 'CONTROL')) { $s->[SAM_FLAG] |= 0x4; # unmapped push(@$s,"XC:Z:".$t[EXPORT_CHROM]) if($t[EXPORT_CHROM] ne 'NM'); } elsif ($t[EXPORT_CHROM] =~ /(\d+):(\d+):(\d+)/) { $s->[SAM_FLAG] |= 0x4; # TODO: should I set BAM_FUNMAP in this case? push(@$s, "H0:i:$1", "H1:i:$2", "H2:i:$3") } elsif ($t[EXPORT_POS] < 1) { $s->[SAM_FLAG] |= 0x4; # unmapped } else { $s->[SAM_RNAME] = $t[EXPORT_CHROM]; $s->[SAM_RNAME] .= "/" . $t[EXPORT_CONTIG] if($t[EXPORT_CONTIG] ne ''); $has_coor = 1; } $s->[SAM_POS] = $has_coor? $t[EXPORT_POS] : 0; # print STDERR "t[14] = " . $t[14] . "\n"; my $matchDesc = ''; $s->[SAM_CIGAR] = "*"; if($has_coor){ $matchDesc = ($is_export_rev) ? reverse_compl_match_descriptor($t[EXPORT_MD]) : $t[EXPORT_MD]; if($matchDesc =~ /\^/){ # construct CIGAR string using Richard's function $s->[SAM_CIGAR] = match_desc_to_cigar($matchDesc); # indel processing } else { $s->[SAM_CIGAR] = length($s->[SAM_SEQ]) . 
# Return the export match descriptor as it reads on the opposite strand.
#
# The descriptor is reversed character by character, bases are complemented
# and the indel delimiters '^' and '$' are exchanged (an indel opened with
# '^' and closed with '$' must still open with '^' after reversal).  Reversing
# the string also reverses the digits of every number, so each run of digits
# is flipped back afterwards.
sub reverse_compl_match_descriptor($) {
    my ($match_desc) = @_;

    my $flipped = scalar reverse($match_desc);
    $flipped =~ tr/ACGT\^\$/TGCA\$\^/;

    # Restore the digit order inside each number.
    $flipped =~ s/(\d+)/scalar reverse($1)/ge;

    return $flipped;
}
# Length, in bases, of a match-descriptor fragment that contains no indel.
#
# The fragment is cut into runs of base letters (ACGTN) and the text between
# them.  A piece that contains a number contributes that number (the first
# one found in the piece); any other piece contributes its character count.
sub match_desc_frag_length($) {
    my ($match_desc_str) = @_;

    my $total = 0;
    for my $piece (grep { $_ ne '' } split(/([ACGTN]+)/, $match_desc_str)) {
        if ($piece =~ /(\d+)/) {
            $total += $1;
        } else {
            $total += length($piece);
        }
    }
    return $total;
}
##Expects 0x0010 to mark second read in PE file (as has been the observed case from MAQ output) (important for line 77) ##Verify and read in SAM file my $sam_file = $ARGV[0]; if(!defined($sam_file)) { die("No sam file defined on arg 1"); } unless(-f $sam_file) { die("Sam file does not exist: $sam_file"); } open(SAM, $sam_file) || die("Cannot open sam file"); ##Globals my $current_location = ""; ##Current RNAME being processed my $current_size = 0; ##Size of sequence region being processed my $current_position = 1; ##Current base being processed my $open = 0; ##Number of open reads (PE reads that have not been closed) my %close = (); ##Hash of closing positions, when the current_position gets to this position it subtracts the ##contained value from those open and deletes the indexed position from the hash while (my $line = ) { my @tokens = split /\t/, $line; if ($current_location ne $tokens[2]) { ##Start a new sequence region for (my $i = $current_position; $i <= $current_size; $i++) { ##Close the previous sequence region if (defined($close{$i})) { $open = $open - $close{$i}; delete $close{$i}; } print $open . "\n"; } if ($current_location ne "") { print "\n"; } ##Initiate a new sequence region my @location_tokens = split /:/, $tokens[2]; $current_position = 1; $current_location = $tokens[2]; $current_size = $location_tokens[4]; $open = 0; %close = (); print "#" . $tokens[2] . "\n"; ##Print pileup to just before the first read (will be 0) for (my $current_position = 1; $current_position < $tokens[3]; $current_position++) { print $open . "\n"; } $current_position = $tokens[3]; } else { ##Sequence region already open if ($tokens[3] > $current_position) { ##If the new read's position is greater than the current position ##cycle through to catch up to the current position for (my $i = $current_position; $i < $tokens[3]; $i++) { if (defined($close{$i})) { $open = $open - $close{$i}; delete $close{$i}; } print $open . 
"\n"; } $current_position = $tokens[3]; } } $open++; ##Increment the number of open reads if (($tokens[1] & 0x0080 || $tokens[1] & 0x0040) && $tokens[1] & 0x0010 && $tokens[1] & 0x0002) { ##if second read of mate pair, add close condition $open--; my $parsed_cig = &parseCigar($tokens[5]); my $seq_region_end = $tokens[3] + $parsed_cig->{'M'} + $parsed_cig->{'D'} - 1; if (!defined($close{$seq_region_end + 1})) { $close{$seq_region_end + 1} = 0; } $close{$seq_region_end + 1} = $close{$seq_region_end + 1} + 1; } elsif (!($tokens[1] & 0x0001) || !($tokens[1] & 0x0002)) { ##if unpaired, add close condition my $parsed_cig = &parseCigar($tokens[5]); my $seq_region_end = $tokens[3] + $parsed_cig->{'M'} + $parsed_cig->{'D'} - 1; if (!defined($close{$seq_region_end + 1})) { $close{$seq_region_end + 1} = 0; } $close{$seq_region_end + 1} = $close{$seq_region_end + 1} + 1; } else { #do nothing } } for (my $i = $current_position; $i <= $current_size; $i++) { ##Finish up the last sequence region if (defined($close{$i})) { $open = $open - $close{$i}; delete $close{$i}; } print $open . 
##Reads and tokenizes a simple CIGAR string.
##
##Takes the CIGAR string as its only argument and returns a hash reference:
##  'M', 'I', 'D' => summed lengths of those operations
##  'events'      => array ref of { "t" => op, "n" => length }, in order
##Any other upper-case operation letter is counted as 'M' (and also gets its
##own zero-valued key), matching the file's stated expectation that only
##M, I and D occur.
##
##The sub used to be declared with an empty prototype "()", which declares a
##zero-argument function and only worked because every caller used the
##"&parseCigar(...)" form that bypasses prototypes.  The prototype is dropped
##so plain calls work too.  The per-token "if (... =~ /.../g)" match, whose
##/g flag leaves pos() set on the matched scalar, is replaced by one global
##scan over the whole string; it yields the same tokens.
sub parseCigar {
	my ($cigar_line) = @_;
	my %parsed = ('M' => 0, 'I' => 0, 'D' => 0);
	my @events = ();
	while ($cigar_line =~ /([0-9]+)([A-Z])/g) {
		my ($n, $op) = ($1, $2);
		if (!defined($parsed{$op})) {
			$parsed{$op} = 0;
		}
		my $nt = $op;
		if ($nt ne "M" && $nt ne "D" && $nt ne "I") {
			$nt = "M";
		}
		$parsed{$nt} += $n;
		push @events, { "t" => $nt, "n" => $n };
	}
	$parsed{'events'} = \@events;
	return \%parsed;
}
*/ uint8_t size, map_qual, info1, info2, c[2], flag, alt_qual; uint32_t seqid, pos; int dist; char name[MAX_NAMELEN]; } maqmap1_t; typedef struct { int format, n_ref; char **ref_name; uint64_t n_mapped_reads; maqmap1_t *mapped_reads; } maqmap_t; maqmap_t *maq_new_maqmap() { maqmap_t *mm = (maqmap_t*)calloc(1, sizeof(maqmap_t)); mm->format = MAQMAP_FORMAT_NEW; return mm; } void maq_delete_maqmap(maqmap_t *mm) { int i; if (mm == 0) return; for (i = 0; i < mm->n_ref; ++i) free(mm->ref_name[i]); free(mm->ref_name); free(mm->mapped_reads); free(mm); } maqmap_t *maqmap_read_header(gzFile fp) { maqmap_t *mm; int k, len; mm = maq_new_maqmap(); gzread(fp, &mm->format, sizeof(int)); if (mm->format != MAQMAP_FORMAT_NEW) { if (mm->format > 0) { fprintf(stderr, "** Obsolete map format is detected. Please use 'mapass2maq' command to convert the format.\n"); exit(3); } assert(mm->format == MAQMAP_FORMAT_NEW); } gzread(fp, &mm->n_ref, sizeof(int)); mm->ref_name = (char**)calloc(mm->n_ref, sizeof(char*)); for (k = 0; k != mm->n_ref; ++k) { gzread(fp, &len, sizeof(int)); mm->ref_name[k] = (char*)malloc(len * sizeof(char)); gzread(fp, mm->ref_name[k], len); } /* read number of mapped reads */ gzread(fp, &mm->n_mapped_reads, sizeof(uint64_t)); return mm; } void maq2tam_core(gzFile fp, const char *rg) { maqmap_t *mm; maqmap1_t mm1, *m1; int ret; m1 = &mm1; mm = maqmap_read_header(fp); while ((ret = gzread(fp, m1, sizeof(maqmap1_t))) == sizeof(maqmap1_t)) { int j, flag = 0, se_mapq = m1->seq[MAX_READLEN-1]; if (m1->flag) flag |= 1; if ((m1->flag&PAIRFLAG_PAIRED) || ((m1->flag&PAIRFLAG_SW) && m1->flag != 192)) flag |= 2; if (m1->flag == 192) flag |= 4; if (m1->flag == 64) flag |= 8; if (m1->pos&1) flag |= 0x10; if ((flag&1) && m1->dist != 0) { int c; if (m1->dist > 0) { if (m1->flag&(PAIRFLAG_FF|PAIRFLAG_RF)) c = 0; else if (m1->flag&(PAIRFLAG_FR|PAIRFLAG_RR)) c = 1; else c = m1->pos&1; } else { if (m1->flag&(PAIRFLAG_FF|PAIRFLAG_FR)) c = 0; else if (m1->flag&(PAIRFLAG_RF|PAIRFLAG_RR)) c 
= 1; else c = m1->pos&1; } if (c) flag |= 0x20; } if (m1->flag) { int l = strlen(m1->name); if (m1->name[l-2] == '/') { flag |= (m1->name[l-1] == '1')? 0x40 : 0x80; m1->name[l-2] = '\0'; } } printf("%s\t%d\t", m1->name, flag); printf("%s\t%d\t", mm->ref_name[m1->seqid], (m1->pos>>1)+1); if (m1->flag == 130) { int c = (int8_t)m1->seq[MAX_READLEN-1]; printf("%d\t", m1->alt_qual); if (c == 0) printf("%dM\t", m1->size); else { if (c > 0) printf("%dM%dI%dM\t", m1->map_qual, c, m1->size - m1->map_qual - c); else printf("%dM%dD%dM\t", m1->map_qual, -c, m1->size - m1->map_qual); } se_mapq = 0; // zero SE mapQ for reads aligned by SW } else { if (flag&4) printf("0\t*\t"); else printf("%d\t%dM\t", m1->map_qual, m1->size); } printf("*\t0\t%d\t", m1->dist); for (j = 0; j != m1->size; ++j) { if (m1->seq[j] == 0) putchar('N'); else putchar("ACGT"[m1->seq[j]>>6&3]); } putchar('\t'); for (j = 0; j != m1->size; ++j) putchar((m1->seq[j]&0x3f) + 33); putchar('\t'); if (rg) printf("RG:Z:%s\t", rg); if (flag&4) { // unmapped printf("MF:i:%d\n", m1->flag); } else { printf("MF:i:%d\t", m1->flag); if (m1->flag) printf("AM:i:%d\tSM:i:%d\t", m1->alt_qual, se_mapq); printf("NM:i:%d\tUQ:i:%d\tH0:i:%d\tH1:i:%d\n", m1->info1&0xf, m1->info2, m1->c[0], m1->c[1]); } } if (ret > 0) fprintf(stderr, "Truncated! Continue anyway.\n"); maq_delete_maqmap(mm); } int main(int argc, char *argv[]) { gzFile fp; if (argc == 1) { fprintf(stderr, "Version: %s\n", PACKAGE_VERSION); fprintf(stderr, "Usage: maq2sam []\n"); return 1; } fp = strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r"); maq2tam_core(fp, argc > 2? argv[2] : 0); gzclose(fp); return 0; } samtools-0.1.19/misc/md5.c000066400000000000000000000215631212162403000152250ustar00rootroot00000000000000/* * This code implements the MD5 message-digest algorithm. * The algorithm is due to Ron Rivest. This code was * written by Colin Plumb in 1993, no copyright is claimed. * This code is in the public domain; do with it what you wish. 
/*
 * Rewrite each of the `longs` consecutive 4-byte groups at `buf` so that,
 * read back as a native 32-bit word, it has the value of the original bytes
 * interpreted least-significant-byte first.
 *
 * Note: this code is harmless on little-endian machines (the bytes come out
 * unchanged).
 *
 * The word is stored with memcpy() rather than through a uint32_t pointer
 * cast, so the byte buffer needs no particular alignment and is never
 * accessed through an incompatible type.  A count of zero is a no-op; the
 * former do/while form decremented before testing and would wrap around.
 */
void byteReverse(unsigned char *buf, unsigned longs)
{
    while (longs-- > 0) {
	uint32_t t = (uint32_t) buf[3] << 24 | (uint32_t) buf[2] << 16 |
	    (uint32_t) buf[1] << 8 | (uint32_t) buf[0];
	memcpy(buf, &t, sizeof t);
	buf += 4;
    }
}
*/ void MD5Update(ctx, buf, len) struct MD5Context *ctx; unsigned char *buf; unsigned len; { uint32_t t; /* Update bitcount */ t = ctx->bits[0]; if ((ctx->bits[0] = t + ((uint32_t) len << 3)) < t) ctx->bits[1]++; /* Carry from low to high */ ctx->bits[1] += len >> 29; t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */ /* Handle any leading odd-sized chunks */ if (t) { unsigned char *p = (unsigned char *) ctx->in + t; t = 64 - t; if (len < t) { memcpy(p, buf, len); return; } memcpy(p, buf, t); byteReverse(ctx->in, 16); MD5Transform(ctx->buf, (uint32_t *) ctx->in); buf += t; len -= t; } /* Process data in 64-byte chunks */ while (len >= 64) { memcpy(ctx->in, buf, 64); byteReverse(ctx->in, 16); MD5Transform(ctx->buf, (uint32_t *) ctx->in); buf += 64; len -= 64; } /* Handle any remaining bytes of data. */ memcpy(ctx->in, buf, len); } /* * Final wrapup - pad to 64-byte boundary with the bit pattern * 1 0* (64-bit count of bits processed, MSB-first) */ void MD5Final(digest, ctx) unsigned char digest[16]; struct MD5Context *ctx; { unsigned count; unsigned char *p; /* Compute number of bytes mod 64 */ count = (ctx->bits[0] >> 3) & 0x3F; /* Set the first char of padding to 0x80. 
This is safe since there is always at least one byte free */ p = ctx->in + count; *p++ = 0x80; /* Bytes of padding needed to make 64 bytes */ count = 64 - 1 - count; /* Pad out to 56 mod 64 */ if (count < 8) { /* Two lots of padding: Pad the first block to 64 bytes */ memset(p, 0, count); byteReverse(ctx->in, 16); MD5Transform(ctx->buf, (uint32_t *) ctx->in); /* Now fill the next block with 56 bytes */ memset(ctx->in, 0, 56); } else { /* Pad block to 56 bytes */ memset(p, 0, count - 8); } byteReverse(ctx->in, 14); /* Append length in bits and transform */ ((uint32_t *) ctx->in)[14] = ctx->bits[0]; ((uint32_t *) ctx->in)[15] = ctx->bits[1]; MD5Transform(ctx->buf, (uint32_t *) ctx->in); byteReverse((unsigned char *) ctx->buf, 4); memcpy(digest, ctx->buf, 16); memset(ctx, 0, sizeof(ctx)); /* In case it's sensitive */ } /* The four core functions - F1 is optimized somewhat */ /* #define F1(x, y, z) (x & y | ~x & z) */ #define F1(x, y, z) (z ^ (x & (y ^ z))) #define F2(x, y, z) F1(z, x, y) #define F3(x, y, z) (x ^ y ^ z) #define F4(x, y, z) (y ^ (x | ~z)) /* This is the central step in the MD5 algorithm. */ #define MD5STEP(f, w, x, y, z, data, s) \ ( w += f(x, y, z) + data, w = w<>(32-s), w += x ) /* * The core of the MD5 algorithm, this alters an existing MD5 hash to * reflect the addition of 16 longwords of new data. MD5Update blocks * the data and converts bytes into longwords for this routine. 
/*
 * The core of the MD5 algorithm, this alters an existing MD5 hash to
 * reflect the addition of 16 longwords of new data.  MD5Update blocks
 * the data and converts bytes into longwords for this routine.
 *
 *   buf - the four 32-bit chaining words, updated in place
 *   in  - the 16 message words of one 64-byte block (not modified)
 *
 * The 64 steps are driven from tables of the additive constants and rotate
 * counts instead of 64 expansions of the MD5STEP macro, so the routine has
 * no dependency on that macro, and the definition uses a prototype that
 * matches its declaration.  The constants, rotate counts, message-word
 * order and the four round functions are exactly those of the unrolled
 * form; the result is identical.
 */
void MD5Transform(uint32_t buf[4], uint32_t in[16])
{
    /* additive constant of each step, in step order */
    static const uint32_t step_const[64] = {
	0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
	0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
	0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
	0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,

	0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
	0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
	0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
	0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,

	0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
	0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
	0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
	0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,

	0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
	0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
	0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
	0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
    };
    /* left-rotate count of each step: four values per round, repeating */
    static const unsigned char step_rot[4][4] = {
	{ 7, 12, 17, 22 },
	{ 5,  9, 14, 20 },
	{ 4, 11, 16, 23 },
	{ 6, 10, 15, 21 }
    };
    uint32_t a = buf[0], b = buf[1], c = buf[2], d = buf[3];
    unsigned i;

    for (i = 0; i < 64; ++i) {
	const unsigned round = i >> 4;
	const unsigned s = step_rot[round][i & 3];
	uint32_t f;
	unsigned g;		/* index of the message word used by this step */

	switch (round) {
	case 0:			/* F1(x, y, z) = z ^ (x & (y ^ z)) */
	    f = d ^ (b & (c ^ d));
	    g = i;
	    break;
	case 1:			/* F2(x, y, z) = F1(z, x, y) */
	    f = c ^ (d & (b ^ c));
	    g = (5 * i + 1) & 15;
	    break;
	case 2:			/* F3(x, y, z) = x ^ y ^ z */
	    f = b ^ c ^ d;
	    g = (3 * i + 5) & 15;
	    break;
	default:		/* F4(x, y, z) = y ^ (x | ~z) */
	    f = c ^ (b | ~d);
	    g = (7 * i) & 15;
	    break;
	}

	/* w += f + data; w = rotl(w, s); w += x -- then rotate the roles
	   of the four working words for the next step */
	f += a + in[g] + step_const[i];
	a = d;
	d = c;
	c = b;
	b += (f << s) | (f >> (32 - s));
    }

    buf[0] += a;
    buf[1] += b;
    buf[2] += c;
    buf[3] += d;
}
fopen(fn, "r") : stdin; if (fp == 0) { fprintf(stderr, "md5sum: %s: No such file or directory\n", fn); exit(1); } MD5Init(&md5); while ((l = fread(buf, 1, 4096, fp)) > 0) MD5Update(&md5, buf, l); MD5Final(digest, &md5); if (fp != stdin) fclose(fp); for (l = 0; l < 16; ++l) printf("%c%c", HEX_STR[digest[l]>>4&0xf], HEX_STR[digest[l]&0xf]); printf(" %s\n", fn); } int main(int argc, char *argv[]) { int i; if (argc == 1) md5_one("-"); else for (i = 1; i < argc; ++i) md5_one(argv[i]); return 0; } #endif samtools-0.1.19/misc/md5.h000066400000000000000000000032451212162403000152270ustar00rootroot00000000000000/* This file is adapted from a program in this page: http://www.fourmilab.ch/md5/ The original source code does not work on 64-bit machines due to the wrong typedef "uint32". I also added prototypes. -lh3 */ #ifndef MD5_H #define MD5_H /* The following tests optimise behaviour on little-endian machines, where there is no need to reverse the byte order of 32 bit words in the MD5 computation. By default, HIGHFIRST is defined, which indicates we're running on a big-endian (most significant byte first) machine, on which the byteReverse function in md5.c must be invoked. However, byteReverse is coded in such a way that it is an identity function when run on a little-endian machine, so calling it on such a platform causes no harm apart from wasting time. If the platform is known to be little-endian, we speed things up by undefining HIGHFIRST, which defines byteReverse as a null macro. Doing things in this manner insures we work on new platforms regardless of their byte order. */ #define HIGHFIRST #if __LITTLE_ENDIAN__ != 0 #undef HIGHFIRST #endif #include struct MD5Context { uint32_t buf[4]; uint32_t bits[2]; unsigned char in[64]; }; void MD5Init(struct MD5Context *ctx); void MD5Update(struct MD5Context *ctx, unsigned char *buf, unsigned len); void MD5Final(unsigned char digest[16], struct MD5Context *ctx); /* * This is needed to make RSAREF happy on some MS-DOS compilers. 
*/ typedef struct MD5Context MD5_CTX; /* Define CHECK_HARDWARE_PROPERTIES to have main,c verify byte order and uint32_t settings. */ #define CHECK_HARDWARE_PROPERTIES #endif /* !MD5_H */ samtools-0.1.19/misc/md5fa.c000066400000000000000000000030161212162403000155250ustar00rootroot00000000000000#include #include #include "md5.h" #include "kseq.h" #define HEX_STR "0123456789abcdef" KSEQ_INIT(gzFile, gzread) static void md5_one(const char *fn) { MD5_CTX md5_one, md5_all; int l, i, k; gzFile fp; kseq_t *seq; unsigned char unordered[16], digest[16]; for (l = 0; l < 16; ++l) unordered[l] = 0; fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r"); if (fp == 0) { fprintf(stderr, "md5fa: %s: No such file or directory\n", fn); exit(1); } MD5Init(&md5_all); seq = kseq_init(fp); while ((l = kseq_read(seq)) >= 0) { for (i = k = 0; i < seq->seq.l; ++i) { if (islower(seq->seq.s[i])) seq->seq.s[k++] = toupper(seq->seq.s[i]); else if (isupper(seq->seq.s[i])) seq->seq.s[k++] = seq->seq.s[i]; } MD5Init(&md5_one); MD5Update(&md5_one, (unsigned char*)seq->seq.s, k); MD5Final(digest, &md5_one); for (l = 0; l < 16; ++l) { printf("%c%c", HEX_STR[digest[l]>>4&0xf], HEX_STR[digest[l]&0xf]); unordered[l] ^= digest[l]; } printf(" %s %s\n", fn, seq->name.s); MD5Update(&md5_all, (unsigned char*)seq->seq.s, k); } MD5Final(digest, &md5_all); kseq_destroy(seq); for (l = 0; l < 16; ++l) printf("%c%c", HEX_STR[digest[l]>>4&0xf], HEX_STR[digest[l]&0xf]); printf(" %s >ordered\n", fn); for (l = 0; l < 16; ++l) printf("%c%c", HEX_STR[unordered[l]>>4&0xf], HEX_STR[unordered[l]&0xf]); printf(" %s >unordered\n", fn); } int main(int argc, char *argv[]) { int i; if (argc == 1) md5_one("-"); else for (i = 1; i < argc; ++i) md5_one(argv[i]); return 0; } samtools-0.1.19/misc/novo2sam.pl000077500000000000000000000135451212162403000165010ustar00rootroot00000000000000#!/usr/bin/perl -w # Contact: lh3 # Version: 0.1.3 #Modified by Zayed Albertyn(zayed.albertyn@gmail.com) & Colin 
Hercus(colin@novocraft.com) #use strict; #use warnings; use Data::Dumper; use Getopt::Std; &novo2sam; exit; sub mating { my ($s1, $s2) = @_; my $isize = 0; if ($s1->[2] ne '*' && $s1->[2] eq $s2->[2]) { # then calculate $isize my $x1 = ($s1->[1] & 0x10)? $s1->[3] + length($s1->[9]) : $s1->[3]; my $x2 = ($s2->[1] & 0x10)? $s2->[3] + length($s2->[9]) : $s2->[3]; $isize = $x2 - $x1; } # update mate coordinate if ($s2->[2] ne '*') { @$s1[6..8] = (($s2->[2] eq $s1->[2])? "=" : $s2->[2], $s2->[3], $isize); $s1->[1] |= 0x20 if ($s2->[1] & 0x10); } else { $s1->[1] |= 0x8; } if ($s1->[2] ne '*') { @$s2[6..8] = (($s1->[2] eq $s2->[2])? "=" : $s1->[2], $s1->[3], -$isize); $s2->[1] |= 0x20 if ($s1->[1] & 0x10); } else { $s2->[1] |= 0x8; } } sub novo2sam { my %opts = (); getopts("p", \%opts); die("Usage: novo2sam.pl [-p] \n") if (@ARGV == 0); my $is_paired = defined($opts{p}); # core loop my @s1 = (); my @s2 = (); my ($s_last, $s_curr) = (\@s1, \@s2); while (<>) { next if (/^#/); next if (/(QC|NM)\s*$/ || /(R\s+\d+)\s*$/); &novo2sam_aux($_, $s_curr, $is_paired); if (@$s_last != 0 && $s_last->[0] eq $s_curr->[0]) { &mating($s_last, $s_curr); print join("\t", @$s_last), "\n"; print join("\t", @$s_curr), "\n"; @$s_last = (); @$s_curr = (); } else { print join("\t", @$s_last), "\n" if (@$s_last != 0); my $s = $s_last; $s_last = $s_curr; $s_curr = $s; } } print join("\t", @$s_last), "\n" if (@$s_last != 0); } sub novo2sam_aux { my ($line, $s, $is_paired) = @_; chomp($line); my @t = split(/\s+/, $line); my @variations = @t[13 .. $#t]; @$s = (); return if ($t[4] ne 'U'); my $len = length($t[2]); # read name $s->[0] = substr($t[0], 1); $s->[0] =~ s/\/[12]$//g; # initial flag (will be updated later) $s->[1] = 0; $s->[1] |= 1 | 1<<($t[1] eq 'L'? 
6 : 7); $s->[1] |= 2 if ($t[10] eq '.'); # read & quality if ($t[9] eq 'R') { $s->[9] = reverse($t[2]); $s->[10] = reverse($t[3]); $s->[9] =~ tr/ACGTRYMKWSNacgtrymkwsn/TGCAYRKMWSNtgcayrkmwsn/; } else { $s->[9] = $t[2]; $s->[10] = $t[3]; } # cigar my $cigarstring =""; if (scalar @variations ==0 ) { $s->[5] = $len . "M"; # IMPORTANT: this cigar is not correct for gapped alignment } else { #convert to correct CIGAR my $tmpstr = join" ",@variations ; if ( $tmpstr=~ /\+|\-/ ) { $cigarstring = cigar_method($line,\@variations,$len); $s->[5]=$cigarstring; } else { $s->[5]=$len. "M"; } } # coor $s->[2] = substr($t[7], 1); $s->[3] = $t[8]; $s->[1] |= 0x10 if ($t[9] eq 'R'); # mapQ $s->[4] = $t[5] > $t[6]? $t[5] : $t[6]; # mate coordinate $s->[6] = '*'; $s->[7] = $s->[8] = 0; # aux push(@$s, "NM:i:".(@t-13)); my $md = ''; $md = mdtag($md,$line,\@variations,$len); push(@$s, "MD:Z:$md"); } sub mdtag { my $oldmd = shift; my $line = shift; my $ref =shift; my $rdlen = shift; my @variations = @$ref; my $string=""; my $mdtag=""; my $t=1; my $q=1; my $deleteflag=0; my $len =0; foreach $string (@variations) { my ($indeltype,$insert) = indeltype($string); if ($indeltype eq "+") { $len = length ($insert); $q+=$len; next; } my $pos = $1 if $string =~ /^(\d+)/; $len = $pos - $t; if ($len !=0 || ($deleteflag eq 1 && $indeltype eq ">")) { $mdtag.=$len; } $t+=$len; $q+=$len; if ($indeltype eq ">") { $mdtag.=$insert; $deleteflag=0; $t+=1; $q+=1; } if ($indeltype eq "-") { my $deletedbase = $2 if $string =~ /(\d+)\-([A-Za-z]+)/; if ($deleteflag == 0 ) { $mdtag.="^"; } $mdtag.=$deletedbase; $deleteflag=1; $t+=1; } } $len = $rdlen - $q + 1; if ($len > 0) { $mdtag.="$len"; } # print "In:$line\n"; # print "MD: OLD => NEW\nMD: $oldmd => $mdtag\n\n"; return $mdtag; } sub indeltype { my $string = shift; my $insert=""; my $indeltype; if ($string =~ /([A-Za-z]+)\>/) { $indeltype=">"; $insert=$1; } elsif ($string =~ /\-/) { $indeltype="-"; } elsif ($string =~ /\+([A-Za-z]+)/) { $indeltype="+"; 
$insert=$1; } return ($indeltype,$insert); } sub cigar_method { my $line = shift; my $ref =shift; my $rdlen = shift; my @variations = @$ref; my $string=""; my $type=""; my $t =1; my $q=1; my $indeltype=""; my $cigar= ""; my $insert = ""; my $len=0; my @cig=(); foreach $string (@variations) { next if $string =~ />/; my $pos = $1 if $string =~ /^(\d+)/; if ($string =~ /\+([A-Za-z]+)/) { $indeltype="+"; $insert = $1; }elsif ($string =~ /\-([A-Za-z]+)/) { $indeltype="-"; $insert = $1; } #print "$pos $indeltype $insert $t $q\n"; $len = $pos - $t; if ( $len > 0) { $cigar.=$len."M"; push(@cig,$len."M"); } $t+=$len; $q+=$len; if ($indeltype eq "-") { $cigar.="D"; push(@cig,"D"); $t++; } if ($indeltype eq "+") { $len = length ($insert); if ($len == 1) { $cigar.="I"; push(@cig,"I"); } if ($len > 1) { $cigar.=$len."I"; push(@cig,$len."I") } $q+=$len; } $insert=""; } $len= $rdlen - $q + 1; if ($len > 0) { $cigar.=$len."M"; push(@cig,$len."M"); } $cigar = newcigar($cigar,'D'); $cigar = newcigar($cigar,'I'); #print "$line\n"; #print "c CIGAR:\t$cigar\n\n"; return $cigar; } sub newcigar { my $cigar = shift; my $char = shift; my $new = ""; my $copy = $cigar; #print "$cigar\n"; $copy =~ s/^($char+)/$1;/g; #print "$copy\n"; $copy =~ s/([^0-9$char])($char+)/$1;$2;/g; #print "$copy\n"; my @parts = split(/;/,$copy); my $el=""; foreach $el (@parts) { #print "$el\n"; if ($el =~ /^$char+$/) { $new.=length($el).$char; }else { $new.=$el; } } return $new; } samtools-0.1.19/misc/plot-bamcheck000077500000000000000000000717211212162403000170340ustar00rootroot00000000000000#!/usr/bin/env perl # # Author: petr.danecek@sanger # use strict; use warnings; use Carp; my $opts = parse_params(); parse_bamcheck($opts); plot_qualities($opts); plot_acgt_cycles($opts); plot_gc($opts); plot_gc_depth($opts); plot_isize($opts); plot_coverage($opts); plot_mismatches_per_cycle($opts); plot_indel_dist($opts); plot_indel_cycles($opts); exit; #-------------------------------- sub error { my (@msg) = @_; if ( scalar 
@msg ) { confess @msg; } die "Usage: plot-bamcheck [OPTIONS] file.bam.bc\n", " plot-bamcheck -p outdir/ file.bam.bc\n", "Options:\n", " -k, --keep-files Do not remove temporary files.\n", " -p, --prefix The output files prefix, add a slash to create new directory.\n", " -r, --ref-stats Optional reference stats file with expected GC content (created with -s).\n", " -s, --do-ref-stats Calculate reference sequence GC for later use with -r\n", " -t, --targets Restrict -s to the listed regions (tab-delimited chr,from,to. 1-based, inclusive)\n", " -h, -?, --help This help message.\n", "\n"; } sub parse_params { $0 =~ s{^.+/}{}; my $opts = { args=>join(' ',$0,@ARGV) }; while (defined(my $arg=shift(@ARGV))) { if ( $arg eq '-k' || $arg eq '--keep-files' ) { $$opts{keep_files}=1; next; } if ( $arg eq '-r' || $arg eq '--ref-stats' ) { $$opts{ref_stats}=shift(@ARGV); next; } if ( $arg eq '-s' || $arg eq '--do-ref-stats' ) { $$opts{do_ref_stats}=shift(@ARGV); next; } if ( $arg eq '-t' || $arg eq '--targets' ) { $$opts{targets}=shift(@ARGV); next; } if ( $arg eq '-p' || $arg eq '--prefix' ) { $$opts{prefix}=shift(@ARGV); next; } if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); } if ( -e $arg ) { $$opts{bamcheck}=$arg; next; } error("Unknown parameter or non-existent file \"$arg\". 
Run -h for help.\n"); } if ( exists($$opts{do_ref_stats }) ) { do_ref_stats($opts); exit; } if ( !exists($$opts{bamcheck}) ) { error("No bamcheck file?\n") } if ( !exists($$opts{prefix}) ) { error("Expected -p parameter.\n") } if ( $$opts{prefix}=~m{/$} ) { `mkdir -p $$opts{prefix}`; } elsif ( !($$opts{prefix}=~/-$/) ) { $$opts{prefix} .= '-'; } return $opts; } # Creates GC stats for either the whole reference or only on target regions for exome QC sub do_ref_stats { my ($opts) = @_; my %targets = (); if ( exists($$opts{targets}) ) { my ($prev_chr,$prev_pos); open(my $fh,'<',$$opts{targets}) or error("$$opts{targets}: $!"); while (my $line=<$fh>) { if ( $line=~/^#/ ) { next; } my ($chr,$from,$to) = split(/\s+/,$line); chomp($to); push @{$targets{$chr}}, $from,$to; if ( !defined $prev_chr or $chr ne $prev_chr ) { $prev_chr=$chr; $prev_pos=$from } if ( $prev_pos > $from ) { error("The file must be sorted: $$opts{targets}\n"); } $prev_pos = $from; } close($fh); } my $_len = 60; # for now do only standard fasta's with 60 bases per line my %gc_counts = (); my ($skip_chr,$pos,$ireg,$regions); open(my $fh,'<',$$opts{do_ref_stats}) or error("$$opts{do_ref_stats}: $!"); while (my $line=<$fh>) { if ( $line=~/^>/ ) { if ( !scalar %targets ) { next; } if ( !($line=~/>(\S+)/) ) { error("FIXME: could not determine chromosome name: $line"); } if ( !exists($targets{$1}) ) { $skip_chr=$1; next; } undef $skip_chr; $pos = 0; $ireg = 0; $regions = $targets{$1}; } if ( defined $skip_chr ) { next; } # Only $_len sized lines are considered and no chopping for target regions. 
chomp($line); my $len = length($line); if ( $len ne $_len ) { next; } if ( scalar %targets ) { while ( $ireg<@$regions && $$regions[$ireg+1]<=$pos ) { $ireg += 2; } $pos += $len; if ( $ireg==@$regions ) { next; } if ( $pos < $$regions[$ireg] ) { next; } } my $gc_count = 0; for (my $i=0; $i<$len; $i++) { my $base = substr($line,$i,1); if ( $base eq 'g' || $base eq 'G' || $base eq 'c' || $base eq 'C' ) { $gc_count++; } } $gc_counts{$gc_count}++; } print "# Generated by $$opts{args}\n"; print "# The columns are: GC content bin, normalized frequency\n"; my $max; for my $count (values %gc_counts) { if ( !defined $max or $count>$max ) { $max=$count; } } for my $gc (sort {$a<=>$b} keys %gc_counts) { if ( $gc==0 ) { next; } printf "%f\t%f\n", $gc*100./$_len, $gc_counts{$gc}/$max; } } sub plot { my ($cmdfile) = @_; my $cmd = "gnuplot $cmdfile"; system($cmd); if ( $? ) { error("The command exited with non-zero status $?:\n\t$cmd\n\n"); } } sub parse_bamcheck { my ($opts) = @_; open(my $fh,'<',$$opts{bamcheck}) or error("$$opts{bamcheck}: $!"); my $line = <$fh>; if ( !($line=~/^# This file was produced by bamcheck (\S+)/) ) { error("Sanity check failed: was this file generated by bamcheck?"); } $$opts{dat}{version} = $1; while ($line=<$fh>) { if ( $line=~/^#/ ) { next; } my @items = split(/\t/,$line); chomp($items[-1]); if ( $items[0] eq 'SN' ) { $$opts{dat}{$items[1]} = splice(@items,2); next; } push @{$$opts{dat}{$items[0]}}, [splice(@items,1)]; } close($fh); # Check sanity if ( !exists($$opts{dat}{'sequences:'}) or !$$opts{dat}{'sequences:'} ) { error("Sanity check failed: no sequences found by bamcheck??\n"); } } sub older_than { my ($opts,$version) = @_; my ($year,$month,$day) = split(/-/,$version); $version = $$opts{dat}{version}; if ( !($version=~/\((\d+)-(\d+)-(\d+)\)$/) ) { return 1; } if ( $1<$year ) { return 1; } elsif ( $1>$year ) { return 0; } if ( $2<$month ) { return 1; } elsif ( $2>$month ) { return 0; } if ( $3<$day ) { return 1; } return 0; } sub 
get_defaults { my ($opts,$img_fname,%args) = @_; if ( !($img_fname=~/\.png$/i) ) { error("FIXME: currently only PNG supported. (Easy to extend.)\n"); } # Determine the gnuplot script file name my $gp_file = $img_fname; $gp_file =~ s{\.[^.]+$}{.gp}; if ( !($gp_file=~/.gp$/) ) { $gp_file .= '.gp'; } # Determine the default title: # 5446_6/5446_6.bam.bc.gp -> 5446_6 # test.aaa.png -> test.aaa if ( !($$opts{bamcheck}=~m{([^/]+?)(?:\.bam)?(?:\.bc)?$}i) ) { error("FIXME: Could not determine the title from [$img_fname]\n"); } my $title = $1; my $dir = $gp_file; $dir =~ s{/[^/]+$}{}; if ( $dir && $dir ne $gp_file ) { `mkdir -p $dir`; } my $wh = exists($args{wh}) ? $args{wh} : '600,400'; open(my $fh,'>',$gp_file) or error("$gp_file: $!"); return { title => $title, gp => $gp_file, img => $img_fname, fh => $fh, terminal => qq[set terminal png size $wh truecolor], grid => 'set grid xtics ytics y2tics back lc rgb "#cccccc"', }; } sub percentile { my ($p,@vals) = @_; my $N = 0; for my $val (@vals) { $N += $val; } my $n = $p*($N+1)/100.; my $k = int($n); my $d = $n-$k; if ( $k<=0 ) { return 0; } if ( $k>=$N ) { return scalar @vals-1; } my $cnt; for (my $i=0; $i<@vals; $i++) { $cnt += $vals[$i]; if ( $cnt>=$k ) { return $i; } } error("FIXME: this should not happen [percentile]\n"); } sub plot_qualities { my ($opts) = @_; if ( !exists($$opts{dat}{FFQ}) or !@{$$opts{dat}{FFQ}} ) { return; } my $yrange = @{$$opts{dat}{FFQ}[0]} > 50 ? @{$$opts{dat}{FFQ}[0]} : 50; my $is_paired = $$opts{dat}{'is paired:'}; # Average quality per cycle, forward and reverse reads in one plot my $args = get_defaults($opts,"$$opts{prefix}quals.png"); my $fh = $$args{fh}; print $fh qq[ $$args{terminal} set output "$$args{img}" $$args{grid} set ylabel "Average Quality" set xlabel "Cycle" set yrange [0:$yrange] set title "$$args{title}" plot '-' using 1:2 with lines title 'Forward reads' ] . ($is_paired ? q[, '-' using 1:2 with lines title 'Reverse reads'] : '') . 
q[ ]; my (@fp75,@fp50,@fmean); my (@lp75,@lp50,@lmean); my ($fmax,$fmax_qual,$fmax_cycle); my ($lmax,$lmax_qual,$lmax_cycle); for my $cycle (@{$$opts{dat}{FFQ}}) { my $sum=0; my $n=0; for (my $iqual=1; $iqual<@$cycle; $iqual++) { $sum += $$cycle[$iqual]*$iqual; $n += $$cycle[$iqual]; if ( !defined $fmax or $fmax<$$cycle[$iqual] ) { $fmax=$$cycle[$iqual]; $fmax_qual=$iqual; $fmax_cycle=$$cycle[0]; } } my $p25 = percentile(25,(@$cycle)[1..$#$cycle]); my $p50 = percentile(50,(@$cycle)[1..$#$cycle]); my $p75 = percentile(75,(@$cycle)[1..$#$cycle]); if ( !$n ) { next; } push @fp75, "$$cycle[0]\t$p25\t$p75\n"; push @fp50, "$$cycle[0]\t$p50\n"; push @fmean, sprintf "%d\t%.2f\n", $$cycle[0],$sum/$n; printf $fh $fmean[-1]; } print $fh "end\n"; if ( $is_paired ) { for my $cycle (@{$$opts{dat}{LFQ}}) { my $sum=0; my $n=0; for (my $iqual=1; $iqual<@$cycle; $iqual++) { $sum += $$cycle[$iqual]*$iqual; $n += $$cycle[$iqual]; if ( !defined $lmax or $lmax<$$cycle[$iqual] ) { $lmax=$$cycle[$iqual]; $lmax_qual=$iqual; $lmax_cycle=$$cycle[0]; } } my $p25 = percentile(25,(@$cycle)[1..$#$cycle]); my $p50 = percentile(50,(@$cycle)[1..$#$cycle]); my $p75 = percentile(75,(@$cycle)[1..$#$cycle]); if ( !$n ) { next; } push @lp75, "$$cycle[0]\t$p25\t$p75\n"; push @lp50, "$$cycle[0]\t$p50\n"; push @lmean, sprintf "%d\t%.2f\n", $$cycle[0],$sum/$n; printf $fh $lmean[-1]; } print $fh "end\n"; } close($fh); plot($$args{gp}); # Average, mean and quality percentiles per cycle, forward and reverse reads in separate plots $args = get_defaults($opts,"$$opts{prefix}quals2.png",wh=>'700,500'); $fh = $$args{fh}; print $fh qq[ $$args{terminal} set output "$$args{img}" $$args{grid} set multiplot set rmargin 0 set lmargin 0 set tmargin 0 set bmargin 0 set origin 0.1,0.1 set size 0.4,0.8 set yrange [0:$yrange] set ylabel "Quality" set xlabel "Cycle (fwd reads)" plot '-' using 1:2:3 with filledcurve lt 1 lc rgb "#cccccc" t '25-75th percentile' , '-' using 1:2 with lines lc rgb "#000000" t 'Median', '-' using 
1:2 with lines lt 1 t 'Mean' ]; print $fh join('',@fp75),"end\n"; print $fh join('',@fp50),"end\n"; print $fh join('',@fmean),"end\n"; if ( $is_paired ) { print $fh qq[ set origin 0.55,0.1 set size 0.4,0.8 unset ytics set y2tics mirror set yrange [0:$yrange] unset ylabel set xlabel "Cycle (rev reads)" set label "$$args{title}" at screen 0.5,0.95 center plot '-' using 1:2:3 with filledcurve lt 1 lc rgb "#cccccc" t '25-75th percentile' , '-' using 1:2 with lines lc rgb "#000000" t 'Median', '-' using 1:2 with lines lt 2 t 'Mean' ]; print $fh join('',@lp75),"end\n"; print $fh join('',@lp50),"end\n"; print $fh join('',@lmean),"end\n"; } close($fh); plot($$args{gp}); # Quality distribution per cycle, the distribution is for each cycle plotted as a separate curve $args = get_defaults($opts,"$$opts{prefix}quals3.png",wh=>'600,600'); $fh = $$args{fh}; my $nquals = @{$$opts{dat}{FFQ}[0]}-1; my $ncycles = @{$$opts{dat}{FFQ}}; print $fh qq[ $$args{terminal} set output "$$args{img}" $$args{grid} set multiplot set rmargin 0 set lmargin 0 set tmargin 0 set bmargin 0 set origin 0.15,0.52 set size 0.8,0.4 set title "$$args{title}" set ylabel "Frequency (fwd reads)" set label "Cycle $fmax_cycle" at $fmax_qual+1,$fmax unset xlabel set xrange [0:$nquals] set format x "" ]; my @plots; for (my $i=0; $i<$ncycles; $i++) { push @plots, q['-' using 1:2 with lines t ''] } print $fh "plot ", join(",", @plots), "\n"; for my $cycle (@{$$opts{dat}{FFQ}}) { for (my $iqual=1; $iqual<$nquals; $iqual++) { print $fh "$iqual\t$$cycle[$iqual]\n"; } print $fh "end\n"; } if ( $is_paired ) { print $fh qq[ set origin 0.15,0.1 set size 0.8,0.4 unset title unset format set xtics set xlabel "Quality" unset label set label "Cycle $lmax_cycle" at $lmax_qual+1,$lmax set ylabel "Frequency (rev reads)" ]; print $fh "plot ", join(",", @plots), "\n"; for my $cycle (@{$$opts{dat}{LFQ}}) { for (my $iqual=1; $iqual<$nquals; $iqual++) { print $fh "$iqual\t$$cycle[$iqual]\n"; } print $fh "end\n"; } } close($fh); 
plot($$args{gp}); # Heatmap qualitites $args = get_defaults($opts,"$$opts{prefix}quals-hm.png", wh=>'600,500'); $fh = $$args{fh}; my $max = defined $lmax && $lmax > $fmax ? $lmax : $fmax; my @ytics; for my $cycle (@{$$opts{dat}{FFQ}}) { if ( $$cycle[0]%10==0 ) { push @ytics,qq["$$cycle[0]" $$cycle[0]]; } } my $ytics = join(',', @ytics); print $fh qq[ $$args{terminal} set output "$$args{img}" unset key unset colorbox set palette defined (0 0 0 0, 1 0 0 1, 3 0 1 0, 4 1 0 0, 6 1 1 1) set cbrange [0:$max] set yrange [0:$ncycles] set xrange [0:$nquals] set view map set multiplot set rmargin 0 set lmargin 0 set tmargin 0 set bmargin 0 set origin 0,0.46 set size 0.95,0.6 set obj 1 rectangle behind from first 0,0 to first $nquals,$ncycles set obj 1 fillstyle solid 1.0 fillcolor rgbcolor "black" set ylabel "Cycle (fwd reads)" offset character -1,0 unset ytics set ytics ($ytics) unset xtics set title "$$args{title}" splot '-' matrix with image ]; for my $cycle (@{$$opts{dat}{FFQ}}) { for (my $iqual=1; $iqual<@$cycle; $iqual++) { print $fh "\t$$cycle[$iqual]"; } print $fh "\n"; } print $fh "end\nend\n"; @ytics = (); for my $cycle (@{$$opts{dat}{LFQ}}) { if ( $$cycle[0]%10==0 ) { push @ytics,qq["$$cycle[0]" $$cycle[0]]; } } $ytics = join(',', @ytics); print $fh qq[ set origin 0,0.03 set size 0.95,0.6 set ylabel "Cycle (rev reads)" offset character -1,0 set xlabel "Base Quality" unset title unset ytics set ytics ($ytics) set xrange [0:$nquals] set xtics set colorbox vertical user origin first ($nquals+1),0 size screen 0.025,0.812 set cblabel "Number of bases" splot '-' matrix with image ]; for my $cycle (@{$$opts{dat}{LFQ}}) { for (my $iqual=1; $iqual<@$cycle; $iqual++) { print $fh "\t$$cycle[$iqual]"; } print $fh "\n"; } print $fh "end\nend\n"; close($fh); plot($$args{gp}); } sub plot_acgt_cycles { my ($opts) = @_; if ( !exists($$opts{dat}{GCC}) or !@{$$opts{dat}{GCC}} ) { return; } my $args = get_defaults($opts,"$$opts{prefix}acgt-cycles.png"); my $fh = $$args{fh}; print $fh 
qq[ $$args{terminal} set output "$$args{img}" $$args{grid} set style line 1 linecolor rgb "green" set style line 2 linecolor rgb "red" set style line 3 linecolor rgb "black" set style line 4 linecolor rgb "blue" set style increment user set ylabel "Base content [%]" set xlabel "Read Cycle" set yrange [0:100] set title "$$args{title}" plot '-' w l ti 'A', '-' w l ti 'C', '-' w l ti 'G', '-' w l ti 'T' ]; for my $base (1..4) { for my $cycle (@{$$opts{dat}{GCC}}) { print $fh $$cycle[0]+1,"\t",$$cycle[$base],"\n"; } print $fh "end\n"; } close($fh); plot($$args{gp}); } sub plot_gc { my ($opts) = @_; my $is_paired = $$opts{dat}{'is paired:'}; my $args = get_defaults($opts,"$$opts{prefix}gc-content.png"); my $fh = $$args{fh}; my ($gcl_max,$gcf_max,$lmax,$fmax); for my $gc (@{$$opts{dat}{GCF}}) { if ( !defined $gcf_max or $gcf_max<$$gc[1] ) { $gcf_max=$$gc[1]; $fmax=$$gc[0]; } } for my $gc (@{$$opts{dat}{GCL}}) { if ( !defined $gcl_max or $gcl_max<$$gc[1] ) { $gcl_max=$$gc[1]; $lmax=$$gc[0]; } } my $gcmax = $is_paired && $gcl_max > $gcf_max ? $lmax : $fmax; print $fh qq[ $$args{terminal} set output "$$args{img}" $$args{grid} set title "$$args{title}" set ylabel "Normalized Frequency" set xlabel "GC Content [%]" set yrange [0:1.1] set label sprintf("%.1f",$gcmax) at $gcmax,1 front offset 1,0 plot ] . (exists($$opts{ref_stats}) ? q['-' smooth csplines with lines lt 0 title 'Reference', ] : '') . q['-' smooth csplines with lines lc 1 title 'First fragments' ] . ($is_paired ? q[, '-' smooth csplines with lines lc 2 title 'Last fragments'] : '') . 
q[ ]; if ( exists($$opts{ref_stats}) ) { open(my $ref,'<',$$opts{ref_stats}) or error("$$opts{ref_stats}: $!"); while (my $line=<$ref>) { print $fh $line } close($ref); print $fh "end\n"; } for my $cycle (@{$$opts{dat}{GCF}}) { printf $fh "%d\t%f\n", $$cycle[0],$$cycle[1]/$gcf_max; } print $fh "end\n"; if ( $is_paired ) { for my $cycle (@{$$opts{dat}{GCL}}) { printf $fh "%d\t%f\n", $$cycle[0],$$cycle[1]/$gcl_max; } print $fh "end\n"; } close($fh); plot($$args{gp}); } sub plot_gc_depth { my ($opts) = @_; if ( !exists($$opts{dat}{GCD}) or !@{$$opts{dat}{GCD}} ) { return; } # Find unique sequence percentiles for 30,40, and 50% GC content, just to draw x2tics. my @tics = ( {gc=>30},{gc=>40},{gc=>50} ); for my $gc (@{$$opts{dat}{GCD}}) { for my $tic (@tics) { my $diff = abs($$gc[0]-$$tic{gc}); if ( !exists($$tic{pr}) or $diff<$$tic{diff} ) { $$tic{pr}=$$gc[1]; $$tic{diff}=$diff; } } } my @x2tics; for my $tic (@tics) { push @x2tics, qq["$$tic{gc}" $$tic{pr}]; } my $x2tics = join(',',@x2tics); my $args = get_defaults($opts,"$$opts{prefix}gc-depth.png", wh=>'600,500'); my $fh = $$args{fh}; print $fh qq[ $$args{terminal} set output "$$args{img}" $$args{grid} set ylabel "Mapped depth" set xlabel "Percentile of mapped sequence ordered by GC content" set x2label "GC Content [%]" set title "$$args{title}" set x2tics ($x2tics) set xtics nomirror set xrange [0.1:99.9] plot '-' using 1:2:3 with filledcurve lt 1 lc rgb "#dedede" t '10-90th percentile' , \\ '-' using 1:2:3 with filledcurve lt 1 lc rgb "#bbdeff" t '25-75th percentile' , \\ '-' using 1:2 with lines lc rgb "#0084ff" t 'Median' ]; for my $gc (@{$$opts{dat}{GCD}}) { print $fh "$$gc[1]\t$$gc[2]\t$$gc[6]\n"; } print $fh "end\n"; for my $gc (@{$$opts{dat}{GCD}}) { print $fh "$$gc[1]\t$$gc[3]\t$$gc[5]\n"; } print $fh "end\n"; for my $gc (@{$$opts{dat}{GCD}}) { print $fh "$$gc[1]\t$$gc[4]\n"; } print $fh "end\n"; close($fh); plot($$args{gp}); } sub plot_isize { my ($opts) = @_; if ( !$$opts{dat}{'is paired:'} or 
!exists($$opts{dat}{IS}) or !@{$$opts{dat}{IS}} ) { return; } my ($isize_max,$isize_cnt); for my $isize (@{$$opts{dat}{IS}}) { if ( !defined $isize_max or $isize_cnt<$$isize[1] ) { $isize_cnt=$$isize[1]; $isize_max=$$isize[0]; } } my $args = get_defaults($opts,"$$opts{prefix}insert-size.png"); my $fh = $$args{fh}; print $fh qq[ $$args{terminal} set output "$$args{img}" $$args{grid} set rmargin 5 set label sprintf("%d",$isize_max) at $isize_max+10,$isize_cnt set ylabel "Number of pairs" set xlabel "Insert Size" set title "$$args{title}" plot \\ '-' with lines lc rgb 'black' title 'All pairs', \\ '-' with lines title 'Inward', \\ '-' with lines title 'Outward', \\ '-' with lines title 'Other' ]; for my $isize (@{$$opts{dat}{IS}}) { print $fh "$$isize[0]\t$$isize[1]\n"; } print $fh "end\n"; for my $isize (@{$$opts{dat}{IS}}) { print $fh "$$isize[0]\t$$isize[2]\n"; } print $fh "end\n"; for my $isize (@{$$opts{dat}{IS}}) { print $fh "$$isize[0]\t$$isize[3]\n"; } print $fh "end\n"; for my $isize (@{$$opts{dat}{IS}}) { print $fh "$$isize[0]\t$$isize[4]\n"; } print $fh "end\n"; close($fh); plot($$args{gp}); } sub plot_coverage { my ($opts) = @_; if ( !exists($$opts{dat}{COV}) or !@{$$opts{dat}{COV}} ) { return; } my @vals; for my $cov (@{$$opts{dat}{COV}}) { push @vals,$$cov[2]; } my $i = percentile(99.8,@vals); my $p99 = $$opts{dat}{COV}[$i][1]; my $args = get_defaults($opts,"$$opts{prefix}coverage.png"); my $fh = $$args{fh}; print $fh qq[ $$args{terminal} set output "$$args{img}" $$args{grid} set ylabel "Number of mapped bases" set xlabel "Coverage" set style fill solid border -1 set title "$$args{title}" set xrange [:$p99] plot '-' with lines notitle ]; for my $cov (@{$$opts{dat}{COV}}) { if ( $$cov[2]==0 ) { next; } print $fh "$$cov[1]\t$$cov[2]\n"; } print $fh "end\n"; close($fh); plot($$args{gp}); } sub plot_mismatches_per_cycle { my ($opts) = @_; if ( !exists($$opts{dat}{MPC}) or !@{$$opts{dat}{MPC}} ) { return; } if ( older_than($opts,'2012-02-06') ) { 
plot_mismatches_per_cycle_old($opts); } my $nquals = @{$$opts{dat}{MPC}[0]} - 2; my $ncycles = @{$$opts{dat}{MPC}}; my ($style,$with); if ( $ncycles>100 ) { $style = ''; $with = 'w l'; } else { $style = 'set style data histogram; set style histogram rowstacked'; $with = ''; } my $args = get_defaults($opts,"$$opts{prefix}mism-per-cycle.png"); my $fh = $$args{fh}; print $fh qq[ $$args{terminal} set output "$$args{img}" $$args{grid} set style line 1 linecolor rgb "#e40000" set style line 2 linecolor rgb "#ff9f00" set style line 3 linecolor rgb "#eeee00" set style line 4 linecolor rgb "#4ebd68" set style line 5 linecolor rgb "#0061ff" set style increment user set key left top $style set ylabel "Number of mismatches" set xlabel "Read Cycle" set style fill solid border -1 set title "$$args{title}" set xrange [-1:$ncycles] plot '-' $with ti 'Base Quality>30', \\ '-' $with ti '30>=Q>20', \\ '-' $with ti '20>=Q>10', \\ '-' $with ti '10>=Q', \\ '-' $with ti "N's" ]; for my $cycle (@{$$opts{dat}{MPC}}) { my $sum; for my $idx (31..$#$cycle) { $sum += $$cycle[$idx]; } print $fh "$sum\n"; } print $fh "end\n"; for my $cycle (@{$$opts{dat}{MPC}}) { my $sum; for my $idx (22..31) { $sum += $$cycle[$idx]; } print $fh "$sum\n"; } print $fh "end\n"; for my $cycle (@{$$opts{dat}{MPC}}) { my $sum; for my $idx (12..21) { $sum += $$cycle[$idx]; } print $fh "$sum\n"; } print $fh "end\n"; for my $cycle (@{$$opts{dat}{MPC}}) { my $sum; for my $idx (2..11) { $sum += $$cycle[$idx]; } print $fh "$sum\n"; } print $fh "end\n"; for my $cycle (@{$$opts{dat}{MPC}}) { print $fh "$$cycle[1]\n"; } print $fh "end\n"; close($fh); plot($$args{gp}); } sub plot_indel_dist { my ($opts) = @_; if ( !exists($$opts{dat}{ID}) or !@{$$opts{dat}{ID}} ) { return; } my $args = get_defaults($opts,"$$opts{prefix}indel-dist.png"); my $fh = $$args{fh}; print $fh qq[ $$args{terminal} set output "$$args{img}" $$args{grid} set style line 1 linetype 1 linecolor rgb "red" set style line 2 linetype 2 linecolor rgb "black" set 
style line 3 linetype 3 linecolor rgb "green" set style increment user set ylabel "Indel count [log]" set xlabel "Indel length" set y2label "Insertions/Deletions ratio" set log y set y2tics nomirror set ytics nomirror set title "$$args{title}" plot '-' w l ti 'Insertions', '-' w l ti 'Deletions', '-' axes x1y2 w l ti "Ins/Dels ratio" ]; for my $len (@{$$opts{dat}{ID}}) { print $fh "$$len[0]\t$$len[1]\n"; } print $fh "end\n"; for my $len (@{$$opts{dat}{ID}}) { print $fh "$$len[0]\t$$len[2]\n"; } print $fh "end\n"; for my $len (@{$$opts{dat}{ID}}) { printf $fh "%d\t%f\n", $$len[0],$$len[2]?$$len[1]/$$len[2]:0; } print $fh "end\n"; close($fh); plot($$args{gp}); } sub plot_indel_cycles { my ($opts) = @_; if ( !exists($$opts{dat}{IC}) or !@{$$opts{dat}{IC}} ) { return; } my $args = get_defaults($opts,"$$opts{prefix}indel-cycles.png"); my $fh = $$args{fh}; print $fh qq[ $$args{terminal} set output "$$args{img}" $$args{grid} set style line 1 linetype 1 linecolor rgb "red" set style line 2 linetype 2 linecolor rgb "black" set style line 3 linetype 3 linecolor rgb "green" set style line 4 linetype 4 linecolor rgb "blue" set style increment user set ylabel "Indel count" set xlabel "Read Cycle" set title "$$args{title}" plot '-' w l ti 'Insertions (fwd)', '' w l ti 'Insertions (rev)', '' w l ti 'Deletions (fwd)', '' w l ti 'Deletions (rev)' ]; for my $len (@{$$opts{dat}{IC}}) { print $fh "$$len[0]\t$$len[1]\n"; } print $fh "end\n"; for my $len (@{$$opts{dat}{IC}}) { print $fh "$$len[0]\t$$len[2]\n"; } print $fh "end\n"; for my $len (@{$$opts{dat}{IC}}) { print $fh "$$len[0]\t$$len[3]\n"; } print $fh "end\n"; for my $len (@{$$opts{dat}{IC}}) { print $fh "$$len[0]\t$$len[4]\n"; } print $fh "end\n"; close($fh); plot($$args{gp}); } sub has_values { my ($opts,@tags) = @_; for my $tag (@tags) { my (@lines) = `cat $$opts{bamcheck} | grep ^$tag | wc -l`; chomp($lines[0]); if ( $lines[0]<2 ) { return 0; } } return 1; } sub plot_mismatches_per_cycle_old { my ($opts) = @_; my $args = 
get_defaults($opts,"$$opts{prefix}mism-per-cycle.png"); my ($nquals) = `grep ^MPC $$opts{bamcheck} | awk '\$2==1' | sed 's,\\t,\\n,g' | wc -l`; my ($ncycles) = `grep ^MPC $$opts{bamcheck} | wc -l`; chomp($nquals); chomp($ncycles); $nquals--; $ncycles--; my @gr0_15 = (2..17); my @gr16_30 = (18..32); my @gr31_n = (33..$nquals); my $gr0_15 = '$'. join('+$',@gr0_15); my $gr16_30 = '$'. join('+$',@gr16_30); my $gr31_n = '$'. join('+$',@gr31_n); open(my $fh,'>',$$args{gp}) or error("$$args{gp}: $!"); print $fh q[ set terminal png size 600,400 truecolor font "DejaVuSansMono,9" set output "] . $$args{img} . q[" set key left top set style data histogram set style histogram rowstacked set grid back lc rgb "#aaaaaa" set ylabel "Number of mismatches" set xlabel "Read Cycle" set style fill solid border -1 set title "] . $$args{title} . qq[" set xrange [-1:$ncycles] plot '< grep ^MPC $$opts{bamcheck} | cut -f 2-' using ($gr31_n) ti 'Base Quality>30', '' using ($gr16_30) ti '30>=Q>15', '' using ($gr0_15) ti '15>=Q' ]; close($fh); plot($$args{gp}); } samtools-0.1.19/misc/psl2sam.pl000077500000000000000000000037461212162403000163200ustar00rootroot00000000000000#!/usr/bin/perl -w # Author: lh3 # This script calculates a score using the BLAST scoring # system. However, I am not sure how to count gap opens and gap # extensions. It seems to me that column 5-8 are not what I am # after. This script counts gaps from the last three columns. It does # not generate reference skip (N) in the CIGAR as it is not easy to # directly tell which gaps correspond to introns. 
use strict; use warnings; use Getopt::Std; my %opts = (a=>1, b=>3, q=>5, r=>2); getopts('a:b:q:r:', \%opts); die("Usage: psl2sam.pl [-a $opts{a}] [-b $opts{b}] [-q $opts{q}] [-r $opts{r}] \n") if (@ARGV == 0 && -t STDIN); my @stack; my $last = ''; my ($a, $b, $q, $r) = ($opts{a}, $opts{b}, $opts{q}, $opts{r}); while (<>) { next unless (/^\d/); my @t = split; my @s; my $cigar = ''; if ($t[8] eq '-') { my $tmp = $t[11]; $t[11] = $t[10] - $t[12]; $t[12] = $t[10] - $tmp; } @s[0..4] = ($t[9], (($t[8] eq '+')? 0 : 16), $t[13], $t[15]+1, 0); @s[6..10] = ('*', 0, 0, '*', '*'); $cigar .= $t[11].'H' if ($t[11]); # 5'-end clipping my @x = split(',', $t[18]); my @y = split(',', $t[19]); my @z = split(',', $t[20]); my ($y0, $z0) = ($y[0], $z[0]); my ($gap_open, $gap_ext) = (0, 0, 0); for (1 .. $t[17]-1) { my $ly = $y[$_] - $y[$_-1] - $x[$_-1]; my $lz = $z[$_] - $z[$_-1] - $x[$_-1]; if ($ly < $lz) { # del: the reference gap is longer ++$gap_open; $gap_ext += $lz - $ly; $cigar .= ($y[$_] - $y0) . 'M'; $cigar .= ($lz - $ly) . 'D'; ($y0, $z0) = ($y[$_], $z[$_]); } elsif ($lz < $ly) { # ins: the query gap is longer ++$gap_open; $gap_ext += $ly - $lz; $cigar .= ($z[$_] - $z0) . 'M'; $cigar .= ($ly - $lz) . 'I'; ($y0, $z0) = ($y[$_], $z[$_]); } } $cigar .= ($t[12] - $y0) . 
'M'; $cigar .= ($t[10] - $t[12]).'H' if ($t[10] != $t[12]); # 3'-end clipping $s[5] = $cigar; my $score = $a * $t[0] - $b * $t[1] - $q * $gap_open - $r * $gap_ext; $score = 0 if ($score < 0); $s[11] = "AS:i:$score"; print join("\t", @s), "\n"; } samtools-0.1.19/misc/r2plot.lua000077500000000000000000000051751212162403000163250ustar00rootroot00000000000000#!/usr/bin/env luajit function string:split(sep, n) local a, start = {}, 1; sep = sep or "%s+"; repeat local b, e = self:find(sep, start); if b == nil then table.insert(a, self:sub(start)); break end a[#a+1] = self:sub(start, b - 1); start = e + 1; if n and #a == n then table.insert(a, self:sub(start)); break end until start > #self; return a; end function io.xopen(fn, mode) mode = mode or 'r'; if fn == nil then return io.stdin; elseif fn == '-' then return (mode == 'r' and io.stdin) or io.stdout; elseif fn:sub(-3) == '.gz' then return (mode == 'r' and io.popen('gzip -dc ' .. fn, 'r')) or io.popen('gzip > ' .. fn, 'w'); elseif fn:sub(-4) == '.bz2' then return (mode == 'r' and io.popen('bzip2 -dc ' .. fn, 'r')) or io.popen('bgzip2 > ' .. fn, 'w'); else return io.open(fn, mode) end end local eps = {}; function eps.func(fp) fp = fp or io.stdout fp:write("/C { dup 255 and 255 div exch dup -8 bitshift 255 and 255 div 3 1 roll -16 bitshift 255 and 255 div 3 1 roll setrgbcolor } bind def\n") fp:write("/L { 4 2 roll moveto lineto } bind def\n") fp:write("/LX { dup 4 -1 roll exch moveto lineto } bind def\n") fp:write("/LY { dup 4 -1 roll moveto exch lineto } bind def\n") fp:write("/LS { 3 1 roll moveto show } bind def\n") fp:write("/RS { dup stringwidth pop 4 -1 roll exch sub 3 -1 roll moveto show } bind def\n") fp:write("/B { 4 copy 3 1 roll exch 6 2 roll 8 -2 roll moveto lineto lineto lineto closepath } bind def\n") end function eps.font(ft, size, fp) fp = fp or io.stdout fp:write(string.format('/FS %d def\n', size)); fp:write('/FS4 FS 4 div def\n'); fp:write('/' .. ft .. 
' findfont FS scalefont setfont\n'); end local scale = 8; if #arg == 0 then print("Usage: r2plot.lua "); os.exit(1) end local fp = io.xopen(arg[1]); local n = tonumber(fp:read()); print('%!PS-Adobe-3.0 EPSF-3.0'); print('%%' .. string.format('BoundingBox: -%d -%d %.3f %.3f\n', 10*scale, scale, (n+1)*scale, (n+1)*scale)); print(string.format('%.3f setlinewidth', scale)); print(string.format('/plot { setgray moveto 0 %d rlineto } def', scale)); print(string.format('/plothalf { setgray moveto 0 %.2f rlineto } def', scale/2)); eps.func(); eps.font('Helvetica', scale-1); local i = 1; for l in fp:lines() do local t = l:split('\t'); print(string.format("%d %d FS4 add (%s) RS", (i-1)*scale-2, (i-1)*scale, t[1])); for j = 2, #t do if tonumber(t[j]) > 0.01 then print(string.format('%.2f %.2f %.2f plot stroke', (i-1+.5)*scale, (j-2)*scale, 1.-t[j])); end end i = i + 1; end for j = 1, 21 do print(string.format('%.2f %.2f %.2f plothalf stroke', -8*scale, (j-1) * scale/2, 1.-(j-1)/20)); end print('showpage'); samtools-0.1.19/misc/sam2vcf.pl000077500000000000000000000173111212162403000162710ustar00rootroot00000000000000#!/usr/bin/perl -w # # VCF specs: http://www.1000genomes.org/wiki/doku.php?id=1000_genomes:analysis:vcf3.3 # # Contact: pd3@sanger # Version: 2010-04-23 use strict; use warnings; use Carp; my $opts = parse_params(); do_pileup_to_vcf($opts); exit; #--------------- sub error { my (@msg) = @_; if ( scalar @msg ) { croak(@msg); } die "Usage: sam2vcf.pl [OPTIONS] < in.pileup > out.vcf\n", "Options:\n", " -h, -?, --help This help message.\n", " -i, --indels-only Ignore SNPs.\n", " -r, --refseq The reference sequence, required when indels are present.\n", " -R, --keep-ref Print reference alleles as well.\n", " -s, --snps-only Ignore indels.\n", " -t, --column-title The column title.\n", "\n"; } sub parse_params { my %opts = (); $opts{fh_in} = *STDIN; $opts{fh_out} = *STDOUT; while (my $arg=shift(@ARGV)) { if ( $arg eq '-R' || $arg eq '--keep-ref' ) { $opts{keep_ref}=1; 
next; } if ( $arg eq '-r' || $arg eq '--refseq' ) { $opts{refseq}=shift(@ARGV); next; } if ( $arg eq '-t' || $arg eq '--column-title' ) { $opts{title}=shift(@ARGV); next; } if ( $arg eq '-s' || $arg eq '--snps-only' ) { $opts{snps_only}=1; next; } if ( $arg eq '-i' || $arg eq '--indels-only' ) { $opts{indels_only}=1; next; } if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); } error("Unknown parameter \"$arg\". Run -h for help.\n"); } return \%opts; } sub iupac_to_gtype { my ($ref,$base) = @_; my %iupac = ( 'K' => ['G','T'], 'M' => ['A','C'], 'S' => ['C','G'], 'R' => ['A','G'], 'W' => ['A','T'], 'Y' => ['C','T'], ); if ( !exists($iupac{$base}) ) { if ( $base ne 'A' && $base ne 'C' && $base ne 'G' && $base ne 'T' ) { error("FIXME: what is this [$base]?\n"); } if ( $ref eq $base ) { return ('.','0/0'); } return ($base,'1/1'); } my $gt = $iupac{$base}; if ( $$gt[0] eq $ref ) { return ($$gt[1],'0/1'); } elsif ( $$gt[1] eq $ref ) { return ($$gt[0],'0/1'); } return ("$$gt[0],$$gt[1]",'1/2'); } sub parse_indel { my ($cons) = @_; if ( $cons=~/^-/ ) { my $len = length($'); return "D$len"; } elsif ( $cons=~/^\+/ ) { return "I$'"; } elsif ( $cons eq '*' ) { return undef; } error("FIXME: could not parse [$cons]\n"); } # An example of the pileup format: # 1 3000011 C C 32 0 98 1 ^~, A # 1 3002155 * +T/+T 53 119 52 5 +T * 4 1 0 # 1 3003094 * -TT/-TT 31 164 60 11 -TT * 5 6 0 # 1 3073986 * */-AAAAAAAAAAAAAA 3 3 45 9 * -AAAAAAAAAAAAAA 7 2 0 # sub do_pileup_to_vcf { my ($opts) = @_; my $fh_in = $$opts{fh_in}; my $fh_out = $$opts{fh_out}; my ($prev_chr,$prev_pos,$prev_ref); my $refseq; my $ignore_indels = $$opts{snps_only} ? 1 : 0; my $ignore_snps = $$opts{indels_only} ? 1 : 0; my $keep_ref = $$opts{keep_ref} ? 1 : 0; my $title = exists($$opts{title}) ? 
$$opts{title} : 'data'; print $fh_out qq[##fileformat=VCFv3.3\n], qq[##INFO=DP,1,Integer,"Total Depth"\n], qq[##FORMAT=GT,1,String,"Genotype"\n], qq[##FORMAT=GQ,1,Integer,"Genotype Quality"\n], qq[##FORMAT=DP,1,Integer,"Read Depth"\n], qq[#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t$title\n] ; while (my $line=<$fh_in>) { chomp($line); my (@items) = split(/\t/,$line); if ( scalar @items<8 ) { error("\nToo few columns, does not look like output of 'samtools pileup -c': $line\n"); } my ($chr,$pos,$ref,$cons,$cons_qual,$snp_qual,$rms_qual,$depth,$a1,$a2) = @items; $ref = uc($ref); $cons = uc($cons); my ($alt,$gt); if ( $ref eq '*' ) { # An indel is involved. if ( $ignore_indels ) { $prev_ref = $ref; $prev_pos = $pos; $prev_chr = $chr; next; } if (!defined $prev_chr || $chr ne $prev_chr || $pos ne $prev_pos) { if ( !$$opts{refseq} ) { error("Cannot do indels without the reference.\n"); } if ( !$refseq ) { $refseq = Fasta->new(file=>$$opts{refseq}); } $ref = $refseq->get_base($chr,$pos); $ref = uc($ref); } else { $ref = $prev_ref; } # One of the alleles can be a reference and it can come in arbitrary order. In some # cases */* can be encountered. In such a case, look in the additional columns. 
my ($al1,$al2) = split(m{/},$cons); if ( $al1 eq $al2 && $al1 eq '*' ) { $al1=$a1; $al2=$a2; } my $alt1 = parse_indel($al1); my $alt2 = parse_indel($al2); if ( !$alt1 && !$alt2 ) { error("FIXME: could not parse indel:\n", $line); } if ( !$alt1 ) { $alt=$alt2; $gt='0/1'; } elsif ( !$alt2 ) { $alt=$alt1; $gt='0/1'; } elsif ( $alt1 eq $alt2 ) { $alt="$alt1"; $gt='1/1'; } else { $alt="$alt1,$alt2"; $gt='1/2'; } } else { if ( $ignore_snps || (!$keep_ref && $ref eq $cons) ) { $prev_ref = $ref; $prev_pos = $pos; $prev_chr = $chr; next; } # SNP ($alt,$gt) = iupac_to_gtype($ref,$cons); } print $fh_out "$chr\t$pos\t.\t$ref\t$alt\t$snp_qual\t0\tDP=$depth\tGT:GQ:DP\t$gt:$cons_qual:$depth\n"; $prev_ref = $ref; $prev_pos = $pos; $prev_chr = $chr; } } #------------- Fasta -------------------- # # Uses samtools to get a requested base from a fasta file. For efficiency, preloads # a chunk to memory. The size of the cached sequence can be controlled by the 'size' # parameter. # package Fasta; use strict; use warnings; use Carp; sub Fasta::new { my ($class,@args) = @_; my $self = {@args}; bless $self, ref($class) || $class; if ( !$$self{file} ) { $self->throw(qq[Missing the parameter "file"\n]); } $$self{chr} = undef; $$self{from} = undef; $$self{to} = undef; if ( !$$self{size} ) { $$self{size}=10_000_000; } bless $self, ref($class) || $class; return $self; } sub read_chunk { my ($self,$chr,$pos) = @_; my $to = $pos + $$self{size}; my $cmd = "samtools faidx $$self{file} $chr:$pos-$to"; my @out = `$cmd`; if ( $? 
) { $self->throw("$cmd: $!"); } my $line = shift(@out); if ( !($line=~/^>$chr:(\d+)-(\d+)/) ) { $self->throw("Could not parse: $line"); } $$self{chr} = $chr; $$self{from} = $1; $$self{to} = $2; my $chunk = ''; while ($line=shift(@out)) { chomp($line); $chunk .= $line; } $$self{chunk} = $chunk; return; } sub get_base { my ($self,$chr,$pos) = @_; if ( !$$self{chr} || $chr ne $$self{chr} || $pos<$$self{from} || $pos>$$self{to} ) { $self->read_chunk($chr,$pos); } my $idx = $pos - $$self{from}; return substr($$self{chunk},$idx,1); } sub throw { my ($self,@msg) = @_; croak(@msg); } samtools-0.1.19/misc/samtools.pl000077500000000000000000000337331212162403000165770ustar00rootroot00000000000000#!/usr/bin/perl -w # Author: lh3 use strict; use warnings; use Getopt::Std; my $version = '0.3.3'; &usage if (@ARGV < 1); my $command = shift(@ARGV); my %func = (showALEN=>\&showALEN, pileup2fq=>\&pileup2fq, varFilter=>\&varFilter, plp2vcf=>\&plp2vcf, unique=>\&unique, uniqcmp=>\&uniqcmp, sra2hdr=>\&sra2hdr, sam2fq=>\&sam2fq); die("Unknown command \"$command\".\n") if (!defined($func{$command})); &{$func{$command}}; exit(0); # # showALEN # sub showALEN { die(qq/Usage: samtools.pl showALEN \n/) if (@ARGV == 0 && -t STDIN); while (<>) { my @t = split; next if (/^\@/ || @t < 11); my $l = 0; $_ = $t[5]; s/(\d+)[MI]/$l+=$1/eg; print join("\t", @t[0..5]), "\t$l\t", join("\t", @t[6..$#t]), "\n"; } } # # varFilter # # # Filtration code: # # d low depth # D high depth # W too many SNPs in a window (SNP only) # G close to a high-quality indel (SNP only) # Q low RMS mapping quality (SNP only) # g close to another indel with higher quality (indel only) # s low SNP quality (SNP only) # i low indel quality (indel only) sub varFilter { my %opts = (d=>3, D=>100, l=>30, Q=>25, q=>10, G=>25, s=>100, w=>10, W=>10, N=>2, p=>undef, S=>'', i=>''); getopts('pq:d:D:l:Q:w:W:N:G:S:i:', \%opts); die(qq/ Usage: samtools.pl varFilter [options] Options: -Q INT minimum RMS mapping quality for SNPs [$opts{Q}] -q 
INT minimum RMS mapping quality for gaps [$opts{q}] -d INT minimum read depth [$opts{d}] -D INT maximum read depth [$opts{D}] -S INT minimum SNP quality [$opts{S}] -i INT minimum indel quality [$opts{i}] -G INT min indel score for nearby SNP filtering [$opts{G}] -w INT SNP within INT bp around a gap to be filtered [$opts{w}] -W INT window size for filtering dense SNPs [$opts{W}] -N INT max number of SNPs in a window [$opts{N}] -l INT window size for filtering adjacent gaps [$opts{l}] -p print filtered variants \n/) if (@ARGV == 0 && -t STDIN); # calculate the window size my ($ol, $ow, $oW) = ($opts{l}, $opts{w}, $opts{W}); my $max_dist = $ol > $ow? $ol : $ow; $max_dist = $oW if ($max_dist < $oW); # the core loop my @staging; # (indel_filtering_score, flt_tag) while (<>) { my @t = split; next if (uc($t[2]) eq uc($t[3]) || $t[3] eq '*/*'); # skip non-var sites # clear the out-of-range elements while (@staging) { # Still on the same chromosome and the first element's window still affects this position? last if ($staging[0][3] eq $t[0] && $staging[0][4] + $staging[0][2] + $max_dist >= $t[1]); varFilter_aux(shift(@staging), $opts{p}); # calling a function is a bit slower, not much } my ($flt, $score) = (0, -1); # first a simple filter if ($t[7] < $opts{d}) { $flt = 2; } elsif ($t[7] > $opts{D}) { $flt = 3; } if ($t[2] eq '*') { # an indel if ($opts{i} && $opts{i}>$t[5]) { $flt = 8; } } elsif ($opts{S} && $opts{S}>$t[5]) { $flt = 7; } # SNP # site dependent filters my $len=0; if ($flt == 0) { if ($t[2] eq '*') { # an indel # If deletion, remember the length of the deletion my ($a,$b) = split(m{/},$t[3]); my $alen = length($a) - 1; my $blen = length($b) - 1; if ( $alen>$blen ) { if ( substr($a,0,1) eq '-' ) { $len=$alen; } } elsif ( substr($b,0,1) eq '-' ) { $len=$blen; } $flt = 1 if ($t[6] < $opts{q}); # filtering SNPs if ($t[5] >= $opts{G}) { for my $x (@staging) { # Is it a SNP and is it outside the SNP filter window? 
next if ($x->[0] >= 0 || $x->[4] + $x->[2] + $ow < $t[1]); $x->[1] = 5 if ($x->[1] == 0); } } # calculate the filtering score (different from indel quality) $score = $t[5]; $score += $opts{s} * $t[10] if ($t[8] ne '*'); $score += $opts{s} * $t[11] if ($t[9] ne '*'); # check the staging list for indel filtering for my $x (@staging) { # Is it a SNP and is it outside the gap filter window next if ($x->[0] < 0 || $x->[4] + $x->[2] + $ol < $t[1]); if ($x->[0] < $score) { $x->[1] = 6; } else { $flt = 6; last; } } } else { # a SNP $flt = 1 if ($t[6] < $opts{Q}); # check adjacent SNPs my $k = 1; for my $x (@staging) { ++$k if ($x->[0] < 0 && $x->[4] + $x->[2] + $oW >= $t[1] && ($x->[1] == 0 || $x->[1] == 4 || $x->[1] == 5)); } # filtering is necessary if ($k > $opts{N}) { $flt = 4; for my $x (@staging) { $x->[1] = 4 if ($x->[0] < 0 && $x->[4] + $x->[2] + $oW >= $t[1] && $x->[1] == 0); } } else { # then check gap filter for my $x (@staging) { next if ($x->[0] < 0 || $x->[4] + $x->[2] + $ow < $t[1]); if ($x->[0] >= $opts{G}) { $flt = 5; last; } } } } } push(@staging, [$score, $flt, $len, @t]); } # output the last few elements in the staging list while (@staging) { varFilter_aux(shift @staging, $opts{p}); } } sub varFilter_aux { my ($first, $is_print) = @_; if ($first->[1] == 0) { print join("\t", @$first[3 .. @$first-1]), "\n"; } elsif ($is_print) { print STDERR join("\t", substr("UQdDWGgsiX", $first->[1], 1), @$first[3 .. 
@$first-1]), "\n"; } } # # pileup2fq # sub pileup2fq { my %opts = (d=>3, D=>255, Q=>25, G=>25, l=>10); getopts('d:D:Q:G:l:', \%opts); die(qq/ Usage: samtools.pl pileup2fq [options] Options: -d INT minimum depth [$opts{d}] -D INT maximum depth [$opts{D}] -Q INT min RMS mapQ [$opts{Q}] -G INT minimum indel score [$opts{G}] -l INT indel filter winsize [$opts{l}]\n /) if (@ARGV == 0 && -t STDIN); my ($last_chr, $seq, $qual, @gaps, $last_pos); my $_Q = $opts{Q}; my $_d = $opts{d}; my $_D = $opts{D}; $last_chr = ''; while (<>) { my @t = split; if ($last_chr ne $t[0]) { &p2q_post_process($last_chr, \$seq, \$qual, \@gaps, $opts{l}) if ($last_chr); $last_chr = $t[0]; $last_pos = 0; $seq = ''; $qual = ''; @gaps = (); } if ($t[1] - $last_pos != 1) { $seq .= 'n' x ($t[1] - $last_pos - 1); $qual .= '!' x ($t[1] - $last_pos - 1); } if ($t[2] eq '*') { push(@gaps, $t[1]) if ($t[5] >= $opts{G}); } else { $seq .= ($t[6] >= $_Q && $t[7] >= $_d && $t[7] <= $_D)? uc($t[3]) : lc($t[3]); my $q = $t[4] + 33; $q = 126 if ($q > 126); $qual .= chr($q); } $last_pos = $t[1]; } &p2q_post_process($last_chr, \$seq, \$qual, \@gaps, $opts{l}); } sub p2q_post_process { my ($chr, $seq, $qual, $gaps, $l) = @_; &p2q_filter_gaps($seq, $gaps, $l); print "\@$chr\n"; &p2q_print_str($seq); print "+\n"; &p2q_print_str($qual); } sub p2q_filter_gaps { my ($seq, $gaps, $l) = @_; for my $g (@$gaps) { my $x = $g > $l? $g - $l : 0; substr($$seq, $x, $l + $l) = lc(substr($$seq, $x, $l + $l)); } } sub p2q_print_str { my ($s) = @_; my $l = length($$s); for (my $i = 0; $i < $l; $i += 60) { print substr($$s, $i, 60), "\n"; } } # # sam2fq # sub sam2fq { my %opts = (n=>20, p=>''); getopts('n:p:', \%opts); die("Usage: samtools.pl sam2fq [-n 20] [-p ] \n") if (@ARGV == 0 && -t STDIN); if ($opts{p} && $opts{n} > 1) { my $pre = $opts{p}; my @fh; for (0 .. 
$opts{n}-1) { open($fh[$_], sprintf("| gzip > $pre.%.3d.fq.gz", $_)) || die; } my $i = 0; while (<>) { next if (/^@/); chomp; my @t = split("\t"); next if ($t[9] eq '*'); my ($name, $seq, $qual); if ($t[1] & 16) { # reverse strand $seq = reverse($t[9]); $qual = reverse($t[10]); $seq =~ tr/ACGTacgt/TGCAtgca/; } else { ($seq, $qual) = @t[9,10]; } $name = $t[0]; $name .= "/1" if ($t[1] & 0x40); $name .= "/2" if ($t[1] & 0x80); print {$fh[$i]} "\@$name\n$seq\n"; if ($qual ne '*') { print {$fh[$i]} "+\n$qual\n"; } $i = 0 if (++$i == $opts{n}); } close($fh[$_]) for (0 .. $opts{n}-1); } else { die("To be implemented.\n"); } } # # sra2hdr # # This subroutine does not use an XML parser. It requires that the SRA # XML files are properly formated. sub sra2hdr { my %opts = (); getopts('', \%opts); die("Usage: samtools.pl sra2hdr \n") if (@ARGV == 0); my $pre = $ARGV[0]; my $fh; # read sample my $sample = 'UNKNOWN'; open($fh, "$pre.sample.xml") || die; while (<$fh>) { $sample = $1 if (/) { if (/\s*(\S+)\s*<\/LIBRARY_NAME>/i) { $exp2lib{$exp} = $1; } } close($fh); # read run my ($run, @fn); open($fh, "$pre.run.xml") || die; while (<$fh>) { if (//i) { if (@fn == 1) { print STDERR "$fn[0]\t$run\n"; } else { for (0 .. $#fn) { print STDERR "$fn[$_]\t$run", "_", $_+1, "\n"; } } } } close($fh); } # # unique # sub unique { my %opts = (f=>250.0, q=>5, r=>2, a=>1, b=>3); getopts('Qf:q:r:a:b:m', \%opts); die("Usage: samtools.pl unique [-f $opts{f}] \n") if (@ARGV == 0 && -t STDIN); my $last = ''; my $recal_Q = !defined($opts{Q}); my $multi_only = defined($opts{m}); my @a; while (<>) { my $score = -1; print $_ if (/^\@/); $score = $1 if (/AS:i:(\d+)/); my @t = split("\t"); next if (@t < 11); if ($score < 0) { # AS tag is unavailable my $cigar = $t[5]; my ($mm, $go, $ge) = (0, 0, 0); $cigar =~ s/(\d+)[ID]/++$go,$ge+=$1/eg; $cigar = $t[5]; $cigar =~ s/(\d+)M/$mm+=$1/eg; $score = $mm * $opts{a} - $go * $opts{q} - $ge * $opts{r}; # no mismatches... 
} $score = 1 if ($score < 1); if ($t[0] ne $last) { &unique_aux(\@a, $opts{f}, $recal_Q, $multi_only) if (@a); $last = $t[0]; } push(@a, [$score, \@t]); } &unique_aux(\@a, $opts{f}, $recal_Q, $multi_only) if (@a); } sub unique_aux { my ($a, $fac, $is_recal, $multi_only) = @_; my ($max, $max2, $max_i) = (0, 0, -1); for (my $i = 0; $i < @$a; ++$i) { if ($a->[$i][0] > $max) { $max2 = $max; $max = $a->[$i][0]; $max_i = $i; } elsif ($a->[$i][0] > $max2) { $max2 = $a->[$i][0]; } } if ($is_recal) { if (!$multi_only || @$a > 1) { my $q = int($fac * ($max - $max2) / $max + .499); $q = 250 if ($q > 250); $a->[$max_i][1][4] = $q < 250? $q : 250; } } print join("\t", @{$a->[$max_i][1]}); @$a = (); } # # uniqcmp: compare two SAM files # sub uniqcmp { my %opts = (q=>10, s=>100); getopts('pq:s:', \%opts); die("Usage: samtools.pl uniqcmp \n") if (@ARGV < 2); my ($fh, %a); warn("[uniqcmp] read the first file...\n"); &uniqcmp_aux($ARGV[0], \%a, 0); warn("[uniqcmp] read the second file...\n"); &uniqcmp_aux($ARGV[1], \%a, 1); warn("[uniqcmp] stats...\n"); my @cnt; $cnt[$_] = 0 for (0..9); for my $x (keys %a) { my $p = $a{$x}; my $z; if (defined($p->[0]) && defined($p->[1])) { $z = ($p->[0][0] == $p->[1][0] && $p->[0][1] eq $p->[1][1] && abs($p->[0][2] - $p->[1][2]) < $opts{s})? 0 : 1; if ($p->[0][3] >= $opts{q} && $p->[1][3] >= $opts{q}) { ++$cnt[$z*3+0]; } elsif ($p->[0][3] >= $opts{q}) { ++$cnt[$z*3+1]; } elsif ($p->[1][3] >= $opts{q}) { ++$cnt[$z*3+2]; } print STDERR "$x\t$p->[0][1]:$p->[0][2]\t$p->[0][3]\t$p->[0][4]\t$p->[1][1]:$p->[1][2]\t$p->[1][3]\t$p->[1][4]\t", $p->[0][5]-$p->[1][5], "\n" if ($z && defined($opts{p}) && ($p->[0][3] >= $opts{q} || $p->[1][3] >= $opts{q})); } elsif (defined($p->[0])) { ++$cnt[$p->[0][3]>=$opts{q}? 
6 : 7]; print STDERR "$x\t$p->[0][1]:$p->[0][2]\t$p->[0][3]\t$p->[0][4]\t*\t0\t*\t", $p->[0][5], "\n" if (defined($opts{p}) && $p->[0][3] >= $opts{q}); } else { print STDERR "$x\t*\t0\t*\t$p->[1][1]:$p->[1][2]\t$p->[1][3]\t$p->[1][4]\t", -$p->[1][5], "\n" if (defined($opts{p}) && $p->[1][3] >= $opts{q}); ++$cnt[$p->[1][3]>=$opts{q}? 8 : 9]; } } print "Consistent (high, high): $cnt[0]\n"; print "Consistent (high, low ): $cnt[1]\n"; print "Consistent (low , high): $cnt[2]\n"; print "Inconsistent (high, high): $cnt[3]\n"; print "Inconsistent (high, low ): $cnt[4]\n"; print "Inconsistent (low , high): $cnt[5]\n"; print "Second missing (high): $cnt[6]\n"; print "Second missing (low ): $cnt[7]\n"; print "First missing (high): $cnt[8]\n"; print "First missing (low ): $cnt[9]\n"; } sub uniqcmp_aux { my ($fn, $a, $which) = @_; my $fh; $fn = "samtools view $fn |" if ($fn =~ /\.bam/); open($fh, $fn) || die; while (<$fh>) { my @t = split; next if (@t < 11); # my $l = ($t[5] =~ /^(\d+)S/)? $1 : 0; my $l = 0; my ($x, $nm) = (0, 0); $nm = $1 if (/NM:i:(\d+)/); $_ = $t[5]; s/(\d+)[MI]/$x+=$1/eg; @{$a->{$t[0]}[$which]} = (($t[1]&0x10)? 1 : 0, $t[2], $t[3]-$l, $t[4], "$x:$nm", $x - 4 * $nm); } close($fh); } sub plp2vcf { while (<>) { my @t = split; next if ($t[3] eq '*/*'); if ($t[2] eq '*') { # indel my @s = split("/", $t[3]); my (@a, @b); my ($ref, $alt); for (@s) { next if ($_ eq '*'); if (/^-/) { push(@a, 'N'.substr($_, 1)); push(@b, 'N'); } elsif (/^\+/) { push(@a, 'N'); push(@b, 'N'.substr($_, 1)); } } if ($a[0] && $a[1]) { if (length($a[0]) < length($a[1])) { $ref = $a[1]; $alt = ($b[0] . ('N' x (length($a[1]) - length($a[0])))) . ",$b[1]"; } elsif (length($a[0]) > length($a[1])) { $ref = $a[0]; $alt = ($b[1] . ('N' x (length($a[0]) - length($a[1])))) . ",$b[0]"; } else { $ref = $a[0]; $alt = ($b[0] eq $b[1])? 
$b[0] : "$b[0],$b[1]"; } } else { $ref = $a[0]; $alt = $b[0]; } print join("\t", @t[0,1], '.', $ref, $alt, $t[5], '.', '.'), "\n"; } else { # SNP } } } # # Usage # sub usage { die(qq/ Program: samtools.pl (helper script for SAMtools) Version: $version Contact: Heng Li \n Usage: samtools.pl []\n Command: varFilter filtering SNPs and short indels pileup2fq generate fastq from `pileup -c' showALEN print alignment length (ALEN) following CIGAR \n/); } samtools-0.1.19/misc/soap2sam.pl000077500000000000000000000052101212162403000164500ustar00rootroot00000000000000#!/usr/bin/perl -w # Contact: lh3 # Version: 0.1.1 use strict; use warnings; use Getopt::Std; &soap2sam; exit; sub mating { my ($s1, $s2) = @_; my $isize = 0; if ($s1->[2] ne '*' && $s1->[2] eq $s2->[2]) { # then calculate $isize my $x1 = ($s1->[1] & 0x10)? $s1->[3] + length($s1->[9]) : $s1->[3]; my $x2 = ($s2->[1] & 0x10)? $s2->[3] + length($s2->[9]) : $s2->[3]; $isize = $x2 - $x1; } # update mate coordinate if ($s2->[2] ne '*') { @$s1[6..8] = (($s2->[2] eq $s1->[2])? "=" : $s2->[2], $s2->[3], $isize); $s1->[1] |= 0x20 if ($s2->[1] & 0x10); } else { $s1->[1] |= 0x8; } if ($s1->[2] ne '*') { @$s2[6..8] = (($s1->[2] eq $s2->[2])? 
"=" : $s1->[2], $s1->[3], -$isize); $s2->[1] |= 0x20 if ($s1->[1] & 0x10); } else { $s2->[1] |= 0x8; } } sub soap2sam { my %opts = (); getopts("p", \%opts); die("Usage: soap2sam.pl [-p] \n") if (@ARGV == 0 && -t STDIN); my $is_paired = defined($opts{p}); # core loop my @s1 = (); my @s2 = (); my ($s_last, $s_curr) = (\@s1, \@s2); while (<>) { s/[\177-\377]|[\000-\010]|[\012-\040]//g; next if (&soap2sam_aux($_, $s_curr, $is_paired) < 0); if (@$s_last != 0 && $s_last->[0] eq $s_curr->[0]) { &mating($s_last, $s_curr); print join("\t", @$s_last), "\n"; print join("\t", @$s_curr), "\n"; @$s_last = (); @$s_curr = (); } else { print join("\t", @$s_last), "\n" if (@$s_last != 0); my $s = $s_last; $s_last = $s_curr; $s_curr = $s; } } print join("\t", @$s_last), "\n" if (@$s_last != 0); } sub soap2sam_aux { my ($line, $s, $is_paired) = @_; chomp($line); my @t = split(/\s+/, $line); return -1 if (@t < 9 || $line =~ /^\s/ || !$t[0]); @$s = (); # fix SOAP-2.1.x bugs @t = @t[0..2,4..$#t] unless ($t[3] =~ /^\d+$/); # read name $s->[0] = $t[0]; $s->[0] =~ s/\/[12]$//g; # initial flag (will be updated later) $s->[1] = 0; $s->[1] |= 1 | 1<<($t[4] eq 'a'? 6 : 7); $s->[1] |= 2 if ($is_paired); # read & quality $s->[9] = $t[1]; $s->[10] = (length($t[2]) > length($t[1]))? substr($t[2], 0, length($t[1])) : $t[2]; # cigar $s->[5] = length($s->[9]) . "M"; # coor $s->[2] = $t[7]; $s->[3] = $t[8]; $s->[1] |= 0x10 if ($t[6] eq '-'); # mapQ $s->[4] = $t[3] == 1? 30 : 0; # mate coordinate $s->[6] = '*'; $s->[7] = $s->[8] = 0; # aux push(@$s, "NM:i:$t[9]"); my $md = ''; if ($t[9]) { my @x; for (10 .. $#t) { push(@x, sprintf("%.3d,$1", $2)) if ($t[$_] =~ /^([ACGT])->(\d+)/i); } @x = sort(@x); my $a = 0; for (@x) { my ($y, $z) = split(","); $md .= (int($y)-$a) . 
$z; $a += $y - $a + 1; } $md .= length($t[1]) - $a; } else { $md = length($t[1]); } push(@$s, "MD:Z:$md"); return 0; } samtools-0.1.19/misc/varfilter.py000077500000000000000000000132271212162403000167450ustar00rootroot00000000000000#!/software/bin/python # Author: lh3, converted to python and modified to add -C option by Aylwyn Scally # # About: # varfilter.py is a port of Heng's samtools.pl varFilter script into # python, with an additional -C INT option. This option sets a minimum # consensus score, above which the script will output a pileup line # wherever it _could have_ called a variant, even if none is actually # called (i.e. hom-ref positions). This is important if you want to # subsequently merge the calls with those for another individual to get a # synoptic view of calls at each site. Without this option, and in all # other respects, it behaves like samtools.pl varFilter. # # Aylwyn Scally as6@sanger.ac.uk # Filtration code: # # C low CNS quality (hom-ref only) # d low depth # D high depth # W too many SNPs in a window (SNP only) # G close to a high-quality indel (SNP only) # Q low RMS mapping quality (SNP only) # g close to another indel with higher quality (indel only) # s low SNP quality (SNP only) # i low indel quality (indel only) import sys import getopt def usage(): print '''usage: varfilter.py [options] [cns-pileup] Options: -Q INT minimum RMS mapping quality for SNPs -q INT minimum RMS mapping quality for gaps -d INT minimum read depth -D INT maximum read depth -S INT minimum SNP quality -i INT minimum indel quality -C INT minimum consensus quality for hom-ref sites -G INT min indel score for nearby SNP filtering -w INT SNP within INT bp around a gap to be filtered -W INT window size for filtering dense SNPs -N INT max number of SNPs in a window -l INT window size for filtering adjacent gaps -p print filtered variants''' def varFilter_aux(first, is_print): try: if first[1] == 0: sys.stdout.write("\t".join(first[4:]) + "\n") elif is_print: 
sys.stderr.write("\t".join(["UQdDWGgsiCX"[first[1]]] + first[4:]) + "\n") except IOError: sys.exit() mindepth = 3 maxdepth = 100 gapgapwin = 30 minsnpmapq = 25 mingapmapq = 10 minindelscore = 25 scorefactor = 100 snpgapwin = 10 densesnpwin = 10 densesnps = 2 printfilt = False minsnpq = 0 minindelq = 0 mincnsq = 0 try: options, args = getopt.gnu_getopt(sys.argv[1:], 'pq:d:D:l:Q:w:W:N:G:S:i:C:', []) except getopt.GetoptError: usage() sys.exit(2) for (oflag, oarg) in options: if oflag == '-d': mindepth = int(oarg) if oflag == '-D': maxdepth = int(oarg) if oflag == '-l': gapgapwin = int(oarg) if oflag == '-Q': minsnpmapq = int(oarg) if oflag == '-q': mingapmapq = int(oarg) if oflag == '-G': minindelscore = int(oarg) if oflag == '-s': scorefactor = int(oarg) if oflag == '-w': snpgapwin = int(oarg) if oflag == '-W': densesnpwin = int(oarg) if oflag == '-C': mincnsq = int(oarg) if oflag == '-N': densesnps = int(oarg) if oflag == '-p': printfilt = True if oflag == '-S': minsnpq = int(oarg) if oflag == '-i': minindelq = int(oarg) if len(args) < 1: inp = sys.stdin else: inp = open(args[0]) # calculate the window size max_dist = max(gapgapwin, snpgapwin, densesnpwin) staging = [] for t in (line.strip().split() for line in inp): (flt, score) = (0, -1) # non-var sites if t[3] == '*/*': continue is_snp = t[2].upper() != t[3].upper() if not (is_snp or mincnsq): continue # clear the out-of-range elements while staging: # Still on the same chromosome and the first element's window still affects this position? 
if staging[0][4] == t[0] and int(staging[0][5]) + staging[0][2] + max_dist >= int(t[1]): break varFilter_aux(staging.pop(0), printfilt) # first a simple filter if int(t[7]) < mindepth: flt = 2 elif int(t[7]) > maxdepth: flt = 3 if t[2] == '*': # an indel if minindelq and minindelq > int(t[5]): flt = 8 elif is_snp: if minsnpq and minsnpq> int(t[5]): flt = 7 else: if mincnsq and mincnsq > int(t[4]): flt = 9 # site dependent filters dlen = 0 if flt == 0: if t[2] == '*': # an indel # If deletion, remember the length of the deletion (a,b) = t[3].split('/') alen = len(a) - 1 blen = len(b) - 1 if alen>blen: if a[0] == '-': dlen=alen elif b[0] == '-': dlen=blen if int(t[6]) < mingapmapq: flt = 1 # filtering SNPs if int(t[5]) >= minindelscore: for x in (y for y in staging if y[3]): # Is it a SNP and is it outside the SNP filter window? if x[0] >= 0 or int(x[5]) + x[2] + snpgapwin < int(t[1]): continue if x[1] == 0: x[1] = 5 # calculate the filtering score (different from indel quality) score = int(t[5]) if t[8] != '*': score += scorefactor * int(t[10]) if t[9] != '*': score += scorefactor * int(t[11]) # check the staging list for indel filtering for x in (y for y in staging if y[3]): # Is it a SNP and is it outside the gap filter window if x[0] < 0 or int(x[5]) + x[2] + gapgapwin < int(t[1]): continue if x[0] < score: x[1] = 6 else: flt = 6 break else: # a SNP or hom-ref if int(t[6]) < minsnpmapq: flt = 1 # check adjacent SNPs k = 1 for x in (y for y in staging if y[3]): if x[0] < 0 and int(x[5]) + x[2] + densesnpwin >= int(t[1]) and (x[1] == 0 or x[1] == 4 or x[1] == 5): k += 1 # filtering is necessary if k > densesnps: flt = 4 for x in (y for y in staging if y[3]): if x[0] < 0 and int(x[5]) + x[2] + densesnpwin >= int(t[1]) and x[1] == 0: x[1] = 4 else: # then check gap filter for x in (y for y in staging if y[3]): if x[0] < 0 or int(x[5]) + x[2] + snpgapwin < int(t[1]): continue if x[0] >= minindelscore: flt = 5 break staging.append([score, flt, dlen, is_snp] + t) # 
output the last few elements in the staging list while staging: varFilter_aux(staging.pop(0), printfilt) samtools-0.1.19/misc/vcfutils.lua000077500000000000000000000510441212162403000167360ustar00rootroot00000000000000#!/usr/bin/env luajit ----------------------------------- -- BEGIN: routines from klib.lua -- ----------------------------------- -- Description: getopt() translated from the BSD getopt(); compatible with the default Unix getopt() --[[ Example: for o, a in os.getopt(arg, 'a:b') do print(o, a) end ]]-- function os.getopt(args, ostr) local arg, place = nil, 0; return function () if place == 0 then -- update scanning pointer place = 1 if #args == 0 or args[1]:sub(1, 1) ~= '-' then place = 0; return nil end if #args[1] >= 2 then place = place + 1 if args[1]:sub(2, 2) == '-' then -- found "--" table.remove(args, 1); place = 0 return nil; end end end local optopt = place <= #args[1] and args[1]:sub(place, place) or nil place = place + 1; local oli = optopt and ostr:find(optopt) or nil if optopt == ':' or oli == nil then -- unknown option if optopt == '-' then return nil end if place > #args[1] then table.remove(args, 1); place = 0; end return '?'; end oli = oli + 1; if ostr:sub(oli, oli) ~= ':' then -- do not need argument arg = nil; if place > #args[1] then table.remove(args, 1); place = 0; end else -- need an argument if place <= #args[1] then -- no white space arg = args[1]:sub(place); else table.remove(args, 1); if #args == 0 then -- an option requiring argument is the last one place = 0; if ostr:sub(1, 1) == ':' then return ':' end return '?'; else arg = args[1] end end table.remove(args, 1); place = 0; end return optopt, arg; end end -- Description: string split function string:split(sep, n) local a, start = {}, 1; sep = sep or "%s+"; repeat local b, e = self:find(sep, start); if b == nil then table.insert(a, self:sub(start)); break end a[#a+1] = self:sub(start, b - 1); start = e + 1; if n and #a == n then table.insert(a, self:sub(start)); break end 
until start > #self; return a; end -- Description: smart file open function io.xopen(fn, mode) mode = mode or 'r'; if fn == nil then return io.stdin; elseif fn == '-' then return (mode == 'r' and io.stdin) or io.stdout; elseif fn:sub(-3) == '.gz' then return (mode == 'r' and io.popen('gzip -dc ' .. fn, 'r')) or io.popen('gzip > ' .. fn, 'w'); elseif fn:sub(-4) == '.bz2' then return (mode == 'r' and io.popen('bzip2 -dc ' .. fn, 'r')) or io.popen('bgzip2 > ' .. fn, 'w'); else return io.open(fn, mode) end end -- Description: log gamma function -- Required by: math.lbinom() -- Reference: AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245 function math.lgamma(z) local x; x = 0.1659470187408462e-06 / (z+7); x = x + 0.9934937113930748e-05 / (z+6); x = x - 0.1385710331296526 / (z+5); x = x + 12.50734324009056 / (z+4); x = x - 176.6150291498386 / (z+3); x = x + 771.3234287757674 / (z+2); x = x - 1259.139216722289 / (z+1); x = x + 676.5203681218835 / z; x = x + 0.9999999999995183; return math.log(x) - 5.58106146679532777 - z + (z-0.5) * math.log(z+6.5); end -- Description: regularized incomplete gamma function -- Dependent on: math.lgamma() --[[ Formulas are taken from Wiki, with additional input from Numerical Recipes in C (for modified Lentz's algorithm) and AS245 (http://lib.stat.cmu.edu/apstat/245). A good online calculator is available at: http://www.danielsoper.com/statcalc/calc23.aspx It calculates upper incomplete gamma function, which equals math.igamma(s,z,true)*math.exp(math.lgamma(s)) ]]-- function math.igamma(s, z, complement) local function _kf_gammap(s, z) local sum, x = 1, 1; for k = 1, 100 do x = x * z / (s + k); sum = sum + x; if x / sum < 1e-14 then break end end return math.exp(s * math.log(z) - z - math.lgamma(s + 1.) + math.log(sum)); end local function _kf_gammaq(s, z) local C, D, f, TINY; f = 1. + z - s; C = f; D = 0.; TINY = 1e-290; -- Modified Lentz's algorithm for computing continued fraction. 
See Numerical Recipes in C, 2nd edition, section 5.2 for j = 1, 100 do local d; local a, b = j * (s - j), j*2 + 1 + z - s; D = b + a * D; if D < TINY then D = TINY end C = b + a / C; if C < TINY then C = TINY end D = 1. / D; d = C * D; f = f * d; if math.abs(d - 1) < 1e-14 then break end end return math.exp(s * math.log(z) - z - math.lgamma(s) - math.log(f)); end if complement then return ((z <= 1 or z < s) and 1 - _kf_gammap(s, z)) or _kf_gammaq(s, z); else return ((z <= 1 or z < s) and _kf_gammap(s, z)) or (1 - _kf_gammaq(s, z)); end end function math.brent(func, a, b, tol) local gold1, gold2, tiny, max_iter = 1.6180339887, 0.3819660113, 1e-20, 100 local fa, fb = func(a, data), func(b, data) if fb > fa then -- swap, such that f(a) > f(b) a, b, fa, fb = b, a, fb, fa end local c = b + gold1 * (b - a) local fc = func(c) -- golden section extrapolation while fb > fc do local bound = b + 100.0 * (c - b) -- the farthest point where we want to go local r = (b - a) * (fb - fc) local q = (b - c) * (fb - fa) if math.abs(q - r) < tiny then -- avoid 0 denominator tmp = q > r and tiny or 0.0 - tiny else tmp = q - r end u = b - ((b - c) * q - (b - a) * r) / (2.0 * tmp) -- u is the parabolic extrapolation point if (b > u and u > c) or (b < u and u < c) then -- u lies between b and c fu = func(u) if fu < fc then -- (b,u,c) bracket the minimum a, b, fa, fb = b, u, fb, fu break elseif fu > fb then -- (a,b,u) bracket the minimum c, fc = u, fu break end u = c + gold1 * (c - b) fu = func(u) -- golden section extrapolation elseif (c > u and u > bound) or (c < u and u < bound) then -- u lies between c and bound fu = func(u) if fu < fc then -- fb > fc > fu b, c, u = c, u, c + gold1 * (c - b) fb, fc, fu = fc, fu, func(u) else -- (b,c,u) bracket the minimum a, b, c = b, c, u fa, fb, fc = fb, fc, fu break end elseif (u > bound and bound > c) or (u < bound and bound < c) then -- u goes beyond the bound u = bound fu = func(u) else -- u goes the other way around, use golden section 
extrapolation u = c + gold1 * (c - b) fu = func(u) end a, b, c = b, c, u fa, fb, fc = fb, fc, fu end if a > c then a, c = c, a end -- swap -- now, afb and fb tol1 then -- related to parabolic interpolation local r = (b - w) * (fb - fv) local q = (b - v) * (fb - fw) local p = (b - v) * q - (b - w) * r q = 2.0 * (q - r) if q > 0.0 then p = 0.0 - p else q = 0.0 - q end eold, e = e, d if math.abs(p) >= math.abs(0.5 * q * eold) or p <= q * (a - b) or p >= q * (c - b) then e = b >= mid and a - b or c - b d = gold2 * e else d, u = p / q, b + d -- actual parabolic interpolation happens here if u - a < tol2 or c - u < tol2 then d = mid > b and tol1 or 0.0 - tol1 end end else -- golden section interpolation e = b >= min and a - b or c - b d = gold2 * e end u = fabs(d) >= tol1 and b + d or b + (d > 0.0 and tol1 or -tol1); fu = func(u) if fu <= fb then -- u is the minimum point so far if u >= b then a = b else c = b end v, w, b = w, b, u fv, fw, fb = fw, fb, fu else -- adjust (a,c) and (u,v,w) if u < b then a = u else c = u end if fu <= fw or w == b then v, w = w, u fv, fw = fw, fu elseif fu <= fv or v == b or v == w then v, fv = u, fu; end end end return fb, b end matrix = {} -- Description: chi^2 test for contingency tables -- Dependent on: math.igamma() function matrix.chi2(a) if #a == 2 and #a[1] == 2 then -- 2x2 table local x, z x = (a[1][1] + a[1][2]) * (a[2][1] + a[2][2]) * (a[1][1] + a[2][1]) * (a[1][2] + a[2][2]) if x == 0 then return 0, 1, false end z = a[1][1] * a[2][2] - a[1][2] * a[2][1] z = (a[1][1] + a[1][2] + a[2][1] + a[2][2]) * z * z / x return z, math.igamma(.5, .5 * z, true), true else -- generic table local rs, cs, n, m, N, z = {}, {}, #a, #a[1], 0, 0 for i = 1, n do rs[i] = 0 end for j = 1, m do cs[j] = 0 end for i = 1, n do -- compute column sum and row sum for j = 1, m do cs[j], rs[i] = cs[j] + a[i][j], rs[i] + a[i][j] end end for i = 1, n do N = N + rs[i] end for i = 1, n do -- compute the chi^2 statistics for j = 1, m do local E = rs[i] * cs[j] / N; z 
= z + (a[i][j] - E) * (a[i][j] - E) / E end end return z, math.igamma(.5 * (n-1) * (m-1), .5 * z, true), true; end end --------------------------------- -- END: routines from klib.lua -- --------------------------------- -------------------------- -- BEGIN: misc routines -- -------------------------- -- precompute an array for PL->probability conversion -- @param m maximum PL function algo_init_q2p(m) local q2p = {} for i = 0, m do q2p[i] = math.pow(10, -i / 10) end return q2p end -- given the haplotype frequency, compute r^2 -- @param f 4 haplotype frequencies; f[] is 0-indexed. -- @return r^2 function algo_r2(f) local p = { f[0] + f[1], f[0] + f[2] } local D = f[0] * f[3] - f[1] * f[2] return (p[1] == 0 or p[2] == 0 or 1-p[1] == 0 or 1-p[2] == 0) and 0 or D * D / (p[1] * p[2] * (1 - p[1]) * (1 - p[2])) end -- parse a VCF line to get PL -- @param q2p is computed by algo_init_q2p() function text_parse_pl(t, q2p, parse_GT) parse_GT = parse_GT == nil and true or false local ht, gt, pl = {}, {}, {} local s, j0 = t[9]:split(':'), 0 for j = 1, #s do if s[j] == 'PL' then j0 = j break end end local has_GT = (s[1] == 'GT' and parse_GT) and true or false for i = 10, #t do if j0 > 0 then local s = t[i]:split(':') local a, b = 1, s[j0]:find(',') pl[#pl+1] = q2p[tonumber(s[j0]:sub(a, b - 1))] a, b = b + 1, s[j0]:find(',', b + 1) pl[#pl+1] = q2p[tonumber(s[j0]:sub(a, b - 1))] a, b = b + 1, s[j0]:find(',', b + 1) pl[#pl+1] = q2p[tonumber(s[j0]:sub(a, (b and b - 1) or nil))] end if has_GT then if t[i]:sub(1, 1) ~= '.' 
then local g = tonumber(t[i]:sub(1, 1)) + tonumber(t[i]:sub(3, 3)); gt[#gt+1] = 1e-6; gt[#gt+1] = 1e-6; gt[#gt+1] = 1e-6 gt[#gt - 2 + g] = 1 ht[#ht+1] = tonumber(t[i]:sub(1, 1)); ht[#ht+1] = tonumber(t[i]:sub(3, 3)); else gt[#gt+1] = 1; gt[#gt+1] = 1; gt[#gt+1] = 1 ht[#ht+1] = -1; ht[#ht+1] = -1; end end -- print(t[i], pl[#pl-2], pl[#pl-1], pl[#pl], gt[#gt-2], gt[#gt-1], gt[#gt]) end if #pl == 0 then pl = nil end local x = has_GT and { t[1], t[2], ht, gt, pl } or { t[1], t[2], nil, nil, pl } return x end -- Infer haplotype frequency -- @param pdg genotype likelihoods P(D|g) generated by text_parse_pl(). pdg[] is 1-indexed. -- @param eps precision [1e-5] -- @return 2-locus haplotype frequencies, 0-indexed array function algo_hapfreq2(pdg, eps) eps = eps or 1e-5 local n, f = #pdg[1] / 3, {[0]=0.25, 0.25, 0.25, 0.25} for iter = 1, 100 do local F = {[0]=0, 0, 0, 0} for i = 0, n - 1 do local p1, p2 = {[0]=pdg[1][i*3+1], pdg[1][i*3+2], pdg[1][i*3+3]}, {[0]=pdg[2][i*3+1], pdg[2][i*3+2], pdg[2][i*3+3]} local u = { [0]= f[0] * (f[0] * p1[0] * p2[0] + f[1] * p1[0] * p2[1] + f[2] * p1[1] * p2[0] + f[3] * p1[1] * p2[1]), f[1] * (f[0] * p1[0] * p2[1] + f[1] * p1[0] * p2[2] + f[2] * p1[1] * p2[1] + f[3] * p1[1] * p2[2]), f[2] * (f[0] * p1[1] * p2[0] + f[1] * p1[1] * p2[1] + f[2] * p1[2] * p2[0] + f[3] * p1[2] * p2[1]), f[3] * (f[0] * p1[1] * p2[1] + f[1] * p1[1] * p2[2] + f[2] * p1[2] * p2[1] + f[3] * p1[2] * p2[2]) } local s = u[0] + u[1] + u[2] + u[3] s = 1 / (s * n) F[0] = F[0] + u[0] * s F[1] = F[1] + u[1] * s F[2] = F[2] + u[2] * s F[3] = F[3] + u[3] * s end local e = 0 for k = 0, 3 do e = math.abs(f[k] - F[k]) > e and math.abs(f[k] - F[k]) or e end for k = 0, 3 do f[k] = F[k] end if e < eps then break end -- print(f[0], f[1], f[2], f[3]) end return f end ------------------------ -- END: misc routines -- ------------------------ --------------------- -- BEGIN: commands -- --------------------- -- CMD vcf2bgl: convert PL tagged VCF to Beagle input -- function cmd_vcf2bgl() 
if #arg == 0 then print("\nUsage: vcf2bgl.lua ") print("\nNB: This command finds PL by matching /(\\d+),(\\d+),(\\d+)/.\n"); os.exit(1) end local lookup = {} for i = 0, 10000 do lookup[i] = string.format("%.4f", math.pow(10, -i/10)) end local fp = io.xopen(arg[1]) for l in fp:lines() do if l:sub(1, 2) == '##' then -- meta lines; do nothing elseif l:sub(1, 1) == '#' then -- sample lines local t, s = l:split('\t'), {} for i = 10, #t do s[#s+1] = t[i]; s[#s+1] = t[i]; s[#s+1] = t[i] end print('marker', 'alleleA', 'alleleB', table.concat(s, '\t')) else -- data line local t = l:split('\t'); if t[5] ~= '.' and t[5]:find(",") == nil and #t[5] == 1 and #t[4] == 1 then -- biallic SNP local x, z = -1, {}; if t[9]:find('PL') then for i = 10, #t do local AA, Aa, aa = t[i]:match('(%d+),(%d+),(%d+)') AA = tonumber(AA); Aa = tonumber(Aa); aa = tonumber(aa); if AA ~= nil then z[#z+1] = lookup[AA]; z[#z+1] = lookup[Aa]; z[#z+1] = lookup[aa]; else z[#z+1] = 1; z[#z+1] = 1; z[#z+1] = 1; end end print(t[1]..':'..t[2], t[4], t[5], table.concat(z, '\t')) elseif t[9]:find('GL') then print('Error: not implemented') os.exit(1) end end end end fp:close() end -- CMD bgl2vcf: convert Beagle output to VCF function cmd_bgl2vcf() if #arg < 2 then print('Usage: bgl2vcf.lua ') os.exit(1) end local fpp = io.xopen(arg[1]); local fpg = io.xopen(arg[2]); for lg in fpg:lines() do local tp, tg, a = fpp:read():split('%s'), lg:split('%s', 4), {} if tp[1] == 'I' then for i = 3, #tp, 2 do a[#a+1] = tp[i] end print('#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT', table.concat(a, '\t')) else local chr, pos = tg[1]:match('(%S+):(%d+)$') a = {chr, pos, '.', tg[2], tg[3], 30, '.', '.', 'GT'} for i = 3, #tp, 2 do a[#a+1] = ((tp[i] == tg[2] and 0) or 1) .. '|' .. 
((tp[i+1] == tg[2] and 0) or 1) end print(table.concat(a, '\t')) end end fpg:close(); fpp:close(); end -- CMD freq: count alleles in each population function cmd_freq() -- parse the command line local site_only = true; -- print site allele frequency or not for c in os.getopt(arg, 's') do if c == 's' then site_only = false end end if #arg == 0 then print("\nUsage: vcfutils.lua freq [-s] [samples.txt]\n") print("NB: 1) This command only considers biallelic variants.") print(" 2) Apply '-s' to get the allele frequency spectrum.") print(" 3) 'samples.txt' is TAB-delimited with each line consisting of sample and population.") print("") os.exit(1) end -- read the sample-population pairs local pop, sample = {}, {} if #arg > 1 then local fp = io.xopen(arg[2]); for l in fp:lines() do local s, p = l:match("^(%S+)%s+(%S+)"); -- sample, population pair sample[s] = p; -- FIXME: check duplications if pop[p] then table.insert(pop[p], s) else pop[p] = {s} end end fp:close(); end pop['NA'] = {} -- parse VCF fp = (#arg >= 2 and io.xopen(arg[1])) or io.stdin; local col, cnt = {}, {}; for k in pairs(pop) do col[k], cnt[k] = {}, {[0]=0}; end for l in fp:lines() do if l:sub(1, 2) == '##' then -- meta lines; do nothing elseif l:sub(1, 1) == '#' then -- the sample line local t, del_NA = l:split('\t'), true; for i = 10, #t do local k = sample[t[i]] if k == nil then k, del_NA = 'NA', false table.insert(pop[k], t[i]) end table.insert(col[k], i); table.insert(cnt[k], 0); table.insert(cnt[k], 0); end if del_NA then pop['NA'], col['NA'], cnt['NA'] = nil, nil, nil end else -- data lines local t = l:split('\t'); if t[5] ~= '.' 
and t[5]:find(",") == nil then -- biallic if site_only == true then io.write(t[1], '\t', t[2], '\t', t[4], '\t', t[5]) end for k, v in pairs(col) do local ac, an = 0, 0; for i = 1, #v do local a1, a2 = t[v[i]]:match("^(%d).(%d)"); if a1 ~= nil then ac, an = ac + a1 + a2, an + 2 end end if site_only == true then io.write('\t', k, ':', an, ':', ac) end if an == #cnt[k] then cnt[k][ac] = cnt[k][ac] + 1 end end if site_only == true then io.write('\n') end end end end fp:close(); -- print if site_only == false then for k, v in pairs(cnt) do io.write(k .. "\t" .. #v); for i = 0, #v do io.write("\t" .. v[i]) end io.write('\n'); end end end function cmd_vcf2chi2() if #arg < 3 then print("Usage: vcfutils.lua vcf2chi2 "); os.exit(1) end local g = {}; -- read the list of groups local fp = io.xopen(arg[2]); for l in fp:lines() do local x = l:match("^(%S+)"); g[x] = 1 end -- FIXME: check duplicate fp:close() fp = io.xopen(arg[3]); for l in fp:lines() do local x = l:match("^(%S+)"); g[x] = 2 end fp:close() -- process VCF fp = io.xopen(arg[1]) local h = {{}, {}} for l in fp:lines() do if l:sub(1, 2) == '##' then print(l) -- meta lines; do nothing elseif l:sub(1, 1) == '#' then -- sample lines local t = l:split('\t'); for i = 10, #t do if g[t[i]] == 1 then table.insert(h[1], i) elseif g[t[i]] == 2 then table.insert(h[2], i) end end while #t > 8 do table.remove(t) end print(table.concat(t, "\t")) else -- data line local t = l:split('\t'); if t[5] ~= '.' and t[5]:find(",") == nil then -- biallic local a = {{0, 0}, {0, 0}} for i = 1, 2 do for _, k in pairs(h[i]) do if t[k]:find("^0.0") then a[i][1] = a[i][1] + 2 elseif t[k]:find("^1.1") then a[i][2] = a[i][2] + 2 elseif t[k]:find("^0.1") or t[k]:find("^1.0") then a[i][1], a[i][2] = a[i][1] + 1, a[i][2] + 1 end end end local chi2, p, succ = matrix.chi2(a); while #t > 8 do table.remove(t) end --print(a[1][1], a[1][2], a[2][1], a[2][2], chi2, p); if succ then print(table.concat(t, "\t") .. ";PCHI2=" .. string.format("%.3g", p) .. 
string.format(';AF1=%.4g;AF2=%.4g,%.4g', (a[1][2]+a[2][2]) / (a[1][1]+a[1][2]+a[2][1]+a[2][2]), a[1][2]/(a[1][1]+a[1][2]), a[2][2]/(a[2][1]+a[2][2]))) else print(table.concat(t, "\t")) end end end end fp:close() end -- CMD: compute r^2 function cmd_r2() local w, is_ht, is_gt = 1, false, false for o, a in os.getopt(arg, 'w:hg') do if o == 'w' then w = tonumber(a) elseif o == 'h' then is_ht, is_gt = true, true elseif o == 'g' then is_gt = true end end if #arg == 0 then print("Usage: vcfutils.lua r2 [-hg] [-w 1] ") os.exit(1) end local stack, fp, q2p = {}, io.xopen(arg[1]), algo_init_q2p(1023) for l in fp:lines() do if l:sub(1, 1) ~= '#' then local t = l:split('\t') local x = text_parse_pl(t, q2p) if #t[5] == 1 and t[5] ~= '.' then -- biallelic local r2 = {} for k = 1, w do if is_gt == false then -- use PL if stack[k] then local pdg = { stack[k][5], x[5] } r2[#r2+1] = algo_r2(algo_hapfreq2(pdg)) else r2[#r2+1] = 0 end elseif is_ht == false then -- use unphased GT if stack[k] then local pdg = { stack[k][4], x[4] } r2[#r2+1] = algo_r2(algo_hapfreq2(pdg)) else r2[#r2+1] = 0 end else -- use phased GT if stack[k] then local f, ht = { [0]=0, 0, 0, 0 }, { stack[k][3], x[3] } for i = 1, #ht[1] do local j = ht[1][i] * 2 + ht[2][i] f[j] = f[j] + 1 end local sum = f[0] + f[1] + f[2] + f[3] for k = 0, 3 do f[k] = f[k] / sum end r2[#r2+1] = algo_r2(f) else r2[#r2+1] = 0 end end end for k = 1, #r2 do r2[k] = string.format('%.3f', r2[k]) end print(x[1], x[2], table.concat(r2, '\t')) if #stack == w then table.remove(stack, 1) end stack[#stack+1] = x end end end fp:close() end ------------------- -- END: commands -- ------------------- ------------------- -- MAIN FUNCTION -- ------------------- if #arg == 0 then print("\nUsage: vcfutils.lua \n") print("Command: freq count biallelic alleles in each population") print(" r2 compute r^2") print(" vcf2chi2 compute 1-degree chi-square between two groups of samples") print(" vcf2bgl convert PL annotated VCF to Beagle input") print(" bgl2vcf 
convert Beagle input to VCF") print("") os.exit(1) end local cmd = arg[1] table.remove(arg, 1) if cmd == 'vcf2bgl' then cmd_vcf2bgl() elseif cmd == 'bgl2vcf' then cmd_bgl2vcf() elseif cmd == 'freq' then cmd_freq() elseif cmd == 'r2' then cmd_r2() elseif cmd == 'vcf2chi2' then cmd_vcf2chi2() else print('ERROR: unknown command "' .. cmd .. '"') os.exit(1) end samtools-0.1.19/misc/wgsim.c000066400000000000000000000340731212162403000156660ustar00rootroot00000000000000/* The MIT License Copyright (c) 2008 Genome Research Ltd (GRL). 2011 Heng Li Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* This program is separated from maq's read simulator with Colin * Hercus' modification to allow longer indels. 
*/ #include #include #include #include #include #include #include #include #include #include #include "kseq.h" KSEQ_INIT(gzFile, gzread) #define PACKAGE_VERSION "0.3.0" const uint8_t nst_nt4_table[256] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5 /*'-'*/, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; /* Simple normal random number generator, copied from genran.c */ double ran_normal() { static int iset = 0; static double gset; double fac, rsq, v1, v2; if (iset == 0) { do { v1 = 2.0 * drand48() - 1.0; v2 = 2.0 * drand48() - 1.0; rsq = v1 * v1 + v2 * v2; } while (rsq >= 1.0 || rsq == 0.0); fac = sqrt(-2.0 * log(rsq) / rsq); gset = v1 * fac; iset = 1; return v2 * fac; } else { iset = 0; return gset; } } /* wgsim */ enum muttype_t {NOCHANGE = 0, INSERT = 0x1000, SUBSTITUTE = 0xe000, DELETE = 0xf000}; typedef unsigned short mut_t; static mut_t mutmsk = (mut_t)0xf000; typedef struct { int l, m; /* length and maximum buffer size */ mut_t *s; /* sequence */ } mutseq_t; static double ERR_RATE = 0.02; static double MUT_RATE = 0.001; static double INDEL_FRAC = 0.15; static double INDEL_EXTEND = 0.3; static double MAX_N_RATIO = 0.1; void wgsim_mut_diref(const kseq_t *ks, int is_hap, mutseq_t *hap1, mutseq_t *hap2) { int i, deleting = 0; mutseq_t *ret[2]; ret[0] = hap1; ret[1] = hap2; ret[0]->l = ks->seq.l; 
ret[1]->l = ks->seq.l; ret[0]->m = ks->seq.m; ret[1]->m = ks->seq.m; ret[0]->s = (mut_t *)calloc(ks->seq.m, sizeof(mut_t)); ret[1]->s = (mut_t *)calloc(ks->seq.m, sizeof(mut_t)); for (i = 0; i != ks->seq.l; ++i) { int c; c = ret[0]->s[i] = ret[1]->s[i] = (mut_t)nst_nt4_table[(int)ks->seq.s[i]]; if (deleting) { if (drand48() < INDEL_EXTEND) { if (deleting & 1) ret[0]->s[i] |= DELETE; if (deleting & 2) ret[1]->s[i] |= DELETE; continue; } else deleting = 0; } if (c < 4 && drand48() < MUT_RATE) { // mutation if (drand48() >= INDEL_FRAC) { // substitution double r = drand48(); c = (c + (int)(r * 3.0 + 1)) & 3; if (is_hap || drand48() < 0.333333) { // hom ret[0]->s[i] = ret[1]->s[i] = SUBSTITUTE|c; } else { // het ret[drand48()<0.5?0:1]->s[i] = SUBSTITUTE|c; } } else { // indel if (drand48() < 0.5) { // deletion if (is_hap || drand48() < 0.333333) { // hom-del ret[0]->s[i] = ret[1]->s[i] = DELETE; deleting = 3; } else { // het-del deleting = drand48()<0.5?1:2; ret[deleting-1]->s[i] = DELETE; } } else { // insertion int num_ins = 0, ins = 0; do { num_ins++; ins = (ins << 2) | (int)(drand48() * 4.0); } while (num_ins < 4 && drand48() < INDEL_EXTEND); if (is_hap || drand48() < 0.333333) { // hom-ins ret[0]->s[i] = ret[1]->s[i] = (num_ins << 12) | (ins << 4) | c; } else { // het-ins ret[drand48()<0.5?0:1]->s[i] = (num_ins << 12) | (ins << 4) | c; } } } } } } void wgsim_print_mutref(const char *name, const kseq_t *ks, mutseq_t *hap1, mutseq_t *hap2) { int i; for (i = 0; i != ks->seq.l; ++i) { int c[3]; c[0] = nst_nt4_table[(int)ks->seq.s[i]]; c[1] = hap1->s[i]; c[2] = hap2->s[i]; if (c[0] >= 4) continue; if ((c[1] & mutmsk) != NOCHANGE || (c[2] & mutmsk) != NOCHANGE) { printf("%s\t%d\t", name, i+1); if (c[1] == c[2]) { // hom if ((c[1]&mutmsk) == SUBSTITUTE) { // substitution printf("%c\t%c\t-\n", "ACGTN"[c[0]], "ACGTN"[c[1]&0xf]); } else if ((c[1]&mutmsk) == DELETE) { // del printf("%c\t-\t-\n", "ACGTN"[c[0]]); } else if (((c[1] & mutmsk) >> 12) <= 5) { // ins printf("-\t"); 
int n = (c[1]&mutmsk) >> 12, ins = c[1] >> 4; while (n > 0) { putchar("ACGTN"[ins & 0x3]); ins >>= 2; n--; } printf("\t-\n"); } else assert(0); } else { // het if ((c[1]&mutmsk) == SUBSTITUTE || (c[2]&mutmsk) == SUBSTITUTE) { // substitution printf("%c\t%c\t+\n", "ACGTN"[c[0]], "XACMGRSVTWYHKDBN"[1<<(c[1]&0x3)|1<<(c[2]&0x3)]); } else if ((c[1]&mutmsk) == DELETE) { printf("%c\t-\t+\n", "ACGTN"[c[0]]); } else if ((c[2]&mutmsk) == DELETE) { printf("%c\t-\t+\n", "ACGTN"[c[0]]); } else if (((c[1] & mutmsk) >> 12) <= 4) { // ins1 printf("-\t"); int n = (c[1]&mutmsk) >> 12, ins = c[1] >> 4; while (n > 0) { putchar("ACGTN"[ins & 0x3]); ins >>= 2; n--; } printf("\t+\n"); } else if (((c[2] & mutmsk) >> 12) <= 5) { // ins2 printf("-\t"); int n = (c[2]&mutmsk) >> 12, ins = c[2] >> 4; while (n > 0) { putchar("ACGTN"[ins & 0x3]); ins >>= 2; n--; } printf("\t+\n"); } else assert(0); } } } } void wgsim_core(FILE *fpout1, FILE *fpout2, const char *fn, int is_hap, uint64_t N, int dist, int std_dev, int size_l, int size_r) { kseq_t *ks; mutseq_t rseq[2]; gzFile fp_fa; uint64_t tot_len, ii; int i, l, n_ref; char *qstr; int size[2], Q, max_size; uint8_t *tmp_seq[2]; mut_t *target; l = size_l > size_r? size_l : size_r; qstr = (char*)calloc(l+1, 1); tmp_seq[0] = (uint8_t*)calloc(l+2, 1); tmp_seq[1] = (uint8_t*)calloc(l+2, 1); size[0] = size_l; size[1] = size_r; max_size = size_l > size_r? size_l : size_r; Q = (ERR_RATE == 0.0)? 
'I' : (int)(-10.0 * log(ERR_RATE) / log(10.0) + 0.499) + 33; fp_fa = gzopen(fn, "r"); ks = kseq_init(fp_fa); tot_len = n_ref = 0; fprintf(stderr, "[%s] calculating the total length of the reference sequence...\n", __func__); while ((l = kseq_read(ks)) >= 0) { tot_len += l; ++n_ref; } fprintf(stderr, "[%s] %d sequences, total length: %llu\n", __func__, n_ref, (long long)tot_len); kseq_destroy(ks); gzclose(fp_fa); fp_fa = gzopen(fn, "r"); ks = kseq_init(fp_fa); while ((l = kseq_read(ks)) >= 0) { uint64_t n_pairs = (uint64_t)((long double)l / tot_len * N + 0.5); if (l < dist + 3 * std_dev) { fprintf(stderr, "[%s] skip sequence '%s' as it is shorter than %d!\n", __func__, ks->name.s, dist + 3 * std_dev); continue; } // generate mutations and print them out wgsim_mut_diref(ks, is_hap, rseq, rseq+1); wgsim_print_mutref(ks->name.s, ks, rseq, rseq+1); for (ii = 0; ii != n_pairs; ++ii) { // the core loop double ran; int d, pos, s[2], is_flip = 0; int n_sub[2], n_indel[2], n_err[2], ext_coor[2], j, k; FILE *fpo[2]; do { // avoid boundary failure ran = ran_normal(); ran = ran * std_dev + dist; d = (int)(ran + 0.5); d = d > max_size? 
d : max_size; pos = (int)((l - d + 1) * drand48()); } while (pos < 0 || pos >= ks->seq.l || pos + d - 1 >= ks->seq.l); // flip or not if (drand48() < 0.5) { fpo[0] = fpout1; fpo[1] = fpout2; s[0] = size[0]; s[1] = size[1]; } else { fpo[1] = fpout1; fpo[0] = fpout2; s[1] = size[0]; s[0] = size[1]; is_flip = 1; } // generate the read sequences target = rseq[drand48()<0.5?0:1].s; // haplotype from which the reads are generated n_sub[0] = n_sub[1] = n_indel[0] = n_indel[1] = n_err[0] = n_err[1] = 0; #define __gen_read(x, start, iter) do { \ for (i = (start), k = 0, ext_coor[x] = -10; i >= 0 && i < ks->seq.l && k < s[x]; iter) { \ int c = target[i], mut_type = c & mutmsk; \ if (ext_coor[x] < 0) { \ if (mut_type != NOCHANGE && mut_type != SUBSTITUTE) continue; \ ext_coor[x] = i; \ } \ if (mut_type == DELETE) ++n_indel[x]; \ else if (mut_type == NOCHANGE || mut_type == SUBSTITUTE) { \ tmp_seq[x][k++] = c & 0xf; \ if (mut_type == SUBSTITUTE) ++n_sub[x]; \ } else { \ int n, ins; \ ++n_indel[x]; \ tmp_seq[x][k++] = c & 0xf; \ for (n = mut_type>>12, ins = c>>4; n > 0 && k < s[x]; --n, ins >>= 2) \ tmp_seq[x][k++] = ins & 0x3; \ } \ } \ if (k != s[x]) ext_coor[x] = -10; \ } while (0) __gen_read(0, pos, ++i); __gen_read(1, pos + d - 1, --i); for (k = 0; k < s[1]; ++k) tmp_seq[1][k] = tmp_seq[1][k] < 4? 
3 - tmp_seq[1][k] : 4; // complement if (ext_coor[0] < 0 || ext_coor[1] < 0) { // fail to generate the read(s) --ii; continue; } // generate sequencing errors for (j = 0; j < 2; ++j) { int n_n = 0; for (i = 0; i < s[j]; ++i) { int c = tmp_seq[j][i]; if (c >= 4) { // actually c should be never larger than 4 if everything is correct c = 4; ++n_n; } else if (drand48() < ERR_RATE) { // c = (c + (int)(drand48() * 3.0 + 1)) & 3; // random sequencing errors c = (c + 1) & 3; // recurrent sequencing errors ++n_err[j]; } tmp_seq[j][i] = c; } if ((double)n_n / s[j] > MAX_N_RATIO) break; } if (j < 2) { // too many ambiguous bases on one of the reads --ii; continue; } // print for (j = 0; j < 2; ++j) { for (i = 0; i < s[j]; ++i) qstr[i] = Q; qstr[i] = 0; fprintf(fpo[j], "@%s_%u_%u_%d:%d:%d_%d:%d:%d_%llx/%d\n", ks->name.s, ext_coor[0]+1, ext_coor[1]+1, n_err[0], n_sub[0], n_indel[0], n_err[1], n_sub[1], n_indel[1], (long long)ii, j==0? is_flip+1 : 2-is_flip); for (i = 0; i < s[j]; ++i) fputc("ACGTN"[(int)tmp_seq[j][i]], fpo[j]); fprintf(fpo[j], "\n+\n%s\n", qstr); } } free(rseq[0].s); free(rseq[1].s); } kseq_destroy(ks); gzclose(fp_fa); free(qstr); free(tmp_seq[0]); free(tmp_seq[1]); } static int simu_usage() { fprintf(stderr, "\n"); fprintf(stderr, "Program: wgsim (short read simulator)\n"); fprintf(stderr, "Version: %s\n", PACKAGE_VERSION); fprintf(stderr, "Contact: Heng Li \n\n"); fprintf(stderr, "Usage: wgsim [options] \n\n"); fprintf(stderr, "Options: -e FLOAT base error rate [%.3f]\n", ERR_RATE); fprintf(stderr, " -d INT outer distance between the two ends [500]\n"); fprintf(stderr, " -s INT standard deviation [50]\n"); fprintf(stderr, " -N INT number of read pairs [1000000]\n"); fprintf(stderr, " -1 INT length of the first read [70]\n"); fprintf(stderr, " -2 INT length of the second read [70]\n"); fprintf(stderr, " -r FLOAT rate of mutations [%.4f]\n", MUT_RATE); fprintf(stderr, " -R FLOAT fraction of indels [%.2f]\n", INDEL_FRAC); fprintf(stderr, " -X FLOAT probability 
an indel is extended [%.2f]\n", INDEL_EXTEND); fprintf(stderr, " -S INT seed for random generator [-1]\n"); fprintf(stderr, " -h haplotype mode\n"); fprintf(stderr, "\n"); return 1; } int main(int argc, char *argv[]) { int64_t N; int dist, std_dev, c, size_l, size_r, is_hap = 0; FILE *fpout1, *fpout2; int seed = -1; N = 1000000; dist = 500; std_dev = 50; size_l = size_r = 70; while ((c = getopt(argc, argv, "e:d:s:N:1:2:r:R:hX:S:")) >= 0) { switch (c) { case 'd': dist = atoi(optarg); break; case 's': std_dev = atoi(optarg); break; case 'N': N = atoi(optarg); break; case '1': size_l = atoi(optarg); break; case '2': size_r = atoi(optarg); break; case 'e': ERR_RATE = atof(optarg); break; case 'r': MUT_RATE = atof(optarg); break; case 'R': INDEL_FRAC = atof(optarg); break; case 'X': INDEL_EXTEND = atof(optarg); break; case 'S': seed = atoi(optarg); break; case 'h': is_hap = 1; break; } } if (argc - optind < 3) return simu_usage(); fpout1 = fopen(argv[optind+1], "w"); fpout2 = fopen(argv[optind+2], "w"); if (!fpout1 || !fpout2) { fprintf(stderr, "[wgsim] file open error\n"); return 1; } srand48(seed > 0? 
seed : time(0)); wgsim_core(fpout1, fpout2, argv[optind], is_hap, N, dist, std_dev, size_l, size_r); fclose(fpout1); fclose(fpout2); return 0; } samtools-0.1.19/misc/wgsim_eval.pl000077500000000000000000000052511212162403000170650ustar00rootroot00000000000000#!/usr/bin/perl -w # Contact: lh3 # Version: 0.1.5 use strict; use warnings; use Getopt::Std; &wgsim_eval; exit; sub wgsim_eval { my %opts = (g=>5); getopts('pcag:', \%opts); die("Usage: wgsim_eval.pl [-pca] [-g $opts{g}] \n") if (@ARGV == 0 && -t STDIN); my (@c0, @c1, %fnfp); my ($max_q, $flag) = (0, 0); my $gap = $opts{g}; $flag |= 1 if (defined $opts{p}); $flag |= 2 if (defined $opts{c}); while (<>) { next if (/^\@/); my @t = split("\t"); next if (@t < 11); my $line = $_; my ($q, $is_correct, $chr, $left, $rght) = (int($t[4]/10), 1, $t[2], $t[3], $t[3]); $max_q = $q if ($q > $max_q); # right coordinate $_ = $t[5]; s/(\d+)[MDN]/$rght+=$1,'x'/eg; --$rght; # correct for soft clipping my ($left0, $rght0) = ($left, $rght); $left -= $1 if (/^(\d+)[SH]/); $rght += $1 if (/(\d+)[SH]$/); $left0 -= $1 if (/(\d+)[SH]$/); $rght0 += $1 if (/^(\d+)[SH]/); # skip unmapped reads next if (($t[1]&0x4) || $chr eq '*'); # parse read name and check if ($t[0] =~ /^(\S+)_(\d+)_(\d+)_/) { if ($1 ne $chr) { # different chr $is_correct = 0; } else { if ($flag & 2) { if (($t[1]&0x40) && !($t[1]&0x10)) { # F3, forward $is_correct = 0 if (abs($2 - $left) > $gap && abs($2 - $left0) > $gap); } elsif (($t[1]&0x40) && ($t[1]&0x10)) { # F3, reverse $is_correct = 0 if (abs($3 - $rght) > $gap && abs($3 - $rght0) > $gap); } elsif (($t[1]&0x80) && !($t[1]&0x10)) { # R3, forward $is_correct = 0 if (abs($3 - $left) > $gap && abs($3 - $left0) > $gap); } else { # R3, reverse $is_correct = 0 if (abs($2 - $rght) > $gap && abs($3 - $rght0) > $gap); } } else { if ($t[1] & 0x10) { # reverse $is_correct = 0 if (abs($3 - $rght) > $gap && abs($3 - $rght0) > $gap); # in case of indels that are close to the end of a reads } else { $is_correct = 0 if (abs($2 - 
$left) > $gap && abs($2 - $left0) > $gap); } } } } else { warn("[wgsim_eval] read '$t[0]' was not generated by wgsim?\n"); next; } ++$c0[$q]; ++$c1[$q] unless ($is_correct); @{$fnfp{$t[4]}} = (0, 0) unless (defined $fnfp{$t[4]}); ++$fnfp{$t[4]}[0]; ++$fnfp{$t[4]}[1] unless ($is_correct); print STDERR $line if (($flag&1) && !$is_correct && $q > 0); } # print my ($cc0, $cc1) = (0, 0); if (!defined($opts{a})) { for (my $i = $max_q; $i >= 0; --$i) { $c0[$i] = 0 unless (defined $c0[$i]); $c1[$i] = 0 unless (defined $c1[$i]); $cc0 += $c0[$i]; $cc1 += $c1[$i]; printf("%.2dx %12d / %-12d %12d %.3e\n", $i, $c1[$i], $c0[$i], $cc0, $cc1/$cc0) if ($cc0); } } else { for (reverse(sort {$a<=>$b} (keys %fnfp))) { next if ($_ == 0); $cc0 += $fnfp{$_}[0]; $cc1 += $fnfp{$_}[1]; print join("\t", $_, $cc0, $cc1), "\n"; } } } samtools-0.1.19/misc/zoom2sam.pl000077500000000000000000000045351212162403000165030ustar00rootroot00000000000000#!/usr/bin/perl -w # Contact: lh3 # Version: 0.1.0 use strict; use warnings; use Getopt::Std; &zoom2sam; exit; sub mating { my ($s1, $s2) = @_; my $isize = 0; if ($s1->[2] ne '*' && $s1->[2] eq $s2->[2]) { # then calculate $isize my $x1 = ($s1->[1] & 0x10)? $s1->[3] + length($s1->[9]) : $s1->[3]; my $x2 = ($s2->[1] & 0x10)? $s2->[3] + length($s2->[9]) : $s2->[3]; $isize = $x2 - $x1; } # update mate coordinate if ($s2->[2] ne '*') { @$s1[6..8] = (($s2->[2] eq $s1->[2])? "=" : $s2->[2], $s2->[3], $isize); $s1->[1] |= 0x20 if ($s2->[1] & 0x10); } else { $s1->[1] |= 0x8; } if ($s1->[2] ne '*') { @$s2[6..8] = (($s1->[2] eq $s2->[2])? 
"=" : $s1->[2], $s1->[3], -$isize); $s2->[1] |= 0x20 if ($s1->[1] & 0x10); } else { $s2->[1] |= 0x8; } } sub zoom2sam { my %opts = (); getopts("p", \%opts); die("Usage: zoom2sam.pl [-p] Warnings: This script only supports the default Illumina outputs.\n") if (@ARGV < 2); my $is_paired = defined($opts{p}); my $len = shift(@ARGV); # core loop my @s1 = (); my @s2 = (); my ($s_last, $s_curr) = (\@s1, \@s2); while (<>) { &zoom2sam_aux($_, $s_curr, $is_paired, $len); if (@$s_last != 0 && $s_last->[0] eq $s_curr->[0]) { &mating($s_last, $s_curr); print join("\t", @$s_last), "\n"; print join("\t", @$s_curr), "\n"; @$s_last = (); @$s_curr = (); } else { print join("\t", @$s_last), "\n" if (@$s_last != 0); my $s = $s_last; $s_last = $s_curr; $s_curr = $s; } } print join("\t", @$s_last), "\n" if (@$s_last != 0); } sub zoom2sam_aux { my ($line, $s, $is_paired, $len) = @_; chomp($line); my @t = split("\t", $line); @$s = (); # read name $s->[0] = $t[0]; # initial flag (will be updated later) $s->[1] = 0; $s->[1] |= 1 | 1<<6 if ($s->[0] =~ /_F$/); $s->[1] |= 1 | 1<<7 if ($s->[0] =~ /_R$/); $s->[1] |= 2 if ($is_paired); # read & quality $s->[9] = "*"; $s->[10] = "*"; # cigar $s->[5] = $len . "M"; # coor my @s = split(/\s+/, $t[1]); $s->[2] = $s[0]; $t[1] =~ /:(\d+)$/; $s->[3] = $1 + 1; if ($s->[0] =~ /_[FR]$/) { my $u = ($s->[0] =~ /_F$/)? 1 : 0; my $w = ($t[2] eq '+')? 
1 : 0; $s->[1] |= 0x10 if ($u ^ $w); $s->[0] =~ s/_[FR]$//; } else { $s->[1] |= 0x10 if ($t[2] eq '-'); } # mapQ $s->[4] = 30; # mate coordinate $s->[6] = '*'; $s->[7] = $s->[8] = 0; # aux push(@$s, "NM:i:$t[3]"); } samtools-0.1.19/padding.c000066400000000000000000000413641212162403000152140ustar00rootroot00000000000000#include #include #include #include "kstring.h" #include "sam_header.h" #include "sam.h" #include "bam.h" #include "faidx.h" bam_header_t *bam_header_dup(const bam_header_t *h0); /*in sam.c*/ static void replace_cigar(bam1_t *b, int n, uint32_t *cigar) { if (n != b->core.n_cigar) { int o = b->core.l_qname + b->core.n_cigar * 4; if (b->data_len + (n - b->core.n_cigar) * 4 > b->m_data) { b->m_data = b->data_len + (n - b->core.n_cigar) * 4; kroundup32(b->m_data); b->data = (uint8_t*)realloc(b->data, b->m_data); } memmove(b->data + b->core.l_qname + n * 4, b->data + o, b->data_len - o); memcpy(b->data + b->core.l_qname, cigar, n * 4); b->data_len += (n - b->core.n_cigar) * 4; b->core.n_cigar = n; } else memcpy(b->data + b->core.l_qname, cigar, n * 4); } #define write_cigar(_c, _n, _m, _v) do { \ if (_n == _m) { \ _m = _m? 
_m<<1 : 4; \ _c = (uint32_t*)realloc(_c, _m * 4); \ } \ _c[_n++] = (_v); \ } while (0) static void unpad_seq(bam1_t *b, kstring_t *s) { int k, j, i; int length; uint32_t *cigar = bam1_cigar(b); uint8_t *seq = bam1_seq(b); // b->core.l_qseq gives length of the SEQ entry (including soft clips, S) // We need the padded length after alignment from the CIGAR (excluding // soft clips S, but including pads from CIGAR D operations) length = 0; for (k = 0; k < b->core.n_cigar; ++k) { int op, ol; op= bam_cigar_op(cigar[k]); ol = bam_cigar_oplen(cigar[k]); if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF || op == BAM_CDEL) length += ol; } ks_resize(s, length); for (k = 0, s->l = 0, j = 0; k < b->core.n_cigar; ++k) { int op, ol; op = bam_cigar_op(cigar[k]); ol = bam_cigar_oplen(cigar[k]); if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { for (i = 0; i < ol; ++i, ++j) s->s[s->l++] = bam1_seqi(seq, j); } else if (op == BAM_CSOFT_CLIP) { j += ol; } else if (op == BAM_CHARD_CLIP) { /* do nothing */ } else if (op == BAM_CDEL) { for (i = 0; i < ol; ++i) s->s[s->l++] = 0; } else { fprintf(stderr, "[depad] ERROR: Didn't expect CIGAR op %c in read %s\n", BAM_CIGAR_STR[op], bam1_qname(b)); assert(-1); } } assert(length == s->l); } int load_unpadded_ref(faidx_t *fai, char *ref_name, int ref_len, kstring_t *seq) { char base; char *fai_ref = 0; int fai_ref_len = 0, k; fai_ref = fai_fetch(fai, ref_name, &fai_ref_len); if (fai_ref_len != ref_len) { fprintf(stderr, "[depad] ERROR: FASTA sequence %s length %i, expected %i\n", ref_name, fai_ref_len, ref_len); free(fai_ref); return -1; } ks_resize(seq, ref_len); seq->l = 0; for (k = 0; k < ref_len; ++k) { base = fai_ref[k]; if (base == '-' || base == '*') { // Map gaps to null to match unpad_seq function seq->s[seq->l++] = 0; } else { int i = bam_nt16_table[(int)base]; if (i == 0 || i==16) { // Equals maps to 0, anything unexpected to 16 fprintf(stderr, "[depad] ERROR: Invalid character %c (ASCII %i) in FASTA sequence 
%s\n", base, (int)base, ref_name); free(fai_ref); return -1; } seq->s[seq->l++] = i; } } assert(ref_len == seq->l); free(fai_ref); return 0; } int get_unpadded_len(faidx_t *fai, char *ref_name, int padded_len) { char base; char *fai_ref = 0; int fai_ref_len = 0, k; int bases=0, gaps=0; fai_ref = fai_fetch(fai, ref_name, &fai_ref_len); if (fai_ref_len != padded_len) { fprintf(stderr, "[depad] ERROR: FASTA sequence '%s' length %i, expected %i\n", ref_name, fai_ref_len, padded_len); free(fai_ref); return -1; } for (k = 0; k < padded_len; ++k) { //fprintf(stderr, "[depad] checking base %i of %i or %i\n", k+1, ref_len, strlen(fai_ref)); base = fai_ref[k]; if (base == '-' || base == '*') { gaps += 1; } else { int i = bam_nt16_table[(int)base]; if (i == 0 || i==16) { // Equals maps to 0, anything unexpected to 16 fprintf(stderr, "[depad] ERROR: Invalid character %c (ASCII %i) in FASTA sequence '%s'\n", base, (int)base, ref_name); free(fai_ref); return -1; } bases += 1; } } free(fai_ref); assert (padded_len == bases + gaps); return bases; } inline int * update_posmap(int *posmap, kstring_t ref) { int i, k; posmap = realloc(posmap, ref.m * sizeof(int)); for (i = k = 0; i < ref.l; ++i) { posmap[i] = k; if (ref.s[i]) ++k; } return posmap; } int bam_pad2unpad(samfile_t *in, samfile_t *out, faidx_t *fai) { bam_header_t *h = 0; bam1_t *b = 0; kstring_t r, q; int r_tid = -1; uint32_t *cigar2 = 0; int ret = 0, n2 = 0, m2 = 0, *posmap = 0; b = bam_init1(); r.l = r.m = q.l = q.m = 0; r.s = q.s = 0; int read_ret; h = in->header; while ((read_ret = samread(in, b)) >= 0) { // read one alignment from `in' uint32_t *cigar = bam1_cigar(b); n2 = 0; if (b->core.pos == 0 && b->core.tid >= 0 && strcmp(bam1_qname(b), h->target_name[b->core.tid]) == 0) { // fprintf(stderr, "[depad] Found embedded reference '%s'\n", bam1_qname(b)); r_tid = b->core.tid; unpad_seq(b, &r); if (h->target_len[r_tid] != r.l) { fprintf(stderr, "[depad] ERROR: (Padded) length of '%s' is %d in BAM header, but %ld in 
embedded reference\n", bam1_qname(b), h->target_len[r_tid], r.l); return -1; } if (fai) { // Check the embedded reference matches the FASTA file if (load_unpadded_ref(fai, h->target_name[b->core.tid], h->target_len[b->core.tid], &q)) { fprintf(stderr, "[depad] ERROR: Failed to load embedded reference '%s' from FASTA\n", h->target_name[b->core.tid]); return -1; } assert(r.l == q.l); int i; for (i = 0; i < r.l; ++i) { if (r.s[i] != q.s[i]) { // Show gaps as ASCII 45 fprintf(stderr, "[depad] ERROR: Embedded sequence and reference FASTA don't match for %s base %i, '%c' vs '%c'\n", h->target_name[b->core.tid], i+1, r.s[i] ? bam_nt16_rev_table[(int)r.s[i]] : 45, q.s[i] ? bam_nt16_rev_table[(int)q.s[i]] : 45); return -1; } } } write_cigar(cigar2, n2, m2, bam_cigar_gen(b->core.l_qseq, BAM_CMATCH)); replace_cigar(b, n2, cigar2); posmap = update_posmap(posmap, r); } else if (b->core.n_cigar > 0) { int i, k, op; if (b->core.tid < 0) { fprintf(stderr, "[depad] ERROR: Read '%s' has CIGAR but no RNAME\n", bam1_qname(b)); return -1; } else if (b->core.tid == r_tid) { ; // good case, reference available //fprintf(stderr, "[depad] Have ref '%s' for read '%s'\n", h->target_name[b->core.tid], bam1_qname(b)); } else if (fai) { if (load_unpadded_ref(fai, h->target_name[b->core.tid], h->target_len[b->core.tid], &r)) { fprintf(stderr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.tid]); return -1; } posmap = update_posmap(posmap, r); r_tid = b->core.tid; // fprintf(stderr, "[depad] Loaded %s from FASTA file\n", h->target_name[b->core.tid]); } else { fprintf(stderr, "[depad] ERROR: Missing %s embedded reference sequence (and no FASTA file)\n", h->target_name[b->core.tid]); return -1; } unpad_seq(b, &q); if (bam_cigar_op(cigar[0]) == BAM_CSOFT_CLIP) { write_cigar(cigar2, n2, m2, cigar[0]); } else if (bam_cigar_op(cigar[0]) == BAM_CHARD_CLIP) { write_cigar(cigar2, n2, m2, cigar[0]); if (b->core.n_cigar > 2 && bam_cigar_op(cigar[1]) == BAM_CSOFT_CLIP) { 
write_cigar(cigar2, n2, m2, cigar[1]); } } /* Determine CIGAR operator for each base in the aligned read */ for (i = 0, k = b->core.pos; i < q.l; ++i, ++k) q.s[i] = q.s[i]? (r.s[k]? BAM_CMATCH : BAM_CINS) : (r.s[k]? BAM_CDEL : BAM_CPAD); /* Include any pads if starts with an insert */ if (q.s[0] == BAM_CINS) { for (k = 0; k+1 < b->core.pos && !r.s[b->core.pos - k - 1]; ++k); if (k) write_cigar(cigar2, n2, m2, bam_cigar_gen(k, BAM_CPAD)); } /* Count consecutive CIGAR operators to turn into a CIGAR string */ for (i = k = 1, op = q.s[0]; i < q.l; ++i) { if (op != q.s[i]) { write_cigar(cigar2, n2, m2, bam_cigar_gen(k, op)); op = q.s[i]; k = 1; } else ++k; } write_cigar(cigar2, n2, m2, bam_cigar_gen(k, op)); if (bam_cigar_op(cigar[b->core.n_cigar-1]) == BAM_CSOFT_CLIP) { write_cigar(cigar2, n2, m2, cigar[b->core.n_cigar-1]); } else if (bam_cigar_op(cigar[b->core.n_cigar-1]) == BAM_CHARD_CLIP) { if (b->core.n_cigar > 2 && bam_cigar_op(cigar[b->core.n_cigar-2]) == BAM_CSOFT_CLIP) { write_cigar(cigar2, n2, m2, cigar[b->core.n_cigar-2]); } write_cigar(cigar2, n2, m2, cigar[b->core.n_cigar-1]); } /* Remove redundant P operators between M/X/=/D operators, e.g. 5M2P10M -> 15M */ int pre_op, post_op; for (i = 2; i < n2; ++i) if (bam_cigar_op(cigar2[i-1]) == BAM_CPAD) { pre_op = bam_cigar_op(cigar2[i-2]); post_op = bam_cigar_op(cigar2[i]); /* Note don't need to check for X/= as code above will use M only */ if ((pre_op == BAM_CMATCH || pre_op == BAM_CDEL) && (post_op == BAM_CMATCH || post_op == BAM_CDEL)) { /* This is a redundant P operator */ cigar2[i-1] = 0; // i.e. 0M /* If had same operator either side, combine them in post_op */ if (pre_op == post_op) { /* If CIGAR M, could treat as simple integers since BAM_CMATCH is zero*/ cigar2[i] = bam_cigar_gen(bam_cigar_oplen(cigar2[i-2]) + bam_cigar_oplen(cigar2[i]), post_op); cigar2[i-2] = 0; // i.e. 
0M } } } /* Remove the zero'd operators (0M) */ for (i = k = 0; i < n2; ++i) if (cigar2[i]) cigar2[k++] = cigar2[i]; n2 = k; replace_cigar(b, n2, cigar2); b->core.pos = posmap[b->core.pos]; if (b->core.mtid < 0 || b->core.mpos < 0) { /* Nice case, no mate to worry about*/ // fprintf(stderr, "[depad] Read '%s' mate not mapped\n", bam1_qname(b)); /* TODO - Warning if FLAG says mate should be mapped? */ /* Clean up funny input where mate position is given but mate reference is missing: */ b->core.mtid = -1; b->core.mpos = -1; } else if (b->core.mtid == b->core.tid) { /* Nice case, same reference */ // fprintf(stderr, "[depad] Read '%s' mate mapped to same ref\n", bam1_qname(b)); b->core.mpos = posmap[b->core.mpos]; } else { /* Nasty case, Must load alternative posmap */ // fprintf(stderr, "[depad] Loading reference '%s' temporarily\n", h->target_name[b->core.mtid]); if (!fai) { fprintf(stderr, "[depad] ERROR: Needed reference %s sequence for mate (and no FASTA file)\n", h->target_name[b->core.mtid]); return -1; } /* Temporarily load the other reference sequence */ if (load_unpadded_ref(fai, h->target_name[b->core.mtid], h->target_len[b->core.mtid], &r)) { fprintf(stderr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.mtid]); return -1; } posmap = update_posmap(posmap, r); b->core.mpos = posmap[b->core.mpos]; /* Restore the reference and posmap*/ if (load_unpadded_ref(fai, h->target_name[b->core.tid], h->target_len[b->core.tid], &r)) { fprintf(stderr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.tid]); return -1; } posmap = update_posmap(posmap, r); } } samwrite(out, b); } if (read_ret < -1) { fprintf(stderr, "[depad] truncated file.\n"); ret = 1; } free(r.s); free(q.s); free(posmap); bam_destroy1(b); return ret; } bam_header_t * fix_header(bam_header_t *old, faidx_t *fai) { int i = 0, unpadded_len = 0; bam_header_t *header = 0 ; header = bam_header_dup(old); for (i = 0; i < old->n_targets; 
++i) { unpadded_len = get_unpadded_len(fai, old->target_name[i], old->target_len[i]); if (unpadded_len < 0) { fprintf(stderr, "[depad] ERROR getting unpadded length of '%s', padded length %i\n", old->target_name[i], old->target_len[i]); } else { header->target_len[i] = unpadded_len; //fprintf(stderr, "[depad] Recalculating '%s' length %i -> %i\n", old->target_name[i], old->target_len[i], header->target_len[i]); } } /* Duplicating the header allocated new buffer for header string */ /* After modifying the @SQ lines it will only get smaller, since */ /* the LN entries will be the same or shorter, and we'll remove */ /* any MD entries (MD5 checksums). */ assert(strlen(old->text) == strlen(header->text)); assert (0==strcmp(old->text, header->text)); const char *text; text = old->text; header->text[0] = '\0'; /* Resuse the allocated buffer */ char * newtext = header->text; char * end=NULL; while (text[0]=='@') { end = strchr(text, '\n'); assert(end != 0); if (text[1]=='S' && text[2]=='Q' && text[3]=='\t') { /* TODO - edit the @SQ line here to remove MD and fix LN. */ /* For now just remove the @SQ line, and samtools will */ /* automatically generate a minimal replacement with LN. */ /* However, that discards any other tags like AS, SP, UR. 
*/ //fprintf(stderr, "[depad] Removing @SQ line\n"); } else { /* Copy this line to the new header */ strncat(newtext, text, end - text + 1); } text = end + 1; } assert (text[0]=='\0'); /* Check we didn't overflow the buffer */ assert (strlen(header->text) <= strlen(old->text)); if (strlen(header->text) < header->l_text) { //fprintf(stderr, "[depad] Reallocating header buffer\n"); assert (newtext == header->text); newtext = malloc(strlen(header->text) + 1); strcpy(newtext, header->text); free(header->text); header->text = newtext; header->l_text = strlen(newtext); } //fprintf(stderr, "[depad] Here is the new header (pending @SQ lines),\n\n%s\n(end)\n", header->text); return header; } static int usage(int is_long_help); int main_pad2unpad(int argc, char *argv[]) { samfile_t *in = 0, *out = 0; bam_header_t *h = 0; faidx_t *fai = 0; int c, is_bamin = 1, compress_level = -1, is_bamout = 1, is_long_help = 0; char in_mode[5], out_mode[5], *fn_out = 0, *fn_list = 0, *fn_ref = 0; int ret=0; /* parse command-line options */ strcpy(in_mode, "r"); strcpy(out_mode, "w"); while ((c = getopt(argc, argv, "Sso:u1T:?")) >= 0) { switch (c) { case 'S': is_bamin = 0; break; case 's': assert(compress_level == -1); is_bamout = 0; break; case 'o': fn_out = strdup(optarg); break; case 'u': assert(is_bamout == 1); compress_level = 0; break; case '1': assert(is_bamout == 1); compress_level = 1; break; case 'T': fn_ref = strdup(optarg); break; case '?': is_long_help = 1; break; default: return usage(is_long_help); } } if (argc == optind) return usage(is_long_help); if (is_bamin) strcat(in_mode, "b"); if (is_bamout) strcat(out_mode, "b"); strcat(out_mode, "h"); if (compress_level >= 0) { char tmp[2]; tmp[0] = compress_level + '0'; tmp[1] = '\0'; strcat(out_mode, tmp); } // Load FASTA reference (also needed for SAM -> BAM if missing header) if (fn_ref) { fn_list = samfaipath(fn_ref); fai = fai_load(fn_ref); } // open file handlers if ((in = samopen(argv[optind], in_mode, fn_list)) == 0) { 
fprintf(stderr, "[depad] failed to open \"%s\" for reading.\n", argv[optind]); ret = 1; goto depad_end; } if (in->header == 0) { fprintf(stderr, "[depad] failed to read the header from \"%s\".\n", argv[optind]); ret = 1; goto depad_end; } if (in->header->text == 0 || in->header->l_text == 0) { fprintf(stderr, "[depad] Warning - failed to read any header text from \"%s\".\n", argv[optind]); assert (0 == in->header->l_text); assert (0 == in->header->text); } if (fn_ref) { h = fix_header(in->header, fai); } else { fprintf(stderr, "[depad] Warning - reference lengths will not be corrected without FASTA reference\n"); h = in->header; } if ((out = samopen(fn_out? fn_out : "-", out_mode, h)) == 0) { fprintf(stderr, "[depad] failed to open \"%s\" for writing.\n", fn_out? fn_out : "standard output"); ret = 1; goto depad_end; } // Do the depad ret = bam_pad2unpad(in, out, fai); depad_end: // close files, free and return if (fai) fai_destroy(fai); if (h != in->header) bam_header_destroy(h); samclose(in); samclose(out); free(fn_list); free(fn_out); return ret; } static int usage(int is_long_help) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: samtools depad \n\n"); fprintf(stderr, "Options: -s output is SAM (default is BAM)\n"); fprintf(stderr, " -S input is SAM (default is BAM)\n"); fprintf(stderr, " -u uncompressed BAM output (can't use with -s)\n"); fprintf(stderr, " -1 fast compression BAM output (can't use with -s)\n"); fprintf(stderr, " -T FILE reference sequence file [null]\n"); fprintf(stderr, " -o FILE output file name [stdout]\n"); fprintf(stderr, " -? longer help\n"); fprintf(stderr, "\n"); if (is_long_help) fprintf(stderr, "Notes:\n\ \n\ 1. Requires embedded reference sequences (before the reads for that reference),\n\ with the future aim to also support a FASTA padded reference sequence file.\n\ \n\ 2. 
The input padded alignment read's CIGAR strings must not use P or I operators.\n\ \n"); return 1; } samtools-0.1.19/phase.c000066400000000000000000000505231212162403000147030ustar00rootroot00000000000000#include #include #include #include #include #include #include "bam.h" #include "errmod.h" #include "kseq.h" KSTREAM_INIT(gzFile, gzread, 16384) #define MAX_VARS 256 #define FLIP_PENALTY 2 #define FLIP_THRES 4 #define MASK_THRES 3 #define FLAG_FIX_CHIMERA 0x1 #define FLAG_LIST_EXCL 0x4 #define FLAG_DROP_AMBI 0x8 typedef struct { // configurations, initialized in the main function int flag, k, min_baseQ, min_varLOD, max_depth; // other global variables int vpos_shift; bamFile fp; char *pre; bamFile out[3]; // alignment queue int n, m; bam1_t **b; } phaseg_t; typedef struct { int8_t seq[MAX_VARS]; // TODO: change to dynamic memory allocation! int vpos, beg, end; uint32_t vlen:16, single:1, flip:1, phase:1, phased:1, ambig:1; uint32_t in:16, out:16; // in-phase and out-phase } frag_t, *frag_p; #define rseq_lt(a,b) ((a)->vpos < (b)->vpos) #include "khash.h" KHASH_SET_INIT_INT64(set64) KHASH_MAP_INIT_INT64(64, frag_t) typedef khash_t(64) nseq_t; #include "ksort.h" KSORT_INIT(rseq, frag_p, rseq_lt) static char nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 }; static inline uint64_t X31_hash_string(const char *s) { uint64_t h = *s; if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s; return h; } static void count1(int l, const uint8_t *seq, int *cnt) { int i, j, n_ambi; uint32_t z, x; if (seq[l-1] == 0) return; // do nothing is the last base is ambiguous for (i = n_ambi = 0; i < l; ++i) // collect ambiguous bases if (seq[i] == 0) ++n_ambi; if (l - n_ambi <= 1) return; // only one SNP for (x = 0; x < 1u<>j&1; ++j; } z = z<<1 | c; } ++cnt[z]; } } static int **count_all(int l, int vpos, nseq_t *hash) { khint_t k; int i, j, **cnt; uint8_t *seq; seq = calloc(l, 1); cnt = calloc(vpos, sizeof(void*)); for (i = 0; i < vpos; ++i) cnt[i] = calloc(1<vpos >= vpos || 
f->single) continue; // out of region; or singleton if (f->vlen == 1) { // such reads should be flagged as deleted previously if everything is right f->single = 1; continue; } for (j = 1; j < f->vlen; ++j) { for (i = 0; i < l; ++i) seq[i] = j < l - 1 - i? 0 : f->seq[j - (l - 1 - i)]; count1(l, seq, cnt[f->vpos + j]); } } } free(seq); return cnt; } // phasing static int8_t *dynaprog(int l, int vpos, int **w) { int *f[2], *curr, *prev, max, i; int8_t **b, *h = 0; uint32_t x, z = 1u<<(l-1), mask = (1u<>1; y1 = xc>>1; c0 = prev[y0] + wi[x] + wi[xc]; c1 = prev[y1] + wi[x] + wi[xc]; if (c0 > c1) bi[x] = 0, curr[x] = c0; else bi[x] = 1, curr[x] = c1; } tmp = prev; prev = curr; curr = tmp; // swap } { // backtrack uint32_t max_x = 0; int which = 0; h = calloc(vpos, 1); for (x = 0, max = 0, max_x = 0; x < z; ++x) if (prev[x] > max) max = prev[x], max_x = x; for (i = vpos - 1, x = max_x; i >= 0; --i) { h[i] = which? (~x&1) : (x&1); which = b[i][x]? !which : which; x = b[i][x]? (~x&mask)>>1 : x>>1; } } // free for (i = 0; i < vpos; ++i) free(b[i]); free(f[0]); free(f[1]); free(b); return h; } // phase each fragment static uint64_t *fragphase(int vpos, const int8_t *path, nseq_t *hash, int flip) { khint_t k; uint64_t *pcnt; uint32_t *left, *rght, max; left = rght = 0; max = 0; pcnt = calloc(vpos, 8); for (k = 0; k < kh_end(hash); ++k) { if (kh_exist(hash, k)) { int i, c[2]; frag_t *f = &kh_val(hash, k); if (f->vpos >= vpos) continue; // get the phase c[0] = c[1] = 0; for (i = 0; i < f->vlen; ++i) { if (f->seq[i] == 0) continue; ++c[f->seq[i] == path[f->vpos + i] + 1? 0 : 1]; } f->phase = c[0] > c[1]? 0 : 1; f->in = c[f->phase]; f->out = c[1 - f->phase]; f->phased = f->in == f->out? 0 : 1; f->ambig = (f->in && f->out && f->out < 3 && f->in <= f->out + 1)? 
1 : 0; // fix chimera f->flip = 0; if (flip && c[0] >= 3 && c[1] >= 3) { int sum[2], m, mi, md; if (f->vlen > max) { // enlarge the array max = f->vlen; kroundup32(max); left = realloc(left, max * 4); rght = realloc(rght, max * 4); } for (i = 0, sum[0] = sum[1] = 0; i < f->vlen; ++i) { // get left counts if (f->seq[i]) { int c = f->phase? 2 - f->seq[i] : f->seq[i] - 1; ++sum[c == path[f->vpos + i]? 0 : 1]; } left[i] = sum[1]<<16 | sum[0]; } for (i = f->vlen - 1, sum[0] = sum[1] = 0; i >= 0; --i) { // get right counts if (f->seq[i]) { int c = f->phase? 2 - f->seq[i] : f->seq[i] - 1; ++sum[c == path[f->vpos + i]? 0 : 1]; } rght[i] = sum[1]<<16 | sum[0]; } // find the best flip point for (i = m = 0, mi = -1, md = -1; i < f->vlen - 1; ++i) { int a[2]; a[0] = (left[i]&0xffff) + (rght[i+1]>>16&0xffff) - (rght[i+1]&0xffff) * FLIP_PENALTY; a[1] = (left[i]>>16&0xffff) + (rght[i+1]&0xffff) - (rght[i+1]>>16&0xffff) * FLIP_PENALTY; if (a[0] > a[1]) { if (a[0] > m) m = a[0], md = 0, mi = i; } else { if (a[1] > m) m = a[1], md = 1, mi = i; } } if (m - c[0] >= FLIP_THRES && m - c[1] >= FLIP_THRES) { // then flip f->flip = 1; if (md == 0) { // flip the tail for (i = mi + 1; i < f->vlen; ++i) if (f->seq[i] == 1) f->seq[i] = 2; else if (f->seq[i] == 2) f->seq[i] = 1; } else { // flip the head for (i = 0; i <= mi; ++i) if (f->seq[i] == 1) f->seq[i] = 2; else if (f->seq[i] == 2) f->seq[i] = 1; } } } // update pcnt[] if (!f->single) { for (i = 0; i < f->vlen; ++i) { int c; if (f->seq[i] == 0) continue; c = f->phase? 
2 - f->seq[i] : f->seq[i] - 1; if (c == path[f->vpos + i]) { if (f->phase == 0) ++pcnt[f->vpos + i]; else pcnt[f->vpos + i] += 1ull<<32; } else { if (f->phase == 0) pcnt[f->vpos + i] += 1<<16; else pcnt[f->vpos + i] += 1ull<<48; } } } } } free(left); free(rght); return pcnt; } static uint64_t *genmask(int vpos, const uint64_t *pcnt, int *_n) { int i, max = 0, max_i = -1, m = 0, n = 0, beg = 0, score = 0; uint64_t *list = 0; for (i = 0; i < vpos; ++i) { uint64_t x = pcnt[i]; int c[4], pre = score, s; c[0] = x&0xffff; c[1] = x>>16&0xffff; c[2] = x>>32&0xffff; c[3] = x>>48&0xffff; s = (c[1] + c[3] == 0)? -(c[0] + c[2]) : (c[1] + c[3] - 1); if (c[3] > c[2]) s += c[3] - c[2]; if (c[1] > c[0]) s += c[1] - c[0]; score += s; if (score < 0) score = 0; if (pre == 0 && score > 0) beg = i; // change from zero to non-zero if ((i == vpos - 1 || score == 0) && max >= MASK_THRES) { if (n == m) { m = m? m<<1 : 4; list = realloc(list, m * 8); } list[n++] = (uint64_t)beg<<32 | max_i; i = max_i; // reset i to max_i score = 0; } else if (score > max) max = score, max_i = i; if (score == 0) max = 0; } *_n = n; return list; } // trim heading and tailing ambiguous bases; mark deleted and remove sequence static int clean_seqs(int vpos, nseq_t *hash) { khint_t k; int ret = 0; for (k = 0; k < kh_end(hash); ++k) { if (kh_exist(hash, k)) { frag_t *f = &kh_val(hash, k); int beg, end, i; if (f->vpos >= vpos) { ret = 1; continue; } for (i = 0; i < f->vlen; ++i) if (f->seq[i] != 0) break; beg = i; for (i = f->vlen - 1; i >= 0; --i) if (f->seq[i] != 0) break; end = i + 1; if (end - beg <= 0) kh_del(64, hash, k); else { if (beg != 0) memmove(f->seq, f->seq + beg, end - beg); f->vpos += beg; f->vlen = end - beg; f->single = f->vlen == 1? 
1 : 0; } } } return ret; } static void dump_aln(phaseg_t *g, int min_pos, const nseq_t *hash) { int i, is_flip, drop_ambi; drop_ambi = g->flag & FLAG_DROP_AMBI; is_flip = (drand48() < 0.5); for (i = 0; i < g->n; ++i) { int end, which; uint64_t key; khint_t k; bam1_t *b = g->b[i]; key = X31_hash_string(bam1_qname(b)); end = bam_calend(&b->core, bam1_cigar(b)); if (end > min_pos) break; k = kh_get(64, hash, key); if (k == kh_end(hash)) which = 3; else { frag_t *f = &kh_val(hash, k); if (f->ambig) which = drop_ambi? 2 : 3; else if (f->phased && f->flip) which = 2; else if (f->phased == 0) which = 3; else { // phased and not flipped char c = 'Y'; which = f->phase; bam_aux_append(b, "ZP", 'A', 1, (uint8_t*)&c); } if (which < 2 && is_flip) which = 1 - which; // increase the randomness } if (which == 3) which = (drand48() < 0.5); bam_write1(g->out[which], b); bam_destroy1(b); g->b[i] = 0; } memmove(g->b, g->b + i, (g->n - i) * sizeof(void*)); g->n -= i; } static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *hash) { int i, j, n_seqs = kh_size(hash), n_masked = 0, min_pos; khint_t k; frag_t **seqs; int8_t *path, *sitemask; uint64_t *pcnt, *regmask; if (vpos == 0) return 0; i = clean_seqs(vpos, hash); // i is true if hash has an element with its vpos >= vpos min_pos = i? 
cns[vpos]>>32 : 0x7fffffff; if (vpos == 1) { printf("PS\t%s\t%d\t%d\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[0]>>32) + 1); printf("M0\t%s\t%d\t%d\t%c\t%c\t%d\t0\t0\t0\t0\n//\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[0]>>32) + 1, "ACGTX"[cns[0]&3], "ACGTX"[cns[0]>>16&3], g->vpos_shift + 1); for (k = 0; k < kh_end(hash); ++k) { if (kh_exist(hash, k)) { frag_t *f = &kh_val(hash, k); if (f->vpos) continue; f->flip = 0; if (f->seq[0] == 0) f->phased = 0; else f->phased = 1, f->phase = f->seq[0] - 1; } } dump_aln(g, min_pos, hash); ++g->vpos_shift; return 1; } { // phase int **cnt; uint64_t *mask; printf("PS\t%s\t%d\t%d\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[vpos-1]>>32) + 1); sitemask = calloc(vpos, 1); cnt = count_all(g->k, vpos, hash); path = dynaprog(g->k, vpos, cnt); for (i = 0; i < vpos; ++i) free(cnt[i]); free(cnt); pcnt = fragphase(vpos, path, hash, 0); // do not fix chimeras when masking mask = genmask(vpos, pcnt, &n_masked); regmask = calloc(n_masked, 8); for (i = 0; i < n_masked; ++i) { regmask[i] = cns[mask[i]>>32]>>32<<32 | cns[(uint32_t)mask[i]]>>32; for (j = mask[i]>>32; j <= (int32_t)mask[i]; ++j) sitemask[j] = 1; } free(mask); if (g->flag & FLAG_FIX_CHIMERA) { free(pcnt); pcnt = fragphase(vpos, path, hash, 1); } } for (i = 0; i < n_masked; ++i) printf("FL\t%s\t%d\t%d\n", chr, (int)(regmask[i]>>32) + 1, (int)regmask[i] + 1); for (i = 0; i < vpos; ++i) { uint64_t x = pcnt[i]; int8_t c[2]; c[0] = (cns[i]&0xffff)>>2 == 0? 4 : (cns[i]&3); c[1] = (cns[i]>>16&0xffff)>>2 == 0? 
4 : (cns[i]>>16&3); printf("M%d\t%s\t%d\t%d\t%c\t%c\t%d\t%d\t%d\t%d\t%d\n", sitemask[i]+1, chr, (int)(cns[0]>>32) + 1, (int)(cns[i]>>32) + 1, "ACGTX"[c[path[i]]], "ACGTX"[c[1-path[i]]], i + g->vpos_shift + 1, (int)(x&0xffff), (int)(x>>16&0xffff), (int)(x>>32&0xffff), (int)(x>>48&0xffff)); } free(path); free(pcnt); free(regmask); free(sitemask); seqs = calloc(n_seqs, sizeof(void*)); for (k = 0, i = 0; k < kh_end(hash); ++k) if (kh_exist(hash, k) && kh_val(hash, k).vpos < vpos && !kh_val(hash, k).single) seqs[i++] = &kh_val(hash, k); n_seqs = i; ks_introsort_rseq(n_seqs, seqs); for (i = 0; i < n_seqs; ++i) { frag_t *f = seqs[i]; printf("EV\t0\t%s\t%d\t40\t%dM\t*\t0\t0\t", chr, f->vpos + 1 + g->vpos_shift, f->vlen); for (j = 0; j < f->vlen; ++j) { uint32_t c = cns[f->vpos + j]; if (f->seq[j] == 0) putchar('N'); else putchar("ACGT"[f->seq[j] == 1? (c&3) : (c>>16&3)]); } printf("\t*\tYP:i:%d\tYF:i:%d\tYI:i:%d\tYO:i:%d\tYS:i:%d\n", f->phase, f->flip, f->in, f->out, f->beg+1); } free(seqs); printf("//\n"); fflush(stdout); g->vpos_shift += vpos; dump_aln(g, min_pos, hash); return vpos; } static void update_vpos(int vpos, nseq_t *hash) { khint_t k; for (k = 0; k < kh_end(hash); ++k) { if (kh_exist(hash, k)) { frag_t *f = &kh_val(hash, k); if (f->vpos < vpos) kh_del(64, hash, k); // TODO: if frag_t::seq is allocated dynamically, free it else f->vpos -= vpos; } } } static nseq_t *shrink_hash(nseq_t *hash) // TODO: to implement { return hash; } static int readaln(void *data, bam1_t *b) { phaseg_t *g = (phaseg_t*)data; int ret; ret = bam_read1(g->fp, b); if (ret < 0) return ret; if (!(b->core.flag & (BAM_FUNMAP|BAM_FSECONDARY|BAM_FQCFAIL|BAM_FDUP)) && g->pre) { if (g->n == g->m) { g->m = g->m? 
g->m<<1 : 16; g->b = realloc(g->b, g->m * sizeof(void*)); } g->b[g->n++] = bam_dup1(b); } return ret; } static khash_t(set64) *loadpos(const char *fn, bam_header_t *h) { gzFile fp; kstream_t *ks; int ret, dret; kstring_t *str; khash_t(set64) *hash; hash = kh_init(set64); str = calloc(1, sizeof(kstring_t)); fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r"); ks = ks_init(fp); while (ks_getuntil(ks, 0, str, &dret) >= 0) { int tid = bam_get_tid(h, str->s); if (tid >= 0 && dret != '\n') { if (ks_getuntil(ks, 0, str, &dret) >= 0) { uint64_t x = (uint64_t)tid<<32 | (atoi(str->s) - 1); kh_put(set64, hash, x, &ret); } else break; } if (dret != '\n') while ((dret = ks_getc(ks)) > 0 && dret != '\n'); if (dret < 0) break; } ks_destroy(ks); gzclose(fp); free(str->s); free(str); return hash; } static int gl2cns(float q[16]) { int i, j, min_ij; float min, min2; min = min2 = 1e30; min_ij = -1; for (i = 0; i < 4; ++i) { for (j = i; j < 4; ++j) { if (q[i<<2|j] < min) min_ij = i<<2|j, min2 = min, min = q[i<<2|j]; else if (q[i<<2|j] < min2) min2 = q[i<<2|j]; } } return (min_ij>>2&3) == (min_ij&3)? 
0 : 1<<18 | (min_ij>>2&3)<<16 | (min_ij&3) | (int)(min2 - min + .499) << 2; } int main_phase(int argc, char *argv[]) { extern void bam_init_header_hash(bam_header_t *header); int c, tid, pos, vpos = 0, n, lasttid = -1, max_vpos = 0; const bam_pileup1_t *plp; bam_plp_t iter; bam_header_t *h; nseq_t *seqs; uint64_t *cns = 0; phaseg_t g; char *fn_list = 0; khash_t(set64) *set = 0; errmod_t *em; uint16_t *bases; memset(&g, 0, sizeof(phaseg_t)); g.flag = FLAG_FIX_CHIMERA; g.min_varLOD = 37; g.k = 13; g.min_baseQ = 13; g.max_depth = 256; while ((c = getopt(argc, argv, "Q:eFq:k:b:l:D:A:")) >= 0) { switch (c) { case 'D': g.max_depth = atoi(optarg); break; case 'q': g.min_varLOD = atoi(optarg); break; case 'Q': g.min_baseQ = atoi(optarg); break; case 'k': g.k = atoi(optarg); break; case 'F': g.flag &= ~FLAG_FIX_CHIMERA; break; case 'e': g.flag |= FLAG_LIST_EXCL; break; case 'A': g.flag |= FLAG_DROP_AMBI; break; case 'b': g.pre = strdup(optarg); break; case 'l': fn_list = strdup(optarg); break; } } if (argc == optind) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: samtools phase [options] \n\n"); fprintf(stderr, "Options: -k INT block length [%d]\n", g.k); fprintf(stderr, " -b STR prefix of BAMs to output [null]\n"); fprintf(stderr, " -q INT min het phred-LOD [%d]\n", g.min_varLOD); fprintf(stderr, " -Q INT min base quality in het calling [%d]\n", g.min_baseQ); fprintf(stderr, " -D INT max read depth [%d]\n", g.max_depth); // fprintf(stderr, " -l FILE list of sites to phase [null]\n"); fprintf(stderr, " -F do not attempt to fix chimeras\n"); fprintf(stderr, " -A drop reads with ambiguous phase\n"); // fprintf(stderr, " -e do not discover SNPs (effective with -l)\n"); fprintf(stderr, "\n"); return 1; } g.fp = strcmp(argv[optind], "-")? 
bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r"); h = bam_header_read(g.fp); if (fn_list) { // read the list of sites to phase bam_init_header_hash(h); set = loadpos(fn_list, h); free(fn_list); } else g.flag &= ~FLAG_LIST_EXCL; if (g.pre) { // open BAMs to write char *s = malloc(strlen(g.pre) + 20); strcpy(s, g.pre); strcat(s, ".0.bam"); g.out[0] = bam_open(s, "w"); strcpy(s, g.pre); strcat(s, ".1.bam"); g.out[1] = bam_open(s, "w"); strcpy(s, g.pre); strcat(s, ".chimera.bam"); g.out[2] = bam_open(s, "w"); for (c = 0; c <= 2; ++c) bam_header_write(g.out[c], h); free(s); } iter = bam_plp_init(readaln, &g); g.vpos_shift = 0; seqs = kh_init(64); em = errmod_init(1. - 0.83); bases = calloc(g.max_depth, 2); printf("CC\n"); printf("CC\tDescriptions:\nCC\n"); printf("CC\t CC comments\n"); printf("CC\t PS start of a phase set\n"); printf("CC\t FL filtered region\n"); printf("CC\t M[012] markers; 0 for singletons, 1 for phased and 2 for filtered\n"); printf("CC\t EV supporting reads; SAM format\n"); printf("CC\t // end of a phase set\nCC\n"); printf("CC\tFormats of PS, FL and M[012] lines (1-based coordinates):\nCC\n"); printf("CC\t PS chr phaseSetStart phaseSetEnd\n"); printf("CC\t FL chr filterStart filterEnd\n"); printf("CC\t M? 
chr PS pos allele0 allele1 hetIndex #supports0 #errors0 #supp1 #err1\n"); printf("CC\nCC\n"); fflush(stdout); while ((plp = bam_plp_auto(iter, &tid, &pos, &n)) != 0) { int i, k, c, tmp, dophase = 1, in_set = 0; float q[16]; if (tid < 0) break; if (tid != lasttid) { // change of chromosome g.vpos_shift = 0; if (lasttid >= 0) { seqs = shrink_hash(seqs); phase(&g, h->target_name[lasttid], vpos, cns, seqs); update_vpos(0x7fffffff, seqs); } lasttid = tid; vpos = 0; } if (set && kh_get(set64, set, (uint64_t)tid<<32 | pos) != kh_end(set)) in_set = 1; if (n > g.max_depth) continue; // do not proceed if the depth is too high // fill the bases array and check if there is a variant for (i = k = 0; i < n; ++i) { const bam_pileup1_t *p = plp + i; uint8_t *seq; int q, baseQ, b; if (p->is_del || p->is_refskip) continue; baseQ = bam1_qual(p->b)[p->qpos]; if (baseQ < g.min_baseQ) continue; seq = bam1_seq(p->b); b = bam_nt16_nt4_table[bam1_seqi(seq, p->qpos)]; if (b > 3) continue; q = baseQ < p->b->core.qual? baseQ : p->b->core.qual; if (q < 4) q = 4; if (q > 63) q = 63; bases[k++] = q<<5 | (int)bam1_strand(p->b)<<4 | b; } if (k == 0) continue; errmod_cal(em, k, 4, bases, q); // compute genotype likelihood c = gl2cns(q); // get the consensus // tell if to proceed if (set && (g.flag&FLAG_LIST_EXCL) && !in_set) continue; // not in the list if (!in_set && (c&0xffff)>>2 < g.min_varLOD) continue; // not a variant // add the variant if (vpos == max_vpos) { max_vpos = max_vpos? 
max_vpos<<1 : 128; cns = realloc(cns, max_vpos * 8); } cns[vpos] = (uint64_t)pos<<32 | c; for (i = 0; i < n; ++i) { const bam_pileup1_t *p = plp + i; uint64_t key; khint_t k; uint8_t *seq = bam1_seq(p->b); frag_t *f; if (p->is_del || p->is_refskip) continue; if (p->b->core.qual == 0) continue; // get the base code c = nt16_nt4_table[(int)bam1_seqi(seq, p->qpos)]; if (c == (cns[vpos]&3)) c = 1; else if (c == (cns[vpos]>>16&3)) c = 2; else c = 0; // write to seqs key = X31_hash_string(bam1_qname(p->b)); k = kh_put(64, seqs, key, &tmp); f = &kh_val(seqs, k); if (tmp == 0) { // present in the hash table if (vpos - f->vpos + 1 < MAX_VARS) { f->vlen = vpos - f->vpos + 1; f->seq[f->vlen-1] = c; f->end = bam_calend(&p->b->core, bam1_cigar(p->b)); } dophase = 0; } else { // absent memset(f->seq, 0, MAX_VARS); f->beg = p->b->core.pos; f->end = bam_calend(&p->b->core, bam1_cigar(p->b)); f->vpos = vpos, f->vlen = 1, f->seq[0] = c, f->single = f->phased = f->flip = f->ambig = 0; } } if (dophase) { seqs = shrink_hash(seqs); phase(&g, h->target_name[tid], vpos, cns, seqs); update_vpos(vpos, seqs); cns[0] = cns[vpos]; vpos = 0; } ++vpos; } if (tid >= 0) phase(&g, h->target_name[tid], vpos, cns, seqs); bam_header_destroy(h); bam_plp_destroy(iter); bam_close(g.fp); kh_destroy(64, seqs); kh_destroy(set64, set); free(cns); errmod_destroy(em); free(bases); if (g.pre) { for (c = 0; c <= 2; ++c) bam_close(g.out[c]); free(g.pre); free(g.b); } return 0; } samtools-0.1.19/razf.c000066400000000000000000000574271212162403000145570ustar00rootroot00000000000000/* * RAZF : Random Access compressed(Z) File * Version: 1.0 * Release Date: 2008-10-27 * * Copyright 2008, Jue Ruan , Heng Li * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _NO_RAZF #include #include #include #include #include #include "razf.h" #if ZLIB_VERNUM < 0x1221 struct _gz_header_s { int text; uLong time; int xflags; int os; Bytef *extra; uInt extra_len; uInt extra_max; Bytef *name; uInt name_max; Bytef *comment; uInt comm_max; int hcrc; int done; }; #warning "zlib < 1.2.2.1; RAZF writing is disabled." 
#endif #define DEF_MEM_LEVEL 8 static inline uint32_t byte_swap_4(uint32_t v){ v = ((v & 0x0000FFFFU) << 16) | (v >> 16); return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); } static inline uint64_t byte_swap_8(uint64_t v){ v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); } static inline int is_big_endian(){ int x = 0x01; char *c = (char*)&x; return (c[0] != 0x01); } #ifndef _RZ_READONLY static void add_zindex(RAZF *rz, int64_t in, int64_t out){ if(rz->index->size == rz->index->cap){ rz->index->cap = rz->index->cap * 1.5 + 2; rz->index->cell_offsets = realloc(rz->index->cell_offsets, sizeof(int) * rz->index->cap); rz->index->bin_offsets = realloc(rz->index->bin_offsets, sizeof(int64_t) * (rz->index->cap/RZ_BIN_SIZE + 1)); } if(rz->index->size % RZ_BIN_SIZE == 0) rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE] = out; rz->index->cell_offsets[rz->index->size] = out - rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE]; rz->index->size ++; } static void save_zindex(RAZF *rz, int fd){ int32_t i, v32; int is_be; is_be = is_big_endian(); if(is_be) write(fd, &rz->index->size, sizeof(int)); else { v32 = byte_swap_4((uint32_t)rz->index->size); write(fd, &v32, sizeof(uint32_t)); } v32 = rz->index->size / RZ_BIN_SIZE + 1; if(!is_be){ for(i=0;iindex->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]); for(i=0;iindex->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]); } write(fd, rz->index->bin_offsets, sizeof(int64_t) * v32); write(fd, rz->index->cell_offsets, sizeof(int32_t) * rz->index->size); } #endif #ifdef _USE_KNETFILE static void load_zindex(RAZF *rz, knetFile *fp){ #else static void load_zindex(RAZF *rz, int fd){ #endif int32_t i, v32; int is_be; if(!rz->load_index) return; if(rz->index == NULL) rz->index = malloc(sizeof(ZBlockIndex)); is_be 
= is_big_endian(); #ifdef _USE_KNETFILE knet_read(fp, &rz->index->size, sizeof(int)); #else read(fd, &rz->index->size, sizeof(int)); #endif if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size); rz->index->cap = rz->index->size; v32 = rz->index->size / RZ_BIN_SIZE + 1; rz->index->bin_offsets = malloc(sizeof(int64_t) * v32); #ifdef _USE_KNETFILE knet_read(fp, rz->index->bin_offsets, sizeof(int64_t) * v32); #else read(fd, rz->index->bin_offsets, sizeof(int64_t) * v32); #endif rz->index->cell_offsets = malloc(sizeof(int) * rz->index->size); #ifdef _USE_KNETFILE knet_read(fp, rz->index->cell_offsets, sizeof(int) * rz->index->size); #else read(fd, rz->index->cell_offsets, sizeof(int) * rz->index->size); #endif if(!is_be){ for(i=0;iindex->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]); for(i=0;iindex->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]); } } #ifdef _RZ_READONLY static RAZF* razf_open_w(int fd) { fprintf(stderr, "[razf_open_w] Writing is not available with zlib ver < 1.2.2.1\n"); return 0; } #else static RAZF* razf_open_w(int fd){ RAZF *rz; #ifdef _WIN32 setmode(fd, O_BINARY); #endif rz = calloc(1, sizeof(RAZF)); rz->mode = 'w'; #ifdef _USE_KNETFILE rz->x.fpw = fd; #else rz->filedes = fd; #endif rz->stream = calloc(sizeof(z_stream), 1); rz->inbuf = malloc(RZ_BUFFER_SIZE); rz->outbuf = malloc(RZ_BUFFER_SIZE); rz->index = calloc(sizeof(ZBlockIndex), 1); deflateInit2(rz->stream, RZ_COMPRESS_LEVEL, Z_DEFLATED, WINDOW_BITS + 16, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY); rz->stream->avail_out = RZ_BUFFER_SIZE; rz->stream->next_out = rz->outbuf; rz->header = calloc(sizeof(gz_header), 1); rz->header->os = 0x03; //Unix rz->header->text = 0; rz->header->time = 0; rz->header->extra = malloc(7); strncpy((char*)rz->header->extra, "RAZF", 4); rz->header->extra[4] = 1; // obsolete field // block size = RZ_BLOCK_SIZE, Big-Endian rz->header->extra[5] = RZ_BLOCK_SIZE >> 8; rz->header->extra[6] = RZ_BLOCK_SIZE & 
0xFF; rz->header->extra_len = 7; rz->header->name = rz->header->comment = 0; rz->header->hcrc = 0; deflateSetHeader(rz->stream, rz->header); rz->block_pos = rz->block_off = 0; return rz; } static void _razf_write(RAZF* rz, const void *data, int size){ int tout; rz->stream->avail_in = size; rz->stream->next_in = (void*)data; while(1){ tout = rz->stream->avail_out; deflate(rz->stream, Z_NO_FLUSH); rz->out += tout - rz->stream->avail_out; if(rz->stream->avail_out) break; #ifdef _USE_KNETFILE write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); #else write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); #endif rz->stream->avail_out = RZ_BUFFER_SIZE; rz->stream->next_out = rz->outbuf; if(rz->stream->avail_in == 0) break; }; rz->in += size - rz->stream->avail_in; rz->block_off += size - rz->stream->avail_in; } static void razf_flush(RAZF *rz){ uint32_t tout; if(rz->buf_len){ _razf_write(rz, rz->inbuf, rz->buf_len); rz->buf_off = rz->buf_len = 0; } if(rz->stream->avail_out){ #ifdef _USE_KNETFILE write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); #else write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); #endif rz->stream->avail_out = RZ_BUFFER_SIZE; rz->stream->next_out = rz->outbuf; } while(1){ tout = rz->stream->avail_out; deflate(rz->stream, Z_FULL_FLUSH); rz->out += tout - rz->stream->avail_out; if(rz->stream->avail_out == 0){ #ifdef _USE_KNETFILE write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); #else write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); #endif rz->stream->avail_out = RZ_BUFFER_SIZE; rz->stream->next_out = rz->outbuf; } else break; } rz->block_pos = rz->out; rz->block_off = 0; } static void razf_end_flush(RAZF *rz){ uint32_t tout; if(rz->buf_len){ _razf_write(rz, rz->inbuf, rz->buf_len); rz->buf_off = rz->buf_len = 0; } while(1){ tout = rz->stream->avail_out; deflate(rz->stream, Z_FINISH); rz->out += tout - rz->stream->avail_out; 
if(rz->stream->avail_out < RZ_BUFFER_SIZE){ #ifdef _USE_KNETFILE write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); #else write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); #endif rz->stream->avail_out = RZ_BUFFER_SIZE; rz->stream->next_out = rz->outbuf; } else break; } } static void _razf_buffered_write(RAZF *rz, const void *data, int size){ int i, n; while(1){ if(rz->buf_len == RZ_BUFFER_SIZE){ _razf_write(rz, rz->inbuf, rz->buf_len); rz->buf_len = 0; } if(size + rz->buf_len < RZ_BUFFER_SIZE){ for(i=0;iinbuf + rz->buf_len)[i] = ((char*)data)[i]; rz->buf_len += size; return; } else { n = RZ_BUFFER_SIZE - rz->buf_len; for(i=0;iinbuf + rz->buf_len)[i] = ((char*)data)[i]; size -= n; data += n; rz->buf_len += n; } } } int razf_write(RAZF* rz, const void *data, int size){ int ori_size, n; int64_t next_block; ori_size = size; next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE; while(rz->in + rz->buf_len + size >= next_block){ n = next_block - rz->in - rz->buf_len; _razf_buffered_write(rz, data, n); data += n; size -= n; razf_flush(rz); add_zindex(rz, rz->in, rz->out); next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE; } _razf_buffered_write(rz, data, size); return ori_size; } #endif /* gzip flag byte */ #define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ #define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ #define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ #define ORIG_NAME 0x08 /* bit 3 set: original file name present */ #define COMMENT 0x10 /* bit 4 set: file comment present */ #define RESERVED 0xE0 /* bits 5..7: reserved */ static int _read_gz_header(unsigned char *data, int size, int *extra_off, int *extra_len){ int method, flags, n, len; if(size < 2) return 0; if(data[0] != 0x1f || data[1] != 0x8b) return 0; if(size < 4) return 0; method = data[2]; flags = data[3]; if(method != Z_DEFLATED || (flags & RESERVED)) return 0; n = 4 + 6; // Skip 6 bytes *extra_off = n + 2; *extra_len 
= 0; if(flags & EXTRA_FIELD){ if(size < n + 2) return 0; len = ((int)data[n + 1] << 8) | data[n]; n += 2; *extra_off = n; while(len){ if(n >= size) return 0; n ++; len --; } *extra_len = n - (*extra_off); } if(flags & ORIG_NAME) while(n < size && data[n++]); if(flags & COMMENT) while(n < size && data[n++]); if(flags & HEAD_CRC){ if(n + 2 > size) return 0; n += 2; } return n; } #ifdef _USE_KNETFILE static RAZF* razf_open_r(knetFile *fp, int _load_index){ #else static RAZF* razf_open_r(int fd, int _load_index){ #endif RAZF *rz; int ext_off, ext_len; int n, is_be, ret; int64_t end; unsigned char c[] = "RAZF"; rz = calloc(1, sizeof(RAZF)); rz->mode = 'r'; #ifdef _USE_KNETFILE rz->x.fpr = fp; #else #ifdef _WIN32 setmode(fd, O_BINARY); #endif rz->filedes = fd; #endif rz->stream = calloc(sizeof(z_stream), 1); rz->inbuf = malloc(RZ_BUFFER_SIZE); rz->outbuf = malloc(RZ_BUFFER_SIZE); rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL; #ifdef _USE_KNETFILE n = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE); #else n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE); #endif ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len); if(ret == 0){ PLAIN_FILE: rz->in = n; rz->file_type = FILE_TYPE_PLAIN; memcpy(rz->outbuf, rz->inbuf, n); rz->buf_len = n; free(rz->stream); rz->stream = NULL; return rz; } rz->header_size = ret; ret = inflateInit2(rz->stream, -WINDOW_BITS); if(ret != Z_OK){ inflateEnd(rz->stream); goto PLAIN_FILE;} rz->stream->avail_in = n - rz->header_size; rz->stream->next_in = rz->inbuf + rz->header_size; rz->stream->avail_out = RZ_BUFFER_SIZE; rz->stream->next_out = rz->outbuf; rz->file_type = FILE_TYPE_GZ; rz->in = rz->header_size; rz->block_pos = rz->header_size; rz->next_block_pos = rz->header_size; rz->block_off = 0; if(ext_len < 7 || memcmp(rz->inbuf + ext_off, c, 4) != 0) return rz; if(((((unsigned char*)rz->inbuf)[ext_off + 5] << 8) | ((unsigned char*)rz->inbuf)[ext_off + 6]) != RZ_BLOCK_SIZE){ fprintf(stderr, " -- WARNING: RZ_BLOCK_SIZE is not %d, treat source as 
gz file. in %s -- %s:%d --\n", RZ_BLOCK_SIZE, __FUNCTION__, __FILE__, __LINE__); return rz; } rz->load_index = _load_index; rz->file_type = FILE_TYPE_RZ; #ifdef _USE_KNETFILE if(knet_seek(fp, -16, SEEK_END) == -1){ #else if(lseek(fd, -16, SEEK_END) == -1){ #endif UNSEEKABLE: rz->seekable = 0; rz->index = NULL; rz->src_end = rz->end = 0x7FFFFFFFFFFFFFFFLL; } else { is_be = is_big_endian(); rz->seekable = 1; #ifdef _USE_KNETFILE knet_read(fp, &end, sizeof(int64_t)); #else read(fd, &end, sizeof(int64_t)); #endif if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end); else rz->src_end = end; #ifdef _USE_KNETFILE knet_read(fp, &end, sizeof(int64_t)); #else read(fd, &end, sizeof(int64_t)); #endif if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end); else rz->end = end; if(n > rz->end){ rz->stream->avail_in -= n - rz->end; n = rz->end; } if(rz->end > rz->src_end){ #ifdef _USE_KNETFILE knet_seek(fp, rz->in, SEEK_SET); #else lseek(fd, rz->in, SEEK_SET); #endif goto UNSEEKABLE; } #ifdef _USE_KNETFILE knet_seek(fp, rz->end, SEEK_SET); if(knet_tell(fp) != rz->end){ knet_seek(fp, rz->in, SEEK_SET); #else if(lseek(fd, rz->end, SEEK_SET) != rz->end){ lseek(fd, rz->in, SEEK_SET); #endif goto UNSEEKABLE; } #ifdef _USE_KNETFILE load_zindex(rz, fp); knet_seek(fp, n, SEEK_SET); #else load_zindex(rz, fd); lseek(fd, n, SEEK_SET); #endif } return rz; } #ifdef _USE_KNETFILE RAZF* razf_dopen(int fd, const char *mode){ if (strstr(mode, "r")) fprintf(stderr,"[razf_dopen] implement me\n"); else if(strstr(mode, "w")) return razf_open_w(fd); return NULL; } RAZF* razf_dopen2(int fd, const char *mode) { fprintf(stderr,"[razf_dopen2] implement me\n"); return NULL; } #else RAZF* razf_dopen(int fd, const char *mode){ if(strstr(mode, "r")) return razf_open_r(fd, 1); else if(strstr(mode, "w")) return razf_open_w(fd); else return NULL; } RAZF* razf_dopen2(int fd, const char *mode) { if(strstr(mode, "r")) return razf_open_r(fd, 0); else if(strstr(mode, "w")) return razf_open_w(fd); else return 
NULL; } #endif static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){ int fd; RAZF *rz; if(strstr(mode, "r")){ #ifdef _USE_KNETFILE knetFile *fd = knet_open(filename, "r"); if (fd == 0) { fprintf(stderr, "[_razf_open] fail to open %s\n", filename); return NULL; } #else #ifdef _WIN32 fd = open(filename, O_RDONLY | O_BINARY); #else fd = open(filename, O_RDONLY); #endif #endif if(fd < 0) return NULL; rz = razf_open_r(fd, _load_index); } else if(strstr(mode, "w")){ #ifdef _WIN32 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666); #else fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666); #endif if(fd < 0) return NULL; rz = razf_open_w(fd); } else return NULL; return rz; } RAZF* razf_open(const char *filename, const char *mode){ return _razf_open(filename, mode, 1); } RAZF* razf_open2(const char *filename, const char *mode){ return _razf_open(filename, mode, 0); } int razf_get_data_size(RAZF *rz, int64_t *u_size, int64_t *c_size){ int64_t n; if(rz->mode != 'r' && rz->mode != 'R') return 0; switch(rz->file_type){ case FILE_TYPE_PLAIN: if(rz->end == 0x7fffffffffffffffLL){ #ifdef _USE_KNETFILE if(knet_seek(rz->x.fpr, 0, SEEK_CUR) == -1) return 0; n = knet_tell(rz->x.fpr); knet_seek(rz->x.fpr, 0, SEEK_END); rz->end = knet_tell(rz->x.fpr); knet_seek(rz->x.fpr, n, SEEK_SET); #else if((n = lseek(rz->filedes, 0, SEEK_CUR)) == -1) return 0; rz->end = lseek(rz->filedes, 0, SEEK_END); lseek(rz->filedes, n, SEEK_SET); #endif } *u_size = *c_size = rz->end; return 1; case FILE_TYPE_GZ: return 0; case FILE_TYPE_RZ: if(rz->src_end == rz->end) return 0; *u_size = rz->src_end; *c_size = rz->end; return 1; default: return 0; } } static int _razf_read(RAZF* rz, void *data, int size){ int ret, tin; if(rz->z_eof || rz->z_err) return 0; if (rz->file_type == FILE_TYPE_PLAIN) { #ifdef _USE_KNETFILE ret = knet_read(rz->x.fpr, data, size); #else ret = read(rz->filedes, data, size); #endif if (ret == 0) rz->z_eof = 1; return ret; } 
rz->stream->avail_out = size; rz->stream->next_out = data; while(rz->stream->avail_out){ if(rz->stream->avail_in == 0){ if(rz->in >= rz->end){ rz->z_eof = 1; break; } if(rz->end - rz->in < RZ_BUFFER_SIZE){ #ifdef _USE_KNETFILE rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, rz->end -rz->in); #else rz->stream->avail_in = read(rz->filedes, rz->inbuf, rz->end -rz->in); #endif } else { #ifdef _USE_KNETFILE rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE); #else rz->stream->avail_in = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE); #endif } if(rz->stream->avail_in == 0){ rz->z_eof = 1; break; } rz->stream->next_in = rz->inbuf; } tin = rz->stream->avail_in; ret = inflate(rz->stream, Z_BLOCK); rz->in += tin - rz->stream->avail_in; if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){ fprintf(stderr, "[_razf_read] inflate error: %d %s (at %s:%d)\n", ret, rz->stream->msg ? rz->stream->msg : "", __FILE__, __LINE__); rz->z_err = 1; break; } if(ret == Z_STREAM_END){ rz->z_eof = 1; break; } if ((rz->stream->data_type&128) && !(rz->stream->data_type&64)){ rz->buf_flush = 1; rz->next_block_pos = rz->in; break; } } return size - rz->stream->avail_out; } int razf_read(RAZF *rz, void *data, int size){ int ori_size, i; ori_size = size; while(size > 0){ if(rz->buf_len){ if(size < rz->buf_len){ for(i=0;ioutbuf + rz->buf_off)[i]; rz->buf_off += size; rz->buf_len -= size; data += size; rz->block_off += size; size = 0; break; } else { for(i=0;ibuf_len;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i]; data += rz->buf_len; size -= rz->buf_len; rz->block_off += rz->buf_len; rz->buf_off = 0; rz->buf_len = 0; if(rz->buf_flush){ rz->block_pos = rz->next_block_pos; rz->block_off = 0; rz->buf_flush = 0; } } } else if(rz->buf_flush){ rz->block_pos = rz->next_block_pos; rz->block_off = 0; rz->buf_flush = 0; } if(rz->buf_flush) continue; rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE); if(rz->z_eof && rz->buf_len == 0) break; } rz->out 
+= ori_size - size; return ori_size - size; } int razf_skip(RAZF* rz, int size){ int ori_size; ori_size = size; while(size > 0){ if(rz->buf_len){ if(size < rz->buf_len){ rz->buf_off += size; rz->buf_len -= size; rz->block_off += size; size = 0; break; } else { size -= rz->buf_len; rz->buf_off = 0; rz->buf_len = 0; rz->block_off += rz->buf_len; if(rz->buf_flush){ rz->block_pos = rz->next_block_pos; rz->block_off = 0; rz->buf_flush = 0; } } } else if(rz->buf_flush){ rz->block_pos = rz->next_block_pos; rz->block_off = 0; rz->buf_flush = 0; } if(rz->buf_flush) continue; rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE); if(rz->z_eof || rz->z_err) break; } rz->out += ori_size - size; return ori_size - size; } static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){ #ifdef _USE_KNETFILE knet_seek(rz->x.fpr, in, SEEK_SET); #else lseek(rz->filedes, in, SEEK_SET); #endif rz->in = in; rz->out = out; rz->block_pos = in; rz->next_block_pos = in; rz->block_off = 0; rz->buf_flush = 0; rz->z_eof = rz->z_err = 0; inflateReset(rz->stream); rz->stream->avail_in = 0; rz->buf_off = rz->buf_len = 0; } int64_t razf_jump(RAZF *rz, int64_t block_start, int block_offset){ int64_t pos; rz->z_eof = 0; if(rz->file_type == FILE_TYPE_PLAIN){ rz->buf_off = rz->buf_len = 0; pos = block_start + block_offset; #ifdef _USE_KNETFILE knet_seek(rz->x.fpr, pos, SEEK_SET); pos = knet_tell(rz->x.fpr); #else pos = lseek(rz->filedes, pos, SEEK_SET); #endif rz->out = rz->in = pos; return pos; } if(block_start == rz->block_pos && block_offset >= rz->block_off) { block_offset -= rz->block_off; goto SKIP; // Needn't reset inflate } if(block_start == 0) block_start = rz->header_size; // Automaticly revist wrong block_start _razf_reset_read(rz, block_start, 0); SKIP: if(block_offset) razf_skip(rz, block_offset); return rz->block_off; } int64_t razf_seek(RAZF* rz, int64_t pos, int where){ int64_t idx; int64_t seek_pos, new_out; rz->z_eof = 0; if (where == SEEK_CUR) pos += rz->out; else if (where == 
SEEK_END) pos += rz->src_end; if(rz->file_type == FILE_TYPE_PLAIN){ #ifdef _USE_KNETFILE knet_seek(rz->x.fpr, pos, SEEK_SET); seek_pos = knet_tell(rz->x.fpr); #else seek_pos = lseek(rz->filedes, pos, SEEK_SET); #endif rz->buf_off = rz->buf_len = 0; rz->out = rz->in = seek_pos; return seek_pos; } else if(rz->file_type == FILE_TYPE_GZ){ if(pos >= rz->out) goto SKIP; return rz->out; } if(pos == rz->out) return pos; if(pos > rz->src_end) return rz->out; if(!rz->seekable || !rz->load_index){ if(pos >= rz->out) goto SKIP; } idx = pos / RZ_BLOCK_SIZE - 1; seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]); new_out = (idx + 1) * RZ_BLOCK_SIZE; if(pos > rz->out && new_out <= rz->out) goto SKIP; _razf_reset_read(rz, seek_pos, new_out); SKIP: razf_skip(rz, (int)(pos - rz->out)); return rz->out; } uint64_t razf_tell2(RAZF *rz) { /* if (rz->load_index) { int64_t idx, seek_pos; idx = rz->out / RZ_BLOCK_SIZE - 1; seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]); if (seek_pos != rz->block_pos || rz->out%RZ_BLOCK_SIZE != rz->block_off) fprintf(stderr, "[razf_tell2] inconsistent block offset: (%lld, %lld) != (%lld, %lld)\n", (long long)seek_pos, (long long)rz->out%RZ_BLOCK_SIZE, (long long)rz->block_pos, (long long) rz->block_off); } */ return (uint64_t)rz->block_pos<<16 | (rz->block_off&0xffff); } int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where) { if (where != SEEK_SET) return -1; return razf_jump(rz, voffset>>16, voffset&0xffff); } void razf_close(RAZF *rz){ if(rz->mode == 'w'){ #ifndef _RZ_READONLY razf_end_flush(rz); deflateEnd(rz->stream); #ifdef _USE_KNETFILE save_zindex(rz, rz->x.fpw); if(is_big_endian()){ write(rz->x.fpw, &rz->in, sizeof(int64_t)); write(rz->x.fpw, &rz->out, sizeof(int64_t)); } else { uint64_t v64 = byte_swap_8((uint64_t)rz->in); write(rz->x.fpw, &v64, sizeof(int64_t)); v64 = byte_swap_8((uint64_t)rz->out); write(rz->x.fpw, 
&v64, sizeof(int64_t)); } #else save_zindex(rz, rz->filedes); if(is_big_endian()){ write(rz->filedes, &rz->in, sizeof(int64_t)); write(rz->filedes, &rz->out, sizeof(int64_t)); } else { uint64_t v64 = byte_swap_8((uint64_t)rz->in); write(rz->filedes, &v64, sizeof(int64_t)); v64 = byte_swap_8((uint64_t)rz->out); write(rz->filedes, &v64, sizeof(int64_t)); } #endif #endif } else if(rz->mode == 'r'){ if(rz->stream) inflateEnd(rz->stream); } if(rz->inbuf) free(rz->inbuf); if(rz->outbuf) free(rz->outbuf); if(rz->header){ free(rz->header->extra); free(rz->header->name); free(rz->header->comment); free(rz->header); } if(rz->index){ free(rz->index->bin_offsets); free(rz->index->cell_offsets); free(rz->index); } free(rz->stream); #ifdef _USE_KNETFILE if (rz->mode == 'r') knet_close(rz->x.fpr); if (rz->mode == 'w') close(rz->x.fpw); #else close(rz->filedes); #endif free(rz); } #endif samtools-0.1.19/razf.h000066400000000000000000000100511212162403000145420ustar00rootroot00000000000000 /*- * RAZF : Random Access compressed(Z) File * Version: 1.0 * Release Date: 2008-10-27 * * Copyright 2008, Jue Ruan , Heng Li * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef __RAZF_RJ_H #define __RAZF_RJ_H #include #include #include "zlib.h" #ifdef _USE_KNETFILE #include "knetfile.h" #endif #if ZLIB_VERNUM < 0x1221 #define _RZ_READONLY struct _gz_header_s; typedef struct _gz_header_s _gz_header; #define gz_header _gz_header #endif #define WINDOW_BITS 15 #ifndef RZ_BLOCK_SIZE #define RZ_BLOCK_SIZE (1<mode from HEAD to TYPE after call inflateReset */ int buf_off, buf_len; int z_err, z_eof; int seekable; /* Indice where the source is seekable */ int load_index; /* set has_index to 0 in mode 'w', then index will be discarded */ } RAZF; #ifdef __cplusplus extern "C" { #endif RAZF* razf_dopen(int data_fd, const char *mode); RAZF *razf_open(const char *fn, const char *mode); int razf_write(RAZF* rz, const void *data, int size); int razf_read(RAZF* rz, void *data, int size); int64_t razf_seek(RAZF* rz, int64_t pos, int where); void razf_close(RAZF* rz); #define razf_tell(rz) ((rz)->out) RAZF* razf_open2(const char *filename, const char *mode); RAZF* razf_dopen2(int fd, const char *mode); uint64_t razf_tell2(RAZF *rz); int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where); #ifdef __cplusplus } #endif #endif samtools-0.1.19/razip.c000066400000000000000000000100161212162403000147210ustar00rootroot00000000000000#include #include #include #include #include #include #include "razf.h" #define WINDOW_SIZE 4096 static int razf_main_usage() { printf("\n"); printf("Usage: razip [options] [file] ...\n\n"); printf("Options: -c write 
on standard output, keep original files unchanged\n"); printf(" -d decompress\n"); printf(" -l list compressed file contents\n"); printf(" -b INT decompress at INT position in the uncompressed file\n"); printf(" -s INT decompress INT bytes in the uncompressed file\n"); printf(" -h give this help\n"); printf("\n"); return 0; } static int write_open(const char *fn, int is_forced) { int fd = -1; char c; if (!is_forced) { if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0666)) < 0 && errno == EEXIST) { printf("razip: %s already exists; do you wish to overwrite (y or n)? ", fn); scanf("%c", &c); if (c != 'Y' && c != 'y') { printf("razip: not overwritten\n"); exit(1); } } } if (fd < 0) { if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0) { fprintf(stderr, "razip: %s: Fail to write\n", fn); exit(1); } } return fd; } int main(int argc, char **argv) { int c, compress, pstdout, is_forced; RAZF *rz; void *buffer; long start, end, size; compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; while((c = getopt(argc, argv, "cdlhfb:s:")) >= 0){ switch(c){ case 'h': return razf_main_usage(); case 'd': compress = 0; break; case 'c': pstdout = 1; break; case 'l': compress = 2; break; case 'b': start = atol(optarg); break; case 's': size = atol(optarg); break; case 'f': is_forced = 1; break; } } if (size >= 0) end = start + size; if(end >= 0 && end < start){ fprintf(stderr, " -- Illegal region: [%ld, %ld] --\n", start, end); return 1; } if(compress == 1){ int f_src, f_dst = -1; if(argc > optind){ if((f_src = open(argv[optind], O_RDONLY)) < 0){ fprintf(stderr, " -- Cannot open file: %s --\n", argv[optind]); return 1; } if(pstdout){ f_dst = fileno(stdout); } else { char *name = malloc(sizeof(strlen(argv[optind]) + 5)); strcpy(name, argv[optind]); strcat(name, ".rz"); f_dst = write_open(name, is_forced); if (f_dst < 0) return 1; free(name); } } else if(pstdout){ f_src = fileno(stdin); f_dst = fileno(stdout); } else return razf_main_usage(); rz = 
razf_dopen(f_dst, "w"); buffer = malloc(WINDOW_SIZE); while((c = read(f_src, buffer, WINDOW_SIZE)) > 0) razf_write(rz, buffer, c); razf_close(rz); // f_dst will be closed here if (argc > optind && !pstdout) unlink(argv[optind]); free(buffer); close(f_src); return 0; } else { if(argc <= optind) return razf_main_usage(); if(compress == 2){ rz = razf_open(argv[optind], "r"); if(rz->file_type == FILE_TYPE_RZ) { printf("%20s%20s%7s %s\n", "compressed", "uncompressed", "ratio", "name"); printf("%20lld%20lld%6.1f%% %s\n", (long long)rz->end, (long long)rz->src_end, rz->end * 100.0f / rz->src_end, argv[optind]); } else fprintf(stdout, "%s is not a regular rz file\n", argv[optind]); } else { int f_dst; if (argc > optind && !pstdout) { char *name; if (strstr(argv[optind], ".rz") - argv[optind] != strlen(argv[optind]) - 3) { printf("razip: %s: unknown suffix -- ignored\n", argv[optind]); return 1; } name = strdup(argv[optind]); name[strlen(name) - 3] = '\0'; f_dst = write_open(name, is_forced); free(name); } else f_dst = fileno(stdout); rz = razf_open(argv[optind], "r"); buffer = malloc(WINDOW_SIZE); razf_seek(rz, start, SEEK_SET); while(1){ if(end < 0) c = razf_read(rz, buffer, WINDOW_SIZE); else c = razf_read(rz, buffer, (end - start > WINDOW_SIZE)? 
WINDOW_SIZE:(end - start)); if(c <= 0) break; start += c; write(f_dst, buffer, c); if(end >= 0 && start >= end) break; } free(buffer); if (!pstdout) unlink(argv[optind]); } razf_close(rz); return 0; } } samtools-0.1.19/sam.c000066400000000000000000000136431212162403000143650ustar00rootroot00000000000000#include #include #include "faidx.h" #include "sam.h" #define TYPE_BAM 1 #define TYPE_READ 2 bam_header_t *bam_header_dup(const bam_header_t *h0) { bam_header_t *h; int i; h = bam_header_init(); *h = *h0; h->hash = h->dict = h->rg2lib = 0; h->text = (char*)calloc(h->l_text + 1, 1); memcpy(h->text, h0->text, h->l_text); h->target_len = (uint32_t*)calloc(h->n_targets, 4); h->target_name = (char**)calloc(h->n_targets, sizeof(void*)); for (i = 0; i < h->n_targets; ++i) { h->target_len[i] = h0->target_len[i]; h->target_name[i] = strdup(h0->target_name[i]); } return h; } static void append_header_text(bam_header_t *header, char* text, int len) { int x = header->l_text + 1; int y = header->l_text + len + 1; // 1 byte null if (text == 0) return; kroundup32(x); kroundup32(y); if (x < y) header->text = (char*)realloc(header->text, y); strncpy(header->text + header->l_text, text, len); // we cannot use strcpy() here. header->l_text += len; header->text[header->l_text] = 0; } int samthreads(samfile_t *fp, int n_threads, int n_sub_blks) { if (!(fp->type&1) || (fp->type&2)) return -1; bgzf_mt(fp->x.bam, n_threads, n_sub_blks); return 0; } samfile_t *samopen(const char *fn, const char *mode, const void *aux) { samfile_t *fp; fp = (samfile_t*)calloc(1, sizeof(samfile_t)); if (strchr(mode, 'r')) { // read fp->type |= TYPE_READ; if (strchr(mode, 'b')) { // binary fp->type |= TYPE_BAM; fp->x.bam = strcmp(fn, "-")? 
bam_open(fn, "r") : bam_dopen(fileno(stdin), "r"); if (fp->x.bam == 0) goto open_err_ret; fp->header = bam_header_read(fp->x.bam); } else { // text fp->x.tamr = sam_open(fn); if (fp->x.tamr == 0) goto open_err_ret; fp->header = sam_header_read(fp->x.tamr); if (fp->header->n_targets == 0) { // no @SQ fields if (aux) { // check if aux is present bam_header_t *textheader = fp->header; fp->header = sam_header_read2((const char*)aux); if (fp->header == 0) goto open_err_ret; append_header_text(fp->header, textheader->text, textheader->l_text); bam_header_destroy(textheader); } if (fp->header->n_targets == 0 && bam_verbose >= 1) fprintf(stderr, "[samopen] no @SQ lines in the header.\n"); } else if (bam_verbose >= 2) fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets); } } else if (strchr(mode, 'w')) { // write fp->header = bam_header_dup((const bam_header_t*)aux); if (strchr(mode, 'b')) { // binary char bmode[3]; int i, compress_level = -1; for (i = 0; mode[i]; ++i) if (mode[i] >= '0' && mode[i] <= '9') break; if (mode[i]) compress_level = mode[i] - '0'; if (strchr(mode, 'u')) compress_level = 0; bmode[0] = 'w'; bmode[1] = compress_level < 0? 0 : compress_level + '0'; bmode[2] = 0; fp->type |= TYPE_BAM; fp->x.bam = strcmp(fn, "-")? bam_open(fn, bmode) : bam_dopen(fileno(stdout), bmode); if (fp->x.bam == 0) goto open_err_ret; bam_header_write(fp->x.bam, fp->header); } else { // text // open file fp->x.tamw = strcmp(fn, "-")? 
fopen(fn, "w") : stdout; if (fp->x.tamw == 0) goto open_err_ret; if (strchr(mode, 'X')) fp->type |= BAM_OFSTR<<2; else if (strchr(mode, 'x')) fp->type |= BAM_OFHEX<<2; else fp->type |= BAM_OFDEC<<2; // write header if (strchr(mode, 'h')) { int i; bam_header_t *alt; // parse the header text alt = bam_header_init(); alt->l_text = fp->header->l_text; alt->text = fp->header->text; sam_header_parse(alt); alt->l_text = 0; alt->text = 0; // check if there are @SQ lines in the header fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw); // FIXME: better to skip the trailing NULL if (alt->n_targets) { // then write the header text without dumping ->target_{name,len} if (alt->n_targets != fp->header->n_targets && bam_verbose >= 1) fprintf(stderr, "[samopen] inconsistent number of target sequences. Output the text header.\n"); } else { // then dump ->target_{name,len} for (i = 0; i < fp->header->n_targets; ++i) fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]); } bam_header_destroy(alt); } } } return fp; open_err_ret: free(fp); return 0; } void samclose(samfile_t *fp) { if (fp == 0) return; if (fp->header) bam_header_destroy(fp->header); if (fp->type & TYPE_BAM) bam_close(fp->x.bam); else if (fp->type & TYPE_READ) sam_close(fp->x.tamr); else fclose(fp->x.tamw); free(fp); } int samread(samfile_t *fp, bam1_t *b) { if (fp == 0 || !(fp->type & TYPE_READ)) return -1; // not open for reading if (fp->type & TYPE_BAM) return bam_read1(fp->x.bam, b); else return sam_read1(fp->x.tamr, fp->header, b); } int samwrite(samfile_t *fp, const bam1_t *b) { if (fp == 0 || (fp->type & TYPE_READ)) return -1; // not open for writing if (fp->type & TYPE_BAM) return bam_write1(fp->x.bam, b); else { char *s = bam_format1_core(fp->header, b, fp->type>>2&3); int l = strlen(s); fputs(s, fp->x.tamw); fputc('\n', fp->x.tamw); free(s); return l + 1; } } int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data) { bam_plbuf_t *buf; int 
ret; bam1_t *b; b = bam_init1(); buf = bam_plbuf_init(func, func_data); bam_plbuf_set_mask(buf, mask); while ((ret = samread(fp, b)) >= 0) bam_plbuf_push(b, buf); bam_plbuf_push(0, buf); bam_plbuf_destroy(buf); bam_destroy1(b); return 0; } char *samfaipath(const char *fn_ref) { char *fn_list = 0; if (fn_ref == 0) return 0; fn_list = calloc(strlen(fn_ref) + 5, 1); strcat(strcpy(fn_list, fn_ref), ".fai"); if (access(fn_list, R_OK) == -1) { // fn_list is unreadable if (access(fn_ref, R_OK) == -1) { fprintf(stderr, "[samfaipath] fail to read file %s.\n", fn_ref); } else { if (bam_verbose >= 3) fprintf(stderr, "[samfaipath] build FASTA index...\n"); if (fai_build(fn_ref) == -1) { fprintf(stderr, "[samfaipath] fail to build FASTA index.\n"); free(fn_list); fn_list = 0; } } } return fn_list; } samtools-0.1.19/sam.h000066400000000000000000000052001212162403000143600ustar00rootroot00000000000000#ifndef BAM_SAM_H #define BAM_SAM_H #include "bam.h" /*! @header This file provides higher level of I/O routines and unifies the APIs for SAM and BAM formats. These APIs are more convenient and recommended. @copyright Genome Research Ltd. */ /*! @typedef @abstract SAM/BAM file handler @field type type of the handler; bit 1 for BAM, 2 for reading and bit 3-4 for flag format @field bam BAM file handler; valid if (type&1) == 1 @field tamr SAM file handler for reading; valid if type == 2 @field tamw SAM file handler for writing; valid if type == 0 @field header header struct */ typedef struct { int type; union { tamFile tamr; bamFile bam; FILE *tamw; } x; bam_header_t *header; } samfile_t; #ifdef __cplusplus extern "C" { #endif /*! @abstract Open a SAM/BAM file @param fn SAM/BAM file name; "-" is recognized as stdin (for reading) or stdout (for writing). @param mode open mode /[rw](b?)(u?)(h?)([xX]?)/: 'r' for reading, 'w' for writing, 'b' for BAM I/O, 'u' for uncompressed BAM output, 'h' for outputing header in SAM, 'x' for HEX flag and 'X' for string flag. 
If 'b' present, it must immediately follow 'r' or 'w'. Valid modes are "r", "w", "wh", "wx", "whx", "wX", "whX", "rb", "wb" and "wbu" exclusively. @param aux auxiliary data; if mode[0]=='w', aux points to bam_header_t; if strcmp(mode, "rb")!=0 and @SQ header lines in SAM are absent, aux points the file name of the list of the reference; aux is not used otherwise. If @SQ header lines are present in SAM, aux is not used, either. @return SAM/BAM file handler */ samfile_t *samopen(const char *fn, const char *mode, const void *aux); /*! @abstract Close a SAM/BAM handler @param fp file handler to be closed */ void samclose(samfile_t *fp); /*! @abstract Read one alignment @param fp file handler @param b alignment @return bytes read */ int samread(samfile_t *fp, bam1_t *b); /*! @abstract Write one alignment @param fp file handler @param b alignment @return bytes written */ int samwrite(samfile_t *fp, const bam1_t *b); /*! @abstract Get the pileup for a whole alignment file @param fp file handler @param mask mask transferred to bam_plbuf_set_mask() @param func user defined function called in the pileup process #param data user provided data for func() */ int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *data); char *samfaipath(const char *fn_ref); int samthreads(samfile_t *fp, int n_threads, int n_sub_blks); #ifdef __cplusplus } #endif #endif samtools-0.1.19/sam_header.c000066400000000000000000000514001212162403000156660ustar00rootroot00000000000000#include "sam_header.h" #include #include #include #include #include #include "khash.h" KHASH_MAP_INIT_STR(str, const char *) struct _HeaderList { struct _HeaderList *last; // Hack: Used and maintained only by list_append_to_end. Maintained in the root node only. 
struct _HeaderList *next; void *data; }; typedef struct _HeaderList list_t; typedef list_t HeaderDict; typedef struct { char key[2]; char *value; } HeaderTag; typedef struct { char type[2]; list_t *tags; } HeaderLine; const char *o_hd_tags[] = {"SO","GO",NULL}; const char *r_hd_tags[] = {"VN",NULL}; const char *o_sq_tags[] = {"AS","M5","UR","SP",NULL}; const char *r_sq_tags[] = {"SN","LN",NULL}; const char *u_sq_tags[] = {"SN",NULL}; const char *o_rg_tags[] = {"CN","DS","DT","FO","KS","LB","PG","PI","PL","PU","SM",NULL}; const char *r_rg_tags[] = {"ID",NULL}; const char *u_rg_tags[] = {"ID",NULL}; const char *o_pg_tags[] = {"VN","CL",NULL}; const char *r_pg_tags[] = {"ID",NULL}; const char *types[] = {"HD","SQ","RG","PG","CO",NULL}; const char **optional_tags[] = {o_hd_tags,o_sq_tags,o_rg_tags,o_pg_tags,NULL,NULL}; const char **required_tags[] = {r_hd_tags,r_sq_tags,r_rg_tags,r_pg_tags,NULL,NULL}; const char **unique_tags[] = {NULL, u_sq_tags,u_rg_tags,NULL,NULL,NULL}; static void debug(const char *format, ...) { va_list ap; va_start(ap, format); vfprintf(stderr, format, ap); va_end(ap); } #if 0 // Replaced by list_append_to_end static list_t *list_prepend(list_t *root, void *data) { list_t *l = malloc(sizeof(list_t)); l->next = root; l->data = data; return l; } #endif // Relies on the root->last being correct. Do not use with the other list_* // routines unless they are fixed to modify root->last as well. 
static list_t *list_append_to_end(list_t *root, void *data) { list_t *l = malloc(sizeof(list_t)); l->last = l; l->next = NULL; l->data = data; if ( !root ) return l; root->last->next = l; root->last = l; return root; } static list_t *list_append(list_t *root, void *data) { list_t *l = root; while (l && l->next) l = l->next; if ( l ) { l->next = malloc(sizeof(list_t)); l = l->next; } else { l = malloc(sizeof(list_t)); root = l; } l->data = data; l->next = NULL; return root; } static void list_free(list_t *root) { list_t *l = root; while (root) { l = root; root = root->next; free(l); } } // Look for a tag "XY" in a predefined const char *[] array. static int tag_exists(const char *tag, const char **tags) { int itag=0; if ( !tags ) return -1; while ( tags[itag] ) { if ( tags[itag][0]==tag[0] && tags[itag][1]==tag[1] ) return itag; itag++; } return -1; } // Mimics the behaviour of getline, except it returns pointer to the next chunk of the text // or NULL if everything has been read. The lineptr should be freed by the caller. The // newline character is stripped. static const char *nextline(char **lineptr, size_t *n, const char *text) { int len; const char *to = text; if ( !*to ) return NULL; while ( *to && *to!='\n' && *to!='\r' ) to++; len = to - text + 1; if ( *to ) { // Advance the pointer for the next call if ( *to=='\n' ) to++; else if ( *to=='\r' && *(to+1)=='\n' ) to+=2; } if ( !len ) return to; if ( !*lineptr ) { *lineptr = malloc(len); *n = len; } else if ( *nkey[0] = name[0]; tag->key[1] = name[1]; tag->value = malloc(len+1); memcpy(tag->value,value_from,len+1); tag->value[len] = 0; return tag; } static HeaderTag *header_line_has_tag(HeaderLine *hline, const char *key) { list_t *tags = hline->tags; while (tags) { HeaderTag *tag = tags->data; if ( tag->key[0]==key[0] && tag->key[1]==key[1] ) return tag; tags = tags->next; } return NULL; } // Return codes: // 0 .. different types or unique tags differ or conflicting tags, cannot be merged // 1 .. 
all tags identical -> no need to merge, drop one // 2 .. the unique tags match and there are some conflicting tags (same tag, different value) -> error, cannot be merged nor duplicated // 3 .. there are some missing complementary tags and no unique conflict -> can be merged into a single line static int sam_header_compare_lines(HeaderLine *hline1, HeaderLine *hline2) { HeaderTag *t1, *t2; if ( hline1->type[0]!=hline2->type[0] || hline1->type[1]!=hline2->type[1] ) return 0; int itype = tag_exists(hline1->type,types); if ( itype==-1 ) { debug("[sam_header_compare_lines] Unknown type [%c%c]\n", hline1->type[0],hline1->type[1]); return -1; // FIXME (lh3): error; I do not know how this will be handled in Petr's code } if ( unique_tags[itype] ) { t1 = header_line_has_tag(hline1,unique_tags[itype][0]); t2 = header_line_has_tag(hline2,unique_tags[itype][0]); if ( !t1 || !t2 ) // this should never happen, the unique tags are required return 2; if ( strcmp(t1->value,t2->value) ) return 0; // the unique tags differ, cannot be merged } if ( !required_tags[itype] && !optional_tags[itype] ) { t1 = hline1->tags->data; t2 = hline2->tags->data; if ( !strcmp(t1->value,t2->value) ) return 1; // identical comments return 0; } int missing=0, itag=0; while ( required_tags[itype] && required_tags[itype][itag] ) { t1 = header_line_has_tag(hline1,required_tags[itype][itag]); t2 = header_line_has_tag(hline2,required_tags[itype][itag]); if ( !t1 && !t2 ) return 2; // this should never happen else if ( !t1 || !t2 ) missing = 1; // there is some tag missing in one of the hlines else if ( strcmp(t1->value,t2->value) ) { if ( unique_tags[itype] ) return 2; // the lines have a matching unique tag but have a conflicting tag return 0; // the lines contain conflicting tags, cannot be merged } itag++; } itag = 0; while ( optional_tags[itype] && optional_tags[itype][itag] ) { t1 = header_line_has_tag(hline1,optional_tags[itype][itag]); t2 = header_line_has_tag(hline2,optional_tags[itype][itag]); if ( 
!t1 && !t2 ) { itag++; continue; } if ( !t1 || !t2 ) missing = 1; // there is some tag missing in one of the hlines else if ( strcmp(t1->value,t2->value) ) { if ( unique_tags[itype] ) return 2; // the lines have a matching unique tag but have a conflicting tag return 0; // the lines contain conflicting tags, cannot be merged } itag++; } if ( missing ) return 3; // there are some missing complementary tags with no conflicts, can be merged return 1; } static HeaderLine *sam_header_line_clone(const HeaderLine *hline) { list_t *tags; HeaderLine *out = malloc(sizeof(HeaderLine)); out->type[0] = hline->type[0]; out->type[1] = hline->type[1]; out->tags = NULL; tags = hline->tags; while (tags) { HeaderTag *old = tags->data; HeaderTag *new = malloc(sizeof(HeaderTag)); new->key[0] = old->key[0]; new->key[1] = old->key[1]; new->value = strdup(old->value); out->tags = list_append(out->tags, new); tags = tags->next; } return out; } static int sam_header_line_merge_with(HeaderLine *out_hline, const HeaderLine *tmpl_hline) { list_t *tmpl_tags; if ( out_hline->type[0]!=tmpl_hline->type[0] || out_hline->type[1]!=tmpl_hline->type[1] ) return 0; tmpl_tags = tmpl_hline->tags; while (tmpl_tags) { HeaderTag *tmpl_tag = tmpl_tags->data; HeaderTag *out_tag = header_line_has_tag(out_hline, tmpl_tag->key); if ( !out_tag ) { HeaderTag *tag = malloc(sizeof(HeaderTag)); tag->key[0] = tmpl_tag->key[0]; tag->key[1] = tmpl_tag->key[1]; tag->value = strdup(tmpl_tag->value); out_hline->tags = list_append(out_hline->tags,tag); } tmpl_tags = tmpl_tags->next; } return 1; } static HeaderLine *sam_header_line_parse(const char *headerLine) { HeaderLine *hline; HeaderTag *tag; const char *from, *to; from = headerLine; if ( *from != '@' ) { debug("[sam_header_line_parse] expected '@', got [%s]\n", headerLine); return 0; } to = ++from; while (*to && *to!='\t') to++; if ( to-from != 2 ) { debug("[sam_header_line_parse] expected '@XY', got [%s]\nHint: The header tags must be tab-separated.\n", headerLine); 
return 0; } hline = malloc(sizeof(HeaderLine)); hline->type[0] = from[0]; hline->type[1] = from[1]; hline->tags = NULL; int itype = tag_exists(hline->type, types); from = to; while (*to && *to=='\t') to++; if ( to-from != 1 ) { debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from)); free(hline); return 0; } from = to; while (*from) { while (*to && *to!='\t') to++; if ( !required_tags[itype] && !optional_tags[itype] ) { // CO is a special case, it can contain anything, including tabs if ( *to ) { to++; continue; } tag = new_tag(" ",from,to-1); } else tag = new_tag(from,from+3,to-1); if ( header_line_has_tag(hline,tag->key) ) debug("The tag '%c%c' present (at least) twice on line [%s]\n", tag->key[0],tag->key[1], headerLine); hline->tags = list_append(hline->tags, tag); from = to; while (*to && *to=='\t') to++; if ( *to && to-from != 1 ) { debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from)); return 0; } from = to; } return hline; } // Must be of an existing type, all tags must be recognised and all required tags must be present static int sam_header_line_validate(HeaderLine *hline) { list_t *tags; HeaderTag *tag; int itype, itag; // Is the type correct? itype = tag_exists(hline->type, types); if ( itype==-1 ) { debug("The type [%c%c] not recognised.\n", hline->type[0],hline->type[1]); return 0; } // Has all required tags? itag = 0; while ( required_tags[itype] && required_tags[itype][itag] ) { if ( !header_line_has_tag(hline,required_tags[itype][itag]) ) { debug("The tag [%c%c] required for [%c%c] not present.\n", required_tags[itype][itag][0],required_tags[itype][itag][1], hline->type[0],hline->type[1]); return 0; } itag++; } // Are all tags recognised? tags = hline->tags; while ( tags ) { tag = tags->data; if ( !tag_exists(tag->key,required_tags[itype]) && !tag_exists(tag->key,optional_tags[itype]) ) { // Lower case tags are user-defined values. 
if( !(islower(tag->key[0]) || islower(tag->key[1])) ) { // Neither is lower case, but tag was not recognized. debug("Unknown tag [%c%c] for [%c%c].\n", tag->key[0],tag->key[1], hline->type[0],hline->type[1]); // return 0; // Even unknown tags are allowed - for forward compatibility with new attributes } // else - allow user defined tag } tags = tags->next; } return 1; } static void print_header_line(FILE *fp, HeaderLine *hline) { list_t *tags = hline->tags; HeaderTag *tag; fprintf(fp, "@%c%c", hline->type[0],hline->type[1]); while (tags) { tag = tags->data; fprintf(fp, "\t"); if ( tag->key[0]!=' ' || tag->key[1]!=' ' ) fprintf(fp, "%c%c:", tag->key[0],tag->key[1]); fprintf(fp, "%s", tag->value); tags = tags->next; } fprintf(fp,"\n"); } static void sam_header_line_free(HeaderLine *hline) { list_t *tags = hline->tags; while (tags) { HeaderTag *tag = tags->data; free(tag->value); free(tag); tags = tags->next; } list_free(hline->tags); free(hline); } void sam_header_free(void *_header) { HeaderDict *header = (HeaderDict*)_header; list_t *hlines = header; while (hlines) { sam_header_line_free(hlines->data); hlines = hlines->next; } list_free(header); } HeaderDict *sam_header_clone(const HeaderDict *dict) { HeaderDict *out = NULL; while (dict) { HeaderLine *hline = dict->data; out = list_append(out, sam_header_line_clone(hline)); dict = dict->next; } return out; } // Returns a newly allocated string char *sam_header_write(const void *_header) { const HeaderDict *header = (const HeaderDict*)_header; char *out = NULL; int len=0, nout=0; const list_t *hlines; // Calculate the length of the string to allocate hlines = header; while (hlines) { len += 4; // @XY and \n HeaderLine *hline = hlines->data; list_t *tags = hline->tags; while (tags) { HeaderTag *tag = tags->data; len += strlen(tag->value) + 1; // \t if ( tag->key[0]!=' ' || tag->key[1]!=' ' ) len += strlen(tag->value) + 3; // XY: tags = tags->next; } hlines = hlines->next; } nout = 0; out = malloc(len+1); hlines = 
header; while (hlines) { HeaderLine *hline = hlines->data; nout += sprintf(out+nout,"@%c%c",hline->type[0],hline->type[1]); list_t *tags = hline->tags; while (tags) { HeaderTag *tag = tags->data; nout += sprintf(out+nout,"\t"); if ( tag->key[0]!=' ' || tag->key[1]!=' ' ) nout += sprintf(out+nout,"%c%c:", tag->key[0],tag->key[1]); nout += sprintf(out+nout,"%s", tag->value); tags = tags->next; } hlines = hlines->next; nout += sprintf(out+nout,"\n"); } out[len] = 0; return out; } void *sam_header_parse2(const char *headerText) { list_t *hlines = NULL; HeaderLine *hline; const char *text; char *buf=NULL; size_t nbuf = 0; int tovalidate = 0; if ( !headerText ) return 0; text = headerText; while ( (text=nextline(&buf, &nbuf, text)) ) { hline = sam_header_line_parse(buf); if ( hline && (!tovalidate || sam_header_line_validate(hline)) ) // With too many (~250,000) reference sequences the header parsing was too slow with list_append. hlines = list_append_to_end(hlines, hline); else { if (hline) sam_header_line_free(hline); sam_header_free(hlines); if ( buf ) free(buf); return NULL; } } if ( buf ) free(buf); return hlines; } void *sam_header2tbl(const void *_dict, char type[2], char key_tag[2], char value_tag[2]) { const HeaderDict *dict = (const HeaderDict*)_dict; const list_t *l = dict; khash_t(str) *tbl = kh_init(str); khiter_t k; int ret; if (_dict == 0) return tbl; // return an empty (not null) hash table while (l) { HeaderLine *hline = l->data; if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] ) { l = l->next; continue; } HeaderTag *key, *value; key = header_line_has_tag(hline,key_tag); value = header_line_has_tag(hline,value_tag); if ( !key || !value ) { l = l->next; continue; } k = kh_get(str, tbl, key->value); if ( k != kh_end(tbl) ) debug("[sam_header_lookup_table] They key %s not unique.\n", key->value); k = kh_put(str, tbl, key->value, &ret); kh_value(tbl, k) = value->value; l = l->next; } return tbl; } char **sam_header2list(const void *_dict, char 
type[2], char key_tag[2], int *_n) { const HeaderDict *dict = (const HeaderDict*)_dict; const list_t *l = dict; int max, n; char **ret; ret = 0; *_n = max = n = 0; while (l) { HeaderLine *hline = l->data; if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] ) { l = l->next; continue; } HeaderTag *key; key = header_line_has_tag(hline,key_tag); if ( !key ) { l = l->next; continue; } if (n == max) { max = max? max<<1 : 4; ret = realloc(ret, max * sizeof(void*)); } ret[n++] = key->value; l = l->next; } *_n = n; return ret; } void *sam_header2key_val(void *iter, const char type[2], const char key_tag[2], const char value_tag[2], const char **_key, const char **_value) { list_t *l = iter; if ( !l ) return NULL; while (l) { HeaderLine *hline = l->data; if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] ) { l = l->next; continue; } HeaderTag *key, *value; key = header_line_has_tag(hline,key_tag); value = header_line_has_tag(hline,value_tag); if ( !key && !value ) { l = l->next; continue; } *_key = key->value; *_value = value->value; return l->next; } return l; } const char *sam_tbl_get(void *h, const char *key) { khash_t(str) *tbl = (khash_t(str)*)h; khint_t k; k = kh_get(str, tbl, key); return k == kh_end(tbl)? 0 : kh_val(tbl, k); } int sam_tbl_size(void *h) { khash_t(str) *tbl = (khash_t(str)*)h; return h? 
kh_size(tbl) : 0; } void sam_tbl_destroy(void *h) { khash_t(str) *tbl = (khash_t(str)*)h; kh_destroy(str, tbl); } void *sam_header_merge(int n, const void **_dicts) { const HeaderDict **dicts = (const HeaderDict**)_dicts; HeaderDict *out_dict; int idict, status; if ( n<2 ) return NULL; out_dict = sam_header_clone(dicts[0]); for (idict=1; idictdata, out_hlines->data); if ( status==0 ) { out_hlines = out_hlines->next; continue; } if ( status==2 ) { print_header_line(stderr,tmpl_hlines->data); print_header_line(stderr,out_hlines->data); debug("Conflicting lines, cannot merge the headers.\n"); return 0; } if ( status==3 ) sam_header_line_merge_with(out_hlines->data, tmpl_hlines->data); inserted = 1; break; } if ( !inserted ) out_dict = list_append(out_dict, sam_header_line_clone(tmpl_hlines->data)); tmpl_hlines = tmpl_hlines->next; } } return out_dict; } char **sam_header2tbl_n(const void *dict, const char type[2], const char *tags[], int *n) { int nout = 0; char **out = NULL; *n = 0; list_t *l = (list_t *)dict; if ( !l ) return NULL; int i, ntags = 0; while ( tags[ntags] ) ntags++; while (l) { HeaderLine *hline = l->data; if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] ) { l = l->next; continue; } out = (char**) realloc(out, sizeof(char*)*(nout+1)*ntags); for (i=0; ivalue; } nout++; l = l->next; } *n = nout; return out; } samtools-0.1.19/sam_header.h000066400000000000000000000030751212162403000157000ustar00rootroot00000000000000#ifndef __SAM_HEADER_H__ #define __SAM_HEADER_H__ #ifdef __cplusplus extern "C" { #endif void *sam_header_parse2(const char *headerText); void *sam_header_merge(int n, const void **dicts); void sam_header_free(void *header); char *sam_header_write(const void *headerDict); // returns a newly allocated string /* // Usage example const char *key, *val; void *iter = sam_header_parse2(bam->header->text); while ( iter = sam_header_key_val(iter, "RG","ID","SM" &key,&val) ) printf("%s\t%s\n", key,val); */ void *sam_header2key_val(void *iter, 
const char type[2], const char key_tag[2], const char value_tag[2], const char **key, const char **value); char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n); /* // Usage example int i, j, n; const char *tags[] = {"SN","LN","UR","M5",NULL}; void *dict = sam_header_parse2(bam->header->text); char **tbl = sam_header2tbl_n(h->dict, "SQ", tags, &n); for (i=0; i #include #include #include #include #include #include "sam_header.h" #include "sam.h" #include "faidx.h" #include "kstring.h" #include "khash.h" KHASH_SET_INIT_STR(rg) // When counting records instead of printing them, // data passed to the bam_fetch callback is encapsulated in this struct. typedef struct { bam_header_t *header; int64_t *count; // int does overflow for very big BAMs } count_func_data_t; typedef khash_t(rg) *rghash_t; // FIXME: we'd better use no global variables... static rghash_t g_rghash = 0; static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0, g_qual_scale = 0, g_min_qlen = 0; static uint32_t g_subsam_seed = 0; static double g_subsam_frac = -1.; static char *g_library, *g_rg; static void *g_bed; void *bed_read(const char *fn); void bed_destroy(void *_h); int bed_overlap(const void *_h, const char *chr, int beg, int end); static int process_aln(const bam_header_t *h, bam1_t *b) { if (g_qual_scale > 1) { int i; uint8_t *qual = bam1_qual(b); for (i = 0; i < b->core.l_qseq; ++i) { int c = qual[i] * g_qual_scale; qual[i] = c < 93? 
c : 93; } } if (g_min_qlen > 0) { int k, qlen = 0; uint32_t *cigar = bam1_cigar(b); for (k = 0; k < b->core.n_cigar; ++k) if ((bam_cigar_type(bam_cigar_op(cigar[k]))&1) || bam_cigar_op(cigar[k]) == BAM_CHARD_CLIP) qlen += bam_cigar_oplen(cigar[k]); if (qlen < g_min_qlen) return 1; } if (b->core.qual < g_min_mapQ || ((b->core.flag & g_flag_on) != g_flag_on) || (b->core.flag & g_flag_off)) return 1; if (g_bed && b->core.tid >= 0 && !bed_overlap(g_bed, h->target_name[b->core.tid], b->core.pos, bam_calend(&b->core, bam1_cigar(b)))) return 1; if (g_subsam_frac > 0.) { uint32_t k = __ac_X31_hash_string(bam1_qname(b)) + g_subsam_seed; if ((double)(k&0xffffff) / 0x1000000 >= g_subsam_frac) return 1; } if (g_rg || g_rghash) { uint8_t *s = bam_aux_get(b, "RG"); if (s) { if (g_rg) return (strcmp(g_rg, (char*)(s + 1)) == 0)? 0 : 1; if (g_rghash) { khint_t k = kh_get(rg, g_rghash, (char*)(s + 1)); return (k != kh_end(g_rghash))? 0 : 1; } } } if (g_library) { const char *p = bam_get_library((bam_header_t*)h, b); return (p && strcmp(p, g_library) == 0)? 
0 : 1; } return 0; } static char *drop_rg(char *hdtxt, rghash_t h, int *len) { char *p = hdtxt, *q, *r, *s; kstring_t str; memset(&str, 0, sizeof(kstring_t)); while (1) { int toprint = 0; q = strchr(p, '\n'); if (q == 0) q = p + strlen(p); if (q - p < 3) break; // the line is too short; then stop if (strncmp(p, "@RG\t", 4) == 0) { int c; khint_t k; if ((r = strstr(p, "\tID:")) != 0) { r += 4; for (s = r; *s != '\0' && *s != '\n' && *s != '\t'; ++s); c = *s; *s = '\0'; k = kh_get(rg, h, r); *s = c; if (k != kh_end(h)) toprint = 1; } } else toprint = 1; if (toprint) { kputsn(p, q - p, &str); kputc('\n', &str); } p = q + 1; } *len = str.l; return str.s; } // callback function for bam_fetch() that prints nonskipped records static int view_func(const bam1_t *b, void *data) { if (!process_aln(((samfile_t*)data)->header, (bam1_t*)b)) samwrite((samfile_t*)data, b); return 0; } // callback function for bam_fetch() that counts nonskipped records static int count_func(const bam1_t *b, void *data) { if (!process_aln(((count_func_data_t*)data)->header, (bam1_t*)b)) { (*((count_func_data_t*)data)->count)++; } return 0; } static int usage(int is_long_help); int main_samview(int argc, char *argv[]) { int c, is_header = 0, is_header_only = 0, is_bamin = 1, ret = 0, compress_level = -1, is_bamout = 0, is_count = 0; int of_type = BAM_OFDEC, is_long_help = 0, n_threads = 0; int64_t count = 0; samfile_t *in = 0, *out = 0; char in_mode[5], out_mode[5], *fn_out = 0, *fn_list = 0, *fn_ref = 0, *fn_rg = 0, *q; /* parse command-line options */ strcpy(in_mode, "r"); strcpy(out_mode, "w"); while ((c = getopt(argc, argv, "SbBct:h1Ho:q:f:F:ul:r:xX?T:R:L:s:Q:@:m:")) >= 0) { switch (c) { case 's': if ((g_subsam_seed = strtol(optarg, &q, 10)) != 0) { srand(g_subsam_seed); g_subsam_seed = rand(); } g_subsam_frac = strtod(q, &q); break; case 'm': g_min_qlen = atoi(optarg); break; case 'c': is_count = 1; break; case 'S': is_bamin = 0; break; case 'b': is_bamout = 1; break; case 't': fn_list = 
strdup(optarg); is_bamin = 0; break; case 'h': is_header = 1; break; case 'H': is_header_only = 1; break; case 'o': fn_out = strdup(optarg); break; case 'f': g_flag_on = strtol(optarg, 0, 0); break; case 'F': g_flag_off = strtol(optarg, 0, 0); break; case 'q': g_min_mapQ = atoi(optarg); break; case 'u': compress_level = 0; break; case '1': compress_level = 1; break; case 'l': g_library = strdup(optarg); break; case 'L': g_bed = bed_read(optarg); break; case 'r': g_rg = strdup(optarg); break; case 'R': fn_rg = strdup(optarg); break; case 'x': of_type = BAM_OFHEX; break; case 'X': of_type = BAM_OFSTR; break; case '?': is_long_help = 1; break; case 'T': fn_ref = strdup(optarg); is_bamin = 0; break; case 'B': bam_no_B = 1; break; case 'Q': g_qual_scale = atoi(optarg); break; case '@': n_threads = strtol(optarg, 0, 0); break; default: return usage(is_long_help); } } if (compress_level >= 0) is_bamout = 1; if (is_header_only) is_header = 1; if (is_bamout) strcat(out_mode, "b"); else { if (of_type == BAM_OFHEX) strcat(out_mode, "x"); else if (of_type == BAM_OFSTR) strcat(out_mode, "X"); } if (is_bamin) strcat(in_mode, "b"); if (is_header) strcat(out_mode, "h"); if (compress_level >= 0) { char tmp[2]; tmp[0] = compress_level + '0'; tmp[1] = '\0'; strcat(out_mode, tmp); } if (argc == optind) return usage(is_long_help); // potential memory leak... // read the list of read groups if (fn_rg) { FILE *fp_rg; char buf[1024]; int ret; g_rghash = kh_init(rg); fp_rg = fopen(fn_rg, "r"); while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but bear me... kh_put(rg, g_rghash, strdup(buf), &ret); // we'd better check duplicates... 
fclose(fp_rg); } // generate the fn_list if necessary if (fn_list == 0 && fn_ref) fn_list = samfaipath(fn_ref); // open file handlers if ((in = samopen(argv[optind], in_mode, fn_list)) == 0) { fprintf(stderr, "[main_samview] fail to open \"%s\" for reading.\n", argv[optind]); ret = 1; goto view_end; } if (in->header == 0) { fprintf(stderr, "[main_samview] fail to read the header from \"%s\".\n", argv[optind]); ret = 1; goto view_end; } if (g_rghash) { // FIXME: I do not know what "bam_header_t::n_text" is for... char *tmp; int l; tmp = drop_rg(in->header->text, g_rghash, &l); free(in->header->text); in->header->text = tmp; in->header->l_text = l; } if (!is_count && (out = samopen(fn_out? fn_out : "-", out_mode, in->header)) == 0) { fprintf(stderr, "[main_samview] fail to open \"%s\" for writing.\n", fn_out? fn_out : "standard output"); ret = 1; goto view_end; } if (n_threads > 1) samthreads(out, n_threads, 256); if (is_header_only) goto view_end; // no need to print alignments if (argc == optind + 1) { // convert/print the entire file bam1_t *b = bam_init1(); int r; while ((r = samread(in, b)) >= 0) { // read one alignment from `in' if (!process_aln(in->header, b)) { if (!is_count) samwrite(out, b); // write the alignment to `out' count++; } } if (r < -1) { fprintf(stderr, "[main_samview] truncated file.\n"); ret = 1; } bam_destroy1(b); } else { // retrieve alignments in specified regions int i; bam_index_t *idx = 0; if (is_bamin) idx = bam_index_load(argv[optind]); // load BAM index if (idx == 0) { // index is unavailable fprintf(stderr, "[main_samview] random alignment retrieval only works for indexed BAM files.\n"); ret = 1; goto view_end; } for (i = optind + 1; i < argc; ++i) { int tid, beg, end, result; bam_parse_region(in->header, argv[i], &tid, &beg, &end); // parse a region in the format like `chr2:100-200' if (tid < 0) { // reference name is not found fprintf(stderr, "[main_samview] region \"%s\" specifies an unknown reference name. 
Continue anyway.\n", argv[i]); continue; } // fetch alignments if (is_count) { count_func_data_t count_data = { in->header, &count }; result = bam_fetch(in->x.bam, idx, tid, beg, end, &count_data, count_func); } else result = bam_fetch(in->x.bam, idx, tid, beg, end, out, view_func); if (result < 0) { fprintf(stderr, "[main_samview] retrieval of region \"%s\" failed due to truncated file or corrupt BAM index file\n", argv[i]); ret = 1; break; } } bam_index_destroy(idx); // destroy the BAM index } view_end: if (is_count && ret == 0) printf("%" PRId64 "\n", count); // close files, free and return free(fn_list); free(fn_ref); free(fn_out); free(g_library); free(g_rg); free(fn_rg); if (g_bed) bed_destroy(g_bed); if (g_rghash) { khint_t k; for (k = 0; k < kh_end(g_rghash); ++k) if (kh_exist(g_rghash, k)) free((char*)kh_key(g_rghash, k)); kh_destroy(rg, g_rghash); } samclose(in); if (!is_count) samclose(out); return ret; } static int usage(int is_long_help) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: samtools view [options] | [region1 [...]]\n\n"); fprintf(stderr, "Options: -b output BAM\n"); fprintf(stderr, " -h print header for the SAM output\n"); fprintf(stderr, " -H print header only (no alignments)\n"); fprintf(stderr, " -S input is SAM\n"); fprintf(stderr, " -u uncompressed BAM output (force -b)\n"); fprintf(stderr, " -1 fast compression (force -b)\n"); fprintf(stderr, " -x output FLAG in HEX (samtools-C specific)\n"); fprintf(stderr, " -X output FLAG in string (samtools-C specific)\n"); fprintf(stderr, " -c print only the count of matching records\n"); fprintf(stderr, " -B collapse the backward CIGAR operation\n"); fprintf(stderr, " -@ INT number of BAM compression threads [0]\n"); fprintf(stderr, " -L FILE output alignments overlapping the input BED FILE [null]\n"); fprintf(stderr, " -t FILE list of reference names and lengths (force -S) [null]\n"); fprintf(stderr, " -T FILE reference sequence file (force -S) [null]\n"); fprintf(stderr, " -o FILE output file 
name [stdout]\n"); fprintf(stderr, " -R FILE list of read groups to be outputted [null]\n"); fprintf(stderr, " -f INT required flag, 0 for unset [0]\n"); fprintf(stderr, " -F INT filtering flag, 0 for unset [0]\n"); fprintf(stderr, " -q INT minimum mapping quality [0]\n"); fprintf(stderr, " -l STR only output reads in library STR [null]\n"); fprintf(stderr, " -r STR only output reads in read group STR [null]\n"); fprintf(stderr, " -s FLOAT fraction of templates to subsample; integer part as seed [-1]\n"); fprintf(stderr, " -? longer help\n"); fprintf(stderr, "\n"); if (is_long_help) fprintf(stderr, "Notes:\n\ \n\ 1. By default, this command assumes the file on the command line is in\n\ the BAM format and it prints the alignments in SAM. If `-t' is\n\ applied, the input file is assumed to be in the SAM format. The\n\ file supplied with `-t' is SPACE/TAB delimited with the first two\n\ fields of each line consisting of the reference name and the\n\ corresponding sequence length. The `.fai' file generated by `faidx'\n\ can be used here. This file may be empty if reads are unaligned.\n\ \n\ 2. SAM->BAM conversion: `samtools view -bT ref.fa in.sam.gz'.\n\ \n\ 3. BAM->SAM conversion: `samtools view in.bam'.\n\ \n\ 4. A region should be presented in one of the following formats:\n\ `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n\ specified, the input alignment file must be an indexed BAM file.\n\ \n\ 5. Option `-u' is preferred over `-b' when the output is piped to\n\ another samtools command.\n\ \n\ 6. In a string FLAG, each character represents one bit with\n\ p=0x1 (paired), P=0x2 (properly paired), u=0x4 (unmapped),\n\ U=0x8 (mate unmapped), r=0x10 (reverse), R=0x20 (mate reverse)\n\ 1=0x40 (first), 2=0x80 (second), s=0x100 (not primary), \n\ f=0x200 (failure) and d=0x400 (duplicate). Note that `-x' and\n\ `-X' are samtools-C specific. 
/*
 * Complement table for the BAM 4-bit nucleotide codes ("=ACMGRSVTWYHKDBN",
 * i.e. bit 1 = A, 2 = C, 4 = G, 8 = T).  Complementing a code swaps the A and
 * T bits and the C and G bits, which is the 4-bit reversal of the index.
 * main_bam2fq() uses it to reverse-complement reads flagged as reverse strand.
 *
 * The table must be an involution: codes 6 (S = C|G) and 9 (W = A|T) are
 * self-complementary and therefore map to themselves.
 */
int8_t seq_comp_table[16] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
buf[i] = t; } } puts((char*)buf); } free(buf); bam_destroy1(b); bam_header_destroy(h); bam_close(fp); return 0; } samtools-0.1.19/sample.c000066400000000000000000000056501212162403000150650ustar00rootroot00000000000000#include #include #include "sample.h" #include "khash.h" KHASH_MAP_INIT_STR(sm, int) bam_sample_t *bam_smpl_init(void) { bam_sample_t *s; s = calloc(1, sizeof(bam_sample_t)); s->rg2smid = kh_init(sm); s->sm2id = kh_init(sm); return s; } void bam_smpl_destroy(bam_sample_t *sm) { int i; khint_t k; khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid; if (sm == 0) return; for (i = 0; i < sm->n; ++i) free(sm->smpl[i]); free(sm->smpl); for (k = kh_begin(rg2smid); k != kh_end(rg2smid); ++k) if (kh_exist(rg2smid, k)) free((char*)kh_key(rg2smid, k)); kh_destroy(sm, sm->rg2smid); kh_destroy(sm, sm->sm2id); free(sm); } static void add_pair(bam_sample_t *sm, khash_t(sm) *sm2id, const char *key, const char *val) { khint_t k_rg, k_sm; int ret; khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid; k_rg = kh_get(sm, rg2smid, key); if (k_rg != kh_end(rg2smid)) return; // duplicated @RG-ID k_rg = kh_put(sm, rg2smid, strdup(key), &ret); k_sm = kh_get(sm, sm2id, val); if (k_sm == kh_end(sm2id)) { // absent if (sm->n == sm->m) { sm->m = sm->m? 
sm->m<<1 : 1; sm->smpl = realloc(sm->smpl, sizeof(void*) * sm->m); } sm->smpl[sm->n] = strdup(val); k_sm = kh_put(sm, sm2id, sm->smpl[sm->n], &ret); kh_val(sm2id, k_sm) = sm->n++; } kh_val(rg2smid, k_rg) = kh_val(sm2id, k_sm); } int bam_smpl_add(bam_sample_t *sm, const char *fn, const char *txt) { const char *p = txt, *q, *r; kstring_t buf, first_sm; int n = 0; khash_t(sm) *sm2id = (khash_t(sm)*)sm->sm2id; if (txt == 0) { add_pair(sm, sm2id, fn, fn); return 0; } memset(&buf, 0, sizeof(kstring_t)); memset(&first_sm, 0, sizeof(kstring_t)); while ((q = strstr(p, "@RG")) != 0) { p = q + 3; r = q = 0; if ((q = strstr(p, "\tID:")) != 0) q += 4; if ((r = strstr(p, "\tSM:")) != 0) r += 4; if (r && q) { char *u, *v; int oq, or; for (u = (char*)q; *u && *u != '\t' && *u != '\n'; ++u); for (v = (char*)r; *v && *v != '\t' && *v != '\n'; ++v); oq = *u; or = *v; *u = *v = '\0'; buf.l = 0; kputs(fn, &buf); kputc('/', &buf); kputs(q, &buf); add_pair(sm, sm2id, buf.s, r); if ( !first_sm.s ) kputs(r,&first_sm); *u = oq; *v = or; } else break; p = q > r? q : r; ++n; } if (n == 0) add_pair(sm, sm2id, fn, fn); // If there is only one RG tag present in the header and reads are not annotated, don't refuse to work but // use the tag instead. else if ( n==1 && first_sm.s ) add_pair(sm,sm2id,fn,first_sm.s); if ( first_sm.s ) free(first_sm.s); // add_pair(sm, sm2id, fn, fn); free(buf.s); return 0; } int bam_smpl_rg2smid(const bam_sample_t *sm, const char *fn, const char *rg, kstring_t *str) { khint_t k; khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid; if (rg) { str->l = 0; kputs(fn, str); kputc('/', str); kputs(rg, str); k = kh_get(sm, rg2smid, str->s); } else k = kh_get(sm, rg2smid, fn); return k == kh_end(rg2smid)? 
-1 : kh_val(rg2smid, k); } samtools-0.1.19/sample.h000066400000000000000000000006141212162403000150650ustar00rootroot00000000000000#ifndef BAM_SAMPLE_H #define BAM_SAMPLE_H #include "kstring.h" typedef struct { int n, m; char **smpl; void *rg2smid, *sm2id; } bam_sample_t; bam_sample_t *bam_smpl_init(void); int bam_smpl_add(bam_sample_t *sm, const char *abs, const char *txt); int bam_smpl_rg2smid(const bam_sample_t *sm, const char *fn, const char *rg, kstring_t *str); void bam_smpl_destroy(bam_sample_t *sm); #endif samtools-0.1.19/samtools.1000066400000000000000000000671341212162403000153700ustar00rootroot00000000000000.TH samtools 1 "15 March 2013" "samtools-0.1.19" "Bioinformatics tools" .SH NAME .PP samtools - Utilities for the Sequence Alignment/Map (SAM) format bcftools - Utilities for the Binary Call Format (BCF) and VCF .SH SYNOPSIS .PP samtools view -bt ref_list.txt -o aln.bam aln.sam.gz .PP samtools sort aln.bam aln.sorted .PP samtools index aln.sorted.bam .PP samtools idxstats aln.sorted.bam .PP samtools view aln.sorted.bam chr2:20,100,000-20,200,000 .PP samtools merge out.bam in1.bam in2.bam in3.bam .PP samtools faidx ref.fasta .PP samtools pileup -vcf ref.fasta aln.sorted.bam .PP samtools mpileup -C50 -gf ref.fasta -r chr3:1,000-2,000 in1.bam in2.bam .PP samtools tview aln.sorted.bam ref.fasta .PP bcftools index in.bcf .PP bcftools view in.bcf chr2:100-200 > out.vcf .PP bcftools view -Nvm0.99 in.bcf > out.vcf 2> out.afs .SH DESCRIPTION .PP Samtools is a set of utilities that manipulate alignments in the BAM format. It imports from and exports to the SAM (Sequence Alignment/Map) format, does sorting, merging and indexing, and allows to retrieve reads in any regions swiftly. Samtools is designed to work on a stream. It regards an input file `-' as the standard input (stdin) and an output file `-' as the standard output (stdout). Several commands can thus be combined with Unix pipes. 
Samtools always outputs warning and error messages to the standard error output (stderr). Samtools is also able to open a BAM (not SAM) file on a remote FTP or HTTP server if the BAM file name starts with `ftp://' or `http://'. Samtools checks the current working directory for the index file and will download the index upon absence. Samtools does not retrieve the entire alignment file unless it is asked to do so. .SH SAMTOOLS COMMANDS AND OPTIONS .TP 10 .B view samtools view [-bchuHS] [-t in.refList] [-o output] [-f reqFlag] [-F skipFlag] [-q minMapQ] [-l library] [-r readGroup] [-R rgFile] <in.bam>|<in.sam> [region1 [...]] Extract/print all or sub alignments in SAM or BAM format. If no region is specified, all the alignments will be printed; otherwise only alignments overlapping the specified regions will be output. An alignment may be given multiple times if it is overlapping several regions. A region can be presented, for example, in the following format: `chr2' (the whole chr2), `chr2:1000000' (region starting from 1,000,000bp) or `chr2:1,000,000-2,000,000' (region between 1,000,000 and 2,000,000bp including the end points). The coordinate is 1-based. .B OPTIONS: .RS .TP 10 .B -b Output in the BAM format. .TP .BI -f \ INT Only output alignments with all bits in INT present in the FLAG field. INT can be in hex in the format of /^0x[0-9A-F]+/ [0] .TP .BI -F \ INT Skip alignments with bits present in INT [0] .TP .B -h Include the header in the output. .TP .B -H Output the header only. .TP .BI -l \ STR Only output reads in library STR [null] .TP .BI -o \ FILE Output file [stdout] .TP .BI -q \ INT Skip alignments with MAPQ smaller than INT [0] .TP .BI -r \ STR Only output reads in read group STR [null] .TP .BI -R \ FILE Output reads in read groups listed in .I FILE [null] .TP .BI -s \ FLOAT Fraction of templates/pairs to subsample; the integer part is treated as the seed for the random number generator [-1] .TP .B -S Input is in SAM.
If @SQ header lines are absent, the .B `-t' option is required. .TP .B -c Instead of printing the alignments, only count them and print the total number. All filter options, such as .B `-f', .B `-F' and .B `-q' , are taken into account. .TP .BI -t \ FILE This file is TAB-delimited. Each line must contain the reference name and the length of the reference, one line for each distinct reference; additional fields are ignored. This file also defines the order of the reference sequences in sorting. If you run `samtools faidx <ref.fa>', the resultant index file .I <ref.fa>.fai can be used as this .I file. .TP .B -u Output uncompressed BAM. This option saves time spent on compression/decompression and is thus preferred when the output is piped to another samtools command. .RE .TP .B tview samtools tview .RB [ \-p .IR chr:pos ] .RB [ \-s .IR STR ] .RB [ \-d .IR display ] .RI <in.sorted.bam> .RI [ref.fasta] Text alignment viewer (based on the ncurses library). In the viewer, press `?' for help and press `g' to check the alignment start from a region in the format like `chr10:10,000,000' or `=10,000,000' when viewing the same reference sequence. .B Options: .RS .TP 14 .BI -d \ display Output as (H)tml or (C)urses or (T)ext .TP .BI -p \ chr:pos Go directly to this position .TP .BI -s \ STR Display only reads from this sample or read group .RE .TP .B mpileup samtools mpileup .RB [ \-EBugp ] .RB [ \-C .IR capQcoef ] .RB [ \-r .IR reg ] .RB [ \-f .IR in.fa ] .RB [ \-l .IR list ] .RB [ \-M .IR capMapQ ] .RB [ \-Q .IR minBaseQ ] .RB [ \-q .IR minMapQ ] .I in.bam .RI [ in2.bam .RI [ ... ]] Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. In the pileup format (without .BR -u or -g ), each line represents a genomic position, consisting of chromosome name, coordinate, reference base, read bases, read qualities and alignment mapping qualities.
Information on match, mismatch, indel, strand, mapping quality and start and end of a read are all encoded at the read base column. At this column, a dot stands for a match to the reference base on the forward strand, a comma for a match on the reverse strand, a '>' or '<' for a reference skip, `ACGTN' for a mismatch on the forward strand and `acgtn' for a mismatch on the reverse strand. A pattern `\\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between this reference position and the next reference position. The length of the insertion is given by the integer in the pattern, followed by the inserted sequence. Similarly, a pattern `-[0-9]+[ACGTNacgtn]+' represents a deletion from the reference. The deleted bases will be presented as `*' in the following lines. Also at the read base column, a symbol `^' marks the start of a read. The ASCII of the character following `^' minus 33 gives the mapping quality. A symbol `$' marks the end of a read segment. .B Input Options: .RS .TP 10 .B -6 Assume the quality is in the Illumina 1.3+ encoding. .B -A Do not skip anomalous read pairs in variant calling. .TP .B -B Disable probabilistic realignment for the computation of base alignment quality (BAQ). BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments. .TP .BI -b \ FILE List of input BAM files, one file per line [null] .TP .BI -C \ INT Coefficient for downgrading mapping quality for reads containing excessive mismatches. Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. [0] .TP .BI -d \ INT At a position, read maximally .I INT reads per input BAM. [250] .TP .B -E Extended BAQ computation. This option helps sensitivity especially for MNPs, but may hurt specificity a little bit. 
.TP .BI -f \ FILE The .BR faidx -indexed reference file in the FASTA format. The file can be optionally compressed by .BR razip . [null] .TP .BI -l \ FILE BED or position list file containing a list of regions or sites where pileup or BCF should be generated [null] .TP .BI -q \ INT Minimum mapping quality for an alignment to be used [0] .TP .BI -Q \ INT Minimum base quality for a base to be considered [13] .TP .BI -r \ STR Only generate pileup in region .I STR [all sites] .TP .B Output Options: .TP .B -D Output per-sample read depth .TP .B -g Compute genotype likelihoods and output them in the binary call format (BCF). .TP .B -S Output per-sample Phred-scaled strand bias P-value .TP .B -u Similar to .B -g except that the output is uncompressed BCF, which is preferred for piping. .TP .B Options for Genotype Likelihood Computation (for -g or -u): .TP .BI -e \ INT Phred-scaled gap extension sequencing error probability. Reducing .I INT leads to longer indels. [20] .TP .BI -h \ INT Coefficient for modeling homopolymer errors. Given an .IR l -long homopolymer run, the sequencing error of an indel of size .I s is modeled as .IR INT * s / l . [100] .TP .B -I Do not perform INDEL calling .TP .BI -L \ INT Skip INDEL calling if the average per-sample depth is above .IR INT . [250] .TP .BI -o \ INT Phred-scaled gap open sequencing error probability. Reducing .I INT leads to more indel calls. [40] .TP .BI -p Apply -m and -F thresholds per sample to increase sensitivity of calling. By default both options are applied to reads pooled from all samples. .TP .BI -P \ STR Comma delimited list of platforms (determined by .BR @RG-PL ) from which indel candidates are obtained. It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA. [all] .RE .TP .B reheader samtools reheader <in.header.sam> <in.bam> Replace the header in .I in.bam with the header in .I in.header.sam.
This command is much faster than replacing the header with a BAM->SAM->BAM conversion. .TP .B cat samtools cat [-h header.sam] [-o out.bam] <in1.bam> <in2.bam> [ ... ] Concatenate BAMs. The sequence dictionary of each input BAM must be identical, although this command does not check this. This command uses a similar trick to .B reheader which enables fast BAM concatenation. .TP .B sort samtools sort [-nof] [-m maxMem] <in.bam> <out.prefix> Sort alignments by leftmost coordinates. File .I <out.prefix>.bam will be created. This command may also create temporary files .I <out.prefix>.%d.bam when the whole alignment cannot be fitted into memory (controlled by option -m). .B OPTIONS: .RS .TP 8 .B -o Output the final alignment to the standard output. .TP .B -n Sort by read names rather than by chromosomal coordinates .TP .B -f Use .I <out.prefix> as the full output path and do not append .I .bam suffix. .TP .BI -m \ INT Approximately the maximum required memory. [500000000] .RE .TP .B merge samtools merge [-nur1f] [-h inh.sam] [-R reg] <out.bam> <in1.bam> <in2.bam> [...] Merge multiple sorted alignments. The header reference lists of all the input BAM files, and the @SQ headers of .IR inh.sam , if any, must all refer to the same set of reference sequences. The header reference list and (unless overridden by .BR -h ) `@' headers of .I in1.bam will be copied to .IR out.bam , and the headers of other files will be ignored. .B OPTIONS: .RS .TP 8 .B -1 Use zlib compression level 1 to compress the output .TP .B -f Force to overwrite the output file if present. .TP 8 .BI -h \ FILE Use the lines of .I FILE as `@' headers to be copied to .IR out.bam , replacing any header lines that would otherwise be copied from .IR in1.bam . .RI ( FILE is actually in SAM format, though any alignment records it may contain are ignored.) .TP .B -n The input alignments are sorted by read names rather than by chromosomal coordinates .TP .BI -R \ STR Merge files in the specified region indicated by .I STR [null] .TP .B -r Attach an RG tag to each alignment. The tag value is inferred from file names.
.TP .B -u Uncompressed BAM output .RE .TP .B index samtools index <aln.bam> Index sorted alignment for fast random access. Index file .I <aln.bam>.bai will be created. .TP .B idxstats samtools idxstats <aln.bam> Retrieve and print stats in the index file. The output is TAB delimited with each line consisting of reference sequence name, sequence length, # mapped reads and # unmapped reads. .TP .B faidx samtools faidx <ref.fasta> [region1 [...]] Index reference sequence in the FASTA format or extract subsequence from indexed reference sequence. If no region is specified, .B faidx will index the file and create .I <ref.fasta>.fai on the disk. If regions are specified, the subsequences will be retrieved and printed to stdout in the FASTA format. The input file can be compressed in the .B RAZF format. .TP .B fixmate samtools fixmate <in.nameSrt.bam> <out.bam> Fill in mate coordinates, ISIZE and mate related flags from a name-sorted alignment. .TP .B rmdup samtools rmdup [-sS] <input.srt.bam> <out.bam> Remove potential PCR duplicates: if multiple read pairs have identical external coordinates, only retain the pair with highest mapping quality. In the paired-end mode, this command .B ONLY works with FR orientation and requires ISIZE is correctly set. It does not work for unpaired reads (e.g. two ends mapped to different chromosomes or orphan reads). .B OPTIONS: .RS .TP 8 .B -s Remove duplicate for single-end reads. By default, the command works for paired-end reads only. .TP 8 .B -S Treat paired-end reads and single-end reads. .RE .TP .B calmd samtools calmd [-EeubSr] [-C capQcoef] <aln.bam> <ref.fasta> Generate the MD tag. If the MD tag is already present, this command will give a warning if the MD tag generated is different from the existing tag. Output SAM by default. .B OPTIONS: .RS .TP 8 .B -A When used jointly with .B -r this option overwrites the original base quality. .TP 8 .B -e Convert the read base to = if it is identical to the aligned reference base. Indel caller does not support the = bases at the moment.
.TP .B -u Output uncompressed BAM .TP .B -b Output compressed BAM .TP .B -S The input is SAM with header lines .TP .BI -C \ INT Coefficient to cap mapping quality of poorly mapped reads. See the .B pileup command for details. [0] .TP .B -r Compute the BQ tag (without -A) or cap base quality by BAQ (with -A). .TP .B -E Extended BAQ calculation. This option trades specificity for sensitivity, though the effect is minor. .RE .TP .B targetcut samtools targetcut [-Q minBaseQ] [-i inPenalty] [-0 em0] [-1 em1] [-2 em2] [-f ref] This command identifies target regions by examining the continuity of read depth, computes haploid consensus sequences of targets and outputs a SAM with each sequence corresponding to a target. When option .B -f is in use, BAQ will be applied. This command is .B only designed for cutting fosmid clones from fosmid pool sequencing [Ref. Kitzman et al. (2010)]. .RE .TP .B phase samtools phase [-AF] [-k len] [-b prefix] [-q minLOD] [-Q minBaseQ] Call and phase heterozygous SNPs. .B OPTIONS: .RS .TP 8 .B -A Drop reads with ambiguous phase. .TP 8 .BI -b \ STR Prefix of BAM output. When this option is in use, phase-0 reads will be saved in file .BR STR .0.bam and phase-1 reads in .BR STR .1.bam. Phase unknown reads will be randomly allocated to one of the two files. Chimeric reads with switch errors will be saved in .BR STR .chimeric.bam. [null] .TP .B -F Do not attempt to fix chimeric reads. .TP .BI -k \ INT Maximum length for local phasing. [13] .TP .BI -q \ INT Minimum Phred-scaled LOD to call a heterozygote. [40] .TP .BI -Q \ INT Minimum base quality to be used in het calling. 
[13] .RE .SH BCFTOOLS COMMANDS AND OPTIONS .TP 10 .B view .B bcftools view .RB [ \-AbFGNQSucgv ] .RB [ \-D .IR seqDict ] .RB [ \-l .IR listLoci ] .RB [ \-s .IR listSample ] .RB [ \-i .IR gapSNPratio ] .RB [ \-t .IR mutRate ] .RB [ \-p .IR varThres ] .RB [ \-m .IR varThres ] .RB [ \-P .IR prior ] .RB [ \-1 .IR nGroup1 ] .RB [ \-d .IR minFrac ] .RB [ \-U .IR nPerm ] .RB [ \-X .IR permThres ] .RB [ \-T .IR trioType ] .I in.bcf .RI [ region ] Convert between BCF and VCF, call variant candidates and estimate allele frequencies. .RS .TP .B Input/Output Options: .TP 10 .B -A Retain all possible alternate alleles at variant sites. By default, the view command discards unlikely alleles. .TP 10 .B -b Output in the BCF format. The default is VCF. .TP .BI -D \ FILE Sequence dictionary (list of chromosome names) for VCF->BCF conversion [null] .TP .B -F Indicate PL is generated by r921 or before (ordering is different). .TP .B -G Suppress all individual genotype information. .TP .BI -l \ FILE List of sites at which information is output [all sites] .TP .B -N Skip sites where the REF field is not A/C/G/T .TP .B -Q Output the QCALL likelihood format .TP .BI -s \ FILE List of samples to use. The first column in the input gives the sample names and the second gives the ploidy, which can only be 1 or 2. When the 2nd column is absent, the sample ploidy is assumed to be 2. In the output, the ordering of samples will be identical to the one in .IR FILE . [null] .TP .B -S The input is VCF instead of BCF. .TP .B -u Uncompressed BCF output (force -b). .TP .B Consensus/Variant Calling Options: .TP 10 .B -c Call variants using Bayesian inference. This option automatically invokes option .BR -e . .TP .BI -d \ FLOAT When .B -v is in use, skip loci where the fraction of samples covered by reads is below FLOAT. [0] .TP .B -e Perform max-likelihood inference only, including estimating the site allele frequency, testing Hardy-Weinberg equilibrium and testing associations with LRT.
.TP .B -g Call per-sample genotypes at variant sites (force -c) .TP .BI -i \ FLOAT Ratio of INDEL-to-SNP mutation rate [0.15] .TP .BI -m \ FLOAT New model for improved multiallelic and rare-variant calling. Another ALT allele is accepted if P(chi^2) of LRT exceeds the FLOAT threshold. The parameter seems robust and the actual value usually does not affect the results much; a good value to use is 0.99. This is the recommended calling method. [0] .TP .BI -p \ FLOAT A site is considered to be a variant if P(ref|D) rg.txt samtools merge -rh rg.txt merged.bam ga.bam 454.bam The value in a .B RG tag is determined by the file name the read is coming from. In this example, in the .IR merged.bam , reads from .I ga.bam will be attached .IR RG:Z:ga , while reads from .I 454.bam will be attached .IR RG:Z:454 . .IP o 2 Call SNPs and short INDELs for one diploid individual: samtools mpileup -ugf ref.fa aln.bam | bcftools view -bvcg - > var.raw.bcf bcftools view var.raw.bcf | vcfutils.pl varFilter -D 100 > var.flt.vcf The .B -D option of varFilter controls the maximum read depth, which should be adjusted to about twice the average read depth. One may consider to add .B -C50 to .B mpileup if mapping quality is overestimated for reads containing excessive mismatches. Applying this option usually helps .B BWA-short but may not other mappers. .IP o 2 Generate the consensus sequence for one diploid individual: samtools mpileup -uf ref.fa aln.bam | bcftools view -cg - | vcfutils.pl vcf2fq > cns.fq .IP o 2 Call somatic mutations from a pair of samples: samtools mpileup -DSuf ref.fa aln.bam | bcftools view -bvcgT pair - > var.bcf In the output INFO field, .I CLR gives the Phred-log ratio between the likelihood by treating the two samples independently, and the likelihood by requiring the genotype to be identical. This .I CLR is effectively a score measuring the confidence of somatic calls. The higher the better. 
.IP o 2 Call de novo and somatic mutations from a family trio: samtools mpileup -DSuf ref.fa aln.bam | bcftools view -bvcgT pair -s samples.txt - > var.bcf File .I samples.txt should consist of three lines specifying the member and order of samples (in the order of child-father-mother). Similarly, .I CLR gives the Phred-log likelihood ratio with and without the trio constraint. .I UGT shows the most likely genotype configuration without the trio constraint, and .I CGT gives the most likely genotype configuration satisfying the trio constraint. .IP o 2 Phase one individual: samtools calmd -AEur aln.bam ref.fa | samtools phase -b prefix - > phase.out The .B calmd command is used to reduce false heterozygotes around INDELs. .IP o 2 Call SNPs and short indels for multiple diploid individuals: samtools mpileup -P ILLUMINA -ugf ref.fa *.bam | bcftools view -bcvg - > var.raw.bcf bcftools view var.raw.bcf | vcfutils.pl varFilter -D 2000 > var.flt.vcf Individuals are identified from the .B SM tags in the .B @RG header lines. Individuals can be pooled in one alignment file; one individual can also be separated into multiple files. The .B -P option specifies that indel candidates should be collected only from read groups with the .B @RG-PL tag set to .IR ILLUMINA . Collecting indel candidates from reads sequenced by an indel-prone technology may affect the performance of indel calling. Note that there is a new calling model which can be invoked by bcftools view -m0.99 ... which fixes some severe limitations of the default method. For filtering, best results seem to be achieved by first applying the .IR SnpGap filter and then applying some machine learning approach vcf-annotate -f SnpGap=n vcf filter ... Both can be found in the .B vcftools and .B htslib package (links below). 
.IP o 2 Derive the allele frequency spectrum (AFS) on a list of sites from multiple individuals: samtools mpileup -Igf ref.fa *.bam > all.bcf bcftools view -bl sites.list all.bcf > sites.bcf bcftools view -cGP cond2 sites.bcf > /dev/null 2> sites.1.afs bcftools view -cGP sites.1.afs sites.bcf > /dev/null 2> sites.2.afs bcftools view -cGP sites.2.afs sites.bcf > /dev/null 2> sites.3.afs ...... where .I sites.list contains the list of sites with each line consisting of the reference sequence name and position. The following .B bcftools commands estimate AFS by EM. .IP o 2 Dump BAQ applied alignment for other SNP callers: samtools calmd -bAr aln.bam > aln.baq.bam It adds and corrects the .B NM and .B MD tags at the same time. The .B calmd command also comes with the .B -C option, the same as the one in .B pileup and .BR mpileup . Apply if it helps. .SH LIMITATIONS .PP .IP o 2 Unaligned words used in bam_import.c, bam_endian.h, bam.c and bam_aux.c. .IP o 2 Samtools paired-end rmdup does not work for unpaired reads (e.g. orphan reads or ends mapped to different chromosomes). If this is a concern, please use Picard's MarkDuplicate which correctly handles these cases, although a little slower. .SH AUTHOR .PP Heng Li from the Sanger Institute wrote the C version of samtools. Bob Handsaker from the Broad Institute implemented the BGZF library and Jue Ruan from Beijing Genomics Institute wrote the RAZF library. John Marshall and Petr Danecek contribute to the source code and various people from the 1000 Genomes Project have contributed to the SAM format specification. .SH SEE ALSO .PP Samtools website: .br Samtools latest source: .br VCFtools website with stable link to VCF specification: .br HTSlib website: samtools-0.1.19/win32/000077500000000000000000000000001212162403000143745ustar00rootroot00000000000000samtools-0.1.19/win32/libcurses.a000066400000000000000000003401141212162403000165340ustar00rootroot00000000000000! 
/ 1249167201 0 0 0 6570 ` ®îîîîîîîîîî########)))))))).h.h.h.h.h.h.h.h.h.h.h.h.h.h.h.h.h.h.h.h.h.h.h.h.h8b8b;°;°;°;°;°?ô?ô?ô?ô?ô?ô?ô?ô?ô?ô?ôIHIHIHIHIHIHIHIHMÆMÆMÆMÆMÆMÆMÆMÆMÆMÆMÆMÆMÆMÆZZZZ]˜]˜]˜]˜]˜]˜]˜]˜]˜]˜cìcìcìcìføføføføføføføførªrªrªrªrªrªrªrª}`}`}`}`}`}`}`}`}`ƒðƒðƒðƒðƒðƒðƒðƒð‰4‰4‰4‰4‰4‰4‰4‰4‰4‰4‰4‰4‰4‰4‰4‰4‰4‰4‰4‰4‰4£ڣڣڣڣڣڣڣڪҪҪҪҪҪҪҪҰT°T°T°T°T°T°T°TµXµXµXµXµXµXµXµXµXµXµXµXµXµXµX¾€¾€Û¼Û¼Û¼Û¼Û¼Û¼Û¼Û¼Û¼Û¼Û¼Û¼Û¼Û¼Û¼æÈæÈé6é6é6é6é6é6é6é6é6íBíBíBôôôôôýðýðýðýðýðýðýðýðýðýðýðýðýðýðýðýðýðýð888888      (4(4(4+²+²+²+²+²+²+²+²+²+²+²+²+²+²+²+²+²+²AAAAAAAAAADÚDÚDÚDÚDÚDÚDÚDÚDÚDÚDÚDÚDÚDÚDÚDÚDÚDÚDÚDÚDÚDÚK„K„K„K„K„K„OÈOÈOÈOÈRšRšRšRšRšRšRšRšRšRšRšRšRšRšRšRšbìbìbìbìhhhhmþmþmþsÄsÄsÄsÄwLwLwLwLwLwLwLwL’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƹz¹z¹z½½½_waddch_addch_mvaddch_mvwaddch_wechochar_echochar_waddrawch_addrawch_mvaddrawch_mvwaddrawch_waddchnstr_addchstr_addchnstr_waddchstr_mvaddchstr_mvaddchnstr_mvwaddchstr_mvwaddchnstr_waddnstr_addstr_addnstr_waddstr_mvaddstr_mvaddnstr_mvwaddstr_mvwaddnstr_wattroff_attroff_wattron_attron_wattrset_attrset_standend_standout_wstandend_wstandout_getattrs_wcolor_set_color_set_wattr_get_attr_get_wattr_off_attr_off_wattr_on_attr_on_wattr_set_attr_set_wchgat_chgat_mvchgat_mvwchgat_flash_beep_wbkgdset_wbkgd_bkgd_bkgdset_getbkgd_wborder_border_box_whline_hline_mvhline_mvwhline_wvline_vline_mvvline_mvwvline_wclrtoeol_clrtoeol_wclrtobot_clrtobot_werase_erase_wclear_clear_PDC_init_atrtab_start_color_init_pair_has_colors_init_color_color_content_can_change_color_pair_content_assume_default_colors_use_default_colors_PDC_set_line_color_pdc_color_started_COLOR_PAIRS_COLORS_wdelch_delch_mvdelch_mvwdelch_wdeleteln_deleteln_mvdeleteln_mvwdeleteln_winsertln_winsdelln_insdelln_insertln_mvinsertln_mvwinsertln_PDC_check_bios_key_PDC_get_bios_key_PDC_get_ctrl_break_PDC_set_ctrl_break_wgetch_mvgetch_mvwgetch_PDC_ungetch_flushinp_PDC_get_key_modifiers_PDC_save_key_modifiers_PDC_return_key_modifiers_wgetnstr_getstr_wgetstr_mvgetstr_mvwgetstr_getnstr_mvgetnstr_mvwgetnstr_getbegy_getbegx_getcury_ge
tcurx_getpary_getparx_getmaxy_getmaxx_setsyx_winch_inch_mvinch_mvwinch_winchnstr_inchstr_winchstr_mvinchstr_mvwinchstr_inchnstr_mvinchnstr_mvwinchnstr_Xinitscr_initscr_endwin_isendwin_newterm_set_term_delscreen_resize_term_is_termresized_curses_version_TABSIZE_COLS_LINES_pdc_lastscr_stdscr_curscr_SP__curses_notice_Mouse_status_ttytype_pdc_mouse_status_cbreak_nocbreak_echo_noecho_halfdelay_intrflush_keypad_meta_nl_nonl_nodelay_notimeout_raw_noraw_noqiflush_qiflush_typeahead_wtimeout_timeout_crmode_nocrmode_winsch_insch_mvinsch_mvwinsch_winsrawch_insrawch_mvinsrawch_mvwinsrawch_winsnstr_insstr_winsstr_mvinsstr_mvwinsstr_insnstr_mvinsnstr_mvwinsnstr_winnstr_instr_winstr_mvinstr_mvwinstr_innstr_mvinnstr_mvwinnstr_def_prog_mode_def_shell_mode_reset_prog_mode_reset_shell_mode_resetty_savetty_curs_set_napms_ripoffline_draino_resetterm_fixterm_saveterm_linesrippedoff_linesripped_has_key_keyname_mouse_set_mouse_on_mouse_off_map_button_request_mouse_pos_wenclose_wmouse_position_getmouse_getbmap_mouseinterval_wmouse_trafo_mouse_trafo_mousemask_nc_getmouse_ungetmouse_move_wmove_clearok_idlok_idcok_immedok_leaveok_wsetscrreg_setscrreg_scrollok_raw_output_overlay_overwrite_copywin_newpad_subpad_pnoutrefresh_prefresh_pechochar_hide_panel_bottom_panel_del_panel_move_panel_show_panel_new_panel_panel_above_panel_below_panel_hidden_panel_userptr_panel_window_replace_panel_set_panel_userptr_top_panel_update_panels__stdscr_pseudo_panel__top_panel__bottom_panel_vwprintw_printw_wprintw_mvprintw_mvwprintw_vw_printw_wnoutrefresh_doupdate_wrefresh_refresh_wredrawln_redrawwin_vwscanw_scanw_wscanw_mvscanw_mvwscanw_vw_scanw_putwin_getwin_scr_dump_scr_init_scr_restore_scr_set_wscrl_scrl_scroll_slk_init_slk_set_slk_noutrefresh_slk_refresh_slk_label_slk_clear_slk_restore_slk_touch_slk_attron_slk_attr_on_slk_attroff_slk_attr_off_slk_attrset_slk_color_slk_attr_set_PDC_slk_initialize_PDC_slk_free_PDC_mouse_in_slk_baudrate_erasechar_has_ic_has_il_killchar_longname_termattrs_term_attrs_termname_wordcha
r_mvcur_vidattr_vid_attr_vidputs_vid_puts_del_curterm_putp_restartterm_set_curterm_setterm_setupterm_tgetent_tgetflag_tgetnum_tgetstr_tgoto_tigetflag_tigetnum_tigetstr_tparm_tputs_cur_term_touchwin_touchline_untouchwin_wtouchln_is_linetouched_is_wintouched_unctrl_filter_use_env_delay_output_PDC_makenew_PDC_makelines_wsyncup_PDC_sync_newwin_delwin_mvwin_subwin_derwin_mvderwin_dupwin_resize_window_wresize_syncok_wcursyncup_wsyncdown_PDC_debug_traceon_traceoff_pdc_trace_on_PDC_getclipboard_PDC_setclipboard_PDC_freeclipboard_PDC_clearclipboard_PDC_gotoyx_PDC_transform_line_acs_map_PDC_get_cursor_mode_PDC_get_rows_PDC_get_buffer_rows_PDC_get_columns_PDC_get_input_fd_PDC_set_keyboard_binary_PDC_check_key_PDC_get_key_PDC_flushinp_PDC_mouse_set_PDC_modifiers_set_pdc_key_modifiers_PDC_scr_free_PDC_resize_screen_PDC_reset_prog_mode_PDC_scr_open_PDC_reset_shell_mode_PDC_scr_close_PDC_restore_screen_mode_PDC_save_screen_mode_PDC_init_pair_PDC_pair_content_PDC_can_change_color_PDC_color_content_PDC_init_color_pdc_con_in_pdc_con_out_pdc_atrtab_pdc_quick_edit_PDC_curs_set_PDC_set_title_PDC_set_blink_PDC_beep_PDC_napms_PDC_sysnameaddch.o/ 1249166489 502 20 100644 2292 ` Lz.text0Œ¼ P`.data@0À.bss€0ÀU‰åWVSƒì,‹u‹} ë+ƒù„:ƒù „n‰4$ƒË^‰\$èÎÿÿÿ@„σÇ@…ö„Ä‹‹F9V‰Uì‰EðŒ°‹F ‹Uð‰Eä9МÀ¶Ð‹EìÁè Â…‘‹Eð…Àˆ†‹1À‰Uà€zu ÷Çu°v‰ù‰ûáÿÿãÿÿ„Àtcƒù–Àƒù” ШtQƒù „;†<ÿÿÿƒù „ƒù…<ÿÿÿ‰4$‰ØƒÈ^‰D$èÿÿÿ@t ‰ßƒÏ?…ö…<ÿÿÿºÿÿÿÿƒÄ,‰Ð[^_]Ã÷Ãÿ„½÷Ãÿ…‹V ‰Ð%ÿÿ Ãù „% Ù‹Uì‹^,‹}ð‹“9 ¸t9‹F0‰EÜ‹ƒøÿ„v;EðŽÿ‹}ì‹Uð‹E܉¸‹~ ‰}ä‹Uì‹}ð‹“‰ ¸ÿEð‹Eä9Eð|ÿEìÇEð‹Uì;V<H€~(‹}ð‹Eì‰~‰u>€~)u 1ÒƒÄ,‰Ð[^_]É4$è1Òëê‹V Ó÷Ãÿ„>ÿÿÿ‹V ‰Ð%ÿé9ÿÿÿ‰4$è븋}à1À€•À÷Ø!Eð‰4$èÿEì‹Eì;F<~ƒH¹‰Eì‰L$‰4$è@ºÿÿÿÿ…bÿÿÿé¹þÿÿÿMð‰TÿÿÿÇEðéHÿÿÿ·ÊéÓþÿÿ‹F4‹Uì‹}ð9<ÿÿÿ‰<‹F ‰Eäéóþÿÿ‹Eð™÷=x¡¯ø9}ðÿÿÿƒË ‰\$‰4$èBýÿÿ@„Cþÿÿ‹F…À„øþÿÿÿEð9}ð|Ú1Òéëþÿÿ‹}ð‹F4‰<‹E܉<‹V ‰Uäé‹þÿÿJ¸‰Uì‰D$‰4$è@…œþÿÿ‰4$èéæýÿÿ´&¼'U‰åƒì‹E‰D$¡‰$è¶üÿÿÉÃt&U‰åSƒì‹E ‹]‰D$‹E‰$è@t‰] ¡‰EƒÄ[]é|üÿÿƒÄ¸ÿÿÿÿ[]ÃU‰åƒì‰]ø‹E‹]‰uü‹u‰D$‹E ‰$‰D$è@t‰u ‹uü‰]‹]ø‰ì]é1üÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]ÃfU‰åSƒì‹]‹E ‰$‰D$èüÿÿ@t ‰]ƒÄ[]éƒÄ¸ÿÿÿÿ[]ö¿U‰åƒì‹E‰D$¡‰$è¦ÿÿÿÉÃt&U‰åS‹M 
‹]·Áƒø–ƒø”À ШtɉM ‰][]éŽûÿÿ´&¼'U‰åƒì‹E‰D$¡‰$è¦ÿÿÿÉÃt&U‰åSƒì‹E ‹]‰D$‹E‰$è@t‰] ¡‰EƒÄ[]élÿÿÿƒÄ¸ÿÿÿÿ[]ÃU‰åƒì‰]ø‹E‹]‰uü‹u‰D$‹E ‰$‰D$è@t‰u ‹uü‰]‹]ø‰ì]é!ÿÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]ÄØ?—Ÿ >hs´.ޏÃ.fileþÿgaddch.c_waddch _addch0 _mvaddchP  à   #@ .€ 8  Dà .text..data.bss_SP_TABSIZE_stdscr_wmove _move _wsyncup Q [ _wscrl e p_mvwaddch_wechochar_echochar_waddrawch_addrawch_mvaddrawch_mvwaddrawch_wrefresh_PDC_sync_wclrtoeoladdchstr.o/ 1249166490 502 20 100644 1464 ` Lì.textŒœ P`.data@0À.bss€0ÀU‰åWVSƒì‹M‹U ‹u…É”À…Ò” Шu…ö”ÀƒþÿœÂ Шt ƒÄ¸ÿÿÿÿ[^_]˃þÿ‹Y‰Eð‹Uð‹A,‹<˜„š‹Q ‰Ð)Ø9ðŒŽ‹A0…ö‹Uð‹I4‰Eä‹‹Eð‰Mà‰U苉EìtJ‹U ‹ …ÉtA‰ö¼'9t ;]èœÀƒ}èÿ” Шt‰]è;]ì~‰]쉃E CƒÇNt ‹E ‹…ÉuÈ‹Eè‹Uð‹M䉑‹Eì‹Mà‰‘ƒÄ1À[^_]ËQ ‰Ö)Þéiÿÿÿ¶¼'U¹ÿÿÿÿ‰åƒì ‹E‰L$‰D$¡‰$èÍþÿÿÉÃt&¼'U‰åƒì ‹E ‰D$‹E‰D$¡‰$èŸþÿÿÉö¼'U¸ÿÿÿÿ‰åƒì ‰D$‹E ‰D$‹E‰$èoþÿÿÉö¼'U‰åSƒì‹E ‹]‰D$‹E‰$è@tÇEÿÿÿÿ‰] ¡‰EƒÄ[]é%þÿÿƒÄ¸ÿÿÿÿ[]Ãv¼'U‰åƒì‰]ø‹E ‹]‰uü‹u‰D$‹E‰$è@t‰]‹]ø‰u ¡‹uü‰E‰ì]éÍýÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]ô&¼'U‰åƒì‰]ø‹E‹]‰uü‹u‰D$‹E ‰$‰D$è@t‰u ‹uü‰]‹]øÇEÿÿÿÿ‰ì]éjýÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]Ãt&¼'U‰åƒì‰]ô‹E‹]‰uø‹u‰}ü‹}‰D$‹E ‰4$‰D$è@t‰]‹]ô‰} ‹}ü‰u‹uø‰ì]éýÿÿ‹]ô¸ÿÿÿÿ‹uø‹}ü‰ì]Ã'U¸Ê!tÚ.fileþÿgaddchstr.c  @ %p 0  <ð IP V° .text .data.bss_stdscr_wmove _move d_waddchnstr_addchstr_addchnstr_waddchstr_mvaddchstr_mvaddchnstr_mvwaddchstr_mvwaddchnstraddstr.o/ 1249166491 502 20 100644 1306 ` Lf.text€Œ  P`.data@0À.bss€0ÀU‰åWVSƒì 1Û‹E‹} ‹u…À”À…ÿ” Шºÿÿÿÿu9¶„Ét01Ò‰ð9óœÂÁè Ât ¶ÁC‰D$‹E‰$è@t¶ „ÉuÒf1ÒƒÄ ‰Ð[^_]úÿÿÿÿëï¶¼'Uºÿÿÿÿ‰åƒì‹E‰T$‰D$¡‰$è]ÿÿÿÉÃt&¼'U‰åƒì‹E ‰D$‹E‰D$¡‰$è/ÿÿÿÉö¼'U¹ÿÿÿÿ‰åƒì‹E ‰L$‰D$‹E‰$èÿþÿÿÉö¼'U‰åSƒì‹E ‹]‰D$‹E‰$è@tÇEÿÿÿÿ‰] ¡‰EƒÄ[]éµþÿÿƒÄ¸ÿÿÿÿ[]Ãv¼'U‰åƒì‰]ø‹E ‹]‰uü‹u‰D$‹E‰$è@t‰]‹]ø‰u ¡‹uü‰E‰ì]é]þÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]ô&¼'U‰åƒì‰]ø‹E‹]‰uü‹u‰D$‹E ‰$‰D$è@t‰u ‹uü‰]‹]øÇEÿÿÿÿ‰ì]éúýÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]Ãt&¼'U‰åƒì‰]ô‹E‹]‰uø‹u‰}ü‹}‰D$‹E ‰4$‰D$è@t‰]‹]ô‰} ‹}ü‰u‹uø‰ì]é•ýÿÿ‹]ô¸ÿÿÿÿ‹uø‹}ü‰ì]ÃO—Å(:€‘äJ.fileþÿgaddstr.c _addstr€ _addnstr° _waddstrà  ` #À .  
.text} .data.bss_stdscr_wmove _move _waddch :_waddnstr_mvaddstr_mvaddnstr_mvwaddstr_mvwaddnstrattr.o/ 1249166492 502 20 100644 2494 ` LR&.text0Œ¼ P`.data@0À.bss€0ÀU¸ÿÿÿÿ‰å‹M‹U …Ét‹A÷Ò!Ââÿÿ1À‰Q]Ãv¼'U‰åƒì‹E‰D$¡‰$è¶ÿÿÿÉÃt&Uºÿÿÿÿ‰åƒì‹E ‰$‹]‰t$…Ût6‹S‰Ñáÿt7‰Ææÿt-%ÿÿ1Ê1ð Ð ð‰C¶¼'1Ò‹$‰Ð‹t$‰ì]Ã%ÿÿ ‰Sëät&U‰åƒì‹E‰D$¡‰$èvÿÿÿÉÃt&U¸ÿÿÿÿ‰å‹M‹U …Ét âÿÿ1À‰Q]ÃU‰åƒì‹E‰D$¡‰$èÆÿÿÿÉÃt&U1Ò‰åƒì¡‰T$‰$è§ÿÿÿÉÃt&U¹ ‰åƒì¡‰L$‰$è„ÿÿÿÉÃfU1À‰åƒì‰D$‹E‰$èiÿÿÿÉô&U¸ ‰åƒì‰D$‹E‰$èFÿÿÿÉÃt&U1À‰å‹U…Òt‹B]Ãë U¸ÿÿÿÿ‰å¿U ‹M…ÉtˆQ1À]öU‰åƒì ¿U‹E ‰D$¡‰T$‰$è¾ÿÿÿÉö¿U¸ÿÿÿÿ‰åS‹U‹M ‹]…Òt…Ét ‹B%ÿ‰…Ût¶Bf‰1À[]ö¿U‰åƒì‹E‰D$ ‹E ‰D$‹E‰D$¡‰$è˜ÿÿÿÉöU‰å]éwýÿÿ´&U‰å‹E‰E ¡‰E]éYýÿÿ‰ö¼'U‰å]é—ýÿÿ´&U‰å‹E‰E ¡‰E]éyýÿÿ‰ö¼'U¸ÿÿÿÿ‰åS‹]‹U ¿M…ÛtâÿÁá ʉS1À[]öU‰åƒì¿U ‹E‰D$ ‹E‰T$‰D$¡‰$è§ÿÿÿÉÃt&U¸ÿÿÿÿ‰åWVSƒì ‹]¿M‹U …Ût‹EÁá‹u‹@æÿÿ ΉEð…Òxx‹Eð‹M‹A 9Ð]Xÿ‹E‹Uð‹8‹@,‹ ¸ë´&‰ðÁèf‰D‘B9Ú~ñ‹M‹Q0‹º;Eð*@t'‹U‹B49¸}‰¸‹M‰ $è1ÀƒÄ [^_]ÉÐ럋Eð‰ºëÑ‹U‹Z Kë‘´&U‰åƒì¿U‹E‰D$‹E ‰T$ ‰D$‹E‰D$¡‰$èÿÿÿÉô&¼'U‰åƒì(‰]ô‹E‰uø‹u‰Eð‹E ‰}ü‹}¿]‰D$‹E‰$è@t(‰u‹Eð‹uø‰]‹]ô‰}‹}ü‰E ¡‰E‰ì]é¤þÿÿ‹]ô¸ÿÿÿÿ‹uø‹}ü‰ì]ÃfU‰åƒì(‰]ô‹E‰uø‹u ‰Eð‹E‰}ü‹}‰Eì‹E¿]‰D$‹E ‰<$‰D$è@t&‰u‹Eì‹uø‰]‹]ô‰E‹Eð‰}‹}ü‰E ‰ì]é,þÿÿ‹]ô¸ÿÿÿÿ‹uø‹}ü‰ì]Ã>"Î"")"L"ò"l"š"Ê"-"Õ%$"m$"ç#.fileþÿgattr.c _attroff0 _wattronP _attronÀ à _attrset   "@ ,` 7€ B  LÀ Xà c nP x€ ƒ ° _attr_onÀ —à ¢ _wchgat@ _chgat _mvchgat@ ¬° .text&.data.bss_stdscr_wmove _move ¶ À_wattroff_wattrset_standend_standout_wstandend_wstandout_getattrs_wcolor_set_color_set_wattr_get_attr_get_wattr_off_attr_off_wattr_on_wattr_set_attr_set_mvwchgat_PDC_syncbeep.o/ 1249166493 502 20 100644 786 ` L¶.textÐŒ\ P`.data@0À.bss€0ÀU‰åWVSƒì ÇEð‹=1Û9û}R‹ ‹5´&1Ò9Ê}‹F,‹˜t&4 B9Ê|ôC9û|߉4$è‹Eð…Àu#Ç$2èë‹5‰4$è‹Eð…ÀtÝÿEðƒ}ð~„ƒÄ 1À[^_]Ãv¼'U‰åƒì¡€xtèÉ1Àöè;ÿÿÿÉ1Àà  $ Uhp x§².fileþÿgbeep.c_flash _beep  .textÉ .data.bss_LINES_COLS_curscr_SP_napms   _wrefresh_PDC_beepbkgd.o/ 1249166494 502 20 100644 1032 ` L´.textŒŒ P`.data@0À.bss€0ÀU‰å‹U‹E …Òt f…ÀuƒÈ ‰B ]öU¸ÿÿÿÿ‰åWVSƒì,‹MÇEÜ‹U ÇEØ…É„-‹E‹H 1À9Ñ„‰È%ÿ‰Eì…‰T$‹Uáÿÿ‰Mè‰$è~ÿÿÿ‹U‹B ‰Ââÿ‰Uät‰Ââÿÿ‰UØ‹Uä1UØÇEð%ÿÿ‰Eà‹E‹@‰EÔƒøŽ˜‹U‹z v1öƒÿ~y‹U‹B,‹Uð‹ë3EÜâÿÿ EØ;Uèt: ÈF 
‰ƒÃ9÷~L‹‰Ñ‰Ðáÿ%ÿ;MìuË3EÜâÿÿ‹Mä EØ;UèuÈf‹Uà ÈF ‰ƒÃ9÷Ãë ÿEð‹Eð9EÔqÿÿÿ‹U‰$è‹E‰$è1ÀƒÄ,[^_]ÉÈ%ÿÿ‰EÜ‹Eì1EÜéÖþÿÿ¶¿U‰åƒì‹E‰D$¡‰$èfþÿÿÉÃt&U‰åƒì‹E‰D$¡‰$è&þÿÿÉÃt&U¸ÿÿÿÿ‰å‹U…Òt‹B ]Ãfq®Î.fileþÿgbkgd.c _wbkgd  _bkgd  _bkgdsetÀ _getbkgdà .textô.data.bss_stdscr  "_wbkgdset_PDC_sync_touchwinborder.o/ 1249166496 502 20 100644 2328 ` L$.text Œ¬ P`.data@0À.bss€0ÀU‰Ñáÿÿ÷Âÿ‰åu H÷Áÿu]‹@ %ÿÿ Á·Â ÈÃ]‹@ %ÿ Á·Â ÈÃë U¸ÿÿÿÿ‰åWVSƒì ‹U…Ò„s‹E‹x‹p ‹E ON…À„e‰Â‹Eèwÿÿÿ‰E ‹E…Àu¸x‰Â‹Eè^ÿÿÿ‰E‹E…Àu¸q‰Â‹EèEÿÿÿ‰E‹E…Àu¸q‰Â‹Eè,ÿÿÿ‰E‹E…Àu¸l‰Â‹Eèÿÿÿ‰E‹E …Àu¸k‰Â‹Eèúþÿÿ‰E ‹E$…Àu¸m‰Â‹Eèáþÿÿ‰E$‹E(…Àu¸j‰Â‹EèÈþÿÿ‰E(º9ò¦‹M‹Y,‹‹ »‰Eì‰Mð´&‹E‹M쉑‹E‹Mð‰‘B9ò|éºëf‹ “B‹E ‰‹E‰±9ú|í‹U‹‰‹U ‰°‹U$‹»‰‹U(‰°1Ò9ú‹]‹K0‹C4vÇ‘‰4B9ú~ñ‹E‰$è1ÀƒÄ [^_]øxé‘þÿÿ‹E‹X,ëv¼'U‰åƒì(‹E$‰D$ ‹E ‰D$‹E‰D$‹E‰D$‹E‰D$‹E‰D$ ‹E ‰D$‹E‰D$¡‰$èþÿÿÉÃvU1ɉåƒì(‹E‰L$ ‹U 1ɉL$1ɉD$‰D$ ‹E‰L$1ɉL$‰T$‰T$‰$èÁýÿÿÉÃë U‰åWVSƒì ‹]‹M‹u …Û”À…ÉžÂ Шºÿÿÿÿ…„‹E‹@‰Â‰EðÊ‹M‹A 9ÐwXÿ‹E…ö‹U‹‰Eì‹Mì‹B,‹<ˆ‰ðu¸q‰Â‹Eèòüÿÿ‹Mð‰Æë‰4A9Ù~ø‹E‹Mì‹P0‹Š;Eð2@t/‹U‹Mì‹B49ˆ}‰ˆ‹E‰$è1ÒƒÄ ‰Ð[^_]ÉÐë…v‹Mð‹Eì‰ ‚ëÆt&U‰åƒì‹E ‰D$‹E‰D$¡‰$èÿÿÿÉö¼'U‰åƒì‰]ø‹E ‹]‰uü‹u‰D$‹E‰$è@t‰]‹]ø‰u ¡‹uü‰E‰ì]é½þÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]ô&¼'U‰åƒì‰]ô‹E‹]‰uø‹u‰}ü‹}‰D$‹E ‰4$‰D$è@t‰]‹]ô‰} ‹}ü‰u‹uø‰ì]éUþÿÿ‹]ô¸ÿÿÿÿ‹uø‹}ü‰ì]ÃvU‰åWVSƒì‹u‹M…ö”À…ÉžÂ Шºÿÿÿÿ…™‹E‹U‹0‹R9‰Uìr‹E ‹}…À‹_u¸x‰Â‹EèGûÿÿ;uì‰E ‰ñ}Q‹E‹U‹}‹@,‹R0‹w4‰Eð‰Uèëv@t$9Ž}‰ŽA;Mì}#‹Uð‹}苊‹U ‰˜‹9Ø~Ù‹E艈ëÔ‰Eì뉋U‰$è1҃ĉÐ[^_]Ãt&U‰åƒì‹E ‰D$‹E‰D$¡‰$èÿÿÿÉö¼'U‰åƒì‰]ø‹E ‹]‰uü‹u‰D$‹E‰$è@t‰]‹]ø‰u ¡‹uü‰E‰ì]é½þÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]ô&¼'U‰åƒì‰]ô‹E‹]‰uø‹u‰}ü‹}‰D$‹E ‰4$‰D$è@t‰]‹]ô‰} ‹}ü‰u‹uø‰ì]éUþÿÿ‹]ô¸ÿÿÿÿ‹uø‹}ü‰ì]ÃÖ?I…ÀÑ*E€‘ê.fileþÿgborder.c _wborderP _border _boxP _whline  _hlinep _mvhline   _wvline` _vline0 _mvvline` À .text .data.bss_stdscr_wmove _move ( 2__attr_passthru_mvwhline_mvwvline_PDC_syncclear.o/ 1249166497 502 20 100644 1090 ` L¢.textÐŒ\ P`.data@0À.bss€0ÀU¸ÿÿÿÿ‰åWVSƒì ‹}…ÿtN‹‹_‹w ‰Eð‹Uð‰Ù‹G,‹˜‹G 9Øë‰2AƒÂ9Èö‹O0‹uð‹±9Ú$Bt!‹W4H‹Mð‰Љ<$è1ÀƒÄ [^_]Ãt&‹Eð‰‹G ëÔt&U‰åƒì¡‰$èmÿÿÿÉÃt&¼'U‰åƒì‰]ô‹]‰uø‰}ü‹3‹S‹{F9Ð}&9Â~ÇCt&‰‰$è&ÿÿÿ‹@9Cî‰3‰{‰$èÿÿÿ‰$è‹]ô1À‹uø‹}ü‰ì]Ãv¼'U‰åƒì¡‰$è}ÿÿÿÉÃt&¼'U1Ò‰åS1Àƒì‹]‰T$‰D$‰$è@t 
‰]ƒÄ[]éBÿÿÿƒÄ¸ÿÿÿÿ[]ô&U‰åƒì¡‰$è­ÿÿÿÉÃt&¼'U‰å‹E…Àt Æ@$‰E]ëŒ]¸ÿÿÿÿÃt&U‰åƒì¡‰$èÍÿÿÿÉÃ]‡óJw·.fileþÿgclear.c €   $ _werase0 _erasep _wclear _clear° .textÅ.data.bss_stdscr_wmove . 8_wclrtoeol_clrtoeol_wclrtobot_clrtobot_PDC_synccolor.o/ 1249166498 502 20 100644 3093 ` Lê'.textp´<+ P`.data$@0À.bss(€@À.rdata(@0@U‰åSfƒ8ÿt&fƒ:ÿt[]á1É€x t·H [f‰ ]ö‹ »€y t·Y f‰ëÀ¶U‰åSƒì€=t €=tQfÇEøÿÿfÇEúÿÿUø1ÛEúè~ÿÿÿ´&¼'¿Eø‰D$¿Eú‰D$¿ÃC‰$èûÿ~܃Ä[]ÃfÇEúfÇEøë­´&Uºÿÿÿÿ‰åƒì¡€xuIÆÇ$è€=u ¡€x u&è>ÿÿÿÇ$(º1À‰T$‰D$è1ÒɉÐÃÇ$è…ÀtÊÆëÁt&U‰åVSƒì €=¿u¿M ¿]”Àf…öf‰MöžÂ Ðf‰]ô¨u.;5}&·‰Èf9Ñ|‹ ˜9È} f9Ó‰Ø|˜9È| ¸ÿÿÿÿƒÄ [^]ÃUôEöè9þÿÿ€¾(u&·Uö‹Eô‰4$˜‰D$¿Â‰D$èÆ†(1À뿉4$Eð‰D$Eò‰D$è·Uöf9Uòt ¡Æ@$믋Eôf9Eðuìë«t&U¡‰å]€x”À¶Àö¿U‰åƒì(‰]ô¿]¿E‰uø‰}üf…Û¿u ‰Eð¿}x;|‹]ô¸ÿÿÿÿ‹uø‹}ü‰ì]Ãè„Àtå‰ð1ÒÁèfþèŸÂ ÐuÒ‹Eð1ÒÁèf}ðèŸÂ Ðu½‰ø1ÒÁèfÿèŸÂ Ðuª‰}‹Eð‹}ü‰u ‹uø‰]‹]ô‰E‰ì]ét&¼'U‰åƒì(‰]ô¿]‹E‰uø‹u f…Û‰}ü‹}‰Eðx;À…ö” Шt¸ÿÿÿÿ‹]ô‹uø‹}ü‰ì]ö‹Mð…ÿ”À…É” ШuÕè„Àu;‰ØƒàƒøÀ%ÀþÿÿèöÉÂt@f‰öÉÂu1Òf‰öÃu1À‹Uðf‰1Àë–‰}‹Uð‹}ü‰u ‹uø‰]‹]ô‰U‰ì]é1Òë¼t&U‰åƒìèɶÀÃU‰åƒì‰$‰t$‹] ‹u¿Mf…Éx*; À…Û” Шu…öt‰M‹$‹t$‰ì]é‹$¸ÿÿÿÿ‹t$‰ì]Ãv¼'U‰åƒì(‰]ø‹]‰uü‹u ƒûÿ|0‹ 9ËÀƒþÿœÂ Шu9Î}€=u1À‹]ø‹uü‰ì]Ë]ø¸ÿÿÿÿ‹uü‰ì]Ãf‰]öUôEöf‰uôèWûÿÿÇ$Eð‰D$Eò‰D$è·Uöf9Uòt,¡Æ@$‹Eô·UöÇ$˜‰D$¿Â‰D$è1Àë„‹Eôf9EðuËëÙ¶¿U¸ÿÿÿÿ‰åƒìf£¸ÿÿÿÿ‰D$Ç$ÿÿÿÿÆèÿÿÿÉÃU‰å¿Ufƒúÿ|;| ]¸ÿÿÿÿÃt&¡f‰PT1À]ÃPDC_ORIGINAL_COLORS2Yb¨"Üèõ&û)$6;%EZ„šÉç"í#2y’!&d!×!þYt¿#Îñ"3Oañ Ã"#.fileþÿgcolor.c +P <Ð I(SP ^0 jP v …Ð —à ¥@ ¼ Ð@ .textm+.data.bss(.rdataä÷_COLORS_SP_curscr  ' = L _memset _getenv ^ m_default_colors_first_col__normalize_PDC_init_atrtab_start_color_pair_set_init_pair_has_colors_init_color_color_content_can_change_color_pair_content_assume_default_colors_use_default_colors_PDC_set_line_color_pdc_color_started_COLOR_PAIRS_PDC_color_content_PDC_init_color_PDC_can_change_color_PDC_init_pair_PDC_pair_content_PDC_set_blink delch.o/ 1249166499 502 20 100644 836 ` Lè.text Œ¬ P`.data@0À.bss€0ÀU¸ÿÿÿÿ‰åƒì(‰uø‹u‰]ô‰}ü…öt^‹F‹>‹^ ‰Eð‹F,‹MðK‹¸ˆ‰$‰Ø)ÈÁà‰D$B‰D$è‹F,‹¸‹F ‰š‹F4‹V0‰¸‹ºƒøÿt;Eð‰4$è1À‹]ô‹uø‹}ü‰ì]ËEð‰ºëáfU‰åƒì¡‰$è]ÿÿÿÉÃt&¼'U‰åƒì‹E 
‰D$‹E‰$è@t¡‰EÉé'ÿÿÿɸÿÿÿÿÃU‰åSƒì‹E‹]‰D$‹E ‰$‰D$è@t ‰]ƒÄ[]éðþÿÿƒÄ¸ÿÿÿÿ[]ÃIs— ÄÌ ü.fileþÿgdelch.c_wdelch _delch _mvdelch° à .text.data.bss_stdscr_wmove _move  _memmove _mvwdelch_PDC_syncdeleteln.o/ 1249166500 502 20 100644 1559 ` L.text0Œ¼ P`.data@0À.bss€0ÀU¸ÿÿÿÿ‰åWVSƒì‹]…Û„’‹‹C ‹{,‰Uè‹K<‰Eð‹—9щEì~1‹C0‹s4‰Eäv‹D—‰—‹EäÇ‹C H‰–‹K@ .textV.data.bss_SP_raw _noraw R _ n_PDC_check_bios_key_PDC_get_bios_key_PDC_get_ctrl_break_PDC_set_ctrl_break_PDC_get_key_PDC_check_keygetch.o/ 1249166502 502 20 100644 2933 ` LÂ%.text Œ°5 P`.data¬@0À.bss €@ÀU¸ÿÿÿÿ‰åWVSƒì,‹u…ötQ‹ 1ÿ‹AD…ÀtJ<öFu#€~%…¥‹F‹^Ø;A„„‰4$è¡…Àt:H£‹… ƒÄ,[^_]Ë^@…Ût²‰Øº…ëQ÷ê‰ØÁøÁú‰×)Çu›¿딡€xu €x„¬1À£1À£v¼'è„Àu<´&¡‹XD…Ûu ‹N@…É„»…ÿ„½OÇ$2èè„ÀtËè‹ ‰Ã€yRt€~*t¢=„ã´&ƒûÿt‹ƒû t‹ €yt €yR„€y…—€y…ƒûtt¡=ý ‰… @£ƒû ”Àƒû ” Ш„(ÿÿÿ¡éÀ~'„DÿÿÿƒÄ,¸ÿÿÿÿ[^_]Ë €y„wÿÿÿ€y…mÿÿÿ» écÿÿÿ¡;~˜Hë‰\$‰4$è‰4$è‹ éDÿÿÿÇEì‹I(ÇEðÇEà‰Mè‹ ‰M܉Mä¶Mð‹EÜÓø¨tZ¶Mà‹E𿜸Óàƒã…Eè”Â1Àfƒû”À…Ðu¸Óà…Eè”Â1Àfƒû”À…Ðtp¶Mð¸Óà1EÜ‹E܉EäÿEðƒEàƒ}ð~ŒöEÜ‹Mä‰ t ÷EèB„øöEÜ`t÷Eèu ƒeÜŸ‹M܉ ‹Uܸÿÿÿÿ…Ò…‰Ãé:þÿÿ¸Óà…Eè”Â1Àfƒû”À…Ð…rÿÿÿ¸Óà…Eè”Â1Àfƒû”À…Ð…Tÿÿÿ¸Óà…Eè”Â1Àf…Û”À…ЄKÿÿÿé2ÿÿÿ¡;Cýÿÿ‹… @£‰ÐƒÄ,[^_]á‰D$¡‰$è‰Eð…ÀtöÇEìÿÿÿÿt‰Eì‹Eì‰ÃéuýÿÿƒuÜ‹EÜ£é÷þÿÿ‹‹VÐ;A…lüÿÿ‰4$è„À„düÿÿéWüÿÿƒÄ,‰Ø[^_]Ãt&U‰åƒì‹E ‰D$‹E‰$è@t¡‰EÉé×ûÿÿɸÿÿÿÿÃU‰åSƒì‹E‹]‰D$‹E ‰$‰D$è@t ‰]ƒÄ[]é ûÿÿƒÄ¸ÿÿÿÿ[]Ãt&U‹¸ÿÿÿÿ‰åúÿ‹E‰• B£1À]Ãt&U‰åƒì踣1À£1À£1ÀÉÉö¼'U¡‰å]öU¡‰å‹UˆPP1À]ô&¼'U¡‰å‹UˆPQ]éL"Q[b”«²Á!Ñø ý! 
<jx~™»Þäõý")K°ÛU[hn}†Ž$›Ãà#LsŒ”§±¸¿Òâ.fileþÿggetch.cbuffer.0 _wgetch _c_ungch _mvgetch "0 ,p 9  CÐ Zà r .text5.data.bss _SPŒ_stdscrž± Ä _wmove _move _waddch Ò _napms ß î ø  _c_ungind_c_pindex_c_gindex_mvwgetch_PDC_ungetch_flushinp_PDC_get_key_modifiers_PDC_save_key_modifiers_PDC_return_key_modifiers_pdc_mouse_status_pdc_key_modifiers_PDC_modifiers_set_PDC_flushinp_PDC_get_key_PDC_check_key_wrefresh_is_wintouched_PDC_mouse_in_slk getstr.o/ 1249166503 502 20 100644 2682 ` L.text´% P`.data@0À.bss€0À.rdataDÄz@0@U‰åWVSƒì‹}‹M …ÿ”À…É” Шºÿÿÿÿ…ÚÇEè‹G‹] ÆEç‰Eì¡¶PˆUå¶PˆUæ¶W'ˆUäÆ@èÆG'‰<$èt&‰<$è‰Á@øƒøwÿ$…´&‹U9Uèæ¡€xR”Â1ÀùÿžÀ…Ðtˆ C€}æ…­ÿEè‰ö¼'‰<$è€}çt’Æ¡¶UæˆP¡¶UåˆP1Ò¶EäˆG'ƒÄ‰Ð[^_]Ãt&‰<$¸‰D$è;] v&€}æ…`K¶ ƒùžÀ1Ò€}æ•Â…ÂuÉ;] wÚÇEèéxÿÿÿ‰<$¾K‰t$èÿMèv;] †Wÿÿÿ€{ÿ u!€}æuÓÿMèKëå‰<$º‰T$èÿMè;] †'ÿÿÿ€{ÿ „ÿÿÿ€}æ…K¶ ƒùžÀ1Ò€}æ•Â…Âu·ÿMèëÇ€}æÆEç„éþÿÿ‰<$¸ ‰D$èéÓþÿÿ;] †Êþÿÿ€}æ…âK¶ ƒùžÀ1Ò€}æ•Â…Â…®ÿMèéþÿÿ‹Uì‹G‹5)Й÷þ)Ö‰uð1öt&;uðwþÿÿ‹E9Eè}E€}æuIÆ FCÿEèë߉L$‰<$èéBþÿÿèéDþÿÿt&‰<$¸‰D$èéŠþÿÿèFë¢f‰<$¸ ‰D$è뤉<$¹‰L$èéäþÿÿ‰<$¸‰D$èÿMèéÞýÿÿ‰<$¸‰D$èéÿÿÿ´&U¸ÿ‰åƒì‰D$‹E‰D$¡‰$èÍüÿÿÉÃt&¼'U¸ÿ‰åƒì‰D$‹E ‰D$‹E‰$èŸüÿÿÉö¼'U‰åSƒì‹E ‹]‰D$‹E‰$è@tÇEÿ‰] ¡‰EƒÄ[]éUüÿÿƒÄ¸ÿÿÿÿ[]Ãv¼'U‰åƒì‰]ø‹E‹]‰uü‹u‰D$‹E ‰$‰D$è@t‰u ‹uü‰]‹]øÇEÿ‰ì]éúûÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]Ãt&¼'U‰åƒì‹E ‰D$‹E‰D$¡‰$è¿ûÿÿÉö¼'U‰åƒì‰]ø‹E ‹]‰uü‹u‰D$‹E‰$è@t‰]‹]ø‰u ¡‹uü‰E‰ì]émûÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]ô&¼'U‰åƒì‰]ô‹E‹]‰uø‹u‰}ü‹}‰D$‹E ‰4$‰D$è@t‰]‹]ô‰} ‹}ü‰u‹uø‰ì]éûÿÿ‹]ô¸ÿÿÿÿ‹uø‹}ü‰ì]à ý3ÙÙ!p=[gt…Ôâî\f˜ô;yƒ”§½ÇÐÝæöÿ'ˆšä5pÚ        $ ( , 0 4 8 < @ .fileþÿggetstr.c _getstr _wgetstr@ p À _getnstr  #P .° .text %.data.bss.rdataD_SP_TABSIZE_stdscr_wmove _move _waddstr _beep _waddch _wgetch : _cbreak D_wgetnstr_mvgetstr_mvwgetstr_mvgetnstr_mvwgetnstr_wrefreshgetyx.o/ 1249166504 502 20 100644 896 ` L.text`Œì P`.data@0À.bss€0ÀU¸ÿÿÿÿ‰å‹U…Òt‹B]ö¿U¸ÿÿÿÿ‰å‹U…Òt‹B]ö¿U¸ÿÿÿÿ‰å‹U…Òt‹]ö¼'U¸ÿÿÿÿ‰å‹U…Òt‹B]ö¿U¸ÿÿÿÿ‰å‹U…Òt‹BH]ö¿U¸ÿÿÿÿ‰å‹U…Òt‹BD]ö¿U¸ÿÿÿÿ‰å‹U…Òt‹B]ö¿U¸ÿÿÿÿ‰å‹U…Òt‹B ]ö¿U‰åSƒì‹]‹M ƒûÿ”À1Òƒùÿ”Â…Ât¡Æ@%ƒÄ1À[]áÆ@%¡‰\$‰L$‰$èƒÄ[]à 1:J.fileþÿggetyx.c_getbegy _getbegx  _getcury@ _getcurx` _getpary€ _getparx  
_getmaxyÀ _getmaxxà _setsyx .textT.data.bss_curscr_wmove inch.o/ 1249166505 502 20 100644 664 ` Lt.textÀŒL P`.data@0À.bss€0ÀU¸ÿÿÿÿ‰å‹U…Òt‹ ‹B,‹R‹ˆ‹]ÃU‰åƒì¡‰$èÍÿÿÿÉÃt&¼'U‰åSƒì‹E ‰D$‹E‰$è@ºÿÿÿÿt¡‹P,‹‹H‹š‹ˆƒÄ‰Ð[]Ãt&U‰åSƒì‹E‹]‰D$‹E ‰$‰D$è@ºÿÿÿÿt‹ ‹C,‹S‹ˆ‹ƒÄ‰Ð[]Ã' Ub œ.fileþÿginch.c_winch _inch  _mvinch@ _mvwinch€ .text¾.data.bss_stdscr_wmove _move inchstr.o/ 1249166506 502 20 100644 1287 ` LL.textpŒü P`.data@0À.bss€0ÀU‰åWVS‹}‹] ‹u…ÿ”À…Û” ШuC…öx?‹O‹W 19Ð,‹W,‹‹‚ˆ1Éë ‹AƒÂ‰ƒÃ9ñ|ñÇ1À[^_]ÉÖ)ÎëÎ[¸ÿÿÿÿ^_]Ãt&U‰åƒì ‹‹B ‹J‰$)ȉD$‹E‰D$èiÿÿÿÉô&U‰åƒì ‹U‹B ‹J‰$)ȉD$‹E ‰D$è<ÿÿÿÉÃv¼'U‰åSƒì‹E ‹]‰D$‹E‰$è@t!‹‹B ‹J‰] ‰U)ȉEƒÄ[]éðþÿÿƒÄ¸ÿÿÿÿ[]Ãt&U‰åƒì‰]ø‹E‹]‰uü‹u‰D$‹E ‰$‰D$è@t‹C ‹S‰u ‹uü)Љ]‹]ø‰E‰ì]é–þÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]ô&U‰åƒì ‹E ‰D$‹E‰D$¡‰$è_þÿÿÉö¼'U‰åƒì‰]ø‹E ‹]‰uü‹u‰D$‹E‰$è@t‰]‹]ø‰u ¡‹uü‰E‰ì]é þÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]ô&¼'U‰åƒì‰]ô‹E‹]‰uø‹u‰}ü‹}‰D$‹E ‰4$‰D$è@t‰]‹]ô‰} ‹}ü‰u‹uø‰ì]é¥ýÿÿ‹]ô¸ÿÿÿÿ‹uø‹}ü‰ì]ÃxèñD•Ðá:.fileþÿginchstr.c _inchstrp   Ð $  0€ :° F .textm.data.bss_stdscr_wmove _move S_winchnstr_winchstr_mvinchstr_mvwinchstr_inchnstr_mvinchnstr_mvwinchnstr initscr.o/ 1249166507 502 20 100644 5201 ` L?.textд‹ P`.data„þ @0À.bss€0À.rdataŒ@0@U‰åSƒì‹…Òt 1À€:…±‹E ‰D$‹E‰$è@„æ¡Æ@¡Æ@¡Æ@¡Æ@¡Æ@P¡Æ@Q¡Æ@¡Æ@‹Ç@ÇC(ÇC,ÇC<ÇC@ÇCDfÇCTÿÿè‰C¡‹H ‹@$ƒù‰ £~ ƒøô‰D$ ¸$‰D$¡‰L$ƒÀ@‰$èÇ$èt&Æ1À‰D$ ‹B@‰ $‰D$¡‰D$è£ …À„‰$è¡€xH…‘¡Æ@$è¸ÿÿÿÿ1É1Ò£¸ÿÿÿÿ1Û£1Àf£¡f‰ ‰f‰ Æèè‰D$¸M‰D$Ç$è¡ ƒÄ[]ÉD$1Ò1Û‰\$ ‰T$‰ $装À„1À‰D$ 1À‰D$¡‰D$¡‰$装À„–‰$ºÿÿÿÿ1Û‰T$衉$èè‹‹ ‹B4)Á€=‰ déŸþÿÿ1ɉL$ ‹B@‰D$ÿB@Ç$¡‰D$è‰$‹‰T$ÿݾC‹ ‹IÿB<9؉ Ž@þÿÿ‹Ý…Ày–1À‰D$ Aÿ‰D$ë—¡‰$è¡ ‰$è¡ Æ@$¡Æ@$éGþÿÿÇ$h¡ƒÀ@‰D$ ¸‰D$¸‰D$èÇ$èÇ$ˆ¡ƒÀ@‰D$ ¸$‰D$¸‰D$èÇ$èÇ$°¡ƒÀ@‰D$ ¸$‰D$¸‰D$èÇ$èÇ$Ø¡»)‰\$¹‰L$ƒÀ@‰D$ ëȶ¿U1À‰åƒì‰D$Ç$èøûÿÿÉöU‰åƒìèè¡Æ1ÀÉÃt&U‹1À‰å…Òt€:u ¸´&]ô&¼'U1À‰åƒì‰D$Ç$èˆûÿÿ1Ò…Àt‹ɉÐô&U‰å‹E;t1À]ô&¼'U‰åƒì¡9EtÉÃè¡ ‰$衉$衉$è1À1É1Ò£ ¡‰ ‰Æè1À£ÉÃt&U‰åSƒì‹ …Û„b‹E ‰D$‹E‰$è@„I‹è‰C ‹‹K<‹C ‹S4)È)У裋‰C$‹B$‰D$‹B ‰D$¡‰$è@„D$¡‰D$¡ ‰$è@„È‹‹B$‰D$‹B ‰D$¡‰$è@„ ¡‰$è¡Æ@$‹‹B8…ÀtY¡‰D$‹B4‰D$‹B8‰$è@ºÿÿÿÿt_1À‰D$1À‰D$¡‹@8‰$è¡‹@8‰$èèè¡ ‰$è¡ ‰$èƒÄ1Ò‰Ð[]úÿÿÿÿƒÄ‰Ð[]ö¿U¡‰å]¶@ÃfU¡‰å]ÃPDCurses 3.4 - Public Domain 2008initscr(): LINES=%d 
COLS=%d: too small. pdcurses|PDCurses for %sinitscr(): Unable to create SP initscr(): Unable to create stdscr. initscr(): Unable to create curscr. initscr(): Unable to create pdc_lastscr. *>6?HQZclu´;¼ËÐäí ü=<!(1:6F6KZc4q}…Š‘—ž¦3«1´¿Ä2Éç:ì:69;C8H7NT_!f‰’:›¦"­!´ºÆÓ"ëó5ø5# A#M<TY w#ƒ<Š ­#¹<ÀÅ 0/!3€˜·Ã.ÈÐ-ÕÝ-âê-õú,)C+PU*^pu)z€–ž(ª³¼Ä(Ñäì(ø81(JU'Ze6j7o&t|%‰$²Â.fileþÿginitscr.c _initscrð _endwin 0 _newterm`  "° -  :° JÀ .textÊ‹.data.bss.rdata_TABSIZE_COLS_LINESZ_stdscr _curscr_SPgw _ttytype€… —£³_fwrite À Î Ø _wmove _wresize é ú   _delwin ) 7 F U _sprintf b r ƒ  š _werase ® _newwin ¸ _exit _fprintf Í Û_Xinitscr_isendwin_set_term_delscreen_resize_term_is_termresized_curses_version_pdc_lastscr__curses_notice_Mouse_status_pdc_mouse_status__imp___iob_linesrippedoff_linesripped_wnoutrefresh_touchwin_slk_noutrefresh_PDC_get_columns_PDC_get_rows_PDC_resize_screen_PDC_scr_free_PDC_slk_free_PDC_scr_close_def_prog_mode_PDC_sysname_def_shell_mode_PDC_init_atrtab_untouchwin_wclrtobot_PDC_slk_initialize_wattrset_PDC_get_cursor_mode_PDC_scr_open inopts.o/ 1249166508 502 20 100644 1500 ` L!.textŒŒ P`.data@0À.bss€0ÀU¡‰åÆ@1À]ÃU¡‰åÆ@¡Ç@D1À]Ãt&U¡‰åÆ@1À]ÃU¡‰åÆ@1À]ÃUºÿÿÿÿ‰å‹MAÿ=þw ¡1Ò‰HD]‰Ðö¼'U1À‰å]Éö¼'U¸ÿÿÿÿ‰å‹M‹U …ÉtˆQ*1À]ô&U¡‰å‹U ˆP1À]ô&¼'U¡‰åÆ@1À]ÃU¡‰åÆ@1À]ÃU¸ÿÿÿÿ‰å‹M‹U …ÉtˆQ'1À]ô&U1À‰å]Éö¼'U‰åƒìÇ$è¡Æ@1ÀÉÃU‰åƒìÇ$è¡Æ@1ÀÉÃU‰å]Ãt&¼'U‰å]Ãt&¼'U1À‰å]Éö¼'U‰å‹E‹U …Àt…Ò|u Æ@'Ç@@]ÉP@]ÃÆ@'ëìt&U‰åƒì‹E‰D$¡‰$è¶ÿÿÿÉÃt&U‰å]éþÿÿ´&U‰å]éþÿÿ2Bf²Òâ. 
3N SÎ.fileþÿginopts.c_cbreak  _echo0 _noecho@ P € _keypad _meta° _nlÐ _nonlà _nodelayð $ _raw  _noraw@ /` _qiflushp :€ E _timeoutÀ _crmodeà Oð .textù.data.bss_SP_stdscrY r_nocbreak_halfdelay_intrflush_notimeout_noqiflush_typeahead_wtimeout_nocrmode_PDC_set_keyboard_binaryinsch.o/ 1249166509 502 20 100644 1723 ` L¨.textŒ P`.data@0À.bss€0ÀU‰åWVSƒì‹u‹] …ö„´‰ö¼'‹‹F9V‰Uì‰EðŒ—‹N 1Ò9Á‰Mä‹EìœÂÁè Â…|‹Mð…Éxu¡1Ò€xu ÷Ãu²v‰ßçÿÿãÿÿ„Òtbƒû–Àƒû” ШtPƒû „;w9ƒû „óC@ ø‰D$‰4$èGÿÿÿƒøÿt‰ûƒË^…ö…Uÿÿÿ¸ÿÿÿÿƒÄ[^_]ÃûuˉøƒÈ?ëÉ÷Çÿ„‹÷Çÿ…‹V ‰Ð%ÿÿ ǃû „Ñ‹Mì û‹F,‹Uð‹ˆ<‹Eä‰|$)Ð…üÿÿÿ‰D$G‰$è‹Eä‹V4‹MìH‰Š‹V0‹Šƒøÿt;Eð~ ‹Mð‹Eì‰ ‚‰‰4$è1ÀƒÄ[^_]ËF Ç÷Çÿ„pÿÿÿ‹V ‰Ð%ÿékÿÿÿ‹Eð™÷=‹X¯Ú9]ð}»ƒÏ ‰|$‰4$è;þÿÿ@„ÿþÿÿÿEð9]ð|å1À뛉4$è뇷Úé'ÿÿÿ¶U‰åƒì‹E‰D$¡‰$èöýÿÿÉÃt&U‰åSƒì‹E ‹]‰D$‹E‰$è@t‰] ¡‰EƒÄ[]é¼ýÿÿƒÄ¸ÿÿÿÿ[]ÃU‰åƒì‰]ø‹E‹]‰uü‹u‰D$‹E ‰$‰D$è@t‰u ‹uü‰]‹]ø‰ì]éqýÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]ÃfU‰åS‹M ‹]·Áƒø–ƒø”À ШtɉM ‰][]é.ýÿÿ´&¼'U‰åƒì‹E‰D$¡‰$è¦ÿÿÿÉÃt&U‰åSƒì‹E ‹]‰D$‹E‰$è@t‰] ¡‰EƒÄ[]élÿÿÿƒÄ¸ÿÿÿÿ[]ÃU‰åƒì‰]ø‹E‹]‰uü‹u‰D$‹E ‰$‰D$è@t‰u ‹uü‰]‹]ø‰ì]é!ÿÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]ÃW=m¡§Üþ(3tî#d.fileþÿginsch.c_winsch _inschð _mvinsch P   à # /@ .textŽ.data.bss_SP_TABSIZE_stdscr_wmove _move < _memmove F Q_mvwinsch_winsrawch_insrawch_mvinsrawch_mvwinsrawch_PDC_sync_wclrtoeol insstr.o/ 1249166510 502 20 100644 1350 ` L€.textŒ P`.data@0À.bss€0ÀU‰åWVSƒì ‹}‹u ‹]…ÿ”À…ö” ШºÿÿÿÿuF‰4$è‰Á‰ØÁè1Ò9˜ Ðu6ë …Ût,K¶3‰<$‰D$è@uèºÿÿÿÿƒÄ ‰Ð[^_]É˅Ûu×vƒÄ 1Ò‰Ð[^_]Ãt&U¸ÿÿÿÿ‰åƒì‰D$‹E‰D$¡‰$èMÿÿÿÉÃt&¼'Uºÿÿÿÿ‰åƒì‹E ‰T$‰D$‹E‰$èÿÿÿÉö¼'U‰åSƒì‹E ‹]‰D$‹E‰$è@tÇEÿÿÿÿ‰] ¡‰EƒÄ[]éÕþÿÿƒÄ¸ÿÿÿÿ[]Ãv¼'U‰åƒì‰]ø‹E‹]‰uü‹u‰D$‹E ‰$‰D$è@t‰u ‹uü‰]‹]øÇEÿÿÿÿ‰ì]ézþÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]Ãt&¼'U‰åƒì‹E ‰D$‹E‰D$¡‰$è?þÿÿÉö¼'U‰åƒì‰]ø‹E ‹]‰uü‹u‰D$‹E‰$è@t‰]‹]ø‰u ¡‹uü‰E‰ì]éíýÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]ô&¼'U‰åƒì‰]ô‹E‹]‰uø‹u‰}ü‹}‰D$‹E ‰4$‰D$è@t‰]‹]ô‰} ‹}ü‰u‹uø‰ì]é…ýÿÿ‹]ô¸ÿÿÿÿ‹uø‹}ü‰ì]Ã+a§dµðZ.fileþÿginsstr.c _insstr _winsstrÀ ð @ _insnstr  #Ð .0 .text .data.bss_stdscr_wmove _move _winsch _strlen :_winsnstr_mvinsstr_mvwinsstr_mvinsnstr_mvwinsnstrinstr.o/ 1249166511 502 20 100644 1223 ` L<.text`Œì P`.data@0À.bss€0ÀU‰åW‹} V‹uS‹]…ö”À…ÿ” Шºÿÿÿÿu/…Ûx2‹N‹V 
9Ð+‹V,‹‹‚1Ò ˆë‹‘ˆB9Ú|õÆ[‰Ð^_]ËN‹V ‰Ó)ËëÏv¼'U‰åƒì ‹‹B ‰$‰D$‹E‰D$ènÿÿÿÉ@•À¶ÀHÃt&U‰åƒì ‹U‹B ‰$‰D$‹E ‰D$èAÿÿÿÉ@•À¶ÀHô&U‰åƒì‹E ‰D$‹E‰$è@ºÿÿÿÿt&‹‹B ‰$‰D$‹E‰D$èôþÿÿ@•À¶ÀPÿɉÐöU‰åSƒì‹E‹]‰D$‹E ‰$‰D$è@ºÿÿÿÿt ‹C ‰$‰D$‹E‰D$è¢þÿÿ@•À¶ÀPÿƒÄ‰Ð[]ÃU‰åƒì ‹E ‰D$‹E‰D$¡‰$èoþÿÿÉö¼'U‰åƒì‰]ø‹E ‹]‰uü‹u‰D$‹E‰$è@t‰]‹]ø‰u ¡‹uü‰E‰ì]éþÿÿ‹]ø¸ÿÿÿÿ‹uü‰ì]ô&¼'U‰åƒì‰]ô‹E‹]‰uø‹u‰}ü‹}‰D$‹E ‰4$‰D$è@t‰]‹]ô‰} ‹}ü‰u‹uø‰ì]éµýÿÿ‹]ô¸ÿÿÿÿ‹uø‹}ü‰ì]Ãxäò<…ÀÑ*.fileþÿginstr.c_winnstr _instrp _winstr  _mvinstrÐ   _innstrp    .text].data.bss_stdscr_wmove _move #_mvwinstr_mvinnstr_mvwinnstr kernel.o/ 1249166512 502 20 100644 2283 ` Læ(.textŒœ! P`.data@0À.bss(€@ÀUkÐ\‰åSƒì‰ÃÆ‚¸X ‰D$¡‰$‰D$è‰$èƒÄ[]Ãë U‰åƒì‰]ôkØ\‰uø‰Æ‰}ü€»tkÆ\‹]ô‹uø‹}ü¶€<À‰ì]úX» ‰T$‰|$¡‰$耻ul‰4$è‹O “; tZ‹B ‰ $‰D$èkÞ\‹ƒ$‰$苃 ‰D$‹ƒ‰$èkÆ\‹]ô‹uø‹}ü¶€<À‰ì]Ãèë¡9B uœë©U¸‰åƒìèÀþÿÿÉ1Àö¿U1À‰åƒìè£þÿÿÉ1ÀÃë U¸‰åƒìèÐþÿÿèÉ1Àô&U1À‰åƒìè³þÿÿèÉ1ÀÃv¼'U¸‰å]é’þÿÿfU¸‰åƒìè0þÿÿÉ1Àö¿U¸ÿÿÿÿ‰åƒì‰]ø‹]‰uüƒûw‰$è‰Æ…Û•À1Ò…ö”Â…Âu ‰ð‹]ø‹uü‰ì]á‹P‰T$‹@‰$èëÚ´&U‰åƒì‹E…ÀuÉ1ÀÉ$èÉ1ÀÃvU‰åƒì‰$‰t$‹]‹u ¶ €ùžÀ1Ò…Û•Â…Ât*…öt&¾ÁþÁ‰ʼn4Å1Àˆ ‹$‹t$‰ì]Ë$¸ÿÿÿÿ‹t$‰ì]Ãt&¼'U‰å]égÿÿÿ´&U‰å]é§þÿÿ´&U‰å]éwþÿÿ´&U‰å]é'þÿÿ#/'7&f|œ¤'ªµ$¾ÄÕ#Þæ"ìöþ!%% žý"!3!Uv—ž¦.fileþÿgkernel.c _cttyP 0 .P >p O _resetty° _savettyÀ aà _napms@ k` _drainoÐ wà _fixtermð ‚ .text !.data.bss(Œœ0_SP_LINES_COLS© ´ Ê ß ë ù  _raw  _memcpy 5__save_mode__restore_mode_def_prog_mode_def_shell_mode_reset_prog_mode_reset_shell_mode_curs_set_ripoffline_resetterm_saveterm_linesrippedoff_linesripped_PDC_napms_PDC_reset_shell_mode_PDC_reset_prog_mode_PDC_gotoyx_PDC_curs_set_resize_term_PDC_restore_screen_mode_PDC_save_screen_mode keyname.o/ 1249166513 502 20 100644 7423 ` Lâ.text`´\ P`.data z$@`À.bss€0À.rdata¨ ´@0@U‰å‹E]-=#–À¶Àô&U‰åSƒì‹]ƒûv!‰$èÉÿÿÿ„Àºt‹üûÿÿ‰ÐZ[]ÃvY[]é #-6DKT]foxŠ“œ¦°ºÄÎØâìö (2<FPZdnx‚Œ– ª´¾ÈÒÜæðú",6@JT^hr|†š¤®¸¿ÆÍÔÜæîöý!*4>IS]dnx‚‹“ž¨´½ÈÐÙâëô '4=KWcozƒŒ˜¥¯»ÃËÖßèòü$1;ER`ku‚˜¢¯¹ÅÎÔÚàæìòøþ "(.4:@FLRX^djpv|‚ˆŽ”𠦝¹ÂËÔÜãêñøÿ $-:GOW`ht‚Ž›¨´ÀÍÚæòú "*4>GPYahq{„Ž–¡®»ÉÔÞçòû   $ - 6 ? 
H Q Z c l u ~ ‡ ™ ¢ « ´ ½ Å Ð Ú ç ô  ) 0 7 A M Y g u  ‰ ” œ KEY_BREAKKEY_DOWNKEY_UPKEY_LEFTKEY_RIGHTKEY_HOMEKEY_BACKSPACEKEY_F0KEY_F(1)KEY_F(2)KEY_F(3)KEY_F(4)KEY_F(5)KEY_F(6)KEY_F(7)KEY_F(8)KEY_F(9)KEY_F(10)KEY_F(11)KEY_F(12)KEY_F(13)KEY_F(14)KEY_F(15)KEY_F(16)KEY_F(17)KEY_F(18)KEY_F(19)KEY_F(20)KEY_F(21)KEY_F(22)KEY_F(23)KEY_F(24)KEY_F(25)KEY_F(26)KEY_F(27)KEY_F(28)KEY_F(29)KEY_F(30)KEY_F(31)KEY_F(32)KEY_F(33)KEY_F(34)KEY_F(35)KEY_F(36)KEY_F(37)KEY_F(38)KEY_F(39)KEY_F(40)KEY_F(41)KEY_F(42)KEY_F(43)KEY_F(44)KEY_F(45)KEY_F(46)KEY_F(47)KEY_F(48)KEY_F(49)KEY_F(50)KEY_F(51)KEY_F(52)KEY_F(53)KEY_F(54)KEY_F(55)KEY_F(56)KEY_F(57)KEY_F(58)KEY_F(59)KEY_F(60)KEY_F(61)KEY_F(62)KEY_F(63)KEY_DLKEY_ILKEY_DCKEY_ICKEY_EICKEY_CLEARKEY_EOSKEY_EOLKEY_SFKEY_SRKEY_NPAGEKEY_PPAGEKEY_STABKEY_CTABKEY_CATABKEY_ENTERKEY_SRESETKEY_RESETKEY_PRINTKEY_LLKEY_ABORTKEY_SHELPKEY_LHELPKEY_BTABKEY_BEGKEY_CANCELKEY_CLOSEKEY_COMMANDKEY_COPYKEY_CREATEKEY_ENDKEY_EXITKEY_FINDKEY_HELPKEY_MARKKEY_MESSAGEKEY_MOVEKEY_NEXTKEY_OPENKEY_OPTIONSKEY_PREVIOUSKEY_REDOKEY_REFERENCEKEY_REFRESHKEY_REPLACEKEY_RESTARTKEY_RESUMEKEY_SAVEKEY_SBEGKEY_SCANCELKEY_SCOMMANDKEY_SCOPYKEY_SCREATEKEY_SDCKEY_SDLKEY_SELECTKEY_SENDKEY_SEOLKEY_SEXITKEY_SFINDKEY_SHOMEKEY_SICUNKNOWN 
KEYKEY_SLEFTKEY_SMESSAGEKEY_SMOVEKEY_SNEXTKEY_SOPTIONSKEY_SPREVIOUSKEY_SPRINTKEY_SREDOKEY_SREPLACEKEY_SRIGHTKEY_SRSUMEKEY_SSAVEKEY_SSUSPENDKEY_SUNDOKEY_SUSPENDKEY_UNDOALT_0ALT_1ALT_2ALT_3ALT_4ALT_5ALT_6ALT_7ALT_8ALT_9ALT_AALT_BALT_CALT_DALT_EALT_FALT_GALT_HALT_IALT_JALT_KALT_LALT_MALT_NALT_OALT_PALT_QALT_RALT_SALT_TALT_UALT_VALT_WALT_XALT_YALT_ZCTL_LEFTCTL_RIGHTCTL_PGUPCTL_PGDNCTL_HOMECTL_ENDKEY_A1KEY_A2KEY_A3KEY_B1KEY_B2KEY_B3KEY_C1KEY_C2KEY_C3PADSLASHPADENTERCTL_PADENTERALT_PADENTERPADSTOPPADSTARPADMINUSPADPLUSCTL_PADSTOPCTL_PADCENTERCTL_PADPLUSCTL_PADMINUSCTL_PADSLASHCTL_PADSTARALT_PADPLUSALT_PADMINUSALT_PADSLASHALT_PADSTARALT_PADSTOPCTL_INSALT_DELALT_INSCTL_UPCTL_DOWNCTL_TABALT_TABALT_MINUSALT_EQUALALT_HOMEALT_PGUPALT_PGDNALT_ENDALT_UPALT_DOWNALT_RIGHTALT_LEFTALT_ENTERALT_ESCALT_BQUOTEALT_LBRACKETALT_RBRACKETALT_SEMICOLONALT_FQUOTEALT_COMMAALT_STOPALT_FSLASHALT_BKSPCTL_BKSPPAD0CTL_PAD0CTL_PAD1CTL_PAD2CTL_PAD3CTL_PAD4CTL_PAD5CTL_PAD6CTL_PAD7CTL_PAD8CTL_PAD9ALT_PAD0ALT_PAD1ALT_PAD2ALT_PAD3ALT_PAD4ALT_PAD5ALT_PAD6ALT_PAD7ALT_PAD8ALT_PAD9CTL_DELALT_BSLASHCTL_ENTERSHF_PADENTERSHF_PADSLASHSHF_PADSTARSHF_PADPLUSSHF_PADMINUSSHF_UPSHF_DOWNSHF_ICSHF_DCKEY_MOUSEKEY_SHIFT_LKEY_SHIFT_RKEY_CONTROL_LKEY_CONTROL_RKEY_ALT_LKEY_ALT_RKEY_RESIZEKEY_SUPKEY_SDOWN: CT         $ ( , 0 4 8 < @ D H L P T X \ ` d h l p t x | € „ ˆ Œ  ” ˜ œ   ¤ ¨ ¬ ° ´ ¸ ¼ À Ä È Ì Ð Ô Ø Ü à ä è ì ð ô ø ü            $ ( , 0 4 8 < @ D H L P T X \ ` d h l p t x | € „ ˆ Œ  ” ˜ œ   ¤ ¨ ¬ ° ´ ¸ ¼ À Ä È Ì Ð Ô Ø Ü à ä è ì ð ô ø ü            $ ( , 0 4 8 < @ D H L P T X \ ` d h l p t x | € „ ˆ Œ  ” ˜ œ   ¤ ¨ ¬ ° ´ ¸ ¼ À Ä È Ì Ð Ô Ø Ü à ä è ì ð ô ø ü            $ ( , 0 4 8 < @ D H L P T X \ ` d h l p t x | € „ ˆ Œ  ” ˜ œ   ¤ ¨ ¬ ° ´ ¸ ¼ À Ä È Ì Ð Ô Ø Ü à ä è ì ð ô ø ü            $ ( , 0 4 8 < @ D H L P T X \ ` d h l p t x | € „ ˆ Œ .fileþÿgkeyname.c_has_key _keyname  .textX.data$.bss.rdata¦ _unctrl key_name.0 mouse.o/ 1249166514 502 20 100644 2768 ` L°.text€Œ * P`.data@0À.bss€@ÀU¡‰å‹U‰P(]é¶¿U‹‰å‹M‹B( 
ȉB(]é¶U‹‰å‹E÷Ð!B(]é‰ö¼'U¡‰å‹U‰P,1À]ô&¼'U¡‰å]£¡£¡£¡ £ ¡£1Àô&U‰åƒì‰$‹M1Û‰t$‹E ‹u…Ét$‹Q9ÂQ9Â~‹Q9ò‹A Â9ò~»‰Ø‹t$‹$‰ì]ÃvU‰åƒì‰]ô‹]‰uø‹u …Û‰}ü‹}t¡‰D$¡‰$‰D$èzÿÿÿ„Àu&…ötÇÿÿÿÿ…ÿtÇÿÿÿÿ‹]ô‹uø‹}ü‰ì]Ãt&…öt ¡‹K)ȉ…ÿtÚ‹S¡)Љ‹]ô‹uø‹}ü‰ì]ÃvU¡‰å]‹@(ÃvU¡‰å]‹@,ÃvU‹‰å‹M‹B0ùèw‰J0]Ãt&U‰åƒì‰}ü‹}‹U ‰]ô…ÿ”À‰uø…Ҕ¶M ШuW‹E…ÀtP‹E „É‹‹E‹0tS‹GËG‰\$‰<$Ɖt$èˆþÿÿ1Ò„Àt$‹E º‰‹E‰0‹]ô‰Ð‹uø‹}ü‰ì]Ãt&1Ò‹]ô‰Ð‹uø‹}ü‰ì]Ét$‰\$‰<$è?þÿÿ1Ò„ÀtÛ‹Gº)ËG)Æ‹E ‰‹E‰0ë«´&¼'U‰åƒì¶E‰D$ ‹E ‰D$‹E‰D$¡‰$èÿÿÿɶÀÃfU‰åƒì‹M ‹U…Ét ¡‹@(‰âï½ÿÿ‰$èøüÿÿ¡‹@(Éô&¼'U¸ÿÿÿÿ‰åWVSƒì1Û‹}…ÿ„ÁÆè9ýÿÿÇEð‹E1ÿ‹UfÇ¡‰B¡ÇB ‰B‹5´&¶Mð‰ðÓø¨tD‹E𷔉Ѓàud¸‰ùÓà ÿ¨t˨t˨ t ˶ÿEðƒÇƒ}ð~¤÷Æ t<Ë¡‹p(‹E!ó‰X1ÀY[^_]Ãfƒøt%fƒøt)fƒøu•¸ëˆt&ƒæ@tÅË ë½¸élÿÿÿ¸ébÿÿÿë U‰åWVSƒì‹U…Ò„ü€=…ïÆ1Û1ö‹B1ÿ£‹B‹R‰1Û£´&ÇEð¸ˆÙÓà…Ðtl‰ñ¸Óà LjٸÓà…ÐtÇEð¸ˆÙÓà…ÐtÇEð¸ˆÙÓà…ÐtÇEð÷ÂtƒMð÷ÂtƒMð÷ÂtƒMð ‹EðƒÃf‰„6FƒþŽhÿÿÿ‰=÷Ât*ƒÏ ‰=ÇEY[^_]éZ¸ÿÿÿÿ[^_]Ãt&â t׃Ï@ëÌ#Cb‚Š”™ž£¨­²-6uˆ¢²ÃÍñ =\dt“ëfs‚Ž•*:K6S\.fileþÿgmouse.c_ungot   @ $` 0€ CÀ M ^  _getbmap° hÀ wà …° ’à   ªP .text}*.data.bss_SP¶È_stdscrÖ ã ò_mouse_set_mouse_on_mouse_off_map_button_request_mouse_pos_wenclose_wmouse_position_getmouse_mouseinterval_wmouse_trafo_mouse_trafo_mousemask_nc_getmouse_ungetmouse_pdc_mouse_status_Mouse_status_PDC_ungetch_PDC_mouse_setmove.o/ 1249166515 502 20 100644 562 ` LV .textÀŒL P`.data@0À.bss€0ÀU1Ò‰åƒì‰$‰t$‹] ‹ ‹u‰Ø…É”ÂÁè Âu …öx9Y ‹$¸ÿÿÿÿ‹t$‰ì]Ã9q~ë‰Y1À‰1‹$‹t$‰ì]ô&U1Ò‰åƒì‰$‰t$‹]‹M‹u …ۉȔÂÁè Âu …öx9K ‹$¸ÿÿÿÿ‹t$‰ì]Ã9s~ê‰K1À‰3‹$‹t$‰ì]à .fileþÿgmove.c_move _wmove` .text·.data.bss_stdscroutopts.o/ 1249166516 502 20 100644 975 ` L.textpŒü P`.data@0À.bss€0ÀU¸ÿÿÿÿ‰å‹M‹U …ÉtˆQ$1À]ô&U1À‰å]Éö¼'U‰å]Ãt&¼'U‰å‹U‹E …ÒtˆB(]ô&¼'U¸ÿÿÿÿ‰åSƒì‹]‹U …ÛtˆS%1À„Ò”À‰$è1ÀZ[]ÃfU‰åƒì‰$‰t$‹M ‹]‹u‰È…Û÷ЕÂÁè…Ðt‹9È|9ð9st&¼'‹$¸ÿÿÿÿ‹t$‰ì]ÉK81À‰s<‹$‹t$‰ì]ö¼'U‰åƒì ‹E ‰D$‹E‰D$¡‰$èoÿÿÿÉö¼'U¸ÿÿÿÿ‰å‹M‹U …ÉtˆQ&1À]ô&U¡‰å‹UˆP1À]ÄR.fileþÿgoutopts.c_clearok _idlok  _idcok0 _immedok@ _leaveok`   0 %P .textb.data.bss_stdscr_SP1 ;_wsetscrreg_setscrreg_scrollok_raw_output_curs_set overlay.o/ 1249166517 502 20 100644 1692 ` L†.textðŒ| P`.data@0À.bss€0ÀU‰Ñ‰åWVSƒì(‰Ã¶E ÇEÜ‹U‹u )U)u…ÛˆEó”À…É” 
Шºÿÿÿÿ…‹Q4‹A0‰Uà‹U‰Eä1À9Ðë ƒEä@ƒEà;E|òÇEì‹E9Eìá‹UÁæ‹[,‰uØ‹I,Áâ‰]ЉM̉UÔë ÇEèÿÿÿÿ‹U‹Eì‹]‹uØЋUЋ<‚‹Eì‹UÌþØ‹ ‚1ÿ‹]ÔË;}}C¶¿‹; t%fƒù •À€}ó” Шtƒ}èÿ‰ ‹Ut[‰E܃ƃÃG;}|É‹U䋃øÿtGƒ}èÿt;Eè~‹Eè‹U䉋Uà‹EÜ9}‰ÿEì‹UƒEäƒEà9UìŒIÿÿÿ1ÒƒÄ(‰Ð[^_]ÉEèë ‹E艋EÜ‹Uà‰ëɉö¼'U‰åWVSƒì0‹M‹U …É”À…Ò” Шºÿÿÿÿ…Ö‹E ‹U‹@‹r‰Eä9ƉuìŒÆ‹] ‹E‹[‹x‰]à9߉}ð}‰]ð‹U ‹]ä‹B ‹UËJ ‰ðÈ9Ã~‰Ã‹E ‹Mà‹P‰øÑ‹UB9Á~‰Á;]ìœÀ;MðœÂ Ð1Ò¨u[‹Eì)ËEð)Á;uä‰Mèk‹Mä)ñ1ö;}àO‹Uà1À)ú‰D$‹]è‰D$ ¿‰$‹U ‰D$‹E‰|$‰t$‰L$è„ýÿÿ‰ÂƒÄ0‰Ð[^_]ÉEìé2ÿÿÿ‹Uà‰ø)Ð1Òë­t&‹Mä)Î1Éë“´&U‰åWVSƒì0‹M‹U …É”À…Ò” Шºÿÿÿÿ…Ó‹E ‹U‹@‹r‰Eä9ƉuìŒÃ‹] ‹E‹[‹x‰]à9߉}ð}‰]ð‹U ‹]ä‹B ‹UËJ ‰ðÈ9Ã~‰Ã‹E ‹Mà‹P‰øÑ‹UB9Á~‰Á;]ìœÀ;MðœÂ Ð1Ò¨uX‹Eì)ËEð)Á;uä‰Mèk‹Mä)ñ1ö;}àO‹Uà1À)ú‰D$‹]è‰D$ 1ÿ‰$‹U ‰D$‹E‰|$‰t$‰L$èWüÿÿ‰ÂƒÄ0‰Ð[^_]ÉEìé5ÿÿÿv‹Uà‰ø)Ð1Òë­t&‹Mä)Î1Éë“´&U‰åƒì$‰]ô‹E‹U‰uø‹] ‹M ‰Eð‹E‹u$‰Uì‹U‰Eè‹E‰Uä‹U(‰Eà‹Eð‰U܉}ü…À”À…Û” Ш… ;„”9KŒ‹1Ò9s ‹EäœÂÁè Âuy‹Eà…Àxr‹Uð‹}ì‹B)ø‹z ‹Uè)׋Uä)Ñ‹Uà)ÖQF9Â~‰Â9þ‰ðW‹Mì‹uø‹}üÑ‹Uè‰M¶E܉U‹Uè‰E ‹Eà‰U ‰Ú‹]ô‰E‹Eä‰E‹Eì‰E‹Eð‰ì]é-ûÿÿ‹]ô¸ÿÿÿÿ‹uø‹}ü‰ì]Éøë¥5 .fileþÿgoverlay.c _overlay€ ° _copywinà .texté.data.bss_curscr__copy_win_overwritepad.o/ 1249166518 502 20 100644 2457 ` L.text Œ¬" P`.data@0À.bss`€@ÀU¸ÿÿÿÿ‰åƒì(‰uø‹u‰}ü‹} ‰]ô‰D$ ¸ÿÿÿÿ‰D$‰|$‰4$è…Àty‰$è…À‰Ãtk‰$èÇC1Ò1À‰ ‹1É£1À9Ö£0‰ð‰ )‹H£@‰ø9×~‰ÐH£P‹uø‰Ø‹}ü‹]ô‰ì]ÉÐëÓt&‹]ô1À‹uø‹}ü‰ì]ÃU‰åƒì8‰uø‹u‰]ô‹]…ö‰}ü‹} „PöF„F‹F9Ø;‹N;M/;‰Uà‹VÐ9Eà‰Uì‹E‹U‹F Á9ʉEè…ÿ„‹E…Àu ‹U)Uè‹EèH‰E‰\$‹U‹E‰<$‰T$ ‰D$è‰Eð‹Mð1À…É„¼‹Uð1É‹F9ù‰rL‰B¶F%ˆB%¶F&ˆB&¶F'ˆB'¶F*ˆB*}0‹B,‹U‹v,‰EäÁâ‰Uàv¼'‹žC‹EàЋU䉊A9ù|ì‹Eð1ÒÇ@ 1À‰‹£1À£ 1À9×£0‰ø~‰ÐH‹£@‹E9Ð~‰ÐH£P‹Eðë ´&1À‹]ô‹uø‹}ü‰ì]Ã)]ì‹}ìOéíþÿÿt&U‰åƒì‰]ô‹E ‹]‰uø‰Eð‹E‰}ü…À„΋UöB0„Á‹M; ²; ¦‹E ÷ÐÁø!E ‹E÷ÐÁø!E‹E÷ÐÁø!E‹E÷ÐÁø!E‹E9E‹MœÂ9M œÀ Шºÿÿÿÿ…^‹E‹}‹P ‹E )ú)È@9Â~‰Â;]°üÁâ‰Uì‹M‹Uð9QŽŒ¡‹}‹Q,‹@,Áç‹4˜‹Eð÷‹u‹ ‚‹UìÁæ΃úv÷Çt ‹ƒêƒÆ‰ƒÇ‰ÑÁéó¥‹ ‹Q0‹šƒøÿt;E~‹E‰š‹A4‹U 9˜}‰˜‹M‹Uð‹A0Çÿÿÿÿ‹A4ÇÿÿÿÿÿEðC;]ŽXÿÿÿ‹M€y$t ÆA$¡Æ@$‹]€{%uY‹;U |R‹K;M|J‹E‹} )E‹Eø9Â8‹]‹u)] ‹E ð9Á&‹} ‹u¡)úò‰‹U)щPv¼'‹]ô1Ò‹uø‹}ü‰ì‰Ð]úÿÿÿÿ‹]ô‰Ð‹uø‹}ü‰ì]Ãt&¼'U‰åƒì(‹E ‰D$‹E‰D$‹E‰D$‹E‰D$ ‹E‰D$‹E ‰D$‹E‰$èµýÿÿ@ºÿÿÿÿtè1ÒɉÐÃfU‰åSƒì$‹]‹E ‰$‰D$è@ºÿÿÿÿt@‰$¡P‰D$¡@‰D$¡0‰D$¡ ‰D$ ¡‰D$¡‰D$èQÿÿÿ‰ÂƒÄ$‰Ð[]Ã/;IZ`gpx€†”bäêïöÿ  …‘ 
iƤÅÕÞçðù.fileþÿgpad.c_newpad   0.<@JP_subpadÀ XP f` p° .text".data.bss`_LINES_COLS_curscr_waddch { _werase … ” ¡_save_sminrow_save_pminrow_save_smincol_save_pmincol_save_smaxrow_save_smaxcol_pnoutrefresh_prefresh_pechochar_doupdate_PDC_makelines_PDC_makenew panel.o/ 1249166519 502 20 100644 4070 ` L 0.textÀ´|A P`.data@0À.bss@€`À.rdatat@0@U‰åV‰ÆSƒì‹@ ë ‹‰$è‰Ø…ÀuðÇF ƒÄ[^]ÃfU‰åW‰ÇVSƒì‹@ ƒú‰Eð„í…Ò„£B„¸ƒ}ðt<‹Eð‹p9þt%‹_‹G9Ø~¶¿9^9^C9Øñ‹Eð‹…À‰EðuŃÄ[^_]ËW‰Ø)ЉD$‹‰$è„Àu‰\$¡‰$è„Àt¹‰Ø‰L$‹V)ЉD$‹‰$è‹GCë–‹‰$衉$èéHÿÿÿ‹Mð…Étƒ‹Eðë‹Eð‹…À‰Eð„nÿÿÿ9xuëé"ÿÿÿ‹ëÉt&U‰åWVSƒì ‹5(…ö„›‰ö¼'‹^ …Û…»‹(1ÿ…Ûtd‰ö¼'…ö”À…Û” Ð1ɨu>‹F‹S9Ð|;C| 9Â|+;V}&‹F ‹S 9Ð|;C| 9Â|;V}¹‰ö¼'„Éu,‹[…Ûu¥‰ðºèIþÿÿ‹v…ö…nÿÿÿƒÄ [^_]öÇ$è…Àtâ‰X…ÿÇt‰‰Ç‹[뱉ðèÓýÿÿé9ÿÿÿ‰F ‰Çëè´&U‹(‰åë t&9Ât ‹R…Òuõ]1ÀÃ]¸Ãv¼'U¸ÿÿÿÿ‰åSƒì‹]…ÛtX‰Øè¶ÿÿÿ„À„†1Ò‰Øè•ýÿÿ‰Øè^ýÿÿ‹S‹C…Òt4‰B…Àt‰P;(tB;$tJèuþÿÿÇC1ÀÇCZ[]Ã…ÀtÒ;(Ç@u˶¼';$£(u¹v‰$ë®ÇC¸ÿÿÿÿÇCZ[]ÃU¸ÿÿÿÿ‰åSƒì‹]…ÛtN1À;(tD‰Øèìþÿÿ„Àu=ÇC¡(ÇC…Àt‰X‰C‰(¡$…Àu‰$è±ýÿÿ1ÀY[]É$èÓþÿÿë¹U¸ÿÿÿÿ‰åSƒì‹]…Ût‰Øè†þÿÿ„Àu‰$è1ÀZ[]Ãt&‰$è˜þÿÿëä¶Uºÿÿÿÿ‰åWVSƒì ‹u…ötp‰ðèDþÿÿ„Àuw‹E‹>‰D$‹E ‰<$‰D$è@ºÿÿÿÿtE‰<$è‰F‰<$è‰F ‰<$è‰<$‰Ãè‹~‹N û‰^ȉF‰ðèáýÿÿ„Àu 1ÒƒÄ ‰Ð[^_]ÃèËüÿÿëì1Ò‰ðè°ûÿÿé{ÿÿÿt&¼'U¸ÿÿÿÿ‰åSƒì‹]…ÛtO1À;$tE‰ØèŒýÿÿ„Àu>ÇC¡$ÇC…Àt‰X‰C‰$‹ (…Éu‰(èPüÿÿ1ÀZ[]É$èrýÿÿë¸U‰åƒìÇ$$‰uø‰}ü‹}‰]ôè‰Æ¡…Àu=¡£1À£1À£ ¡£¡£¸£1À£ …öt^‰>‰<$ÇFÇFè‰F‰<$è‰F ‰<$è‰<$‰ÃèÇF‹~ÇF ‰4$û‰^‹^ ؉FèÈþÿÿ‰ð‹]ô‹uø‹}ü‰ì]Éö¼'U‰å‹E…Àt]‹@Ã]¡(Éö¼'U‰å‹E…Àt]‹@Ã]¡$Éö¼'U¸ÿÿÿÿ‰å‹U…Òt ‰Ðè üÿÿ<À÷Ð]ÃfU1À‰å‹U…Òt‹B]Ãë U‰å‹E]‹Ã¶U¸ÿÿÿÿ‰åƒì‰uø‹u‰}ü‹} …ö‰]ôtR‰ðè«ûÿÿ„Àu^‰>‰<$è‰F‰<$è‰F ‰<$è‰<$‰Ãè‹N‹V ˉ^ЉF‰ðèfûÿÿ„Àu1À‹]ô‹uø‹}ü‰ì]ÃvèKúÿÿëç1Ò‰ðè0ùÿÿë—´&¼'U¸ÿÿÿÿ‰å‹U…Òt‹E ‰B1À]ô&U‰å]éWýÿÿ´&U‰åSƒì‹(ë‰ØºÿÿÿÿèÔøÿÿ‹[…Ûuí¡‰$è„Àu8‹(…Ût*f‹‰$è„Àu‹C…Àu ‹‰$è‹C‰Ã…ÛuØ[[]á‰$èë¹stdscr/´-Á!É-ë,û..Km+C¸ÀæG`w|†Â/*$)/(:'D&§À×Ýç+!*!/6=B"GL#QV[b~)‰(”'ž&òŸ)ª(µ'¿&Id!l%v†%›$­µ$.fileþÿgpanel.c 0 @ 2@ Dp P0 ^  ià u  Œà ™ ¦  ´@ Ã` Ñp à ó0 þ@ .text»A.data.bss,.rdata #$/(_stdscr_LINES_COLS> L _getmaxx _getmaxy _getbegx _getbegy _mvwin _malloc [ f v _free 
€__free_obscure__override__calculate_obscure__panel_is_linked_hide_panel_bottom_panel_del_panel_move_panel_show_panel_new_panel_panel_above_panel_below_panel_hidden_panel_userptr_panel_window_replace_panel_set_panel_userptr_top_panel_update_panels__stdscr_pseudo_panel__top_panel__bottom_panel_wnoutrefresh_is_wintouched_touchline_is_linetouched_touchwinprintw.o/ 1249166520 502 20 100644 1001 ` LH.text€Œ  P`.data@0À.bss€0ÀU‰åì(‰]ø‹Eèýÿÿ‰uü‰D$ ‹E ‰$‰D$¸‰D$è‰\$‰Æ‹E‰$è@‰òt ‹]ø‰Ð‹uü‰ì]Ë]øºÿÿÿÿ‹uü‰ì‰Ð]Éö¼'U‰åE ƒì‰D$‹E‰D$¡‰$èoÿÿÿÉö¼'U‰åEƒì‰D$‹E ‰D$‹E‰$èAÿÿÿÉÃë U‰åƒì‹E ‰D$‹E‰$è@ºÿÿÿÿtE‰D$‹E‰D$¡‰$èõþÿÿ‰ÂɉÐÃë U‰åSƒì‹E‹]‰D$‹E ‰$‰D$è@ºÿÿÿÿt‰$E‰D$‹E‰D$è¢þÿÿ‰ÂƒÄ‰Ð[]ô&U‰å]é‡þÿÿ0A…äÿ<.fileþÿgprintw.c _printwp _wprintw  Ð   #p .texty.data.bss_stdscr_wmove _move _waddstr . 9_vwprintw_mvprintw_mvwprintw_vw_printw_vsnprintf refresh.o/ 1249166521 502 20 100644 2464 ` Ll.textðŒ| P`.data@0À.bss€0ÀU‰åWVSƒì‹}…ÿ„Á‹Eö@0…´ÇEè‹]‹U‹M‹s‹R‹I…ö‰Uð‰Mì‰UäŽO‹{0‰}Ü‹{4¶¿‹Eè‹MÜ‹ƒúÿ„ ‹]‹Mè‹C,‹]ä‹4ˆ¡‹@,‹˜‹]ì ˜‹E苇9Ú‹‘9–u B9Ú~ó´&9Ó|‹™9žuK9Ó}óë 9Ú˜‰Ø)ÐÁà‰Eà•<‹Mà40Aƒøv÷Çt‹ƒÆ‰ƒÇ‰Èfü‰ÁÁéó¥‹Mì‹5‹EìÊ‹}ä‹N0˹9Щ@„¢‹F4‹Uä9¨‰‹M‹]‹I0‹{4‰MÜë ‹Uè‹MÜÇ‘ÿÿÿÿÿEä‹]è‹EÇŸÿÿÿÿC9X‰]èÆþÿÿ‹U€z$tÆB$‹M€y%u‹‹Q¡]ðUì‹}ð‹Mì‰8‰HƒÄ1À[^_]Ãĸÿÿÿÿ[^_]ËE䉋F4‹Uä9ŒXÿÿÿ‹}‹E‹0‰}Ü‹x4éeÿÿÿt&U¸ÿÿÿÿ‰åWVSƒì<‹5…ö„;è„À…A‹¶B$ˆEïÇEð‹ ‹Y …ÛŽâ€}ïu‹B0‹]ðƒ<˜ÿ„½€}ï‹uð‹B,‹°‰Eè¡‹@,‹°‰E䄪ÇEÔ‹K9]Ôot&€}ïÇEЄ®‹UÔ‰Ø)Ð@‰EЋEÐ…À…9]Ô2‹UÔ‹Mä‹uè‹‘9–uvÿEÔ9]Ô‹}Ô‹Uä‹M苺9¹tç9]Ô~¢‹‹ ‹B0‹]ðǘÿÿÿÿ‹B4ǘÿÿÿÿÿEð‹uð9q ÿÿÿÆB$‹ ‹A…À…¡‹‰Q‹@‰A1ÀƒÄ<[^_]Ã9]Ôš‹UÔ‹Mä‹uè‹‘9–„Nÿÿÿ´&ÿEЋUÔ‹EÐÂ9Ú)ÿÿÿ‹}ä‹Mè‹—9‘uß‹EÐ…À„ÿÿÿ9Úÿÿÿ‹D—9D‘u‹EÐ…À„þþÿÿ‹uÔ‹Eè‹}ÐÁæ‹Uð‰uàÆ‹EÔ‰$‰t$ ‰D$‰|$è‰uØ‹Uà‰ù‹EäÁáƒùvöÂt‹6ƒé‰2ƒÂƒEØü‰×‹uØÁéó¥‹EÐEÔé•þÿÿ‹}ð‹B0‹¸‰EÔ‹B4‹¸éMþÿÿ¡‹P‰T$‹‰$è‹ éÛþÿÿèÆEï¡Æ‹é°ýÿÿv¼'U‰åSƒì‹]…Ût2öC0u,¶C$;ˆEût-‰$è5ûÿÿ€}ûu)ƒÄ[]é5ýÿÿt&ƒÄ¸ÿÿÿÿ[]Ãt&ÆC$€}ût׋‹B 9CuÉ‹B$9C uÁ¡Æ@$ë¶t&U‰åƒì¡‰$èmÿÿÿÉÃt&¼'U‰åƒì ‰$‰t$‰|$‹u‹U …ötI‹F9Ð|B‹}‰Ñù9Á79Ñ~"‹~0‹^4‰ö¼'Ç—‹F H‰“B9Ñí‹$1À‹t$‹|$‰ì]Ë$¸ÿÿÿÿ‹t$‹|$‰ì]ô&U¸ÿÿÿÿ‰åƒì ‹U…Òt‹B‰$‰D$1À‰D$èYÿÿÿɶ -;O…¡FVZkq{„ºü'.fileþÿgrefresh.c    _refresh  &@ 1À .texté.data.bss_curscr_SP<_COLS_stdscrI U i 
z „_wnoutrefresh_doupdate_wrefresh_wredrawln_redrawwin_pdc_lastscr_PDC_gotoyx_PDC_transform_line_reset_prog_mode_isendwinscanw.o/ 1249166522 502 20 100644 962 ` L8.textpŒü P`.data@0À.bss€0ÀU¸ÿ‰åSìøþÿÿ‰D$‹E‰\$‰$è@ºÿÿÿÿt‰$‹E‰D$‹E ‰D$è‰ÂĉÐ[]ö¼'U‰åE ƒì‰D$‹E‰D$¡‰$èÿÿÿÉö¼'U‰åEƒì‰D$‹E ‰D$‹E‰$èQÿÿÿÉÃë U‰åƒì‹E ‰D$‹E‰$è@ºÿÿÿÿtE‰D$‹E‰D$¡‰$èÿÿÿ‰ÂɉÐÃë U‰åSƒì‹E‹]‰D$‹E ‰$‰D$è@ºÿÿÿÿt‰$E‰D$‹E‰D$è²þÿÿ‰ÂƒÄ‰Ð[]ô&U‰å]é—þÿÿ$BuÔï,.fileþÿgscanw.c_vwscanw _scanw` _wscanw _mvscanwÀ  ` .texti.data.bss_stdscr_wmove _move _vsscanf  "_mvwscanw_vw_scanw_wgetnstrscr_dump.o/ 1249166523 502 20 100644 2021 ` Ld.text`´$ P`.dataZ@0À.bss€0À.rdata @0@U‰åƒì(‰uø‹u ‰}ü‹}…ö‰]ôt+‹‰$è‰D$º‰t$ ‰T$‰$è…Àu¸ÿÿÿÿ‹]ô‹uø‹}ü‰ì]Ét$ ¸‰D$¸‰D$Ç$è…Àtȉt$ ¸‰D$¸P‰D$‰<$è…Àt¦‹G1Û…À~B‹W,ƒ:t:‰t$ ¹‰L$‹G Áà‰D$‹š‰$è…À„kÿÿÿC9_~ ‹W,‹š…ÀuÆ1ÀéYÿÿÿU‰åƒì(Ç$P‰]ô‰uø‰}üè‰Ã1À…ÛtW‹E…ÀtF‹E‰D$ ¸‰D$¸‰D$Eð‰$è…Àtü¿¹uðó¦u €}ót¶‰$è1À‹]ô‹uø‹}ü‰ì]É$‹E¿‰|$¾P‰t$‰D$ è…ÀtÄ‹C‹{ ‰Æ‰EìÁæ‰4$è‰C,…Àt§‰4$è‰C0…Àt|‰4$è‰C4…Àtb‰$è‰Ã1À…Ûtƒ1ö;uì}/Áç‰|$‹E¹‰L$‰D$ ‹C,‹°‰$è…ÀtF;uì|Ô‰$è‰Øé>ÿÿÿ‰$èé/ÿÿÿ‹C0‰$è‹C,‰$èé ÿÿÿ¶¿U‰åƒì‰]ø‹E‰uü…Àt>‰$º‰T$è…À‰Æt'‰D$¡‰$èeýÿÿ‰4$‰Ãè‰Ø‹uü‹]ø‰ì]Ë]ø¸ÿÿÿÿ‹uü‰ì]ÃU1À‰å]Éö¼'U‰åƒì‰]ø‹E‰uü…ÀtT‰$¹‰L$è…À‰Ãt=‰$èîýÿÿ‰$‰Æè…öt'¡‰4$‰D$è‰4$‰Ãè‰Ø‹uü‹]ø‰ì]Ë]ø¸ÿÿÿÿ‹uü‰ì]Ãv¼'U‰å]éwÿÿÿPDCwbrb #<ot–Í:Dd”®½ÌÛ!0@Kw€¡çð'.fileþÿgscr_dump.cmarker.0_putwin _getwinð ` À "Ð _scr_setP .textY.data.bss.rdata _curscr/ _fclose : _delwin D _free _fread _malloc _strlen _fwrite _fopen Sversion.1_scr_dump_scr_init_scr_restore_overwrite_touchwin_PDC_makelines scroll.o/ 1249166524 502 20 100644 833 ` L.textpŒü P`.data@0À.bss€0ÀU‰åWVSƒì,‹E‹M …À„ä‹E€x&”À…É” Ш…Ë‹U…É‹R ‰UäŽÇÇEì‹E‹U‹@8‹R<‰Eԉ׉Eè‰UÜÇEð‹Eì¯Áƒø~X‰Eà‹U‹r,‹R ‰UØ‹E苆‰Áë¶‹Eì‹–‰މÑ9ùu1Ƀ}Øëv‹UäA‰ƒÃ9MØòÿEð‹Eð9Eà·‹UÔ)UÜ‹E܉T$@‰D$‹E‰$è‹U‰$èƒÄ,1À[^_]ÃÄ,¸ÿÿÿÿ[^_]ÃÇEìÿÿÿÿ‹U‹E‹R8‹@<‰×‰E܉Eè‰UÔé4ÿÿÿt&U‰åƒì‹E‰D$¡‰$è¶þÿÿÉÃt&Uº‰åƒì‹E‰T$‰$è–þÿÿÉÃâí > .fileþÿgscroll.c_wscrl _scrl0 _scrollP .textl.data.bss_stdscr  _PDC_sync_touchline slk.o/ 1249166526 502 20 100644 5420 ` Lª:.text` ´ u P`.data@0À.bss€@À.rdata @0@U¸ÿÿÿÿ‰åƒì‹ 
‹U…Éuƒút*~ƒút#ƒú7tg¸ÿÿÿÿÉÃ…Òt ƒúuð´&¸ë¸ £‰PºŒ‰T$¡‰$è£01Ò…Àt‰ÐÉÃt&1Àºÿÿÿÿ£‰Ðëê¹ ‰ ë´vU‰åWV‰ÆSƒì€=@…ØiÀŒ‹01ÿ‹”€‹„„‰Uð…À…¹iöŒ‹„ˆ‰uì1ö‰D$¡ ‰D$¡‹@8‰$è;5v‹]ì½)Ãë8‹Uð9ð~2¡0F‹ƒÃ‰D$¡‹@8‰$è;5}6¶9þ}ĸ FƒÃ‰D$¡‹@8‰$è;5|×´&ƒÄ[^_]ÃHt‹=‹Eð)Çé4ÿÿÿ‹ ‹Eð‰Ê)‰ÐÁè<‹UðÑÿ9ÈŽÿÿÿOé ÿÿÿv¼'U‰åSƒì1Û;}ë ‰ØCè˜þÿÿ;|ðX[]ö¿U‰åƒì‰]ô‹]‰uø‰}ü…ÛŽí1Ò;‹EŸÂÁè Â…Ôƒ}Ê‹} K…ÿ„׋U ¶„À„É1ÿ< u v‹M G€<9 tö¡1ö9ƉEðßiÃŒ‹0‰Uì ‹U ú¶¼'¾…À‰t FBƒÁ;uð|í‰ðøt(‹U €|ÿ u¶¼'N‰ðøt ‹M €|ÿ tïiÃŒ‹Mì°Ç ‹U‰´€‰”„ë>‹]ô¸ÿÿÿÿ‹uø‹}ü‰ì]ô&iÃŒ‹01ö1Élj´„‰Œ€‰Øè4ýÿÿ‹]ô1À‹uø‹}ü‰ì]Ë 0‰MìéJÿÿÿ´&U‰åƒì¡‹@8‰$èÉô&U‰åƒìèÕÿÿÿ@tÉéɸÿÿÿÿÃt&U‰å‹E…À~F;>iÀŒ‹01ÉÐtÿÿÿ‹€tÿÿÿë ˆ`ƒÂ‹A…Àuð]¸`Æ`Ãf]1Àö¿U‰åƒìÆ@¡‹@8‰$è¡‹@8‰$èÉÃU‰åƒìÆ@ènýÿÿ¡‹@8‰$èÉö¿U‰åƒì¡‹@8‰$èÉô&U‰åSƒì‹E‰D$¡‹@8‰$è‰Ãè ýÿÿƒÄ‰Ø[]ÃvU‰å]ëÊv¼'U‰åSƒì‹E‰D$¡‹@8‰$è‰ÃèËüÿÿƒÄ‰Ø[]ÃvU‰å]ëÊv¼'U‰åSƒì‹E‰D$¡‹@8‰$è‰Ãè‹üÿÿƒÄ‰Ø[]ÃvU1ɉåSƒì¿E‰L$‰D$¡‹@8‰$è‰ÃèTüÿÿƒÄ‰Ø[]ö¿U‰å¿E Áà E]ë€U‰åWVSƒì‹=0…ÿ„$ƒ=P„|‹‹s8ÇC4…ö„”‹ 1ۉș÷=ƒø‰Æë‰5¡Pƒø„ð޵ƒø´‹ ‡ˆºt&‰ËŒJyô‹‰ÐÁè4Ñþ ‰ó)ÉØ@‰‡¸‰ð)ÈH‰‡DF‰‡ÐD‰‡\‹ )ÂZº‡è‰ö¼'‰ËŒJyôt&Aÿ£èûÿÿƒ=P„C‹‹C8‰$èƒÄ[^_]áP¾‰5ƒø…ÿÿÿ‹ 1Ò‹5)Ƈˆët&BŒƒú…‰˃úuì^ëç´&‹¸‹s8ÇC4£ …ö…zþÿÿ´&¼'1É¡‰L$ ‹S4)ЉD$¡‰D$‹C4‰$è‰C8…À„8ÿÿÿ¡¿ ‰|$‹@8‰$è‹=0éþÿÿ…À„‹ 1Ò‹5‰)Ƈˆë¶BŒƒú ±þÿÿ‰˃úuè^ëã‹‹B8‹x1À‰D$‹B8‰$è1À‰D$1À‰D$¡‹@8‰$衉D$1À‰D$¡‹@8‰$è1À;}W1öë X¸‰D$ ¡0‰\$‹„ˆÆŒ‰D$1À‰D$¡‹@8‰$è;‰Ø|º‹‹C8‰xéùýÿÿ‰È‹1ÛÁè4ÑþJ‰ð)Љ@‰Ÿˆ‰ÓÛ‰‡,F‰‡¸)Á‰Ÿ Y‰Ñ‰ŸDÓ‰ŸÐÓ‰—‰Ÿ\éqýÿÿU‰åƒì‹0…Òt>¡‹@8…Àu4‰$èÆ@1À£01À£1À£1À£P1À£ ÉÉ$è¡‹0Ç@8ë°t&U‰åƒì‰}ü‹ 0‹} ‰]ô…ɉuøt¡‹P8…Òu´&1À‹]ô‹uø‹}ü‰ì]á ‹ZØ;Euá‹51Û9ó}Õ¡‘ˆ‰Eð´&‹9ø ‹MðÈ9ø CÂŒ9ó|æë¤Cë¡F%d &M"S"a"i9n"ˆ"—"­"À"ö"ÿ& 8"3"C&N7T"r&}7ƒ""­"ë" ">"‹"¤"H"}"—&¢5Ü"ê""""8">&I4N&Y3h"s&~3—&¢2¿&Ê1ÿ& 0?&J/v&.»"É"Ö&î'ù" ""-"N'“"Ä"Ï"Ü&ç2ô"ÿ""'R&f"ƒ(•'¤-´&È/Î"á"é'&6/G&R8W'f&q,y"”$"½&È+Î"Ø&ë" "X "a &p )v "~ "… "Œ "“ "š "¤ *© &¯ "Ë "Ý & " " "À6.fileþÿgslk.c_labels _slk0_hidden@P) 3  __redrawà _slk_set  = N° temp.0`[Ð f0 q` ~ ‰° •à ¢ð ¯  ½0 Ê` Õ  ã° ÷P  À  .textQ u.data.bss.rdata_SP_COLS_LINES_free _delwin  _whline _newwin " . 
8 _wattron B L _werase V d _waddch _wmove _calloc n_label_length_label_line_label_fmt_slk_init__drawone_slk_noutrefresh_slk_refresh_slk_label_slk_clear_slk_restore_slk_touch_slk_attron_slk_attr_on_slk_attroff_slk_attr_off_slk_attrset_slk_color_slk_attr_set_PDC_slk_initialize_PDC_slk_free_PDC_mouse_in_slk_mvwprintw_wcolor_set_wattrset_wattroff_touchwin_wrefresh_wnoutrefresh_doupdatetermattr.o/ 1249166527 502 20 100644 900 ` LŽ.text°´p P`.data@0À.bss€0À.rdata d@0@U¸ÿÿÿ‰å]öU¸‰å]öU¸‰å]öU¸‰å]öU¸‰å]öU¸ ‰å]öU¡ºø‰å€xuºøÿ]‰ÐÃt&U¸þ‰å]öU¸‰å]öU¸‰å]ÃpdcursesRb’.fileþÿgtermattr.c  _has_ic  _has_il0 @ #P -` 8€ D N  .textª.data.bss.rdata _ttytype_SPX_baudrate_erasechar_killchar_longname_termattrs_term_attrs_termname_wordcharterminfo.o/ 1249166528 502 20 100644 1645 ` L'.text´Ô P`.data@0À.bss€0À.rdata ´@0@U‰åƒì‰]ø‹]‰uü‹u;};1Ò;5‰ØÂÁè Âu'…öx#‰t$‰$衉X‰p‹]ø1À‹uü‰ì]Ë]ø¸ÿÿÿÿ‹uü‰ì]ö¿U¸ÿÿÿÿ‰å]öU¸ÿÿÿÿ‰å]öU¸ÿÿÿÿ‰å]öU¸ÿÿÿÿ‰å]öU¸ÿÿÿÿ‰å]öU¸ÿÿÿÿ‰å]öU‰å‹E…ÀtÇÿÿÿÿ]¸ÿÿÿÿÉö¼'U1À‰å]Éö¼'U¸ÿÿÿÿ‰å]öU‰åƒì‹E…Àt ÇÿÿÿÿɸÿÿÿÿÃÇ$¡º‰T$ƒÀ@‰D$ ¸‰D$èëÍv¼'U¸ÿÿÿÿ‰å]öU¸ÿÿÿÿ‰å]öU¸ÿÿÿÿ‰å]öU1À‰å]Éö¼'U1À‰å]Éö¼'U¸ÿÿÿÿ‰å]öU¸þÿÿÿ‰å]öU¸ÿÿÿÿ‰å]öU1À‰å]Éö¼'U¸ÿÿÿÿ‰å]ÃThere is no terminfo database !":&?#-2$P%.fileþÿgterminfo.c_mvcur _vidattrp € _vidputs   ° _putpÀ %Ð 2ð _setterm ? 
_tgetent` Jp _tgetnum€ _tgetstr _tgoto  T° _À iÐ _tparmà _tputsð .textú.data.bss.rdatas_LINES_COLS_SP}_fwrite ‰ •_vid_attr_vid_puts_del_curterm_restartterm_set_curterm_setupterm_tgetflag_tigetflag_tigetnum_tigetstr_cur_term__imp___iob_PDC_gotoyx touch.o/ 1249166530 502 20 100644 1032 ` L¬.text Œ P`.data@0À.bss€0ÀU¸ÿÿÿÿ‰åV‹MS…Ét%‹A1Ò…À~‹q0‹Y4Ç–‹A H‰“B9Qì1À[^]öU‰åƒì ‰$‰t$‰|$‹u‹U …ötI‹F9Ð|B‹]‰ÑÙ9Á79Ñ~"‹~0‹^4‰ö¼'Ç—‹F H‰“B9Ñí‹$1À‹t$‹|$‰ì]Ë$¸ÿÿÿÿ‹t$‹|$‰ì]ô&U¸ÿÿÿÿ‰åS‹]…Ût&‹S1À…Ò~‹K0‹S4ÇÿÿÿÿÇ‚ÿÿÿÿ@9Cì1À[]ô&U‰åƒì ‰$‰t$‰|$‹]‹U …Ûtf‹C9Ð|_‹u‰Ññ9ÁT9Ñ~?‹{0‹s4ë´&Ç—‹C H‰–B9Ñ~‹E…ÀuæÇ—ÿÿÿÿÇ–ÿÿÿÿB9Ñæv‹$1À‹t$‹|$‰ì]Ë$¸ÿÿÿÿ‹t$‹|$‰ì]Ãt&¼'U‰åS‹M‹] …Ét1Ò9Y‰ØœÂÁè Ât[1À]ËA0ƒ<˜ÿ[•À¶À]ö¿U‰å‹U…Òt!‹J1Àƒù~‹R0‰ö¼'ƒ<‚ÿu @9Áõ]1ÀÃ]¸Ã.fileþÿgtouch.c @ À % /  ?à .text.data.bssN_touchwin_touchline_untouchwin_wtouchln_is_linetouched_is_wintouchedutil.o/ 1249166531 502 20 100644 662 ` Lv.textŒ P`.data@0À.bss€@ÀU‰å·Mƒù—À1Òƒù•Â…Ât]¸ˆ ÆÃvÆ^ƒùt]ˆÈ@¢¸Ãt&]¸Æ?ÃfU‰å]Ãt&¼'U‰å]Ãt&¼'U‰å]é ! ' 2 B G R X ….fileþÿgutil.cstrbuf.0_unctrl _filter` _use_envp € .text‰ .data.bss_napms _delay_outputwindow.o/ 1249166532 502 20 100644 4117 ` LÚ '.text  Œ, + P`.data@0À.bss€0ÀU¸P‰åƒì‰D$‰]ô‰}ü‹}‰uøÇ$è‰Ã1À…Û„ƒ4½‰4$è‰C,…À„›‰4$è‰C0…À„}‰4$è‰C4…Àtc‰{‹E ÇC ‰C ‹E‰C‹E‰C1À;=t.ˆC$Gÿ‰C<ÇCHÿÿÿÿÇCDÿÿÿÿ‰$è‰Ø‹]ô‹uø‹}ü‰ì]ËU ;uǰëËC0‰$è‹C,‰$è‰$è1Àë¶¿U1À‰åWVSƒì ‹}…ÿt>‹G1ö‰Eð‹G ;uð‰Eì})t&¼'‹Eì‹_,Áà‰$艳…ÀtF;uð|â‰øƒÄ [^_]Ã1Û9ó}f‹G,‹˜C‰$è9ó|í‹G0‰$è‹G4‰$è‹G,‰$è‰<$èƒÄ 1À[^_]Ãv¼'U‰åSƒì‹]ët&‰$è‹[L…ÛuñZ[]ö¼'U‰åSƒì‹]€{(u€{)u Y[]ö‰][[]먉$èëÞ´&¼'U‰åƒì(‰]ô‹]‹M ‰uø…Û‰}ü‹u‹}u‹)û…Éu‹ )ñ‹;B ;B$~‹]ô1À‹uø‹}ü‰ì]Ét$ ‰|$‰L$‰$ètýÿÿ…ÀtÙ‰$èhþÿÿ…À‰Ãtˉ$è‰Ø‹uø‹]ô‹}ü‰ì]Ãt&¼'U¸ÿÿÿÿ‰åVSƒì‹]…ÛtWöC!u&‹C1ö…À~‹C,‹…Àt‰$Fè9s8¶‹C0‰$è‹C4‰$è‹C,‰$è‰$è1ÀƒÄ[^]ËC,‹°ë®¶U‰åƒì‰]ø‹M‹] ‰uü…É‹ut‹A1ÒØ;‰ØŸÂÁè Ât‹]ø¸ÿÿÿÿ‹uü‰ì]ËA 1Òð;‰ðŸÂÁè Âu؉Y‰q‰ $è‹]ø1À‹uü‰ì]ö¿U‰åƒì8‰]ô‹]‰uø‹u‰}ü‹C‹}‰ò‹K)‰Uð‰ú)ʉUì…Û„ü9ðô9ùì‹U ò‰Uà‹SÐ9Eà‰UèÓ‹E‹C ‰EäÁ9ʽ‹U …Ò„Á‹E…Àu ‹Uì)Uä‹EäH‰E‰|$ ‹U‹E ‰t$‰T$‰$è”ûÿÿ‰Á1À…É„z‰YL‹C‰A‹C ‰A ¶C%ˆA%¶C&ˆA&¶C'ˆA'¶C*ˆA*¶C(ˆA(¶C)ˆA)‹Uð‹Eì‰QH1Ò;U ‰AD}"‹y,Áà‹s,‰EÜ‹]ð‹EÜžC‰]ð‰—B;U 
|êƒI‰Èëf1À‹]ô‹uø‹}ü‰ì]ËUð)Uè‹EèH‰E é-ÿÿÿë U‰å‹M‹E‹QU‹QЉE]éƒþÿÿvU‰åƒì‰}ü‹}‰]ô‰uø…ÿtz‹OL…Éts‹E E xk‹w‹U ;A]‹E‹W Ð;A P1Ûƒþ‹U ~+‹I,‹G,‰Mì‹M‰EðÁá‰Mè‹Mì‹Eè‘‹MðB‰™C9Þë‹E ‹U‰GH1À‰WD‹]ô‹uø‹}ü‰ì]Ë]ô¸ÿÿÿÿ‹uø‹}ü‰ì]ô&¼'U1À‰åWVSƒì,‹M…É„A‹E‹U‹@‰Ñ‹Z ‹R‰Eð‹A‰T$‰\$‰D$ ‹Eð‰$èÌùÿÿ…À„‰$è¼úÿÿ…À‰Ç„1ö;uð}r‹M‹P,‹@0‹I,‰Uä‹W4‰MèKÿÁã‰Eà‰U؉M܉]ìt&‹]ä‹E苳‹]ì‹ °9Ðv‰Ã´&‹ƒÁ‰ƒÂ9Ówò‹Eà‹MÜ‹UØÇ°‰ ²F;uð|¹‹]‹C‰G‹‰‹C‰G‹C ‰G ‹C‰G‹C‰G‹C‰G‹C‰G¶C$ˆG$¶C%ˆG%¶C&ˆG&¶C'ˆG'¶C*ˆG*‹C8‰G8‹C<‰G<‹CD‰GD‹CH‰GH‹CL‰GL‹C ‰G ‹C‰G‰øƒÄ,[^_]ÃÄ,1À[^_]ô&U1À‰åWVSƒì<‹u‹] ‹M…ö„,‹F¨ …O¨…!¡9p8„ÿ‹V‹F‰D$ ‰T$‰L$‰$èIøÿÿ‰Ã1À…Û„å‹S ‹F‰Uè9Â~‰Eè‹S‹‰Ñ‰Uì9Â~‰Eì‹F¨!„‹~‰C‹FQÿ‰C¶F$ˆC$¶F%ˆC%¶F&ˆC&¶F'ˆC'¶F*ˆC*‹F89Ð~1À‰C8Qÿ‹N%Q%d%°#ÈÙ$ä$ì$=%k$z$…$$˜$Ä# "AMU¢!ñ$$$$%$bФ#t$$Š$š¢$ó  !^ ~ $E t ” #.fileþÿgwindow.c  _wsyncup°  à _newwin  _delwinÀ _mvwin@ _subwinÀ _derwin  *@ _dupwinð 4` _wresizeÀ  _syncokð  C  O`  .text˜ +.data.bss_LINES_COLS_SP_memcpy Z _wmove _copywin _subpad _werase i s _free _malloc _calloc }_PDC_makenew_PDC_makelines_PDC_sync_mvderwin_resize_window_wcursyncup_wsyncdown_is_wintouched_wrefresh_touchwin debug.o/ 1249166533 502 20 100644 1244 ` LÈ.text´ P`.data@0À.bss€0À.rdataPÄ@0@U‰åƒì8‰]ø‰uü€=u ‹]ø‹uü‰ì]ÃÇ$¸‰D$è…À‰Ætz]ä‰$è‰$]èè‰D$ ¹º ‰L$‰T$‰$èè‰D$¸‰\$ ‰4$‰D$èE ‰D$‹E‰4$‰D$è‰4$è‹]ø‹uü‰ì]á¾+»‰t$‰\$ƒÀ@‰D$ Ç$$èé1ÿÿÿ¶¿U‰å]ÆÃt&U‰å]ÆÃtracea%H:%M:%SAt: %8.8ld - %s PDC_debug(): Unable to open debug log file  # ( 1BMV kpy ‰Ÿ§¶Ö Ûö  .fileþÿgdebug.c _traceonð  .text .data.bss.rdataP'_fwrite _fclose 3 _clock = H _time _fprintf _fopen R_PDC_debug_traceoff_pdc_trace_on__imp___iob_vfprintf_localtime_strftimepdcclip.o/ 1249166534 502 20 100644 1469 ` L.textÀŒL P`.data@0À.bss€0ÀU‰åƒì‰]ø‰uüÇ$èƒì…Àºu ‹]ø‰Ð‹uü‰ì]ÃÇ$èƒì…À‰Ãt[‰$èÇ$‰Æ@‰D$è‰Â‹Eƒì…Ò‰t$‰$‰\$è‹E ‰0è‹]ø1Ò‹uü‰ì‰Ð]Ãèºë„èºéuÿÿÿU‰åƒì‰]ø‰uüÇ$èƒì…Àºu ‹]ø‰Ð‹uü‰ì]Ëu Ç$ F‰t$èƒì…À‰ÃºtÒ‰$èƒì‰t$‹U‰$‰T$è‰$èƒìè‰\$Ç$èƒì…Àu‰$èƒì‹]øº‹uü‰ì‰Ð]Ãè‰$èƒì‹]ø1Ò‹uü‰ì‰Ð]Ãë U‰åƒì‹E‰$èƒìÉ1Àô&U‰åƒìèÉ1ÀÃ8I^x‚•¡Äð$,<K dl  ·.fileþÿgpdcclip.c ° ( ;° .text¿.data.bssO ] q ƒ _memcpy “ _strcpy ¡ _strlen °  Ö 
ç_PDC_getclipboard_PDC_setclipboard_PDC_freeclipboard_PDC_clearclipboard_GlobalFree@4_SetClipboardData@8_EmptyClipboard@0_GlobalUnlock@4_GlobalLock@4_GlobalAlloc@8_CloseClipboard@0_GetClipboardData@4_OpenClipboard@4 pdcdisp.o/ 1249166536 502 20 100644 1417 ` Lø.text0Œ¼ P`.data¼@`À.bss€0ÀUâÿÿ‰åƒì·E Ââÿÿ·EÁà ‰T$¡‰$èƒìÉÉö¼'U‰åWVSì<ãÿÿ‹}‹M‹U ·Ç Ããÿÿf‰â÷ÿÿËf‰æ÷ÿÿ1É9ùf‰•à÷ÿÿT:ÿf‰•ä÷ÿÿ}[‹5´&‹E‹ˆ‰ÐÁè÷Âf¶0f‰„ê÷ÿÿt ÷€ÿu‰Ðƒà‹…¶¿f‰”è÷ÿÿA9ù|³‰\$…à÷ÿÿ‰D$1À‰D$ …è÷ÿÿ‰D$¡‰$èƒìeô[^_]à      !"#$%&'()*/Û123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_±bcdeøñ°Ù¿ÚÀÅ--Ä-_ôÁ³óòãØœù& .” Ð  .fileþÿgpdcdisp.c @ .text#.data.bss_acs_map$1= U q_PDC_gotoyx_PDC_transform_line_pdc_con_out_pdc_atrtab_WriteConsoleOutputA@20_SetConsoleCursorPosition@8 pdcgetsc.o/ 1249166537 502 20 100644 844 ` Lœ.textÀŒL P`.data@0À.bss€0ÀU‰åEøƒì‰D$¡‰$èƒì‹EøÉô&¼'U‰åE؃ì8‰D$¡‰$è¿Eèƒì¿UäÉ)Ð@öU‰åE؃ì8‰D$¡‰$è¿EÚƒìÉö¼'U‰åE؃ì8‰D$¡‰$è¿Eæƒì¿UâÉ)Ð@à > Fn vž ¦.fileþÿgpdcgetsc.c 0 '` < .textº.data.bssMZ x _PDC_get_cursor_mode_PDC_get_rows_PDC_get_buffer_rows_PDC_get_columns_pdc_con_out_GetConsoleScreenBufferInfo@8_GetConsoleCursorInfo@8pdckbd.o/ 1249166539 502 20 100644 6974 ` L¾(.text´@s P`.data€ ´@`À.bssˆ€@À.rdata 4@0@U1À‰å]Éö¼'U‰å]Ãt&¼'U¸‰åƒì‹…Ò~Éø‰D$¡‰$èƒì1Àƒ=É•ÀÃfU1À‰åWVSƒì\£¡…À„ÓH£·8ƒø„-ƒøt ¸ÿÿÿÿeô[^_]ÃÇE¼1À»£(¡ü¿‰ÙÆ@R‹H‹E¼ƒúó«„<ƒú¸t1Àƒú”À…‹=@1É»¶¼'…<‰Âu1Òf‰TKAƒù~èfH„7¿>1É‹5X¿<‰ »1É£‰E¸¡\‰‰E´·¼ f9¼ `t ‰ØÓà fƒÿ„ÐAƒù~Ô¡1É£X¡£\¡£`¡ £d¡£h·„ `ƒèfƒøw 1ÿf‰¼ `Aƒù~ß¡D¨tÇE¼ ¨ tƒM¼¨tƒM¼‹u¼…öt2‹1É´&‰ÐÓø¨t·„ ‹]¼ Øf‰„ Aƒù~ݸeô[^_]ÃEĉD$ ¸‰D$¸8‰D$¡‰$è·8ƒìÿ fƒø„fH„¡…À„óýÿÿH£·8ƒø…Øýÿÿt&·F¡‹H· BÆ@R‹5€~Pt7öÂtƒ öÂtƒ ö tƒ ö tƒ ´&ƒù„€1ƒù„ø…Ûtö„­öÂ…¤föÆt‰»·„¨f=ç·øu1Û‰Ïö„Q…Û…Š¿·œ¢fûÿÆFReô‰Ø[^_]÷ B1Û‹5<…ö„xƒù”Àù” Ш…}ù‘„qAðƒø‡‰ (ƒù„m'ƒù„g‰‰Øé„þÿÿ¡@¿X‰Ù%ÿƒøÀƒààƒÀ@£¸ÿÿÿÿ£¸ÿÿÿÿ£‹E¼ó«¸éëýÿÿvƒù…Ïþÿÿ…Û…Ëþÿÿ€~Q¸ÿÿÿÿ„üÿÿ¿xÁø!é üÿÿ¿¸‰=éÔûÿÿ€~Q¸ÿÿÿÿ„åûÿÿ¿xÁøéÑûÿÿö uLö„´…Û„Ó¿·œéžþÿÿ‰fƒ¼¨…LþÿÿÆFRéþÿÿ¿·œésþÿÿ…Û„Y¿·œé[þÿÿƒù„dAðƒø‡Âþÿÿ; (…¶þÿÿ1À»£(é¥þÿÿƒç„Àûÿÿ¡‹@0…À„°ûÿÿ‹=…ÿu/‰$¾è‰t$¡‰$苃ì…Û„wûÿÿ¹uÀ}ȉL$1Û‰t$ ‰|$¡‰$è‹EЃì1Éfƒ¼ „ßAƒù~ë„Û„,ûÿÿ‰t$ 
»‰\$‰|$¡‰$èƒìéûÿÿ¶€~Q¸ÿÿÿÿ„^úÿÿ¿xÁøéJúÿÿ1À£(é­ýÿÿ9ò„¡Óã Ãˉéûÿÿ¿·œ¤éýÿÿ…Ût3¿·œéîüÿÿfƒ=F…¦þÿÿé‰þÿÿ¿·œ¦éËüÿÿ¿·œ é»üÿÿ……ÿÿÿº³f‰” éÿÿÿ‹U´¸ÿÿÿÿ9U¸…`ÿÿÿé‘ùÿÿ1Òfƒ=F‰(t/·@éâüÿÿƒù…ÙüÿÿÇ$¤èf£xƒìé¿üÿÿ‰ $¸‰D$èƒì…Ày¹é¢üÿÿÇ$¢ëÇÇ$ ë¾t&U‰åƒì¡‰$èƒìÉô&Uº‰åWƒì¡‹@(…Àu ‹Ê€‰T$¡¿X‰$èü1À¹ƒìó«‹}üÉÃt&U1À‰å]ÃËÌÍÊÖÚSŒ½çRоèf€À鄿懻í#àê¼ì$áëK…ÝßJ}ÞÊÖÚø _âãçÅ5  îï Ã9É3þÇ1üÁ7 Ä4ÿ Â8 Æ6  È2ý ú0û Î.ÒÜ 0)—1!˜2@™3#š4$›5%œ6^7&ž8*Ÿ9( aA¡bB¢cC£dD¤eE¥fF¦gG§hH¨iI ©jJ ªkK «lL ¬mM ­nN®oO¯pP°qQ±rR²sS³tT´uUµvV¶wW·xX¸yY¹zZº0û1ü2ý3þ4ÿ 5 6 7 8 9Ï×ÛçÑÔØçÐÕÙç.ÒÜÊÖÚ !- ". #/ $0 %1&2'3(4)5*6+7 ,8[{ñ\|]}ò'ô-8AI'Tlq†´¹¿Ëøÿ3;BHT\bmu–¢§¬±¶»ÀÅÍàë,9ajr"y‚™§®ÃÈÎÕßðüaŠ©±ð6@Jƒ »éù,L^q‚Ž“$œ¤'ªÓÛ#ë"BVhu…™¥¼ÌØí!;&AZ%‡!­ºÉÎÖ .fileþÿgpdckbd.c((_kptab 3 E ^  m{` _save_ip8ˆXšx¤€ ²  Áð .text÷s.datat .bssˆ.rdata Ôç_SPó ' B X _napms n €  °_event_count_key_count_save_press_ext_kptab_PDC_get_input_fd_PDC_set_keyboard_binary_PDC_check_keybutton_mask.0_PDC_get_key_old_mouse_status_left_key_PDC_flushinp_PDC_mouse_set_PDC_modifiers_set_pdc_key_modifiers_pdc_con_in_pdc_mouse_status_pdc_quick_edit_SetConsoleMode@8_FlushConsoleInputBuffer@4_ReadConsoleInputA@16_PeekConsoleInputA@16_MapVirtualKeyA@8_GetKeyState@4_GetNumberOfConsoleInputEvents@8pdcscrn.o/ 1249166540 502 20 100644 9848 ` L”Y.text€´pê P`.data@4@`À.bssH€@À.rdataüt@0@U¸‰åW¿õVµèûÿÿSìL‰4$‰D$èƒìè‰Ãè‰D$¸‰\$ è÷ÿÿ‰$‰D$è‰$èƒìÇ$(èƒì‰\$Ç$èƒì‰Ã‰4$1ö艸̃ì£H…Ô÷ÿÿ»2‰D$¡‰$è‰5À‹•Ô÷ÿÿƒì‰Ĺ1Û‰ Ⱦ̉ÐÁêƒâÁ艸ƒà‹£°1À£¬¸£´¿B ·R f‰=¾½Ø÷ÿÿÁà Ðf£¼è£ …Ð÷ÿÿº‰D$1À‰D$¸‰T$ ‰D$Ç$€è¸ƒì‰…Ì÷ÿÿt&‰\$¸C‰D$‰<$è‰t$…Ì÷ÿÿƒÆ‰D$1À‰D$ 1À‰D$‹…Ð÷ÿÿ‰|$‰$èƒìƒû~³…Ì÷ÿÿ¹¿\‰D$‹…Ð÷ÿÿ1ö‰L$1Û‰|$¿&‰t$ ¾@‰\$Ì÷ÿÿ‰$èƒì1Àº`‰D$ 1À‰D$¸1‰D$‹…Ð÷ÿÿ‰T$‰\$‰$èƒì¸d‰D$1À‰D$ 1À‰D$‹…Ð÷ÿÿ‰\$‰|$‰$艵Ì÷ÿÿƒì¸<‰D$‹…Ð÷ÿÿ1ɉ\$1Ò»h‰\$‰L$ ‰T$‰$è‹…Ð÷ÿÿƒì‰$èƒìeô[^_]ô&U‰åƒì¡…Àu¡…Àu(1À£ÉÉ$è¡…Àtæ´&¼'‰$è1À£ÉÃë U‰åƒì8‰uø‰Æ¸‰]ô‰Ó‰}ü‰T$‰D$‰4$èƒì …Àºt‹]ô‰Ð‹uø‹}ü‰ì]ËS}苉Uì¿Uì‰Eè‰ÐÁèÂÑú¿Eîf‰Uì‰Uä‰ÁÁéÈÑøf‰Eî‰Eฉ|$‰D$‰4$èƒì 1Ò…Àt·Cf;Eäf‰Eì}[‹Eäf9G|=·Cf;Eàf‰G|/t&‰|$º‰T$‰4$èƒì …ÀuS·GHf‰Gf;Eà}Õ1Ò‹]ô‰Ð‹uø‹}ü‰ì]Ãt&‰|$¹‰L$‰4$èƒì …Àu‰·GHf;Eäf‰G}Õ1Ò뾺éûþÿÿ´&U‰åWVSƒì‹} ƒ}žÀƒÿžÂ 
Шºÿÿÿÿ…µ¡uè‰$èfÇEêWÿƒìfÇEèf9”f‰Uì‹UÁøJf9ÂŽ”f‰F·Fãÿÿ‰ò@·À ÷Fãÿÿ@Áà áè)þÿÿ‰\$¡‰$衃ì‰òè þÿÿ‰\$¡‰$衃ì‰$èƒì1Òeô‰Ð[^_]Ãf‰Eì‹UÁøJf9Âlÿÿÿf‰Végÿÿÿ¶¿U‰åSƒì$€=8uè‹]üÉö·"ãÿÿ·&fÇEð)Ð@·$·À ÷(ãÿÿfÇEò)Ð@Áà ÉØÁøHf‰EöCÿf‰Eô¡‰\$‰$èƒìEð‰D$¸‰D$¡‰$è¡ƒì ‰\$‰$衃ì‰$èƒìè‹]üÉÉö¼'U‰åWVSƒì\»X‰\$Ç$裹‰L$Ç$ 裋…Ò„f1Ò…À„\t&¼'¿„ f‰”(Bƒú~êÇ$õÿÿÿ裃ìÇ$öÿÿÿ裃ì‰$èƒìƒø…aè÷пÁè¢8EȉD$¡‰$èƒì¸‰D$¡‰$èƒì‰|$¡‰$衃싃à@£Ç$Nè…À„º‰$è‰C ‹Ç$Tè…À„·‰$è‰C$¡Æ@‹Ç@0–‹S ƒúŽ¥‹MÜ¿Á9Â~1‰T$ ¿Á»\‰D$‰\$¡ƒÀ@‰$è¸ÿÿÿÿeô[^_]ËS$ƒúŽf·MÚ¿Á9Â~B‰T$ ¿Á¹ˆ‰D$‰L$ë¶è‰C ‹Ç$Tè…À…IÿÿÿèéGÿÿÿ‹EЃàf‰C ·EÐÆC %ðÁøf‰C ¡Ç@LÇ$³è…Àu0‹Ç$Æè…À•CHè²üÿÿ¡Æ@eô1À[^_]ÿ¿¯ÂÁà‰$裉Å۸ÿÿÿÿ„ÿÿÿ· æÿÿ}À·Á Æ·IfÇEÀæÿÿfÇE‰ÂHf‰EÆÁâ Öf‰MÄ1Ò‰|$‰\$¡‰T$ ‰t$‰$èƒì…Àt#‹ÇCLé/ÿÿÿ‹MÜé]þÿÿ·MÚéœþÿÿ¡æÿÿ‰$è·"1À£·&)Ð@·$·À Æ·(æÿÿ)Ð@Áà Ɖò¿ÆÁú¯ÂÁà‰$裉…Ҹÿÿÿÿ„þÿÿ·$f‰E·"f‰EÀ·(f‰EÆ·&f‰EÄ1À‰|$‰T$‰D$ ¡‰t$‰$èƒì…ÀtH‹ÇCLéDþÿÿ¡Ç$܃À@‰D$ ¸‰D$¸‰D$èÇ$衉$è1À£éOýÿÿ´&¼'U‰åƒì€=8u!¡‰D$¡‰$èƒìÉö¡‰D$¡‰$èƒì¸"‰D$¸‰D$¡‰$è¡ƒì ‰D$¡‰$èƒì¸"‰D$¸‰D$¡‰$è¡ƒì ‰$衃ì‰D$¡‰$èƒìÉÃë U‰åƒì(èÿÿÿ¡‹PL…Òteƒú„†·UøfÇEøfÇEúHf‰Eþ·Hf‰Eü¡1ɉT$‰D$¡‰L$ ‰D$¡‰$èƒì…Àt-¡ƒxt Ç$èèƒè1Ò‰T$‰$èÉ÷$Uøf‰Eú·"f‰Eø·(f‰Eþ·&émÿÿÿv¼'U‰å]Ãt&¼'U‰å]Ãt&¼'U1ɉåWVS¿u ¿]¿}¿´6(¿œ(¶¿‰Ø‰òÁàÂöÁt ‰ðˆÚÁàÂöÁt²öÁt Àê¶ÂÁàÂöÁt€ÊöÁt€Ê€‰øÁàˆAƒùv®[^_]Éö¼'U‰åƒì¿U‰t$‹5‰$Áâ» ¶2ƒà· C‹E f‰¶2Áè·C‹Ef‰‹$1À‹t$‰ì]ÃfU‰å¶8]Ãt&U¡‰åV…ÀS¿]ts¿„(»€€€‹ …̶ÁiÀèp‰ð÷ë‹E òÁúf‰‰ÈÁè¶ÀÁéiÀè¶ÉiÉèp‰ðƒÁ÷ë‹EòÁúf‰‰È÷ë‹EÊÁúf‰1À[^]ÃèÖñÿÿë†t&U‰åWVSƒìl‹ ¿E¿u…É¿] ¿}‰E´„¿Œ6(‰ØÁà)ؾÓMb˜ô‰Ø÷î‰ØÁøÁú)‰øÁà¶Ú)ø¸ô‰ø÷î‰øÁøÁú)¶ÒÁâ‹E´ Ó‹U´Áà)иô‰ø÷î‰ø¾ÌÁøÁú)¶ÂÁà ÉÌEÀ‰D$¡‰$è‹EÀƒì£¨EȉD$¡‰$è¿MÒƒì‹EÈ¿UÔ£L·EÖ‰ T‰X)È@f£P‹EØ)Ð@f£RE¼‰D$¡‰$è‹E¼ƒìÇ$ÿ‰D$1À‰D$èƒì ¹1Ò‰L$‰Ç1Û‰T$1À‰t$‰\$ ‰D$Ç$ÿÿÿÿèƒì‰Æ¸Ì‰D$1À‰D$ 1À‰D$¸‰4$‰D$èƒì‰Ã¸Ì‰D$¸H‰$‰D$è‰$1Ûèƒìè‰$1Ò¹‰T$U¸‰L$‰\$‰T$ ‰|$‰t$è‹E¸ƒì1ɉL$ ºÉ‰T$‰D$¡‰$èƒì‰4$èƒì‰<$èƒìeô1À[^_]Ãè†ïÿÿéÛýÿÿÿÿÿÿÿÿÿÿ %d/%dConsoleColorTable%02dFontSizeFontWeightFontFamilyFaceNameLINESCOLSLINES value must be >= 2 and <= %d: got %d COLS value must be >= 2 and <= %d: got %d PDC_RESTORE_SCREENPDC_PRESERVE_SCREEN Redirection is not supported. 
$X,U3V<RWZTiS|R‹T‘ž²ºEÀÏÜáòû% 16D;UiC…’BÁAÔÙôA(BAJpA~•©@º?×%àëõ:ú:T-º--A-™¤,þ +,4+9D*‰‘)£°ÀÌö+"-'6+;F*N)z9%”9™Ÿ%ÄÌÞ8ãò8÷74".6P>GOP[cEhq%y$€…3•2ž%¥ª3º2Â%Ì%õ& 6=L1U%\a3n0–%¤©3³%º¿3Ï%äëù/þ ) [ k .x %š ¨ :¯ ¶ ½ Ç Ó ÷ /ü   ( 3 J V .c %t &{ ™ '¥ 5ª ² :¹ Ø à é ñ (   + , 4 -9 E M +U g o -t  *„  ˜ (¼ %Ó î ø    >( %: =? ;P <Y g r } È Ð % P [ – ¢ ¶  ;^×ãëQö P'-6BNVOrN¢MÍLàìKöJþH(IFNGYFdF.fileþÿgpdcscrn.c _ci_save" 6HDÐ R0 gp z€ _is_nt8™` §(³Ð  ɰ  Ø  ñ   °  @  (  >   Q0 .textê.data@.bssH.rdataüamz†_SP–_fwrite ¢ ´ Ã ã    9 _malloc P a _atoi _getenv o _exit _fprintf } Œ _calloc _free œ ± ½ Ç ß î  _sprintf  * @ R a r ‡ › _memcpy ® À × ç  ! 9 _Sleep@4 H \ s ƒ Ž ¢_realtocurs_old_console_mode__init_console_info_console_info_PDC_scr_free__fit_console_window_PDC_resize_screen_PDC_reset_prog_mode_orig_scr_PDC_scr_open_curstoreal_PDC_reset_shell_mode_PDC_scr_close_PDC_restore_screen_mode_PDC_save_screen_mode_PDC_init_pair_PDC_pair_content_PDC_can_change_color_PDC_color_content_PDC_init_color_pdc_con_in_pdc_con_out_pdc_atrtab_pdc_quick_edit__imp___iob_SetConsoleMode@8_PDC_mouse_set_SetConsoleActiveScreenBuffer@4_SetConsoleScreenBufferSize@8_GetLargestConsoleWindowSize@4_SetConsoleWindowInfo@12_ReadConsoleOutputA@20_PDC_get_columns_PDC_get_rows_GetVersion@0_GetFileType@4_GetStdHandle@4_PDC_get_buffer_rows_PDC_gotoyx_curs_set_WriteConsoleOutputA@20_RegCloseKey@4_RegQueryValueExW@24_RegQueryValueExA@24_RegOpenKeyExA@20_GetConsoleOutputCP@0_GetConsoleMode@8_CloseHandle@4_SendMessageA@16_GetCurrentProcess@0_DuplicateHandle@28_UnmapViewOfFile@4_MapViewOfFile@20_CreateFileMappingA@24_OpenProcess@12_GetWindowThreadProcessId@8_GetConsoleScreenBufferInfo@8_GetConsoleCursorInfo@8_FindWindowA@8_SetConsoleTitleA@4_GetCurrentProcessId@0_GetTickCount@0_wsprintfA_GetConsoleTitleA@8pdcsetsc.o/ 1249166542 502 20 100644 986 ` Lð.textŒŒ P`.data@0À.bss€0ÀU‰åƒì(‰]ô¡]è‰uø‹u‰}ü‹x¡‰\$‰$èƒì…Àºÿÿÿÿt2…öuBÇEì‰\$¡‰$èƒì…Àºÿÿÿÿt ¡‰ú‰p‹]ô‰Ð‹uø‹}ü‰ì]Ãt&ƒþtÇC¡‹@‰Eè무&ÇEìÇEè_ë•U‰åƒì‹E‰$èƒìÉÃv¼'€=U‰å¶Et º‰]<À÷Ðà  *J Rc  
½Òç.fileþÿgpdcsetsc.c ° !Ð .textó .data.bss_SP0=_COLORSP d | ”_PDC_curs_set_PDC_set_title_PDC_set_blink_pdc_con_out_pdc_color_started_SetConsoleTitleA@4_SetConsoleCursorInfo@8_GetConsoleCursorInfo@8pdcutil.o/ 1249166543 502 20 100644 639 ` L*.textP´  P`.data@0À.bss€0À.rdata@0@U‰åƒìÇ$ÿÿÿÿèƒìÉÉö¼'U‰åƒì‹E‰$èƒìÉÃv¼'U¸‰å]ÃWin32-B .fileþÿgpdcutil.c   @ .textJ.data.bss.rdata_Sleep@4 & 5_PDC_beep_PDC_napms_PDC_sysname_MessageBeep@4 samtools-0.1.19/win32/libz.a000066400000000000000000002210321212162403000154760ustar00rootroot00000000000000! / 1249165936 0 0 0 1192 ` H b bÚÚÚ:¶:¶:¶:¶:¶:¶:¶:¶:¶:¶:¶:¶:¶:¶:¶:¶:¶:¶:¶:¶YZ\~\~\~\~\~\~\~\~\~\~\~\~\~ŒJŒJŒJŒJŒJŒJŒJ»v»v»v»v»v»v¿º¿º¿º¿º¿º¿º¿º¿º¿º¿º¿ºóžóžóžêêN_adler32_adler32_combine_compress2_compress_compressBound_get_crc_table_crc32_crc32_combine_gzsetparams_gzdopen_gzopen_gzread_gzgetc_gzungetc_gzgets_gzwrite_gzprintf_gzputc_gzputs_gzflush_gzrewind_gzseek_gztell_gzeof_gzdirect_gzclose_gzerror_gzclearerr_uncompress_deflateEnd_deflateSetDictionary_deflateSetHeader_deflatePrime_deflateTune_deflateBound_deflate_deflateParams_deflateReset_deflateInit2__deflateInit__deflateCopy_deflate_copyright__tr_init__tr_align__tr_stored_block__tr_flush_block__tr_tally__length_code__dist_code_zlibVersion_zlibCompileFlags_zError_zcalloc_zcfree_z_errmsg_inflateReset_inflatePrime_inflateInit2__inflateInit__inflate_inflateEnd_inflateSetDictionary_inflateGetHeader_inflateSync_inflateSyncPoint_inflateCopy_inflateBackInit__inflateBack_inflateBackEnd_inflate_table_inflate_copyright_inflate_fast// 18 ` __.SYMDEF SORTED/ /0 1249165844 502 20 100644 28 ` /1/12 1249165839 0 0 0 1204 ` Hìì       ^ ^ ^5858585858585858585858585858585858585858SÖVøVøVøVøVøVøVøVøVøVøVøVøVø†À†À†À†À†À†À†Àµêµêµêµêµêµêº*º*º*º*º*º*º*º*º*º*º*îîî N 
N®_adler32_adler32_combine_compress2_compress_compressBound_get_crc_table_crc32_crc32_combine_gzsetparams_gzdopen_gzopen_gzread_gzgetc_gzungetc_gzgets_gzwrite_gzprintf_gzputc_gzputs_gzflush_gzrewind_gzseek_gztell_gzeof_gzdirect_gzclose_gzerror_gzclearerr_uncompress_deflateEnd_deflateSetDictionary_deflateSetHeader_deflatePrime_deflateTune_deflateBound_deflate_deflateParams_deflateReset_deflateInit2__deflateInit__deflateCopy_deflate_copyright__tr_init__tr_align__tr_stored_block__tr_flush_block__tr_tally__length_code__dist_code_zlibVersion_zlibCompileFlags_zError_zcalloc_zcfree_z_errmsg_inflateReset_inflatePrime_inflateInit2__inflateInit__inflate_inflateEnd_inflateSetDictionary_inflateGetHeader_inflateSync_inflateSyncPoint_inflateCopy_inflateBackInit__inflateBack_inflateBackEnd_inflate_table_inflate_copyright_inflate_fast/1/20 1249165826 0 0 100666 1164 ` Lœ .textŒ P`.data@0À.bss€0ÀU‰å‹MW‹}V‹u S‰ËÁëáÿÿƒÿ„ý…ö¸„²ƒÿ†¶ÿ¯†Äï°º[¶Á˶FÁ˶FÁ˶FÁ˶FÁ˶FÁ˶FÁ˶FÁ˶FÁ˶F Á˶F Á˶F Á˶F Á˶F Á˶FÁ˶FƒÆÁËJ…wÿÿÿ¸q€€÷á¸q€€ÁêiÒñÿ)Ñ÷ãÁêiÒñÿ)Óÿ¯‡<ÿÿÿ…ÿ„Êé…¶ƒïÁ¶FËÁ¶FËÁ¶FËÁ¶FËÁ¶FËÁ¶FËÁ¶FËÁ¶FËÁ¶F ËÁ¶F ËÁ¶F ËÁ¶F ËÁ¶F ËÁ¶FËÁ¶FËÁ˃ƃÿ‡rÿÿÿOƒÿÿt¶FÁËOƒÿÿuò¾q€€‰ð÷á‰ðÁêiÒñÿ)Ñ÷ãÁêiÂñÿ)ÉØÁà È[^_]öFÁËOƒÿÿuòùðÿvéñÿ‰Øºq€€÷â‰ØÁêiÒñÿ)ÐÁà Èë¶ÁùðÿvéñÿËûðÿvžëñÿ‰Øë–‰ö¼'U¸q€€‰åƒì‹M‰}ü‹}‰uø·ñÁé÷ç¸q€€‰]ô‹] ÁêiÒñÿ)׉}ð¯þ÷çÁêiÂñÿ‹Uð)Ç·ÃðÁëÙ°ðÿ)Ñþñÿ¼9ñÿvpÿþñÿw9‰ö¼'ÿâÿvïâÿÿñÿvïñÿÁç‹]ô ÷‰ø‹uø‹}ü‰ì]ðÿÿëÈ.fileþÿgadler32.c_adler32 P .text.data.bss_adler32_combine /1/20 1249165827 0 0 100666 924 ` L.text ´Ü P`.data@0À.bss€0À.rdataÔ@0@U¹8‰åƒìh‰}ü‹E‰uø‹} ‰]ôºÇEÈu¨‰E¨‹EÇEÌÇEЉE¬‹E‰E´‹‰E¸‰L$ ‰T$‹E‰4$‰D$è…Àt ‹]ô‹uø‹}ü‰ì]É4$¸‰D$èƒø‰Ãt ‰4$è…Û‰ØuÍ‹]ô¸ûÿÿÿ‹uø‹}ü‰ì]ËE¼‰‰4$è묶U¸ÿÿÿÿ‰åƒì‰D$‹E‰D$ ‹E‰D$‹E ‰D$‹E‰$èÿÿÿÉÃë U‰å‹U]‰ÐÁè ÐÁêD Ã1.2.3 ^€´.fileþÿgcompress.c À  .text.data.bss.rdata( _deflate 4 B_compress2_compress_compressBound_deflateEnd_deflateInit_ /1/12 1249165828 0 0 100666 10156 ` L&.textà´”$' P`.data@0À.bss€0À.rdata ”@`@U¸‰å]öU1À‰åV‹U S…Ò„Ž‹u‰Ó‹E…ö‰Á÷Ñ„ýv¼'öÄê¶C0ȶÀÁé‹…1ÑNu߃þ†!¶‹ƒÃ1Á¶Å¶Ñ‹…3• Áé¶Ñ3•‰ÊÁê‰Á‹•1Á‹ƒÃ1Á¶Å¶Ñ‹…3• 
Áé¶Ñ3•‰ÊÁê‰Á‹•1Á‹ƒÃ1Á¶Å¶Ñ‹…3• Áé¶Ñ3•‰ÊÁê‰Á‹•1Á‹ƒÃ1Á¶Å¶Ñ‹…3• Áé¶Ñ3•‰ÊÁê‰Á‹•1Á‹ƒÃ1Á¶Å¶Ñ‹…3• Áé¶Ñ3•‰ÊÁê‰Á‹•1Á‹ƒÃ1Á¶Å¶Ñ‹…3• Áé¶Ñ3•‰ÊÁê‰Á‹•1Á‹ƒÃ1Á¶Å¶Ñ‹…3• Áé¶Ñ3•‰ÊÁê‰Á‹•1Á‹ƒÃ1Á¶Å¶Ñ‹…3• Áéƒî ¶Ñ3•‰ÊÁê‰Á‹•1Áƒþ‡4þÿÿƒþvOë ‹ƒîƒÃ1Á¶Å¶Ñ‹…Áé3• ¶Ñ3•‰ÊÁê‰Á‹•1ÁƒþwÀ…öë¶C0ȶÀÁé‹…1ÑNuè‰È÷Ð[^]ö¿U1É…Ò‰åt´&öÂt3ƒÀÑêuò]‰Èô&¼'U‰åW‰ÇV‰ÖS1Ût&‹ž‰ðè¶ÿÿÿ‰ŸCƒû~í[^_]ô&U‰åWVSì ‹]‹u…Û„‡º ƒ¸í¸‰•èþÿÿº‰”…èþÿÿ@Òƒø~ñ½hÿÿÿ•èþÿÿ‰øè~ÿÿÿ‰ú…èþÿÿèqÿÿÿëÑût7‰ú…èþÿÿè^ÿÿÿöÃu7Ñût!•èþÿÿ‰øèHÿÿÿöÃtÔ‰ò‰øè ÿÿÿÑû‰ÆuÉ‹E 1ÆÄ ‰ð[^_]Éò…èþÿÿèåþÿÿ‰Æë¸–0w,aîºQ ™Ämôjp5¥c飕dž2ˆÛ¤¸ÜyéÕàˆÙÒ—+L¶ ½|±~-¸ç‘¿d·ò °jHq¹óÞA¾„}ÔÚëäÝmQµÔôÇ…ÓƒV˜lÀ¨kdzùbýìÉeŠO\Ùlcc=úõ È n;^iLäA`Õrqg¢Ñäjm ¨Zjz Ïäÿ “'® ±ž}D“ðÒ£‡hòþÂi]Wb÷Ëge€q6lçknvÔþà+Ó‰ZzÚÌJÝgoß¹ùùホC¾·Õް`è£ÖÖ~“Ñ¡ÄÂØ8RòßOñg»ÑgW¼¦Ýµ?K6²HÚ+ ØL ¯öJ6`zAÃï`ßUßg¨ïŽn1y¾iFŒ³a˃f¼ Òo%6âhR•w ÌG »¹"/&U¾;ºÅ( ½²’Z´+j³\§ÿ×Â1Ïе‹žÙ,®Þ[°Âd›&òc윣ju “m© œ?6ë…grW‚J¿•z¸â®+±{8¶ ›ŽÒ’ ¾Õå·ïÜ|!ßÛ ÔÒÓ†BâÔñø³ÝhnƒÚ;[&¹öáw°owG·æZˆpjÿÊ;f\ ÿžei®bøÓÿkaEÏlxâ  îÒ ×TƒN³9a&g§÷`ÐMGiIÛwn>JjÑ®ÜZÖÙf ß@ð;Ø7S®¼©Åž»ÞϲGéÿµ0ò½½ŠÂºÊ0“³S¦£´$6к“×Í)WÞT¿gÙ#.zf³¸JaÄh]”+o*7¾ ´¡Ž ÃßZï-A1‚b62ÃS-+ÅldEôw}†§ZVÇ–AOŠÙÈI»ÂÑŠèïúËÙôã Oµ¬M~®µŽ-ƒžÏ˜‡QÂJ#ÙSÓpôx’AïaU×®.æµ7×µ˜–„ƒY˜‚©›Ûú-°šË6©]]wællÿß?AÔžZÍ¢$„•㟌 F²§aw©¾¦áèñçÐóè$ƒÞÃe²ÅÚª®]]ëŸFD(Ìkoiýpv®k19ïZ* ,  m8ó6Fß²]ÆqTpí0ekô÷ó*»¶Â1¢u‘‰4 û¼Ÿº„yÞ©%8ï²<ÿyós¾Hèj}ÅA<*ÞXOyðD~bé‡-OÂÆTÛŠ”@»ƒè#¦ÂÙ8¿ Å 8Lô»!§– Ζ Ì\H1×E‹búnÊSáwT]»ºl £Ö?ˆ—–‘P˜×Þ©ÌÇÒúáì“Ëúõ\×bræykÞµT@Ÿ„OYX#Úp8$›A#=§kýeæZæ|% ËWd8ÐN£®‘⟊!̧3`ý¼*¯á$­îÐ?´-ƒŸl² †«$HÉêSÐ)F~ûhweâöy?/·H$6t 5*ò¼SK³HRpÞey1ï~`þóæç¿Âýþ|‘ÐÕ= ËÌú6Šƒ»‘šxT¼±9e§¨K˜ƒ; ©˜"Éúµ ˆË®O]ï_lôFÍ?ÙmŒÂtCZó#AêÁplÁ€AwØG×6—æ-ŽÅµ¥„„¼ŠAq[»Zh˜èwCÙÙlZO-_~6 œ-'Ý>˜¹S1ƒ b®‹ÑSµ’ÅôÝWôïÄ”§ÂïÕ–Ùöé¼®¨·kÞ1œ*ï*…íykʬHpÓo]ø.*Fáá6Þf ÅcTèT"eóMåó²¤Â©g‘„0& Ÿ)¸®ÅäùŸÞý:ÌóÖ{ýèϼk©€ýZ²™> Ÿ²8„«°$,ñ52F*sw1´ápHõÐkQ6ƒFzw²]cN×úËæáÒ̵Ìù„×àJ–¯ #¶Èp ‰A»„F]#l8Ä?1…(B˜Og©T~ÀúyUËbLÅ8^ô#˜§³Ü–ªTåZ1Oü™bbרSyÎOáIV~úP•-×{ÔÌbŠ-R»–4‘è»ÐÙ ìó~^­ÂeGn‘Hl/ Suè6:© #jT$+e?äy§–¥H¼f‘¤'*нà¼Ëò¡ÐëbÞýÀ#ïæÙ½á¼üЧ ?ƒŠ&~²‘?¹$ÐpøËi;FæBzwý[µkeÜôZ~Å7 Sîv8H÷±® ¸ðŸ¡3Ì?Šrý$“7jÂnÔ„Y¾Fܨ ëÂ˲|…O¸Q;ÑÖ…— áïU dù S“Ø -ž =G\ p£&GÉäw¢)`¬ /›aíÂß«õµiÈò5ÿ˜÷¦&±‘LsZ<#0þzޏMäzàFM8×,9Ž’É;¹ø 
:<îD? „†>R:À(ôq-Ãv³,šÈõ.­¢7/Àšp÷çXq®Ys™3Ür%“w+OQvrñtE›Õux܉~O¶K }!bÏ|¤t€y“BxÊ zýÊÆ{°.¼l‡D~mÞú8oéúnl†µk[ìwjR1h58ói¯b?mcf«+aQÁé`Ôצeã½ddº"fiàg Ë×H¡INSKyu‘JücÞOË N’·ZL¥Ý˜M˜šÄF¯ðGöN@EÁ$‚DD2ÍAsX@*æIBŒ‹CPhñTg3U>¼uW Ö·VŒÀøS»ª:Râ|PÕ~¾Qè9âZßS [†ífY±‡¤X4‘ë]û)\ZEo^m/­_€5á·q÷àîϱâÙ¥sã\³<ækÙþç2g¸å zä8J&ï äîVž¢ìaô`íäâ/èÓˆíéŠ6«ë½\iêð¸ýÇÒÑüžl—þ©Uÿ,úzØûBÄžùu®\øHéóƒÂò&=„ðWFñ”A ô£+Ëõú•÷ÍÿOö`]xÙW7ºØ‰üÚ9ã>Û¼õqÞ‹Ÿ³ßÒ!õÝåK7ÜØ k×ïf©Ö¶ØïÔ²-Õ¤bÐ3ΠÑjpæÓ]$Òþ^Å'”œÄ~*ÚÆI@ÇÌVWÂû<•â‚ÓÁ•èÀ¨¯MËŸÅÊÆ{ÉÈñ ÉtDÌCm†ÍÓÀÏ-¹Î@–¯‘wüm.B+’(铜>¦–«Td—òê"•Å€à”øÇ¼ŸÏ­~ž–8œ¡yú$oµ˜w™J»1›}Ñóš05‰_KŒ^á Ži‹Ï쀊Û÷B‹‚I‰µ#ƈˆdšƒ¿X‚æ°€ÑÚÜTÌ“„c¦Q…:‡ rÕ† Ðâ©—º ¨Îfªùn¤«|xë®K)¯¬o­%Æ­¬ñ§/ë3¦vUu¤A?·¥Ä)ø óC:¡ªý|£—¾¢Ðsĵç´¾§@¶‰Í‚· ÛͲ;±³bI±Ue‹°h"×»_HºöS¸1œ‘¹´ŠÞ¼ƒà½Ú^Z¿í4˜¾eg¼¸‹È ªî¯µW—b2ðÞ7Ü_k%¹8×ï(´ÅŠO}dà½o‡׸¿ÖJÝØjò3wßàVcXŸWPú0¥èŸúqø¬BÈÀ{ß­§ÇgCru&oÎÍp­•-û·¤?žÐ‡'èÏBs¢¬ ưÉGz>¯2 [ÈŽµg; Ї²i8P/ _ì—âðY…‡—å=ч†e´à:ÝZOÏ?(3w†äêãwXR Øí@h¿Qø¡ø+ðÄŸ—H*0"ZOWžâöoI“õÇ}§@ÕÀümNП5+·#Å–Ÿ *'Gýº| A’ô÷èH¨=X›X?¨#¶1Ó÷¡‰jÏv¨Ê¬á¾„`ÃÒp ^·æY¸©ô<ßL…çÂÑà€~i/Ë{kHwâ ËÇh±s)ÇaL ¸Ùõ˜oDÿÓü~Pfî7ÚVM'¹(@¶Æï°¤£ˆ °Û×g9‘xÒ+ôn“÷&;fšƒˆ?/‘íX“)T`D´1ø ߨMºÏñ¦ìß’þ‰¸.Fg›Tp'ì»HðqÞ/LÉ0€ùÛUçEcœ ?kùǃÓh6ÁrŠyË7]ä®Pá\@ÿTN%˜èösˆ‹®ï7ø@‚'>¼$é!AxU™¯×à‹Ê°\3;¶Yí^ÑåU°~PGÕìÿl!;b F‡Úçé2È‚ŽŽpÔží(±ùQ_Vä‚:1X:ƒ §æn3Á† m¦:µ¤á@½Á†ü/)IJNõ¯óv"2–žŠx¾+˜Ù— KÉôx.®HÀÀýÒ¥fAj^–÷y9*O—–Ÿ]òñ#åkM`~×õŽÑbçë¶Þ_RŽ Â7éµzÙFh¼!¼Ðê1߈Vc0aùÖ"žj𽦽ØÁ¿6n´­S šNrÿ)Î¥†{·táÇÍÙ’¨¾¬*F8#v¥€ufÆØz`þ®Ïr›ÉsÊ"ñ¤WG–ï©9­ýÌ^EîMvc‰ñÎ&DÜèAødQy/ù4“AÚ±&S¿ÖšëéÆù³Œ¡E bðiL¡¾Q›<Û6'„5™’–Pþ..™¹T&üÞèžq]Œwá4Î.6©«IŠEæ? 
ƒ»v‘àãö\[ýYéI˜>Uñ!‚lDa>Ԫ΋ÆÏ©7~8AÖ]&Ãn³‰v|ÖîÊÄoÖY ±¡áäóy¨K×i˲w«\¡Â¹9Æ~€þ©œå™$ 6 6nQާf†ÂqÚ>,Þo,I¹Ó”ð •渱{I £.±H>ÒC-YnûÃöÛ馑gQ©°ÌzÎ t”a¹fñÞw0–îa,™ QºmÄpjôéc¥5žd•£Ûˆ2yܸ¤àÕé—ÒÙˆ ¶L+~±|½ç¸-¿‘·dj° òó¹qH„¾AÞÚÔ}mÝäëôÔµQƒÓ…Çl˜Vdk¨ÀýbùzŠeÉì\OclÙú=c õ;n ÈLi^Õ`Aä¢gqr<äÑKÔGÒ …ý¥ µk5µ¨úB²˜lÛ»ÉÖ¬¼ù@2ØlãEß\uÜÖ Ï«Ñ=Y&Ù0¬QÞ:È×Q€¿Ða!´ôµV³Ä#Ϻ•™¸½¥(¸ž_ˆÆ Ù²± é$/o|‡XhLÁa«¶f-=vÜAÛq˜Ò ¼ïÕ*q±…‰¶µŸ¿ä¥è¸Ô3xÉ¢ù4– ¨Žá˜j »m=-‘dl—æc\kkQôlab…e0ØòbNl•í¥{‚ôÁõÄWe°ÙÆ·éP‹¾¸êü¹ˆ|bÝßÚ-IŒÓ|óûÔLeM²aX:µQΣ¼tÔ»0âJߥA=ؕפÑÄmÓÖôûCiéj4nÙü­gˆFÚ`¸ÐD-s3åª L_Ý |ÉPq<'Aª¾ É †Whµ% o…³¹fÔ ÎaäŸ^Þù)Ùɘ°Ð˜"Çר´Y³=.´ ·½\;Àºl­í¸ƒ š¿³¶¶â t±ÒšêÕG9Òw¯Û&s܃ãc ”d;„ mj>zjZ¨äÏ “ ÿ ®'}ž±ð“D‡£ÒòhiÂþ÷bW]€egËl6qnkçþÔv‰Ó+àÚzZgÝJÌù¹ßo޾ïù·¾C`°ŽÕÖÖ£è¡Ñ“~8ØÂÄOßòRÑ»gñ¦¼Wg?µÝH²6KØ +Ú¯ L6JöAz`ß`ïègßU1nŽïFi¾yËa³Œ¼fƒ%oÒ Rhâ6Ì w•» G"¹U&/ź;¾²½ (+´Z’\³jÂ×ÿ§µÐÏ1,Ùž‹[Þ®›d°ìcò&uj£œm“ œ ©ë6?rg…W•¿J‚â¸z{±+® ¶8’ÒŽ›åÕ¾ |Üï· Ûß!†ÓÒÔñÔâBhݳøÚƒn¾Íö¹&[o°wá·GwˆZæÿjpf;Ê \ežÿøb®iakÿÓlÏE  âx× ÒîNƒT9³Â§g&aÐ`÷IiGM>nwÛ®ÑjJÙÖZÜ@ß f7Ø;𩼮SÞ»žÅG²Ï0µÿé½½òʺŠS³“0$´£¦ºÐ6ÍדTÞW)#Ùg¿³fz.ÄaJ¸]h*o+”´ ¾7Ã Ž¡Zß-ï1A26b‚+-SÃdlÅ}wôEVZ§†OA–ÇÈÙŠÑ»IúïèŠãôÙˬµO µ®~Mžƒ-އ˜ÏJÂQSÙ#xôpÓaïA’.®×U7µæ˜µ×ƒ„–‚˜Y›©°-úÛ©6Ëšæw]]ÿllÔA?ßÍZž•„$¢ŒŸã§²F ¾©wañèá¦èóÐçÃÞƒ$ÚŲe]]®ªDFŸëokÌ(vpýi91k® *Zï  ,8mßF6óÆ]²ípTqôke0»*ó÷¢1¶‰‘u 4Ÿ¼û„º%©Þy<²ï8sóyÿjèH¾AÅ}XÞ*<ðyOéb~DÂO-‡ÛTÆ”Š»@¦#胿8ÙÂ8 Å !»ôL –§–Î\Ì E×1Hnúb‹wáSʺ»]T£ lˆ?Ö‘–—ÞטPÇÌ©ìáúÒõúË“rb×\kyæ@TµÞYO„ŸX#$8pÚ=#A›eýk§|æZæWË %NÐ8d‘®£ŠŸâ3§Ì!*¼ý`­$᯴?Ð-† ²lÉH$«ÐSêû~F)âewh/?yö6$H· t*5KS¼òRH³yeÞp`~ï1çæóþþý¿ÕБ|ÌË =ƒŠ6úš‘»±¼Tx¨§e9;ƒ˜K"˜© µúɮˈ_ï]OFôlmÙ?ÍtÂŒóZCêA#ÁlpÁØwA€—6×GŽ-極ż„„qAŠhZ»[Cwè˜ZlÙÙ-O 6~_'-œ>ݹ˜ ƒ1S‹®b’µSÑÝôÅÄïôWï§”öÙ–Õ®¼é·¨œ1Þk…*ï*ÊkyíÓpH¬ø]oáF*.fÞ6áÅ TèTcMóe"²óå©Â¤0„‘g)Ÿ &äÅ®¸ýÞŸùÖóÌ:Ïèý{€©k¼™²Zý²Ÿ >«„8,$°5ñ*F21wsHpá´QkÐõzFƒ6c]²wËú×NÒáæù̵Ìàׄ¯–J¶#  pÈ„»A‰#]F8l1?Ä(…gO˜B~T©UyúÀLbË8Ř#ô^³§ª–ÜåTüO1Z×bb™ÎySØIáOPú~V{×-•bÌÔ-Š4–»R»è‘ ÙÐ^~óìGe­lH‘nuS /:6è# ©$Tj?e+–§yä¼H¥¤‘f½Š*'ò˼àëСÀýÞbÙæï#¼á½ §Ðü&Šƒ??‘²~pÐ$¹iËøBæF;[ýwzÜekµÅ~ZôîS 7÷H8v¸ ®±¡ŸðŠ?Ì3“$ýrÂj7„ÔnF¾Y ¨ÜËÂë|²O…Q¸Ñ; —…Ö Uïá ùdØ“S ž- \G=&£päÉG¢w`)/ ¬ía›«ßÂiµõ5òÈ÷˜ÿ±&¦sL‘†„ 
<À:R=Pe6^X7œ}o5ÚÃ64©1W¿„0•Õ³2Ókê3Ý$kå%©§'ï1þ&-[É#bML" '{ æ™"!$ó*x´(+ºÞ)ü`F(> q-qô,³vÃ.õÈš/7¢­pšÀqXç÷sY®rÜ3™w“%vQO+tñruÕ›E~‰ÜxK¶O} |Ïb!y€t¤xB“z Ê{ÆÊýl¼.°m~D‡o8úÞnúékµ†ljwì[h1Rió85b¯cm?a+«f`éÁQe¦×Ôdd½ãf"ºgàiH×Ë I¡KSNJ‘uyOÞcüN ËLZ·’M˜Ý¥FÄš˜Gð¯E@NöD‚$ÁAÍ2D@XsBIæ*C‹ŒTñhPU3gWu¼>V·Ö SøÀŒR:ª»P|âQ¾~ÕZâ9è[ SßYfí†X¤‡±]ë‘4\)û^oEZ_­/má5€à÷q·â±Ïîãs¥Ùæ<³\çþÙkå¸g2äz ï&J8îä 좞Ví`ôaè/âäéíˆÓë«6Šêi\½ý¸ðüÑÒÇþ—lžÿU©ú,ûØzùžÄBø\®uóéHòƒð„=&ñFWô A”õË+£÷•úöOÿÍÙx]`غ7WÚü‰Û>ã9Þqõ¼ß³Ÿ‹Ýõ!ÒÜ7Kå×k ØÖ©fïÔïØ¶Õ-²Ðb¤Ñ Î3ÓæpjÒ$]Å^þÄœ”'ÆÚ*~Ç@IÂWVÌÕ<ûÁÓ‚¢Àè•ËM¯¨ÊÅŸÈÉ{ÆÉ ñÌDt͆mCÏÀÓι-‘¯–@müw’+B.“é(–¦>œ—dT«•"êò”à€ÅŸ¼Çøž~­Ïœ8–úy¡˜µo$™w›1»JšóÑ}‰50ŒK_Ž á^Ï‹iŠ€ì‹B÷Û‰I‚ˆÆ#µƒšdˆ‚X¿€°æÜÚÑ„“ÌT…Q¦c‡:†Õr ©âР¨ º—ªfΫ¤nù®ëx|¯)K­o¬¬­Æ%§ñ¦3ë/¤uUv¥·?A ø)Ä¡:Có£|ýª¢¾—µÄsдç¶@§¾·‚͉²ÍÛ ³±;±Ib°‹eU»×"hºH_¸Sö¹‘œ1¼ÞŠ´½àƒ¿Z^Ú¾˜4í¸¼geª È‹µ¯îb—W7Þð2%k_Ü×8¹Å´(ï}OŠo½àdׇJÖ¿¸òjØÝàßw3XcVPWŸè¥0úúŸB¬øqß{ÀÈgǧ­urCÍÎo&•­p-?¤·û‡ОÏè'¢sB°Æ ¬zGÉ 2¯>ŽÈ[ ;gµ²‡Ð/P8i—ì_ …Yðâ=å—‡e†‡ÑÝ:à´ÏOZw3(?êä†RXwã@íØ øQ¿hð+ø¡H—ŸÄZ"0*âžWOIoöÇõ“Õ@§}müÀ5ŸÐN#·+Ÿ–Å'* ºýGA |ô’¨Hè÷›X=#¨?X1¶‰¡÷ÓvÏj¬Ê¨¾áÃ`„^ pÒæ·ô©¸YLß<ÑÂç…i~€à{Ë/ÃwHkË ¢s±hÇaÇ)Ù¸ LDo˜õüÓÿîfP~VÚ7¹'M¶@(¤°ïÆ ˆ£Û°9g×+Òx‘“nô;&÷ƒšf‘/?ˆ)“Xí´D`T ø1M¨ß¦ñϺþ’ßìF.¸‰T›gì'pqðH»ÉL/ÞÛù€0cEçUk? 
œÓƒÇùÁ6hyŠrä]7Ë\áP®NTÿ@öè˜%®‹ˆs7ï‚@ø¼>'!é$™UxA‹àׯ3\°ÊíY¶;UåÑ^GP~°ÿìÕb;!lÚ‡F È2éçpŽŽ‚(ížÔQù±‚äV_:X1:§ ƒ3næ †Áµ:¦m½@á¤ü†ÁI)/¯õNJ2"vóŠž–˜+¾x —ÙxôÉKÀH®.ÒýÀjAf¥÷–^O*9y]Ÿ–—å#ñòMkõ×~`çbÑŽ_Þ¶ë ŽRzµé7hFÙм!¼ˆß1ê0cV"Öùašjž½¦½¿ÁØ­´n6 SrNš¥Î)ÿ·{†Çát’ÙÍ*¬¾¨8F€¥v#ØÆfu`zrÏ®þÊsÉ›W¤ñ"ï–Gý­9©E^ÌvMîÎñ‰cÜD&døAèù/yQA“4S&±ÚëšÖ¿³ùÆé E¡Œðb¡Li<›Q¾„'6Û–’™5..þP&T¹™žèÞüŒ]q4áw©6.ΊI«?æE»ƒ ãà‘v[\öIéYýñU>˜l‚!Ô>aDƋΪ~7©ÏÖA8nÃ&]|v‰³ÄÊîÖYÖoᡱ óäK¨yËi׫w²¹Â¡\~Æ9œ©þ€$™å6 6 ŽQn†f§>ÚqÂ,oÞ,”Ó¹I ð±¸æ•£ I{±.CÒ>HûnY-éÛöÃQg‘¦Ì°©t Îzf¹a”ÞñX€‡”¢¸¿ÌÚð÷(/<J`gt‚˜Ÿ¬ºÐ×äò-cmw…£.fileþÿgcrc32.c _crc32 À 0ð C  .textß'.data.bss.rdata R_crc_table_get_crc_table_gf2_matrix_times_gf2_matrix_square_crc32_combine /1/12 1249165829 0 0 100666 7796 ` LdH.text ´€J P`.data@0À.bss€0À.rdata,T@0@U‰åƒì(‰]ô‹]‰uø‹u…Û‰}ü‹} t€{\wt‹]ô¸þÿÿÿ‹uø‹}ü‰ì]ËC…Àt‰u‹uø‰} ‹}ü‰]‹]ô‰ì]é‹CH¹@‹S@‰C ‰T$ º‰L$‰T$‰$è=@tÇC8ÿÿÿÿÇC@먶¼'U‰åSƒì‰Ã‹S<¸ÿÿÿÿ…Òu‹C…Àt‹ H‰CA¶‰‰ÐƒÄ[]ÃèÇ‹C@‰D$ ¸@‰D$¸‰D$‹CD‰$è…Àt‹KD‰ ë²ÇC‹C@ÇC<ö@ tÇC8ÿÿÿÿ¸ÿÿÿÿë›ë U‰åƒì(‰uø‰Æ‰]ô‰}ü‹Xƒû‰Øww…Û…èÇ‹F@ˆÙ¿‰|$‰D$ ¸@Óø‰D$‹NDˉ$è…À‰Ãu‹F@ö@ …†t&‹F‹VD؉Fƒø‰w‰FX‹]ô‹uø‹}ü‰ì]Ë€:t'ÇFX‹]ô‹uø‹}ü‰ì]Ãv‹‹FD¶ˆédÿÿÿ€z‹uÓƒè‰FB‰‰ðèˆþÿÿ‰Ã‰ðèþÿÿƒû‰ÇtÇF8ýÿÿÿë¯ÇF8ÿÿÿÿésÿÿÿ¨à»uâ‰ðèSþÿÿKyö÷ÇuJ÷Çu"÷Çu*ƒçu\‹F<ƒøÀ÷Ѓàý‰F8é^ÿÿÿ‰ðèþÿÿ…ÀtÓ@uòëΉðèþÿÿ…ÀtË@uòëÆ‰ðè÷ýÿÿ‰Ã‰ðèîýÿÿÁàÃKƒûÿt›‰ðèÜýÿÿ@uðë‰ðèÐýÿÿ‰ðèÉýÿÿë”´&U‰åV1öS‰Ãƒì…Û¸þÿÿÿtO‹CP…À…‹C…ÀuI‹C@…Àub‹C8…Àˆ·‹CD…À…œ‹CH…À…‹CT…Àuj…Ût‰$è‰ðƒÄ[^]Ãt&¶C\ÿÿÿÇC8ýÿÿÿ‹S ‰t$)ò‰T$‹CL‰$è‰CL‹K;Mt%‹U)Ê‹]ô‰Ð‹uø‹}ü‰ì]ÃÇC8ºénþÿÿ‹C8ƒøý”Â@”À Шºÿÿÿÿ…Rþÿÿ‹U)Ê뽋C ‰t$)ð‰D$‹CL‰$è‰CL‰Ø‹s è¢ýÿÿ;CL…`ÿÿÿ‰Øè’ýÿÿ‰Øèëöÿÿ‹S8…Ò…Nÿÿÿ‰$èÇ$1À‰D$1À‰D$è‰CL‹C8éþÿÿ‹s9ñs‰Î…öuI…Éu)M‹ECdCh…ÀuÇC<‹Ué©ýÿÿ‹C@‰L$‰<$‰D$ ¸‰D$è‹K)Á‰K뻋S ÷‹‰t$‰$‰D$è3‹K)s‰{ )ñ‰KëŽÇC8ÿÿÿÿ‹S é˜þÿÿ´&U¸‰åƒì‰D$Eÿ‰D$‹E‰$èÿüÿÿHºÿÿÿÿtɉÐÃv¶UÿɉÐô&U‰å‹M …Ét€y\r•Àƒ}ÿ” Шuƒylÿt ]¸ÿÿÿÿÃt&ÿIh‹E‰Al1Àƒy8”À…À‰AptÇA8ÇA<‹E]Ãt&U‰åWVSƒì ‹] ‹u…ۉߔÀ…öžÂ Ð1Ò¨uL¶¼'N…ö~#‰\$¸‰D$‹E‰$è3üÿÿHu¶C< uØÆ9ß•À…öžÂ Ш‰úu1ÒƒÄ ‰Ð[^_]ô&U‰åWVSƒì‹]‹} ‹u…Ût€{\wtƒÄ¸þÿÿÿ[^_]ô&‰;…ö‰ñ‰stq‹Sf…Òu=‹CH¹@‹S@‰C ‰T$ 
º‰L$‰T$‰$è=@u_ÇC@‹Kº@Kd1ÉSh‰L$‰$è‰C8‹K‹S)Kd)Sh…Àu…Éu”‰t$‰|$‹CL‰$è‰CL‹SƒÄ[‰ð)Ð^_]ÃÇC8ÿÿÿÿëÒ¶U¸‰åSèÆE÷E»‰D$ ‹E ‰\$øïÿÿ‰D$‰$è‰Â@ÿ=þw€}÷t Ä1À[]É\$‰T$‹E‰$è¹þÿÿÄ[]ÃU‰åƒì‹E ˆEÿ¸‰D$Eÿ‰D$‹E‰$è‰þÿÿHºÿÿÿÿtɉÐöUÿɉÐÃt&U‰åSƒì‹] ‰$è‰D$‹E‰\$‰$èKþÿÿƒÄ[]Ãt&U‰åW1ÿVSƒì…À‰Uð‰Ãt€x\wt ¸þÿÿÿƒÄ[^_]ÃÇ@‹Pë ¸@‰Æ)Öuu…ÿ…¢‰$‹CCh‹Eð‰D$è‹S)Sh…öuƒøût‰C81ÿ…Òu0‹C8ƒøt+ƒøv³ƒøu1Àë‰ÇC81ÿ…Òtݶ¼'‹C8¿ëζ‰t$‹C@‰D$ ¸‰D$‹CH‰$è9ðuÇC@‹CH‰C éVÿÿÿ‹C8ë“ÇC8ÿÿÿÿ¸ÿÿÿÿéÿÿÿt&U‰åSƒì‹]‹U ‰ØèÜþÿÿ…ÀtZ[]Ãt&‹C@‰$è‹C8ƒøuåZ1À[]ô&U‰åSƒì‹]…Ût€{\rt ƒÄ¸ÿÿÿÿ[]ÃÇC8‹CDÇC<ÇClÿÿÿÿ‰1ÀÇC‰D$1À‰D$Ç$è‰CL‹CX…Àt,ÇCd1ÉÇCh‰L$‹C`‰D$‹C@‰$èƒÄ[]É$èëÊ´&U‰åVSƒì‹u‹M‹] …ö”Àƒù” Шuo‹F8ƒøÿtgƒøýtb€~\w„·I„N…ÛºÿÿÿÿxM‹VX…Ò…ô‹Fh9Ø})Ã…Ûtg‹FH…À„d…ÛtXƒ~lÿtRÿFhK‹FpÇFlÿÿÿÿ…Àt@ÇF8ë7ºÿÿÿÿƒÄ‰Ð[^]Ãûÿ?¸@‰Ø‰D$‹FH‰4$‰D$èŽ÷ÿÿ…À~Ë)Ã…ÛÔ‹VhƒÄ[‰Ð^]É4$è€þÿÿ…Àºÿÿÿÿ‰pÿÿÿë§…Éu‹Fd)Ã…Ûºÿÿÿÿx”‹FD…Àu3éŠfûÿ?¸@‰Ø‰D$‹FD‰4$‰D$èûÿÿ…À„Wÿÿÿ)Ã…ÛЋVdéNÿÿÿÇFlÿÿÿÿ‹FDÇF‰1À‰D$‰\$‹F@‰$è…Àºÿÿÿÿˆÿÿÿ‰^h‰Ú‰Ð‰^dƒÄ[^]ËNhËé¨þÿÿÇ$@è‰FD…Àºÿÿÿÿ„âþÿÿ‰$¹@1Ò‰L$‰T$èélÿÿÿÇ$@è‰FH…Àºÿÿÿÿ…€þÿÿé¥þÿÿë U¸‰åƒì‰D$1À‰D$‹E‰$èàýÿÿÉô&¼'U‰å‹U…Òt€z\rt1À]ËJ<¸…Éuò]1Àƒz8”ÀÃt&U‰å‹E…Àt€x\rt]1ÀÃ]‹@Xô&U‰åƒì‰]ô‰Ó‰uø¾‰}ü‰Ç´&‰|$¶ÃÁë‰$èNyë‹]ô‹uø‹}ü‰ì]ô&¼'U‰åSƒì‹]…Ût:€{\wt Z‰Ø[]éðÿÿfº‰Øèûÿÿ…Àuä‹SL‹C@èuÿÿÿ‹Sd‹C@èjÿÿÿëÌ[¸þÿÿÿ[]Ãë U‰åƒì‰uø‹u‹E ‰]ô…ö‰}ü„ã‹V8…Ò‰¸+„–B„ô‹~…ÿ„‘€?„ˆ‹FP…À…™t&¼'‹FT‰$è‰<$‰ÃèD‰$è‰FP…À‰Â„Š‹FT‰$‰D$è‹^P‰$èfÇ: ÆD‰|$‹FP‰$è‹FP‹]ô‹uø‹}ü‰ì]ËN8¸)È‹<…‹FP…À„rÿÿÿ‰$èéeÿÿÿt&Çþÿÿÿ‹]ô¡‹uø‹}ü‰ì]á‹]ô‹uø‹}ü‰ì]Ã苉$è‰ÇéùþÿÿU‰å‹E…Àt&ƒx8tÇ@8Ç@<‹@@‰E]é¶]Ë1.2.3%c%c%c%c%c%c%c%c%c%cx;Ó@ý9d@”9>8`6t5}@”8¤8´8Ä8Û70F¼EØDáFûC‘F¤%¹A×@ú?!=m%«B¹F0%E>eGŠ%™<õ3O @y 9 E- Ed 2| EÓ 9ô 4§ ;Ñ 1û E* 'S 0Þ Dd1Û;7/¥EÕ-ã2\-F·.ÈFž,E%‡D‘DF¹CÄDÞ*ÿ(8*(9(K@U+Q:†).fileþÿggzio.c   %@ _destroyà _gz_openð _gzdopenp _gzopenÀ _getLongà _gzread@ _gzgetc  3`  _gzgetsÀ  _gzwrite@  =  _gzputc  _gzputsÐ  G _gzflush QP _gzseekð _gztellð _gzeof  [P _putLongp _gzcloseÀ _gzerror  e` .text’J.data.bss.rdata,__allocaq{ _strcat … _fputc _fseek _memset _fflush  _deflate › _inflate _memcpy _fclose © µ _free _fread Á _fwrite _sprintf _ftell _fprintf _fdopen __errno Ð ß _strcpy _strlen _crc32 
_malloc _fopen î_gz_magic_gzsetparams_get_byte_check_header_gzungetc_gzprintf_do_flush_gzrewind_gzdirect_gzclearerr_z_errmsg_clearerr_strerror__vsnprintf_inflateReset_inflateEnd_deflateEnd_deflateParams_inflateInit2__deflateInit2_ /1/20 1249165830 0 0 100666 764 ` L®.textÀ´| P`.data@0À.bss€0À.rdatat@0@U¹‰åƒìh‹E‰}ü‹} ‰E¨‹E‰]ô»8‰E¬‹E‰uøu¨‰E´‹ÇEÈÇẺE¸‰\$‰L$‰4$è…Àt ‹]ô‹uø‹}ü‰ì]É4$º‰T$èƒø‰Ãt/‰4$èƒûtƒûût‰Ø‹uø‹]ô‹}ü‰ì]ËE¬…Àuê¸ýÿÿÿ묋E¼‰‰4$èë1.2.3 Pr  µ .fileþÿguncompr.c .text».data.bss.rdata _inflate  *_uncompress_inflateEnd_inflateInit_ /1/20 1249165832 0 0 100666 12196 ` Ll*0.text&´t'B P`.data@0À.bss€0À.rdataÀ´&* @`@U‰åVSƒì‹]…Ût‹K…Éu ¸þÿÿÿƒÄ[^]Ëqƒþ*•À1ÒƒþE•Â…ÂtƒþI•À1Òƒþ[•Â…ÂuW´&‹A…À…¥‹AD…À…‚‹A@…Àui‹A8…ÀuP‰L$‹C(‰$ÿS$ÇC1Àƒþq•ÀƒÄD@ý[^]Ãt&ƒþg•À1Òƒþq•Â…Âtžþš¸þÿÿÿt‘éWÿÿÿ‰D$‹C(‰$ÿS$‹K랉D$‹C(‰$ÿS$‹Kë…‰D$‹C(‰$ÿS$‹Kéiÿÿÿv‰D$‹C(‰$ÿS$‹KéFÿÿÿt&¼'U‰åWVSƒì,‹E‹]‹} …À‰]ð„‹E‹p…ö”À…ÿ” Ш…ù‹Fƒø„íH„Ü‹F…À…è1Àƒû†Õ‹F,-9Øs‰Eð)Ãß‹Uð1Û‹F8‰|$‰T$‰$è‹Mð‹F8‹~D‰Nl‰N\‹NX‰Eܶ‰Mè‰VHÓâ‹NT‰UÔ‹Uð¶@‰M؃ê1EÔ‰Uì!MÔ‹N4‹EÔ‰Mà‰FH‹F@‰Eä´&¶Mè‹UÜÓeÔ‹MÔ¶D‹UØ1È!ЋUà‹Mä‰EÔ!Ú‰FH·Gf‰Q‹EÔf‰GC9]ìsăÄ,1À[^_]Ã~*„ÿÿÿ¸þÿÿÿƒÄ,[^_]É\$‰|$‹U‹B0‰$è‹M‰A0é÷þÿÿfU‰å‹E…Àt‹P…Òtƒz¸þÿÿÿu‹E ‰B1À]Ã]¸þÿÿÿÃvU‰å‹E‹M …Àt‹P…Òu ]¸þÿÿÿÃt&‰Š¼¸Óà‹MH!Èf‰‚¸1À]ÃfU‰å‹E…Àt‹P…Òu]¸þÿÿÿËE ‰‚Œ‹E‰‚€‹E‰‚‹E‰B|1À]ÃvU‰åS‹M ‹]QÁêA?ÊÁè…ÛD t‹S…Òt ƒz0tv[]ÃzPu÷‰M[]é¶¼'U‰Ñ‰åƒìÁé‰$‹X‰t$‹pˆ 3‹XK‰H‹HˆT ‹$‹t$ÿ@‰ì]ô&U‰åƒì‰]ø‰Ã‰uü‹H‹@‹q9ðs‰Æ…öu‹]ø‹uü‰ì]Ãt&‹S ‹A‰t$‰$‰D$è)s‹Ss s‹B‹J)ðñ…À‰Bu‹B‰B‹]ø‹uü‰ì]ÉJ‹]ø‹uü‰ì]ÃU‰åWVSƒì‹M…Ét`‹E‹x…ÿ”Àƒ} ŸÂ ШuH‹U …ÒxA‹U‹B …Àt‹…ÀtA‹Oùš”À1Òƒ} •Â…Ât8¡‹M‰A¶¼'¸þÿÿÿƒÄ[^_]Ãv‹B…ÀuÑë¶´&‹]‹C…À„d‹W(ƒù*‹E‹] ‰Uð‰‰_(„DƒùE‹w„ȃùI„îƒù[„üƒùg„ …ö…f‹U‹Mð‹Z…Û”À1Ò9M žÂ…Â…‰‹Oùš„Ã…Ûu‹Gt…Àu‹E …À„Úùš„΋‡„‰<$‹M @‰L$ÿ…H‰Á@þƒøwÇGš…É”Àƒù” Ш„G‹]‹C…ÀuÇG(ÿÿÿÿ1Àéçþÿÿƒ} „mÿÿÿ¡‹]‰C¸ûÿÿÿéÈþÿÿÇG ÇGI‹_‰ñ‹C…À…{ÇG[‹_‰ñ‹C$…À…ÔÇGg‹G‹@,…À„`‹W F9ЇšF9ЇÏþÿÿ‹]‹W‹C0ˆ‹O‹C0‹WqÁè‰w1öˆD1ÉÿG‰t$‰L$Ç$è‰C0‹wÇGqé€þÿÿ‹C,…Àt9·ۋEè ýÿÿ‹w;w ‰ñ„+‹_´&‹W ‹C$¶‹GB‰W ˆ‹wF‰w…Û„‹_;w uÕë£IuDƒ} „ʉ<$1À‰D$ 1À‰D$1À‰D$èƒ} „þ‹EèŠüÿÿ‹E‹X…Û„|þÿÿ1Àƒ} …eýÿÿ‹W¸…ÒŽUýÿÿƒú…ª‹]‹W‹O‹C0ˆ 
‹O‹C0‹WqÁè‰wˆD‹O‹W·C2q‰wˆD‹O‹W¶C3q‰wˆD‹O‹W‹Cq‰wˆD‹O‹C‹WqÁè‰wˆD‹O‹W·C q‰wˆD‹O‹W¶C q‰wˆDÿG‹Eè®ûÿÿ‹G…À~÷؉G1Àƒ”Àé‰üÿÿt&‹C,…Àt9·p‹Eèyûÿÿ‹w;w ‰ñ„‹_´&‹W ‹C¶‹GB‰W ˆ‹wF‰w…Û„å‹_;w uÕ룋Eè+ûÿÿ‹E‹P…Ò„ýÿÿ‹E‹Xé—üÿÿÇ$1À‰D$1À‰D$è‹U‰B0‹G‹WÆ‹W‹Gr‰wÆD‹‹W‹Gr‰wÆD‹W‹O…Òq…š‰w‹GÆD‹W‹Gr‰wÆD‹W‹Gr‰wÆD‹W‹Gr‰wÆD‹W‹Gr‰wÆD‹G‹—„‹_pƒú ‰wH°tƒ¿ˆ޳°ˆ‹G‹WÆ ‹wF‰wÇGqé|ûÿÿq1À‹_‰w‹r,ƒ:•À…ötƒÀ‹r…ötƒÀ‹r…ötƒÀ‹r$…ötƒÀˆD‹G‹W‹O‹@ˆ ‹G‹O‹W‹@q‰wÁèˆD‹G‹O‹W·@q‰wˆD‹G‹O‹W¶@q‰wˆD‹G‹—„‹_pƒú ‰wH°tƒ¿ˆŽ·°ˆ‹G‹W‹O‹@ ˆ ‹_‹O‹Sq‰w…Òt-‹C‹WˆD‹W‹G‹Or‰w‹@ÁèˆD ‹w‹_F‰w‹C,…À…–ÇG ÇGE‹_‹C…À„ûÿÿ·C‰uì9G ë0;w „E‹S‹G ‹O¶ˆ‹w‹W ‹_FB‰w‰W ·C9Âr΋C,…Àt ;u쇹‹C9G „¹úÿÿ‹OéÈùÿÿƒ„hýÿÿ‹G0ƒèÁà ƒ¿ˆˆ~m1ÀÁà Á‹Gl…ÀtƒÉ ÇGq¸…B÷á)ÑÑéÁê‰ÐÁà)ÐH‰Ê‰øèæ÷ÿÿ‹Gl…À…MÇ$1À‰D$1À‰D$è‹]‹O‰C0é-ùÿÿ‹—„ƒú~ˆƒú¸~€1Àƒú•ÀƒÀépÿÿÿ»‹G‹@,…Àt9·H…Û„úÿÿ‹Oéÿøÿÿ…Û„9ùÿÿ¡‹U‰B¸ûÿÿÿéøÿÿ‹K,…Ét ;uì‡6‹Eèt÷ÿÿ‹w;w ‰uì„É‹_éŒþÿÿ»‹G‹@,…Àt9·/…ÛuYÇG éùÿÿ)΋]‰t$‹GÁ‰L$‹C0‰$è‰C0éúÿÿ)΋U‰t$‹GÁ‰L$‹B0‰$è‹M‰A0éhûÿÿ‹Oé.øÿÿ‹EèÙöÿÿ‹w‹W éSùÿÿ‹E·P2‰øè€öÿÿ‹M‰ø·Q0èröÿÿé’þÿÿ‹E·P2‰øè_öÿÿ‹M‰ø·Q0èQöÿÿéÖúÿÿ1ÀJFüÿÿé?üÿÿ¡‰C锸ÿÿ)΋U‰t$‹GÁ‰L$‹B0‰$è‹M‹w‰A0éþÿÿ‹Uì‹M)Ö‰t$‹GEì‹Uì‰T$‹A0‰$è‹]‰C0é›þÿÿ)Ήt$‹WщL$‹U‹B0‰$è‹M‹w‰A0é¦þÿÿ‰<$èéMùÿÿ‹Eì‹M)Ɖt$‹GEì‹Uì‰T$‹A0‰$è‹]‹w‰C0‹_éýÿÿ1ÀJBüÿÿé;üÿÿ‹_éìüÿÿ‹GL1ö‹WDfÇDBþDþ‰D$‰t$‰$èéÚøÿÿ‰t$‹M‹G‰D$‹A0‰$è‹]‹w‰C0éCüÿÿt&U‰åƒì‰}ü‹}‰]ô‹] …ÿ‰uøÇEð„­‹w…ö„¢ƒûÿ„«‹E1Òƒû —ÂÁè Â…ƒƒ}}‹Ž„[‹•HI;…Ht‹G…Àuv9Ùt=‰ž„[Áâ·‚B‰†€·‚@‰†Œ·‚D‰†·‚F‰F|‹E‰†ˆ‹]ô‹Eð‹uø‹}ü‰ì]Ë]ô¸þÿÿÿ‹uø‹}ü‰ì]ûéKÿÿÿ‰<$¸‰D$èÂôÿÿ‰Eð‹Ž„ékÿÿÿt&U‰åƒì‰uü‹u‰]ø…ö„;‹^…Û„0‹F …À„%‹N$…É„ÇF,‹C‹SÇFÇF…ÒÇFÇC‰CˆƒúÀƒàGƒÀ*ƒú‰C„ÞÇ$1À‰D$1À‰D$è‰F0ÇC(‰$è‹C,‹SDÀ‰C<‹CLfÇDBþDþ‰D$1À‰$‰D$èÇCl‹ƒ„ÇC\ÇCt@Áà·BÇCxÇC`ÇCh‰“€·@ÇCH‰“Œ·D·€F‰“‰C|‹]ø1À‹uü‰ì]Ë]ø¸þÿÿÿ‹uü‰ì]ÃÇ$1Ò1À‰T$‰D$èéÿÿÿ÷Ú‰Séäþÿÿ´&U‰åWVSƒì,‹E ÇEà‹M‹}‰Eð‹E‹u‰Mì‹M‹U$‰Eè‹E ‰Mä…À„Ä€81•Àƒú8• Ш…®…ÿ¸þÿÿÿ„¦ÇG‹G …ÀuÇG ÇG(‹G$…ÀuÇG$ƒ}ðÿ„ …öˆƒþp‹EèHƒø—ƒ}ì•À Шu9ƒþžÀƒþŸÂ Шu'‹Eð1ÒÁèƒ}ð ŸÂ Ðu‹Eä1ÒÁèƒ}äŸÂ Ðt-ƒÄ,¸þÿÿÿ[^_]Ãv¸úÿÿÿƒÄ,[^_]ÃvÇEàƒî넃þ„™¸À»‰\$‰D$‹G(‰$ÿW ‰Ã…Û¸üÿÿÿtº‰_‹Eà‰ñ‰;‹UèÇC‰C¸ƒÂ ‰s0Óà‹Mè‰C,HƒÁ‰C4¸Óà‰KP‰ÁI‰CL¸«ªªª÷â‰KT¹ÇEÜÑê‰SX‰L$‹C,‰D$‹G(‰$ÿW º‰C8‰T$‹C,‰D$‹G(‰$ÿW ‰C@¸‰D$‹CL‰D$‹G(‰$ÿW ‹MèƒÁÓe܉CD¸‹M܉‹œ‰D$‹ƒœ‰D$‹G(‰$ÿW ‹“œ‰Á‰C•‰C 
‹C8…À„„‹C@…À„y‹CD…Àtr…Étn‰Ðƒàþȉƒ¤R‰ƒ˜‹Eð‹M䉃„¶E쉋ˆˆC$‰}ƒÄ,[^_]éÕûÿÿt&ÇEð…ö‰ìýÿÿÇEà÷Þéâýÿÿ¾ é]þÿÿÇCš¡‰G‰<$èìÿÿ¸üÿÿÿéþÿÿ´&U1Ò‰åƒì(‹E‰T$‰D$‹E‰D$¸‰D$¸‰D$ ¸‰D$‹E ‰D$‹E‰$èÉüÿÿÉô&U‰åƒì(‰uø‹u ‰]ô‹]…ö‰}ü”À…Û” Шu ‹F…À‰Eðu¸þÿÿÿ‹]ô‹uø‹}ü‰ì]Ãü¹‰ßó¥¾À¹‰t$‰L$‹C(‰$ÿS ‰Æ…ö¸üÿÿÿtÉsºÀ‰T$‹Uð‰4$‰T$艸‰D$‹F,‰D$‹C(‰$ÿS ‰F8¸‰D$‹F,‰D$‹C(‰$ÿS ‰F@¸‰D$‹FL‰D$‹C(‰$ÿS ‰FD¸‰D$‹†œ‰D$‹C(‰$ÿS ‹N8…ɉljF„â‹F@…À„׋FD…À„Ì…ÿ„Ä‹]ð‹F,‹S8‰ $À‰D$‰T$è‹F,‹N@‹S@À‰ $‰T$‰D$è‹FL‹ND‹SDÀ‰ $‰T$‰D$è‹N‹S‹F ‰ $‰T$‰D$è‹C‹K‹–œ)È‹N‰F‰ÐƒàþøRщ†¤†”‰† †ˆ ‰†$ †| ‰†0 1À‰Ž˜éDþÿÿ‰$è¨éÿÿ‹]ô¸üÿÿÿ‹uø‹}ü‰ì]öU‰åWVSƒì(‹X|‰Eð‹@8ÇEè‰EÌ‹Eð‹uÌ‹Hl‹xx‹€ΉEì‹Eð‹@,‰Eä-9Áv ‰È+Eä‰Eè‹Eð‹@@‰Eà‹Eð‹@4‰EÜ‹EÌŒ‰MضL>ÿˆM׋Mð¶>;¹ŒˆEÖrÁë‹Eð‹@t9Eì‰EÐv!‰Eìë‹EÜ‹Mà!зA;Uè†ÊK„öEÖ‹MÌÑ89uÖ¶E×8D9ÿu̶8uŶFA8u¼ƒÆAAF¶8uDAF¶8u;AF¶8u2AF¶8u)AF¶8u AF¶8uAF¶8uAF¶8u;uØr³‹Eع)ð‹uØ)Áî9ùŽNÿÿÿ;Mì‰Ï‹Eð‰Pp} ¶Lÿ‹E؈M×¶„8þþÿÿˆEÖé&ÿÿÿt&;}Љøv‹EЃÄ([^_]ô&¼'U‰åW‰×VS‰Ã‹@8‹Sl‰ÁÑ8¶±8u ¶A8Bt ¸[^_]ÃÁƒÂvBA¶8uCBA¶8u:BA¶8u1BA¶8u(BA¶8uBA¶8uBA¶8u BA¶8u9ñr´)ι)ñƒù¸~“‰{p‹Ct9Èv‰[‰È^_]ÃU‰åW‰ÇVSƒì‹@,‹Wt‰Eì‰UäéÃt&‹‹A‰Mè…À„p‹W8‰ÃÖ‹WtÖ;Eð‰Uäv‹]ð1Ò…ÛtE‹Mè)؉A‹A‹@ƒø„Dƒø„[‹M苉\$‰4$‰D$è‹Eè‰Ú‹OtX‰Mä‹uäÖƒþ‰uä‰wtv!‹_8‹Wl‹OX¶‰GHÓà¶T‹_T1Ð!؉GHþ‡Ñ‹‹H…ɄċG,‹O<‹Uä‹wl)Ñ)ñ‰Mð‹Mì„úþÿÿ9Æ‚ÿÿÿ‰L$‹W8 ‰D$‰$è‹Eì‹wl‹OL)Gp)Æ)G\‹GD‰wlHë ƒè1Û·;Uìr‰Ó‹Uì)Óf‰Iuæ‹Mì‹G@H¶¼'ƒè1Û·;Uìr‰Ó‹Uì)Óf‰Iuæ‹‹UìUð‹A‰Mè…À…þÿÿƒÄ[^_]É\$‹‰D$‹A0‰$è‹Uè‰B0é¥þÿÿ‰\$‹M苉D$‹A0‰$èëÛ´&¼'U‰åW¿ÿÿVSƒì‹u‹F ƒè=ÿÿs‰Ç‹^tƒû†Ä‹Fl‹V\Ø :t9È‚Ÿ‰Nl)ȉFt‰È)ЉD$1É1À‰L$ …Òx‹F8ЉD$‰4$è‹Vl‹‰V\è•èÿÿ‹‹@…ÀtQ‹Fl‹V\‰Á‹F,)Ñ-9Ár†‰L$1À‰D$ 1À…Òx‹F8ЉD$‰4$è‹Vl‹‰V\èHèÿÿ‹‹X…Û…Kÿÿÿ1ÀƒÄ[^_]ÃÇFt‰Flëžt&‰ðèýÿÿ‹^t…Û”À1Òƒ} ”Â…ÂuÉ…Û…ÿÿÿ1Àƒ} ”À‰D$ ‹V\‹Fl)ЉD$1À…Òx‹F8ЉD$‰4$è‹Fl‰F\‹è¿çÿÿ‹‹H…Éu1Àƒ} ”ÀÀémÿÿÿƒ} ¸„^ÿÿÿ¸éTÿÿÿ¶¼'U‰åWVSƒì<‹EÇEð‹Xtv¼'û†Õƒû†ë‹]‹{l‹C8‹sH‹KX¶D8Óæ1Æ‹CT!Æ‹E‰sH‰û‹H4‹PD!Ë‹H@·rf‰Y‰Eðf‰ƒø„#ƒ}Ȇ֋E‹UÈ9P`‡Ç‹M‰ø€êˆUã‹qt‹Yd‹‘ ðƒè‰Eä)ßGÿ·Ø‹¤f‰P‹˜¶M㈠‹Eÿ€ Cÿ·Ø¶Á‹M¶·„‘˜@fûÿf‰„‘˜‡O¶ƒfûÿ…€ ‡'¶ƒ‹M…€ ·D@f‰D‹œ‹Qx‹ylH9 ”À¶À‰Eì‹At)Ð@‰EЉAtBþ‰EÈ´&¼'G;}ä‰}ÜwE‹U‹JX‰Bx‹BH‰zlÓà‰Ñ‹R8‹qD‹Y@¶T:1ЋQT!ЋQ4‰AH!ú‰UÔ‹MÔ·Ff‰K‰Uðf‰,Ó,y-œ+%g%ð%9#\$u$%£%$ $; $"#2"$K"$K#%H$%%%u%$†%$o)HT`lx„œ¨´.fileþÿgdeflate.c@ %  ;€ M° [ð h0 v€ ƒÀ _deflate@ ’€ ¡ 
¯¸¼  Ë Ù` æp õ  À ° &@ 4   .textÿ%B.data.bss.rdata¾ BU_my _memset Š œ § ¶ _crc32 _memcpy _adler32 _zcfree _zcalloc À_configuration_table_deflateEnd_deflateSetDictionary_deflateSetHeader_deflatePrime_deflateTune_deflateBound_putShortMSB_flush_pending_deflateParams_deflateResetmy_version.0_deflateInit2__deflateInit__deflateCopy_longest_match_longest_match_fast_fill_window_deflate_stored_deflate_fast_deflate_slow_deflate_copyright_z_errmsg__length_code__dist_code__tr_flush_block__tr_stored_block__tr_align_compressBound__tr_init /1/12 1249165833 0 0 100666 12028 ` L+%.textP´À) P`.data<Î*@0À.bss€0À.rdata€ @@`@U1Ò‰åt&¼'1Éf‰Œ”Bú~í1Òt&¼'1Éf‰Œˆ Bƒú~ð1Ò1Éf‰Œ| Bƒú~ð]º1Éf‰”1Ò‰ˆ¬1ɉ¨1Ò‰ˆ°‰ ÃU¹(‰å‹E”‰ ˆ ‰$ º‰, | ‰ˆ ¹‰0 1Ò‰ˆ8 1Éf‰¸º‰ˆ¼‰´]éÿÿÿvU‰åWVSƒì‰Ã‰Uä‹E‹U‹³P‹„ƒ\ Ò‰uè9Ö‰EðëWv‹¼“` ‹Œ“\ ‹Eä‰}ì‹uä·ˆf9¾‚‹tn‹uä‹}ð‹Eä·4Žf94¸rF¶t~‹u‰Œ³\ ‰UÒ9Uè|*9U襋Œ“\ ‹uä‹}ð‹Eä·4Žf94¸sËt&¼'‹}‹Eð‰„»\ ƒÄ[^_]ö„X8„Xwv¼'B‹Œ“\ 먶¶„X8„X‡mÿÿÿ멉ö¼'U‰åW1ÿV1öSƒÄ€‰Ã‰UÄ‹ÇE¸ÿÿÿÿ‰EÀ‹Bº=‹‹@ ‰³P‰“T9ljE¼}6f‹UÀfƒ<º„’‰}¸‹ƒP@‰¼ƒ\ ‰ƒP1Àˆ„XG;}¼|Ì‹ƒPƒø޼‹M¸‹uĉN‹“P‰ÐÁè<Ñÿ…ÿ~‰<$‹UÀ‰ØOèBþÿÿ…ÿî‹“P‹u¼t&Bÿ‹»` ‰ƒP‹„“\ ‹UÀ‰ƒ` ‰ØÇ$èþÿÿ‹ƒT‹‹` ‰¼ƒX ƒè‰ƒT‰Œƒ\ ‹EÀ·¸f‰…xÿÿÿ‹EÀ·ˆ‹…xÿÿÿЋUÀf‰²¶”X¶„X8ÂrˆÐþÀˆ„X‹EÀf‰tˆf‰t¸‹UÀ‰ð‰ƒ` ‰ØFÇ$èvýÿÿ‹“Pƒú7ÿÿÿÇE˜‹“T‹ƒ` ‹uÄJ‹Mĉ„“\ ‹F‰“T‹UÄ‹I‹p‹‰M°‹H‰U´‹‹@‰u¤1ö‰U¬‰M¨‰E ¶1Éf‰Œs< Fƒþ~ð‹ƒT‹U´‹„ƒ\ fÇD‚‹‹TA‰Mœù<~éjfÿEœ}œ<˜‹uœ‹U´‹¼³\ ·Dº·D‚p;u ~ÿE˜‹u ‹M´f‰t¹;}°¾·„s< 1Ò@;}¤f‰„s< | ‹U¤‰ø‹M¨)Ћ‹E´· ¸¯Áƒ¨‹E¬…À„{ÿÿÿ‹u¬·D¾ÿEœÐ¯È‹¬}œ<Žhÿÿÿ‹}˜…ÿ„µ‹E Pÿ´&‰ÖëN·„s< f…Àtòƒm˜H‹M f‰„s< ·„s> ƒÀf‰„s> ‹u˜·„K< H…öf‰„K< ³…ɉÎtU·¼s< t&…ÿtAÿMœ‹Eœ‹Œƒ\ ;M°ê‹U´·DŠ9ðt‰ò)‰ЋU´·Š¯Âƒ¨‹E´f‰tˆO…ÿu¿Nu«Ã< 1ɺt&¼'·DSþÀf‰DUÈBƒú·È~è1ö;u¸‹UÀ·L²…É…‚F;u¸~êƒì€[^_]Ãf‹EÀfÇD¸G;}¼éüÿÿÿE¸‹E¸´&‰„“\ ‹UÀ‰ÆfÇ‚1Àÿ‹¨…Ɉ„Xt ·D±)ƒ¬‹ƒPƒøDüÿÿP1Àƒ}¸‰“P°ë¡·TMÈ1ÛBf‰DMÈë ‰ÐIƒà ÃÑêÛ…Éð‹MÀÑëf‰±FéBÿÿÿU‰åW¿VSƒì‰Uð‰EäÇEèÿÿÿÿ·Z1ÒÇEà…Ûu ÇE࿊‹E‹MðfÇDÿÿÇEìëHf‹uä·„Ž| f‰„Ž| ‰Mè1Ò…ÛÇE࿊t9Ù¿t ÇEà¿ÿEì‹E9E숋uìB‰Ù‹Eð9ú·\°}9ÙtÛ;Uà|–…Ét2;Mèt‹Uä·„Š| @f‰„Š| ‹uä·†¼ @f‰†¼ éxÿÿÿfƒú ‹Uä·‚À @f‰‚À éZÿÿÿt&‹uä·†Ä @f‰†Ä é?ÿÿÿƒÄ[^_]ÃU¹ÿÿÿÿ‰åWV‰ÆSƒìH‰Uð·BºÇEà‰Eä‹]丅Ûu ºŠ¸ÇEì‹]9]쀋}ä‹]ì‰}è‹}ð·\ŸÿEà9Uà‰]ä} 
9]è„K9Eà³‹Ž¼»ërt&·†¸‹Uè·”–| ‰UÜÓâ Âf‰–¸‹F‹Nˆ‹N‹VA‰F¶†¹ˆD‹†¼‰ÙÿF)ÁÓ}Ü‹MÜf‰Ž¸L8ðÿMà‰Ž¼„’‹Eè·¼†~ ‰Ø)ø9Á€‹Uè·„–| ·–¸Óà Ðf‰†¸ù뽋}è…ÿ„›9Mè„»‹M踷ŒŽ~ ÇEÔ)ȉMØ‹Ž¼9ÁØ·–¸‹}è‹]Ø·„¾| Óà Ðf‰†¸ÙÿMà‰Ž¼·†¾ ÇẺEЋUи)Ð9ÁŽ ·¾¼ ·†¸‰úÓâ ‹N‹Ff‰–¸ˆ‹N‹VA‰F¶†¹ˆD‹†¼‹]Ð)EÌÿF¶MÌÓÿLðf‰¾¸ƒù¿Ž—‰Ž¼‹Eàƒè‰EȉÂÓâ·†¸‹N Âf‰–¸‹Fˆ‹N‹VA‰F¶†¹ˆD‹†¼·UÈÿF)ljù‰UÄÓúHòf‰–¸‰Ž¼ÇEà‹}亊‹M踅ÿt‹]äº9Ùt º¸ÿEì‹}9}ìŽýÿÿƒÄH[^_]Ã}à ³·žÂ ¸ÇE¼‹Ž¼)؉]À9Á0·†À ¿·–¸Óà Ðf‰†¸‹EÀÁƒù Žj·†¸‰Ž¼‹]àƒë‰ÚÓâ Âf‰–¸‹F‹Nˆ‹N‹VA‰F¶†¹ˆD‹†¼·ÓÿF)ljùÓúHóf‰–¸éõþÿÿ·žÆ ¸ÇE°‹Ž¼)؉]´9Áñ·†Ä ¿·–¸Óà Ðf‰†¸‹E´Áƒù Ž+·†¸‰Ž¼‹]àƒë ‰ÚÓâ Âf‰–¸‹F‹Nˆ‹N‹VA‰F¶†¹ˆD‹†¼·ÓÿF)ljùÓúH÷f‰–¸éBþÿÿ·†¸‹]è·¼ž| ‰úÓâ ‹N‹Ff‰–¸ˆ‹N‹VA‰F¶†¹ˆD‹†¼‹]Ø)EÔÿF¶MÔÓÿLðf‰¾¸éæüÿÿ·¾Ä ·†¸‰úÓâ ‹N‹Ff‰–¸ˆ‹N‹VA‰F¶†¹ˆD‹†¼)E°ÿF¶M°ÓÿLðf‰¾¸ƒù ¿Õþÿÿ·–¸‹Eàƒè Óà Ðf‰†¸A‰†¼éXýÿÿ·¾À ·†¸‰úÓâ ‹N‹Ff‰–¸ˆ‹N‹VA‰F¶†¹ˆD‹†¼)E¼ÿF¶M¼ÓÿLðf‰¾¸ƒù ¿–ýÿÿ·–¸‹EàƒèÓà ЃÁf‰†¸éÌüÿÿ·–¸‹EàƒèÓà ЃÁf‰†¸é¬üÿÿ·†¼ ·–¸Óà Ðf‰†¸‹EÐÁéüÿÿ‹Ž¼éûÿÿt&U‰åS‰Ã‹€¼ƒøt0ƒø~(·ƒ¸‹S‹Kˆ ¶ƒ¹ÿCƒ«¼f‰ƒ¸[]÷ƒ¸‹S‹Kˆ ‹K‹SA‰C¶ƒ¹ˆD1ÀÿCf‰ƒ¸1À‰ƒ¼[]ÃU‰åW¿VSƒì‹u‹Ž¼ƒù Žã·†¸»‰ÚÓâ‹N Âf‰–¸‹Fˆ‹N‹VA‰F¶†¹ˆD‹†¼ÿF)ljùÓûHóf‰ž¸·‚¿‰Eð‹]ð‰ø)Ø9ÁŽP·€·†¸‰Ž¼‰ÚÓâ ‹N‹Ff‰–¸ˆ‹N‹VA‰F¶†¹ˆD‹†¼‹UðÿF)ljùÓûLðf‰ž¸‰Ž¼‰ðè…þÿÿ‹†´‹Ž¼)ȃÀ ƒø±ƒù ¿ÿ·–¸¸Ó࿃Á Ðf‰†¸·‚‰Eì‹]ì‰ø)Ø9ÁŽ7·€·†¸‰Ž¼‰ÚÓâ ‹N‹Ff‰–¸ˆ‹N‹VA‰F¶†¹ˆD‹†¼‹UìÿF)ljùÓûLðf‰ž¸‰Ž¼‰ðèºýÿÿ¸‰†´ƒÄ[^_]ô&·€‹}ð·–¸Óàù Ðf‰†¸éëþÿÿ·–¸¸ÓàƒÁ Ðf‰†¸éPþÿÿ·†¸»‰ÚÓâ‹N Âf‰–¸‹Fˆ‹N‹VA‰F¶†¹ˆD‹†¼ÿF)ljùÓû¿Hó·‚f‰ž¸‰Eì‹]ì‰ø)Ø9ÁÉþÿÿ·€·–¸Óà Ðf‰†¸‹U쉆¼éÿÿÿt&U‰åWV‰ÆSƒì,‹˜ ‰UðÇEä…Û„à‹ˆ¼éf‹Uð·|‚¸ÇEà)ø9ÁŽÓ‹Eè·‚·†¸‰ÚÓâ ‹N‹Ff‰–¸ˆ‹N‹VA‰F¶†¹ˆD‹†¼)EàÿF¶MàÓûL8ðf‰ž¸‰Ž¼‹ž ;]ä†N‹U䋆¤·P‰E싆˜‹}ì¶B…ÿ‰Uä‰Eè„Iÿÿÿ‹Uè‹E𶺷„¸ÇE؉EÜ‹Uܸ)Ð9ÁŽˆ·†¸‹U𷜺‰ÚÓâ ‹N‹Ff‰–¸ˆ‹N‹VA‰F¶†¹ˆD‹†¼‹UÜ)EØÿF¶MØÓûLðf‰ž¸‹½  …Û„‹½ ¿)Eè‰ø)Ø9ÁŽ&·†¸‰Ž¼‹UèÓâ ‹N‹Ff‰–¸ˆ‹N‹VA‰F¶†¹ˆD‹–¼·EèÿF)׉ùÓøLðf‰†¸‰Ž¼ÿMì}ìÿ‡s‹U춺‹E·D¸ÇEЉEÔ‹UÔ¸)Ð9ÁŽ"·†¸‹U·º‰ÚÓâ ‹N‹Ff‰–¸ˆ‹N‹VA‰F¶†¹ˆD‹†¼‹UÔ)EÐÿF¶MÐÓûLðf‰ž¸‹½ …Û„¨‹½ ¿)Eì‰ø)Ø9ÁŽ"·†¸‰Ž¼‹UìÓâ ‹N‹Ff‰–¸ˆ‹N‹VA‰F¶†¹ˆD‹–¼·EìÿF)׉ùÓøLðf‰†¸é…ýÿÿ‹Uð‹Eè·‚·–¸Óàù‰EÈ Ðf‰†¸‰Ž¼éeýÿÿv‰Ž¼éQýÿÿt&‹U·º·–¸Óà Ðf‰†¸‹EÔÁéÿÿÿ‹EìÁ趸é…þÿÿ‰Ž¼é`þÿÿ‹Uð‹]Ü·„º·–¸Óà Ðf‰†¸Ùé°ýÿÿÓeèÙ·†¸ Eè‹Eèf‰†¸éþÿÿÓeìÙ·†¸ Eì‹Eìf‰†¸é£üÿÿ‹ˆ¼‹U𸷺ÇEÌ)ø9Á~{·š·†¸‰ÚÓâ 
‹N‹Ff‰–¸ˆ‹N‹VA‰F¶†¹ˆD‹†¼)EÌÿF¶MÌÓûL8ð‰Ž¼‹Mðf‰ž¸·‰†´ƒÄ,[^_]Ãt&‹Uð·‚·–¸Óà Ðf‰†¸9‹Mð‰†¼·‰†´ƒÄ,[^_]ÃvU‰åS‰Ã‹€¼ƒø~?·ƒ¸‹S‹Kˆ ‹K‹SA‰C¶ƒ¹ˆDÿC1Àf‰ƒ¸1À‰ƒ¼[]Ã…À~ç·ƒ¸‹K‹Sˆ ëÒv¼'U‰åWVSƒì‹}ÇEð‹u‹¼ƒù Žò·‡¸‰òÓâ‹O ‹Gf‰—¸ˆ‹O‹W¶‡¹Y‰_ˆD‹—¼·Æ)UðÿGƒê ¶Mð‰—¼Óøf‰‡¸‹] ‰ø‹u‰]ìèÿÿÿ¸‹W‰ñ‰‡´‹Gˆ ‹O‹WY‰_‰ó¶ÇˆD‹O‰ð‹WöÐY‰_ˆD‹O‰ð‹W÷жÄY‰_ˆD‹_Cë¶‹Mì‹W¶A‰M숋G@‰Ã‰_NƒþÿuáƒÄ[^_]÷‡¸Óæ Æf‰·¸A‰‡¼éGÿÿÿ‰ö¼'U‰åWVSƒì,‹MÇEè‹}‹E ‰Mì‹u‹„‰Eð…ÉŽ{…ö…9— ‰øèêÿÿ—$ ‰øèêÿÿ‹‡ —”‰$‰øèˆîÿÿ‹‡( —ˆ ‰$‰øèrîÿÿ—0 ‰øèÕéÿÿ¹¶fƒ¼‡~ uIƒùè‰Mè‹—¨I‹Ÿ¬Ѓà HÁéÁëP9ˉ—¨w‰ÙF9È–Â1Àƒ}ð•À…Ð…úƒ¿ˆ”À9Ë” Ш„ª‹¼¾ƒù \·—¸‹EìƒÀÓà ЃÁf‰‡¸‰¼º€Ç$‰øèøÿÿ‰øèçÿÿ‹Eì…À„³ƒÄ,‰ø[^_]éßüÿÿ‹ƒz,…»þÿÿ1Àfƒ¼‡”u@ƒø~ïƒø „уø ”À¶À‰B,éþÿÿ^é"ÿÿÿ‹¼¾ƒù W·—¸‹EìƒÀÓà ЃÁf‰‡¸‹‡ ¾X‰]䋟( C‰]à‹]èC‰]܃ù Ž?‰¼˜ÿÿÿ‰Ú·‡¸Óâ‹O·Û ‹Gf‰—¸ˆ‹O‹WA‰G¶‡¹ˆD‹‡¼ÿG)ƉñÓûHõf‰Ÿ¸ƒù »޲‰¼‹EàH‰E؉ÂÓâ·‡¸‹O Âf‰—¸‹Gˆ‹O‹WA‰G¶‡¹ˆD‹—¼·EØÿG)ÓˆÙÓøJõf‰‡¸ƒù ¾Ž&·‡¸‰¼‹]܃ë‰ÚÓâ‹O ‹G·Ûf‰—¸ˆ‹O‹WA‰G¶‡¹ˆD‹‡¼ÿG)ƉñÓûHôf‰Ÿ¸‰¼1öëm´&¶†·œ‡~ ·‡¸‰ÚÓâ ‹O‹Gf‰—¸ˆ‹O‹WA‰G¶‡¹ˆD‹‡¼¹ÿG)ÁÓûf‰Ÿ¸Hó‰¼F;u܃ù Œ¶†·—¸·„‡~ Óà Ðf‰‡¸ƒÁëÅ‹]ì‰t$‰\$ ‹Eð‰<$‰D$è¼úÿÿ‰øèUäÿÿ‹Eì…À…MýÿÿƒÄ,[^_]÷‡¸‹]ìƒÃ‰ÚÓâ‹O ‹G·Ûf‰—¸ˆ‹O‹WA‰G¶‡¹ˆD‹‡¼ÿG)ƉñÓûHóf‰Ÿ¸éiýÿÿ·‡¸‹]ìƒÃ‰ÚÓâ‹O ‹G·Ûf‰—¸ˆ‹O‹WA‰G¶‡¹ˆD‹‡¼ÿG)ƉñÓûHóf‰Ÿ¸édüÿÿ‹E䟔‰Ú·ˆ H‰$‰øè0ëÿÿ‹E؉ò‰$‰øè!ëÿÿ‰4$‰ÚéAüÿÿ·—¸‹E܃èÓà ЃÁf‰‡¸éþÿÿ·—¸‹]àK‰]؉ØÓà ЃÁf‰‡¸é‰ýÿÿ·—¸-ÓàƒÁ Ðf‰‡¸éþüÿÿ¸fƒ¼‡”…üÿÿ@ƒø~ëéüÿÿ¶U‰åƒì ‰$‰t$‰|$‹]‹M ‹u‹ƒ ‹“¤‹»˜f‰ B‰òˆ8ÿƒ …Éu3·„³”@f‰„³”‹ƒœ‹t$‹|$H9ƒ ‹$”À‰ì¶À]ö–Iÿƒ°·„“˜@ùÿf‰„“˜vW‰ÈÁèùÿ¶€…€ wR¶…€ ·D@f‰D‹ƒœ‹t$‹|$H9ƒ ‹$”À‰ì¶À]öùÿ…€ v®Áé¶ë©  €          ŒLÌ,¬lìœ\Ü<¼|ü‚BÂ"¢bâ’RÒ2²rò ŠJÊ*ªjêšZÚ:ºzú†FÆ&¦fæ–VÖ6¶vöŽNÎ.®nîž^Þ>¾~þAÁ!¡aá‘QÑ1±qñ ‰IÉ)©ié™YÙ9¹yù…EÅ%¥eå•UÕ5µuõ MÍ-­mí]Ý=½}ý  “ “ S S Ó Ó 3 3 ³ ³ s s ó ó  ‹ ‹ K K Ë Ë + + « « k k ë ë   › › [ [ Û Û ; ; » » { { û û   ‡ ‡ G G Ç Ç ' ' § § g g ç ç   — — W W × × 7 7 · · w w ÷ ÷    O O Ï Ï / / ¯ ¯ o o ï ï   Ÿ Ÿ _ _ ß ß ? ? 
¿ ¿   ÿ ÿ @ `P0pH(hX8xD$dT4tƒCÃ#£cã       0@`€À€  0@`   (08@P`p€ Àà‚¤»Å!ã!•!®!3!Å!å!!Š!™!&!¬!»!œ!3!ß!æ!ó!g!¥!×!ç!#!<!!!!(!,!.fileþÿgtrees.c€ * 5 B O  \l z‰(˜ ¤€ ®ð º Æ ÑÀ ÜÐ  æP ñ ð  `   /0 .textB.data<.bss.rdatal :HT_static_dtree_static_ltree_bl_order_base_dist_extra_dbits_base_length_extra_lbits_static_bl_desc_extra_blbits_static_d_desc_static_l_desc_init_block__tr_init_pqdownheap_build_tree_scan_tree_send_tree_bi_flush__tr_align_compress_block_bi_windup__tr_stored_block__tr_flush_block__tr_tally__length_code__dist_code /1/12 1249165834 0 0 100666 1044 ` L€.text€´ô P`.data@0À.bss€0À.rdataÀ4 @`@U¸¨‰å]öU¸U‰å]öU¸‰å‹U])Ћ…€Ãv¼'U‰å‹E‹M ¯Á‰E]ét&¼'U‰åƒì‹E ‰$èÉÃneed dictionarystream endfile errorstream errordata errorinsufficient memorybuffer errorincompatible version'4?S`1.2.31mQ€„ˆŒ”˜œ ¤.fileþÿgzutil.c  _zError  _zcalloc@ _zcfree` .texts.data.bss.rdata® #€_free _malloc -_zlibVersion_zlibCompileFlags_z_errmsg /1/20 1249165835 0 0 100666 13244 ` L`0".textp!´-9 P`.data@0À.bss€0À.rdataà $">/@`@U‰å‹E…Àt‹P…Òu]¸þÿÿÿÃÇBÇ@Ç@Ç@Ç@0‚0‰Bl‰BP‰BL1ÀÇÇBÇB ÇB€ÇB ÇB(ÇB,ÇB0ÇB8ÇB<]ö¼'U‰åƒì ‰$‰t$‰|$‹E‹M ‹]…ÀtB‹P…Òt;ƒù6‹r<<ƒÿ w+‰z<¸ÓàH‰ñ!ÃÓã1ÀZ8‹$‹t$‹|$‰ì]ö‹$¸þÿÿÿ‹t$‹|$‰ì]ö¿U‰åVSƒì‹E‹]‹u …À‹U„¨€81•Àƒú8• Ш…’…Û¸þÿÿÿ„ŠÇC‹S …ÒuÇC(º‰S ‹C$…ÀuÇC$¹0%¸‰L$‰D$‹C(‰$ÿ҉…Ҹüÿÿÿt:‰S…öx?‰ðÁø@‰Bƒþ/~,Føƒøw4‰r$ÇB4‰]ƒÄ[^]éþÿÿ¸úÿÿÿƒÄ[^]ÃæëÏÇB÷ÞëĉT$‹C(‰$ÿS$ÇC¸þÿÿÿëÎt&U‰åƒì‹E‰D$ ‹E ‰D$¸‰D$‹E‰$èØþÿÿÉöU‰åƒì(‰]ô‰Ó‰uø‰}ü‹p‰Eð‹F4…À‰Eì„Í‹V(…Òtt‹Mð‹A)Ã9Ú†‹N0‰×)Ï9ßv‰ß‹Uð‹B ‹Uì‰|$)ØʉD$‰$è)û…Ê‹F0‹V(ø9Єâ‰F0‹F,9Ðsø‰F,v‹]ô1À‹uø‹}ü‰ì]ÃÇF0‹N$ºÇF,Óâ‹Mð‰V(‹A)Ã9Ú‡qÿÿÿ‹A ‰T$)ЉD$‹Eì‰$èÇF0‹F(‰F,ëž¹¸‰L$‹N$Óà‰D$‹Uð‹B(‰$ÿR ‰Eì‹Uì‰F4¸…Ò…úþÿÿ‹]ô‹uø‹}ü‰ì]ËMð‹F4‹Q ‰\$‰$)Ú‰T$è‰^0‹F(ë’t&ÇF0éÿÿÿt&U‰åWVSƒì|‹M…É„|‹E‹@…À‰E´to‹U‹J …Éte‹…ÛtV‹E‹P‹u´‹ƒø t\‰Mè‹M‰Uä‹U´‰]ì‹IÇEЋuä‹R8‰Mà‹M´‰UÜ‹Uà‹y<‰u؉UÔƒøw¶ÿ$…l ‹R…Òt©f¸þÿÿÿƒÄ|[^_]ÃÇ ¸ ë—ƒÿw&‹Eä…À„†ÿMä‹M춉ùƒÇÿEìÓàE܃ÿvÚ‹EÜ‹U´<‰B„ ‹MÇAÀé» ƒÿ w'‹Eä…À„;ÿMä‰ù‹UìƒÇ¶B‰UìÓàE܃ÿ vÙ‹E܃ï‹u´Ám܃à‰E¨‰F`‹EÜÁm܃à@‰E¤‰Fd‹EÜÁm܃àX}¨‰^\w ƒ}¤†œ‹E‹U´Ç@Üé. 
ƒÿw&‹]ä…Û„®ÿMä‹M춉ùƒÇÿEìÓàE܃ÿvÚ‹UÜ1ÿ‹EÜ‹u´ÁêâÿÇ ÁèЋUÜÁeÜâÿÁâ‹MÜЋUÇEÜȉF‰B0‹M´‹Q …Ò„ÍÇ$1À‰D$1À‰D$è‹u´‹U‰FÇ ‰B0ƒ} …¼t&¼'‹E‹Uè‹uì‹Mà‰P ‹Uä‰0‹u´‰H‹M܉P‹V(‰N8‰~<…Òuƒ>‡Æ‹@;EÔt‹UÔ‹Eèüÿÿ…À…[‹U‹B)EÔ‹M‹u‹UÔ‹I)MØ‹M´V‹EØF‹AQ…À•À…ҕ¶҅Ât5‹y…ÿ„¬‹uÔ‰t$‹U‹B )ð‰D$‹A‰$è‹M´‹u‰A‰F0‹U´‹r‹B<…ötƒÀ@‹M´ƒ9 „D‹u‰F,‹EØ EÔ”Àƒ} ” Шt‹]Ð…ÛuÇEÐûÿÿÿ‰ö¼'‹EЃÄ|[^_]ËM´‹Q …Òt ‹EÜÁèƒà‰‹u´öF…ÙÇEÜ‹M´1ÿÇ‹uä…ö„¦þÿÿÿMä‰ù‹uìƒÇ¶F‰uìÓàE܃ÿvÙ‹u´‹F …Àt‹U܉P‹M´öA…ŠÇEÜ‹u´1ÿÇt&¼'‹Mä…É„EþÿÿÿMä‰ù‹UìƒÇ¶B‰UìÓàE܃ÿvÙ‹u´‹V …Òt¶E܉B‹EÜÁè‰B ‹E´‹PöÆ…wÇEÜ‹E´1ÿÇöƄƃÿw'‹Eä…À„ÕýÿÿÿMä‰ù‹uìƒÇ¶F‰uìÓàE܃ÿvÙ‹EÜ‹M´‰A@‹A …Àt ‹U܉P‹QöÆ…vÇEÜ1ÿ‹E´ÇöÆ„ƒ‹u´‹^@;]䉨v‹]ä…Ûta‹u´‹N …Ét=‹q…ö‰uÌt3‹q‹Q‰Ù)Æ9Ðv‰Ñ)ñ‹ẺL$‹Uìð‰$‰T$è‹M´‹QöÆ…¤)]ä‹u´]ì‹F@)؉F@…À…ÿüÿÿ‹E´‹P‹M´ÇA@ÇöÆ„ ‹Eä…À„Õüÿÿ1Ûv‹Eì‹U´¶4‹B C…Àt.‹P…Òt'‹M´‹I@;H ‰Mœs‰ðˆ‹U´ÿB@´&¼'…ö•À1Ò;]ä’Â…Âu¯‹M´öA…« )]ä]ì…ö…düÿÿ‹u´‹V‹E´Ç@@ÇöÆ„L ‹Eä…À„:üÿÿ1Û´&‹Eì‹U´¶4‹B C…Àt.‹P$…Òt'‹M´‹I@;H(‰Mœs‰ðˆ‹U´ÿB@´&¼'…ö•À1Ò;]ä’Â…Âu¯‹M´öA…ß )]ä]ì…ö…Äûÿÿ‹u´‹V‹E´ÇöÆ„e ƒÿw.´&‹Eä…À„•ûÿÿÿMä‰ù‹uìƒÇ¶F‰uìÓàE܃ÿvÙ‹u´·F;EÜ„ ‹EÇ@ Ǹéùÿÿ‹M´‹A…À„ ‰ù‹u´ƒçøƒá¸ÓmÜÇécùÿÿ‹M´‹YH…ÛtD9ûv&‹Mä…É„ ûÿÿÿMä‰ù‹uìƒÇ¶F‰uìÓàEÜ9ûwÚˆÙ‹UܸÓmÜ)ßÓà‹u´H!ÐF@‹E´Ç‹u´¸‹U´‹NX‹RPÓà‰U¬Pÿë#‹Eä…À„¥úÿÿÿMä‰ù‹uìƒÇ¶F‰uìÓàEÜ‹EÜ‹M¬!Ћ¶Ç¶À9øwÉöÃðu‰Ø¶ÓÁè‰E¼‰Þ‰U¸ë"‹Eä…À„WúÿÿÿMä‰ù‹UìƒÇ¶B‰UìÓàE܉ñ‹]܉ò¶Å¶È‹E¸Á¸ÓàH¶Î!ØÓè‹M¼È‹M¬‹¶Æ¶À¶×¶ÒÐ9øw ‰ð¶Ì¶ÁÓmÜ)Ƕ϶Á)ÇÓmܶè@„¢ ‹U‹M´ÇB éc‹E´Ç‹Eà…À„¹ùÿÿ‹Eà‹]Ô‹U´)ËBD9؆ž )؉ËB09؃F ‰Ñ‹R(‹q4)Ã)Úò‹M´‹A@9Øs‰Ã;]àv‹]à)]à)Ø‹M´‰A@´&¶B‹uèˆFK‰uèuð‹E´‹P@…Ò…¶Ç¸ér÷ÿÿ‹U´ÇBhÇ‹M´‹u´‹U´‹I`‹vd‹Rh‰M¨‰Èð‰u¤9‰U ƒí‹M´¸‹u´‹IL‰M°‹NTÓàH‰EÈ‹UÜ‹EÈ!ЋU°‹‚¶Ç¶À9øv9‹Eä…À„¾øÿÿÿMä‹M춉ùƒÇÿEìÓàEÜ‹EÈ‹UÜ!ЋU°‹‚¶Ç¶À9øwljØÁèfƒø‡ê¶Ç¶À9øv3´&‹Eä…À„eøÿÿÿMä‰ù‹uìƒÇ¶F‰uìÓàEܶǶÀ9øwÔ¶Ï‹u ÁëÓmܶÁ)Ç‹E´‹U´f‰\pp‰ð@‰Bh‰E ‹E¨‹U¤Ð9E ‚-ÿÿÿ‹M´‹ƒø„5öÿÿÇAT ‰È‰Ë0‰ÎÃð‰AlƒÆl‰AL-܉\$‰D$‰t$ ‹A`Ç$‰D$‰ÈƒÀp‰D$è‰EÐ…À„ ‹uÇF* ‹E´Ç¸éÊõÿÿÇEÜ‹M´1ÿ‰Q@Ç‹u´‹^@…Û…| ‹u´Ç ¸ é”õÿÿ‹M´Ç¸üÿÿÿé–õÿÿ‹U´Çƒ}ä—À1Ò}à—Â…Â…ü ‹M´¸‹u´‹IL‰M°‹NTÓàPÿë$f‹Eä…À„õöÿÿÿMä‰ù‹uìƒÇ¶F‰uìÓàEÜ‹EÜ‹M°!Ћ¶Ç¶À9øwɄۄ„öÃðu‰Ø¶ÓÁè‰EĉމUÀë"‹]ä…Û„ŸöÿÿÿMä‰ù‹UìƒÇ¶B‰UìÓàE܉ñ‹]܉ò¶Å¶È‹EÀÁ¸ÓàH¶Î!ØÓè‹MÄÈ‹M°‹¶Æ¶À¶×¶ÒÐ9øw 
‰ð¶Ì¶ÁÓmÜ)ǶϋU´¶ÁÓmÜ)ljØÁè„Û‰B@…ǸéOôÿÿÇEÜ1ÿ‹u´ÇÇEÐéñõÿÿÇEÐýÿÿÿéåõÿÿ‹E´‹XH…Û„9ûv&‹Eä…À„ÈõÿÿÿMä‰ù‹UìƒÇ¶B‰UìÓàEÜ9ûwÚˆÙ¸‹u´Óà)ßPÿ‹EÜÓmÜ!‹FD‰VD‹M´‹EÔ‹]à‹q,ð)Ø9†§ûÿÿ‹uÇFF Ǹéšóÿÿ‰ùƒáƒçøÓm܃ÿw&‹Eä…À„?õÿÿÿMä‹M춉ùƒÇÿEìÓàE܃ÿvÚ·UÜ‹EÜÁè5ÿÿ9„ƒýÿÿ‹uÇFd éaýÿÿ‹u´‹^\‹E´‹@h9؉E s_ƒÿw-¶‹uä…ö„ÕôÿÿÿMä‰ù‹UìƒÇ¶B‰UìÓàE܃ÿvÙ‹u´ƒï‹EÜÁmÜ‹Nhƒà·” f‰DVpA9؉Fhr¤‰E ƒ} w!‹U B·„þÿÿÿ‹M´fÇDAp‰ÐBƒøvæ‰Ah‹E´¹‹u´0‰Fl‰FL‰ððÇFT‰D$-œ‰D$ƒÀ‰D$ ƒÀ‰L$‰D$Ç$è‰EÐ…À„Ùúÿÿ‹EÇ@ éøÿÿ‹M´‹Y…Û„èýÿÿ‹u´‹^…Û„Úýÿÿƒÿw2t&¼'‹Mä…É„ÅóÿÿÿMä‰ù‹UìƒÇ¶B‰UìÓàE܃ÿvÙ‹U´‹EÜ9B„‹ýÿÿ‹MÇAš ǸéÀñÿÿ‹u´‹^…Û„Ûƒÿw'‹Eä…À„bóÿÿÿMä‰ù‹UìƒÇ¶B‰UìÓàE܃ÿvÙ‹uà‹E‹M´)uÔ‹UÔPQ…Òt/‹A…À„å‰T$‹Eè)ЉD$‹A‰$è‹M´‹u‰A‰F0‹U´‹Eà‹r‰EÔ‹J…ö…È‹EÜ‹UÜÁè%ÿÁê‹EÜ%ÿÁà‹EÜÁàÂ9Ê„‹M‹u´ÇA± éF÷ÿÿ‹M´‹Qéõÿÿ‹M´‹Qé¡ôÿÿ‹u´‹^…ÛuÇ ¸ éµðÿÿƒÿw'‹Uä…Ò„eòÿÿÿMä‰ù‹UìƒÇ¶B‰UìÓàE܃ÿvÙ‰Ø1ÒÑè}Ü‹”Â…Ð…‹M´‹A ÇA…Àt Ç@0ÿÿÿÿ‹YöÃt2¶MÜ»…B‹EÜÁáÁèÁ‰È÷ã‰È)ÐÑèÐÁè‰ÂÁâ)Â9фŋU‹M´ÇBÆ écüÿÿ‹Eà…À„Âñÿÿ‹U´‹Mè‹B@ˆAÿMฉMèÇéàïÿÿ‹M´‹QéGõÿÿ‹u´‹Vé¡ôÿÿ‹M´‹QéÊõÿÿƒÿ‡4‹Eä…À„kñÿÿÿMä‰ù‹UìƒÇ¶B‰UìÓàEÜëÓfÇEÜ1ÿ‹M´‹A …ÀtÇ@0Áú ƒâ‰P,Ç$1À1ö‰t$‰D$è‹u´‹U‰F‰B0é ùÿÿ‹u‹FéPñÿÿ‹M´‹A …À„-õÿÿÇ@$é!õÿÿƒè€é´ñÿÿ‹M´‹A …À„kôÿÿÇ@é_ôÿÿ‹MÔ‰L$‹u‹F )ȉD$‹U´‹B‰$èéLñÿÿfƒø„ fƒø„¶Ç¶ÀƒÀ9øv/‹uä…ö„kðÿÿÿMä‰ù‹uìƒÇ¶F‰uìÓàEܶǶÀƒÀ9øwѶÏ1ö¶ÁÓmÜ)ǃï‹EÜÁm܃àX ‹U ‹E¨‹M¤ÚÈ9‡`Kƒûÿuéë÷ÿÿ‰E ‹E K‹U´f‰tBp@ƒûÿ‰BhuèéË÷ÿÿ¶Ç¶ÀƒÀ9øv.‹Eä…À„ÕïÿÿÿMä‹M춉ùƒÇÿEìÓàEܶǶÀƒÀ9øwҶ϶Á)ÇÓmÜ‹E …À„ø‹E ƒï‹U´·tBn‹EÜÁm܃àXéRÿÿÿ¶Ç¶ÀƒÀ9øv.‹Eä…À„dïÿÿÿMä‹M춉ùƒÇÿEìÓàEܶǶÀƒÀ9øwÒ¶Ï1ö¶ÁÓmÜ)ǃï‹EÜÁm܃àXéõþÿÿ‹u´‹F …À„ƒñÿÿÇ@éwñÿÿ‹u´‹Uè‹^@)‰Øéyõÿÿ‰\$‹Eì‰D$‹A‰$è‹U´‰Béóÿÿ‹E´‹PDé;ùÿÿ‰\$‹Eì‰D$‹A‰$è‹U´‰Bé4òÿÿÇEÜ‹E´1ÿ‹X‹U´Çé“úÿÿ¶UÜEðÁmܶM܈UðºˆMñ‰T$‰D$‹u´‹F‰$è‰F‹VéNðÿÿ‹u´‰Â)Ú‹N4Êé·ôÿÿ‹u´Áë‰^D‰Ãƒã‰^HÇéBøÿÿ¶MÜÁmܶE܈MðˆEñ¸‰D$Eð‰D$‹U´‹B‰$è‹M´‹Q‰AéLðÿÿ¶EÜ»ÁmܶUÜÁm܈Eð¶MÜÁm܈Uñ¶E܈MòˆEóEð‰\$‰D$‹U´‹B‰$è‹M´‰Aé%ïÿÿ¶Ã¨ „Ì‹M´¸ Ç é´ëÿÿ‰\$‹uì‰t$‹U´‹B‰$è‹M´‰Aé8ðÿÿ÷EÜà„^îÿÿ‹uÇFÝ é—õÿÿ‹M´ÇAhÇé'øÿÿ;MÜéZúÿÿ‹U´‹M´‹RlÇAX‰ÈƒÀX‰QP‰\$‰D$‰t$ ‹Ad‰D$‹A`Ç$DAp‰D$è‰EÐ…À„‰õÿÿ‹uÇFö éõÿÿ¨@„³‹uÇF éõÿÿÇ$1À1ÿ‰D$1À‰D$è‹u´‰F¶EÜÁmܶU܈Eð¸ˆUñ‰D$Eð‰D$‹F‰$è‰F¸ÇEÜÇé}êÿÿ‹uÔ‰t$‹Eè)ð‰D$‹U´‹B‰$èéùÿÿ‹E܃àƒøtr‹uÇFÀécôÿÿ‹U´‰Ãƒã‰ZHÇéÍðÿÿ¶EÜ¿ÁmܶU܈EðEðˆUñ‰|$‰D$‹F‰$è‰Féòìÿÿ‹u‹E´ÇF( ÇéóÿÿÁm܃ï‹U´‹E܃àp9r$ƒÿ‹MÇAB 
éâ÷ÿÿ‹M‹Eà‹Uì‹uè‰A‹E܉‹U´‰q ‹uä‰B8¸‰q‰z<é—éÿÿ‹EÜO‹u´Ñm܃à‰F‹E܃àƒø„u‚aƒø„åƒøu‹u‹E´ÇFV ÇÁm܃ï‹U´‹é éÿÿ;]äv‹]ä;]àv‹]à…Û„Óêÿÿ‰\$‹Eì‰D$‹Uè‰$è)]ä‹M´]ì)]à‹]è)Y@éÙèÿÿ‹U‹M´ÇB( Çé~òÿÿ‹M´‹é·èÿÿ‰ñ‹u´¸Óà‰F1À‰D$1À‰D$Ç$èeÜ‹U‰Fƒ}܉B0ÇEÜÀƒàƒÀ 1ÿ‰édèÿÿ‹M´Çé%ÿÿÿ‹M‹uè‹Eà‹Uì‰q ‹uä‰A‹E܉q‰‹U´‹MÔ‰B8‰z<‰L$‹u‰4$è‹F ‹V‹‰Eè‹E´‹v‰Uà‹@8‰Mì‹U´‰uä‰EÜ‹z<‹éñçÿÿ‹E´Ç é²þÿÿ‹U´ÇBL@ÇBT ÇBP@ÇBXÇéˆþÿÿt&U‰åSƒì‹]…Ût2‹C…Àt+‹S$…Òt$‹H4…Éu(‰D$‹C(‰$ÿÒÇCƒÄ1À[]ÃfƒÄ¸þÿÿÿ[]ÉL$‹C(‰$ÿÒ‹C‹S$ëÄU‰åƒì‰uø‹u‰}ü‹}…ö‰]ô„¦‹^…Û„›‹C…À„¤ƒ; ºþÿÿÿ…‡Ç$1À‰D$1À‰D$è‰|$‹U ‰$‰T$è9CºýÿÿÿuR‹V‰ðèðäÿÿ…Àu|‹K(9ùsV‹U ‹C4‰L$ú)ʉT$‰$è‹C(‰C,ÇC 1Ò‹]ô‹uø‰Ð‹}ü‰ì]úþÿÿÿ‹]ô‰Ð‹uø‹}ü‰ì]Ã; ušé`ÿÿÿ‹s4‰|$‹E ñ)ù‰D$‰ $è‰{,ë«Çºüÿÿÿë»vU‰å‹E‹M …Àt#‹P…ÒtöB¸þÿÿÿt ‰J 1ÀÇA0]Ãv]¸þÿÿÿÉö¼'U‰åWV‰ÖSƒì1Û‰Eð‹}‹9û’À1Òƒù–Â…Ât;v¼'¶ƒù¶Âw4…Àt7„Òt<1ÉC‰ö¼'9û’À1Òƒù–Â…ÂuÏ‹Eð‰‰ØZ[^_]Ãf=ÿuÉACëÕt&¸C)ȉÁëÄt&U‰åWVSƒì ‹}…ÿ„à‹w…ö„Õ‹W…Òuƒ~<¸ûÿÿÿ†Äƒ>^htjÇ‹F<‹V8‰Áƒáƒàø‰F<Óâ1ɉV8ƒøv-]ð¶¼'ˆ‹F8AÁè‰Â‰F8‹F<ƒè‰F<ƒøwãÇFh^hUð‰ $‰ØèÌþÿÿ‹W‰$‰Ø‹è½þÿÿ)G‰Á‹_¸ýÿÿÿÙƒ~h‰Mì‰Ou)‹_‰<$èeàÿÿ‰_‹EìÇ ‰GƒÄ 1À[^_]øþÿÿÿƒÄ [^_]Ãë U‰å‹E…Àt‹P…Òu¸þÿÿÿ]Ã1Àƒ: u÷‹R<…Òuð]¸ÃvU‰åƒì(‰uø‹E‹u ‰]ô…À”À‰}ü…ö” Шu‹^…Ût‹V …Òt ‹F$…Àu¶¸þÿÿÿ‹]ô‹uø‹}ü‰ì]ù¿0%‰|$‰L$‹F(‰$ÿÒ‰Eð‹Uð¸üÿÿÿ…ÒtÊÇEì‹C4…À…žü‹}¹ó¥‰\$¿0%‰|$‹Uð‰$è‹SLƒ09Âr!ƒ,%9Âw‹Eð)ÚЋUð‰BL‹CP)ØЉBP‹Cl‹uð‹Mì‹Uð)Øð…ɉBlt ‹K$¸‹S4Óà‰D$‹Eì‰T$‰$è‹Eì‹Uð‰B4‹E‰P1Àéÿÿÿ¸‰D$‹K$¸Óà‰D$‹F(‰$ÿV ‰Eì…À…7ÿÿÿ‹Eð‰D$‹F(‰$ÿV$¸üÿÿÿéÕþÿÿ     `Psp0 À `  €@ àX ;x8 Ðh( °ˆH ðTã+t4 È d$ ¨„D è\ ˜S|< Øl, ¸ ŒL øR£#r2 Ä b" ¤‚B äZ ”Cz: Ôj* ´ ŠJ ôV@3v6 Ìf& ¬†F ì ^ œc~> Ün. ¼ŽN ü`Qƒq1  a! ¢A âY ’;y9 Òi) ² ‰I òU+u5 Ê e% ª…E ê] šS}= Úm- º M úSÃ#s3 Æ c# ¦ƒC æ[ –C{; Ök+ ¶ ‹K öW@3w7 Îg' ®‡G î _ žc? Þo/ ¾O þ`Psp0 Á ` ¡€@ áX ‘;x8 Ñh( ±ˆH ñTã+t4 É d$ ©„D é\ ™S|< Ùl, ¹ ŒL ùR£#r2 Å b" ¥‚B åZ •Cz: Õj* µ ŠJ õV@3v6 Íf& ­†F í ^ c~> Ýn. ½ŽN ý`Qƒq1 à a! £A ãY “;y9 Ói) ³ ‰I óU+u5 Ë e% «…E ë] ›S}= Ûm- » M ûSÃ#s3 Ç c# §ƒC ç[ —C{; ×k+ · ‹K ÷W@3w7 Ïg' ¯‡G ï _ Ÿc? ßo/ ¿O ÿA@!  
@a`10  Á@unknown compression methodtoo many length or distance symbolsheader crc mismatchinvalid distance codeinvalid literal/lengths setinvalid distance too far backinvalid stored block lengthsinvalid code lengths setincorrect length checkincorrect data checkincorrect header checkunknown header flags setinvalid distances setinvalid literal/length codeinvalid bit length repeatinvalid window sizeinvalid block typeqjqÒf[k`vB³ë± ¦‹µÞ ÈÝ 4 < 3€µu!† ²/ŸC¬9Õ±Õ * J_’ù^‚æûlòR*ç`K£ _¡Ã'<Sr¥Ý÷?T…ú?e¡Xf5H׬ !l p t x | € „ ˆ Œ  ” ˜ œ   ¤ ¨ ¬ ° ´ ¸ ¼ À Ä È Ì Ð Ô Ø Ü .fileþÿginflate.c     /  =P order.0lenfix.1@K@_inflateÀ U€ aà wð ‰0 •À ¢Ð ´  .textp!9.data.bss.rdataà Á Ï _adler32 _crc32 _memcpy _zcfree _zcalloc Þ_inflateReset_inflatePrime_inflateInit2__inflateInit__updatewindowdistfix.2_inflateEnd_inflateSetDictionary_inflateGetHeader_syncsearch_inflateSync_inflateSyncPoint_inflateCopy_inflate_fast_inflate_table /1/20 1249165836 0 0 100666 7716 ` L$.textÀ´” P`.data@0À.bss€0À.rdata tz@`@U‰åƒì‰]ô‹E‹] ‰uø…À‰}ü‹u‹}t€81•Àƒ}8• Шt¸úÿÿÿ‹]ô‹uø‹}ü‰ì]Ã…ö”À…ÿ” ШuƒûžÀƒûŸÂ Шt‹]ô¸þÿÿÿ‹uø‹}ü‰ì]ÃÇF‹V …Òtp‹F$…ÀuÇF$¹0%¸‰L$‰D$‹F(‰$ÿ҉…Ҹüÿÿÿ„zÿÿÿ‰VˆÙ¸ÇB€Óà‰B(1À‰Z$‰z4ÇB0ÇB,‹]ô‹uø‹}ü‰ì]ÃÇF(º‰V é|ÿÿÿ¶U‰åWVSƒì\‹]…Ûtp‹E‹x…ÿtfÇ ‹UÇGÇG,‹ÇBÇEè…À‰Eðt‹J‰MèÇEà‹G4‹W(ÇE܉Eì‰Uä‹ƒè ƒøwnÿ$…Ì ´&ƒÄ\¸þÿÿÿ[^_]ÃvEð‰D$‹E‰$ÿU ‰Eè…À„Çt&ÿMè‹Uð¶MܶBƒE܉UðÓàEàƒ}Ü ‡ì‹Eè…ÀuÕë³v»þÿÿÿt&¼'‹Eð‹U‹M艉؉JƒÄ\[^_]ÃÇfƒ}è—À1Ò}ä—…„˜‹U‹Mì‹Eä‰J ‹Mè‰B‹Eð‰J‹_(9_,‰‹Eà‹U܉G8‰W Ün. ¼ŽN ü`Qƒq1  a! ¢A âY ’;y9 Òi) ² ‰I òU+u5 Ê e% ª…E ê] šS}= Úm- º M úSÃ#s3 Æ c# ¦ƒC æ[ –C{; Ök+ ¶ ‹K öW@3w7 Îg' ®‡G î _ žc? Þo/ ¾O þ`Psp0 Á ` ¡€@ áX ‘;x8 Ñh( ±ˆH ñTã+t4 É d$ ©„D é\ ™S|< Ùl, ¹ ŒL ùR£#r2 Å b" ¥‚B åZ •Cz: Õj* µ ŠJ õV@3v6 Íf& ­†F í ^ c~> Ýn. ½ŽN ý`Qƒq1 à a! £A ãY “;y9 Ói) ³ ‰I óU+u5 Ë e% «…E ë] ›S}= Ûm- » M ûSÃ#s3 Ç c# §ƒC ç[ —C{; ×k+ · ‹K ÷W@3w7 Ïg' ¯‡G ï _ Ÿc? ßo/ ¿O ÿA@!  
@a`10  Á@too many length or distance symbolsinvalid stored block lengthsinvalid code lengths setinvalid literal/length codeinvalid bit length repeatinvalid literal/lengths setinvalid block typeinvalid distance codeinvalid distance too far backinvalid distances setGðððÚðð ððððððð³Àþ…~™ÍŒšÂô\nœH ^ p … 1 K;M\Ì Ð Ô Ø Ü à ä è ì ð ô ø ü     .fileþÿginfback.c order.0lenfix.1@@  -p .text².data.bss.rdata = K _memcpy _zcfree _zcalloc Z_inflateBackInit_distfix.2_inflateBack_inflateBackEnd_inflate_fast_inflate_table /1/20 1249165837 0 0 100666 2364 ` LÌ.text°´¤ P`.data@0À.bss€0À.rdata@d@`@U‰åWV1öS윋]ë fÇDuÈFƒþvóÇE¤ë‹E¤‹M ÿE¤·A·DUÈ@f‰DUÈ9]¤rãÇEœ‹E‹‰E˜´&¼'‹Uœfƒ|UÈuJ‰Uœuï‹Mœ9M˜v‰M˜ÇE ‹Eœ…ÀuFçÿÿÿ‹UƒÏ@‰ø‹M´‰Ç‹çÿÿ‰8‹B‰‹E‰zƒÇ1ÉÄœ‰È[^_]Ãf‹U fƒ|UÈu Bƒú‰U vì‹M 9M˜s‰M˜º¾´&·DuÈÒ)ˆaFƒþvë…Ò~‹E¹ÿÿÿÿ…À”Àƒ}œ• ШuŽfÇEª¾f·Du¨·TuÈÐf‰DuªFƒþvéÇE¤9]¤s1‹U¤‹M ·Qf…Àt·À·TE¨Jf‰LE¨‹M¤‹Ef‰ PÿE¤9]¤rÏ‹u…ö„ãƒ}„иÀ‰…xÿÿÿ¸‰…tÿÿÿ¸ÿÿÿÿ‰…pÿÿÿÇEŒ‹M˜‹UÇEˆ‹u ‰M”‹¶M˜ÇE¤‰•|ÿÿÿÓeŒ¹ÇE‹EŒÇE„ÿÿÿÿHƒ}‰E€”À1Ò}Œ¯—Â…Â…’þÿÿ‰ð‰ú‹M¤*EˆÆ‹E‰×·H;•pÿÿÿÒÁâçÿ ׋E‰ñº‹]ˆ)Á¸Óà¶M”‰…dÿÿÿÓâ¶M‰U Óëv¼'‹…dÿÿÿ‹|ÿÿÿ)Â…Ò‰þÿÿº~þÿÿ¸‰xÿÿÿ‰•tÿÿÿé+üÿÿ inflate 1.2.3 Copyright 1995-2005 Mark Adler  #+3;CScsƒ£ÃãÉÄ !1AaÁ  0@`@@ºÅŠ.fileþÿginftrees.clbase.0@lext.1€dbase.2Àdext.3 .text©.data.bss.rdata@&_inflate_table_inflate_copyright /1/20 1249165838 0 0 100666 1700 ` L¢ .text€´„ P`.data@0À.bss€0À.rdataP4@0@U‰åWVSƒì@‹E‹M‹U ‹@‹Y‹y ‰Eð‹OH‰Eì؃è‰Eè‹A‰ù)„8ÿþÿÿ)щEà‹Eð‹Uð‰Mä‹@(‹Mð‹R,‰EÜ‹I0‹Eð‰U؉MÔ‹PL‹HP‹X8‹p<‰UЉMÌ‹HT¸‰ÂÓâ‰Ñ‹UðI‰MÈ‹JXÓà‹J4H‰EÄI‰M¸ƒþw"ÿEì‰ñƒÆ‹Uì¶B‰UìÓàö‰ñƒÆÓàËEÈ!Øë$öE´u^öE´@…°¶M´¸ÁêÓàH!Ø‹MЋ¶Æ¶ÀˆÁ)ƶÂÓë…À‰E´uÀGÁꈋMè9Mì’À1Ò;}à’Â…Â…sÿÿÿéyÁêƒe´‰UÀt/;u´sÿEì‰ñƒÆ‹Uì¶ÓàöM´¸ÓàH!ØÓëEÀ‹E´)ƃþw"ÿEì‰ñƒÆ‹Uì¶B‰UìÓàö‰ñƒÆÓàËEÄ!ØëöE´@…¼¶M´¸ÁêÓàH!Ø‹MÌ‹¶Æ¶ÀˆÁ)ƶÂÓ먉E´tƃàÁê9ƉU¼‰E´s'ÿEì‰ñƒÆ‹Uì¶ÓàÃ;u´sB‰ñƒÆ‰Uì¶ÓàöM´¸‹UäÓàH!ØE¼Óë‹M´‰ø)Ð)Î9E¼‰E´†9‹U¼‹MØ)‰U´9ʇ#‹EÔ‹U¸…À…f‹EÜ‹M´)È‹EÀ9E´ƒÉ‹M´)ȉEÀBG¶ˆÿM´uô‹E¼‰ú)Âé©öE´ …»‹EÇ@‹Uðljð‹UÁè)Eì‰EÀÁà)Ɖñ¸ÓàH‹Mè!ËEì@9Mì‰G‰B s7‹Eì)Á‰ÈƒÀ;}à‹M‰As=)}à‹Mð‹U‹Eà‰Y8‰B‰q<ƒÄ@[^_]ËU踋M)Uì‹Uì)Ð;}à‰ArËEà‹Mð‹U)Ǹ)ø‰B‰Y8‰q<ƒÄ@[^_]ËUðÇ 
éJÿÿÿ‹EÇ@é2ÿÿÿ‹E¼‰ú)ÂBG¶BˆG¶BˆG¶ˆƒmÀƒ}Àwá‹MÀ…É„sýÿÿBG¶ˆƒ}À†býÿÿ¶BGˆéVýÿÿ‹E´9EÔs|‹MÔ‹EÜÈ‹M´)È‹MÔ)M´‹EÀ9E´sT‹M´)ȉEÀBG¶ˆÿM´uô‹EÀ‹U¸9EÔs5‹MÔ)ȉM´‰EÀBG¶ˆÿM´uôédþÿÿBG¶BˆG¶BˆG¶ˆƒmÀƒ}ÀwáéTÿÿÿ‹EÔ‹M´)È‹EÀ9E´sã‹M´)ȉEÀBG¶ˆÿM´uôéþÿÿ‹M‹EðÇA2Çé&þÿÿinvalid literal/length codeinvalid distance codeinvalid distance too far back– _ n .fileþÿginffast.c .text}.data.bss.rdataP_inflate_fast samtools-0.1.19/win32/xcurses.h000066400000000000000000001413731212162403000162520ustar00rootroot00000000000000/* Public Domain Curses */ /* $Id: curses.h,v 1.295 2008/07/15 17:13:25 wmcbrine Exp $ */ /*----------------------------------------------------------------------* * PDCurses * *----------------------------------------------------------------------*/ #ifndef __PDCURSES__ #define __PDCURSES__ 1 /*man-start************************************************************** PDCurses definitions list: (Only define those needed) XCURSES True if compiling for X11. PDC_RGB True if you want to use RGB color definitions (Red = 1, Green = 2, Blue = 4) instead of BGR. PDC_WIDE True if building wide-character support. PDC_DLL_BUILD True if building a Win32 DLL. NCURSES_MOUSE_VERSION Use the ncurses mouse API instead of PDCurses' traditional mouse API. PDCurses portable platform definitions list: PDC_BUILD Defines API build version. PDCURSES Enables access to PDCurses-only routines. XOPEN Always true. SYSVcurses True if you are compiling for SYSV portability. BSDcurses True if you are compiling for BSD portability. 
**man-end****************************************************************/ #define PDC_BUILD 3401 #define PDCURSES 1 /* PDCurses-only routines */ #define XOPEN 1 /* X/Open Curses routines */ #define SYSVcurses 1 /* System V Curses routines */ #define BSDcurses 1 /* BSD Curses routines */ #define CHTYPE_LONG 1 /* size of chtype; long */ /*----------------------------------------------------------------------*/ #include #include #include /* Required by X/Open usage below */ #ifdef PDC_WIDE # include #endif #if defined(__cplusplus) || defined(__cplusplus__) || defined(__CPLUSPLUS) extern "C" { # define bool _bool #endif /*---------------------------------------------------------------------- * * PDCurses Manifest Constants * */ #ifndef FALSE # define FALSE 0 #endif #ifndef TRUE # define TRUE 1 #endif #ifndef NULL # define NULL (void *)0 #endif #ifndef ERR # define ERR (-1) #endif #ifndef OK # define OK 0 #endif /*---------------------------------------------------------------------- * * PDCurses Type Declarations * */ typedef unsigned char bool; /* PDCurses Boolean type */ #ifdef CHTYPE_LONG # if _LP64 typedef unsigned int chtype; # else typedef unsigned long chtype; /* 16-bit attr + 16-bit char */ # endif #else typedef unsigned short chtype; /* 8-bit attr + 8-bit char */ #endif #ifdef PDC_WIDE typedef chtype cchar_t; #endif typedef chtype attr_t; /*---------------------------------------------------------------------- * * PDCurses Mouse Interface -- SYSVR4, with extensions * */ typedef struct { int x; /* absolute column, 0 based, measured in characters */ int y; /* absolute row, 0 based, measured in characters */ short button[3]; /* state of each button */ int changes; /* flags indicating what has changed with the mouse */ } MOUSE_STATUS; #define BUTTON_RELEASED 0x0000 #define BUTTON_PRESSED 0x0001 #define BUTTON_CLICKED 0x0002 #define BUTTON_DOUBLE_CLICKED 0x0003 #define BUTTON_TRIPLE_CLICKED 0x0004 #define BUTTON_MOVED 0x0005 /* PDCurses */ #define WHEEL_SCROLLED 
0x0006 /* PDCurses */ #define BUTTON_ACTION_MASK 0x0007 /* PDCurses */ #define PDC_BUTTON_SHIFT 0x0008 /* PDCurses */ #define PDC_BUTTON_CONTROL 0x0010 /* PDCurses */ #define PDC_BUTTON_ALT 0x0020 /* PDCurses */ #define BUTTON_MODIFIER_MASK 0x0038 /* PDCurses */ #define MOUSE_X_POS (Mouse_status.x) #define MOUSE_Y_POS (Mouse_status.y) /* * Bits associated with the .changes field: * 3 2 1 0 * 210987654321098765432109876543210 * 1 <- button 1 has changed * 10 <- button 2 has changed * 100 <- button 3 has changed * 1000 <- mouse has moved * 10000 <- mouse position report * 100000 <- mouse wheel up * 1000000 <- mouse wheel down */ #define PDC_MOUSE_MOVED 0x0008 #define PDC_MOUSE_POSITION 0x0010 #define PDC_MOUSE_WHEEL_UP 0x0020 #define PDC_MOUSE_WHEEL_DOWN 0x0040 #define A_BUTTON_CHANGED (Mouse_status.changes & 7) #define MOUSE_MOVED (Mouse_status.changes & PDC_MOUSE_MOVED) #define MOUSE_POS_REPORT (Mouse_status.changes & PDC_MOUSE_POSITION) #define BUTTON_CHANGED(x) (Mouse_status.changes & (1 << ((x) - 1))) #define BUTTON_STATUS(x) (Mouse_status.button[(x) - 1]) #define MOUSE_WHEEL_UP (Mouse_status.changes & PDC_MOUSE_WHEEL_UP) #define MOUSE_WHEEL_DOWN (Mouse_status.changes & PDC_MOUSE_WHEEL_DOWN) /* mouse bit-masks */ #define BUTTON1_RELEASED 0x00000001L #define BUTTON1_PRESSED 0x00000002L #define BUTTON1_CLICKED 0x00000004L #define BUTTON1_DOUBLE_CLICKED 0x00000008L #define BUTTON1_TRIPLE_CLICKED 0x00000010L #define BUTTON1_MOVED 0x00000010L /* PDCurses */ #define BUTTON2_RELEASED 0x00000020L #define BUTTON2_PRESSED 0x00000040L #define BUTTON2_CLICKED 0x00000080L #define BUTTON2_DOUBLE_CLICKED 0x00000100L #define BUTTON2_TRIPLE_CLICKED 0x00000200L #define BUTTON2_MOVED 0x00000200L /* PDCurses */ #define BUTTON3_RELEASED 0x00000400L #define BUTTON3_PRESSED 0x00000800L #define BUTTON3_CLICKED 0x00001000L #define BUTTON3_DOUBLE_CLICKED 0x00002000L #define BUTTON3_TRIPLE_CLICKED 0x00004000L #define BUTTON3_MOVED 0x00004000L /* PDCurses */ /* For the ncurses-compatible 
functions only, BUTTON4_PRESSED and BUTTON5_PRESSED are returned for mouse scroll wheel up and down; otherwise PDCurses doesn't support buttons 4 and 5 */ #define BUTTON4_RELEASED 0x00008000L #define BUTTON4_PRESSED 0x00010000L #define BUTTON4_CLICKED 0x00020000L #define BUTTON4_DOUBLE_CLICKED 0x00040000L #define BUTTON4_TRIPLE_CLICKED 0x00080000L #define BUTTON5_RELEASED 0x00100000L #define BUTTON5_PRESSED 0x00200000L #define BUTTON5_CLICKED 0x00400000L #define BUTTON5_DOUBLE_CLICKED 0x00800000L #define BUTTON5_TRIPLE_CLICKED 0x01000000L #define MOUSE_WHEEL_SCROLL 0x02000000L /* PDCurses */ #define BUTTON_MODIFIER_SHIFT 0x04000000L /* PDCurses */ #define BUTTON_MODIFIER_CONTROL 0x08000000L /* PDCurses */ #define BUTTON_MODIFIER_ALT 0x10000000L /* PDCurses */ #define ALL_MOUSE_EVENTS 0x1fffffffL #define REPORT_MOUSE_POSITION 0x20000000L /* ncurses mouse interface */ typedef unsigned long mmask_t; typedef struct { short id; /* unused, always 0 */ int x, y, z; /* x, y same as MOUSE_STATUS; z unused */ mmask_t bstate; /* equivalent to changes + button[], but in the same format as used for mousemask() */ } MEVENT; #ifdef NCURSES_MOUSE_VERSION # define BUTTON_SHIFT BUTTON_MODIFIER_SHIFT # define BUTTON_CONTROL BUTTON_MODIFIER_CONTROL # define BUTTON_CTRL BUTTON_MODIFIER_CONTROL # define BUTTON_ALT BUTTON_MODIFIER_ALT #else # define BUTTON_SHIFT PDC_BUTTON_SHIFT # define BUTTON_CONTROL PDC_BUTTON_CONTROL # define BUTTON_ALT PDC_BUTTON_ALT #endif /*---------------------------------------------------------------------- * * PDCurses Structure Definitions * */ typedef struct _win /* definition of a window */ { int _cury; /* current pseudo-cursor */ int _curx; int _maxy; /* max window coordinates */ int _maxx; int _begy; /* origin on screen */ int _begx; int _flags; /* window properties */ chtype _attrs; /* standard attributes and colors */ chtype _bkgd; /* background, normally blank */ bool _clear; /* causes clear at next refresh */ bool _leaveit; /* leaves cursor where it 
is */ bool _scroll; /* allows window scrolling */ bool _nodelay; /* input character wait flag */ bool _immed; /* immediate update flag */ bool _sync; /* synchronise window ancestors */ bool _use_keypad; /* flags keypad key mode active */ chtype **_y; /* pointer to line pointer array */ int *_firstch; /* first changed character in line */ int *_lastch; /* last changed character in line */ int _tmarg; /* top of scrolling region */ int _bmarg; /* bottom of scrolling region */ int _delayms; /* milliseconds of delay for getch() */ int _parx, _pary; /* coords relative to parent (0,0) */ struct _win *_parent; /* subwin's pointer to parent win */ } WINDOW; /* Avoid using the SCREEN struct directly -- use the corresponding functions if possible. This struct may eventually be made private. */ typedef struct { bool alive; /* if initscr() called, and not endwin() */ bool autocr; /* if cr -> lf */ bool cbreak; /* if terminal unbuffered */ bool echo; /* if terminal echo */ bool raw_inp; /* raw input mode (v. cooked input) */ bool raw_out; /* raw output mode (7 v. 
8 bits) */ bool audible; /* FALSE if the bell is visual */ bool mono; /* TRUE if current screen is mono */ bool resized; /* TRUE if TERM has been resized */ bool orig_attr; /* TRUE if we have the original colors */ short orig_fore; /* original screen foreground color */ short orig_back; /* original screen foreground color */ int cursrow; /* position of physical cursor */ int curscol; /* position of physical cursor */ int visibility; /* visibility of cursor */ int orig_cursor; /* original cursor size */ int lines; /* new value for LINES */ int cols; /* new value for COLS */ unsigned long _trap_mbe; /* trap these mouse button events */ unsigned long _map_mbe_to_key; /* map mouse buttons to slk */ int mouse_wait; /* time to wait (in ms) for a button release after a press, in order to count it as a click */ int slklines; /* lines in use by slk_init() */ WINDOW *slk_winptr; /* window for slk */ int linesrippedoff; /* lines ripped off via ripoffline() */ int linesrippedoffontop; /* lines ripped off on top via ripoffline() */ int delaytenths; /* 1/10ths second to wait block getch() for */ bool _preserve; /* TRUE if screen background to be preserved */ int _restore; /* specifies if screen background to be restored, and how */ bool save_key_modifiers; /* TRUE if each key modifiers saved with each key press */ bool return_key_modifiers; /* TRUE if modifier keys are returned as "real" keys */ bool key_code; /* TRUE if last key is a special key; used internally by get_wch() */ #ifdef XCURSES int XcurscrSize; /* size of Xcurscr shared memory block */ bool sb_on; int sb_viewport_y; int sb_viewport_x; int sb_total_y; int sb_total_x; int sb_cur_y; int sb_cur_x; #endif short line_color; /* color of line attributes - default -1 */ } SCREEN; /*---------------------------------------------------------------------- * * PDCurses External Variables * */ #ifdef PDC_DLL_BUILD # ifdef CURSES_LIBRARY # define PDCEX __declspec(dllexport) extern # else # define PDCEX __declspec(dllimport) # 
endif #else # define PDCEX extern #endif PDCEX int LINES; /* terminal height */ PDCEX int COLS; /* terminal width */ PDCEX WINDOW *stdscr; /* the default screen window */ PDCEX WINDOW *curscr; /* the current screen image */ PDCEX SCREEN *SP; /* curses variables */ PDCEX MOUSE_STATUS Mouse_status; PDCEX int COLORS; PDCEX int COLOR_PAIRS; PDCEX int TABSIZE; PDCEX chtype acs_map[]; /* alternate character set map */ PDCEX char ttytype[]; /* terminal name/description */ /*man-start************************************************************** PDCurses Text Attributes ======================== Originally, PDCurses used a short (16 bits) for its chtype. To include color, a number of things had to be sacrificed from the strict Unix and System V support. The main problem was fitting all character attributes and color into an unsigned char (all 8 bits!). Today, PDCurses by default uses a long (32 bits) for its chtype, as in System V. The short chtype is still available, by undefining CHTYPE_LONG and rebuilding the library. The following is the structure of a win->_attrs chtype: short form: ------------------------------------------------- |15|14|13|12|11|10| 9| 8| 7| 6| 5| 4| 3| 2| 1| 0| ------------------------------------------------- color number | attrs | character eg 'a' The available non-color attributes are bold, reverse and blink. Others have no effect. The high order char is an index into an array of physical colors (defined in color.c) -- 32 foreground/background color pairs (5 bits) plus 3 bits for other attributes. long form: ---------------------------------------------------------------------------- |31|30|29|28|27|26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|..| 3| 2| 1| 0| ---------------------------------------------------------------------------- color number | modifiers | character eg 'a' The available non-color attributes are bold, underline, invisible, right-line, left-line, protect, reverse and blink. 
256 color pairs (8 bits), 8 bits for other attributes, and 16 bits for character data. **man-end****************************************************************/ /*** Video attribute macros ***/ #define A_NORMAL (chtype)0 #ifdef CHTYPE_LONG # define A_ALTCHARSET (chtype)0x00010000 # define A_RIGHTLINE (chtype)0x00020000 # define A_LEFTLINE (chtype)0x00040000 # define A_INVIS (chtype)0x00080000 # define A_UNDERLINE (chtype)0x00100000 # define A_REVERSE (chtype)0x00200000 # define A_BLINK (chtype)0x00400000 # define A_BOLD (chtype)0x00800000 # define A_ATTRIBUTES (chtype)0xffff0000 # define A_CHARTEXT (chtype)0x0000ffff # define A_COLOR (chtype)0xff000000 # define A_ITALIC A_INVIS # define A_PROTECT (A_UNDERLINE | A_LEFTLINE | A_RIGHTLINE) # define PDC_ATTR_SHIFT 19 # define PDC_COLOR_SHIFT 24 #else # define A_BOLD (chtype)0x0100 /* X/Open */ # define A_REVERSE (chtype)0x0200 /* X/Open */ # define A_BLINK (chtype)0x0400 /* X/Open */ # define A_ATTRIBUTES (chtype)0xff00 /* X/Open */ # define A_CHARTEXT (chtype)0x00ff /* X/Open */ # define A_COLOR (chtype)0xf800 /* System V */ # define A_ALTCHARSET A_NORMAL /* X/Open */ # define A_PROTECT A_NORMAL /* X/Open */ # define A_UNDERLINE A_NORMAL /* X/Open */ # define A_LEFTLINE A_NORMAL # define A_RIGHTLINE A_NORMAL # define A_ITALIC A_NORMAL # define A_INVIS A_NORMAL # define PDC_ATTR_SHIFT 8 # define PDC_COLOR_SHIFT 11 #endif #define A_STANDOUT (A_REVERSE | A_BOLD) /* X/Open */ #define A_DIM A_NORMAL #define CHR_MSK A_CHARTEXT /* Obsolete */ #define ATR_MSK A_ATTRIBUTES /* Obsolete */ #define ATR_NRM A_NORMAL /* Obsolete */ /* For use with attr_t -- X/Open says, "these shall be distinct", so this is a non-conforming implementation. 
*/ #define WA_ALTCHARSET A_ALTCHARSET #define WA_BLINK A_BLINK #define WA_BOLD A_BOLD #define WA_DIM A_DIM #define WA_INVIS A_INVIS #define WA_LEFT A_LEFTLINE #define WA_PROTECT A_PROTECT #define WA_REVERSE A_REVERSE #define WA_RIGHT A_RIGHTLINE #define WA_STANDOUT A_STANDOUT #define WA_UNDERLINE A_UNDERLINE #define WA_HORIZONTAL A_NORMAL #define WA_LOW A_NORMAL #define WA_TOP A_NORMAL #define WA_VERTICAL A_NORMAL /*** Alternate character set macros ***/ /* 'w' = 32-bit chtype; acs_map[] index | A_ALTCHARSET 'n' = 16-bit chtype; it gets the fallback set because no bit is available for A_ALTCHARSET */ #ifdef CHTYPE_LONG # define ACS_PICK(w, n) ((chtype)w | A_ALTCHARSET) #else # define ACS_PICK(w, n) ((chtype)n) #endif /* VT100-compatible symbols -- box chars */ #define ACS_ULCORNER ACS_PICK('l', '+') #define ACS_LLCORNER ACS_PICK('m', '+') #define ACS_URCORNER ACS_PICK('k', '+') #define ACS_LRCORNER ACS_PICK('j', '+') #define ACS_RTEE ACS_PICK('u', '+') #define ACS_LTEE ACS_PICK('t', '+') #define ACS_BTEE ACS_PICK('v', '+') #define ACS_TTEE ACS_PICK('w', '+') #define ACS_HLINE ACS_PICK('q', '-') #define ACS_VLINE ACS_PICK('x', '|') #define ACS_PLUS ACS_PICK('n', '+') /* VT100-compatible symbols -- other */ #define ACS_S1 ACS_PICK('o', '-') #define ACS_S9 ACS_PICK('s', '_') #define ACS_DIAMOND ACS_PICK('`', '+') #define ACS_CKBOARD ACS_PICK('a', ':') #define ACS_DEGREE ACS_PICK('f', '\'') #define ACS_PLMINUS ACS_PICK('g', '#') #define ACS_BULLET ACS_PICK('~', 'o') /* Teletype 5410v1 symbols -- these are defined in SysV curses, but are not well-supported by most terminals. Stick to VT100 characters for optimum portability. */ #define ACS_LARROW ACS_PICK(',', '<') #define ACS_RARROW ACS_PICK('+', '>') #define ACS_DARROW ACS_PICK('.', 'v') #define ACS_UARROW ACS_PICK('-', '^') #define ACS_BOARD ACS_PICK('h', '#') #define ACS_LANTERN ACS_PICK('i', '*') #define ACS_BLOCK ACS_PICK('0', '#') /* That goes double for these -- undocumented SysV symbols. Don't use them. 
*/ #define ACS_S3 ACS_PICK('p', '-') #define ACS_S7 ACS_PICK('r', '-') #define ACS_LEQUAL ACS_PICK('y', '<') #define ACS_GEQUAL ACS_PICK('z', '>') #define ACS_PI ACS_PICK('{', 'n') #define ACS_NEQUAL ACS_PICK('|', '+') #define ACS_STERLING ACS_PICK('}', 'L') /* Box char aliases */ #define ACS_BSSB ACS_ULCORNER #define ACS_SSBB ACS_LLCORNER #define ACS_BBSS ACS_URCORNER #define ACS_SBBS ACS_LRCORNER #define ACS_SBSS ACS_RTEE #define ACS_SSSB ACS_LTEE #define ACS_SSBS ACS_BTEE #define ACS_BSSS ACS_TTEE #define ACS_BSBS ACS_HLINE #define ACS_SBSB ACS_VLINE #define ACS_SSSS ACS_PLUS /* cchar_t aliases */ #ifdef PDC_WIDE # define WACS_ULCORNER (&(acs_map['l'])) # define WACS_LLCORNER (&(acs_map['m'])) # define WACS_URCORNER (&(acs_map['k'])) # define WACS_LRCORNER (&(acs_map['j'])) # define WACS_RTEE (&(acs_map['u'])) # define WACS_LTEE (&(acs_map['t'])) # define WACS_BTEE (&(acs_map['v'])) # define WACS_TTEE (&(acs_map['w'])) # define WACS_HLINE (&(acs_map['q'])) # define WACS_VLINE (&(acs_map['x'])) # define WACS_PLUS (&(acs_map['n'])) # define WACS_S1 (&(acs_map['o'])) # define WACS_S9 (&(acs_map['s'])) # define WACS_DIAMOND (&(acs_map['`'])) # define WACS_CKBOARD (&(acs_map['a'])) # define WACS_DEGREE (&(acs_map['f'])) # define WACS_PLMINUS (&(acs_map['g'])) # define WACS_BULLET (&(acs_map['~'])) # define WACS_LARROW (&(acs_map[','])) # define WACS_RARROW (&(acs_map['+'])) # define WACS_DARROW (&(acs_map['.'])) # define WACS_UARROW (&(acs_map['-'])) # define WACS_BOARD (&(acs_map['h'])) # define WACS_LANTERN (&(acs_map['i'])) # define WACS_BLOCK (&(acs_map['0'])) # define WACS_S3 (&(acs_map['p'])) # define WACS_S7 (&(acs_map['r'])) # define WACS_LEQUAL (&(acs_map['y'])) # define WACS_GEQUAL (&(acs_map['z'])) # define WACS_PI (&(acs_map['{'])) # define WACS_NEQUAL (&(acs_map['|'])) # define WACS_STERLING (&(acs_map['}'])) # define WACS_BSSB WACS_ULCORNER # define WACS_SSBB WACS_LLCORNER # define WACS_BBSS WACS_URCORNER # define WACS_SBBS WACS_LRCORNER # define 
WACS_SBSS WACS_RTEE # define WACS_SSSB WACS_LTEE # define WACS_SSBS WACS_BTEE # define WACS_BSSS WACS_TTEE # define WACS_BSBS WACS_HLINE # define WACS_SBSB WACS_VLINE # define WACS_SSSS WACS_PLUS #endif /*** Color macros ***/ #define COLOR_BLACK 0 #ifdef PDC_RGB /* RGB */ # define COLOR_RED 1 # define COLOR_GREEN 2 # define COLOR_BLUE 4 #else /* BGR */ # define COLOR_BLUE 1 # define COLOR_GREEN 2 # define COLOR_RED 4 #endif #define COLOR_CYAN (COLOR_BLUE | COLOR_GREEN) #define COLOR_MAGENTA (COLOR_RED | COLOR_BLUE) #define COLOR_YELLOW (COLOR_RED | COLOR_GREEN) #define COLOR_WHITE 7 /*---------------------------------------------------------------------- * * Function and Keypad Key Definitions. * Many are just for compatibility. * */ #define KEY_CODE_YES 0x100 /* If get_wch() gives a key code */ #define KEY_BREAK 0x101 /* Not on PC KBD */ #define KEY_DOWN 0x102 /* Down arrow key */ #define KEY_UP 0x103 /* Up arrow key */ #define KEY_LEFT 0x104 /* Left arrow key */ #define KEY_RIGHT 0x105 /* Right arrow key */ #define KEY_HOME 0x106 /* home key */ #define KEY_BACKSPACE 0x107 /* not on pc */ #define KEY_F0 0x108 /* function keys; 64 reserved */ #define KEY_DL 0x148 /* delete line */ #define KEY_IL 0x149 /* insert line */ #define KEY_DC 0x14a /* delete character */ #define KEY_IC 0x14b /* insert char or enter ins mode */ #define KEY_EIC 0x14c /* exit insert char mode */ #define KEY_CLEAR 0x14d /* clear screen */ #define KEY_EOS 0x14e /* clear to end of screen */ #define KEY_EOL 0x14f /* clear to end of line */ #define KEY_SF 0x150 /* scroll 1 line forward */ #define KEY_SR 0x151 /* scroll 1 line back (reverse) */ #define KEY_NPAGE 0x152 /* next page */ #define KEY_PPAGE 0x153 /* previous page */ #define KEY_STAB 0x154 /* set tab */ #define KEY_CTAB 0x155 /* clear tab */ #define KEY_CATAB 0x156 /* clear all tabs */ #define KEY_ENTER 0x157 /* enter or send (unreliable) */ #define KEY_SRESET 0x158 /* soft/reset (partial/unreliable) */ #define KEY_RESET 0x159 /* 
reset/hard reset (unreliable) */ #define KEY_PRINT 0x15a /* print/copy */ #define KEY_LL 0x15b /* home down/bottom (lower left) */ #define KEY_ABORT 0x15c /* abort/terminate key (any) */ #define KEY_SHELP 0x15d /* short help */ #define KEY_LHELP 0x15e /* long help */ #define KEY_BTAB 0x15f /* Back tab key */ #define KEY_BEG 0x160 /* beg(inning) key */ #define KEY_CANCEL 0x161 /* cancel key */ #define KEY_CLOSE 0x162 /* close key */ #define KEY_COMMAND 0x163 /* cmd (command) key */ #define KEY_COPY 0x164 /* copy key */ #define KEY_CREATE 0x165 /* create key */ #define KEY_END 0x166 /* end key */ #define KEY_EXIT 0x167 /* exit key */ #define KEY_FIND 0x168 /* find key */ #define KEY_HELP 0x169 /* help key */ #define KEY_MARK 0x16a /* mark key */ #define KEY_MESSAGE 0x16b /* message key */ #define KEY_MOVE 0x16c /* move key */ #define KEY_NEXT 0x16d /* next object key */ #define KEY_OPEN 0x16e /* open key */ #define KEY_OPTIONS 0x16f /* options key */ #define KEY_PREVIOUS 0x170 /* previous object key */ #define KEY_REDO 0x171 /* redo key */ #define KEY_REFERENCE 0x172 /* ref(erence) key */ #define KEY_REFRESH 0x173 /* refresh key */ #define KEY_REPLACE 0x174 /* replace key */ #define KEY_RESTART 0x175 /* restart key */ #define KEY_RESUME 0x176 /* resume key */ #define KEY_SAVE 0x177 /* save key */ #define KEY_SBEG 0x178 /* shifted beginning key */ #define KEY_SCANCEL 0x179 /* shifted cancel key */ #define KEY_SCOMMAND 0x17a /* shifted command key */ #define KEY_SCOPY 0x17b /* shifted copy key */ #define KEY_SCREATE 0x17c /* shifted create key */ #define KEY_SDC 0x17d /* shifted delete char key */ #define KEY_SDL 0x17e /* shifted delete line key */ #define KEY_SELECT 0x17f /* select key */ #define KEY_SEND 0x180 /* shifted end key */ #define KEY_SEOL 0x181 /* shifted clear line key */ #define KEY_SEXIT 0x182 /* shifted exit key */ #define KEY_SFIND 0x183 /* shifted find key */ #define KEY_SHOME 0x184 /* shifted home key */ #define KEY_SIC 0x185 /* shifted input key */ 
#define KEY_SLEFT 0x187 /* shifted left arrow key */ #define KEY_SMESSAGE 0x188 /* shifted message key */ #define KEY_SMOVE 0x189 /* shifted move key */ #define KEY_SNEXT 0x18a /* shifted next key */ #define KEY_SOPTIONS 0x18b /* shifted options key */ #define KEY_SPREVIOUS 0x18c /* shifted prev key */ #define KEY_SPRINT 0x18d /* shifted print key */ #define KEY_SREDO 0x18e /* shifted redo key */ #define KEY_SREPLACE 0x18f /* shifted replace key */ #define KEY_SRIGHT 0x190 /* shifted right arrow */ #define KEY_SRSUME 0x191 /* shifted resume key */ #define KEY_SSAVE 0x192 /* shifted save key */ #define KEY_SSUSPEND 0x193 /* shifted suspend key */ #define KEY_SUNDO 0x194 /* shifted undo key */ #define KEY_SUSPEND 0x195 /* suspend key */ #define KEY_UNDO 0x196 /* undo key */ /* PDCurses-specific key definitions -- PC only */ #define ALT_0 0x197 #define ALT_1 0x198 #define ALT_2 0x199 #define ALT_3 0x19a #define ALT_4 0x19b #define ALT_5 0x19c #define ALT_6 0x19d #define ALT_7 0x19e #define ALT_8 0x19f #define ALT_9 0x1a0 #define ALT_A 0x1a1 #define ALT_B 0x1a2 #define ALT_C 0x1a3 #define ALT_D 0x1a4 #define ALT_E 0x1a5 #define ALT_F 0x1a6 #define ALT_G 0x1a7 #define ALT_H 0x1a8 #define ALT_I 0x1a9 #define ALT_J 0x1aa #define ALT_K 0x1ab #define ALT_L 0x1ac #define ALT_M 0x1ad #define ALT_N 0x1ae #define ALT_O 0x1af #define ALT_P 0x1b0 #define ALT_Q 0x1b1 #define ALT_R 0x1b2 #define ALT_S 0x1b3 #define ALT_T 0x1b4 #define ALT_U 0x1b5 #define ALT_V 0x1b6 #define ALT_W 0x1b7 #define ALT_X 0x1b8 #define ALT_Y 0x1b9 #define ALT_Z 0x1ba #define CTL_LEFT 0x1bb /* Control-Left-Arrow */ #define CTL_RIGHT 0x1bc #define CTL_PGUP 0x1bd #define CTL_PGDN 0x1be #define CTL_HOME 0x1bf #define CTL_END 0x1c0 #define KEY_A1 0x1c1 /* upper left on Virtual keypad */ #define KEY_A2 0x1c2 /* upper middle on Virt. keypad */ #define KEY_A3 0x1c3 /* upper right on Vir. keypad */ #define KEY_B1 0x1c4 /* middle left on Virt. keypad */ #define KEY_B2 0x1c5 /* center on Virt. 
keypad */ #define KEY_B3 0x1c6 /* middle right on Vir. keypad */ #define KEY_C1 0x1c7 /* lower left on Virt. keypad */ #define KEY_C2 0x1c8 /* lower middle on Virt. keypad */ #define KEY_C3 0x1c9 /* lower right on Vir. keypad */ #define PADSLASH 0x1ca /* slash on keypad */ #define PADENTER 0x1cb /* enter on keypad */ #define CTL_PADENTER 0x1cc /* ctl-enter on keypad */ #define ALT_PADENTER 0x1cd /* alt-enter on keypad */ #define PADSTOP 0x1ce /* stop on keypad */ #define PADSTAR 0x1cf /* star on keypad */ #define PADMINUS 0x1d0 /* minus on keypad */ #define PADPLUS 0x1d1 /* plus on keypad */ #define CTL_PADSTOP 0x1d2 /* ctl-stop on keypad */ #define CTL_PADCENTER 0x1d3 /* ctl-enter on keypad */ #define CTL_PADPLUS 0x1d4 /* ctl-plus on keypad */ #define CTL_PADMINUS 0x1d5 /* ctl-minus on keypad */ #define CTL_PADSLASH 0x1d6 /* ctl-slash on keypad */ #define CTL_PADSTAR 0x1d7 /* ctl-star on keypad */ #define ALT_PADPLUS 0x1d8 /* alt-plus on keypad */ #define ALT_PADMINUS 0x1d9 /* alt-minus on keypad */ #define ALT_PADSLASH 0x1da /* alt-slash on keypad */ #define ALT_PADSTAR 0x1db /* alt-star on keypad */ #define ALT_PADSTOP 0x1dc /* alt-stop on keypad */ #define CTL_INS 0x1dd /* ctl-insert */ #define ALT_DEL 0x1de /* alt-delete */ #define ALT_INS 0x1df /* alt-insert */ #define CTL_UP 0x1e0 /* ctl-up arrow */ #define CTL_DOWN 0x1e1 /* ctl-down arrow */ #define CTL_TAB 0x1e2 /* ctl-tab */ #define ALT_TAB 0x1e3 #define ALT_MINUS 0x1e4 #define ALT_EQUAL 0x1e5 #define ALT_HOME 0x1e6 #define ALT_PGUP 0x1e7 #define ALT_PGDN 0x1e8 #define ALT_END 0x1e9 #define ALT_UP 0x1ea /* alt-up arrow */ #define ALT_DOWN 0x1eb /* alt-down arrow */ #define ALT_RIGHT 0x1ec /* alt-right arrow */ #define ALT_LEFT 0x1ed /* alt-left arrow */ #define ALT_ENTER 0x1ee /* alt-enter */ #define ALT_ESC 0x1ef /* alt-escape */ #define ALT_BQUOTE 0x1f0 /* alt-back quote */ #define ALT_LBRACKET 0x1f1 /* alt-left bracket */ #define ALT_RBRACKET 0x1f2 /* alt-right bracket */ #define ALT_SEMICOLON 0x1f3 /* 
alt-semi-colon */ #define ALT_FQUOTE 0x1f4 /* alt-forward quote */ #define ALT_COMMA 0x1f5 /* alt-comma */ #define ALT_STOP 0x1f6 /* alt-stop */ #define ALT_FSLASH 0x1f7 /* alt-forward slash */ #define ALT_BKSP 0x1f8 /* alt-backspace */ #define CTL_BKSP 0x1f9 /* ctl-backspace */ #define PAD0 0x1fa /* keypad 0 */ #define CTL_PAD0 0x1fb /* ctl-keypad 0 */ #define CTL_PAD1 0x1fc #define CTL_PAD2 0x1fd #define CTL_PAD3 0x1fe #define CTL_PAD4 0x1ff #define CTL_PAD5 0x200 #define CTL_PAD6 0x201 #define CTL_PAD7 0x202 #define CTL_PAD8 0x203 #define CTL_PAD9 0x204 #define ALT_PAD0 0x205 /* alt-keypad 0 */ #define ALT_PAD1 0x206 #define ALT_PAD2 0x207 #define ALT_PAD3 0x208 #define ALT_PAD4 0x209 #define ALT_PAD5 0x20a #define ALT_PAD6 0x20b #define ALT_PAD7 0x20c #define ALT_PAD8 0x20d #define ALT_PAD9 0x20e #define CTL_DEL 0x20f /* clt-delete */ #define ALT_BSLASH 0x210 /* alt-back slash */ #define CTL_ENTER 0x211 /* ctl-enter */ #define SHF_PADENTER 0x212 /* shift-enter on keypad */ #define SHF_PADSLASH 0x213 /* shift-slash on keypad */ #define SHF_PADSTAR 0x214 /* shift-star on keypad */ #define SHF_PADPLUS 0x215 /* shift-plus on keypad */ #define SHF_PADMINUS 0x216 /* shift-minus on keypad */ #define SHF_UP 0x217 /* shift-up on keypad */ #define SHF_DOWN 0x218 /* shift-down on keypad */ #define SHF_IC 0x219 /* shift-insert on keypad */ #define SHF_DC 0x21a /* shift-delete on keypad */ #define KEY_MOUSE 0x21b /* "mouse" key */ #define KEY_SHIFT_L 0x21c /* Left-shift */ #define KEY_SHIFT_R 0x21d /* Right-shift */ #define KEY_CONTROL_L 0x21e /* Left-control */ #define KEY_CONTROL_R 0x21f /* Right-control */ #define KEY_ALT_L 0x220 /* Left-alt */ #define KEY_ALT_R 0x221 /* Right-alt */ #define KEY_RESIZE 0x222 /* Window resize */ #define KEY_SUP 0x223 /* Shifted up arrow */ #define KEY_SDOWN 0x224 /* Shifted down arrow */ #define KEY_MIN KEY_BREAK /* Minimum curses key value */ #define KEY_MAX KEY_SDOWN /* Maximum curses key */ #define KEY_F(n) (KEY_F0 + (n)) 
/*---------------------------------------------------------------------- * * PDCurses Function Declarations * */ /* Standard */ int addch(const chtype); int addchnstr(const chtype *, int); int addchstr(const chtype *); int addnstr(const char *, int); int addstr(const char *); int attroff(chtype); int attron(chtype); int attrset(chtype); int attr_get(attr_t *, short *, void *); int attr_off(attr_t, void *); int attr_on(attr_t, void *); int attr_set(attr_t, short, void *); int baudrate(void); int beep(void); int bkgd(chtype); void bkgdset(chtype); int border(chtype, chtype, chtype, chtype, chtype, chtype, chtype, chtype); int box(WINDOW *, chtype, chtype); bool can_change_color(void); int cbreak(void); int chgat(int, attr_t, short, const void *); int clearok(WINDOW *, bool); int clear(void); int clrtobot(void); int clrtoeol(void); int color_content(short, short *, short *, short *); int color_set(short, void *); int copywin(const WINDOW *, WINDOW *, int, int, int, int, int, int, int); int curs_set(int); int def_prog_mode(void); int def_shell_mode(void); int delay_output(int); int delch(void); int deleteln(void); void delscreen(SCREEN *); int delwin(WINDOW *); WINDOW *derwin(WINDOW *, int, int, int, int); int doupdate(void); WINDOW *dupwin(WINDOW *); int echochar(const chtype); int echo(void); int endwin(void); char erasechar(void); int erase(void); void filter(void); int flash(void); int flushinp(void); chtype getbkgd(WINDOW *); int getnstr(char *, int); int getstr(char *); WINDOW *getwin(FILE *); int halfdelay(int); bool has_colors(void); bool has_ic(void); bool has_il(void); int hline(chtype, int); void idcok(WINDOW *, bool); int idlok(WINDOW *, bool); void immedok(WINDOW *, bool); int inchnstr(chtype *, int); int inchstr(chtype *); chtype inch(void); int init_color(short, short, short, short); int init_pair(short, short, short); WINDOW *initscr(void); int innstr(char *, int); int insch(chtype); int insdelln(int); int insertln(void); int insnstr(const char *, int); 
int insstr(const char *); int instr(char *); int intrflush(WINDOW *, bool); bool isendwin(void); bool is_linetouched(WINDOW *, int); bool is_wintouched(WINDOW *); char *keyname(int); int keypad(WINDOW *, bool); char killchar(void); int leaveok(WINDOW *, bool); char *longname(void); int meta(WINDOW *, bool); int move(int, int); int mvaddch(int, int, const chtype); int mvaddchnstr(int, int, const chtype *, int); int mvaddchstr(int, int, const chtype *); int mvaddnstr(int, int, const char *, int); int mvaddstr(int, int, const char *); int mvchgat(int, int, int, attr_t, short, const void *); int mvcur(int, int, int, int); int mvdelch(int, int); int mvderwin(WINDOW *, int, int); int mvgetch(int, int); int mvgetnstr(int, int, char *, int); int mvgetstr(int, int, char *); int mvhline(int, int, chtype, int); chtype mvinch(int, int); int mvinchnstr(int, int, chtype *, int); int mvinchstr(int, int, chtype *); int mvinnstr(int, int, char *, int); int mvinsch(int, int, chtype); int mvinsnstr(int, int, const char *, int); int mvinsstr(int, int, const char *); int mvinstr(int, int, char *); int mvprintw(int, int, const char *, ...); int mvscanw(int, int, const char *, ...); int mvvline(int, int, chtype, int); int mvwaddchnstr(WINDOW *, int, int, const chtype *, int); int mvwaddchstr(WINDOW *, int, int, const chtype *); int mvwaddch(WINDOW *, int, int, const chtype); int mvwaddnstr(WINDOW *, int, int, const char *, int); int mvwaddstr(WINDOW *, int, int, const char *); int mvwchgat(WINDOW *, int, int, int, attr_t, short, const void *); int mvwdelch(WINDOW *, int, int); int mvwgetch(WINDOW *, int, int); int mvwgetnstr(WINDOW *, int, int, char *, int); int mvwgetstr(WINDOW *, int, int, char *); int mvwhline(WINDOW *, int, int, chtype, int); int mvwinchnstr(WINDOW *, int, int, chtype *, int); int mvwinchstr(WINDOW *, int, int, chtype *); chtype mvwinch(WINDOW *, int, int); int mvwinnstr(WINDOW *, int, int, char *, int); int mvwinsch(WINDOW *, int, int, chtype); int mvwinsnstr(WINDOW 
*, int, int, const char *, int); int mvwinsstr(WINDOW *, int, int, const char *); int mvwinstr(WINDOW *, int, int, char *); int mvwin(WINDOW *, int, int); int mvwprintw(WINDOW *, int, int, const char *, ...); int mvwscanw(WINDOW *, int, int, const char *, ...); int mvwvline(WINDOW *, int, int, chtype, int); int napms(int); WINDOW *newpad(int, int); SCREEN *newterm(const char *, FILE *, FILE *); WINDOW *newwin(int, int, int, int); int nl(void); int nocbreak(void); int nodelay(WINDOW *, bool); int noecho(void); int nonl(void); void noqiflush(void); int noraw(void); int notimeout(WINDOW *, bool); int overlay(const WINDOW *, WINDOW *); int overwrite(const WINDOW *, WINDOW *); int pair_content(short, short *, short *); int pechochar(WINDOW *, chtype); int pnoutrefresh(WINDOW *, int, int, int, int, int, int); int prefresh(WINDOW *, int, int, int, int, int, int); int printw(const char *, ...); int putwin(WINDOW *, FILE *); void qiflush(void); int raw(void); int redrawwin(WINDOW *); int refresh(void); int reset_prog_mode(void); int reset_shell_mode(void); int resetty(void); int ripoffline(int, int (*)(WINDOW *, int)); int savetty(void); int scanw(const char *, ...); int scr_dump(const char *); int scr_init(const char *); int scr_restore(const char *); int scr_set(const char *); int scrl(int); int scroll(WINDOW *); int scrollok(WINDOW *, bool); SCREEN *set_term(SCREEN *); int setscrreg(int, int); int slk_attroff(const chtype); int slk_attr_off(const attr_t, void *); int slk_attron(const chtype); int slk_attr_on(const attr_t, void *); int slk_attrset(const chtype); int slk_attr_set(const attr_t, short, void *); int slk_clear(void); int slk_color(short); int slk_init(int); char *slk_label(int); int slk_noutrefresh(void); int slk_refresh(void); int slk_restore(void); int slk_set(int, const char *, int); int slk_touch(void); int standend(void); int standout(void); int start_color(void); WINDOW *subpad(WINDOW *, int, int, int, int); WINDOW *subwin(WINDOW *, int, int, int, int); 
int syncok(WINDOW *, bool); chtype termattrs(void); attr_t term_attrs(void); char *termname(void); void timeout(int); int touchline(WINDOW *, int, int); int touchwin(WINDOW *); int typeahead(int); int untouchwin(WINDOW *); void use_env(bool); int vidattr(chtype); int vid_attr(attr_t, short, void *); int vidputs(chtype, int (*)(int)); int vid_puts(attr_t, short, void *, int (*)(int)); int vline(chtype, int); int vw_printw(WINDOW *, const char *, va_list); int vwprintw(WINDOW *, const char *, va_list); int vw_scanw(WINDOW *, const char *, va_list); int vwscanw(WINDOW *, const char *, va_list); int waddchnstr(WINDOW *, const chtype *, int); int waddchstr(WINDOW *, const chtype *); int waddch(WINDOW *, const chtype); int waddnstr(WINDOW *, const char *, int); int waddstr(WINDOW *, const char *); int wattroff(WINDOW *, chtype); int wattron(WINDOW *, chtype); int wattrset(WINDOW *, chtype); int wattr_get(WINDOW *, attr_t *, short *, void *); int wattr_off(WINDOW *, attr_t, void *); int wattr_on(WINDOW *, attr_t, void *); int wattr_set(WINDOW *, attr_t, short, void *); void wbkgdset(WINDOW *, chtype); int wbkgd(WINDOW *, chtype); int wborder(WINDOW *, chtype, chtype, chtype, chtype, chtype, chtype, chtype, chtype); int wchgat(WINDOW *, int, attr_t, short, const void *); int wclear(WINDOW *); int wclrtobot(WINDOW *); int wclrtoeol(WINDOW *); int wcolor_set(WINDOW *, short, void *); void wcursyncup(WINDOW *); int wdelch(WINDOW *); int wdeleteln(WINDOW *); int wechochar(WINDOW *, const chtype); int werase(WINDOW *); int wgetch(WINDOW *); int wgetnstr(WINDOW *, char *, int); int wgetstr(WINDOW *, char *); int whline(WINDOW *, chtype, int); int winchnstr(WINDOW *, chtype *, int); int winchstr(WINDOW *, chtype *); chtype winch(WINDOW *); int winnstr(WINDOW *, char *, int); int winsch(WINDOW *, chtype); int winsdelln(WINDOW *, int); int winsertln(WINDOW *); int winsnstr(WINDOW *, const char *, int); int winsstr(WINDOW *, const char *); int winstr(WINDOW *, char *); int 
wmove(WINDOW *, int, int); int wnoutrefresh(WINDOW *); int wprintw(WINDOW *, const char *, ...); int wredrawln(WINDOW *, int, int); int wrefresh(WINDOW *); int wscanw(WINDOW *, const char *, ...); int wscrl(WINDOW *, int); int wsetscrreg(WINDOW *, int, int); int wstandend(WINDOW *); int wstandout(WINDOW *); void wsyncdown(WINDOW *); void wsyncup(WINDOW *); void wtimeout(WINDOW *, int); int wtouchln(WINDOW *, int, int, int); int wvline(WINDOW *, chtype, int); /* Wide-character functions */ #ifdef PDC_WIDE int addnwstr(const wchar_t *, int); int addwstr(const wchar_t *); int add_wch(const cchar_t *); int add_wchnstr(const cchar_t *, int); int add_wchstr(const cchar_t *); int border_set(const cchar_t *, const cchar_t *, const cchar_t *, const cchar_t *, const cchar_t *, const cchar_t *, const cchar_t *, const cchar_t *); int box_set(WINDOW *, const cchar_t *, const cchar_t *); int echo_wchar(const cchar_t *); int erasewchar(wchar_t *); int getbkgrnd(cchar_t *); int getcchar(const cchar_t *, wchar_t *, attr_t *, short *, void *); int getn_wstr(wint_t *, int); int get_wch(wint_t *); int get_wstr(wint_t *); int hline_set(const cchar_t *, int); int innwstr(wchar_t *, int); int ins_nwstr(const wchar_t *, int); int ins_wch(const cchar_t *); int ins_wstr(const wchar_t *); int inwstr(wchar_t *); int in_wch(cchar_t *); int in_wchnstr(cchar_t *, int); int in_wchstr(cchar_t *); char *key_name(wchar_t); int killwchar(wchar_t *); int mvaddnwstr(int, int, const wchar_t *, int); int mvaddwstr(int, int, const wchar_t *); int mvadd_wch(int, int, const cchar_t *); int mvadd_wchnstr(int, int, const cchar_t *, int); int mvadd_wchstr(int, int, const cchar_t *); int mvgetn_wstr(int, int, wint_t *, int); int mvget_wch(int, int, wint_t *); int mvget_wstr(int, int, wint_t *); int mvhline_set(int, int, const cchar_t *, int); int mvinnwstr(int, int, wchar_t *, int); int mvins_nwstr(int, int, const wchar_t *, int); int mvins_wch(int, int, const cchar_t *); int mvins_wstr(int, int, const wchar_t 
*); int mvinwstr(int, int, wchar_t *); int mvin_wch(int, int, cchar_t *); int mvin_wchnstr(int, int, cchar_t *, int); int mvin_wchstr(int, int, cchar_t *); int mvvline_set(int, int, const cchar_t *, int); int mvwaddnwstr(WINDOW *, int, int, const wchar_t *, int); int mvwaddwstr(WINDOW *, int, int, const wchar_t *); int mvwadd_wch(WINDOW *, int, int, const cchar_t *); int mvwadd_wchnstr(WINDOW *, int, int, const cchar_t *, int); int mvwadd_wchstr(WINDOW *, int, int, const cchar_t *); int mvwgetn_wstr(WINDOW *, int, int, wint_t *, int); int mvwget_wch(WINDOW *, int, int, wint_t *); int mvwget_wstr(WINDOW *, int, int, wint_t *); int mvwhline_set(WINDOW *, int, int, const cchar_t *, int); int mvwinnwstr(WINDOW *, int, int, wchar_t *, int); int mvwins_nwstr(WINDOW *, int, int, const wchar_t *, int); int mvwins_wch(WINDOW *, int, int, const cchar_t *); int mvwins_wstr(WINDOW *, int, int, const wchar_t *); int mvwin_wch(WINDOW *, int, int, cchar_t *); int mvwin_wchnstr(WINDOW *, int, int, cchar_t *, int); int mvwin_wchstr(WINDOW *, int, int, cchar_t *); int mvwinwstr(WINDOW *, int, int, wchar_t *); int mvwvline_set(WINDOW *, int, int, const cchar_t *, int); int pecho_wchar(WINDOW *, const cchar_t*); int setcchar(cchar_t*, const wchar_t*, const attr_t, short, const void*); int slk_wset(int, const wchar_t *, int); int unget_wch(const wchar_t); int vline_set(const cchar_t *, int); int waddnwstr(WINDOW *, const wchar_t *, int); int waddwstr(WINDOW *, const wchar_t *); int wadd_wch(WINDOW *, const cchar_t *); int wadd_wchnstr(WINDOW *, const cchar_t *, int); int wadd_wchstr(WINDOW *, const cchar_t *); int wbkgrnd(WINDOW *, const cchar_t *); void wbkgrndset(WINDOW *, const cchar_t *); int wborder_set(WINDOW *, const cchar_t *, const cchar_t *, const cchar_t *, const cchar_t *, const cchar_t *, const cchar_t *, const cchar_t *, const cchar_t *); int wecho_wchar(WINDOW *, const cchar_t *); int wgetbkgrnd(WINDOW *, cchar_t *); int wgetn_wstr(WINDOW *, wint_t *, int); int 
wget_wch(WINDOW *, wint_t *); int wget_wstr(WINDOW *, wint_t *); int whline_set(WINDOW *, const cchar_t *, int); int winnwstr(WINDOW *, wchar_t *, int); int wins_nwstr(WINDOW *, const wchar_t *, int); int wins_wch(WINDOW *, const cchar_t *); int wins_wstr(WINDOW *, const wchar_t *); int winwstr(WINDOW *, wchar_t *); int win_wch(WINDOW *, cchar_t *); int win_wchnstr(WINDOW *, cchar_t *, int); int win_wchstr(WINDOW *, cchar_t *); wchar_t *wunctrl(cchar_t *); int wvline_set(WINDOW *, const cchar_t *, int); #endif /* Quasi-standard */ chtype getattrs(WINDOW *); int getbegx(WINDOW *); int getbegy(WINDOW *); int getmaxx(WINDOW *); int getmaxy(WINDOW *); int getparx(WINDOW *); int getpary(WINDOW *); int getcurx(WINDOW *); int getcury(WINDOW *); void traceoff(void); void traceon(void); char *unctrl(chtype); int crmode(void); int nocrmode(void); int draino(int); int resetterm(void); int fixterm(void); int saveterm(void); int setsyx(int, int); int mouse_set(unsigned long); int mouse_on(unsigned long); int mouse_off(unsigned long); int request_mouse_pos(void); int map_button(unsigned long); void wmouse_position(WINDOW *, int *, int *); unsigned long getmouse(void); unsigned long getbmap(void); /* ncurses */ int assume_default_colors(int, int); const char *curses_version(void); bool has_key(int); int use_default_colors(void); int wresize(WINDOW *, int, int); int mouseinterval(int); mmask_t mousemask(mmask_t, mmask_t *); bool mouse_trafo(int *, int *, bool); int nc_getmouse(MEVENT *); int ungetmouse(MEVENT *); bool wenclose(const WINDOW *, int, int); bool wmouse_trafo(const WINDOW *, int *, int *, bool); /* PDCurses */ int addrawch(chtype); int insrawch(chtype); bool is_termresized(void); int mvaddrawch(int, int, chtype); int mvdeleteln(int, int); int mvinsertln(int, int); int mvinsrawch(int, int, chtype); int mvwaddrawch(WINDOW *, int, int, chtype); int mvwdeleteln(WINDOW *, int, int); int mvwinsertln(WINDOW *, int, int); int mvwinsrawch(WINDOW *, int, int, chtype); int 
raw_output(bool); int resize_term(int, int); WINDOW *resize_window(WINDOW *, int, int); int waddrawch(WINDOW *, chtype); int winsrawch(WINDOW *, chtype); char wordchar(void); #ifdef PDC_WIDE wchar_t *slk_wlabel(int); #endif void PDC_debug(const char *, ...); int PDC_ungetch(int); int PDC_set_blink(bool); int PDC_set_line_color(short); void PDC_set_title(const char *); int PDC_clearclipboard(void); int PDC_freeclipboard(char *); int PDC_getclipboard(char **, long *); int PDC_setclipboard(const char *, long); unsigned long PDC_get_input_fd(void); unsigned long PDC_get_key_modifiers(void); int PDC_return_key_modifiers(bool); int PDC_save_key_modifiers(bool); #ifdef XCURSES WINDOW *Xinitscr(int, char **); void XCursesExit(void); int sb_init(void); int sb_set_horz(int, int, int); int sb_set_vert(int, int, int); int sb_get_horz(int *, int *, int *); int sb_get_vert(int *, int *, int *); int sb_refresh(void); #endif /*** Functions defined as macros ***/ /* getch() and ungetch() conflict with some DOS libraries */ #define getch() wgetch(stdscr) #define ungetch(ch) PDC_ungetch(ch) #define COLOR_PAIR(n) (((chtype)(n) << PDC_COLOR_SHIFT) & A_COLOR) #define PAIR_NUMBER(n) (((n) & A_COLOR) >> PDC_COLOR_SHIFT) /* These will _only_ work as macros */ #define getbegyx(w, y, x) (y = getbegy(w), x = getbegx(w)) #define getmaxyx(w, y, x) (y = getmaxy(w), x = getmaxx(w)) #define getparyx(w, y, x) (y = getpary(w), x = getparx(w)) #define getyx(w, y, x) (y = getcury(w), x = getcurx(w)) #define getsyx(y, x) { if (curscr->_leaveit) (y)=(x)=-1; \ else getyx(curscr,(y),(x)); } #ifdef NCURSES_MOUSE_VERSION # define getmouse(x) nc_getmouse(x) #endif /* return codes from PDC_getclipboard() and PDC_setclipboard() calls */ #define PDC_CLIP_SUCCESS 0 #define PDC_CLIP_ACCESS_ERROR 1 #define PDC_CLIP_EMPTY 2 #define PDC_CLIP_MEMORY_ERROR 3 /* PDCurses key modifier masks */ #define PDC_KEY_MODIFIER_SHIFT 1 #define PDC_KEY_MODIFIER_CONTROL 2 #define PDC_KEY_MODIFIER_ALT 4 #define 
PDC_KEY_MODIFIER_NUMLOCK 8 #if defined(__cplusplus) || defined(__cplusplus__) || defined(__CPLUSPLUS) # undef bool } #endif #endif /* __PDCURSES__ */ samtools-0.1.19/win32/zconf.h000066400000000000000000000225101212162403000156640ustar00rootroot00000000000000/* zconf.h -- configuration of the zlib compression library * Copyright (C) 1995-2005 Jean-loup Gailly. * For conditions of distribution and use, see copyright notice in zlib.h */ /* @(#) $Id$ */ #ifndef ZCONF_H #define ZCONF_H /* * If you *really* need a unique prefix for all types and library functions, * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. */ #ifdef Z_PREFIX # define deflateInit_ z_deflateInit_ # define deflate z_deflate # define deflateEnd z_deflateEnd # define inflateInit_ z_inflateInit_ # define inflate z_inflate # define inflateEnd z_inflateEnd # define deflateInit2_ z_deflateInit2_ # define deflateSetDictionary z_deflateSetDictionary # define deflateCopy z_deflateCopy # define deflateReset z_deflateReset # define deflateParams z_deflateParams # define deflateBound z_deflateBound # define deflatePrime z_deflatePrime # define inflateInit2_ z_inflateInit2_ # define inflateSetDictionary z_inflateSetDictionary # define inflateSync z_inflateSync # define inflateSyncPoint z_inflateSyncPoint # define inflateCopy z_inflateCopy # define inflateReset z_inflateReset # define inflateBack z_inflateBack # define inflateBackEnd z_inflateBackEnd # define compress z_compress # define compress2 z_compress2 # define compressBound z_compressBound # define uncompress z_uncompress # define adler32 z_adler32 # define crc32 z_crc32 # define get_crc_table z_get_crc_table # define zError z_zError # define alloc_func z_alloc_func # define free_func z_free_func # define in_func z_in_func # define out_func z_out_func # define Byte z_Byte # define uInt z_uInt # define uLong z_uLong # define Bytef z_Bytef # define charf z_charf # define intf z_intf # define uIntf z_uIntf # define uLongf z_uLongf 
# define voidpf z_voidpf # define voidp z_voidp #endif #if defined(__MSDOS__) && !defined(MSDOS) # define MSDOS #endif #if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) # define OS2 #endif #if defined(_WINDOWS) && !defined(WINDOWS) # define WINDOWS #endif #if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) # ifndef WIN32 # define WIN32 # endif #endif #if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) # if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) # ifndef SYS16BIT # define SYS16BIT # endif # endif #endif /* * Compile with -DMAXSEG_64K if the alloc function cannot allocate more * than 64k bytes at a time (needed on systems with 16-bit int). */ #ifdef SYS16BIT # define MAXSEG_64K #endif #ifdef MSDOS # define UNALIGNED_OK #endif #ifdef __STDC_VERSION__ # ifndef STDC # define STDC # endif # if __STDC_VERSION__ >= 199901L # ifndef STDC99 # define STDC99 # endif # endif #endif #if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) # define STDC #endif #if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) # define STDC #endif #if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) # define STDC #endif #if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) # define STDC #endif #if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ # define STDC #endif #ifndef STDC # ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ # define const /* note: need a more gentle solution here */ # endif #endif /* Some Mac compilers merge all .h files incorrectly: */ #if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__) # define NO_DUMMY_DECL #endif /* Maximum value for memLevel in deflateInit2 */ #ifndef MAX_MEM_LEVEL # ifdef MAXSEG_64K # define MAX_MEM_LEVEL 8 # else # define MAX_MEM_LEVEL 9 # endif #endif /* Maximum value for windowBits in deflateInit2 and inflateInit2. 
* WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files * created by gzip. (Files created by minigzip can still be extracted by * gzip.) */ #ifndef MAX_WBITS # define MAX_WBITS 15 /* 32K LZ77 window */ #endif /* The memory requirements for deflate are (in bytes): (1 << (windowBits+2)) + (1 << (memLevel+9)) that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) plus a few kilobytes for small objects. For example, if you want to reduce the default memory requirements from 256K to 128K, compile with make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" Of course this will generally degrade compression (there's no free lunch). The memory requirements for inflate are (in bytes) 1 << windowBits that is, 32K for windowBits=15 (default value) plus a few kilobytes for small objects. */ /* Type declarations */ #ifndef OF /* function prototypes */ # ifdef STDC # define OF(args) args # else # define OF(args) () # endif #endif /* The following definitions for FAR are needed only for MSDOS mixed * model programming (small or medium model with some far allocations). * This was tested only with MSC; for other MSDOS compilers you may have * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, * just define FAR to be empty. */ #ifdef SYS16BIT # if defined(M_I86SM) || defined(M_I86MM) /* MSC small or medium model */ # define SMALL_MEDIUM # ifdef _MSC_VER # define FAR _far # else # define FAR far # endif # endif # if (defined(__SMALL__) || defined(__MEDIUM__)) /* Turbo C small or medium model */ # define SMALL_MEDIUM # ifdef __BORLANDC__ # define FAR _far # else # define FAR far # endif # endif #endif #if defined(WINDOWS) || defined(WIN32) /* If building or using zlib as a DLL, define ZLIB_DLL. * This is not mandatory, but it offers a little performance increase. 
*/ # ifdef ZLIB_DLL # if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) # ifdef ZLIB_INTERNAL # define ZEXTERN extern __declspec(dllexport) # else # define ZEXTERN extern __declspec(dllimport) # endif # endif # endif /* ZLIB_DLL */ /* If building or using zlib with the WINAPI/WINAPIV calling convention, * define ZLIB_WINAPI. * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. */ # ifdef ZLIB_WINAPI # ifdef FAR # undef FAR # endif # include <windows.h> /* No need for _export, use ZLIB.DEF instead. */ /* For complete Windows compatibility, use WINAPI, not __stdcall. */ # define ZEXPORT WINAPI # ifdef WIN32 # define ZEXPORTVA WINAPIV # else # define ZEXPORTVA FAR CDECL # endif # endif #endif #if defined (__BEOS__) # ifdef ZLIB_DLL # ifdef ZLIB_INTERNAL # define ZEXPORT __declspec(dllexport) # define ZEXPORTVA __declspec(dllexport) # else # define ZEXPORT __declspec(dllimport) # define ZEXPORTVA __declspec(dllimport) # endif # endif #endif #ifndef ZEXTERN # define ZEXTERN extern #endif #ifndef ZEXPORT # define ZEXPORT #endif #ifndef ZEXPORTVA # define ZEXPORTVA #endif #ifndef FAR # define FAR #endif #if !defined(__MACTYPES__) typedef unsigned char Byte; /* 8 bits */ #endif typedef unsigned int uInt; /* 16 bits or more */ typedef unsigned long uLong; /* 32 bits or more */ #ifdef SMALL_MEDIUM /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ # define Bytef Byte FAR #else typedef Byte FAR Bytef; #endif typedef char FAR charf; typedef int FAR intf; typedef uInt FAR uIntf; typedef uLong FAR uLongf; #ifdef STDC typedef void const *voidpc; typedef void FAR *voidpf; typedef void *voidp; #else typedef Byte const *voidpc; typedef Byte FAR *voidpf; typedef Byte *voidp; #endif #if 0 /* HAVE_UNISTD_H -- this line is updated by ./configure */ # include <sys/types.h> /* for off_t */ # include <unistd.h> /* for SEEK_* and off_t */ # ifdef VMS # include <unixio.h> /* for off_t */ # endif # define z_off_t off_t #endif #ifndef SEEK_SET # define SEEK_SET 0 /* Seek from
beginning of file. */ # define SEEK_CUR 1 /* Seek from current position. */ # define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ #endif #ifndef z_off_t # define z_off_t long #endif #if defined(__OS400__) # define NO_vsnprintf #endif #if defined(__MVS__) # define NO_vsnprintf # ifdef FAR # undef FAR # endif #endif /* MVS linker does not support external names larger than 8 bytes */ #if defined(__MVS__) # pragma map(deflateInit_,"DEIN") # pragma map(deflateInit2_,"DEIN2") # pragma map(deflateEnd,"DEEND") # pragma map(deflateBound,"DEBND") # pragma map(inflateInit_,"ININ") # pragma map(inflateInit2_,"ININ2") # pragma map(inflateEnd,"INEND") # pragma map(inflateSync,"INSY") # pragma map(inflateSetDictionary,"INSEDI") # pragma map(compressBound,"CMBND") # pragma map(inflate_table,"INTABL") # pragma map(inflate_fast,"INFA") # pragma map(inflate_copyright,"INCOPY") #endif #endif /* ZCONF_H */ samtools-0.1.19/win32/zlib.h000066400000000000000000002012141212162403000155050ustar00rootroot00000000000000/* zlib.h -- interface of the 'zlib' general purpose compression library version 1.2.3, July 18th, 2005 Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. 
Jean-loup Gailly Mark Adler jloup@gzip.org madler@alumni.caltech.edu The data format used by the zlib library is described by RFCs (Request for Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format). */ #ifndef ZLIB_H #define ZLIB_H #include "zconf.h" #ifdef __cplusplus extern "C" { #endif #define ZLIB_VERSION "1.2.3" #define ZLIB_VERNUM 0x1230 /* The 'zlib' compression library provides in-memory compression and decompression functions, including integrity checks of the uncompressed data. This version of the library supports only one compression method (deflation) but other algorithms will be added later and will have the same stream interface. Compression can be done in a single step if the buffers are large enough (for example if an input file is mmap'ed), or can be done by repeated calls of the compression function. In the latter case, the application must provide more input and/or consume the output (providing more output space) before each call. The compressed data format used by default by the in-memory functions is the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped around a deflate stream, which is itself documented in RFC 1951. The library also supports reading and writing files in gzip (.gz) format with an interface similar to that of stdio using the functions that start with "gz". The gzip format is different from the zlib format. gzip is a gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. This library can optionally read and write gzip streams in memory as well. The zlib format was designed to be compact and fast for use in memory and on communications channels. The gzip format was designed for single- file compression on file systems, has a larger header than zlib to maintain directory information, and uses a different, slower check method than zlib. The library does not install any signal handler. 
The decoder checks the consistency of the compressed data, so the library should never crash even in case of corrupted input. */ typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); typedef void (*free_func) OF((voidpf opaque, voidpf address)); struct internal_state; typedef struct z_stream_s { Bytef *next_in; /* next input byte */ uInt avail_in; /* number of bytes available at next_in */ uLong total_in; /* total nb of input bytes read so far */ Bytef *next_out; /* next output byte should be put there */ uInt avail_out; /* remaining free space at next_out */ uLong total_out; /* total nb of bytes output so far */ char *msg; /* last error message, NULL if no error */ struct internal_state FAR *state; /* not visible by applications */ alloc_func zalloc; /* used to allocate the internal state */ free_func zfree; /* used to free the internal state */ voidpf opaque; /* private data object passed to zalloc and zfree */ int data_type; /* best guess about the data type: binary or text */ uLong adler; /* adler32 value of the uncompressed data */ uLong reserved; /* reserved for future use */ } z_stream; typedef z_stream FAR *z_streamp; /* gzip header information passed to and from zlib routines. See RFC 1952 for more details on the meanings of these fields. 
*/ typedef struct gz_header_s { int text; /* true if compressed data believed to be text */ uLong time; /* modification time */ int xflags; /* extra flags (not used when writing a gzip file) */ int os; /* operating system */ Bytef *extra; /* pointer to extra field or Z_NULL if none */ uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ uInt extra_max; /* space at extra (only when reading header) */ Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ uInt name_max; /* space at name (only when reading header) */ Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ uInt comm_max; /* space at comment (only when reading header) */ int hcrc; /* true if there was or will be a header crc */ int done; /* true when done reading gzip header (not used when writing a gzip file) */ } gz_header; typedef gz_header FAR *gz_headerp; /* The application must update next_in and avail_in when avail_in has dropped to zero. It must update next_out and avail_out when avail_out has dropped to zero. The application must initialize zalloc, zfree and opaque before calling the init function. All other fields are set by the compression library and must not be updated by the application. The opaque value provided by the application will be passed as the first parameter for calls of zalloc and zfree. This can be useful for custom memory management. The compression library attaches no meaning to the opaque value. zalloc must return Z_NULL if there is not enough memory for the object. If zlib is used in a multi-threaded application, zalloc and zfree must be thread safe. On 16-bit systems, the functions zalloc and zfree must be able to allocate exactly 65536 bytes, but will not be required to allocate more than this if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers returned by zalloc for objects of exactly 65536 bytes *must* have their offset normalized to zero. 
The default allocation function provided by this library ensures this (see zutil.c). To reduce memory requirements and avoid any allocation of 64K objects, at the expense of compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). The fields total_in and total_out can be used for statistics or progress reports. After compression, total_in holds the total size of the uncompressed data and may be saved for use in the decompressor (particularly if the decompressor wants to decompress everything in a single step). */ /* constants */ #define Z_NO_FLUSH 0 #define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */ #define Z_SYNC_FLUSH 2 #define Z_FULL_FLUSH 3 #define Z_FINISH 4 #define Z_BLOCK 5 /* Allowed flush values; see deflate() and inflate() below for details */ #define Z_OK 0 #define Z_STREAM_END 1 #define Z_NEED_DICT 2 #define Z_ERRNO (-1) #define Z_STREAM_ERROR (-2) #define Z_DATA_ERROR (-3) #define Z_MEM_ERROR (-4) #define Z_BUF_ERROR (-5) #define Z_VERSION_ERROR (-6) /* Return codes for the compression/decompression functions. Negative * values are errors, positive values are used for special but normal events. 
*/ #define Z_NO_COMPRESSION 0 #define Z_BEST_SPEED 1 #define Z_BEST_COMPRESSION 9 #define Z_DEFAULT_COMPRESSION (-1) /* compression levels */ #define Z_FILTERED 1 #define Z_HUFFMAN_ONLY 2 #define Z_RLE 3 #define Z_FIXED 4 #define Z_DEFAULT_STRATEGY 0 /* compression strategy; see deflateInit2() below for details */ #define Z_BINARY 0 #define Z_TEXT 1 #define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ #define Z_UNKNOWN 2 /* Possible values of the data_type field (though see inflate()) */ #define Z_DEFLATED 8 /* The deflate compression method (the only one supported in this version) */ #define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ #define zlib_version zlibVersion() /* for compatibility with versions < 1.0.2 */ /* basic functions */ ZEXTERN const char * ZEXPORT zlibVersion OF((void)); /* The application can compare zlibVersion and ZLIB_VERSION for consistency. If the first character differs, the library code actually used is not compatible with the zlib.h header file used by the application. This check is automatically made by deflateInit and inflateInit. */ /* ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); Initializes the internal stream state for compression. The fields zalloc, zfree and opaque must be initialized before by the caller. If zalloc and zfree are set to Z_NULL, deflateInit updates them to use default allocation functions. The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: 1 gives best speed, 9 gives best compression, 0 gives no compression at all (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION requests a default compromise between speed and compression (currently equivalent to level 6). deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_STREAM_ERROR if level is not a valid compression level, Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible with the version assumed by the caller (ZLIB_VERSION). 
msg is set to null if there is no error message. deflateInit does not perform any compression: this will be done by deflate(). */ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); /* deflate compresses as much data as possible, and stops when the input buffer becomes empty or the output buffer becomes full. It may introduce some output latency (reading input without producing any output) except when forced to flush. The detailed semantics are as follows. deflate performs one or both of the following actions: - Compress more input starting at next_in and update next_in and avail_in accordingly. If not all input can be processed (because there is not enough room in the output buffer), next_in and avail_in are updated and processing will resume at this point for the next call of deflate(). - Provide more output starting at next_out and update next_out and avail_out accordingly. This action is forced if the parameter flush is non zero. Forcing flush frequently degrades the compression ratio, so this parameter should be set only when necessary (in interactive applications). Some output may be provided even if flush is not set. Before the call of deflate(), the application should ensure that at least one of the actions is possible, by providing more input and/or consuming more output, and updating avail_in or avail_out accordingly; avail_out should never be zero before the call. The application can consume the compressed output when it wants, for example when the output buffer is full (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK and with zero avail_out, it must be called again after making room in the output buffer because there might be more output pending. Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to decide how much data to accumulate before producing output, in order to maximize compression.
If the parameter flush is set to Z_SYNC_FLUSH, all pending output is flushed to the output buffer and the output is aligned on a byte boundary, so that the decompressor can get all input data available so far. (In particular avail_in is zero after the call if enough output space has been provided before the call.) Flushing may degrade compression for some compression algorithms and so it should be used only when necessary. If flush is set to Z_FULL_FLUSH, all output is flushed as with Z_SYNC_FLUSH, and the compression state is reset so that decompression can restart from this point if previous compressed data has been damaged or if random access is desired. Using Z_FULL_FLUSH too often can seriously degrade compression. If deflate returns with avail_out == 0, this function must be called again with the same value of the flush parameter and more output space (updated avail_out), until the flush is complete (deflate returns with non-zero avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that avail_out is greater than six to avoid repeated flush markers due to avail_out == 0 on return. If the parameter flush is set to Z_FINISH, pending input is processed, pending output is flushed and deflate returns with Z_STREAM_END if there was enough output space; if deflate returns with Z_OK, this function must be called again with Z_FINISH and more output space (updated avail_out) but no more input data, until it returns with Z_STREAM_END or an error. After deflate has returned Z_STREAM_END, the only possible operations on the stream are deflateReset or deflateEnd. Z_FINISH can be used immediately after deflateInit if all the compression is to be done in a single step. In this case, avail_out must be at least the value returned by deflateBound (see below). If deflate does not return Z_STREAM_END, then it must be called again as described above. deflate() sets strm->adler to the adler32 checksum of all input read so far (that is, total_in bytes). 
deflate() may update strm->data_type if it can make a good guess about the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered binary. This field is only for information purposes and does not affect the compression algorithm in any manner. deflate() returns Z_OK if some progress has been made (more input processed or more output produced), Z_STREAM_END if all input has been consumed and all output has been produced (only when flush is set to Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not fatal, and deflate() can be called again with more input and more output space to continue compressing. */ ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); /* All dynamically allocated data structures for this stream are freed. This function discards any unprocessed input and does not flush any pending output. deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state was inconsistent, Z_DATA_ERROR if the stream was freed prematurely (some input or output was discarded). In the error case, msg may be set but then points to a static string (which must not be deallocated). */ /* ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); Initializes the internal stream state for decompression. The fields next_in, avail_in, zalloc, zfree and opaque must be initialized before by the caller. If next_in is not Z_NULL and avail_in is large enough (the exact value depends on the compression method), inflateInit determines the compression method from the zlib header and allocates all data structures accordingly; otherwise the allocation will be deferred to the first call of inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to use default allocation functions. 
inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_VERSION_ERROR if the zlib library version is incompatible with the version assumed by the caller. msg is set to null if there is no error message. inflateInit does not perform any decompression apart from reading the zlib header if present: this will be done by inflate(). (So next_in and avail_in may be modified, but next_out and avail_out are unchanged.) */ ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); /* inflate decompresses as much data as possible, and stops when the input buffer becomes empty or the output buffer becomes full. It may introduce some output latency (reading input without producing any output) except when forced to flush. The detailed semantics are as follows. inflate performs one or both of the following actions: - Decompress more input starting at next_in and update next_in and avail_in accordingly. If not all input can be processed (because there is not enough room in the output buffer), next_in is updated and processing will resume at this point for the next call of inflate(). - Provide more output starting at next_out and update next_out and avail_out accordingly. inflate() provides as much output as possible, until there is no more input data or no more space in the output buffer (see below about the flush parameter). Before the call of inflate(), the application should ensure that at least one of the actions is possible, by providing more input and/or consuming more output, and updating the next_* and avail_* values accordingly. The application can consume the uncompressed output when it wants, for example when the output buffer is full (avail_out == 0), or after each call of inflate(). If inflate returns Z_OK and with zero avail_out, it must be called again after making room in the output buffer because there might be more output pending. The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, or Z_BLOCK. 
Z_SYNC_FLUSH requests that inflate() flush as much output as possible to the output buffer. Z_BLOCK requests that inflate() stop if and when it gets to the next deflate block boundary. When decoding the zlib or gzip format, this will cause inflate() to return immediately after the header and before the first block. When doing a raw inflate, inflate() will go ahead and process the first block, and will return when it gets to the end of that block, or when it runs out of data. The Z_BLOCK option assists in appending to or combining deflate streams. Also to assist in this, on return inflate() will set strm->data_type to the number of unused bits in the last byte taken from strm->next_in, plus 64 if inflate() is currently decoding the last block in the deflate stream, plus 128 if inflate() returned immediately after decoding an end-of-block code or decoding the complete header up to just before the first byte of the deflate stream. The end-of-block will not be indicated until all of the uncompressed data from that block has been written to strm->next_out. The number of unused bits may in general be greater than seven, except when bit 7 of data_type is set, in which case the number of unused bits will be less than eight. inflate() should normally be called until it returns Z_STREAM_END or an error. However if all decompression is to be performed in a single step (a single call of inflate), the parameter flush should be set to Z_FINISH. In this case all pending input is processed and all pending output is flushed; avail_out must be large enough to hold all the uncompressed data. (The size of the uncompressed data may have been saved by the compressor for this purpose.) The next operation on this stream must be inflateEnd to deallocate the decompression state. The use of Z_FINISH is never required, but can be used to inform inflate that a faster approach may be used for the single inflate() call. 
In this implementation, inflate() always flushes as much output as possible to the output buffer, and always uses the faster approach on the first call. So the only effect of the flush parameter in this implementation is on the return value of inflate(), as noted below, or when it returns early because Z_BLOCK is used. If a preset dictionary is needed after this call (see inflateSetDictionary below), inflate sets strm->adler to the adler32 checksum of the dictionary chosen by the compressor and returns Z_NEED_DICT; otherwise it sets strm->adler to the adler32 checksum of all output produced so far (that is, total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described below. At the end of the stream, inflate() checks that its computed adler32 checksum is equal to that saved by the compressor and returns Z_STREAM_END only if the checksum is correct. inflate() will decompress and check either zlib-wrapped or gzip-wrapped deflate data. The header type is detected automatically. Any information contained in the gzip header is not retained, so applications that need that information should instead use raw inflate, see inflateInit2() below, or inflateBack() and perform their own processing of the gzip header and trailer. inflate() returns Z_OK if some progress has been made (more input processed or more output produced), Z_STREAM_END if the end of the compressed data has been reached and all uncompressed output has been produced, Z_NEED_DICT if a preset dictionary is needed at this point, Z_DATA_ERROR if the input data was corrupted (input stream not conforming to the zlib format or incorrect check value), Z_STREAM_ERROR if the stream structure was inconsistent (for example if next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if no progress is possible or if there was not enough room in the output buffer when Z_FINISH is used. 
Note that Z_BUF_ERROR is not fatal, and inflate() can be called again with more input and more output space to continue decompressing. If Z_DATA_ERROR is returned, the application may then call inflateSync() to look for a good compression block if a partial recovery of the data is desired. */ ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); /* All dynamically allocated data structures for this stream are freed. This function discards any unprocessed input and does not flush any pending output. inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state was inconsistent. In the error case, msg may be set but then points to a static string (which must not be deallocated). */ /* Advanced functions */ /* The following functions are needed only in some special applications. */ /* ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, int level, int method, int windowBits, int memLevel, int strategy)); This is another version of deflateInit with more compression options. The fields next_in, zalloc, zfree and opaque must be initialized before by the caller. The method parameter is the compression method. It must be Z_DEFLATED in this version of the library. The windowBits parameter is the base two logarithm of the window size (the size of the history buffer). It should be in the range 8..15 for this version of the library. Larger values of this parameter result in better compression at the expense of memory usage. The default value is 15 if deflateInit is used instead. windowBits can also be -8..-15 for raw deflate. In this case, -windowBits determines the window size. deflate() will then generate raw deflate data with no zlib header or trailer, and will not compute an adler32 check value. windowBits can also be greater than 15 for optional gzip encoding. Add 16 to windowBits to write a simple gzip header and trailer around the compressed data instead of a zlib wrapper. 
The gzip header will have no file name, no extra data, no comment, no modification time (set to zero), no header crc, and the operating system will be set to 255 (unknown). If a gzip stream is being written, strm->adler is a crc32 instead of an adler32. The memLevel parameter specifies how much memory should be allocated for the internal compression state. memLevel=1 uses minimum memory but is slow and reduces compression ratio; memLevel=9 uses maximum memory for optimal speed. The default value is 8. See zconf.h for total memory usage as a function of windowBits and memLevel. The strategy parameter is used to tune the compression algorithm. Use the value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no string match), or Z_RLE to limit match distances to one (run-length encoding). Filtered data consists mostly of small values with a somewhat random distribution. In this case, the compression algorithm is tuned to compress them better. The effect of Z_FILTERED is to force more Huffman coding and less string matching; it is somewhat intermediate between Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The strategy parameter only affects the compression ratio but not the correctness of the compressed output even if it is not set appropriately. Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler decoder for special applications. deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid method). msg is set to null if there is no error message. deflateInit2 does not perform any compression: this will be done by deflate().
*/ ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, const Bytef *dictionary, uInt dictLength)); /* Initializes the compression dictionary from the given byte sequence without producing any compressed output. This function must be called immediately after deflateInit, deflateInit2 or deflateReset, before any call of deflate. The compressor and decompressor must use exactly the same dictionary (see inflateSetDictionary). The dictionary should consist of strings (byte sequences) that are likely to be encountered later in the data to be compressed, with the most commonly used strings preferably put towards the end of the dictionary. Using a dictionary is most useful when the data to be compressed is short and can be predicted with good accuracy; the data can then be compressed better than with the default empty dictionary. Depending on the size of the compression data structures selected by deflateInit or deflateInit2, a part of the dictionary may in effect be discarded, for example if the dictionary is larger than the window size provided in deflateInit or deflateInit2. Thus the strings most likely to be useful should be put at the end of the dictionary, not at the front. In addition, the current implementation of deflate will use at most the window size minus 262 bytes of the provided dictionary. Upon return of this function, strm->adler is set to the adler32 value of the dictionary; the decompressor may later use this value to determine which dictionary has been used by the compressor. (The adler32 value applies to the whole dictionary even if only a subset of the dictionary is actually used by the compressor.) If a raw deflate was requested, then the adler32 value is not computed and strm->adler is not set. deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a parameter is invalid (such as NULL dictionary) or the stream state is inconsistent (for example if deflate has already been called for this stream or if the compression method is bsort).
deflateSetDictionary does not perform any compression: this will be done by deflate(). */ ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, z_streamp source)); /* Sets the destination stream as a complete copy of the source stream. This function can be useful when several compression strategies will be tried, for example when there are several ways of pre-processing the input data with a filter. The streams that will be discarded should then be freed by calling deflateEnd. Note that deflateCopy duplicates the internal compression state which can be quite large, so this strategy is slow and can consume lots of memory. deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_STREAM_ERROR if the source stream state was inconsistent (such as zalloc being NULL). msg is left unchanged in both source and destination. */ ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); /* This function is equivalent to deflateEnd followed by deflateInit, but does not free and reallocate all the internal compression state. The stream will keep the same compression level and any other attributes that may have been set by deflateInit2. deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source stream state was inconsistent (such as zalloc or state being NULL). */ ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, int level, int strategy)); /* Dynamically update the compression level and compression strategy. The interpretation of level and strategy is as in deflateInit2. This can be used to switch between compression and straight copy of the input data, or to switch to a different kind of input data requiring a different strategy. If the compression level is changed, the input available so far is compressed with the old level (and may be flushed); the new level will take effect only at the next call of deflate(). 
Before the call of deflateParams, the stream state must be set as for a call of deflate(), since the currently available input may have to be compressed and flushed. In particular, strm->avail_out must be non-zero. deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR if strm->avail_out was zero. */ ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, int good_length, int max_lazy, int nice_length, int max_chain)); /* Fine tune deflate's internal compression parameters. This should only be used by someone who understands the algorithm used by zlib's deflate for searching for the best matching string, and even then only by the most fanatic optimizer trying to squeeze out the last compressed bit for their specific input data. Read the deflate.c source code for the meaning of the max_lazy, good_length, nice_length, and max_chain parameters. deflateTune() can be called after deflateInit() or deflateInit2(), and returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. */ ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, uLong sourceLen)); /* deflateBound() returns an upper bound on the compressed size after deflation of sourceLen bytes. It must be called after deflateInit() or deflateInit2(). This would be used to allocate an output buffer for deflation in a single pass, and so would be called before deflate(). */ ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, int bits, int value)); /* deflatePrime() inserts bits in the deflate output stream. The intent is that this function is used to start off the deflate output with the bits leftover from a previous deflate stream when appending to it. As such, this function can only be used for raw deflate, and must be used before the first deflate() call after a deflateInit2() or deflateReset(). bits must be less than or equal to 16, and that many of the least significant bits of value will be inserted in the output. 
deflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source stream state was inconsistent. */ ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, gz_headerp head)); /* deflateSetHeader() provides gzip header information for when a gzip stream is requested by deflateInit2(). deflateSetHeader() may be called after deflateInit2() or deflateReset() and before the first call of deflate(). The text, time, os, extra field, name, and comment information in the provided gz_header structure are written to the gzip header (xflag is ignored -- the extra flags are set according to the compression level). The caller must assure that, if not Z_NULL, name and comment are terminated with a zero byte, and that if extra is not Z_NULL, that extra_len bytes are available there. If hcrc is true, a gzip header crc is included. Note that the current versions of the command-line version of gzip (up through version 1.3.x) do not support header crc's, and will report that it is a "multi-part gzip file" and give up. If deflateSetHeader is not used, the default gzip header has text false, the time set to zero, and os set to 255, with no extra, name, or comment fields. The gzip header is returned to the default state by deflateReset(). deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source stream state was inconsistent. */ /* ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, int windowBits)); This is another version of inflateInit with an extra parameter. The fields next_in, avail_in, zalloc, zfree and opaque must be initialized before by the caller. The windowBits parameter is the base two logarithm of the maximum window size (the size of the history buffer). It should be in the range 8..15 for this version of the library. The default value is 15 if inflateInit is used instead. windowBits must be greater than or equal to the windowBits value provided to deflateInit2() while compressing, or it must be equal to 15 if deflateInit2() was not used. 
If a compressed stream with a larger window size is given as input, inflate() will return with the error code Z_DATA_ERROR instead of trying to allocate a larger window. windowBits can also be -8..-15 for raw inflate. In this case, -windowBits determines the window size. inflate() will then process raw deflate data, not looking for a zlib or gzip header, not generating a check value, and not looking for any check values for comparison at the end of the stream. This is for use with other formats that use the deflate compressed data format such as zip. Those formats provide their own check values. If a custom format is developed using the raw deflate format for compressed data, it is recommended that a check value such as an adler32 or a crc32 be applied to the uncompressed data as is done in the zlib, gzip, and zip formats. For most applications, the zlib format should be used as is. Note that comments above on the use in deflateInit2() applies to the magnitude of windowBits. windowBits can also be greater than 15 for optional gzip decoding. Add 32 to windowBits to enable zlib and gzip decoding with automatic header detection, or add 16 to decode only the gzip format (the zlib format will return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a crc32 instead of an adler32. inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_STREAM_ERROR if a parameter is invalid (such as a null strm). msg is set to null if there is no error message. inflateInit2 does not perform any decompression apart from reading the zlib header if present: this will be done by inflate(). (So next_in and avail_in may be modified, but next_out and avail_out are unchanged.) */ ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, const Bytef *dictionary, uInt dictLength)); /* Initializes the decompression dictionary from the given uncompressed byte sequence. 
This function must be called immediately after a call of inflate, if that call returned Z_NEED_DICT. The dictionary chosen by the compressor can be determined from the adler32 value returned by that call of inflate. The compressor and decompressor must use exactly the same dictionary (see deflateSetDictionary). For raw inflate, this function can be called immediately after inflateInit2() or inflateReset() and before any call of inflate() to set the dictionary. The application must ensure that the dictionary that was used for compression is provided. inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a parameter is invalid (such as NULL dictionary) or the stream state is inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the expected one (incorrect adler32 value). inflateSetDictionary does not perform any decompression: this will be done by subsequent calls of inflate(). */ ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); /* Skips invalid compressed data until a full flush point (see above the description of deflate with Z_FULL_FLUSH) can be found, or until all available input is skipped. No output is provided. inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. In the success case, the application may save the current value of total_in which indicates where valid compressed data was found. In the error case, the application may repeatedly call inflateSync, providing more input each time, until success or end of the input data. */ ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, z_streamp source)); /* Sets the destination stream as a complete copy of the source stream. This function can be useful when randomly accessing a large stream. 
The first pass through the stream can periodically record the inflate state, allowing restarting inflate at those points when randomly accessing the stream. inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_STREAM_ERROR if the source stream state was inconsistent (such as zalloc being NULL). msg is left unchanged in both source and destination. */ ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); /* This function is equivalent to inflateEnd followed by inflateInit, but does not free and reallocate all the internal decompression state. The stream will keep attributes that may have been set by inflateInit2. inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source stream state was inconsistent (such as zalloc or state being NULL). */ ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, int bits, int value)); /* This function inserts bits in the inflate input stream. The intent is that this function is used to start inflating at a bit position in the middle of a byte. The provided bits will be used before any bytes are used from next_in. This function should only be used with raw inflate, and should be used before the first inflate() call after inflateInit2() or inflateReset(). bits must be less than or equal to 16, and that many of the least significant bits of value will be inserted in the input. inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source stream state was inconsistent. */ ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm, gz_headerp head)); /* inflateGetHeader() requests that gzip header information be stored in the provided gz_header structure. inflateGetHeader() may be called after inflateInit2() or inflateReset(), and before the first call of inflate(). As inflate() processes the gzip stream, head->done is zero until the header is completed, at which time head->done is set to one. 
If a zlib stream is being decoded, then head->done is set to -1 to indicate that there will be no gzip header information forthcoming. Note that Z_BLOCK can be used to force inflate() to return immediately after header processing is complete and before any actual data is decompressed. The text, time, xflags, and os fields are filled in with the gzip header contents. hcrc is set to true if there is a header CRC. (The header CRC was valid if done is set to one.) If extra is not Z_NULL, then extra_max contains the maximum number of bytes to write to extra. Once done is true, extra_len contains the actual extra field length, and extra contains the extra field, or that field truncated if extra_max is less than extra_len. If name is not Z_NULL, then up to name_max characters are written there, terminated with a zero unless the length is greater than name_max. If comment is not Z_NULL, then up to comm_max characters are written there, terminated with a zero unless the length is greater than comm_max. When any of extra, name, or comment are not Z_NULL and the respective field is not present in the header, then that field is set to Z_NULL to signal its absence. This allows the use of deflateSetHeader() with the returned structure to duplicate the header. However if those fields are set to allocated memory, then the application will need to save those pointers elsewhere so that they can be eventually freed. If inflateGetHeader is not used, then the header information is simply discarded. The header is always checked for validity, including the header CRC if present. inflateReset() will reset the process to discard the header information. The application would need to call inflateGetHeader() again to retrieve the header from the next gzip stream. inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source stream state was inconsistent. 
*/ /* ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, unsigned char FAR *window)); Initialize the internal stream state for decompression using inflateBack() calls. The fields zalloc, zfree and opaque in strm must be initialized before the call. If zalloc and zfree are Z_NULL, then the default library- derived memory allocation routines are used. windowBits is the base two logarithm of the window size, in the range 8..15. window is a caller supplied buffer of that size. Except for special applications where it is assured that deflate was used with small window sizes, windowBits must be 15 and a 32K byte window must be supplied to be able to decompress general deflate streams. See inflateBack() for the usage of these routines. inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of the parameters are invalid, Z_MEM_ERROR if the internal state could not be allocated, or Z_VERSION_ERROR if the version of the library does not match the version of the header file. */ typedef unsigned (*in_func) OF((void FAR *, unsigned char FAR * FAR *)); typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, in_func in, void FAR *in_desc, out_func out, void FAR *out_desc)); /* inflateBack() does a raw inflate with a single call using a call-back interface for input and output. This is more efficient than inflate() for file i/o applications in that it avoids copying between the output and the sliding window by simply making the window itself the output buffer. This function trusts the application to not change the output buffer passed by the output function, at least until inflateBack() returns. inflateBackInit() must be called first to allocate the internal state and to initialize the state with the user-provided window buffer. inflateBack() may then be used multiple times to inflate a complete, raw deflate stream with each call. 
inflateBackEnd() is then called to free the allocated state. A raw deflate stream is one with no zlib or gzip header or trailer. This routine would normally be used in a utility that reads zip or gzip files and writes out uncompressed files. The utility would decode the header and process the trailer on its own, hence this routine expects only the raw deflate stream to decompress. This is different from the normal behavior of inflate(), which expects either a zlib or gzip header and trailer around the deflate stream. inflateBack() uses two subroutines supplied by the caller that are then called by inflateBack() for input and output. inflateBack() calls those routines until it reads a complete deflate stream and writes out all of the uncompressed data, or until it encounters an error. The function's parameters and return types are defined above in the in_func and out_func typedefs. inflateBack() will call in(in_desc, &buf) which should return the number of bytes of provided input, and a pointer to that input in buf. If there is no input available, in() must return zero--buf is ignored in that case--and inflateBack() will return a buffer error. inflateBack() will call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. out() should return zero on success, or non-zero on failure. If out() returns non-zero, inflateBack() will return with an error. Neither in() nor out() are permitted to change the contents of the window provided to inflateBackInit(), which is also the buffer that out() uses to write from. The length written by out() will be at most the window size. Any non-zero amount of input may be provided by in(). For convenience, inflateBack() can be provided input on the first call by setting strm->next_in and strm->avail_in. If that input is exhausted, then in() will be called. Therefore strm->next_in must be initialized before calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called immediately for input. 
If strm->next_in is not Z_NULL, then strm->avail_in must also be initialized, and then if strm->avail_in is not zero, input will initially be taken from strm->next_in[0 .. strm->avail_in - 1]. The in_desc and out_desc parameters of inflateBack() are passed as the first parameter of in() and out() respectively when they are called. These descriptors can be optionally used to pass any information that the caller- supplied in() and out() functions need to do their job. On return, inflateBack() will set strm->next_in and strm->avail_in to pass back any unused input that was provided by the last in() call. The return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR if in() or out() returned an error, Z_DATA_ERROR if there was a format error in the deflate stream (in which case strm->msg is set to indicate the nature of the error), or Z_STREAM_ERROR if the stream was not properly initialized. In the case of Z_BUF_ERROR, an input or output error can be distinguished using strm->next_in which will be Z_NULL only if in() returned an error. If strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning non-zero. (in() will always be called before out(), so strm->next_in is assured to be defined if out() returns non-zero.) Note that inflateBack() cannot return Z_OK. */ ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm)); /* All memory allocated by inflateBackInit() is freed. inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream state was inconsistent. */ ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void)); /* Return flags indicating compile-time options. 
Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: 1.0: size of uInt 3.2: size of uLong 5.4: size of voidpf (pointer) 7.6: size of z_off_t Compiler, assembler, and debug options: 8: DEBUG 9: ASMV or ASMINF -- use ASM code 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention 11: 0 (reserved) One-time table building (smaller code, but not thread-safe if true): 12: BUILDFIXED -- build static block decoding tables when needed 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed 14,15: 0 (reserved) Library content (indicates missing functionality): 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking deflate code when not needed) 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect and decode gzip streams (to avoid linking crc code) 18-19: 0 (reserved) Operation variations (changes in library functionality): 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate 21: FASTEST -- deflate algorithm with only one, lowest compression level 22,23: 0 (reserved) The sprintf variant used by gzprintf (zero is best): 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! 26: 0 = returns value, 1 = void -- 1 means inferred string length returned Remainder: 27-31: 0 (reserved) */ /* utility functions */ /* The following utility functions are implemented on top of the basic stream-oriented functions. To simplify the interface, some default options are assumed (compression level and memory usage, standard memory allocation functions). The source code of these utility functions can easily be modified if you need special options. */ ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen)); /* Compresses the source buffer into the destination buffer. sourceLen is the byte length of the source buffer. 
Upon entry, destLen is the total size of the destination buffer, which must be at least the value returned by compressBound(sourceLen). Upon exit, destLen is the actual size of the compressed buffer. This function can be used to compress a whole file at once if the input file is mmap'ed. compress returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if there was not enough room in the output buffer. */ ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen, int level)); /* Compresses the source buffer into the destination buffer. The level parameter has the same meaning as in deflateInit. sourceLen is the byte length of the source buffer. Upon entry, destLen is the total size of the destination buffer, which must be at least the value returned by compressBound(sourceLen). Upon exit, destLen is the actual size of the compressed buffer. compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if there was not enough room in the output buffer, Z_STREAM_ERROR if the level parameter is invalid. */ ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen)); /* compressBound() returns an upper bound on the compressed size after compress() or compress2() on sourceLen bytes. It would be used before a compress() or compress2() call to allocate the destination buffer. */ ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen)); /* Decompresses the source buffer into the destination buffer. sourceLen is the byte length of the source buffer. Upon entry, destLen is the total size of the destination buffer, which must be large enough to hold the entire uncompressed data. (The size of the uncompressed data must have been saved previously by the compressor and transmitted to the decompressor by some mechanism outside the scope of this compression library.) Upon exit, destLen is the actual size of the uncompressed buffer. 
This function can be used to decompress a whole file at once if the input file is mmap'ed. uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if there was not enough room in the output buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. */ typedef voidp gzFile; ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); /* Opens a gzip (.gz) file for reading or writing. The mode parameter is as in fopen ("rb" or "wb") but can also include a compression level ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman only compression as in "wb1h", or 'R' for run-length encoding as in "wb1R". (See the description of deflateInit2 for more information about the strategy parameter.) gzopen can be used to read a file which is not in gzip format; in this case gzread will directly read from the file without decompression. gzopen returns NULL if the file could not be opened or if there was insufficient memory to allocate the (de)compression state; errno can be checked to distinguish the two cases (if errno is zero, the zlib error is Z_MEM_ERROR). */ ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); /* gzdopen() associates a gzFile with the file descriptor fd. File descriptors are obtained from calls like open, dup, creat, pipe or fileno (if the file has been previously opened with fopen). The mode parameter is as in gzopen. The next call of gzclose on the returned gzFile will also close the file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode). gzdopen returns NULL if there was insufficient memory to allocate the (de)compression state. */ ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); /* Dynamically update the compression level or strategy. See the description of deflateInit2 for the meaning of these parameters. 
gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not opened for writing. */ ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); /* Reads the given number of uncompressed bytes from the compressed file. If the input file was not in gzip format, gzread copies the given number of bytes into the buffer. gzread returns the number of uncompressed bytes actually read (0 for end of file, -1 for error). */ ZEXTERN int ZEXPORT gzwrite OF((gzFile file, voidpc buf, unsigned len)); /* Writes the given number of uncompressed bytes into the compressed file. gzwrite returns the number of uncompressed bytes actually written (0 in case of error). */ ZEXTERN int ZEXPORTVA gzprintf OF((gzFile file, const char *format, ...)); /* Converts, formats, and writes the args to the compressed file under control of the format string, as in fprintf. gzprintf returns the number of uncompressed bytes actually written (0 in case of error). The number of uncompressed bytes written is limited to 4095. The caller should assure that this limit is not exceeded. If it is exceeded, then gzprintf() will return an error (0) with nothing written. In this case, there may also be a buffer overflow with unpredictable consequences, which is possible only if zlib was compiled with the insecure functions sprintf() or vsprintf() because the secure snprintf() or vsnprintf() functions were not available. */ ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); /* Writes the given null-terminated string to the compressed file, excluding the terminating null character. gzputs returns the number of characters written, or -1 in case of error. */ ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); /* Reads bytes from the compressed file until len-1 characters are read, or a newline character is read and transferred to buf, or an end-of-file condition is encountered. The string is then terminated with a null character. 
gzgets returns buf, or Z_NULL in case of error. */ ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); /* Writes c, converted to an unsigned char, into the compressed file. gzputc returns the value that was written, or -1 in case of error. */ ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); /* Reads one byte from the compressed file. gzgetc returns this byte or -1 in case of end of file or error. */ ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); /* Push one character back onto the stream to be read again later. Only one character of push-back is allowed. gzungetc() returns the character pushed, or -1 on failure. gzungetc() will fail if a character has been pushed but not read yet, or if c is -1. The pushed character will be discarded if the stream is repositioned with gzseek() or gzrewind(). */ ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); /* Flushes all pending output into the compressed file. The parameter flush is as in the deflate() function. The return value is the zlib error number (see function gzerror below). gzflush returns Z_OK if the flush parameter is Z_FINISH and all output could be flushed. gzflush should be called only when strictly necessary because it can degrade compression. */ ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, z_off_t offset, int whence)); /* Sets the starting position for the next gzread or gzwrite on the given compressed file. The offset represents a number of bytes in the uncompressed data stream. The whence parameter is defined as in lseek(2); the value SEEK_END is not supported. If the file is opened for reading, this function is emulated but can be extremely slow. If the file is opened for writing, only forward seeks are supported; gzseek then compresses a sequence of zeroes up to the new starting position. 
gzseek returns the resulting offset location as measured in bytes from the beginning of the uncompressed stream, or -1 in case of error, in particular if the file is opened for writing and the new starting position would be before the current position. */ ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); /* Rewinds the given file. This function is supported only for reading. gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) */ ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); /* Returns the starting position for the next gzread or gzwrite on the given compressed file. This position represents a number of bytes in the uncompressed data stream. gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) */ ZEXTERN int ZEXPORT gzeof OF((gzFile file)); /* Returns 1 when EOF has previously been detected reading the given input stream, otherwise zero. */ ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); /* Returns 1 if file is being read directly without decompression, otherwise zero. */ ZEXTERN int ZEXPORT gzclose OF((gzFile file)); /* Flushes all pending output if necessary, closes the compressed file and deallocates all the (de)compression state. The return value is the zlib error number (see function gzerror below). */ ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); /* Returns the error message for the last error which occurred on the given compressed file. errnum is set to zlib error number. If an error occurred in the file system and not in the compression library, errnum is set to Z_ERRNO and the application may consult errno to get the exact error code. */ ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); /* Clears the error and end-of-file flags for file. This is analogous to the clearerr() function in stdio. This is useful for continuing to read a gzip file that is being written concurrently. 
*/ /* checksum functions */ /* These functions are not related to compression but are exported anyway because they might be useful in applications using the compression library. */ ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); /* Update a running Adler-32 checksum with the bytes buf[0..len-1] and return the updated checksum. If buf is NULL, this function returns the required initial value for the checksum. An Adler-32 checksum is almost as reliable as a CRC32 but can be computed much faster. Usage example: uLong adler = adler32(0L, Z_NULL, 0); while (read_buffer(buffer, length) != EOF) { adler = adler32(adler, buffer, length); } if (adler != original_adler) error(); */ ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, z_off_t len2)); /* Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. */ ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); /* Update a running CRC-32 with the bytes buf[0..len-1] and return the updated CRC-32. If buf is NULL, this function returns the required initial value for the crc. Pre- and post-conditioning (one's complement) is performed within this function so it shouldn't be done by the application. Usage example: uLong crc = crc32(0L, Z_NULL, 0); while (read_buffer(buffer, length) != EOF) { crc = crc32(crc, buffer, length); } if (crc != original_crc) error(); */ ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); /* Combine two CRC-32 check values into one. For two sequences of bytes, seq1 and seq2 with lengths len1 and len2, CRC-32 check values were calculated for each, crc1 and crc2. 
crc32_combine() returns the CRC-32 check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and len2. */ /* various hacks, don't look :) */ /* deflateInit and inflateInit are macros to allow checking the zlib version * and the compiler's view of z_stream: */ ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level, const char *version, int stream_size)); ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, const char *version, int stream_size)); ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method, int windowBits, int memLevel, int strategy, const char *version, int stream_size)); ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, const char *version, int stream_size)); ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, unsigned char FAR *window, const char *version, int stream_size)); /* Each Init macro below expands to the underscore-suffixed function declared above, passing ZLIB_VERSION and sizeof(z_stream) as the trailing version and stream_size arguments. */ #define deflateInit(strm, level) \ deflateInit_((strm), (level), ZLIB_VERSION, sizeof(z_stream)) #define inflateInit(strm) \ inflateInit_((strm), ZLIB_VERSION, sizeof(z_stream)) #define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ (strategy), ZLIB_VERSION, sizeof(z_stream)) #define inflateInit2(strm, windowBits) \ inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream)) #define inflateBackInit(strm, windowBits, window) \ inflateBackInit_((strm), (windowBits), (window), \ ZLIB_VERSION, sizeof(z_stream)) #if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL) struct internal_state {int dummy;}; /* dummy definition, compiled only when neither ZUTIL_H nor NO_DUMMY_DECL is defined; hack for buggy compilers */ #endif ZEXTERN const char * ZEXPORT zError OF((int)); ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp z)); ZEXTERN const uLongf * ZEXPORT get_crc_table OF((void)); #ifdef __cplusplus } #endif #endif /* ZLIB_H */