./PaxHeaders.10994/aragorn1.2.380000644000000000000000000000013212776376114013022 xustar0030 mtime=1476000844.307060627 30 atime=1476000847.407162177 30 ctime=1476000844.307060627 aragorn1.2.38/0000755000000000000000000000000012776376114013175 5ustar00rootroot00000000000000aragorn1.2.38/PaxHeaders.10994/aragorn.10000644000000000000000000000012612776135410014450 xustar0028 mtime=1475918600.4194104 30 atime=1475918659.905343566 28 ctime=1475918600.4194104 aragorn1.2.38/aragorn.10000644000000000000000000002161512776135410014705 0ustar00rootroot00000000000000'\" t .\" Title: aragorn .\" Author: [see the "AUTHORS" section] .\" Generator: DocBook XSL Stylesheets v1.76.1 .\" Date: 02/24/2013 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" .TH "ARAGORN" "1" "02/24/2013" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" http://bugs.debian.org/507673 .\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .\" ----------------------------------------------------------------- .\" * MAIN CONTENT STARTS HERE * .\" ----------------------------------------------------------------- .SH "NAME" aragorn \- detect tRNA genes in nucleotide sequences .SH "SYNOPSIS" .sp \fBaragorn\fR [\fIOPTION\fR]\&... \fIFILE\fR .SH "OPTIONS" .PP \fB\-m\fR .RS 4 Search for tmRNA genes\&. .RE .PP \fB\-t\fR .RS 4 Search for tRNA genes\&. By default, all are detected\&. 
If one of \fB\-m\fR or \fB\-t\fR is specified, then the other is not detected unless specified as well\&. .RE .PP \fB\-mt\fR .RS 4 Search for Metazoan mitochondrial tRNA genes\&. tRNA genes with introns are not detected\&. \fB\-i\fR, \fB\-sr\fR switches ignored\&. Composite Metazoan mitochondrial genetic code used\&. .RE .PP \fB\-mtmam\fR .RS 4 Search for Mammalian mitochondrial tRNA genes\&. \fB\-i\fR, \fB\-sr\fR switches ignored\&. \fB\-tv\fR switch set\&. Mammalian mitochondrial genetic code used\&. .RE .PP \fB\-mtx\fR .RS 4 Same as \fB\-mt\fR but low scoring tRNA genes are not reported\&. .RE .PP \fB\-mtd\fR .RS 4 Overlapping metazoan mitochondrial tRNA genes on opposite strands are reported\&. .RE .PP \fB\-gc\fR[\fInum\fR] .RS 4 Use the GenBank transl_table = [\fInum\fR] genetic code\&. Individual modifications can be appended using \fI,BBB\fR=\fIaaa\fR, where each B is one of A, C, G, or T and \fIaaa\fR is the three letter code for an amino\-acid\&. More than one modification can be specified\&. For example, \fB\-gcvert\fR,aga=Trp,agg=Trp uses the Vertebrate Mitochondrial code with the codons AGA and AGG changed to Tryptophan\&. .RE .PP \fB\-gcstd\fR .RS 4 Use standard genetic code\&. .RE .PP \fB\-gcmet\fR .RS 4 Use composite Metazoan mitochondrial genetic code\&. .RE .PP \fB\-gcvert\fR .RS 4 Use Vertebrate mitochondrial genetic code\&. .RE .PP \fB\-gcinvert\fR .RS 4 Use Invertebrate mitochondrial genetic code\&. .RE .PP \fB\-gcyeast\fR .RS 4 Use Yeast mitochondrial genetic code\&. .RE .PP \fB\-gcprot\fR .RS 4 Use Mold/Protozoan/Coelenterate mitochondrial genetic code\&. .RE .PP \fB\-gcciliate\fR .RS 4 Use Ciliate genetic code\&. .RE .PP \fB\-gcflatworm\fR .RS 4 Use Echinoderm/Flatworm mitochondrial genetic code\&. .RE .PP \fB\-gceuplot\fR .RS 4 Use Euplotid genetic code\&. .RE .PP \fB\-gcbact\fR .RS 4 Use Bacterial/Plant Chloroplast genetic code\&. .RE .PP \fB\-gcaltyeast\fR .RS 4 Use alternative Yeast genetic code\&. .RE .PP \fB\-gcascid\fR .RS 4 Use Ascidian Mitochondrial genetic code\&. 
.RE .PP \fB\-gcaltflat\fR .RS 4 Use alternative Flatworm Mitochondrial genetic code\&. .RE .PP \fB\-gcblep\fR .RS 4 Use Blepharisma genetic code\&. .RE .PP \fB\-gcchloroph\fR .RS 4 Use Chlorophycean Mitochondrial genetic code\&. .RE .PP \fB\-gctrem\fR .RS 4 Use Trematode Mitochondrial genetic code\&. .RE .PP \fB\-gcscen\fR .RS 4 Use Scenedesmus obliquus Mitochondrial genetic code\&. .RE .PP \fB\-gcthraust\fR .RS 4 Use Thraustochytrium Mitochondrial genetic code\&. .RE .PP \fB\-tv\fR .RS 4 Do not search for mitochondrial TV replacement loop tRNA genes\&. Only relevant if \fB\-mt\fR used\&. .RE .PP \fB\-c7\fR .RS 4 Search for tRNA genes with 7 base C\-loops only\&. .RE .PP \fB\-i\fR .RS 4 Search for tRNA genes with introns in anticodon loop with maximum length 3000 bases\&. Minimum intron length is 0 bases\&. Ignored if \fB\-m\fR is specified\&. .RE .PP \fB\-i\fR[\fImax\fR] .RS 4 Search for tRNA genes with introns in anticodon loop with maximum length [\fImax\fR] bases\&. Minimum intron length is 0 bases\&. Ignored if \fB\-m\fR is specified\&. .RE .PP \fB\-i\fR[\fImin\fR],[\fImax\fR] .RS 4 Search for tRNA genes with introns in anticodon loop with maximum length [\fImax\fR] bases, and minimum length [\fImin\fR] bases\&. Ignored if \fB\-m\fR is specified\&. .RE .PP \fB\-io\fR .RS 4 Same as \fB\-i\fR, but allow tRNA genes with long introns to overlap shorter tRNA genes\&. .RE .PP \fB\-if\fR .RS 4 Same as \fB\-i\fR, but fix intron between positions 37 and 38 on C\-loop (one base after anticodon)\&. .RE .PP \fB\-ifo\fR .RS 4 Same as \fB\-if\fR and \fB\-io\fR combined\&. .RE .PP \fB\-ir\fR .RS 4 Same as \fB\-i\fR, but report tRNA genes with minimum length [\fImin\fR] bases rather than search for tRNA genes with minimum length [\fImin\fR] bases\&. With this switch, [\fImin\fR] acts as an output filter, minimum intron length for searching is still 0 bases\&. .RE .PP \fB\-c\fR .RS 4 Assume that each sequence has a circular topology\&. Search wraps around each end\&. 
Default setting\&. .RE .PP \fB\-l\fR .RS 4 Assume that each sequence has a linear topology\&. Search does not wrap\&. .RE .PP \fB\-d\fR .RS 4 Double\&. Search both strands of each sequence\&. Default setting\&. .RE .PP \fB\-s\fR or \fB\-s+\fR .RS 4 Single\&. Do not search the complementary (antisense) strand of each sequence\&. .RE .PP \fB\-sc\fR or \fB\-s\-\fR .RS 4 Single complementary\&. Do not search the sense strand of each sequence\&. .RE .PP \fB\-ps\fR .RS 4 Lower scoring thresholds to 95% of default levels\&. .RE .PP \fB\-ps\fR[\fInum\fR] .RS 4 Change scoring thresholds to [\fInum\fR] percent of default levels\&. .RE .PP \fB\-rp\fR .RS 4 Flag possible pseudogenes (score < 100 or tRNA anticodon loop <> 7 bases long)\&. Note that genes with score < 100 will not be detected or flagged if scoring thresholds are not also changed to below 100% (see \-ps switch)\&. .RE .PP \fB\-seq\fR .RS 4 Print out primary sequence\&. .RE .PP \fB\-br\fR .RS 4 Show secondary structure of tRNA gene primary sequence using round brackets\&. .RE .PP \fB\-fasta\fR .RS 4 Print out primary sequence in fasta format\&. .RE .PP \fB\-fo\fR .RS 4 Print out primary sequence in fasta format only (no secondary structure)\&. .RE .PP \fB\-fon\fR .RS 4 Same as \fB\-fo\fR, with sequence and gene numbering in header\&. .RE .PP \fB\-fos\fR .RS 4 Same as \fB\-fo\fR, with no spaces in header\&. .RE .PP \fB\-fons\fR .RS 4 Same as \fB\-fo\fR, with sequence and gene numbering, but no spaces\&. .RE .PP \fB\-w\fR .RS 4 Print out in Batch mode\&. .RE .PP \fB\-ss\fR .RS 4 Use the stricter canonical 1\-2 bp spacer1 and 1 bp spacer2\&. Ignored if \fB\-mt\fR set\&. Default is to allow 3 bp spacer1 and 0\-2 bp spacer2, which may degrade selectivity\&. .RE .PP \fB\-v\fR .RS 4 Verbose\&. Prints out information during search to STDERR\&. .RE .PP \fB\-a\fR .RS 4 Print out tRNA domain for tmRNA genes\&. 
.RE .PP \fB\-a7\fR .RS 4 Restrict tRNA astem length to a maximum of 7 bases\&. .RE .PP \fB\-aa\fR .RS 4 Display message if predicted iso\-acceptor species does not match species in sequence name (if present)\&. .RE .PP \fB\-j\fR .RS 4 Display 4\-base sequence on 3\*(Aq end of astem regardless of predicted amino\-acyl acceptor length\&. .RE .PP \fB\-jr\fR .RS 4 Allow some divergence of 3\*(Aq amino\-acyl acceptor sequence from NCCA\&. .RE .PP \fB\-jr4\fR .RS 4 Allow some divergence of 3\*(Aq amino\-acyl acceptor sequence from NCCA, and display 4 bases\&. .RE .PP \fB\-q\fR .RS 4 Don\*(Aqt print configuration line (which switches and files were used)\&. .RE .PP \fB\-rn\fR .RS 4 Repeat sequence name before summary information\&. .RE .PP \fB\-O\fR [\fIoutfile\fR] .RS 4 Print output to [\fIoutfile\fR]\&. If [\fIoutfile\fR] already exists, it is overwritten\&. By default all output goes to stdout\&. .RE .SH "DESCRIPTION" .sp aragorn detects tRNA, mtRNA, and tmRNA genes\&. A minimum requirement is at least a 32 bit compiler architecture (variable types int and unsigned int are at least 4 bytes long)\&. .sp [\fIFILE\fR] is assumed to contain one or more sequences in FASTA format\&. Results of the search are printed to STDOUT\&. All switches are optional and case\-insensitive\&. Unless \-i is specified, tRNA genes containing introns are not detected\&. .SH "AUTHORS" .sp Bjorn Canback, Dean Laslett .SH "REFERENCES" .sp Laslett, D\&. and Canback, B\&. (2004) ARAGORN, a program for the detection of transfer RNA and transfer\-messenger RNA genes in nucleotide sequences\&. Nucleic Acids Research, 32;11\-16\&. .sp Laslett, D\&. and Canback, B\&. (2008) ARWEN: a program to detect tRNA genes in metazoan mitochondrial nucleotide sequences\&. Bioinformatics, 24(2); 172\-175\&. 
aragorn1.2.38/PaxHeaders.10994/aragorn1.2.38.c0000644000000000000000000000013212776376114015211 xustar0030 mtime=1476000844.307060627 30 atime=1476000878.924194612 30 ctime=1476000844.307060627 aragorn1.2.38/aragorn1.2.38.c0000644000000000000000000145316612776376114015464 0ustar00rootroot00000000000000 /* --------------------------------------------------------------- ARAGORN v1.2.38 Dean Laslett --------------------------------------------------------------- ARAGORN (together with ARWEN at last) Detects tRNA, mtRNA, and tmRNA genes in nucleotide sequences Copyright (C) 2003-2018 Dean Laslett A minimum requirement is at least a 32 bit compiler architecture (variable types int and unsigned int are at least 4 bytes long). Please report bugs and suggestions of improvements to the authors. E-mail: Dean Laslett: gaiaquark@gmail.com Björn Canbäck: bcanback@acgt.se Version 1.2.38 Oct 8th, 2016. Thanks to Francisco Ossandon for finding many bugs and testing Thanks to Haruo Suzuki for finding bugs Thanks to Sascha Steinbiss for fixing bugs Please reference the following papers if you use this program as part of any published research. Laslett, D. and Canback, B. (2004) ARAGORN, a program for the detection of transfer RNA and transfer-messenger RNA genes in nucleotide sequences. Nucleic Acids Research, 32;11-16. Laslett, D. and Canback, B. (2008) ARWEN: a program to detect tRNA genes in metazoan mitochondrial nucleotide sequences. Bioinformatics, 24(2); 172-175. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License, (see below). This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. 
Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. 
You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. 
If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. 
(This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. 
Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. 
Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. 
Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
END OF TERMS AND CONDITIONS --------------------------------------------------------------- ARAGORN v1.2.38 Dean Laslett --------------------------------------------------------------- */ #include #include #ifndef SEEK_SET #define SEEK_SET 0 #define SEEK_CUR 1 #define SEEK_END 2 #endif #define NOCHAR '\0' #define DLIM '\n' #define STRLEN 4001 #define STRLENM1 4000 #define SHORTSTRLEN 51 #define SHORTSTRLENM1 50 #define KEYLEN 15 #define NHELPLINE 173 #define INACTIVE 2.0e+35 #define IINACTIVE 2000000001L #define ITHRESHOLD 2000000000L #define space(c) (c==' ')||(c=='\t')||(c=='\n')||(c=='\r') #define sq(pos) ((pos + d->psmax - 1L) % d->psmax) + 1L #define itmparam(x,y) fputc(x,y) #define FASTA 0 #define GENBANK 1 #define noGENE -1 #define tRNA 0 #define tmRNA 1 #define srpRNA 2 #define rRNA 3 #define CDS 4 #define NS 6 /* should be one more than number of types of gene */ #define MAXGCMOD 16 #define MAMMAL_MT 2 #define NGENECODE 26 #define METAZOAN_MT 0 #define STANDARD 1 #define VERTEBRATE_MT 2 #define NAMINOACID 27 #define Phe 0 #define Val 1 #define Leu 2 #define Ile 3 #define Cys 4 #define Gly 5 #define Arg 6 #define Ser 7 #define Ala 8 #define Pro 9 #define Thr 10 #define Tyr 11 #define Asp 12 #define His 13 #define Asn 14 #define Met 15 #define Trp 16 #define Glu 17 #define Gln 18 #define Lys 19 #define Stop 20 #define SeC 21 #define Pyl 22 #define INSERT -2 #define TERM -1 #define Adenine 0 #define Cytosine 1 #define Guanine 2 #define Thymine 3 #define AMBIG 4 #define NOBASE 5 #define tRNAthresh 132.0 #define mtRNAdtthresh 91.5 #define mtRNAtthresh 83.5 #define mtRNAdthresh 85.0 #define tmRNAthresh 325.0 #define srpRNAthresh 175.0 #define CDSthresh 100.0 #define PSEUDOGENElevel 0.95 #define RIGHT 0 #define UP 1 #define LEFT 2 #define DOWN 3 #define UPRIGHT 4 #define SLANTDR 5 #define SLANTUR 6 #define SLANTUL 7 #define SLANTDL 8 #define SLANT 5 #define MATX 42 #define MATY 34 #define ASTEM2_EXT 9 #define ASTEM2_EXTD 4 /* <= ASTEM2_EXT */ #define ASTEM2_EXTE 
5 /* ASTEM2_EXT - ASTEM2_EXTD */ #define MINTSTEM_DIST (17 + ASTEM2_EXT) #define MAXTSTEM_DIST (26 + ASTEM2_EXT) #define MAXDSTEM_DIST 9 #define MINDSTEM_DIST 8 #define MININTRONLEN 0 #define MAXINTRONLEN 3000 #define MINCTRNALEN 62 #define MAXCTRNALEN 110 #define MINTRNALEN (MINCTRNALEN + 1) #define MAXTRNALEN (MAXCTRNALEN + ASTEM2_EXT) #define MAXETRNALEN (MAXTRNALEN + MAXINTRONLEN) #define VARMAX 26 #define VARMIN 3 #define VARDIFF 23 /* VARMAX - VARMIN */ #define MINTPTSDIST 50 #define MAXTPTSDIST 321 #define TPWINDOW (MAXTPTSDIST - MINTPTSDIST + 1) #define MINTPDIST 50 #define MAXTPDIST 250 #define TPDISTWINDOW (MAXTPDIST - MINTPDIST + 1) #define MINTAGDIST 12 #define MAXTAGDIST 102 #define TAGWINDOW MAXTAGDIST - MINTAGDIST #define MINRNACDIST (MINTPDIST - 5) #define MAXRNACDIST (MAXTPDIST - 5) #define MAXPPINTRONDIST 250 #define TMPTRAILER 145 #define MINPPASDIST MINTSTEM_DIST #define MAXPPASDIST MAXTSTEM_DIST + MAXPPINTRONDIST #define MINPPTSTPDIST MINTSTEM_DIST + MINTPDIST #define MAXPPTSTPDIST MAXTSTEM_DIST+ASTEM2_EXT+MAXTPDIST+MAXPPINTRONDIST #define MAXTMRNALEN (4 + MAXPPASDIST + MAXTPDIST + MAXTAGDIST + TMPTRAILER) #define TSWEEP 1000 #define WRAP 2*MAXETRNALEN #define NPTAG 33 #define MAXAGENELEN (MAXETRNALEN + MAXTMRNALEN) /* NOTE: If MAXPPINTRONDIST is increased, then validity of MAXTMRNALEN and MAXETRNALEN must be ensured. WRAP = 2*MAXETRNALEN determines the length of wseq, which contains the wrap around for circular sequences. This must remain equal to or more than 2*MAXTMRNALEN and TSWEEP. 
*/ #define BASE 0 #define FSTEM 1 #define BSTEM 2 #define NOID 0 #define DLOOP 1 #define DSTEM 2 #define CLOOP 3 #define VAR 4 #define NA MAXINTRONLEN #define ND 100 #define NT 200 #define NH 2000 #define NTH 3000 #define NC 5000 #define NGFT 5000 #define NTAG 1273 #define NTAGMAX 1300 #define LSEQ 20000 #define ATBOND 2.5 #define mtNA 1500 #define mtND 150 #define mtNTH 3000 #define mtNTM 3 #define mtNCDS 200 #define mtNCDSCODON 6000 #define mtGCBOND 0.0 #define mtATBOND -0.5 #define mtGTBOND -1.2 #define mtTTBOND -2.9 #define mtGGBOND -3.0 #define mtGABOND -3.0 #define mtNOBOND -3.0 #define mtBONDSTAB 1.5 #define mtABONDSTAB 2.0 #define mtTSTTSTAB -2.5 #define mtTERMSTAB 0.01 #define mtSENDSTAB 0.01 #define mtNSTAB 0.1 #define mt3MMSTAB 1.0 #define mtGCPENALTY 0.8 #define mtGCPENALTYD 2.0 #define mt_DRLmaxlength 16 #define mt_TVRLmaxlength 18 #define mtNCLM 3 #define SRRNAMAXLEN 1500 #define SRRNAMINLEN 600 #define LRRNAMINLEN 1200 #define LRRNAMAXLEN 3000 #define srpMAXLEN 650 #define srpUMAXLEN 300 #define srpUMINLEN 100 #define srpDMAXLEN 300 #define srpDMINLEN 100 #define srpNH 200 #define srpNS 500 #define srpMAXHPL 14 #define srpMAXSP 6 #define srpMAXSTEM 6500 #define srpDISPMAX 4*srpMAXLEN #define srpMAXSPACER 12 #define srpMAXNISTEMS 10 #define srpNESTMAX 2 #define cdsMAXLEN 3000 #define NCDS 200 #define NCDSCODON 1000 typedef struct { long start; long stop; int comp; long antistart; long antistop; int genetype; int pseudogene; int permuted; int detected; char species[SHORTSTRLEN]; } annotated_gene; typedef struct { char filename[80]; FILE *f; char seqname[STRLEN]; int bugmode; int datatype; double gc; long filepointer; long ps; long psmax; long seqstart; long seqstartoff; long nextseq; long nextseqoff; int ns,nf; long aseqlen; int nagene[NS]; annotated_gene gene[NGFT]; } data_set; typedef struct { char name[100]; int seq[MAXTRNALEN+1]; int eseq[MAXETRNALEN+1]; int *ps; int nbase; int comp; long start; long stop; int astem1; int astem2; int aatail; int 
spacer1; int spacer2; int dstem; int dloop; int cstem; int cloop; int intron; int nintron; int anticodon; int var; int varbp; int tstem; int tloop; int genetype; double energy; int asst; int tps; int tpe; int annotation; int annosc; } gene; typedef struct { int *pos; int stem; int loop; double energy; } trna_loop; typedef struct { int *pos; int stem; int loop; unsigned int bondtype; double energy; double stem_energy; } mt_trna_loop; typedef struct { int *pos; int *looppos; int *end; int stem; int loop; int arm; int anticodon; unsigned int bondtype; double energy; double stem_energy; } mt_trna_cloop; typedef struct { int *pos; int stem; int loop; int *end; unsigned int bondtype; double energy; double stem_energy; } mt_trna_tloop; typedef struct { int *pos; int *end; int stem; int loop; double energy; } trna_dloop; typedef struct { int *pos1; int *pos2; int stem; double energy; } trna_astem; typedef struct { int *pos1; int *pos2; int stem; unsigned int bondtype; double energy; } mt_trna_astem; typedef struct { int *pos; int comp; int frame; int codon; int win; } mt_cds_codon; typedef struct { int *pos1; int *pos2; int comp; } mt_cds; typedef struct { int *pos1; int *pos2; int comp; } mt_rrna; typedef struct { int *pos1; int *pos2; int stem; int loop; } rrna_hairpin; typedef struct { int *pos1; int *pos2; int stem; } rrna_stem; typedef struct { int *pos; int comp; int frame; int codon; int win; } cds_codon; typedef struct { char name[50]; char tag[50]; } tmrna_tag_entry; typedef struct { char genetypename[NS][10]; FILE *f; int batch; int batchfullspecies; int repeatsn; int trna; int tmrna; int srprna; int cds; int mtrna; int tvloop; int cloop7; int peptide; int geneticcode; int ngcmod; int gcmod[MAXGCMOD]; int gcfix; int discrim; int extastem; int tarm; int tagthresh; int tarmlength; int showconfig; int libflag; int verbose; int linear; int both; int reportpseudogenes; int energydisp; int secstructdisp; int seqdisp; int aataildisp; int aataildiv; int sp1max; int 
sp2min; int sp2max; int mtxdetect; int mtcdsscan; int mtcompov; int matchacceptor; int maxintronlen; int minintronlen; int minintronlenreport; int ioverlay; int ifixedpos; int ireportminintronlen; int tmstrict; int iamismatch; int loffset; int roffset; long start; int comp; int genespace; int srpspace; int ngene[NS]; int nps; int annotated; int dispmatch; int updatetmrnatags; int tagend; int trnalenmisthresh; int tmrnalenmisthresh; int nagene[NS]; int nafn[NS]; int nafp[NS]; int natfpd; int natfptv; int lacds; int ldcds; long nabase; double reportpsthresh; double threshlevel; double trnathresh; double ttscanthresh; double ttarmthresh; double tdarmthresh; double tastemthresh; double tascanthresh; double mttthresh; double mtdthresh; double mtdtthresh; double mttarmthresh; double mtdarmthresh; double tmrnathresh; double tmathresh; double tmcthresh; double tmcathresh; double tmrthresh; double srpthresh; double cdsthresh; double eref[NS]; int tmrna_struct[200]; } csw; /* Basepair matching matrices */ int lbp[3][6][6] = { { { 0,0,1,1,1,0 }, { 0,0,1,0,1,0 }, { 1,1,0,1,1,0 }, { 1,0,1,0,1,0 }, { 1,1,1,1,1,0 }, { 0,0,0,0,0,0 } }, { { 0,0,0,1,1,0 }, { 0,0,1,0,1,0 }, { 0,1,0,1,1,0 }, { 1,0,1,0,1,0 }, { 1,1,1,1,1,0 }, { 0,0,0,0,0,0 } }, { { 0,0,0,1,1,0 }, { 0,0,1,0,1,0 }, { 0,1,0,0,1,0 }, { 1,0,0,0,1,0 }, { 1,1,1,1,1,0 }, { 0,0,0,0,0,0 } } }; int bp[6][6] = { { 0,0,0,1,1,0 }, { 0,0,1,0,1,0 }, { 0,1,0,1,1,0 }, { 1,0,1,0,1,0 }, { 1,1,1,1,1,0 }, { 0,0,0,0,0,0 } }; int wbp[6][6] = { { 0,0,0,2,2,0 }, { 0,0,2,0,2,0 }, { 0,2,0,1,2,0 }, { 2,0,1,0,2,0 }, { 2,2,2,2,2,0 }, { 0,0,0,0,0,0 } }; int wcbp[6][6] = { { 0,0,0,1,1,0 }, { 0,0,1,0,1,0 }, { 0,1,0,0,1,0 }, { 1,0,0,0,1,0 }, { 1,1,1,1,1,0 }, { 0,0,0,0,0,0 } }; int gc[6][6] = { { 0,0,0,0,0,0 }, { 0,0,1,0,1,0 }, { 0,1,0,0,1,0 }, { 0,0,0,0,0,0 }, { 0,1,1,0,1,0 }, { 0,0,0,0,0,0 } }; int gt[6][6] = { { 0,0,0,0,0,0 }, { 0,0,0,0,0,0 }, { 0,0,0,1,1,0 }, { 0,0,1,0,1,0 }, { 0,0,1,1,1,0 }, { 0,0,0,0,0,0 } }; int at[6][6] = { { 0,0,0,1,1,0 }, { 
0,0,0,0,0,0 }, { 0,0,0,0,0,0 }, { 1,0,0,0,0,0 }, { 1,0,0,0,1,0 }, { 0,0,0,0,0,0 } }; int tt[6][6] = { { 0,0,0,0,0,0 }, { 0,0,0,0,0,0 }, { 0,0,0,0,0,0 }, { 0,0,0,1,1,0 }, { 0,0,0,1,1,0 }, { 0,0,0,0,0,0 } }; int stemterm[6][6] = { { 0,0,1,0,1,0 }, { 0,0,0,0,0,0 }, { 1,0,0,0,1,0 }, { 0,0,0,1,1,0 }, { 1,0,1,1,1,0 }, { 0,0,0,0,0,0 } }; int aastemterm[6][6] = { { 1,0,1,0,1,0 }, { 0,0,0,0,0,0 }, { 1,0,0,0,1,0 }, { 0,0,0,1,1,0 }, { 1,0,1,1,1,0 }, { 0,0,0,0,0,0 } }; int ggstemterm[6][6] = { { 0,0,1,0,1,0 }, { 0,0,0,0,0,0 }, { 1,0,1,0,1,0 }, { 0,0,0,1,1,0 }, { 1,0,1,1,1,0 }, { 0,0,0,0,0,0 } }; int assymst[6][6] = { { 0,0,0,0,0,0 }, { 0,0,0,0,0,0 }, { 1,0,0,0,1,0 }, { 0,0,0,1,1,0 }, { 1,0,0,1,1,0 }, { 0,0,0,0,0,0 } }; int assymat[6][6] = { { 0,0,0,1,1,0 }, { 0,0,0,0,0,0 }, { 0,0,0,0,0,0 }, { 0,0,0,0,0,0 }, { 0,0,0,1,1,0 }, { 0,0,0,0,0,0 } }; int stackbp[6][6] = { { 0,0,0,1,1,0 }, { 0,0,1,0,1,0 }, { 0,1,0,1,1,0 }, { 1,0,1,1,1,0 }, { 1,1,1,1,1,0 }, { 0,0,0,0,0,0 } }; int ggstackbp[6][6] = { { 0,0,0,1,1,0 }, { 0,0,1,0,1,0 }, { 0,1,1,1,1,0 }, { 1,0,1,1,1,0 }, { 1,1,1,1,1,0 }, { 0,0,0,0,0,0 } }; int ggbp[6][6] = { { 0,0,0,1,1,0 }, { 0,0,1,0,1,0 }, { 0,1,1,1,1,0 }, { 1,0,1,0,1,0 }, { 1,1,1,1,1,0 }, { 0,0,0,0,0,0 } }; int gabp[6][6] = { { 0,0,1,1,1,0 }, { 0,0,1,0,1,0 }, { 1,1,0,1,1,0 }, { 1,0,1,0,1,0 }, { 1,1,1,1,1,0 }, { 0,0,0,0,0,0 } }; int assymagbp[6][6] = { { 0,0,1,1,1,0 }, { 0,0,1,0,1,0 }, { 0,1,0,1,1,0 }, { 1,0,1,0,1,0 }, { 1,1,1,1,1,0 }, { 0,0,0,0,0,0 } }; int stembp[6][6] = { { 0,0,1,1,1,0 }, { 0,0,1,0,1,0 }, { 1,1,0,1,1,0 }, { 1,0,1,1,1,0 }, { 1,1,1,1,1,0 }, { 0,0,0,0,0,0 } }; int ggstembp[6][6] = { { 0,0,1,1,1,0 }, { 0,0,1,0,1,0 }, { 1,1,1,1,1,0 }, { 1,0,1,1,1,0 }, { 1,1,1,1,1,0 }, { 0,0,0,0,0,0 } }; int gastembp[6][6] = { { 1,0,1,1,1,0 }, { 0,0,1,0,1,0 }, { 1,1,1,1,1,0 }, { 1,0,1,1,1,0 }, { 1,1,1,1,1,0 }, { 0,0,0,0,0,0 } }; int vbp[6][6] = { { 0,0,1,4,4,0 }, { 0,0,4,0,4,0 }, { 1,4,0,2,4,0 }, { 4,0,2,0,4,0 }, { 4,4,4,4,4,0 }, { 0,0,0,0,0,0 } }; int tandemid[mtNTM][4] = { 
{ 3,2,2,3 },
   { 2,3,3,2 },
   { 3,3,3,3 } };

/* One value per tandemid[] row (both arrays are dimensioned mtNTM). */
double tandem_em[mtNTM] = { -0.5,-0.5,2.0 };

/*
 * 6x6 table indexed by a pair of base codes.  Non-zero entries are
 * mtSENDSTAB or half of it; row/column 0 and 5 are all zero.
 */
double send_em[6][6] =
 { { 0.0,0.0,0.0,0.0,0.0,0.0 },
   { 0.0,0.0,0.5*mtSENDSTAB,0.0,0.5*mtSENDSTAB,0.0 },
   { 0.0,0.5*mtSENDSTAB,0.0,mtSENDSTAB,mtSENDSTAB,0.0 },
   { 0.0,0.0,mtSENDSTAB,0.0,mtSENDSTAB,0.0 },
   { 0.0,0.5*mtSENDSTAB,mtSENDSTAB,mtSENDSTAB,mtSENDSTAB,0.0 },
   { 0.0,0.0,0.0,0.0,0.0,0.0 } };

/* Same non-zero positions as send_em[][], but every entry is the full mtSENDSTAB. */
double ssend_em[6][6] =
 { { 0.0,0.0,0.0,0.0,0.0,0.0 },
   { 0.0,0.0,mtSENDSTAB,0.0,mtSENDSTAB,0.0 },
   { 0.0,mtSENDSTAB,0.0,mtSENDSTAB,mtSENDSTAB,0.0 },
   { 0.0,0.0,mtSENDSTAB,0.0,mtSENDSTAB,0.0 },
   { 0.0,mtSENDSTAB,mtSENDSTAB,mtSENDSTAB,mtSENDSTAB,0.0 },
   { 0.0,0.0,0.0,0.0,0.0,0.0 } };

/*
 * 0/1 table.
 * NOTE(review): presumably selects the neighbour_em[] plane (0 or 1) for a
 * pair of base codes -- confirm against the code that reads it.
 */
int neighbour_map[6][6] =
 { { 0,0,1,0,1,0 },
   { 0,0,0,0,0,0 },
   { 1,0,0,0,1,0 },
   { 0,0,0,1,1,0 },
   { 1,0,1,1,1,0 },
   { 0,0,0,0,0,0 } };

/*
 * Two 6x6 planes: plane 0 is all zero, plane 1 holds mtNSTAB at
 * [1][2], [1][4], [2][1], [2][4], [4][1], [4][2] and [4][4].
 */
double neighbour_em[2][6][6] = {
 { { 0.0,0.0,0.0,0.0,0.0,0.0 },
   { 0.0,0.0,0.0,0.0,0.0,0.0 },
   { 0.0,0.0,0.0,0.0,0.0,0.0 },
   { 0.0,0.0,0.0,0.0,0.0,0.0 },
   { 0.0,0.0,0.0,0.0,0.0,0.0 },
   { 0.0,0.0,0.0,0.0,0.0,0.0 } },
 { { 0.0,0.0,0.0,0.0,0.0,0.0 },
   { 0.0,0.0,mtNSTAB,0.0,mtNSTAB,0.0 },
   { 0.0,mtNSTAB,0.0,0.0,mtNSTAB,0.0 },
   { 0.0,0.0,0.0,0.0,0.0,0.0 },
   { 0.0,mtNSTAB,mtNSTAB,0.0,mtNSTAB,0.0 },
   { 0.0,0.0,0.0,0.0,0.0,0.0 } } };

/*
 * Bit-flag table: each entry is 0 or one of 0x1, 0x10, 0x100, 0x1000,
 * 0x10000.
 * NOTE(review): presumably accumulated into the "bondtype" fields of the
 * mt_* structs, one hex digit per bond class -- confirm.
 */
unsigned int btmap[6][6] =
 { { 0x10000,0x10000,0x1000,0x10,0x00000,0x10000 },
   { 0x10000,0x10000,0x1,0x10000,0x00000,0x10000 },
   { 0x1000,0x1,0x10000,0x100,0x00000,0x10000 },
   { 0x10,0x10000,0x100,0x1000,0x00000,0x10000 },
   { 0x00000,0x00000,0x00000,0x00000,0x00000,0x10000 },
   { 0x10000,0x10000,0x10000,0x10000,0x10000,0x10000 } };

/*
 * Symmetric 6x6 table of doubles: ATBOND at [0][3]/[3][0], 3.000 at
 * [1][2]/[2][1], 1.286 at [2][3]/[3][2]; rows and columns 4 and 5 are zero.
 * NOTE(review): this layout is consistent with base codes 0=A, 1=C, 2=G,
 * 3=T -- inferred from the values, confirm.
 */
double bem[6][6] =
 { { -2.144,-0.428,-2.144, ATBOND, 0.000, 0.000 },
   { -0.428,-2.144, 3.000,-2.144, 0.000, 0.000 },
   { -2.144, 3.000,-2.144, 1.286, 0.000, 0.000 },
   { ATBOND,-2.144, 1.286,-0.428, 0.000, 0.000 },
   { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 },
   { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } };

/*
 * Three tables of 64 rows by 6 flags (0/1), labelled metazoan mt,
 * standard and mammal mt.  The 64 rows match the second dimension of
 * aamap[][]; presumably one row per anticodon -- confirm.
 * NOTE(review): the metazoan table is all ones except all-zero rows at
 * indices 28 and 60; the mammalian table has its all-zero rows at
 * indices 28 and 55 -- confirm that 55 (not 60) is intended.
 */
int mt_discrim[3][64][6] =
 /* metazoan mt */
 {{{ 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 0,0,0,0,0,0 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 0,0,0,0,0,0 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }},
 /* standard */
  {{ 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }},
 /* mammal mt */
  {{ 1,0,0,0,1,1 }, { 1,0,0,0,1,1 }, { 1,0,0,0,1,1 }, { 1,0,0,0,1,1 },
   { 0,0,0,1,1,1 }, { 1,0,0,0,1,1 }, { 1,0,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,0,1,0,1,1 }, { 1,0,0,0,1,1 }, { 1,1,1,1,1,1 },
   { 1,0,0,0,1,1 }, { 1,1,1,1,1,1 }, { 0,1,0,0,1,1 }, { 0,0,1,0,1,1 },
   { 1,0,0,0,1,1 }, { 1,0,0,0,1,1 }, { 1,0,0,0,1,1 }, { 1,0,1,0,1,1 },
   { 0,0,1,0,1,1 }, { 1,0,0,0,1,1 }, { 1,0,1,1,1,1 }, { 1,0,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,0,1,0,1,1 }, { 1,0,0,0,1,1 }, { 1,1,1,1,1,1 },
   { 0,0,0,0,0,0 }, { 1,0,1,1,1,1 }, { 0,0,1,0,1,1 }, { 1,1,1,1,1,1 },
   { 1,0,0,0,1,1 }, { 1,0,0,0,1,1 }, { 1,0,0,0,1,1 }, { 1,0,1,0,1,1 },
   { 0,1,0,1,1,1 }, { 1,0,0,0,1,1 }, { 1,0,1,1,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,1,1,1,1 }, { 1,0,1,0,1,1 }, { 1,0,0,0,1,1 }, { 1,1,1,1,1,1 },
   { 1,1,0,0,1,1 }, { 1,1,1,1,1,1 }, { 0,1,0,0,1,1 }, { 0,0,1,0,1,1 },
   { 1,1,0,1,1,1 }, { 1,0,0,0,1,1 }, { 1,0,1,1,1,1 }, { 1,0,0,0,1,1 },
   { 1,0,1,0,1,1 }, { 1,0,0,0,1,1 }, { 1,1,1,1,1,1 }, { 0,0,0,0,0,0 },
   { 1,1,1,1,1,1 }, { 1,0,1,0,1,1 }, { 1,0,1,0,1,1 }, { 1,1,1,1,1,1 },
   { 1,0,0,0,1,1 }, { 1,1,1,1,1,1 }, { 1,0,1,0,1,1 }, { 1,1,1,1,1,1 }}};

/* GENETIC CODES (INDEXED BY ANTICODON) */

/*
 * Parallel per-amino-acid tables: aapolarity[i], aaletter[i] and
 * aaname[i] describe the same entry.  Each holds 27 entries: 20 amino
 * acids, then Stop, SeC, Pyl and four ambiguous assignments.
 */
char aapolarity[NAMINOACID+1] = "NNNNPNPPNNPNPPPNNPPP***????";
char aaletter[NAMINOACID+1] = "FVLICGRSAPTYDHNMWEQK***????";
char aaname[NAMINOACID][20] =
 { "Phe","Val","Leu","Ile","Cys",
   "Gly","Arg","Ser","Ala","Pro",
   "Thr","Tyr","Asp","His","Asn",
   "Met","Trp","Glu","Gln","Lys",
   "Stop",
   "SeC",
   "Pyl",
   "(Arg|Stop|Ser|Gly)",
   "(Ile|Met)",
   "(Stop|Trp)",
   "(Lys|Asn)" };

/* Three-character placeholder name ("???"). */
char ambig_aaname[4] = "???";

/* aamap based on NCBI genetic code table (downloaded 26-Apr-2014)
   ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt

   One row of 64 entries per genetic code; the number in each row's
   comment is its first index.  Entries are the Phe..Stop identifiers
   (defined elsewhere in the file) or, in row 0 only, the literals
   23-26, which line up with the four bracketed ambiguous names at
   aaname[23..26].  Rows marked "deleted -> standard" repeat row 1. */
int aamap[NGENECODE][64] = {
 /* 0. composite metazoan mt */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,23, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,24, 25,Gly,Arg,23, Ser,Ala,Pro,Thr, Stop,Glu,Gln,26 },
 /* 1. standard */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, SeC,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 2. vertebrate mt */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Stop, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Stop, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 3. yeast mt */
 { Phe,Val,Thr,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Thr,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Thr,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Thr,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 4. mold, protozoan, and coelenterate mt */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 5. invertebrate mt */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 6. ciliate */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Gln,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, SeC,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Gln,Glu,Gln,Lys },
 /* 7. deleted -> standard */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, SeC,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 8. deleted -> standard */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, SeC,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 9. echinoderm and flatworm mt */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, Trp,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Asn },
 /* 10. euplotid */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, Cys,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 11. bacterial and plant chloroplast */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, SeC,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 12. alternate yeast */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Ser,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, SeC,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 13. ascidian mt */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Gly, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Gly, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 14. alternate flatworm mt */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, Trp,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Glu,Gln,Asn },
 /* 15. blepharisma */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Gln,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, SeC,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 16. chlorophycean mt */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Leu,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, SeC,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 17. deleted -> standard */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, SeC,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 18. deleted -> standard */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, SeC,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 19. deleted -> standard */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, SeC,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 20. deleted -> standard */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, SeC,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 21. trematode mt */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 22. scenedesmus obliquus mt*/
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Leu,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Leu,Ile, SeC,Gly,Arg,Arg, Stop,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 23. thraustochytrium mt */
 { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Leu,Val,Ser,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys,
   Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn,
   Stop,Val,Leu,Ile, SeC,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys },
 /* 24.
Pterobranchia mt */ { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn, Leu,Val,Leu,Met, Trp,Gly,Arg,Lys, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys, Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn, Leu,Val,Leu,Ile, Trp,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys }, /* 25. Gracilibacteria */ { Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn, Leu,Val,Leu,Met, Trp,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Pyl,Glu,Gln,Lys, Phe,Val,Leu,Ile, Cys,Gly,Arg,Ser, Ser,Ala,Pro,Thr, Tyr,Asp,His,Asn, Leu,Val,Leu,Ile, Gly,Gly,Arg,Arg, Ser,Ala,Pro,Thr, Stop,Glu,Gln,Lys } }; /* POINTERS TO DETECTED GENES */ gene *ts; /* HELP MENU */ char helpmenu[NHELPLINE][81] = { "----------------------------", "ARAGORN v1.2.38 Dean Laslett", "----------------------------\n", "Please reference the following papers if you use this", "program as part of any published research.\n", "Laslett, D. and Canback, B. (2004) ARAGORN, a", "program for the detection of transfer RNA and transfer-messenger", "RNA genes in nucleotide sequences", "Nucleic Acids Research, 32;11-16\n", "Laslett, D. and Canback, B. (2008) ARWEN: a", "program to detect tRNA genes in metazoan mitochondrial", "nucleotide sequences", "Bioinformatics, 24(2); 172-175.\n\n", "ARAGORN detects tRNA, mtRNA, and tmRNA genes.\n", "Usage:", "aragorn -v -e -s -d -c -l -j -a -q -rn -w -ifro, -t -mt -m", " -rp -ps -gc -tv -seq -br -fasta -fo -o \n", " is assumed to contain one or more sequences", "in FASTA or GENBANK format. Results of the search are printed", "to STDOUT. All switches are optional and case-insensitive.", "Unless -i is specified, tRNA genes containing introns", "are not detected.\n", " -m Search for tmRNA genes.", " -t Search for tRNA genes.", " By default, all are detected. If one of", " -m or -t is specified, then the other", " is not detected unless specified as well.", " -mt Search for Metazoan mitochondrial tRNA genes.", " tRNA genes with introns not detected. -i,-sr switchs", " ignored. 
Composite Metazoan mitochondrial", " genetic code used.", " -mtmam Search for Mammalian mitochondrial tRNA", " genes. -i switch ignored. -tv switch set.", " Mammalian mitochondrial genetic code used.", " -mtx Same as -mt but low scoring tRNA genes are", " not reported.", " -mtd Overlapping metazoan mitochondrial tRNA genes", " on opposite strands are reported.", " -gc Use the GenBank transl_table = genetic code.", " -gcstd Use standard genetic code.", " -gcmet Use composite Metazoan mitochondrial genetic code.", " -gcvert Use Vertebrate mitochondrial genetic code.", " -gcinvert Use Invertebrate mitochondrial genetic code.", " -gcyeast Use Yeast mitochondrial genetic code.", " -gcprot Use Mold/Protozoan/Coelenterate mitochondrial genetic code.", " -gcciliate Use Ciliate genetic code.", " -gcflatworm Use Echinoderm/Flatworm mitochondrial genetic code", " -gceuplot Use Euplotid genetic code.", " -gcbact Use Bacterial/Plant chloroplast genetic code.", " -gcaltyeast Use alternative Yeast genetic code.", " -gcascid Use Ascidian mitochondrial genetic code.", " -gcaltflat Use alternative Flatworm mitochondrial genetic code.", " -gcblep Use Blepharisma genetic code.", " -gcchloroph Use Chlorophycean mitochondrial genetic code.", " -gctrem Use Trematode mitochondrial genetic code.", " -gcscen Use Scenedesmus obliquus mitochondrial genetic code.", " -gcthraust Use Thraustochytrium mitochondrial genetic code.", " -gcptero Use Pterobranchia mitochondrial genetic code.", " -gcgrac Use Gracilibacteria genetic code.", " Individual modifications can be appended using", " ,BBB= B = A,C,G, or T. is the three letter", " code for an amino-acid. More than one modification", " can be specified. eg -gcvert,aga=Trp,agg=Trp uses", " the Vertebrate Mitochondrial code and the codons", " AGA and AGG changed to Tryptophan.", " -c Assume that each sequence has a circular", " topology. Search wraps around each end.", " Default setting.", " -l Assume that each sequence has a linear", " topology. 
Search does not wrap.", " -d Double. Search both strands of each", " sequence. Default setting.", " -s or -s+ Single. Do not search the complementary", " (antisense) strand of each sequence.", " -sc or -s- Single complementary. Do not search the sense", " strand of each sequence.", " -i Search for tRNA genes with introns in", " anticodon loop with maximum length 3000", " bases. Minimum intron length is 0 bases.", " Ignored if -m is specified.", " -i Search for tRNA genes with introns in", " anticodon loop with maximum length ", " bases. Minimum intron length is 0 bases.", " Ignored if -m is specified.", " -i, Search for tRNA genes with introns in", " anticodon loop with maximum length ", " bases, and minimum length bases.", " Ignored if -m is specified.", " -io Same as -i, but allow tRNA genes with long", " introns to overlap shorter tRNA genes.", " -if Same as -i, but fix intron between positions", " 37 and 38 on C-loop (one base after anticodon).", " -ifo Same as -if and -io combined.", " -ir Same as -i, but report tRNA genes with minimum", " length bases rather than search for", " tRNA genes with minimum length bases.", " With this switch, acts as an output filter,", " minimum intron length for searching is still 0 bases.", " -tv Do not search for mitochondrial TV replacement", " loop tRNA genes. Only relevant if -mt used.", " -c7 Search for tRNA genes with 7 base C-loops only.", " -ss Use the stricter canonical 1-2 bp spacer1 and", " 1 bp spacer2. Ignored if -mt set. 
Default is to", " allow 3 bp spacer1 and 0-2 bp spacer2, which may", " degrade selectivity.", " -j Display 4-base sequence on 3' end of astem", " regardless of predicted amino-acyl acceptor length.", " -jr Allow some divergence of 3' amino-acyl acceptor", " sequence from NCCA.", " -jr4 Allow some divergence of 3' amino-acyl acceptor", " sequence from NCCA, and display 4 bases.", " -e Print out score for each reported gene.", " -ps Lower scoring thresholds to 95% of default levels.", " -ps Change scoring thresholds to percent of default levels.", " -rp Flag possible pseudogenes (score < 100 or tRNA anticodon", " loop <> 7 bases long). Note that genes with score < 100", " will not be detected or flagged if scoring thresholds are not", " also changed to below 100% (see -ps switch).", " -rp Flag possible pseudogenes and change score threshold to ", " percent of default levels.", " -seq Print out primary sequence.", " -br Show secondary structure of tRNA gene primary sequence", " using round brackets.", " -fasta Print out primary sequence in fasta format.", " -fo Print out primary sequence in fasta format only", " (no secondary structure).", " -fon Same as -fo, with sequence and gene numbering in header.", " -fos Same as -fo, with no spaces in header.", " -fons Same as -fo, with sequence and gene numbering, but no spaces.", " as (|) instead of ???", " -v Verbose. Prints out information during", " search to STDERR.", " -a Print out tRNA domain for tmRNA genes.", " -a7 Restrict tRNA astem length to a maximum of 7 bases", " -aa Display message if predicted iso-acceptor species", " does not match species in sequence name (if present).", " -amt Change annotated tRNA length mismatch reporting threshold to", " bases when searching GENBANK files. Default is 10 bases.", " -amm Change annotated tmRNA length mismatch reporting threshold to", " bases when searching GENBANK files. 
Default is 30 bases.", " -q Dont print configuration line (which switches", " and files were used).", " -rn Repeat sequence name before summary information.", " -o Print output to . If ", " already exists, it is overwritten. By default", " all output goes to stdout.", " -w Print out in batch mode.", " -wa Same as -w, but for 6 or 8 base anticodon", " loops, print possible iso-acceptor species", " For tRNA genes, batch mode output is in the form:\n", " Sequence name", " N genes found", " 1 tRNA- [locus 1] (nnn)", " i(,)", " . ", " . ", " N tRNA- [Locus N] (nnn)", " i(,)\n", " N is the number of genes found", " is the tRNA iso-acceptor species", " is the tRNA anticodon relative position", " (nnn) is the tRNA anticodon base triplet", " i means the tRNA gene has a C-loop intron\n", " For tmRNA genes, output is in the form:\n", " n tmRNA(p) [Locus n] ,", " \n", " p means the tmRNA gene is permuted", " -wunix Get around problem with some windows gcc compilers", " (found so far in Strawberry Perl and Active Perl)", " when reading Unix files.", " Execution speed may be slower for large files.", " Execution speed will be a lot slower for files", " with many small sequences." }; /* tmRNA TAG PEPTIDE DATABASE */ tmrna_tag_entry tagdatabase[NTAGMAX] = { { "Acaryochloris marina","ANNIVSFARQRTATAVA"}, { "Accumulibacter phosphatis","ANDERFALAA"}, { "Acetobacter pasteurianus","ANDNTEVLAVAA"}, { "Acetobacterium woodii","AKTEKSYGLALAA"}, { "Acetohalobium arabaticum","ANDNSYALAAA"}, { "Achromobacter xylosoxidans","ANDERFALAA"}, { "Acidaminococcus fermentans","ADDSYALAA"}, { "Acidaminococcus sp. 
D21","AEDSYALAA"}, { "Acidimicrobium ferrooxidans","AEPELALAA"}, { "Acidiphilium cryptum","ANDNFEALAVAA"}, { "Acidithiobacillus caldus","ANDSNYALAA"}, { "Acidithiobacillus ferrivorans","ANDSNYALAA"}, { "Acidithiobacillus ferrooxidans","ANDSNYALAA"}, { "Acidobacterium capsulatum","ANNNLALAA"}, { "Acidobacterium Ellin6076","ANTQFAYAA"}, { "Acidothermus cellulolyticus","ANSSRADFALAA"}, { "Acidovorax avenae","ANDERFALAA"}, { "Acidovorax citrulli","ANDERFALAA"}, { "Acidovorax sp. JS42","ANDERFALAA"}, { "Acidovorax sp. KKS102","ANDERFALAA"}, { "Acinetobacter ADP1","ANDETYALAA"}, { "Acinetobacter baumannii","ANDETYALAA"}, { "Acinetobacter oleivorans","ANDETYALAA"}, { "Acinetobacter sp. ADP1","ANDETYALAA"}, { "Acinetobacter sp. SH024","ANDETYALAA"}, { "Actinobacillus actinomycetemcomitans","ANDEQYALAA"}, { "Actinobacillus pleuropneumoniae","ANDEQYALAA"}, { "Actinobacillus succinogenes","ANDEQYALAA"}, { "Actinobacillus suis","ANDEQYALAA"}, { "Actinomyces naeslundii","ADNTRTDFALAA"}, { "Actinoplanes missouriensis","AKDNSRADFALAA"}, { "Actinoplanes sp. SE50/110","ANSKFDADQYALAA"}, { "Actinosynnema mirum","AKSNDQRAFALAA"}, { "Advenella kashmirensis","ANDESYALAA"}, { "Aequorivita sublithincola","GENNYALAA"}, { "Aerococcus urinae","DKNESQSLAFAA"}, { "Aeromonas hydrophila 1","ANDENYALAA"}, { "Aeromonas hydrophila 2","ANDENYALAA"}, { "Aeromonas salmonicida","ANDENYALAA"}, { "Aeromonas veronii","ANDENYALAA"}, { "Aggregatibacter actinomycetemcomitans","ANDEQYALAA"}, { "Aggregatibacter aphrophilus","ANDEQYALAA"}, { "Agrobacterium fabrum","ANDNNAKEYALAA"}, { "Agrobacterium radiobacter","ANDNYAEARLAA"}, { "Agrobacterium sp. 
H13-3","ANDNNAKEYALAA"}, { "Agrobacterium tumefaciens 1","ANDNNAKEYALAA"}, { "Agrobacterium tumefaciens 2","ANDNNAKECALAA"}, { "Agrobacterium vitis","ANDNNAQGYAVAA"}, { "Akkermansia muciniphila","AESNDLALAA"}, { "Alcaligenes faecalis","ANDERFALAA"}, { "Alcaligenes viscolactis","ANDERFALAA"}, { "Alcanivorax borkumensis","ANDDSYALAA"}, { "Alcanivorax dieselolei","ANDDTYALAA"}, { "Alicycliphilus denitrificans","ANDERFALAA"}, { "Alicyclobacillus acidocaldarius","GKANRFTTQNKLALAA"}, { "Aliivibrio salmonicida","ANDENYALAA"}, { "Alistipes finegoldii","GNNSYALAA"}, { "Alkalilimnicola ehrlichii","ANDENYALAA"}, { "Alkaliphilus metalliredigenes","ANDNYSLAAA"}, { "Alkaliphilus metalliredigens","ANDNYSLAAA"}, { "Alkaliphilus oremlandii","ANDNYALAA"}, { "Allochromatium vinosum","ANDDNYALAA"}, { "alpha proteobacterium","ANESYALAA"}, { "Alphaproteobacteria SAR-1","ANDELALAA"}, { "Alteromonas macleodii","ANDETYALAA"}, { "Alteromonas sp. SN2","ANDENYALAA"}, { "Aminobacterium colombiense","VNNNNYALAA"}, { "Ammonifex degensii","ANNERVALAA"}, { "Amoebophilus asiaticus","GNNQVALAA"}, { "Amphibacillus xylanus","GKTNNYSLAAA"}, { "Amycolatopsis mediterranei","ADSSQREFALAA"}, { "Amycolicicoccus subflavus","ADNAQRSQSDFALAA"}, { "Anabaena variabilis","ANNIVKFARKDALVAA"}, { "Anaerobaculum mobile","ANENYALAA"}, { "Anaerococcus prevotii","ANNNSEANFALAA"}, { "Anaerolinea thermophila","VRKSGCRSGRSRTERKRAFGP"}, { "Anaeromyxobacter dehalogenans","ANEPMALAA"}, { "Anaeromyxobacter sp. Fw109-5","ANEPMALAA"}, { "Anaeromyxobacter sp. K","ANEPMALAA"}, { "Anaplasma centrale","ANDDFVAANDNMETAFVAAA"}, { "Anaplasma marginale","ANDDFVAANDNMETAFVAAA"}, { "Anaplasma phagocytophilum","ANDDFVAANDNVETAFVAAA"}, { "Anoxybacillus flavithermus","GKENYALAA"}, { "Aquifex aeolicus","APEAELALAA"}, { "Arcanobacterium haemolyticum","ANKQKSDFALAA"}, { "Arcobacter butzleri","ANNTNYAPAYAKAA"}, { "Arcobacter nitrofigilis","ANNTNYAPAYAKVA"}, { "Arcobacter sp. 
L","ANNTNYAPAYAKAA"}, { "Aromatoleum aromaticum","ANDERFAVAA"}, { "Arthrobacter arilaitensis","AESKRTDFALAA"}, { "Arthrobacter aurescens","AESKRTDFALAA"}, { "Arthrobacter chlorophenolicus","AESKRTDFALAA"}, { "Arthrobacter FB24","AKQTRTDFALAA"}, { "Arthrobacter phenanthrenivorans","AESKRTDFALAA"}, { "Arthrobacter sp. FB24","AKQTRTDFALAA"}, { "Arthrobacter sp. Rue61a","AESKRTDFALAA"}, { "Arthromitus sp. SFB-mouse-Japan","DKNYSLQAA"}, { "Arthromitus sp. SFB-rat-Yit","DKNYSLQAA"}, { "Azoarcus BH72","ANDERFALAA"}, { "Azoarcus EbN1","ANDERFAVAA"}, { "Azoarcus sp. BH72","ANDERFALAA"}, { "Azobacteroides pseudotrichonymphae","GENFYALAA"}, { "Azorhizobium caulinodans","ANDNYAPVAVAA"}, { "Azospira oryzae","ANDERFAIAA"}, { "Azospirillum brasilense","ANDNVAPVAVAA"}, { "Azospirillum lipoferum","ANDNVAQARLAA"}, { "Azospirillum sp. B510","ANDNVAQARLAA"}, { "Azotobacter vinelandii","ANDDNYALAA"}, { "Bacillus amyloliquefaciens","GKTKSFNQNLALAA"}, { "Bacillus anthracis","GKQNNLSLAA"}, { "Bacillus atrophaeus","GKTKSFNQNLALAA"}, { "Bacillus cellulosilyticus","GKQEDNFAFAA"}, { "Bacillus cereus","GKQNNLSLAA"}, { "Bacillus clausii","GKENNNFALAA"}, { "Bacillus coagulans","GKSNTKLALAA"}, { "Bacillus cytotoxicus","GKQQNNFALAA"}, { "Bacillus halodurans","GKENNNFALAA"}, { "Bacillus licheniformis","GKSNQNLALAA"}, { "Bacillus megaterium","GKSNNNFALAA"}, { "Bacillus phage","AKLNITNNELQVA"}, { "Bacillus pumilus","GKTKSFNQNLALAA"}, { "Bacillus selenitireducens","GKQDNDFALAAA"}, { "Bacillus stearothermophilus","GKQNYALAA"}, { "Bacillus subtilis","GKTNSFNQNVALAA"}, { "Bacillus thuringiensis","GKQNNLSLAA"}, { "Bacillus weihenstephanensis","GKQNNLSLAA"}, { "Bacillusphage G","AKLNITNNELQVA"}, { "Bacteriovorax marinus","AESNFAPAMAA"}, { "Bacteroides fragilis","GETNYALAA"}, { "Bacteroides helcogenes","GENNYALAA"}, { "Bacteroides salanitronis","GNENYALAA"}, { "Bacteroides thetaiotaomicron","GETNYALAA"}, { "Bacteroides vulgatus","GNENYALAA"}, { "Bartonella bacilliformis","ANDNYAEARLAA"}, { "Bartonella 
clarridgeiae","ANDNYAEARLIAA"}, { "Bartonella grahamii","ANDNYAEARLAA"}, { "Bartonella henselae","ANDNYAEARLAA"}, { "Bartonella quintana","ANDNYAEARLAA"}, { "Bartonella tribocorum","ANDNYAEARLAA"}, { "Baumannia cicadellinicola","ANNSQYESVALAA"}, { "Bdellovibrio bacteriovorus","GNDYALAA"}, { "Beijerinckia indica","ANDNYAPVAVAA"}, { "Belliella baltica","GESNYAMAA"}, { "Beutenbergia cavernae","ADSKRTDFALAA"}, { "Bifidobacterium adolescentis","AKSNRTEFALAA"}, { "Bifidobacterium animalis","AKSNRTEFALAA"}, { "Bifidobacterium asteroides","AKSNRTEFALAA"}, { "Bifidobacterium bifidum","AKSNRTEFALAA"}, { "Bifidobacterium breve","AKSNRTEFALAA"}, { "Bifidobacterium dentium","AKSNRTEFALAA"}, { "Bifidobacterium longum","AKSNRTEFALAA"}, { "Blastococcus saxobsidens","ADSNRADYALAA"}, { "Blattabacterium sp. (Blaberus giganteus)","GEKEYAFAA"}, { "Blattabacterium sp. (Blattella germanica) Bge","GEQQYAFAA"}, { "Blattabacterium sp. (Cryptocercus punctulatus)","GEKQYAFAA"}, { "Blattabacterium sp. (Mastotermes darwiniensis)","GEKQYAFAA"}, { "Blattabacterium sp. 
(Periplaneta americana)","GEKQYAFAA"}, { "Blochmannia floridanus","AKNKYNEPVALAA"}, { "Blochmannia pennsylvanicus","ANNTTYRESVALAA"}, { "Blochmannia vafer","ANYNYNESAALAA"}, { "Bolidomonas pacifica chloroplast","ANNILAFNRKSLSFA"}, { "Bordetella avium","ANDERFALAA"}, { "Bordetella bronchiseptica","ANDERFALAA"}, { "Bordetella parapertussis","ANDERFALAA"}, { "Bordetella pertussis","ANDERFALAA"}, { "Bordetella petrii","ANDERFALAA"}, { "Borrelia afzelii","AKNNNFTSSNLVMAA"}, { "Borrelia bissettii","AKNNNFTSSNLVMAA"}, { "Borrelia burgdorferi","AKNNNFTSSNLVMAA"}, { "Borrelia crocidurae","AKNNNFTSSDLVMAA"}, { "Borrelia duttonii","AKNNNFTSSDLVMAA"}, { "Borrelia garinii","AKNNNFTSSNLVMAA"}, { "Borrelia hermsii","ARNNNFTSSNLVMAA"}, { "Borrelia recurrentis","AKNNNFTSSDLVMAA"}, { "Borrelia turicatae","AKNNNFTSSNLVMAA"}, { "Brachybacterium faecium","AEPKRTDFALAA"}, { "Brachyspira hyodysenteriae","ADEYALAA"}, { "Brachyspira intermedia","ADEYALAA"}, { "Brachyspira murdochii","ADEYALAA"}, { "Brachyspira pilosicoli","ADEYALAA"}, { "Bradyrhizobium japonicum","ANDNFAPVAQAA"}, { "Bradyrhizobium sp. BTAi1","ANDNFAPVAQAA"}, { "Bradyrhizobium sp. ORS 278","ANDNFAPVAQAA"}, { "Bradyrhizobium sp. 
S23321","ANDNFAPVAQAA"}, { "Brevibacillus brevis","GNKQLSLAA"}, { "Brevibacterium linens","AKSNNRTDFALAA"}, { "Brucella abortus","ANDNNAQGYALAA"}, { "Brucella canis","ANDNNAQGYALAA"}, { "Brucella ceti","ANDNNAQGYALAA"}, { "Brucella melitensis","ANDNNAQGYALAA"}, { "Brucella ovis","ANDNNAQGYALAA"}, { "Brucella suis","ANDNNAQGYALAA"}, { "Buchnera aphidicola 1","ANNKQNYALAA"}, { "Buchnera aphidicola 2","ANNKQNYALAA"}, { "Buchnera aphidicola 3","AKQNQYALAA"}, { "Burkholderia ambifaria","ANDDTFALAA"}, { "Burkholderia cenocepacia","ANDDTFALAA"}, { "Burkholderia cepacia","ANDDTFALAA"}, { "Burkholderia fungorum","ANDDTFALAA"}, { "Burkholderia gladioli","ANDETFALAA"}, { "Burkholderia glumae","ANDDTFALAA"}, { "Burkholderia graminis","ANDDTFALAA"}, { "Burkholderia mallei","ANDDTFALAA"}, { "Burkholderia multivorans","ANDDTFALAA"}, { "Burkholderia phenoliruptrix","ANDDTFALAA"}, { "Burkholderia phymatum","ANDDTFALAA"}, { "Burkholderia phytofirmans","ANDETFALAA"}, { "Burkholderia pseudomallei","ANDDTFALAA"}, { "Burkholderia rhizoxinica","ANDETYALAA"}, { "Burkholderia sp. 383","ANDDTFALAA"}, { "Burkholderia sp. CCGE1001","ANDDTFALAA"}, { "Burkholderia sp. CCGE1002","ANDDTFALAA"}, { "Burkholderia sp. 
YI23","ANDDTFALAA"}, { "Burkholderia thailandensis","ANDDTFALAA"}, { "Burkholderia vietnamiensis","ANDDTFALAA"}, { "Burkholderia xenovorans","ANDDTFALAA"}, { "Butyrivibrio proteoclasticus","ANDNLALAA"}, { "Caldicellulosiruptor bescii","ADKAELALAA"}, { "Caldicellulosiruptor hydrothermalis","ADRTELALAA"}, { "Caldicellulosiruptor kristjanssonii","ADKAELALAA"}, { "Caldicellulosiruptor kronotskyensis","ADKAELALAA"}, { "Caldicellulosiruptor lactoaceticus","ADKAELALAA"}, { "Caldicellulosiruptor obsidiansis","AEKPQLALAA"}, { "Caldicellulosiruptor owensensis","AEKPQLALAA"}, { "Caldicellulosiruptor saccharolyticus","ADKAELALAA"}, { "Caldilinea aerophila","AKNTGKAFAFGTPATSVALAA"}, { "Caldisericum exile","ADYSYALAA"}, { "Calditerrivibrio nitroreducens","ANDEYALAAA"}, { "Campylobacter coli","ANNVKFAPAYAKAA"}, { "Campylobacter concisus","ANNVNFAPAYAKAA"}, { "Campylobacter curvus","ANNVKFAPAYAKAA"}, { "Campylobacter fetus 2","ANNVKFAPAYAKAA"}, { "Campylobacter hominis","ANNAKFAPAYAKIA"}, { "Campylobacter jejuni","ANNVKFAPAYAKAA"}, { "Campylobacter lari","ANNVKFAPAYAKAA"}, { "Campylobacter upsaliensis","ANNAKFAPAYAKVA"}, { "Candidatus atelocyanobacterium thalassa","ANNIVSFKRVAVAA"}, { "Capnocytophaga canimorsus","GENNYALAA"}, { "Capnocytophaga ochracea","GENNYALAA"}, { "Carboxydothermus hydrogenoformans","ANENYALAA"}, { "Cardinium endosymbiont","VINNSRRCKFVALRKEEEEDDELRMAA"}, { "Carnobacterium maltaromaticum","AKNNNNSYALAA"}, { "Carnobacterium sp. 17-4","DKNNNNSYALAA"}, { "Catenulispora acidiphila","ANKTQLKSQTAYGLAA"}, { "Catera virion","ATDTDATVTDAEIEAFFAEEAAALV"}, { "Caulobacter crescentus","ANDNFAEEFAVAA"}, { "Caulobacter segnis","ANDNFAEEFAVAA"}, { "Caulobacter sp. K31","ANDNFAEEFAIAA"}, { "Cellulomonas fimi","ADNKRTDFALAA"}, { "Cellulomonas flavigena","ADSKRTDFALAA"}, { "Cellulophaga algicola","GENNYALAA"}, { "Cellulophaga lytica","GENNYALAA"}, { "Cellvibrio gilvus","ADSKRTDFALAA"}, { "Cellvibrio japonicus","ANDDSYALAA"}, { "Chelativorans sp. 
BNC1","ANDNYAEARLAA"}, { "Chitinophaga pinensis","GESNYAMAA"}, { "Chlamydia muridarum","AEPKAECEIISFADLNDLRVAA"}, { "Chlamydia psittaci","AEPKAECEIISFSELSEQRLAA"}, { "Chlamydia trachomatis","AEPKAECEIISFADLEDLRVAA"}, { "Chlamydophila abortus","AEPKAKCEIISFSELSEQRLAA"}, { "Chlamydophila caviae","AEPKAECEIISFSDLTEERLAA"}, { "Chlamydophila felis","AEPKAECEIISFSDLTQERLAA"}, { "Chlamydophila pecorum","AEPKAECEIISFSDLLVEERVAA"}, { "Chlamydophila pneumoniae","AEPKAECEIISLFDSVEERLAA"}, { "Chlamydophila psittaci","AEPKAECEIISFSELSEQRLAA"}, { "Chloracidobacterium thermophilum","AETQELALAA"}, { "Chlorobaculum parvum","ADDYSYAMAA"}, { "Chlorobium chlorochromatii","ADDYSYAMAA"}, { "Chlorobium limicola","ADDYSYAMAA"}, { "Chlorobium luteolum","ADDYSYAMAA"}, { "Chlorobium phaeobacteroides","ADDYSYAMAA"}, { "Chlorobium phaeovibrioides","ADDYSYAMAA"}, { "Chlorobium tepidum","ADDYSYAMAA"}, { "Chloroflexus aggregans","ANNNARVQPRLALAA"}, { "Chloroflexus aurantiacus","ANTNTRAQARLALAA"}, { "Chloroherpeton thalassium","ADDYSYAMAA"}, { "Chromobacterium violaceum","ANDETYALAA"}, { "Chromohalobacter salexigens","ANDDNYAQGALAA"}, { "Chroococcidiopsis PCC6712","ANNIVKFERQAVFA"}, { "Citrobacter koseri","ANDENYALAA"}, { "Citrobacter rodentium","ANDENYALAA"}, { "Clavibacter michiganensis","ANNKQSSFVLAA"}, { "Cloacamonas acidaminovorans","ANNNYALAA"}, { "Clostridiales genomosp.","ANKNYSYAAA"}, { "Clostridium acetobutylicum","DNENNLALAA"}, { "Clostridium acidurici","ANDNYALAA"}, { "Clostridium beijerinckii","AEDNFALAA"}, { "Clostridium botulinum","ANDNFALAA"}, { "Clostridium cellulolyticum","AKNDNFALAAA"}, { "Clostridium cellulovorans","DENYLLAA"}, { "Clostridium clariflavum","AENDNYALAAA"}, { "Clostridium difficile","ADDNFAIAA"}, { "Clostridium kluyveri","ENDNLALAA"}, { "Clostridium lentocellum","AEDNLAIAA"}, { "Clostridium ljungdahlii","ENNNENLALAA"}, { "Clostridium perfringens","AEDNFALAA"}, { "Clostridium phytofermentans","ANDNLAYAA"}, { "Clostridium saccharolyticum","ANNNELALAA"}, { 
"Clostridium sp. BNL1100","AKNDNFALAAA"}, { "Clostridium sp. SY8519","AKEDNFELAMAA"}, { "Clostridium sticklandii","ANENYALAA"}, { "Clostridium tetani","ADDNFVLAA"}, { "Clostridium thermocellum","ANEDNYALAAA"}, { "Collimonas fungivorans","ANDNSYALAA"}, { "Colwellia psychrerythraea","ANDDTFALAA"}, { "Colwellia sp","ANDDTFALAA"}, { "Comamonas testosteroni","ANDERFALAA"}, { "Conexibacter woesei","ADSHEYALAA"}, { "Coprothermobacter proteolyticus","AEPEFALAA"}, { "Coraliomargarita akajimensis","GEEQFALAA"}, { "Corallococcus coralloides","ANDNVELALAA"}, { "Coriobacterium glomerans","GMAQTKIEPTRNPRARRRAQGNRISTGD"}, { "Corynebacterium aurimucosum","AEKNSQRDYALAA"}, { "Corynebacterium diphtheriae","AENTQRDYALAA"}, { "Corynebacterium efficiens","AEKTQRDYALAA"}, { "Corynebacterium glutamicum","AEKSQRDYALAA"}, { "Corynebacterium jeikeium","AENTQRDYALAA"}, { "Corynebacterium kroppenstedtii","AENTQRDYALAA"}, { "Corynebacterium pseudotuberculosis","AEKTQRDYALAA"}, { "Corynebacterium resistens","AENTQRDYALAA"}, { "Corynebacterium ulcerans","AEKTQRDYALAA"}, { "Corynebacterium urealyticum","AENTQRDYALAA"}, { "Corynebacterium variabile","AENTQRDYALAA"}, { "Coxiella burnetii","ANDSNYLQEAYA"}, { "Croceibacter atlanticus","GENNYALAA"}, { "Crocosphaera watsonii","ANNIVSFKRVAVAA"}, { "Cronobacter sakazakii","ANDENYALAA"}, { "Cronobacter turicensis","ANDENYALAA"}, { "Cryptobacterium curtum","DNNKSFGRQYALAA"}, { "Cupriavidus metallidurans","ANDERYALAA"}, { "Cupriavidus necator","ANDERYALAA"}, { "Cupriavidus taiwanensis","ANDERYALAA"}, { "Cyanidioschyzon merolae Chloroplast","ANQILPFSIPVKHLAV"}, { "Cyanidium caldarium chloroplast","ANNIIEISNIRKPALVV"}, { "Cyanobium gracile","ANNIVRFSRQAAPVAA"}, { "Cyanobium sp. PCC 6904","ANNIVRFSRQAAPVAA"}, { "Cyanobium sp. PCC 7009","ANNIVRFSRQAAPVAA"}, { "Cyanophora paradoxa chloroplast","ATNIVRFNRKAAFAV"}, { "Cyanothece sp. ATCC 51142","ANNIVSFKRVAVAA"}, { "Cyanothece sp. PCC 7424","ANNIVPFARKAAPVAA"}, { "Cyanothece sp. 
PCC 7425","ANNIVPFARKAVAVA"}, { "Cyanothece sp. PCC 7822","ANNIVPFARKSALVAA"}, { "Cyanothece sp. PCC 8801","ANNIVSFKRVAVAA"}, { "Cyclobacterium marinum","GESNYAMAA"}, { "Cycloclasticus sp. P1","ANDDNYAIAA"}, { "Cytophaga hutchinsonii","GEESYAMAA"}, { "Dechloromonas agitata","ANDEQFAIAA"}, { "Dechloromonas aromatica","ANDEQFAIAA"}, { "Dechlorosoma suillum","ANDERFAIAA"}, { "Deferribacter desulfuricans","ANDELALAA"}, { "Dehalococcoides ethenogenes","GERELVLAG"}, { "Dehalococcoides sp. CBDB1","GERELVLAG"}, { "Dehalococcoides sp. VS","GERELVLAG"}, { "Dehalogenimonas lykanthroporepellens","DAKEISAGLERFRRLKLEGREQKAG"}, { "Deinococcus deserti","GNQNYALAA"}, { "Deinococcus geothermalis","GNQNYALAA"}, { "Deinococcus gobiensis","GNQNYALAA"}, { "Deinococcus maricopensis","GNNNSTTFALAA"}, { "Deinococcus proteolyticus","GENNYALAA"}, { "Deinococcus radiodurans","GNQNYALAA"}, { "Delftia acidovorans","ANDERFALAA"}, { "Delftia sp. Cs1-4","ANDERFALAA"}, { "Denitrovibrio acetiphilus","ANNEHTLAAA"}, { "Desulfarculus baarsii","ADDYNYAVAA"}, { "Desulfatibacillum alkenivorans","ADDYNYAMAA"}, { "Desulfitobacterium hafniense","ANDDNYALAA"}, { "Desulfobacca acetoxidans","ADNYGYALAA"}, { "Desulfobacterium autotrophicum","ADDYNYAVAA"}, { "Desulfobacula toluolica","ADDYNYAVAA"}, { "Desulfobulbus propionicus","ADDYNYALAA"}, { "Desulfococcus oleovorans","ADDYNYAVAA"}, { "Desulfohalobium retbaense","ANDYDYALAA"}, { "Desulfomicrobium baculatum","ANDNYDYAMAA"}, { "Desulfomonile tiedjei","ANDYEYALAA"}, { "Desulforudis audaxviator","AKNETYALAA"}, { "Desulfotalea psychrophila","ADDYNYAVAA"}, { "Desulfotomaculum acetoxidans","ANNDYALAA"}, { "Desulfotomaculum carboxydivorans","ANEEYALAA"}, { "Desulfotomaculum kuznetsovii","ANEEYALAA"}, { "Desulfotomaculum reducens","ANEEYALAA"}, { "Desulfotomaculum ruminis","ANEEYALAA"}, { "Desulfovibrio aespoeensis","ANNDYDYAIAA"}, { "Desulfovibrio africanus","ANDYNYSLAA"}, { "Desulfovibrio alaskensis","ANNDYEYAMAA"}, { "Desulfovibrio desulfuricans","ANNDYDYAYAA"}, { 
"Desulfovibrio desulfuricans 2 (G20)","ANNDYEYAMAA"}, { "Desulfovibrio magneticus","ANDYDYALAA"}, { "Desulfovibrio salexigens","ANDNYDYAMAA"}, { "Desulfovibrio vulgaris","ANNYDYALAA"}, { "Desulfovibrio yellowstonii","ANNELALAA"}, { "Desulfurispirillum indicum","ANDENVLAAA"}, { "Desulfurivibrio alkaliphilus","ADDYAYAAAA"}, { "Desulfurobacterium thermolithotrophum","ANEELALAA"}, { "Desulfuromonas acetoxidans","ADTDVSYALAA"}, { "Dichelobacter nodosus","ANDDNYALAA"}, { "Dickeya dadantii","ANDENFAPAALAA"}, { "Dickeya zeae","ANDENFAPAALAA"}, { "Dictyoglomus thermophilum","ANTNLALAA"}, { "Dictyoglomus turgidum","ANTNLALAA"}, { "Dinoroseobacter shibae","ANDNRAPVAVAA"}, { "Dyadobacter fermentans","GESTYAMAA"}, { "Edwardsiella tarda","ANDENYALAA"}, { "Eggerthella lenta","GKNNTQSAPALAMAA"}, { "Eggerthella sp. YY7918","GKNNTQSAPALAMAA"}, { "Ehrlichia canis","ANDNFVFANDNNSSVAGLVAA"}, { "Ehrlichia chaffeensis","ANDNFVFANDNNSSANLVAA"}, { "Ehrlichia ruminantium 1","ANDNFVSANDNNSTANLVAA"}, { "Ehrlichia ruminantium 2","ANDNFVSANDNNSTANLVAA"}, { "Elusimicrobium minutum","GNQTELNWATA"}, { "Emiliania huxleyi chloroplast","ANNILNFNSKLAIA"}, { "Emticicia oligotrophica","GNTSYAMAA"}, { "Enterobacter aerogenes","ANDENYALAA"}, { "Enterobacter cancerogenus","ANDENYALAA"}, { "Enterobacter cloacae","ANDENYALAA"}, { "Enterobacter lignolyticus","ANDENYALAA"}, { "Enterobacter sakazakii","ANDENYALAA"}, { "Enterobacter sp. 638","ANDENYALAA"}, { "Enterococcus durans","AKNENNSYALAA"}, { "Enterococcus faecalis","AKNENNSFALAA"}, { "Enterococcus faecium","AKNENNSYALAA"}, { "Enterococcus hirae","AKNENNSYALAA"}, { "Erwinia amylovora","ANDENFAPAALAA"}, { "Erwinia billingiae","ANDENYALAA"}, { "Erwinia carotovora","ANDENYALAA"}, { "Erwinia chrysanthemi","ANDENFAPAALAA"}, { "Erwinia pyrifoliae","AKLKYNESVANDGEYELIAAAA"}, { "Erwinia sp. 
Ejp617","AKLYNNIPVANDGEFITPALAA"}, { "Erwinia tasmaniensis","ANDENFAPAALAA"}, { "Erysipelothrix rhusiopathiae","GNNSLQFAA"}, { "Erythrobacter litoralis","ANDNEALALAA"}, { "Escherichia coli","ANDENYALAA"}, { "Ethanoligenens harbinense","AKDNVIRVNFGRSEEALAA"}, { "Eubacterium eligens","ANDNLAYAA"}, { "Eubacterium limosum","AKENRSYGMALAA"}, { "Eubacterium rectale","AEDNLAYAA"}, { "Exiguobacterium sibiricum","GKTNTQLAAA"}, { "Exiguobacterium sp. AT1b","GKTNTQLAAA"}, { "Ferrimonas balearica","ANDENYALAA"}, { "Fervidobacterium nodosum","ANEYVPLAA"}, { "Fervidobacterium pennivorans","ANEYVPLAA"}, { "Fibrobacter succinogenes","ADENYALAA"}, { "Filifactor alocis","ANENNLLAA"}, { "Finegoldia magna","AEDNNFALAA"}, { "Flavobacteriaceae bacterium","GDQEFALAA"}, { "Flavobacterium columnare","GENNYALAA"}, { "Flavobacterium indicum","GENNYALAA"}, { "Flavobacterium johnsoniae","GENNYALAA"}, { "Flexibacter litoralis","GESNYAMAA"}, { "Flexistipes sinusarabici","ANDEFALAAA"}, { "Fluviicola taffensis","DNTSYALAA"}, { "Francisella cf.","ANDSNFAAVAKAA"}, { "Francisella noatunensis","ANDSNFAAVTKAA"}, { "Francisella novicida","ANDSNFAAVAKAA"}, { "Francisella philomiragia","ANDSNFAAVAKAA"}, { "Francisella sp. TX077308","ANDSNFAAVAKAA"}, { "Francisella tularensis 1","GNKKANRVAANDSNFAAVAKAA"}, { "Francisella tularensis 2","ANDSNFAAVAKAA"}, { "Frankia alni","ANKTQPVTPLYALAA"}, { "Frankia sp. CcI3","ANKTQPTTPTYALAA"}, { "Frankia sp. EAN1pec","ATKTQPASSTFALAA"}, { "Frankia sp. 
EuI1c","ANSEQSATSAYALAA"}, { "Frankia symbiont","ANKSQSATPRTFALAA"}, { "Frateuria aurantia","ANDDNYALAA"}, { "Fremyella diplosiphon","ANNIVKFARKEALVAA"}, { "Fusobacterium nucleatum 1","GNKDYALAA"}, { "Fusobacterium nucleatum 2","GNKEYALAA"}, { "Gallibacterium anatis","ANDENYALAA"}, { "Gallionella capsiferriformans","ANDENYALAA"}, { "gamma proteobacterium","ANDESYALAA"}, { "Gammaproteobacteria SAR-1","ANNYNYSLAA"}, { "Gardnerella vaginalis","AKSNRTEFALAA"}, { "Gemmata obscuriglobus","AEPQYSLAA"}, { "Gemmatimonas aurantiaca","ANNNLALAA"}, { "Geobacillus kaustophilus","GKQNYALAA"}, { "Geobacillus sp. WCH70","GKENYALAA"}, { "Geobacillus sp. Y4.1MC1","GKENYALAA"}, { "Geobacillus stearothermophilus","GKQNYALAA"}, { "Geobacillus thermodenitrificans","GKENYALAA"}, { "Geobacter bemidjiensis","ADNYDYALAA"}, { "Geobacter daltonii","ADNYDYALAA"}, { "Geobacter lovleyi","ADNYNTQPVALAA"}, { "Geobacter metallireducens","ADNYDYAVAA"}, { "Geobacter sp. M18","ADNYDYALAA"}, { "Geobacter sp. M21","ADNYDYALAA"}, { "Geobacter sulfurreducens","ADNYDYAVAA"}, { "Geobacter uraniireducens","ADNYNYALAA"}, { "Geodermatophilus obscurus","ADSSQREFALAA"}, { "Glaciecola nitratireducens","ANDENYALAA"}, { "Glaciecola sp. 4H-3-7+YE-5","ANDENYALAA"}, { "Gloeobacter violaceus","ATNNVVPFARARATVAA"}, { "Gluconacetobacter diazotrophicus","ANDNSEVLAVAA"}, { "Gluconacetobacter xylinus","ANDNSEVLAVAA"}, { "Gluconobacter oxydans","ANDNSEVLAVAA"}, { "Gordonia bronchialis","ADSNQRDYALAA"}, { "Gordonia polyisoprenivorans","ADKNQRDYALAA"}, { "Gordonia rubripertincta","ADSNQRDYALAA"}, { "Gordonia sp. 
KTR9","ADSNQRDYALAA"}, { "Gracilaria tenuistipitata chloroplast","AKNNILTLSRRLIYA"}, { "Gramella forsetii","GENNYALAA"}, { "Granulibacter bethesdensis","ANDNHEALAVAA"}, { "Granulicella mallensis","AEPQFALAA"}, { "Granulicella tundricola","AEPQFALAA"}, { "Guillardia theta chloroplast","ASNIVSFSSKRLVSFA"}, { "Haemophilus ducreyi","ANDEQYALAA"}, { "Haemophilus influenzae","ANDEQYALAA"}, { "Haemophilus parainfluenzae","ANDEQYALAA"}, { "Haemophilus parasuis","ANDEQYALAA"}, { "Haemophilus somnus","ANDEQYALAA"}, { "Hahella chejuensis","ANDETYALAA"}, { "Halanaerobium hydrogeniformans","ANDNSYALAAA"}, { "Halanaerobium praevalens","ANDNNYTLAAA"}, { "Haliangium ochraceum","ANDNAVALAA"}, { "Haliscomenobacter hydrossis","GESNYAMAA"}, { "Halobacillus halophilus","GESNDNLAVAA"}, { "Halomonas elongata","ANDDNYAQGALAA"}, { "Halorhodospira halophila","ANDDNYALAA"}, { "Halothermothrix orenii","ADNNNYALAAA"}, { "Halothiobacillus neapolitanus","ANDDNYALAA"}, { "Hamiltonella defensa","AKINKNRPAANGYMPVAALAA"}, { "Helicobacter acinonychis","VNNTDYAPAYAKVA"}, { "Helicobacter bizzozeronii","VNNPNYAPNYAKAA"}, { "Helicobacter cetorum","VNNTNYAPAYAKVA"}, { "Helicobacter cinaedi","ANNTNYAPVYAKVA"}, { "Helicobacter felis","VNNPNYAPNYAKAA"}, { "Helicobacter hepaticus","ANNANYAPAYAKVA"}, { "Helicobacter mustelae","ANNKNYAPAYAKVA"}, { "Helicobacter pylori 1","VNNTDYAPAYAKAA"}, { "Helicobacter pylori 2","VNNTDYAPAYAKAA"}, { "Helicobacter pylori 3","VNNADYAPAYAKAA"}, { "Heliobacillus mobilis","AEDNYALAA"}, { "Heliobacterium modesticaldum","AEENYALAA"}, { "Herbaspirillum seropedicae","ANDESYALAA"}, { "Herminiimonas arsenicoxydans","DNSYALAA"}, { "Herpetosiphon aurantiacus","GKNTFRAPVALAA"}, { "Hippea maritima","ADTEYALAA"}, { "Hirschia baltica","ANDNFAEGELLAA"}, { "Hydrogenophaga palleronii","ANDERFALAA"}, { "Hyphomicrobium denitrificans","ANDNYAEAALAA"}, { "Hyphomicrobium sp. 
MC1","ANDNYAEAALAA"}, { "Hyphomonas neptunium","ANDNFAEGELLAA"}, { "Idiomarina loihiensis","ANDDNYALAA"}, { "Ignavibacterium album","GEYNYALAA"}, { "Ilyobacter polytropus","ENNNYALAA"}, { "Intrasporangium calvum","ANSKRTDFALAA"}, { "Isoptericola variabilis","ADNKRTDFTLAA"}, { "Jannaschia sp. CCS1","ANDNRAPAMALAA"}, { "Janthinobacterium sp. Marseille","ANDNSYALAA"}, { "Jonesia denitrificans","ADTKRTDFALAA"}, { "Kangiella koreensis","ANEDNYALAA"}, { "Ketogulonicigenium vulgare","ANNNRAPAMALAA"}, { "Kineococcus radiotolerans","ADSKRTEFALAA"}, { "Kitasatospora setae","ANSKRDSQQFALAA"}, { "Klebsiella oxytoca","ANDENYALAA"}, { "Klebsiella pneumoniae","ANDENYALAA"}, { "Kocuria rhizophila","AKSKRTDFALAA"}, { "Koribacter versatilis","ANTQMAYAA"}, { "Kosmotoga olearia","ANTEFALAA"}, { "Kribbella flavida","ADSKRSSFALAA"}, { "Krokinobacter sp. 4H-3-7-5","GENNYALAA"}, { "Kyrpidia tusciae","ANKQELALAA"}, { "Kytococcus sedentarius","ANSKRTDFALAA"}, { "Lacinutrix sp. 5H-3-7-4","GENNYALAA"}, { "Lactobacillus acidophilus","ANNKNSYALAA"}, { "Lactobacillus amylovorus","ANNKNSYALAA"}, { "Lactobacillus brevis","AKNNNNSYALAA"}, { "Lactobacillus buchneri","AKNNNNSYALAA"}, { "Lactobacillus casei","AKNENSYALAA"}, { "Lactobacillus crispatus","ANNKNSYALAA"}, { "Lactobacillus delbrueckii 1","AKNENNSYALAA"}, { "Lactobacillus delbrueckii 2","ANENSYAVAA"}, { "Lactobacillus fermentum","ANNNSQSYAYAA"}, { "Lactobacillus gallinarum","ANNKNSYALAA"}, { "Lactobacillus gasseri","ANNENSYAVAA"}, { "Lactobacillus helveticus","ANNKNSYALAA"}, { "Lactobacillus johnsonii","ANNENSYAVAA"}, { "Lactobacillus kefiranofaciens","ANNKNSYALAA"}, { "Lactobacillus plantarum","AKNNNNSYALAA"}, { "Lactobacillus reuteri","ANNNSNSYAYAA"}, { "Lactobacillus rhamnosus","AKNENSYALAA"}, { "Lactobacillus ruminis","AKNNNYSYALAA"}, { "Lactobacillus sakei","ANNNNSYAVAA"}, { "Lactobacillus salivarius","AKNNNNSYALAA"}, { "Lactobacillus sanfranciscensis","AKNNNNSYALAA"}, { "Lactococcus garvieae","AKNNTSYALAA"}, { "Lactococcus 
lactis","AKNNTQTYAMAA"}, { "Lactococcus plantarum","AKNTQTYALAA"}, { "Lactococcus raffinolactis","AKNTQTYAVAA"}, { "Laribacter hongkongensis","ANDDTYALAA"}, { "Lawsonia intracellularis","ANNNYDYALAA"}, { "Leadbetterella byssophila","GNTSYAMAA"}, { "Legionella longbeachae","ANDENFAGGEAIAA"}, { "Legionella pneumophila","ANDENFAGGEAIAA"}, { "Leifsonia xyli","ANSKSTVSAKADFALAA"}, { "Leptolyngbya boryana","ANNIVPFARKTAPVAA"}, { "Leptospira biflexa","ANNEFALAA"}, { "Leptospira borgpetersenii","ANNELALAA"}, { "Leptospira interrogans","ANNELALAA"}, { "Leptospirillum ferriphilum","ANEELALAA"}, { "Leptospirillum ferrooxidans","ANNEMALAA"}, { "Leptospirillum groupII","ANEELALAA"}, { "Leptospirillum groupIII","ANEELALAA"}, { "Leptospirillum sp. Group II '5-way CG'","ANEELALAA"}, { "Leptospirillum sp. Group III","ANEELALAA"}, { "Leptothrix cholodnii","ANDSTYALAA"}, { "Leptotrichia buccalis","GNDNYALAA"}, { "Leuconostoc carnosum","AKNENTFAVAA"}, { "Leuconostoc citreum","AKNENSFAIAA"}, { "Leuconostoc gasicomitatum","AKNENSFAIAA"}, { "Leuconostoc gelidum","AKNENSFAIAA"}, { "Leuconostoc lactis","AKNENSFAIAA"}, { "Leuconostoc mesenteroides","AKNENSFAIAA"}, { "Leuconostoc pseudomesenteroides","AKNENSYAIAA"}, { "Leuconostoc sp. 
C2","AKNENSFAIAA"}, { "Liberibacter asiaticus","ANDNSAREVLAA"}, { "Liberibacter solanacearum","ANDNFAGETRLAA"}, { "Listeria grayi 1","GKEKQNLAFAA"}, { "Listeria grayi 2","GKQNNNLAFAA"}, { "Listeria innocua","GKEKQNLAFAA"}, { "Listeria ivanovii","GKEKQNLAFAA"}, { "Listeria monocytogenes","GKEKQNLAFAA"}, { "Listeria seeligeri","GKEKQNLAFAA"}, { "Listeria welshimeri","GKEKQNLAFAA"}, { "Lysinibacillus sphaericus","GKQQNLAFAA"}, { "Macrococcus caseolyticus","GKTNNFAVAA"}, { "Magnetococcus marinus","ANDEHYAPAFAAA"}, { "Magnetococcus sp.","ANDEHYAPAFAAA"}, { "Magnetospirillum magneticum","ANDNVELAAAA"}, { "Magnetospirillum magnetotacticum 1","ANDNFAPVAVAA"}, { "Magnetospirillum magnetotacticum 2","ANDNVELAAAA"}, { "Mahella australiensis","ADNNAELALAA"}, { "Mannheimia haemolytica","ANDEQYALAA"}, { "Mannheimia succiniciproducens","ANDEQYALAA"}, { "Maribacter sp. HTCC2170","GDNNYALAA"}, { "Maricaulis maris","ANDNFAEEVALAA"}, { "Marinithermus hydrothermalis","GNNRYALAA"}, { "Marinitoga piezophila","AEENYALAA"}, { "Marinobacter adhaerens","ANDENYALAA"}, { "Marinobacter aquaeolei","ANDENYALAA"}, { "Marinobacter hydrocarbonoclasticus","ANDENYALAA"}, { "Marinobacter sp. BSs20148","ANDENYSLAA"}, { "Marinomonas mediterranea","ANDENYALAA"}, { "Marinomonas posidonica","ANDENYALAA"}, { "Marinomonas sp. 
MWYL1","ANDENYALAA"}, { "Marivirga tractuosa","GESNYAMAA"}, { "Megasphaera elsdenii","AKENNFALAA"}, { "Meiothermus ruber","GNVRSNSYALAA"}, { "Meiothermus silvanus","GNTQRSYALAA"}, { "Melioribacter roseus","GEYNYALAA"}, { "Melissococcus plutonius","AKKQNYSYAVAA"}, { "Mesoplasma florum","ANKNEENTNEVPTFMLNAGQANYAFA"}, { "Mesorhizobium ciceri","ANDNYAEARLAA"}, { "Mesorhizobium loti","ANDNYAEARLAA"}, { "Mesorhizobium opportunistum","ANDNYAEARLAA"}, { "Mesorhizobium sp.","ANDNYAEARLAA"}, { "Mesostigma viride chloroplast","ANNILPFNRKTAVAV"}, { "Mesotoga prima","ANNEFALAA"}, { "Methylacidiphilum infernorum","ANEELALAA"}, { "Methylibium petroleiphilum","ANDERFALAA"}, { "Methylobacillus flagellatus","ANDETYALAA"}, { "Methylobacillus glycogenes","ANDETYALAA"}, { "Methylobacterium extorquens","ANDNFAPVAVAA"}, { "Methylobacterium nodulans","ANDNYAPVAVAA"}, { "Methylobacterium populi","ANDNFAPVAVAA"}, { "Methylobacterium radiotolerans","ANDNFAPVAVAA"}, { "Methylobacterium sp. 4-46","ANDNYAPVAVAA"}, { "Methylocella silvestris","ANDNYAPVAVAA"}, { "Methylococcus capsulatus","ANDDVYALAA"}, { "Methylocystis sp. SC2","ANDNYAPVAVAA"}, { "Methylomicrobium alcaliphilum","ANDENYSMALAA"}, { "Methylomirabilis oxyfera","ANHELALAA"}, { "Methylomonas methanica","ANDENYSVALAA"}, { "Methylophaga sp. JAM1","ANDNNYALAA"}, { "Methylophaga sp. 
JAM7","ANDNNYALAA"}, { "Methylotenera mobilis","ANDETYSLAA"}, { "Methylotenera versatilis","ANDETYSLAA"}, { "Methylovorus glucosetrophus","ANDETYALAA"}, { "Micavibrio aeruginosavorus","ANDNFVVANDNSREAAVAIAA"}, { "Microbacterium testaceum","ADAKRTDFALAA"}, { "Microbulbifer degradans","ANDDNYGAQLAA"}, { "Micrococcus luteus","AESKRTDFALAA"}, { "Microcystis aeruginosa","ANNIVPFARKAAPVAA"}, { "Microlunatus phosphovorus","AKSEQRTDFALAA"}, { "Micromonospora aurantiaca","AKNNRADFALAA"}, { "Midichloria mitochondrii","ANNKFVPANSDFVPALQAA"}, { "Mobiluncus curtisii","AERNSTESFALAA"}, { "Modestobacter marinus","ADSSQRDFALAA"}, { "Moorella thermoacetica","ADDNLALAA"}, { "Moranella endobia","ANDSQYESVALAA"}, { "Moraxella catarrhalis","ANDETYALAA"}, { "Muricauda ruestringensis","GENNYALAA"}, { "Mycobacteriophage Bxz1 virion","ATDTDATVTDAEIEAFFAEEAAALV"}, { "Mycobacterium abscessus","ADSHQRDYALAA"}, { "Mycobacterium africanum","ADSHQRDYALAA"}, { "Mycobacterium austroafricanum","ADSNQRDYALAA"}, { "Mycobacterium avium","ADSHQRDYALAA"}, { "Mycobacterium bovis","ADSHQRDYALAA"}, { "Mycobacterium chubuense","ADSNQRDYALAA"}, { "Mycobacterium gilvum","ADSNQRDYALAA"}, { "Mycobacterium indicus","ADSHQRDYALAA"}, { "Mycobacterium intracellulare","ADSHQRDYALAA"}, { "Mycobacterium leprae","ADSYQRDYALAA"}, { "Mycobacterium marinum","ADSHQRDYALAA"}, { "Mycobacterium microti","ADSHQRDYALAA"}, { "Mycobacterium phage","ATDTDATVTDAEIEAFFAEEAAALV"}, { "Mycobacterium rhodesiae","ADSNQRDFALAA"}, { "Mycobacterium smegmatis","ADSNQRDYALAA"}, { "Mycobacterium sp. 
MCS","ADTNQRDYALAA"}, { "Mycobacterium tuberculosis","ADSHQRDYALAA"}, { "Mycoplasma agalactiae","ANDKKSEEVRVELPAFAIANANANLAFA"}, { "Mycoplasma arthritidis","GNLETSEDKKLDLQFVMNSQTQQNLLFA"}, { "Mycoplasma bovis","ANDKKSEEVRLELPAFAIANANANLAFA"}, { "Mycoplasma capricolum","ANKNEETFEMPAFMMNNASAGANFMFA"}, { "Mycoplasma conjunctivae","ANKKEDKAVDVNLLASQSFNSNLAFA"}, { "Mycoplasma crocodyli","GKSKKAENEFSFSNPAFAGNLNLAFA"}, { "Mycoplasma fermentans","AEDKKAEEVNISSLMIAQKMQSQSNLAFA"}, { "Mycoplasma gallisepticum","DKTSKELADENFVLNQLASNNYALNF"}, { "Mycoplasma genitalium 1","DKENNEVLVEPNLIINQQASVNFAFA"}, { "Mycoplasma genitalium 2","DKENNEVLVDPNLIINQQASVNFAFA"}, { "Mycoplasma haemofelis","ANKQERESSVVNLLMSQPQDLASLSF"}, { "Mycoplasma hominis","AEEKQNKQSFVLNQMMSSNPVFAY"}, { "Mycoplasma hyorhinis","GKENKKEDYSLLMNASTQSNLAFAF"}, { "Mycoplasma leachii","ANKNEETFEMPAFMMNNASAGANFMFA"}, { "Mycoplasma mobile","GKEKQLEVSPLLMSSSQSNLVFA"}, { "Mycoplasma mycoides","ADKNEENFEMPAFMINNASAGANYMFA"}, { "Mycoplasma penetrans","AKNNKNEAVEVELNDFEINALSQNANLALYA"}, { "Mycoplasma pneumoniae","DKNNDEVLVDPMLIANQQASINYAFA"}, { "Mycoplasma pulmonis","GTKKQENDYQDLMISQNLNQNLAFASV"}, { "Mycoplasma putrefaciens","ANKKTEEFEMPAFMINNASAGANLMFA"}, { "Mycoplasma synoviae","GNKQSQVEEVTREFSPSLYTFNSNLAYA"}, { "Myxococcus fulvus","ANDNVELALAA"}, { "Myxococcus xanthus","ANDNVELALAA"}, { "Nakamurella multipartita","ADSKRTEFALAA"}, { "Natranaerobius thermophilus","ADEDYALAAA"}, { "Nautilia profundicola","AANNTNYSPAVARAAA"}, { "Neisseria gonorrhoeae","ANDETYALAA"}, { "Neisseria lactamica","ANDETYALAA"}, { "Neisseria meningitidis","ANDETYALAA"}, { "Nephroselmis olivacea chloroplast","TTYHSCLEGHLS"}, { "Niastella koreensis","GNTQFAMAA"}, { "Nitratifractor salsuginis","ANNTDYRPAYAHAA"}, { "Nitratiruptor sp. 
SB155-2","ANNTDYRPAYAVAA"}, { "Nitrobacter hamburgensis","ANDNYAPVAQAA"}, { "Nitrobacter Nb-311A","ANDNYAPVAQAA"}, { "Nitrobacter winogradskyi","ANDNYAPVAQAA"}, { "Nitrosococcus halophilus","ANDDNYALAA"}, { "Nitrosococcus oceani","ANDDNYALAA"}, { "Nitrosococcus watsonii","ANDDNYALAA"}, { "Nitrosomonas cryotolerans","ANDENYALAA"}, { "Nitrosomonas europaea","ANDENYALAA"}, { "Nitrosomonas eutropha","ANDENYALAA"}, { "Nitrosomonas sp. AL212","ANDENYALAA"}, { "Nitrosomonas sp. Is79A3","ANDENYALAA"}, { "Nitrosospira multiformis","ANDENYALAA"}, { "Nitrospira defluvii","ANQELALAA"}, { "Nocardia brasiliensis","ADSNQREYALAA"}, { "Nocardia cyriacigeorgica","ADSHQREYALAA"}, { "Nocardia farcinica","ADSHQREYALAA"}, { "Nocardioides sp. JS614","ANTNRSSFALAA"}, { "Nocardiopsis alba","ANSKRTEFALAA"}, { "Nocardiopsis dassonvillei","ANSKRTEFALAA"}, { "Nostoc azollae","ANNIVKFARREALVAA"}, { "Nostoc PCC7120","ANNIVKFARKDALVAA"}, { "Nostoc punctiforme","ANNIVNFARKDALVAA"}, { "Nostoc sp. PCC 7120","ANNIVKFARKDALVAA"}, { "Novosphingobium aromaticivorans","ANDNEALALAA"}, { "Novosphingobium sp. PP1Y","ANDNEALALAA"}, { "Oceanimonas sp. GK1","ANDENYALAA"}, { "Oceanithermus profundus","GNDNYALAA"}, { "Oceanobacillus iheyensis","GKETNQPVLAAA"}, { "Ochrobactrum anthropi","ANDNKAQGYALAA"}, { "Odontella sinensis chloroplast","ANNLISSVFKSLSTKQNSLNLSFAV"}, { "Odoribacter splanchnicus","GENNYALAA"}, { "Oenococcus oeni","AKNNEPSYALAA"}, { "Oligotropha carboxidovorans","ANDNYAPVAQAA"}, { "Olsenella uli","DNDSYQGSYALAA"}, { "Ornithobacterium rhinotracheale","GNNEYALAA"}, { "Oscillatoria 6304","ANNIVPFARKAAPVAA"}, { "Oscillatoria acuminata","ANNIVPFARKAAPVAA"}, { "Owenweeksia hongkongensis","GENNFALAA"}, { "Paenibacillus larvae","GKQQNNYALAA"}, { "Paenibacillus mucilaginosus","GNQKQQLAFAA"}, { "Paenibacillus polymyxa","GKQQNNYAFAA"}, { "Paenibacillus sp. JDR-2","GKQQQTYAFAA"}, { "Paenibacillus sp. 
Y412MC10","GKQQNNYAFAA"}, { "Paenibacillus terrae","GKQQNNYAFAA"}, { "Paludibacter propionicigenes","GENNYALAA"}, { "Pantoea ananatis","ANDENYALAA"}, { "Pantoea sp. At-9b","ANDNYYDAPAALAA"}, { "Pantoea stewartii","ANDENYALAA"}, { "Pantoea vagans","ANDENYALAA"}, { "Parabacteroides distasonis","GENNYALAA"}, { "Parachlamydia acanthamoebae","ADSVSYAAAA"}, { "Parachlamydia UWE25","ANNSNKIAKVDFQEGTFARAA"}, { "Paracoccus denitrificans","ANDNRAPVALAA"}, { "Parvibaculum lavamentivorans","ANDNYAEARLAA"}, { "Parvularcula bermudensis","ANDNSSEGFALAA"}, { "Pasteurella multocida","ANDEQYALAA"}, { "Pavlova lutheri chloroplast","ANNILSFNRVAVA"}, { "Pectobacterium atrosepticum","ANDENYALAA"}, { "Pectobacterium carotovora","ANDENYALAA"}, { "Pectobacterium carotovorum","ANDENYALAA"}, { "Pectobacterium wasabiae","ANDENYALAA"}, { "Pediococcus claussenii","AKNNNNSYALAA"}, { "Pediococcus pentosaceus","AKNNNNSYALAA"}, { "Pedobacter heparinus","GENNYALAA"}, { "Pedobacter saltans","ENNYALAA"}, { "Pelagibacter sp. IMCC9063","ANESYAIAA"}, { "Pelagibacter ubique","ADESYALAA"}, { "Pelagibacterium halotolerans","ANDNNKAPVALAA"}, { "Pelobacter carbinolicus","ADTDVSYALAA"}, { "Pelobacter propionicus","ADNYNTPVALAA"}, { "Pelodictyon phaeoclathratiforme","ADDYSYAMAA"}, { "Pelotomaculum thermopropionicum","AKENYALAA"}, { "Petrotoga mobilis","GGSSLPKFSWNLA"}, { "Phaeobacter gallaeciensis","ANDNRAPAMAVAA"}, { "Photobacterium phosphoreum","ANDENYALAA"}, { "Photobacterium profundum","ANDENFALAA"}, { "Photorhabdus asymbiotica","ANDNEYALVA"}, { "Photorhabdus luminescens","ANDEKYALAA"}, { "Phycisphaera mikurensis","ANDENTIAGRIGFGNDALRLAA"}, { "Phytoplasma australiense","GKQTNSASEGDQIYNWVPSQSSQNLQQLAFA"}, { "Pirellula sp.","AEENFALAA"}, { "Pirellula staleyi","AESNLALAA"}, { "Planctomyces brasiliensis","ANKQYAMVA"}, { "Planctomyces limnophilus","ANTGNYALAA"}, { "Plectonema boryanum","ANNIVPFARKTAPVAA"}, { "Polaromonas JS666","ANDERFALAA"}, { "Polaromonas naphthalenivorans","ANDERFALAA"}, { "Polaromonas sp. 
JS666","ANDERFALAA"}, { "Polymorphum gilvum","ANDNYASDVALAA"}, { "Polynucleobacter necessarius","ANDERFALAA"}, { "Porphyra purpurea chloroplast","AENNIIAFSRKLAVA"}, { "Porphyromonas asaccharolytica","AETRHHPGGRCSEAL"}, { "Porphyromonas gingivalis","GENNYALAA"}, { "Prevotella denticola","GENNYALAA"}, { "Prevotella intermedia","GENNYALAA"}, { "Prevotella melaninogenica","GENNYALAA"}, { "Prevotella ruminicola","GNNEYALAA"}, { "Prochlorococcus marinus 1","ANKIVSFSRQTAPVAA"}, { "Prochlorococcus marinus 2","ANNIVRFSRQPALVAA"}, { "Prochlorococcus marinus 3","ANKIVSFSRQTAPVAA"}, { "Prochlorococcus marinus","ANNIVSFSRQTAPVAA"}, { "Propionibacterium acidipropionici","ADNKRTDFALAA"}, { "Propionibacterium acnes 1","AENTRTDFALAA"}, { "Propionibacterium acnes 2","AENTRTDFALAA"}, { "Propionibacterium freudenreichii","ADTNRTDFALAA"}, { "Propionibacterium propionicum","ANNSRTDFALAA"}, { "Prosthecochloris aestuarii","ADDYSYAMAA"}, { "Proteobacteria SAR-1, version 1","GENADYALAA"}, { "Proteobacteria SAR-1, version 2","ANNYNYSLAA"}, { "Proteobacteria SAR-1, version 3","ADNGYMAAA"}, { "Proteus mirabilis","ANDNQYKALAA"}, { "Protochlamydia amoebophila","ANNSNKIAKVDFQEGTFARAA"}, { "Providencia rettgeri","ANDENYALAA"}, { "Providencia stuartii","ANDENYALAA"}, { "Pseudoalteromonas atlantica","ANDENYALAA"}, { "Pseudoalteromonas haloplanktis","ANDDNYSLAA"}, { "Pseudoalteromonas sp. SM9913","ANDDNYSLAA"}, { "Pseudogulbenkiania sp. 
NH8B","ANDETYALAA"}, { "Pseudomonas aeruginosa","ANDDNYALAA"}, { "Pseudomonas brassicacearum","ANDENYGQEFAIAA"}, { "Pseudomonas chlororaphis","ANDETYGEYALAA"}, { "Pseudomonas entomophila","ANDENYEGYALAA"}, { "Pseudomonas fluorescens 1","ANDDQYGAALAA"}, { "Pseudomonas fluorescens 2","ANDENYGQEFALAA"}, { "Pseudomonas fluorescens 3 (Pf-5)","ANDETYGDYALAA"}, { "Pseudomonas fulva","ANDENYEGYALAA"}, { "Pseudomonas mendocina","ANDDNYALAA"}, { "Pseudomonas protegens","ANDETYGDYALAA"}, { "Pseudomonas putida 1","ANDENYGAEYKLAA"}, { "Pseudomonas stutzeri","ANDDNYEGYALAA"}, { "Pseudomonas syringae 1","ANDENYGAQLAA"}, { "Pseudomonas syringae 2","ANDETYGEYALAA"}, { "Pseudomonas syringae 3","ANDENYGAQLAA"}, { "Pseudonocardia dioxanivorans","ADKSQRAYALAA"}, { "Pseudovibrio sp. JE062","ANDNYAMDNAVAA"}, { "Pseudoxanthomonas spadix","ANDDNYGSDFALAA"}, { "Pseudoxanthomonas suwonensis","ANDDNYALAA"}, { "Psychrobacter 2734","ANDENYALAA"}, { "Psychrobacter arcticus","ANDENYALAA"}, { "Psychrobacter cryohalolentis","ANDENYALAA"}, { "Psychrobacter sp. PRwf-1","ANDETYALAA"}, { "Psychroflexus torquis","GEDNYALAA"}, { "Psychromonas ingrahamii","ANDSNYSLAA"}, { "Pusillimonas sp. T7-7","ANDERFALAA"}, { "Rahnella aquatilis","ANDENYALAA"}, { "Rahnella sp. 
Y9602","ANDENYALAA"}, { "Ralstonia eutropha","ANDERYALAA"}, { "Ralstonia metallidurans","ANDERYALAA"}, { "Ralstonia pickettii","ANDERYALAA"}, { "Ralstonia solanacearum","ANDNRYQLAA"}, { "Ramlibacter tataouinensis","ANDERFALAA"}, { "Renibacterium salmoninarum","ANSKRTDFALAA"}, { "Rhizobium etli","ANDNYAEARLAA"}, { "Rhizobium leguminosarum","ANDNYAEARLAA"}, { "Rhodobacter capsulatus","ANDNRAPVALAA"}, { "Rhodobacter sphaeroides","ANDNRAPVALAA"}, { "Rhodococcus equi","AESTQREYALAA"}, { "Rhodococcus erythropolis","ADSNQRDYALAA"}, { "Rhodococcus jostii","ADSNQRDYALAA"}, { "Rhodococcus opacus","ADSNQRDYALAA"}, { "Rhodoferax ferrireducens","ANDERFALAA"}, { "Rhodomicrobium vannielii","ANDNYAGARPVAIAA"}, { "Rhodomonas salina","ANNIVPFSRKVALV"}, { "Rhodopirellula baltica","AEENFALAA"}, { "Rhodopseudomonas palustris","ANDNYAPVAQAA"}, { "Rhodopseudomonas palustris 4","ANDNVRMNEVRLAA"}, { "Rhodospirillum centenum","ANDNTAPALRMAA"}, { "Rhodospirillum photometricum","ANDNVELAAAA"}, { "Rhodospirillum rubrum","ANDNVELAAAA"}, { "Rhodothermus marinus","ANDYSYAMAA"}, { "Rickettsia africae","ANDNNRSVGHLALAA"}, { "Rickettsia amblyommii","ANDNNRSVGRLALAA"}, { "Rickettsia australis","ANDNNRSVDLALAA"}, { "Rickettsia bellii","ANDNYRSAGTPALAVA"}, { "Rickettsia conorii","ANDNNRSVGHLALAA"}, { "Rickettsia heilongjiangensis","ANDNNRSVGRLALAA"}, { "Rickettsia massiliae","ANDNNRSVGRLALAA"}, { "Rickettsia montanensis","ANDNNRSVGRLALAA"}, { "Rickettsia parkeri","ANDNNRSVGHLALAA"}, { "Rickettsia peacockii","ANDNNRSVGRLALAA"}, { "Rickettsia philipii","ANDNNRSVGRLALAA"}, { "Rickettsia prowazekii","ANDNRYVGVPALAAA"}, { "Rickettsia rhipicephali","ANDNNRSVGRLALAA"}, { "Rickettsia rickettsii","ANDNNRSVGRLALAA"}, { "Rickettsia sibirica","ANDNNRSVGHLALAA"}, { "Rickettsia slovaca","ANDNNRSVGRLALAA"}, { "Rickettsia typhi","ANDNKRYVGVAALAAA"}, { "Riemerella anatipestifer","GNEEFALAA"}, { "Riesia pediculicola","AKTKNYAYAQAA"}, { "Robiginitalea biformata","GDNNYALAA"}, { "Roseburia hominis","AEDNLAYAA"}, { 
"Roseiflexus castenholzii","ANNNKVVAFKPAMALAA"}, { "Roseiflexus sp. RS-1","ANTNKVVAFKPAMALAA"}, { "Roseobacter denitrificans","ANDNRAPVAMAA"}, { "Roseobacter litoralis","ANDNRAPVAMAA"}, { "Rothia dentocariosa","AKSKRTDFALAA"}, { "Rothia mucilaginosa","AESKRTDFALAA"}, { "Rubrivivax gelatinosus","ANDERFALAA"}, { "Rubrobacter xylanophilus","ANDREMALAA"}, { "Ruegeria pomeroyi","ANDNRAPVALAA"}, { "Ruegeria sp. TM1040","ANDNRAPVALAA"}, { "Ruminococcus albus","GHGYFAKAS"}, { "Ruminococcus albus","DNDNFAMAA"}, { "Runella slithyformis","GEYSYAMAA"}, { "Ruthia magnifica","ANENNYALAA"}, { "Saccharomonospora viridis","AKTNSQRDFALAA"}, { "Saccharophagus degradans","ANDDNYGAQLAA"}, { "Saccharopolyspora erythraea","ADKSQREFALAA"}, { "Salinibacter ruber","ADDYSYAMAA"}, { "Salinispora arenicola","AKQNRADFALAA"}, { "Salinispora tropica","AKQNRADFALAA"}, { "Salmonella bongori","ANDENYALAA"}, { "Salmonella enterica 1","ANDETYALAA"}, { "Salmonella enterica 2","ANDENYALAA"}, { "Salmonella enterica 3","ANDETYALAA"}, { "Salmonella enterica 5","ANDETYALAA"}, { "Salmonella enterica 6","ANDENYALAA"}, { "Salmonella paratyphi","ANDENYALAA"}, { "Salmonella typhimurium","ANDETYALAA"}, { "Salmonella typhi","ANDETYALAA"}, { "Sanguibacter keddieii","ADSKRTDFALAA"}, { "Saprospira grandis","GNTNYALAA"}, { "Sebaldella termitidis","GNDNYALAA"}, { "secondary endosymbiont","ANDSQFESKTALAA"}, { "Segniliparus rotundus","ADTTQRDYALAA"}, { "Selenomonas ruminantium","DEFDYAYAA"}, { "Selenomonas sputigena","ANEDYALAA"}, { "Serratia marcescens","ANDENYALAA"}, { "Serratia plymuthica","ANDSQFESAALAA"}, { "Serratia proteamaculans","ANDSQFESAALAA"}, { "Serratia symbiotica","ANDENYALAA"}, { "Shewanella amazonensis","ANDDNYALAA"}, { "Shewanella ANA-3","ANDDNYALAA"}, { "Shewanella baltica","ANDSNYSLAA"}, { "Shewanella denitrificans","ANDSNYSLAA"}, { "Shewanella frigidimarina","ANDSNYSLAA"}, { "Shewanella halifaxensis","ANDSNYSLAA"}, { "Shewanella loihica","ANDDNYALAA"}, { "Shewanella oneidensis","ANDDNYALAA"}, { 
"Shewanella pealeana","ANDSNYSLAA"}, { "Shewanella piezotolerans","ANDDNYSLAA"}, { "Shewanella putrefaciens","ANDDNYALAA"}, { "Shewanella PV-4","ANDDNYALAA"}, { "Shewanella SAR-1","ANDDNYALAA"}, { "Shewanella SAR-1, version 2","ANNDNYALAA"}, { "Shewanella SAR-2, version 2","ADYGYMAAA"}, { "Shewanella sediminis","ANDSNYSLAA"}, { "Shewanella sp. ANA-3","ANDDNYALAA"}, { "Shewanella sp. MR-4","ANDDNYALAA"}, { "Shewanella sp. MR-7","ANDDNYALAA"}, { "Shewanella sp. W3-18-1","ANDDNYALAA"}, { "Shewanella violacea","ANDSNYSLAA"}, { "Shewanella woodyi","ANDDNYALAA"}, { "Shigella boydii","ANDENYALAA"}, { "Shigella dysenteriae 1","ANDENYALAA"}, { "Shigella dysenteriae 2","ANDENYALAA"}, { "Shigella flexneri","ANDENYALAA"}, { "Shigella sonnei","ANDENYALAA"}, { "Shimwellia blattae","ANDENYALAA"}, { "Sideroxydans lithotrophicus","ANDEKYALAA"}, { "Silicibacter pomeroyi","ANDNRAPVALAA"}, { "Silicibacter TM1040","ANDNRAPVALAA"}, { "Simiduia agarivorans","ANDDNYGAQLAA"}, { "Simkania negevensis","VDTTEDFYLEAA"}, { "Sinorhizobium fredii","ANDNYAEARLAA"}, { "Sinorhizobium medicae","ANDNYAEARLAA"}, { "Sinorhizobium meliloti","ANDNYAEARLAA"}, { "Slackia heliotrinireducens","GKSYNTGRMALAA"}, { "Sodalis glossinidius","ANDSQFESNAALAA"}, { "Solibacillus silvestris","GKQQNFAFAA"}, { "Solibacter usitatus","ANTQFAYAA"}, { "Solitalea canadensis","GENNYALAA"}, { "Sorangium cellulosum","ANDNAYAVAA"}, { "Sphaerobacter thermophilus","GNESYALAA"}, { "Sphaerochaeta coccoides","AKKEDENVSYDAEYAFAA"}, { "Sphaerochaeta globosa","AKKEDEVSFNAEYAFAA"}, { "Sphaerochaeta pleomorpha","AKKEDEVSFNAEYALAA"}, { "Sphingobacterium sp. 21","GENNYALAA"}, { "Sphingobium chlorophenolicum","ANDNEALALAA"}, { "Sphingobium japonicum","ANDNEALALAA"}, { "Sphingobium sp. 
SYK-6","ANDNEALALAA"}, { "Sphingomonas elodea","ANDNEALAIAA"}, { "Sphingomonas wittichii","ANDNEALAIAA"}, { "Sphingopyxis alaskensis","ANDNEALALAA"}, { "Spirochaeta africana","AKNEDNVVEVAFGNDDTMLAAA"}, { "Spirochaeta smaragdinae","ANDADYALAA"}, { "Spirochaeta thermophila","ANDELALAA"}, { "Spiroplasma kunkelii","ASKKQKEDKIEMPAFMMNNQLAVSMLAA"}, { "Spirosoma linguale","GEYNYAMAA"}, { "Stackebrandtia nassauensis","AKTESRSSFALAA"}, { "Staphylococcus aureus","GKSNNNFAVAA"}, { "Staphylococcus carnosus","GKTNNNLAVAA"}, { "Staphylococcus epidermidis","DKSNNNFAVAA"}, { "Staphylococcus haemolyticus","DKSNNNFAVAA"}, { "Staphylococcus lugdunensis","GKSNNNFAVAA"}, { "Staphylococcus pseudintermedius","GKTNNNFAVAA"}, { "Staphylococcus saprophyticus","GKENNNFAVAA"}, { "Staphylococcus xylosus","GKENNNFAVAA"}, { "Starkeya novella","ANDNYAPVAQAA"}, { "Stenotrophomonas maltophilia","ANDDNYALAA"}, { "Stigmatella aurantiaca","DGKDTKANDNVELALAA"}, { "Streptobacillus moniliformis","GKNNFALAA"}, { "Streptococcus agalactiae","AKNTNSYALAA"}, { "Streptococcus bovis","AKNTNSYAVAA"}, { "Streptococcus constellatus","AKNNNSYALAA"}, { "Streptococcus criceti","AKNTNSYAVAA"}, { "Streptococcus dysgalactiae","AKNTNSYALAA"}, { "Streptococcus equi","AKNNTTYALAA"}, { "Streptococcus gallolyticus","AKNTNSYAVAA"}, { "Streptococcus gordonii","AKNNTSYALAA"}, { "Streptococcus macedonicus","AKNTNSYAVAA"}, { "Streptococcus mitis","AKNNTSYALAA"}, { "Streptococcus mutans","AKNTNSYAVAA"}, { "Streptococcus oralis","AKNNTSYALAA"}, { "Streptococcus parasanguinis","AKNNNSYALAA"}, { "Streptococcus parauberis","AKNTNTYALAA"}, { "Streptococcus pneumoniae","AKNNTSYALAA"}, { "Streptococcus pseudopneumoniae","AKNNTSYALAA"}, { "Streptococcus pyogenes","AKNTNSYALAA"}, { "Streptococcus salivarius","AQLNITAKNTNSYAVAA"}, { "Streptococcus sanguinis","AKNNNSYALAA"}, { "Streptococcus sobrinus","AKNTNSYAVAA"}, { "Streptococcus suis","AKNTNTYALAA"}, { "Streptococcus thermophilus","AKNTNSYAVAA"}, { "Streptococcus uberis","AKNTNSYALAA"}, 
{ "Streptococcus zooepidemicus","AKNNTTYALAA"}, { "Streptomyces aureofaciens","ANSKRDSQQFALAA"}, { "Streptomyces avermitilis","ANTKSDSQSFALAA"}, { "Streptomyces avermitilus","ANTKSDSQSFALAA"}, { "Streptomyces bingchenggensis","ANTKRDSFALAA"}, { "Streptomyces cattleya","ANNKRDSFALAA"}, { "Streptomyces coelicolor","ANTKRDSSQQAFALAA"}, { "Streptomyces collinus","ANTKRDSSSFALAA"}, { "Streptomyces flavogriseus","ANSKRDSSAFALAA"}, { "Streptomyces griseus","ANSKRDSSAFALAA"}, { "Streptomyces hygroscopicus","ANTKRDSFALAA"}, { "Streptomyces lividans","ANTKRDSSQQAFALAA"}, { "Streptomyces scabiei","ANSKSDSPQQQFSLAA"}, { "Streptomyces sp. SirexAA-E","ANTKRDSSAFALAA"}, { "Streptomyces thermophilus","AKNTNSYAVAA"}, { "Streptomyces venezuelae","ANSKSDNSRFALAA"}, { "Streptomyces violaceusniger","ANTKRDSFALAA"}, { "Streptosporangium roseum","ANKTHSEVSQGNLALAA"}, { "Sulcia muelleri","GKKNYALAA"}, { "Sulfuricurvum kujiense","ANNTNYRPAYAVA"}, { "Sulfurimonas autotrophica","ANNTNYRPALAVA"}, { "Sulfurimonas denitrificans","ANNTNYRPAYAVA"}, { "Sulfurospirillum barnesii","ANNSNYRPAYAVA"}, { "Sulfurospirillum deleyianum","ANNSNYRPAYALAA"}, { "Sulfurovum sp. NBC37-1","ANNTDYRPAYAVA"}, { "Synechococcus elongatus","ANNIVPFARKAAPVAA"}, { "Synechococcus sp. CC9311","ANNIVRFSRQAAPVAA"}, { "Synechococcus sp. CC9605","ANNIVRFSRQAAPVAA"}, { "Synechococcus sp. CC9902","ANNIVRFSRQAAPVAA"}, { "Synechococcus sp. JA-2-3B'a(2-13)","ANNVVPFARKAAALAA"}, { "Synechococcus sp. JA-3-3Ab (version 1)","ANNVVPFARKAAALAA"}, { "Synechococcus sp. JA-3-3Ab (version 2)","ANNVVPFARKAAALAA"}, { "Synechococcus sp. PCC 6301","ANNIVPFARKAAPVAA"}, { "Synechococcus sp. PCC 6307","ANNIVRFSRQAAPVAA"}, { "Synechocystis sp. PCC 6803","ANNIVSFKRVAIAA"}, { "Synechococcus sp. PCC 6904","ANNIVRFSRQAAPVAA"}, { "Synechococcus sp. PCC 7002","ANNIVPFARKAAAVA"}, { "Synechococcus sp. PCC 7009","ANNIVRFSRQAAPVAA"}, { "Synechococcus sp. RCC307","ANNIVRFSRQAAPVAA"}, { "Synechococcus sp. WH 7803","ANNIVRFSRQAAPVAA"}, { "Synechococcus sp. 
WH 8102","ANNIVRFSRHAAPVAA"}, { "Syntrophobacter fumaroxidans","ADDYAYAVAA"}, { "Syntrophomonas wolfei","AEDNFALAA"}, { "Syntrophothermus lipocalidus","ANNELALAA"}, { "Syntrophus aciditrophicus","ANDYEYALAA"}, { "Tannerella forsythensis","GENNYALAA"}, { "Tannerella forsythia","GENNYALAA"}, { "Taylorella asinigenitalis","ANDDKFALAA"}, { "Taylorella equigenitalis","ANDENFALAA"}, { "Tepidanaerobacter acetatoxydans","ANNDLAYAA"}, { "Teredinibacter turnerae","ANDDNYGAQLAA"}, { "Terriglobus roseus","AEPQFALAA"}, { "Terriglobus saanensis","AEPQFALAA"}, { "Tetragenococcus halophilus","AKNNNNSYALAA"}, { "Thalassiosira pseudonana chloroplast","ANNIMPFMFNVVKTNRSLTTLNFAV"}, { "Thalassiosira weissflogii chloroplast","ANNIIPFIFKAVKTKKEAMALNFAV"}, { "Thauera sp. MZ1T","ANDERFALAA"}, { "Thermacetogenium phaeum","ANNEYALAA"}, { "Thermaerobacter marianensis","ANEELALAA"}, { "Thermanaerovibrio acidaminovorans","ANDNYALAA"}, { "Thermincola potens","AEENYALAA"}, { "Thermoanaerobacter italicus","ADRELAYAA"}, { "Thermoanaerobacter mathranii","ADRELAYAA"}, { "Thermoanaerobacter pseudethanolicus","ADRELAYAA"}, { "Thermoanaerobacter sp. 
X514","ADRELAYAA"}, { "Thermoanaerobacter tengcongensis","ADRELAYAA"}, { "Thermoanaerobacter wiegelii","ADRELAYAA"}, { "Thermoanaerobacterium saccharolyticum","ANDNLAYAA"}, { "Thermoanaerobacterium thermosaccharolyticum","ANNDNLAYAA"}, { "Thermoanaerobacterium xylanolyticum","ANDNLAYAA"}, { "Thermobaculum terrenum","ANTEYALAA"}, { "Thermobifida fusca","ANSKRTEFALAA"}, { "Thermobispora bispora","ANKKHAEVSQASLALAA"}, { "Thermodesulfatator indicus","ADEYNYAMAA"}, { "Thermodesulfobacterium commune","ANEYAYALAA"}, { "Thermodesulfobacterium geofontis","ADEYSYALAA"}, { "Thermodesulfobium narugense","ANNNSLALAA"}, { "Thermodesulfovibrio yellowstonii","ANNELALAA"}, { "Thermomicrobium roseum","GERELALAA"}, { "Thermomonospora curvata","ANKKQSEFALAA"}, { "Thermosediminibacter oceani","ANEELALAA"}, { "Thermosipho africanus","ANEELALAA"}, { "Thermosipho melanesiensis","ANEEIALAA"}, { "Thermosynechococcus elongatus","ANNIVPFARKAAAVA"}, { "Thermotoga lettingae","ANNELALAA"}, { "Thermotoga maritima ","ANEPVAVAA"}, { "Thermotoga neapolitana","ANEPVAVAA"}, { "Thermotoga petrophila","ANEPVAVAA"}, { "Thermotoga sp. RQ2","ANEPVAVAA"}, { "Thermotoga thermarum","ANEELALAA"}, { "Thermovibrio ammonificans","ADETLALAA"}, { "Thermovirga lienii","ANENYALAA"}, { "Thermus oshimai","ANKPAYALAA"}, { "Thermus scotoductus","ANKPAYALAA"}, { "Thermus sp. CCB_US3_UF1","ANKPAYALAA"}, { "Thermus thermophilus","ANTNYALAA"}, { "Thioalkalimicrobium cyclicum","ANDDNYALAA"}, { "Thioalkalivibrio sp. K90mix","ANDDNYALAA"}, { "Thiobacillus denitrificans","AKSKAARRNPACSAGVMELKA"}, { "Thiocystis violascens","ANDDNYALAA"}, { "Thiomicrospira crunogena","ANDDNYALAA"}, { "Thiomonas intermedia","ANDSSYALAA"}, { "Thiomonas sp. 
3As","ANDSSYALAA"}, { "Tistrella mobilis","ANDNRVALAA"}, { "Tolumonas auensis","ANDETYALAA"}, { "Tremblaya princeps 1 (Dysmicoccus)","APSNRFTIVANDCIDALVRRAVV"}, { "Treponema azotonutricium","ADNDNYNYALAA"}, { "Treponema brennaborense","AEDNRQFALAA"}, { "Treponema caldaria","ADNDSYALAA"}, { "Treponema denticola","AENNDSFDYALAA"}, { "Treponema pallidum","ANSDSFDYALAA"}, { "Treponema primitia","ANNDSYAFAA"}, { "Treponema succinifaciens","AKRREDEQSENEQFALAA"}, { "Trichodesmium erythraeum","ANNIVPFARKQVAALA"}, { "Tropheryma whipplei","ANLKRTDLSLAA"}, { "Truepera radiovictrix","GNSNSYALAA"}, { "Tsukamurella paurometabola","ADSNQRDFALAA"}, { "Turneriella parva","AENETYALAA"}, { "uncultured bacterium","ANDNFAPVAVAA"}, { "Uncultured ciona","ANDEFFDARLRA"}, { "Uncultured FS1","ANDETYALAA"}, { "Uncultured FS2","ANDENYALAA"}, { "Uncultured LEM1","ANDETYALAA"}, { "Uncultured LEM2","ANDETHALAA"}, { "Uncultured marineEBAC20E09","ANNDNYALAA"}, { "Uncultured phakopsora","ANDNSYALAA"}, { "Uncultured QL1","ANVENYALAA"}, { "Uncultured RCA1","ANDENYALAA"}, { "Uncultured RCA2","SNDENYALAA"}, { "Uncultured RCA4","ANDETYALAA"}, { "Uncultured remanei","ANDESYALAA"}, { "Uncultured stronglyoides1","ANDERFALAA"}, { "Uncultured U01a","ANDSNYALAA"}, { "Uncultured U02","ANDEQFALAA"}, { "Uncultured U04","ANDETYALAA"}, { "Uncultured VLS13","ANDENYALAA"}, { "Uncultured VLS1","ANDENYALAA"}, { "Uncultured VLS5","ANDETYALAA"}, { "Uncultured VLS6","ANDENYALAA"}, { "Uncultured VLS7","ANDENYALAA"}, { "Uncultured VLS9","ANDENYALAA"}, { "Uncultured VLW1","ANDENYALAA"}, { "Uncultured VLW2","ANDENYALAA"}, { "Uncultured VLW3","ANDENYALAA"}, { "Uncultured VLW5","ANDENYALAA"}, { "Uncultured WW10","ANDENYALAV"}, { "Uncultured WW11","ANDDNYALAA"}, { "Uncultured WW1","ANDENYALAA"}, { "Uncultured WW2","ANDENYALAA"}, { "Uncultured WW4","ANDGNYALAA"}, { "Uncultured WW5","ANDENYALAA"}, { "Uncultured WW7","ANDENCALAA"}, { "Uncultured WW8","ANDENYALAA"}, { "Uncultured WW9","ANDENYALAA"}, { "Ureaplasma 
parvum","AENKKSSEVELNPAFMASATNANYAFAY"}, { "Ureaplasma urealyticum","AENKKSSEVELNPAFMASATNANYAFAY"}, { "Variovorax paradoxus","ANDERFALAA"}, { "Veillonella parvula","AEENFALAA"}, { "Verminephrobacter eiseniae","ANDERFALAA"}, { "Verrucomicrobium spinosum","ANSNELALAA"}, { "Verrucosispora maris","AKHNRADFALAA"}, { "Vesicomyosocius okutanii","ENENNYALAA"}, { "Vibrio anguillarum","ANDENYALAA"}, { "Vibrio campbellii","ANDENYALAA"}, { "Vibrio cholerae","ANDENYALAA"}, { "Vibrio Ex25","ANDENYALAA"}, { "Vibrio fischeri","ANDENYALAA"}, { "Vibrio furnissii","ANDENYALAA"}, { "Vibrio parahaemolyticus","ANDENYALAA"}, { "Vibrio parahemolyticus","ANDENYALAA"}, { "Vibrio sp. EJY3","ANDENYALAA"}, { "Vibrio sp. Ex25","ANDENYALAA"}, { "Vibrio splendidus","ANDENYALAA"}, { "Vibrio vulnificus","ANDENYALAA"}, { "Waddlia chondrophila","ADLDLATAAVAA"}, { "Weeksella virosa","GNEEYALAA"}, { "Weissella koreensis","AKNSNNLAFAA"}, { "Wigglesworthia brevipalpis","AKHKYNEPALLAA"}, { "Wigglesworthia glossinidia","AKHKYNEPALLAA"}, { "Wolbachi.sp","ANDNFAAEDNVDAIAA"}, { "Wolbachia endosymbiont","ANDNFAAEEYRVAA"}, { "Wolbachia sp. 2 (Brugi)","ANDNFAAEGDVAVAA"}, { "Wolbachia sp. 3 (Culex)","ANDNFAAEDNVALAA"}, { "Wolbachia sp. 
4 (Dros.)","ANDNFAAEEYRVAA"}, { "Wolinella succinogenes","ALSSHPKRGKRLGLPITSALGA"}, { "Xanthobacter autotrophicus","ANDNYAPVAQAA"}, { "Xanthomonas albilineans","ANDDNYALAA"}, { "Xanthomonas axonopodis","ANDDNYGSDFAIAA"}, { "Xanthomonas campestris 1","ANDDNYGSDFAIAA"}, { "Xanthomonas campestris 2","ANDDNYGSDSAIAA"}, { "Xanthomonas oryzae","ANDDNYGSDFAIAA"}, { "Xenorhabdus bovienii","ANDENYALAA"}, { "Xenorhabdus nematophila","ANDENYALAA"}, { "Xylanimonas cellulosilytica","ADNTRNDFALAA"}, { "Xylella fastidiosa 1","ANEDNFAVAA"}, { "Xylella fastidiosa 2","ANEDNFALAA"}, { "Xylella fastidiosa 3","ANEDNFAIAA"}, { "Xylella fastidiosa 4","ANEDNFALAA"}, { "Yersinia bercovieri","ANDSQYESAALAA"}, { "Yersinia enterocolitica","ANDSQYESAALAA"}, { "Yersinia frederiksenii","ANDENYALAA"}, { "Yersinia intermedia","ANDSQYESAALAA"}, { "Yersinia mollaretii","ANDSQYESAALAA"}, { "Yersinia pestis","ANDENYALAA"}, { "Yersinia pseudotuberculosis","ANDENYALAA"}, { "Zobellia galactanivorans","GENNYALAA"}, { "Zunongwangia profunda","GENNYALAA"} }; /* TOOLS */ char upcasec(char c) { return((c >= 'a')?c-32:c); } int length(char *s) { int i = 0; while (*s++) i++; return(i); } char *softmatch(char *s, char *key) { while (upcasec(*key) == upcasec(*s)) { if (!*key++) return(s); s++; } if (*key) return(NULL); return(s); } char *strpos(char *s, char *k) { char c,d; int i; d = *k; while (c = *s) { if (c == d) { i = 0; do if (!k[++i]) return(s); while (s[i] == k[i]); } s++; } return(NULL); } char *softstrpos(char *s, char *k) { char c,d; int i; d = upcasec(*k); while (c = *s) { if (upcasec(c) == d) { i = 0; do if (!k[++i]) return(s); while (upcasec(s[i]) == upcasec(k[i])); } s++; } return(NULL); } char *wildstrpos(char *s, char *k) { char c,d; int i; d = upcasec(*k); while (c = *s) { if ((upcasec(c) == d) || (d == '*')) { i = 0; do if (!k[++i]) return(s); while ((upcasec(s[i]) == upcasec(k[i])) || (k[i] == '*')); } s++; } return(NULL); } char *marginstring(char *s, char *k, int margin) { char c,d; int i,j; 
j = 0;
d = *k;
/* j counts the start positions tried so far; the search is abandoned
   once `margin` of them have been examined. */
while (c = *s)
 { if (c == d)
    { i = 0;
      do if (!k[++i]) return(s);
      while (s[i] == k[i]); }
   s++;
   if (++j >= margin) break; }
return(NULL); }

/*
 * margindetect - test whether text on `line` begins inside the margin.
 *
 * Leading characters accepted by space() are counted, a tab counting
 * as 8.  Returns 0 as soon as that count reaches `margin`, and also 0
 * when the first other character is '\n', '\r' or the terminating NUL.
 * Returns 1 otherwise.
 */
int margindetect(char *line, int margin)
{ int i;
  char c,*s;
  i = 0;
  s = line;
  while (c = *s++)
   { if (!space(c)) break;
     /* a tab contributes 7 here plus the 1 added just below */
     if (c == '\t') i += 7;
     if (++i >= margin) return(0); }
  if (c == '\n') return(0);
  if (c == '\r') return(0);
  if (c == '\0') return(0);
  return(1); }

/*
 * backword - step backwards from s, word by word, within `line`.
 *
 * Scans from s towards `line`.  Each time the scan moves from a word
 * onto a space() character one word has been passed; when the count n
 * is used up, a pointer to the first character of the word just passed
 * is returned.  If s starts inside a word, that word is passed first
 * without being counted (n is incremented to allow for it).  When the
 * start of `line` is reached, `line` itself is returned if it holds a
 * non-space character and at most one word was still wanted; otherwise
 * NULL.
 */
char *backword(char *line, char *s, int n)
{ int spzone;
  /* spzone is 1 while the scan is on space characters */
  if (space(*s))
   { spzone = 1; }
  else
   { spzone = 0;
     n++; }
  while (s > line)
   { if (space(*s))
      { if (spzone == 0)
         { spzone = 1;
           if (--n <= 0) return(++s); }}
     else spzone = 0;
     s--; }
  if (!space(*s))
   if (n <= 1) return(s);
  return(NULL); }

/*
 * dconvert - parse a decimal number at s and scale *r by it.
 *
 * Accepts an optional sign, integer digits, an optional '.' with
 * fraction digits, and an optional exponent introduced by E, e, D or d
 * with its own optional sign.  Only about the first 15 digits of the
 * mantissa are accumulated; later digits are consumed but ignored.
 *
 * On return *r has been MULTIPLIED by 0.01 times the parsed value; it
 * is not overwritten.
 * NOTE(review): this looks like the input is a percentage applied to
 * an existing value - confirm against the callers.
 *
 * Returns a pointer to the first character not consumed.
 */
char *dconvert(char *s, double *r)
{ static char zero='0',nine='9';
  int shift,expshift,sgn,expsgn,exponent;
  char c,limit;
  double result;
  shift = 0;
  expshift = 0;
  sgn = 1;
  expsgn = 1;
  limit = 0;
  exponent = 0;
  result = 0.0;
  if ((c = *s) == '-')
   { sgn = -1;
     c = *++s; }
  else if (c == '+') c= *++s;
  /* integer part */
  if (c >= zero) if (c <= nine)
   { result = (double)(c - zero);
     while ((c = *++s) >= zero)
      { if (c > nine) break;
        if (++limit < 15) result = result*10.0 + (double)(c - zero); }}
  /* fraction: digits extend the mantissa, shift records how many
     decimal places must be removed again through the exponent */
  if (c == '.')
   while ((c = *++s) >= zero)
    { if (c > nine) break;
      if (++limit < 15)
       { result = result*10.0 + (double)(c - zero);
         shift++; }}
  /* exponent: parsing stops after the fourth digit following the first */
  if ((c == 'E')||(c == 'e')||(c == 'D')||(c == 'd'))
   { if ((c = *++s) == '-')
      { expsgn = -1;
        c = *++s; }
     else if (c == '+') c = *++s;
     if (c >= zero) if (c <= nine)
      { exponent = c - zero;
        while ((c = *++s) >= zero)
         { if (c > nine) break;
           exponent = exponent*10 + c - zero;
           if (++expshift > 3) break; }}}
  result *= (double)sgn;
  exponent = exponent*expsgn - shift;
  if (exponent >= 0) while (exponent--) result *= 10.0;
  else while (exponent++) result /= 10.0;
  (*r) *= 0.01*result;
  return(s); }

/*
 * lconvert - parse an optionally signed decimal integer at s.
 *
 * Stores the value in *r (0 when no digit follows the optional sign)
 * and returns a pointer to the first character not consumed.
 */
char *lconvert(char *s, long *r)
{ static char zero='0',nine='9';
  long sgn;
  long result;
  char c;
  sgn = 1L;
  result = 0L;
  if ((c = *s) == '-')
   { sgn = -1L;
     c = *++s; }
  else if (c == '+') c= *++s;
if (c >= zero) if (c <= nine) { result = (long)(c - zero); while ((c = *++s) >= zero) { if (c > nine) break; result = result*10L + (long)(c - zero); }} *r = result * sgn; return(s); } char *getlong(char *line, long *l) { static char zero='0',nine='9'; char c1,c2,*s; if (!line) return(NULL); s = line; while (c1 = *s) { if (c1 >= zero) { if (c1 <= nine) return(lconvert(s,l)); } else if ((c1 == '-') || (c1 == '+')) { c2 = s[1]; if (c2 >= zero) if (c2 <= nine) return(lconvert(s,l)); } s++; } return(NULL); } char *copy(char *from, char *to) { while (*to++ = *from++); return(--to); } char *copy2sp(char *from1, char *from2, char *to, int n) { char *s; s = to; while (from1 < from2) { *s++ = *from1++; if (--n <= 0) { do if (--s <= to) break; while (!space(*s)); break; }} *s = '\0'; return(s); } char *copy3cr(char *from, char *to, int n) { while (*to = *from++) { if (*to == DLIM) { *to = '\0'; break; } if (--n <= 0) { *++to = '\0'; break; } to++; } return(to); } char *quotestring(char *line, char *a, int n) { char ch; while (ch = *line++) if (ch == '"') { while (ch = *line++) { if (ch == '"') break; if (ch == ';') break; if (ch == '\n') break; if (ch == '\r') break; *a++ = ch; if (--n <= 0) break; } break; } *a = '\0'; return(a); } /* LIBRARY */ int fseekd(data_set *d, long fpos, long foffset) { if (d->bugmode) { fpos += foffset; if (fpos < 0L) fpos = 0L; if (fseek(d->f,0L,SEEK_SET)) return(EOF); d->filepointer = -1L; while (++d->filepointer < fpos) if (getc(d->f) == EOF) return(EOF); return(0); } if (fseek(d->f,fpos,SEEK_SET)) return(EOF); d->filepointer = fpos; if (foffset != 0L) { if ((fpos + foffset) < 0L) foffset = -fpos; if (fseek(d->f,foffset,SEEK_CUR)) return(EOF); d->filepointer += foffset; } return(0); } long ftelld(data_set *d) { if (d->bugmode) return(d->filepointer); else return(ftell(d->f)); } char fgetcd(data_set *d) { int ic; if ((ic = getc(d->f)) == EOF) return(NOCHAR); d->filepointer++; return((char)ic); } char *fgetsd(data_set *d, char line[], int len) { 
int i,ic; i = 0; while (i < len) { if ((ic = getc(d->f)) == EOF) break; d->filepointer++; if (ic == '\r') continue; if (ic == '\n') { line[i++] = DLIM; break; } line[i++] = (char)ic; } if (i < 1) return(NULL); line[i] = '\0'; return(line); } int agene_position_check(data_set *d, int nagene, annotated_gene *agene) { int a; long l,swap; if ((agene->stop - agene->start) > MAXAGENELEN) { swap = agene->stop; agene->stop = agene->start; agene->start = swap; agene->stop += d->aseqlen; } if (agene->start > agene->stop) agene->stop += d->aseqlen; l = agene->stop - agene->start; if ((l < 1) || (l > MAXAGENELEN)) return(0); if (agene->stop == d->aseqlen) { for (a = 0; a < nagene; a++) if (d->gene[a].start == agene->start) if (d->gene[a].genetype == agene->genetype) if (softmatch(d->gene[a].species,agene->species)) return(0); } return(1); } long process_sequence_heading(data_set *d, csw *sw) { int i,ic,nagene; long l,realstart; char line[STRLEN],c,*s,*sq,*sd; annotated_gene *agene,tmpagene; d->datatype = FASTA; fseekd(d,d->seqstart,d->seqstartoff); HEADING: do if ((c = fgetcd(d)) == NOCHAR) return(-1L); while (space(c)); if (c == '#') { if (!fgetsd(d,line,STRLENM1)) return(-1L); goto HEADING; } if (!fgetsd(d,d->seqname,STRLENM1)) return(-1L); if (c != '>') { s = d->seqname; if (upcasec(c) != 'L') { do if (!(c = *s++)) goto FNSN; while (upcasec(c) != 'L'); } if (!(s = softmatch(s,"OCUS"))) goto FNSN; if (sd = softstrpos(d->seqname,"BP")) { sd = backword(d->seqname,sd,1); if (sd = getlong(sd,&l)) d->aseqlen = l; } s += 4; while (space(*s)) s++; sq = d->seqname; while (!space(*s)) *sq++ = *s++; d->aseqlen = 0L; if (!fgetsd(d,line,STRLENM1)) return(-2L); if (sd = softstrpos(line,"DEFINITION")) { sd += 10; while (space(*sd)) sd++; *sq++ = ' '; copy(sd,sq); if (!fgetsd(d,line,STRLENM1)) return(-2L); } else copy(s,sq); for (i = 0; i < NS; i++) d->nagene[i] = 0; nagene = 0; while (!marginstring(line,"ORIGIN",10)) { if (nagene >= NGFT) goto GBNL; agene = &(d->gene[nagene]); agene->comp 
= 0; agene->start = -1L; agene->stop = -1L; agene->antistart = -1L; agene->antistop = -1L; agene->permuted = 0; agene->pseudogene = 0; if (!(s = marginstring(line,"tRNA",10))) goto TMRNASEQ; agene->genetype = tRNA; if (softstrpos(s,"complement")) agene->comp = 1; if (s = getlong(s,&l)) agene->start = l; if (s = getlong(s,&l)) agene->stop = l; copy("tRNA-???",agene->species); if (!fgetsd(d,line,STRLENM1)) return(-2L); while (!margindetect(line,10)) { if (s = softstrpos(line,"product=")) if (s = softstrpos(s,"tRNA-")) { s += 5; while (space(*s)) s++; copy3cr(s,agene->species+5,3); } if (s = softstrpos(line,"anticodon=")) { s += 10; if (!(s = getlong(s,&l))) l = -1L; agene->antistart = l; if (!(s = getlong(s,&l))) l = -1L; agene->antistop = l; } if (softstrpos(line,"/pseudo")) agene->pseudogene = 1; if (!fgetsd(d,line,STRLENM1)) return(-2L); } if (agene_position_check(d,nagene,agene)) { d->nagene[tRNA]++; nagene++; } continue; TMRNASEQ: if (!(s = marginstring(line,"tmRNA",10))) goto CDSEQ; agene->genetype = tmRNA; if (softstrpos(s,"complement")) agene->comp = 1; if (s = getlong(s,&l)) agene->start = l; if (s = getlong(s,&l)) agene->stop = l; copy("tmRNA",agene->species); if (!agene_position_check(d,nagene,agene)) goto GBNL; d->nagene[tmRNA]++; nagene++; if (!fgetsd(d,line,STRLENM1)) return(-2L); while (!margindetect(line,10)) { if (softstrpos(line,"acceptor")) agene->permuted = 1; if (softstrpos(line,"/pseudo")) agene->pseudogene = 1; if (!fgetsd(d,line,STRLENM1)) return(-2L); } if (s = marginstring(line,"tmRNA",10)) { tmpagene.comp = 0; tmpagene.start = -1L; tmpagene.stop = -1L; tmpagene.antistart = -1L; tmpagene.antistop = -1L; tmpagene.permuted = 0; tmpagene.pseudogene = 0; if (softstrpos(s,"complement")) tmpagene.comp = 1; if (s = getlong(s,&l)) tmpagene.start = l; if (s = getlong(s,&l)) tmpagene.stop = l; if (!fgetsd(d,line,STRLENM1)) return(-2L); while (!margindetect(line,10)) { if (softstrpos(line,"coding")) tmpagene.permuted = 1; if 
(softstrpos(line,"/pseudo")) tmpagene.pseudogene = 1; if (s = softstrpos(line,"/tag_peptide")) { if (s = getlong(s,&l)) tmpagene.antistart = l; if (s = getlong(s,&l)) tmpagene.antistop = l; } if (!fgetsd(d,line,STRLENM1)) return(-2L); } if (agene->permuted && tmpagene.permuted) { agene->stop = tmpagene.stop; agene->antistart = tmpagene.antistart; agene->antistop = tmpagene.antistop; copy("tmRNA(Perm)",agene->species); } else { if (nagene >= NGFT) goto GBNL; agene = &(d->gene[nagene]); agene->comp = tmpagene.comp; agene->start = tmpagene.start; agene->stop = tmpagene.stop; agene->antistart = -1L; agene->antistop = -1L; agene->permuted = 0; agene->pseudogene = tmpagene.pseudogene; copy("tmRNA",agene->species); if (agene_position_check(d,nagene,agene)) { d->nagene[tmRNA]++; nagene++; }}} continue; CDSEQ: if (!(s = marginstring(line,"CDS",10))) if (!(s = marginstring(line,"mRNA",10))) goto RRNA; agene->genetype = CDS; if (softstrpos(s,"complement")) agene->comp = 1; if (s = getlong(s,&l)) agene->start = l; if (s = getlong(s,&l)) agene->stop = l; copy("???",agene->species); if (!fgetsd(d,line,STRLENM1)) return(-2L); while (!margindetect(line,10)) { if (s = softstrpos(line,"gene=")) { s += 5; quotestring(s,agene->species,SHORTSTRLENM1); } else if (s = softstrpos(line,"product=")) { s += 8; quotestring(s,agene->species,SHORTSTRLENM1); } if (softstrpos(line,"/pseudo")) agene->pseudogene = 1; if (!fgetsd(d,line,STRLENM1)) return(-2L); } if (agene_position_check(d,nagene,agene)) { d->nagene[CDS]++; nagene++; } continue; RRNA: if (!(s = marginstring(line,"rRNA",10))) goto GBNL; agene->genetype = rRNA; if (softstrpos(s,"complement")) agene->comp = 1; if (s = getlong(s,&l)) agene->start = l; if (s = getlong(s,&l)) agene->stop = l; copy("???",agene->species); if (!fgetsd(d,line,STRLENM1)) return(-2L); while (!margindetect(line,10)) { if (s = softstrpos(line,"gene=")) { s += 5; quotestring(s,agene->species,SHORTSTRLENM1); } else if (s = softstrpos(line,"product=")) { s += 8; 
quotestring(s,agene->species,SHORTSTRLENM1); } if (softstrpos(line,"/pseudo")) agene->pseudogene = 1; if (!fgetsd(d,line,STRLENM1)) return(-2L); } if (agene_position_check(d,nagene,agene)) { d->nagene[rRNA]++; nagene++; } continue; GBNL: if (!fgetsd(d,line,STRLENM1)) return(-2L); } d->datatype = GENBANK; d->nagene[NS-1] = nagene; sw->annotated = 1; realstart = ftelld(d); } else { MH: realstart = ftelld(d); do if ((c = fgetcd(d)) == NOCHAR) return(-3L); while (space(c)); if (c == '>') { if (!fgetsd(d,line,STRLENM1)) return(-3L); goto MH; } fseekd(d,realstart,0L); } s = d->seqname; i = 0; while ((c = *s) != '\0') { if (c == '\n') break; if (c == '\r') break; if (++i >= STRLEN) break; s++; } *s = '\0'; return(realstart); FNSN: s = copy("Unnamed sequence ",d->seqname); fseekd(d,d->seqstart,d->seqstartoff); realstart = ftelld(d); if (fgetsd(d,line,STRLENM1)) copy3cr(line,s,50); fseekd(d,realstart,0L); return(realstart); } int move_forward(data_set *d) { int ic; long nextbase; static int map[256] = { -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, -4,-4,-4,-4,-4,-4,NOBASE,-3,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, -4,-4,-2,-4,-4,Adenine,AMBIG,Cytosine,AMBIG,-4,-4,Guanine,AMBIG, -4,-4,AMBIG,-5,AMBIG,AMBIG,-4,-4,-4, AMBIG,AMBIG,Thymine,Thymine,AMBIG,AMBIG,-4, AMBIG,-4,-4,-4,-4,INSERT,NOBASE,-4,Adenine,AMBIG,Cytosine,AMBIG, -4,-4,Guanine,AMBIG,-4,-4,AMBIG,-5,AMBIG,AMBIG,-4,-4,-4, AMBIG,AMBIG,Thymine,Thymine,AMBIG,AMBIG,-4, AMBIG,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4 }; if (d->ps >= d->psmax) if (d->psmax > 0L) { 
fseekd(d,d->seqstart,d->seqstartoff); d->ps = 0L; } NL: if ((ic = (int)fgetcd(d)) == NOCHAR) goto FAIL; SC: ic = map[ic]; BS: if (ic >= Adenine) { d->ps++; return(ic); } if (ic == -2) { d->nextseq = ftelld(d); d->nextseqoff = -1L; return(TERM); } if (ic == -3) if (d->datatype == GENBANK) { if ((ic = (int)fgetcd(d)) == NOCHAR) goto FAIL; if ((ic = map[ic]) != -3) goto BS; do if ((ic = (int)fgetcd(d)) == NOCHAR) goto FAIL; while (space(ic)); d->nextseq = ftelld(d); d->nextseqoff = -1L; return(TERM); } if (ic == -5) { nextbase = ftelld(d); if ((ic = (int)fgetcd(d)) == NOCHAR) goto FAIL; if (upcasec(ic) == 'O') { if ((ic = (int)fgetcd(d)) == NOCHAR) goto FAIL; if (upcasec(ic) == 'C') { if ((ic = (int)fgetcd(d)) == NOCHAR) goto FAIL; if (upcasec(ic) == 'U') { if ((ic = (int)fgetcd(d)) == NOCHAR) goto FAIL; if (upcasec(ic) == 'S') { d->nextseq = nextbase; d->nextseqoff = -1L; return(TERM); }}}} fseekd(d,nextbase,0L); } goto NL; FAIL: d->nextseq = -1L; d->nextseqoff = 0L; if (d->psmax > 0L) { d->ps = d->psmax; return(NOBASE); } else return(TERM); } char cbase(int c) { static char base[7] = "acgt.."; if (c < Adenine) return('#'); if (c > NOBASE) return((char)c); return(base[c]); } int seq_init(data_set *d, csw *sw) { long ngc; int ic; d->filepointer = 0; if ((d->seqstart = process_sequence_heading(d,sw)) < 0L) { if (d->seqstart == -2L) fprintf(stderr,"ERROR - unable to read Genbank sequence %s\n",d->seqname); else if (d->seqstart == -2L) fprintf(stderr,"ERROR - unable to read fasta sequence %s\n",d->seqname); return(0); } d->seqstartoff = 0L; d->ps = 0L; d->psmax = -1L; ngc = 0L; while ((ic = move_forward(d)) >= Adenine) if (ic >= Cytosine) if (ic <= Guanine) ngc++; if ((d->psmax = d->ps) <= 0L) return(0); d->gc = (double)ngc/(double)d->psmax; fseekd(d,d->seqstart,d->seqstartoff); d->ps = 0L; return(1); } char cpbase(int c) { static char base[7] = "ACGT.."; if (c < Adenine) return('#'); if (c > NOBASE) return((char)c); return(base[c]); } char *aa(int *anticodon, csw *sw) { 
int p1,p2,p3; if ((p1 = *anticodon) >= AMBIG) return(ambig_aaname); if ((p2 = anticodon[1]) >= AMBIG) return(ambig_aaname); if ((p3 = anticodon[2]) >= AMBIG) return(ambig_aaname); return(aaname[aamap[sw->geneticcode][(p1<<4) + (p2<<2) + p3]]); } char *translate(int *codon, csw *sw) { int p1,p2,p3,aa; if ((p1 = *codon) >= AMBIG) return(ambig_aaname); if ((p2 = codon[1]) >= AMBIG) return(ambig_aaname); if ((p3 = codon[2]) >= AMBIG) return(ambig_aaname); aa = aamap[sw->geneticcode][((3-p3)<<4)+((3-p2)<<2)+(3-p1)]; if ((aa == SeC) || (aa == Pyl)) aa = Stop; return(aaname[aa]); } char ltranslate(int *codon, gene *t, csw *sw) { int code,p1,p2,p3; if (t->genetype == CDS) code = t->asst; else code = sw->geneticcode; if ((p1 = *codon) >= AMBIG) return(ambig_aaname[0]); if ((p2 = codon[1]) >= AMBIG) return(ambig_aaname[0]); if ((p3 = codon[2]) >= AMBIG) return(ambig_aaname[0]); return(aaletter[aamap[code][((3-p3)<<4)+((3-p2)<<2)+(3-p1)]]); } char ptranslate(int *codon, csw *sw) { int p1,p2,p3; if ((p1 = *codon) >= AMBIG) return(ambig_aaname[0]); if ((p2 = codon[1]) >= AMBIG) return(ambig_aaname[0]); if ((p3 = codon[2]) >= AMBIG) return(ambig_aaname[0]); return(aapolarity[aamap[sw->geneticcode][((3-p3)<<4)+((3-p2)<<2)+(3-p1)]]); } int seqlen(gene *t) { return(t->nbase + t->nintron); } int aseqlen(data_set *d, annotated_gene *a) { int alen; long astart,astop; astart = a->start; astop = a->stop; if (astart > astop) astop += d->psmax; alen = (int)(astop - astart) + 1; return(alen); } double gc_content(gene *t) { int *s,*se; double ngc; static double score[6] = { 0.0,1.0,1.0,0.0,0.0,0.0 }; ngc = 0.0; if ((t->nintron > 0) && (t->asst == 0)) { s = t->eseq; se = s + t->intron; while (s < se) ngc += score[*s++]; s = se + t->nintron; se = t->eseq + t->nbase + t->nintron; while (s < se) ngc += score[*s++]; } else { s = t->seq; se = s + t->nbase; while (s < se) ngc += score[*s++]; } return(ngc/(double)t->nbase); } void write_seq(FILE *f, int *seq, int newline) { int i,c; i = 0; while 
((c = *seq++) >= Adenine) { fputc(cbase(c),f); if (newline) if (++i >= 50) { fputc('\n',f); i = 0; }} if (i > 0) fputc('\n',f); } int find_var_hairpin(gene *t) { int e,stem,vstem,loop,*sn,*sen,*pos1,*pos2,*sb,*se,*sc,*sd,*sf,*s; unsigned int c,cn,m; static unsigned int A[6] = { 0,0,0x100,0x400,0,0 }; static unsigned int C[6] = { 0,0,0x400,0,0,0 }; static unsigned int G[6] = { 0x100,0x400,0,0x200,0,0 }; static unsigned int T[6] = { 0x400,0,0x200,0,0,0 }; static unsigned int te[6] = { 0,0,0,0,0,0 }; if (t->genetype != tRNA) return(0); if (t->var < 13) return(0); e = 0; sb = t->seq + t->astem1 + t->spacer1 + 2*t->dstem + t->dloop + t->spacer2 + 2*t->cstem + t->cloop + t->nintron; sc = sb + 3; se = sb + t->var - 2; sf = se - 2; te[0] = A[*se]; te[1] = C[*se]; te[2] = G[*se]; te[3] = T[*se]; while (--se > sf) { te[0] = (te[0] >> 4) | A[*se]; te[1] = (te[1] >> 4) | C[*se]; te[2] = (te[2] >> 4) | G[*se]; te[3] = (te[3] >> 4) | T[*se]; } while (se >= sc) { te[0] = ((te[0] >> 4) | A[*se]); te[1] = ((te[1] >> 4) | C[*se]); te[2] = ((te[2] >> 4) | G[*se]); te[3] = ((te[3] >> 4) | T[*se]); s = se - 5; sd = se - 7; m = te[*s]; while (--s > sd) m = (m >> 4) + te[*s]; while (s >= sb) { m = (m >> 4) + te[*s]; c = m & 0xf; if (c >= 9) { stem = 3; loop = (int)(se - s) - 3; sen = se; sn = s + 2; while (loop >= 6) { if ((cn = vbp[sen[-1]][sn[1]]) <= 0) break; c += cn; stem++; loop -= 2; sen--; sn++; } if (c > e) { e = c; pos1 = s; pos2 = sen; vstem = stem; }} s--; } se--; } if (e > 0) return((((int)(pos1 - sb)) << 10) + (((int)(pos2 - sb)) << 5) + vstem); else return(0); } void write_to_library(FILE *f, gene *t, csw *sw) { int *s; static char trnatype[2][6] = { "tRNA","mtRNA" }; s = t->seq + t->anticodon; fprintf(f,">%s",t->name); if (!softstrpos(t->name,"RNA")) switch (t->genetype) { case CDS: fprintf(f," CDS"); break; case srpRNA: fprintf(f," srpRNA"); break; case tmRNA: if (t->asst > 0) fprintf(f," Permuted"); fprintf(f," tmRNA"); break; case tRNA: default: t->varbp = 
find_var_hairpin(t); if (t->tstem == 0) fprintf(f," TV-loop"); else if (t->dstem == 0) fprintf(f," D-loop"); switch(t->cloop) { case 6: fprintf(f," %s-?""?""?(%c%c)",trnatype[sw->mtrna], cbase(*s),cbase(*(s+1))); break; case 8: fprintf(f," %s-?""?""?(%c%c%c%c)",trnatype[sw->mtrna], cbase(*s),cbase(s[1]),cbase(s[2]),cbase(s[3])); break; case 7: default: fprintf(f," %s-%s(%c%c%c)",trnatype[sw->mtrna], aa(s,sw),cbase(*s),cbase(*(s+1)),cbase(*(s+2))); break; } break; } if (strpos(t->name,"bases)")) fprintf(f,"\n"); else fprintf(f," (%d bases)\n",t->nbase); fprintf(f,"sequence =\n"); write_seq(f,t->seq,1); if (*t->eseq >= Adenine) { fprintf(f,"extended sequence =\n"); write_seq(f,t->eseq,1); } fprintf(f,"nbase = %d\n",t->nbase); fprintf(f,"sense = %d\n",t->comp); fprintf(f,"start = %ld\n",t->start); fprintf(f,"stop = %ld\n",t->stop); fprintf(f,"astem1 = %d\n",t->astem1); fprintf(f,"astem2 = %d\n",t->astem2); fprintf(f,"atail = %d\n",t->aatail); fprintf(f,"spacer1 = %d\n",t->spacer1); fprintf(f,"spacer2 = %d\n",t->spacer2); fprintf(f,"dstem = %d\n",t->dstem); fprintf(f,"dloop = %d\n",t->dloop); fprintf(f,"cstem = %d\n",t->cstem); fprintf(f,"cloop = %d\n",t->cloop); fprintf(f,"anticodon = %d\n",t->anticodon); fprintf(f,"nintron = %d\n",t->nintron); fprintf(f,"intron = %d\n",t->intron); fprintf(f,"asst = %d",t->asst); if (t->genetype == tmRNA) if (t->asst > 0) fprintf(f," permuted"); fprintf(f,"\ntps = %d\n",t->tps); fprintf(f,"tpe = %d\n",t->tpe); fprintf(f,"var = %d\n",t->var); fprintf(f,"varbp = %d,%d,%d\n",((t->varbp >> 10)&0x1f), ((t->varbp >> 5)&0x1f),(t->varbp&0x1f)); fprintf(f,"tstem = %d\n",t->tstem); fprintf(f,"tloop = %d\n",t->tloop); fprintf(f,"gc = %g\n\n",gc_content(t)); } void init_tmrna(FILE *f, csw *sw) { int c,*s; s = sw->tmrna_struct; while ((c = *s++) != TERM) itmparam(cbase(c),f); } int *make_tv(int *seq, char matrix[][MATY], int *x, int *y, int orient, int tv) { int i,px,py,stem; static int ux[4] = { 1,0,-1,0 }; static int uy[4] = { 0,1,0,-1 }; static 
int vx[4] = { 0,-1,0,1 }; static int vy[4] = { 1,0,-1,0 }; static int loopu[26][26] = { { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 1,1,1,0,0,-1,-1 }, { 1,1,1,1,0,-1,-1,-1 }, { 1,1,1,1,0,0,-1,-1,-1 }, { 1,1,1,1,1,0,-1,-1,-1,-1 }, { 1,1,1,1,1,0,0,-1,-1,-1,-1 }, { 1,1,1,1,1,1,0,-1,-1,-1,-1,-1 }, { 1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1 }, { 1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1 }, { 1,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1 }, { 1,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1 }, { 1,1,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1,-1 }, { 1,1,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1,-1 }, { 1,1,1,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1,-1,-1 }, { 1,1,1,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1 }, { 1,1,1,1,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1 }, { 1,1,1,1,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1 }, { 1,1,1,1,1,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }, { 1,1,1,1,1,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 } }; static int loopv[26][26] = { { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0,1,1,1,1,1,1 }, { -1,1,1,1,1,1,1,1 }, { -1,1,1,1,1,1,1,1,0 }, { -1,0,1,1,1,1,1,1,1,0 }, { -1,0,1,1,1,1,1,1,1,0,0 }, { -1,0,0,1,1,1,1,1,1,1,0,0 }, { -1,0,0,1,1,1,1,1,1,1,0,0,0 }, { -1,0,0,1,1,1,1,1,1,1,0,0,0,0 }, { -1,0,0,1,1,1,1,1,1,1,0,0,0,0,0 }, { -1,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0 }, { -1,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0 }, { -1,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0 }, { -1,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0 }, { -1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0 }, { -1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0 }, { -1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0 }, { -1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0 }, { -1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0 }, { -1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0 }, { -1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0 } }; px = *x; py = *y; stem = 0; if (tv < 6) { px += ux[orient]; py += uy[orient]; i = 0; while (i < tv) { px += vx[orient]; py 
+= vy[orient]; matrix[px][py] = cbase(*seq++); i++; } py += (6-i)*vy[orient]; goto FN; } if (tv > 25) { if (tv % 2) stem = (tv - 25)/2; else stem = (tv - 24)/2; tv = tv - 2*stem; } i = 0; while (i < stem) { px += ux[orient]; py += uy[orient]; matrix[px][py] = cbase(*seq++); i++; } i = 0; while (i < tv) { px += ux[orient]*loopu[tv][i] + vx[orient]*loopv[tv][i]; py += uy[orient]*loopu[tv][i] + vy[orient]*loopv[tv][i]; matrix[px][py] = cbase(*seq++); i++; } px += ux[orient]*loopu[tv][i] + vx[orient]*loopv[tv][i]; py += uy[orient]*loopu[tv][i] + vy[orient]*loopv[tv][i]; i = 0; while (i < stem) { matrix[px][py] = cbase(*seq++); px -= (ux[orient]); py -= (uy[orient]); i++; } FN: *x = px; *y = py; return(seq); } int base_match(char b1, char b2) { int i,s; static char base1[11] = "acgtgtagtg"; static char base2[11] = "tgcatggatg"; static int score[11] = { 2,2,2,2,1,1,3,3,3,3 }; s = 0; for (i = 0; i < 10; i++) if (b1 == base1[i]) if (b2 == base2[i]) { s = score[i]; break; } return(s); } int *make_clover(int *seq, int b, int e, int stemlength, char matrix[][MATY], int *x, int *y, int orient) { int i,px,py,pxb,pyb,pxe,pye,l,xlg,xlgd,ylgh,ylg; int *s,*se; static int ux[9] = { 1,0,-1,0,0,1,1,-1,-1 }; static int uy[9] = { 0,1,0,-1,1,-1,1,1,-1 }; static int vx[9] = { 0,-1,0,1,1,1,1,-1,-1 }; static int vy[9] = { 1,0,-1,0,0,0,0,0,0 }; static int loopu[18][18] = { { -1 }, { 0,-1 }, { 0,0,-1 }, { 0,1,-1,-1 }, { 0,1,0,-1,-1 }, { 0,1,0,0,-1,-1 }, { 0,1,1,0,-1,-1,-1 }, { 0,1,1,0,0,-1,-1,-1 }, { 0,1,1,1,0,-1,-1,-1,-1 }, { 0,1,1,1,0,0,-1,-1,-1,-1 }, { 0,1,1,1,0,0,0,-1,-1,-1,-1 }, { 0,1,1,1,1,0,0,-1,-1,-1,-1,-1 }, { 0,1,1,1,1,0,0,0,-1,-1,-1,-1,-1 }, { 0,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1 }, { 0,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1 }, { 0,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1,-1 }, { 0,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1,-1 }, { 0,1,1,1,1,1,1,0,0,0,0,-1,-1,-1,-1,-1,-1,-1 } }; static int loopv[18][18] = { { 2 }, { 1,1 }, { 0,1,1 }, { -1,2,2,-1 }, { -1,1,1,2,-1 }, { -1,1,1,1,1,-1 }, { -1,0,1,1,1,1,-1 
}, { -1,0,1,1,1,1,0,-1 }, { -1,0,1,1,1,1,0,0,-1 }, { -1,0,0,1,1,1,1,0,0,-1 }, { -1,0,0,0,1,1,1,1,0,0,-1 }, { -1,0,0,0,1,1,1,1,0,0,0,-1 }, { -1,0,0,0,1,1,1,1,0,0,0,0,-1 }, { -1,0,0,0,0,1,1,1,1,0,0,0,0,-1 }, { -1,0,0,0,0,1,1,1,1,0,0,0,0,0,-1 }, { -1,0,0,0,0,0,1,1,1,1,0,0,0,0,0,-1 }, { -1,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,-1 }, { -1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,-1 } }; static int dloopu[18][18] = { { -1 }, { 0,-1 }, { 0,0,-1 }, { 0,1,-1,-1 }, { 0,1,0,-1,-1 }, { 0,1,0,0,-1,-1 }, { 0,1,1,0,-1,-1,-1 }, { 0,1,1,0,0,-1,-1,-1 }, { 0,1,1,0,0,0,-1,-1,-1 }, { 0,1,1,1,0,0,-1,-1,-1,-1 }, { 0,1,1,1,0,0,0,-1,-1,-1,-1 }, { 0,1,1,1,1,0,0,-1,-1,-1,-1,-1 }, { 0,1,1,1,1,0,0,0,-1,-1,-1,-1,-1 }, { 0,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1 }, { 0,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1 }, { 0,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1,-1 }, { 0,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1,-1 }, { 0,1,1,1,1,1,1,0,0,0,0,-1,-1,-1,-1,-1,-1,-1 } }; static int dloopv[18][18] = { { 2 }, { 1,1 }, { 0,1,1 }, { -1,2,2,-1 }, { -1,1,1,2,-1 }, { -1,1,1,1,1,-1 }, { -1,0,1,1,1,1,-1 }, { -1,0,1,1,1,1,0,-1 }, { -1,0,1,1,1,1,1,-1,-1 }, { -1,0,0,1,1,1,1,0,0,-1 }, { -1,0,0,0,1,1,1,1,0,0,-1 }, { -1,0,0,0,1,1,1,1,0,0,0,-1 }, { -1,0,0,0,1,1,1,1,0,0,0,0,-1 }, { -1,0,0,0,0,1,1,1,1,0,0,0,0,-1 }, { -1,0,0,0,0,1,1,1,1,0,0,0,0,0,-1 }, { -1,0,0,0,0,0,1,1,1,1,0,0,0,0,0,-1 }, { -1,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,-1 }, { -1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,-1 } }; static char bond1[5] = " +!:"; static char bond2[5] = " +-."; px = *x; py = *y; s = seq + b; se = s + stemlength; while (s < se) { matrix[px][py] = cbase(*s++); px += ux[orient]; py += uy[orient]; } l = e - b - 2*stemlength; if (l < 0) l = 0; if (l < 18) { i = 0; if (orient == DOWN) { while (i < l) { px += ux[orient]*dloopu[l][i] + vx[orient]*dloopv[l][i]; py += uy[orient]*dloopu[l][i] + vy[orient]*dloopv[l][i]; matrix[px][py] = cbase(*s++); i++; } px += ux[orient]*dloopu[l][i] + vx[orient]*dloopv[l][i]; py += uy[orient]*dloopu[l][i] + vy[orient]*dloopv[l][i]; } else { while (i < l) { px += 
ux[orient]*loopu[l][i] + vx[orient]*loopv[l][i]; py += uy[orient]*loopu[l][i] + vy[orient]*loopv[l][i]; matrix[px][py] = cbase(*s++); i++; } px += ux[orient]*loopu[l][i] + vx[orient]*loopv[l][i]; py += uy[orient]*loopu[l][i] + vy[orient]*loopv[l][i]; }} else { ylgh = ((l >> 2) - 2) >> 1; ylg = (ylgh << 1) + 2; xlgd = l - ylg - 2*ylgh + 1; xlg = (xlgd + 1) >> 1; pxb = px - ylgh*vx[orient]; if ((pxb < 0) || (pxb >= MATX)) goto NOLOOP; pyb = py - ylgh*vy[orient]; if ((pyb < 0) || (pyb >= MATY)) goto NOLOOP; pxe = px + xlg*ux[orient] + (ylg - ylgh + 1)*vx[orient]; if ((pxe < 0) || (pxe >= MATX)) goto NOLOOP; pye = py + xlg*uy[orient] + (ylg - ylgh + 1)*vy[orient]; if ((pye < 0) || (pye >= MATY)) goto NOLOOP; for (i = 0; i < ylgh; i++) { px -= vx[orient]; py -= vy[orient]; matrix[px][py] = cbase(*s++); } for (i = 0; i < xlg; i++) { px += ux[orient]; py += uy[orient]; matrix[px][py] = cbase(*s++); } for (i = 1; i < ylg; i++) { px += vx[orient]; py += vy[orient]; matrix[px][py] = cbase(*s++); } px += vx[orient]; py += vy[orient]; if (!(xlgd & 1)) matrix[px][py] = cbase(*s++); for (i = 0; i < xlg; i++) { px -= ux[orient]; py -= uy[orient]; matrix[px][py] = cbase(*s++); } for (i = 1; i < ylgh; i++) { px -= vx[orient]; py -= vy[orient]; matrix[px][py] = cbase(*s++); } px -= (ux[orient] + vx[orient]); py -= (uy[orient] + vy[orient]); } goto STEMBOND; NOLOOP: px += ux[orient]*loopu[0][0] + vx[orient]*loopv[0][0]; py += uy[orient]*loopu[0][0] + vy[orient]*loopv[0][0]; STEMBOND: se = seq + e; s = se - stemlength; while (s < se) { matrix[px][py] = cbase(*s++); i = base_match(matrix[px][py], matrix[px - 2*vx[orient]][py - 2*vy[orient]]); switch(orient) { case RIGHT: case LEFT: matrix[px - vx[orient]][py - vy[orient]] = bond1[i]; break; case SLANTDR: case SLANTUR: case SLANTUL: case SLANTDL: case UPRIGHT: case UP: case DOWN: matrix[px - vx[orient]][py - vy[orient]] = bond2[i]; break; } px -= ux[orient]; py -= uy[orient]; } *x = px; *y = py; return(se); } int *make_dv(int *seq, char 
matrix[][MATY], int dloop, int orient, int *xp, int *yp) { int i,x,y; static int ux[5] = { 1,0,-1,0,0 }; static int uy[5] = { 0,1,0,-1,1 }; static int vx[5] = { 0,-1,0,1,1 }; static int vy[5] = { 1,0,-1,0,0 }; static int loopu[22][22] = { { -1 }, { -1,0 }, { -1,-1,1 }, { -1,-1,0,1 }, { -1,-1,0,0,1 }, { -1,-1,-1,0,1,1 }, { -1,-1,-1,0,0,1,1 }, { -1,-1,-1,-1,0,1,1,1 }, { -1,-1,-1,-1,0,0,1,1,1 }, { -1,-1,-1,-1,-1,0,1,1,1,1 }, { -1,-1,-1,-1,-1,0,0,1,1,1,1 }, { -1,-1,-1,-1,-1,-1,0,1,1,1,1,1 }, { -1,-1,-1,-1,-1,-1,0,0,1,1,1,1,1 }, { -1,-1,-1,-1,-1,-1,-1,0,1,1,1,1,1,1 }, { -1,-1,-1,-1,-1,-1,0,-1,0,1,1,1,1,1,1 }, { -1,-1,-1,-1,-1,-1,-1,-1,0,1,1,1,1,1,1,1 }, { -1,-1,-1,-1,-1,-1,-1,-1,0,0,1,1,1,1,1,1,1 }, { -1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,1,1,1,1,1,1,1 }, { -1,-1,-1,-1,-1,-1,-1,-1,-1,0,0,1,1,1,1,1,1,1,1 }, { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,1,1,1,1,1,1,1,1 }, { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,0,1,1,1,1,1,1,1,1,1 }, { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,1,1,1,1,1,1,1,1,1 } }; static int loopv[22][22] = { { -6 }, { -3,-3 }, { -2,-2,-2 }, { -2,-1,-1,-2 }, { -1,-1,-2,-1,-1 }, { -1,-1,-1,-1,-1,-1 }, { 0,-1,-1,-1,-1,-1,-1 }, { 0,-1,0,-1,-1,-1,-1,-1 }, { 0,-1,0,-1,-1,-1,0,-1,-1 }, { 0,0,-1,0,-1,-1,-1,0,-1,-1 }, { 0,0,-1,0,-1,-1,-1,0,-1,0,-1 }, { 0,0,0,-1,0,-1,-1,-1,0,-1,0,-1 }, { 0,0,0,-1,0,-1,-1,-1,0,-1,0,0,-1 }, { 0,0,0,0,-1,0,-1,-1,-1,0,-1,0,0,-1 }, { 0,0,0,0,-1,0,-1,-1,-1,0,-1,0,0,0,-1 }, { 0,0,0,0,0,-1,0,-1,-1,-1,0,-1,0,0,0,-1 }, { 0,0,0,0,0,-1,0,-1,-1,-1,-1,0,0,0,0,0,-1 }, { 0,0,0,0,0,0,-1,0,-1,-1,-1,0,-1,0,0,0,0,-1 }, { 0,0,0,0,0,0,-1,0,-1,-1,-1,-1,0,0,0,0,0,0,-1 }, { 0,0,0,0,0,0,0,-1,0,-1,-1,-1,0,-1,0,0,0,0,0,-1 }, { 0,0,0,0,0,0,0,-1,0,-1,-1,-1,-1,0,0,0,0,0,0,0,-1 }, { 0,0,0,0,0,0,0,0,-1,0,-1,-1,-1,0,-1,0,0,0,0,0,0,-1 } }; x = *xp; y = *yp; if ((dloop < 2) || (dloop > 21)) { x--; y-= 6; seq += dloop; goto FN; } i = 0; while (i < dloop) { x += ux[orient]*loopu[dloop][i] + vx[orient]*loopv[dloop][i]; y += uy[orient]*loopu[dloop][i] + vy[orient]*loopv[dloop][i]; matrix[x][y] = 
cbase(*seq++); i++; } x += ux[orient]*loopu[dloop][i] + vx[orient]*loopv[dloop][i]; y += uy[orient]*loopu[dloop][i] + vy[orient]*loopv[dloop][i]; FN: *xp = x; *yp = y; return(seq); } int *make_var(int *seq, char matrix[][MATY], int *x, int *y, int orient, int var, int varbp) { int i,b,e,p,px,py,pxf,pyf,l,stem; static int ux[4] = { 1,0,-1,0 }; static int uy[4] = { 0,1,0,-1 }; static int vx[4] = { 0,-1,0,1 }; static int vy[4] = { 1,0,-1,0 }; static int preu[4][5][4] = { { {0},{1},{1},{1},{1} }, { {0},{1,1},{1,1},{1,1},{1,1} }, { {0},{0,1,1},{0,1,1},{1,1,1},{1,1,1} }, { {0},{0,0,1,1},{0,0,1,1},{0,1,1,1},{0,1,1,1} } }; static int prev[4][5][4] = { { {0},{0},{0},{0},{-1} }, { {0},{0,1},{0,0},{-1,0},{-1,0} }, { {0},{0,1,0},{0,0,0},{-1,0,0},{-1,0,-1} }, { {0},{0,1,0,0},{0,1,0,-1},{0,0,0,-1},{0,0,-1,-1} } }; static int postu[4][5][4] = { { {0},{0},{1,-1},{0,-1,0},{1,-1,-1,0} }, { {0},{0},{0,-1},{0,-1,0},{0,-1,-1,0} }, { {0},{0},{0,-1},{0,-1,-1},{1,-1,-1,-1} }, { {0},{0},{0,-1},{0,-1,-1},{1,-1,-1,-1} } }; static int postv[4][5][4] = { { {0},{0},{0,1},{0,1,1},{0,1,1,1} }, { {0},{0},{0,1},{0,1,1},{0,1,1,1} }, { {0},{0},{0,1},{0,1,1},{0,1,1,1} }, { {0},{0},{0,1},{0,1,1},{0,1,1,1} } }; static int loopu[10][10] = { { 2 }, { 1,1 }, { 1,1,0 }, { 1,0,1,0 }, { 1,1,0,0,0 }, { 1,1,1,-1,-1,1 }, { 1,1,1,0,0,-1,0 }, { 1,1,1,1,0,-1,-1,0 }, { 1,1,1,1,0,0,-1,-1,0 }, { 1,1,1,1,1,0,-1,-1,-1,0 } }; static int loopv[10][10] = { { 3 }, { 1,2 }, { 0,1,2 }, { 0,1,1,1 }, { -1,1,1,1,1 }, { -1,0,1,1,1,1 }, { -1,-1,1,1,1,1,1 }, { -1,-1,0,1,1,1,1,1 }, { -1,-1,-1,1,1,1,1,1,1 }, { -1,-1,-1,0,1,1,1,1,1,1 } }; px = *x; py = *y; if (var < 0) var = 0; if (var > 30) var = 30; if (varbp > 0) { b = (varbp >> 10) & 0x1f; if (b > 3) goto NBP; stem = varbp & 0x1f; e = stem + ((varbp >> 5) & 0x1f); p = var - e; if (p < 1) goto NBP; if (p > 4) goto NBP; pxf = px + 2*ux[orient] + 3*vx[orient]; pyf = py + 2*uy[orient] + 3*vy[orient]; i = 0; while (i < b) { px += ux[orient]*preu[b][p][i] + vx[orient]*prev[b][p][i]; py 
+= uy[orient]*preu[b][p][i] + vy[orient]*prev[b][p][i]; matrix[px][py] = cbase(*seq++); i++; } px += ux[orient]*preu[b][p][b] + vx[orient]*prev[b][p][b]; py += uy[orient]*preu[b][p][b] + vy[orient]*prev[b][p][b]; seq = make_clover(seq,0,e-b,stem,matrix,&px,&py,orient+SLANT); i = 0; while (i < p) { px += ux[orient]*postu[b][p][i] + vx[orient]*postv[b][p][i]; py += uy[orient]*postu[b][p][i] + vy[orient]*postv[b][p][i]; matrix[px][py] = cbase(*seq++); i++; } *x = pxf; *y = pyf; goto FIN; } NBP: if (var > 9) { if (var % 2) stem = (var - 7)/2; else stem = (var - 6)/2; } else stem = 0; l = var - 2*stem; i = 0; while (i < stem) { px += ux[orient] - vx[orient]; py += uy[orient] - vy[orient]; matrix[px][py] = cbase(*seq++); i++; } i = 0; while (i < l) { px += ux[orient]*loopu[l][i] + vx[orient]*loopv[l][i]; py += uy[orient]*loopu[l][i] + vy[orient]*loopv[l][i]; matrix[px][py] = cbase(*seq++); i++; } px += ux[orient]*loopu[l][i] + vx[orient]*loopv[l][i]; py += uy[orient]*loopu[l][i] + vy[orient]*loopv[l][i]; i = 0; while (i < stem) { matrix[px][py] = cbase(*seq++); px -= (ux[orient] - vx[orient]); py -= (uy[orient] - vy[orient]); i++; } *x = px; *y = py; FIN: return(seq); } void remove_inserts(int *s1, int *s2) { int flag,c; flag = 0; while ((c = *s1++) != TERM) { if (c == INSERT) { flag = 1 - flag; continue; } if (flag) continue; *s2++ = c; } *s2 = TERM; } void build_trna(gene *t, char matrix[][MATY], int x, int y, csw *sw) { int i,j,e,c,*seq; int rseq[150]; static char bond2[5] = " +-."; t->varbp = find_var_hairpin(t); remove_inserts(t->seq,rseq); seq = rseq; i = 0; while (i < t->astem1) { matrix[x][y] = cbase(*seq++); y--; i++; } if (t->spacer1 > 0) { x--; if (t->spacer1 >= 3) matrix[x][y+1] = cbase(*seq++); matrix[x][y] = cbase(*seq++); y--; x--; if (t->spacer1 >= 2) matrix[x][y] = cbase(*seq++); if ((t->spacer2 < 2) || (t->spacer1 > 1)) { x--; y--; }} if (t->dstem > 0) { e = 2*t->dstem + t->dloop; seq = make_clover(seq,0,e,t->dstem,matrix,&x,&y,LEFT); if (t->spacer2 > 
1) x--; y--; if (t->spacer2 > 0) matrix[x][y] = cbase(*seq++); y--; if (t->spacer2 > 1) { if (t->spacer1 > 1) x++; matrix[x][y] = cbase(*seq++); if (t->spacer1 < 2) y--; } x++; } else seq = make_dv(seq,matrix,t->dloop,RIGHT,&x,&y); e = 2*t->cstem + t->cloop; seq = make_clover(seq,0,e,t->cstem,matrix,&x,&y,DOWN); if (t->tstem > 0) { seq = make_var(seq,matrix,&x,&y,RIGHT,t->var,t->varbp); e = 2*t->tstem + t->tloop; seq = make_clover(seq,0,e,t->tstem,matrix,&x,&y,RIGHT); y++; } else seq = make_tv(seq,matrix,&x,&y,RIGHT,t->tloop); e = t->astem2; i = 0; while (i < e) { if ((c = *seq++) < Adenine) break; matrix[x][y] = cbase(c); j = base_match(matrix[x][y],matrix[x - 2][y]); matrix[x - 1][y] = bond2[j]; y++; i++; } i = 0; e = (sw->aataildisp)?ASTEM2_EXTD:t->aatail; j = (e < 2)?e:2; while (i < j) { if ((c = *seq++) < Adenine) break; matrix[x][y] = cbase(c); x++; y++; i++; } e -= j; i = 0; while (i < e) { if ((c = *seq++) < Adenine) break; matrix[x][y] = cbase(c); x++; i++; }} void build_tmrna(gene *t, char matrix[][MATY], int x, int y, csw *sw) { int i,j,e,c,tarm,*seq; int rseq[2*MAXTMRNALEN+1]; static char bond2[5] = " +-."; remove_inserts(t->eseq,rseq); seq = rseq + t->asst; i = 0; while (i < t->astem1) { matrix[x][y] = cbase(*seq++); y--; i++; } seq = make_dv(seq,matrix,t->dloop,RIGHT,&x,&y); tarm = 2*t->tstem + t->tloop; e = (t->asst > 0)? 
(t->cstem - t->dloop - t->astem1 - t->asst + 54): (2*t->cstem + t->cloop + t->nintron); seq = make_clover(seq,0,e,t->cstem,matrix,&x,&y,DOWN); seq = make_var(seq,matrix,&x,&y,RIGHT,t->var,t->varbp); seq = make_clover(seq,0,tarm,t->tstem,matrix,&x,&y,RIGHT); y++; e = t->astem2; i = 0; while (i < e) { if ((c = *seq++) == TERM) break; matrix[x][y] = cbase(c); j = base_match(matrix[x][y],matrix[x - 2][y]); matrix[x - 1][y] = bond2[j]; y++; i++; } e = (sw->aataildisp)?ASTEM2_EXTD:t->aatail; j = (e < 2)?e:2; i = 0; while (i < j) { if ((c = *seq++) == TERM) break; matrix[x][y] = cbase(c); x++; y++; i++; } e -= j; i = 0; while (i < e) { if ((c = *seq++) == TERM) break; matrix[x][y] = cbase(c); x++; i++; } } void init_matrix(char matrix[][MATY]) { int i,j; for (i =0; i < MATY; i++) for (j = 0; j < MATX; j++) matrix[j][i] = ' '; } void disp_matrix(FILE *f, char matrix[][MATY], int ylines) { int i,j,k; i = ylines; while (--i >= 0) { k = MATX; while (--k > 0) if (matrix[k][i] != ' ') break; for (j = 0; j <= k; j++) fputc(matrix[j][i],f); fputc('\n',f); } fputc('\n',f); } void xcopy(char m[][MATY], int x, int y, char *s, int l) { int i; char c; i = 0; while (i < l) { if (x >= MATX) break; if (!(c = *s++)) break; m[x++][y] = c; i++; }} int identify_tag(char tag[], int len, char (*thit)[50], int nt) { int i,n; char *s,*st,*sb,*sd; n = 0; st = tag + len; while (*--st == '*'); for (i = 0; i < NTAG; i++) { s = st; sb = tagdatabase[i].tag; sd = sb; while (*++sd); while (*s-- == *--sd) { if (s < tag) { if (sd > sb) goto PAR; if (n >= nt) goto MANY; copy(tagdatabase[i].name,thit[n]); n++; break; } if (sd > sb) continue; PAR: if (n >= nt) goto MANY; s = copy(tagdatabase[i].name,thit[n]); copy(" (partial match)",s); n++; break; }} return(n); MANY: return(-1); } int peptide_tag(char tag[], int maxlen, gene *t, csw *sw) { int i,lx,*se; se = t->eseq + t->tps; lx = (t->tpe - t->tps + 1); if (ltranslate(se+lx,t,sw) == '*') { lx += 3; if (ltranslate(se+lx,t,sw) == '*') lx += 3; } lx /= 3; if 
(lx > maxlen) lx = maxlen; for (i = 0; i < lx; i++) { tag[i] = ltranslate(se,t,sw); se += 3; } tag[i] = '\0'; return(lx); } void update_tmrna_tag_database(gene ts[], int nt, csw *sw) { int nn,i,k,c,lx; char *sp,*se,*s; char species[STRLEN],tag[100]; gene *t; if (sw->tagend >= NTAGMAX) return; for (i = 0; i < nt; i++) { t = ts + i; if (t->genetype != tmRNA) continue; s = t->name; se = NULL; while (*s) { if (*s == '|') se = s; s++; } if (!*se) continue; while (++se) if (space(*se)) break; if (!*se) continue; while (++se) if (!space(*se)) break; if (!*se) continue; if (softstrpos(se," sp. ")) { if (!(sp = softstrpos(se,"two-piece"))) if (!(sp = softstrpos(se,"tmRNA"))) continue; while (space(sp[-1])) sp--; copy2sp(se,sp,species,49); } else { s = species; c = 2; while (*se) { if (space(*se)) if (--c <= 0) break; *s++ = *se++; } *s = '\0'; } for (k = 0; k < sw->tagend; k++) if (softstrpos(tagdatabase[k].name,species)) break; if (k < sw->tagend) continue; copy(species,tagdatabase[sw->tagend].name); s = tag; lx = peptide_tag(s,50,t,sw); s += (lx - 1); while (*s == '*') s--; *++s = '\0'; copy(tag,tagdatabase[sw->tagend].tag); if (++sw->tagend >= NTAGMAX) break; } } int string_compare(char *s1, char *s2) { int r; char c1,c2; r = 0; while (c1 = *s1++) { if (!(c2 = *s2++)) break; r = (int)upcasec(c1) - (int)upcasec(c2); if (r != 0) break; } return(r); } void report_new_tmrna_tags(csw *sw) { int k,n,sort[NTAGMAX]; for (n = 0; n < sw->tagend; n++) { k = n; while (--k >= 0) { if (string_compare(tagdatabase[n].name,tagdatabase[sort[k]].name) >= 0) break; sort[k+1] = sort[k]; } sort[++k] = n; } fprintf(sw->f,"\ntmRNA tag database update:\n"); for (k = 0; k < sw->tagend; k++) { n = sort[k]; fprintf(sw->f," { \"%s\",\"%s\"},\n", tagdatabase[n].name,tagdatabase[n].tag); } fprintf(sw->f,"\n%d tmRNA peptide tags\n",sw->tagend); fprintf(sw->f,"%d new tmRNA peptide tags\n\n",sw->tagend - NTAG); } void disp_peptide_tag(FILE *f, gene *t, csw *sw) { int i,lx,nm,nmh,c1,c2,c3,*s,*se; char 
tag[52],thit[21][50]; fprintf(f,"Tag peptide at [%d,%d]\nTag sequence: ",t->tps+1,t->tpe+1); lx = peptide_tag(tag,50,t,sw); se = t->eseq + t->tps; s = se; for (i = 0; i < lx; i++) { if (i > 0) fputc('-',f); if ((c1 = *s++) >= AMBIG) continue; if ((c2 = *s++) >= AMBIG) continue; if ((c3 = *s++) >= AMBIG) continue; fputc(cbase(c1),f); fputc(cbase(c2),f); fputc(cbase(c3),f); } s = se; fprintf(f,"\nTag peptide: "); for (i = 0; i < lx; i++) { fprintf(f,"%s",translate(s,sw)); s += 3; if (i < (lx-1)) fputc('-',f); } fprintf(f,"\nTag peptide: %s",tag); if (sw->energydisp) { s = se; fprintf(f,"\nTag Polarity: "); for (i = 0; i < lx; i++) { fprintf(f,"%c",ptranslate(s,sw)); s += 3; }} fputc('\n',f); nmh = identify_tag(tag,lx,thit,21); if (nmh > 0) { if (nmh > 1) { fprintf(f,"Match with tmRNA tags from:\n"); i = 0; for (nm = 0; nm < nmh; nm++) { if (++i > 3) { fputc('\n',f); i = 1; } if (i > 1) fprintf(f,", "); fprintf(f,"%s",thit[nm]); } fputc('\n',f); } else fprintf(f,"Match with %s tmRNA tag\n",thit[0]); } else if (nmh == -1) fprintf(f,"Match with many tmRNA tags\n"); else fprintf(f,"Tag not identified\n"); fputc('\n',f); } void sense_switch(int *seq1, int *seq2, int lseq) { int i,b; int *sseq,*cseq; sseq = seq1; cseq = seq2 + lseq; while (--cseq >= seq2) { b = *sseq++; if (b >= Adenine) { if (b <= Thymine) *cseq = Thymine - b; else { if (b <= NOBASE) *cseq = b; else *cseq = NOBASE; }} else *cseq = NOBASE; }} double nenergy(gene *t, csw *sw) { double eref; if (t->genetype != tRNA) eref = sw->eref[t->genetype]; else if (sw->mtrna) { if (t->dstem == 0) eref = mtRNAtthresh; else if (t->tstem == 0) eref = mtRNAdthresh; else eref = mtRNAdtthresh; } else eref = sw->eref[tRNA]; return(100.0*t->energy/eref); } char *position(char *s, gene *t, csw *sw) { long start; start = t->start; if (sw->linear) if (start <= 0) start--; if (t->comp) sprintf(s,"c[%ld,%ld]",start,t->stop); else sprintf(s,"[%ld,%ld]",start,t->stop); return(s); } void location(char *s, gene *t, csw *sw, char *m) { 
char sp[80]; sprintf(s,"%s %s",m,position(sp,t,sw)); }
/* disp_location: print "<m> <position of t>" followed by a newline to sw->f. */
void disp_location(gene *t, csw *sw, char *m) { char sp[80]; fprintf(sw->f,"%s %s\n",m,position(sp,t,sw)); }
/* name: write a display name for gene t into si and return si.
   CDS, srpRNA and tmRNA get fixed labels; a tmRNA with t->asst > 0 is
   labelled as permuted, and sw->dispmatch selects the alternative tmRNA
   label strings. For tRNA, five bases starting one before t->anticodon are
   read from t->seq (proc != 0) or t->ps (proc == 0); in the proc == 0 case
   any of those positions at or beyond t->intron is shifted by t->nintron.
   The name is prefixed with "D-loop " when t->dstem == 0 and "TV-loop "
   when t->tstem == 0. t->cloop values 8 and 6 produce the ambiguous
   "-?(x|y)" form; every other value produces "<type>-<aa>(<3 bases>)".
   Any other gene type yields an empty string. */
char *name(gene *t, char *si, int proc, csw *sw) { int s[5],*ss,*sin,*sm,*s0,*s1,*s2,*s3,nintron; char *sb,*st; static char trnatype[2][6] = { "tRNA","mtRNA" }; switch (t->genetype) { case CDS: sprintf(si,"CDS"); break; case srpRNA: sprintf(si,"srpRNA"); break; case tmRNA: if (sw->dispmatch) { if (t->asst > 0) sprintf(si,"tmRNA(Perm) "); else sprintf(si,"tmRNA "); } else { if (t->asst > 0) sprintf(si,"tmRNA (Permuted)"); else sprintf(si,"tmRNA"); } break; case tRNA: ss = (proc?t->seq:t->ps); sm = ss + t->anticodon - 1; s0 = sm + 1; s1 = s0 + 1; s2 = s1 + 1; s3 = s2 + 1; nintron = t->nintron; if ((proc == 0) && (nintron > 0)) { sin = ss + t->intron; if (sm >= sin) sm += nintron; if (s0 >= sin) s0 += nintron; if (s1 >= sin) s1 += nintron; if (s2 >= sin) s2 += nintron; if (s3 >= sin) s3 += nintron; } s[0] = *sm; s[1] = *s0; s[2] = *s1; s[3] = *s2; s[4] = *s3; st = trnatype[sw->mtrna]; sb = si; if (t->dstem == 0) { sprintf(sb,"D-loop "); sb += 7; } if (t->tstem == 0) { sprintf(sb,"TV-loop "); sb += 8; } if (t->cloop == 8) sprintf(sb,"%s-?(%s|%s)(%c%c%c%c)",st, aa(s+1,sw),aa(s+2,sw), cbase(s[1]),cbase(s[2]),cbase(s[3]), cbase(s[4])); else if (t->cloop == 6) sprintf(sb,"%s-?(%s|%s)(%c%c)",st, aa(s,sw),aa(s+1,sw), cbase(s[1]),cbase(s[2])); else sprintf(sb,"%s-%s(%c%c%c)",st, aa(s+1,sw),cbase(s[1]),cbase(s[2]),cbase(s[3])); break; default: *si = '\0'; break; } return(si); }
/* disp_intron: print the intron of gene t to f; does nothing when
   t->nintron <= 0. Output: a header with the gene name, the intron bases
   read from t->eseq + t->intron in rows of 50 (stopping early at any code
   below Adenine), the intron length, and the 5 bases on each side of the
   insertion point. */
void disp_intron(FILE *f, gene *t, csw *sw) { int i,c,*s,*sb,*se; char genename[100]; if (t->nintron <= 0) return; name(t,genename,1,sw); fprintf(f,"Intron from %s\n",genename); fprintf(f,"1 . 10 . 20 . 30 . 40 .
50\n"); sb = t->eseq + t->intron; s = sb; se = sb + t->nintron; i = 0; while (s < se) { if ((c = *s++) < Adenine) break; fputc(cbase(c),f); if (++i >= 50) { fputc('\n',f); i = 0; }} if (i > 0) fputc('\n',f); fputc('\n',f); fprintf(f,"Intron Length: %d\n",t->nintron); fprintf(f,"Intron Insertion Position(%d-%d): ",t->intron,t->intron+1); s = sb - 5; for (i = 0; i < 5; i++) fputc(cbase(*s++),f); fprintf(f,"-Intron-"); s = se; for (i = 0; i < 5; i++) fputc(cbase(*s++),f); fputc('\n',f); fputc('\n',f); }
/* disp_fasta_seq: write gene t to f as a FASTA record.
   The '>' header holds an optional "ns-n" prefix (only when ns > 0), the
   gene name and its position; nsp > 0 selects the header variant without
   separating spaces. The sequence follows in rows of 50 bases, taken from
   t->eseq (t->nbase + t->nintron bases) when the gene has an intron and from
   t->seq (t->nbase bases) otherwise. A non-zero c prints bases with
   cpbase() instead of cbase(). */
void disp_fasta_seq(FILE *f, gene *t, int ns, int n, int nsp, int c, csw *sw) { int i,*s,*se; char genename[100],genepos[100]; if (t->nintron > 0) { s = t->eseq; se = s + t->nbase + t->nintron; } else { s = t->seq; se = s + t->nbase; } name(t,genename,1,sw); position(genepos,t,sw); if (nsp > 0) { if (ns > 0) fprintf(f,">%d-%d%s%s\n",ns,n,genename,genepos); else fprintf(f,">%s%s\n",genename,genepos); } else { if (ns > 0) fprintf(f,">%d-%d %s %s\n",ns,n,genename,genepos); else fprintf(f,">%s %s\n",genename,genepos); } i = 0; while (s < se) { if (c) fputc(cpbase(*s++),f); else fputc(cbase(*s++),f); if (++i >= 50) { fputc('\n',f); i = 0; }} if (i > 0) fputc('\n',f); }
/* disp_seq: print the primary sequence of gene t to f.
   sw->seqdisp >= 3 delegates to disp_fasta_seq (cbase output for 3,
   cpbase output for larger values). sw->seqdisp == 2 on a tRNA prints the
   sequence on one line followed by a structure line: '(' / ')' where
   bp[][] reports a pair and '.' where it does not, with 'd', 'A', 'i', 'v'
   and 't' marking D-loop, anticodon, intron, variable-loop hairpin and
   T-loop positions. Any other case prints rows of 50 bases. The header,
   ruler and trailing blank lines are omitted when sw->batch is set. */
void disp_seq(FILE *f, gene *t, csw *sw) { int i,j,k,varbp,stem,ab,ae,hl,*s,*se,*sl,*sb,*sr; char genename[100]; static int bplb[2] = { '.','(' }; static int bprb[2] = { '.',')' }; if (t->nintron > 0) { s = t->eseq; se = s + t->nbase + t->nintron; } else { s = t->seq; se = s + t->nbase; } if (sw->seqdisp >= 3) { if (!sw->batch) fputc('\n',f); if (sw->seqdisp == 3) disp_fasta_seq(f,t,0,0,0,0,sw); else disp_fasta_seq(f,t,0,0,0,1,sw); } else { if (!sw->batch) { name(t,genename,1,sw); fprintf(f,"\nPrimary sequence for %s\n",genename); } if ((sw->seqdisp == 2) && (t->genetype == tRNA)) { sl = s; while (sl < se) fputc(cbase(*sl++),f); fputc('\n',f); sl = s; sr = se - t->aatail - 1; for (i = 0; i < t->astem1; i++,sl++,sr--) fputc(bplb[bp[*sl][*sr]],f); for (i = 0; i < t->spacer1; i++) fputc(' ',f); sl += t->spacer1;
/* D-arm: left half of the stem, 'd' for each loop base, then right half. */
sb = sl + t->dstem - 1; sr = sb + t->dstem + t->dloop; for (i = 0; i < t->dstem; i++,sl++,sr--) fputc(bplb[bp[*sl][*sr]],f); for (i = 0; i < t->dloop; i++) fputc('d',f); sl += t->dloop; for (i = 0; i < t->dstem; i++,sl++,sb--) fputc(bprb[bp[*sl][*sb]],f); for (i = 0; i < t->spacer2; i++) fputc(' ',f); sl += t->spacer2;
/* Anticodon arm: left half of the C-stem; the right-hand partner lies
   beyond the loop and any intron. */
sb = sl + t->cstem - 1; sr = sb + t->cstem + t->cloop + t->nintron; for (i = 0; i < t->cstem; i++,sl++,sr--) fputc(bplb[bp[*sl][*sr]],f);
/* hl = number of bases preceding the anticodon loop; t->anticodon and
   t->intron are converted to loop-relative offsets by subtracting it. */
hl = t->astem1 + t->spacer1 + 2*t->dstem + t->dloop + t->spacer2 + t->cstem; if (t->nintron > 0) { j = t->intron - hl; ab = t->anticodon - hl; ae = ab + t->cloop - 5; for (i = 0; i < j; i++) if (i <= ae) if (i >= ab) fputc('A',f); else fputc(' ',f); else fputc(' ',f); for (i = 0; i < t->nintron; i++) fputc('i',f); for (i = j; i < t->cloop; i++) if (i <= ae) if (i >= ab) fputc('A',f); else fputc(' ',f); else fputc(' ',f); } else { j = t->cloop - 4; ab = t->anticodon - hl; ae = t->cloop - ab - j; for (i = 0; i < ab; i++) fputc(' ',f); for (i = 0; i < j; i++) fputc('A',f); for (i = 0; i < ae; i++) fputc(' ',f); } sl += (t->cloop + t->nintron); for (i = 0; i < t->cstem; i++,sl++,sb--) fputc(bprb[bp[*sl][*sb]],f);
/* Variable loop: varbp packs three fields - bits 10 and up give the offset
   j, bits 5-9 give k, bits 0-4 give the stem length. */
varbp = find_var_hairpin(t); if (varbp > 0) { j = (varbp >> 10); k = (varbp >> 5) & 0x1f; stem = (varbp & 0x1f); sr = sl + k + stem - 1; sl += j; sb = sl + stem - 1; for (i = 0; i < j; i++) fputc(' ',f); for (i = 0; i < stem; i++,sl++,sr--) fputc(bplb[bp[*sl][*sr]],f); for (i = j+stem; i < k; i++,sl++) fputc('v',f); for (i = 0; i < stem; i++,sl++,sb--) fputc(bprb[bp[*sl][*sb]],f); for (i = k+stem; i < t->var; i++,sl++) fputc(' ',f); } else { for (i = 0; i < t->var; i++) fputc(' ',f); sl += t->var; }
/* T-arm, then the 3' half of the acceptor stem paired back against the
   5' half at the start of the sequence. */
sb = sl + t->tstem - 1; sr = sb + t->tstem + t->tloop; for (i = 0; i < t->tstem; i++,sl++,sr--) fputc(bplb[bp[*sl][*sr]],f); for (i = 0; i < t->tloop; i++) fputc('t',f); sl += t->tloop; for (i = 0; i < t->tstem; i++,sl++,sb--) fputc(bprb[bp[*sl][*sb]],f); sb = s + t->astem1 - 1; for (i = 0; i < t->astem2;
i++,sl++,sb--) fputc(bprb[bp[*sl][*sb]],f); fputc('\n',f); } else { if (!sw->batch) fprintf(f,"1 . 10 . 20 . 30 . 40 . 50\n"); i = 0; while (s < se) { fputc(cbase(*s++),f); if (++i >= 50) { fputc('\n',f); i = 0; }} if (i > 0) fputc('\n',f); }} if (!sw->batch) { fputc('\n',f); fputc('\n',f); }}
/* disp_tmrna_seq: print the sequence of tmRNA gene t to f; does nothing
   when t->nintron <= 0. Bases from t->eseq are written in rows of 50,
   switching between cbase() and cpbase() output at these offsets:
   t->intron, t->tps, the end of the tag (t->tpe + 1, advanced by 3 for as
   long as ltranslate() returns '*'), t->intron + t->nintron, and
   t->nbase + t->nintron. The coordinates of the two tRNA domains, the 18
   bases starting at t->tps - 7 (labelled as the resume consensus) and the
   peptide tag report follow. */
void disp_tmrna_seq(FILE *f, gene *t, csw *sw) { int i,*s,*sb,*se; if (t->nintron <= 0) return; if (*(t->name) == '\0') fprintf(f,"tmRNA Sequence\n\n"); else fprintf(f,"tmRNA Sequence in %s\n\n",t->name); fprintf(f,"1 . 10 . 20 . 30 . 40 . 50\n"); sb = t->eseq; s = sb; se = sb + t->intron; i = 0; while (s < se) { fputc(cbase(*s++),f); if (++i >= 50) { fputc('\n',f); i = 0; }} se = sb + t->tps; while (s < se) { fputc(cpbase(*s++),f); if (++i >= 50) { fputc('\n',f); i = 0; }} se = sb + t->tpe + 1; while (ltranslate(se,t,sw) == '*') se += 3; while (s < se) { fputc(cbase(*s++),f); if (++i >= 50) { fputc('\n',f); i = 0; }} se = sb + t->intron + t->nintron; while (s < se) { fputc(cpbase(*s++),f); if (++i >= 50) { fputc('\n',f); i = 0; }} se = sb + t->nbase + t->nintron; while (s < se) { fputc(cbase(*s++),f); if (++i >= 50) { fputc('\n',f); i = 0; }} if (i > 0) fputc('\n',f); fprintf(f,"\n5' tRNA domain at [%d,%d]\n", 1,t->intron); fprintf(f,"3' tRNA domain at [%d,%d]\n", t->intron+t->nintron+1,t->nbase+t->nintron); fprintf(f,"Resume consensus sequence at [%d,%d]: ",t->tps - 6,t->tps + 11); s = t->eseq + t->tps - 7; for (i = 0; i < 18; i++) fputc(cbase(*s++),f); fputc('\n',f); fputc('\n',f); disp_peptide_tag(f,t,sw); }
/* disp_tmrna_perm_seq: counterpart of disp_tmrna_seq for the permuted
   layout; does nothing when t->nintron <= 0. The first 54 bases of t->eseq
   are written with cpbase(); the segments that follow are bounded by
   t->intron, t->asst, t->asst + t->astem1 + t->dloop + t->cstem, t->tps,
   the extended tag end and t->tpe + TMPTRAILER - 54. Domain coordinates,
   the resume region and the peptide tag report follow. */
void disp_tmrna_perm_seq(FILE *f, gene *t, csw *sw) { int i,*s,*sb,*se; if (t->nintron <= 0) return; if (*(t->name) == '\0') fprintf(f,"tmRNA Sequence\n\n"); else fprintf(f,"tmRNA Sequence in %s\n\n",t->name); fprintf(f,"Permuted\n"); fprintf(f,"1 . 10 . 20 . 30 . 40 .
50\n"); sb = t->eseq; s = sb; se = sb + 54; i = 0; while (s < se) { fputc(cpbase(*s++),f); if (++i >= 50) { fputc('\n',f); i = 0; }} se = sb + t->intron; while (s < se) { fputc(cbase(*s++),f); if (++i >= 50) { fputc('\n',f); i = 0; }} se = sb + t->asst; while (s < se) { fputc(cpbase(*s++),f); if (++i >= 50) { fputc('\n',f); i = 0; }} se = sb + t->asst + t->astem1 + t->dloop + t->cstem; while (s < se) { fputc(cbase(*s++),f); if (++i >= 50) { fputc('\n',f); i = 0; }} se = sb + t->tps; while (s < se) { fputc(cpbase(*s++),f); if (++i >= 50) { fputc('\n',f); i = 0; }} se = sb + t->tpe + 1; while (ltranslate(se,t,sw) == '*') se += 3; while (s < se) { fputc(cbase(*s++),f); if (++i >= 50) { fputc('\n',f); i = 0; }} se = sb + t->tpe + TMPTRAILER - 54; while (s <= se) { fputc(cpbase(*s++),f); if (++i >= 50) { fputc('\n',f); i = 0; }} if (i > 0) fputc('\n',f); fprintf(f,"\n5' tRNA domain at [%d,%d]\n", t->asst+1,t->asst+t->astem1+t->dloop+t->cstem); fprintf(f,"3' tRNA domain at [%d,%d]\n", 55,t->intron); fprintf(f,"Resume consensus sequence at [%d,%d]: ",t->tps - 6,t->tps + 11); s = t->eseq + t->tps - 7; for (i = 0; i < 18; i++) fputc(cbase(*s++),f); fputc('\n',f); fputc('\n',f); disp_peptide_tag(f,t,sw); }
/* disp_cds: print a summary of CDS gene t to f.
   Reports the codon count (t->nbase/3, less one when t->tps is zero), the
   first three bases of t->seq as the start codon, and the remaining bases
   of t->seq up to TERM under the "stop = " label; " incomplete" is appended
   when t->tps is non-zero. The translation of t->eseq (one ltranslate()
   character per 3 bases, up to TERM, less one codon when t->tps is set) is
   then printed in rows of 50, followed by the score when sw->energydisp
   is set. */
void disp_cds(FILE *f, gene *t, csw *sw) { int i,ncodon,*s,*se; char c; ncodon = t->nbase/3; if (!t->tps) ncodon--; fprintf(f,"\n%d codons, start = %c%c%c, stop = ",ncodon, cbase(t->seq[0]),cbase(t->seq[1]),cbase(t->seq[2])); s = t->seq + 3; while ((i = *s++) != TERM) fputc(cbase(i),f); if (t->tps) fprintf(f," incomplete"); fprintf(f,"\n1 . 10 . 20 . 30 . 40 .
50\n"); s = t->eseq; se = s; while (*se != TERM) se++; if (t->tps) se -= 3; i = 0; while (s < se) { c = ltranslate(s,t,sw); fputc(c,f); if (++i >= 50) { fputc('\n',f); i = 0; } s += 3; } if (i > 0) fputc('\n',f); if (sw->energydisp) fprintf(f,"Score = %lg\n",t->energy); fputc('\n',f); fputc('\n',f); }
/* pseudogene: return 1 when t->energy is below sw->reportpsthresh, or when
   t is a tRNA whose anticodon loop length (t->cloop) is not 7; else 0. */
int pseudogene(gene *t, csw *sw) { if (t->energy < sw->reportpsthresh) return(1); if (t->genetype == tRNA) if (t->cloop != 7) return(1); return(0); }
/* disp_gene: build the drawing of gene t in matrix m (build_tmrna or
   build_trna, depending on t->genetype) and annotate it with xcopy():
   gene name, location, base count with %GC, a "Possible Pseudogene" flag
   when sw->reportpseudogenes is set and pseudogene() agrees, and the score
   when sw->energydisp is set. Gene types other than tmRNA/tRNA get the
   annotations only. */
void disp_gene(gene *t, char m[][MATY], csw *sw) { double gc; char stat[80]; switch(t->genetype) { case tmRNA: build_tmrna(t,m,13,27,sw); xcopy(m,4,3,"tmRNA (tRNA domain)",19); break; case tRNA: build_trna(t,m,13,27,sw); name(t,stat,1,sw); xcopy(m,4,3,stat,length(stat)); break; } location(stat,t,sw,"Sequence"); xcopy(m,4,1,stat,length(stat)); gc = gc_content(t); sprintf(stat,"%d bases, %%GC = %2.1f",t->nbase,100.0*gc); xcopy(m,4,2,stat,length(stat)); if (sw->reportpseudogenes) if (pseudogene(t,sw)) xcopy(m,4,4,"Possible Pseudogene",19); if (sw->energydisp) { sprintf(stat,"Score = %g\n",t->energy); xcopy(m,4,0,stat,length(stat)); }}
/* disp_batch_trna: write the one-line batch report for tRNA gene t to f.
   Fields: species label (suffixed with '*' when sw->reportpseudogenes is
   set and pseudogene() returns non-zero), position, optional energy, the
   1-based anticodon position (moved on by t->nintron when the intron starts
   at or before the anticodon), the anticodon bases, and "i(pos,len)" for an
   intron. Loop lengths 6 and 8 give an ambiguous label: both candidate
   amino acids when sw->batchfullspecies is set, "???" otherwise. The
   sequence follows when sw->seqdisp is set. */
void disp_batch_trna(FILE *f, gene *t, csw *sw) { int ls,ps,*s,anticodon; char pos[50],species[50]; static char type[2][6] = { "tRNA","mtRNA" }; static char asterisk[2] = { ' ','*'}; s = t->seq + t->anticodon; ps = sw->reportpseudogenes?(pseudogene(t,sw)?1:0):0; if (sw->batchfullspecies) { switch(t->cloop) { case 6: sprintf(species,"%s-?(%s|%s)%c", type[sw->mtrna],aa(s-1,sw),aa(s,sw),asterisk[ps]); break; case 8: sprintf(species,"%s-?(%s|%s)%c", type[sw->mtrna],aa(s,sw),aa(s+1,sw),asterisk[ps]); break; case 7: default: sprintf(species,"%s-%s%c",type[sw->mtrna],aa(s,sw),asterisk[ps]); break; }} else { switch(t->cloop) { case 6: case 8: sprintf(species,"%s-???%c",type[sw->mtrna],asterisk[ps]); break; case 7: default: sprintf(species,"%s-%s%c",type[sw->mtrna],aa(s,sw),asterisk[ps]); break; }} position(pos,t,sw); ls = length(species); if (ls <= 10) fprintf(f,"%-10s%28s",species,pos); else if (ls
<= 17) fprintf(f,"%-17s%21s",species,pos); else fprintf(f,"%-25s%13s",species,pos); if (sw->energydisp) { fprintf(f,"\t%5.1f",t->energy); } anticodon = 1 + t->anticodon; if (t->nintron > 0) if (t->intron <= t->anticodon) anticodon += t->nintron; fprintf(f,"\t%-4d",anticodon); switch(t->cloop) { case 6: fprintf(f,"\t(%c%c) ",cbase(*s),cbase(s[1])); break; case 8: fprintf(f,"\t(%c%c%c%c) ", cbase(*s),cbase(s[1]),cbase(s[2]),cbase(s[3])); break; case 7: default: fprintf(f,"\t(%c%c%c)",cbase(*s),cbase(s[1]),cbase(s[2])); break; } if (t->nintron > 0) fprintf(f,"i(%d,%d)",t->intron+1,t->nintron); fputc('\n',f); if (sw->seqdisp) disp_seq(f,t,sw); } void disp_batch_tmrna(FILE *f, gene *t, csw *sw) { int ps,tpe,*sb,*se; char pos[50]; static char permask[2][2][3] = { {" ","p "},{"* ","p*"} }; ps = (t->energy < 100.0)?1:0; position(pos,t,sw); fprintf(f,"tmRNA%2s%31s",permask[(t->asst == 0)?0:1][ps],pos); if (sw->energydisp) { fprintf(f,"\t%5.1f\t",t->energy); } tpe = t->tpe; sb = t->eseq + t->tps; se = t->eseq + tpe + 1; while (ltranslate(se,t,sw) == '*') { se += 3; tpe += 3; } fprintf(f,"\t%d,%d\t",t->tps+1,tpe+1); while (sb < se) { fputc(ltranslate(sb,t,sw),f); sb += 3; } fputc('\n',f); if (sw->seqdisp) disp_seq(f,t,sw); } void disp_batch_srprna(FILE *f, gene *t, csw *sw) { int ps,tpe,*sb,*se; char pos[50]; static char asterisk[2] = { ' ','*'}; ps = (t->energy < 100.0)?1:0; position(pos,t,sw); fprintf(f,"srpRNA%c %25s",asterisk[ps],pos); if (sw->energydisp) { fprintf(f,"\t%5.1f",t->energy); } fputc('\n',f); if (sw->seqdisp) disp_seq(f,t,sw); } void disp_batch_cds(FILE *f, gene *t, csw *sw) { int ps,tpe,*sb,*se; char pos[50]; static char asterisk[2] = { ' ','*'}; ps = (t->energy < 100.0)?1:0; position(pos,t,sw); fprintf(f,"CDS%c %25s",asterisk[ps],pos); if (sw->energydisp) { fprintf(f,"\t%5.1f",t->energy); } fputc('\n',f); if (sw->seqdisp) disp_seq(f,t,sw); } double vloop_stability(int *sb, int var, int *varbp) { int e,stem,vstem,loop,*sn,*sen,*pos1,*pos2,*se,*sc,*sd,*sf,*s; 
unsigned int c,cn,m; static unsigned int A[6] = { 0,0,0x100,0x400,0,0 }; static unsigned int C[6] = { 0,0,0x400,0,0,0 }; static unsigned int G[6] = { 0x100,0x400,0,0x200,0,0 }; static unsigned int T[6] = { 0x400,0,0x200,0,0,0 }; static unsigned int te[6] = { 0,0,0,0,0,0 };
/* te[b] is a rolling template with 4 bits per position: shifting right by
   4 and OR-ing in A/C/G/T[*se] records, for base code b, the table value
   against each of the last three bases visited. */
e = 0; sc = sb + 3; se = sb + var - 2; sf = se - 2; te[0] = A[*se]; te[1] = C[*se]; te[2] = G[*se]; te[3] = T[*se]; while (--se > sf) { te[0] = (te[0] >> 4) | A[*se]; te[1] = (te[1] >> 4) | C[*se]; te[2] = (te[2] >> 4) | G[*se]; te[3] = (te[3] >> 4) | T[*se]; } while (se >= sc) { te[0] = ((te[0] >> 4) | A[*se]); te[1] = ((te[1] >> 4) | C[*se]); te[2] = ((te[2] >> 4) | G[*se]); te[3] = ((te[3] >> 4) | T[*se]); s = se - 5; sd = se - 7; m = te[*s]; while (--s > sd) m = (m >> 4) + te[*s];
/* A low-nibble sum of at least 9 starts a 3-pair stem, which is then
   extended inwards while vbp[][] is positive and the loop stays >= 6. */
while (s >= sb) { m = (m >> 4) + te[*s]; c = m & 0xf; if (c >= 9) { stem = 3; loop = (int)(se - s) - 3; sen = se; sn = s + 2; while (loop >= 6) { if ((cn = vbp[sen[-1]][sn[1]]) <= 0) break; c += cn; stem++; loop -= 2; sen--; sn++; } if (c > e) { e = c; pos1 = s; pos2 = sen; vstem = stem; }} s--; } se--; } if (e > 0) { *varbp = (((int)(pos1-sb))<<10) + (((int)(pos2-sb))<<5) + vstem; return((double)(3*(vstem - 4))); } else { *varbp = 0; return(-12.0); }}
/* find_tag_upstream_hairpin: scan backwards from se and return 15.0 as soon
   as the low nibble of the rolling sum m reaches 5, or 0.0 if the scan ends
   without that happening. The tables score 0x10000 for the pairs A-T, C-G,
   G-C, G-T, T-A and T-G; with 4-bit shifts a nibble value of 5 presumably
   means five consecutive pairings - TODO confirm against the caller. */
double find_tag_upstream_hairpin(int *se) { int *sb,*sd,*sf,*sh,*s; unsigned int c,m,mx; static unsigned int A[6] = { 0,0,0,0x10000,0,0 }; static unsigned int C[6] = { 0,0,0x10000,0,0,0 }; static unsigned int G[6] = { 0,0x10000,0,0x10000,0,0 }; static unsigned int T[6] = { 0x10000,0,0x10000,0,0,0 }; static unsigned int t[6] = { 0,0,0,0,0,0 }; mx = 0; sf = se - 4; sb = se - 20; t[0] = A[*se]; t[1] = C[*se]; t[2] = G[*se]; t[3] = T[*se]; while (--se > sf) { t[0] = (t[0] >> 4) | A[*se]; t[1] = (t[1] >> 4) | C[*se]; t[2] = (t[2] >> 4) | G[*se]; t[3] = (t[3] >> 4) | T[*se]; } sh = se - 4; sd = se - 30; while (se > sb) { t[0] = ((t[0] >> 4) | A[*se]); t[1] = ((t[1] >> 4) | C[*se]); t[2] = ((t[2] >> 4) | G[*se]); t[3] = ((t[3] >> 4) | T[*se]); s = sh; m = t[*s]; while (--s >
sd) { m = (m >> 4) + t[*s]; c = m & 0xf; if (c > mx) mx = c; if (mx == 5) goto FND; } sd--; sh--; se--; } return(0.0); FND: return(15.0); }
/* find_taghairpin: build 4-bit-per-position templates from the bases
   between seq - 20 and seq - 4, slide them against the bases from seq + 20
   down towards seq + 2, and return twice the largest low-nibble sum seen
   (mx << 1). The "& 0xffffffff" masks keep the templates to 32 bits. */
double find_taghairpin(int *seq) { int i,*s,*sb,*se,*sf; unsigned int c,m,mx; static unsigned int A[6] = { 0,0,0,1,0,0 }; static unsigned int C[6] = { 0,0,1,0,0,0 }; static unsigned int G[6] = { 0,1,0,1,0,0 }; static unsigned int T[6] = { 1,0,1,0,0,0 }; static unsigned int t[6] = { 0,0,0,0,0,0 }; mx = 0; sb = seq - 20; se = seq - 13; sf = seq - 4; t[0] = A[*sb]; t[1] = C[*sb]; t[2] = G[*sb]; t[3] = T[*sb]; while (++sb < se) { t[0] = (t[0] << 4) | A[*sb]; t[1] = (t[1] << 4) | C[*sb]; t[2] = (t[2] << 4) | G[*sb]; t[3] = (t[3] << 4) | T[*sb]; } while (sb < sf) { t[0] = ((t[0] << 4) | A[*sb]) & 0xffffffff; t[1] = ((t[1] << 4) | C[*sb]) & 0xffffffff; t[2] = ((t[2] << 4) | G[*sb]) & 0xffffffff; t[3] = ((t[3] << 4) | T[*sb]) & 0xffffffff; sb++; s = seq + 20; se = seq + 2; m = t[*s--]; while (s > se) { m = (m >> 4) + t[*s--]; c = m & 0xf; if (c > mx) mx = c; } i = 7 - (int)mx; while (i-- > 0) { m = m >> 4; c = m & 0xf; if (c > mx) mx = c; }} return((double)(mx << 1)); }
/* stem_energy: sum the bem[][] pairing values over `stem` base pairs,
   pairing s1[0], s1[1], ... with s2[-1], s2[-2], ... (s2 points one past
   the last base of the opposite strand). At least one pair is always
   scored, even when stem < 1. */
double stem_energy(int *s1, int *s2, int stem) { int *se; double energy; static double bem[6][6] = { { -1.072,-0.214,-1.072, ATBOND, 0.000, 0.000 }, { -0.214,-1.072, 3.000,-1.072, 0.000, 0.000 }, { -1.072, 3.000,-1.072, 1.286, 0.000, 0.000 }, { ATBOND,-1.072, 1.286,-0.214, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; se = s1 + stem; energy = bem[*s1++][*--s2]; while (s1 < se) energy += bem[*s1++][*--s2]; return(energy); }
/* astem_energy: same traversal as stem_energy but scored with the abem[][]
   table, whose mismatch penalties are twice those of stem_energy's
   bem[][]. */
double astem_energy(int *s1, int *s2, int stem) { int *se; double energy; static double abem[6][6] = { { -2.144,-0.428,-2.144, ATBOND, 0.000, 0.000 }, { -0.428,-2.144, 3.000,-2.144, 0.000, 0.000 }, { -2.144, 3.000,-2.144, 1.286, 0.000, 0.000 }, { ATBOND,-2.144, 1.286,-0.428, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; se =
s1 + stem; energy = abem[*s1++][*--s2]; while (s1 < se) energy += abem[*s1++][*--s2]; return(energy); }
/* trna_score: print the component scores of tRNA gene t to f (returns
   without output for any other gene type): the T-arm score (loop motif
   bonuses + stem pairing energy - stem/loop length penalties), the A-stem
   score over 7 pairs (starting one base in when t->astem1 > 7), and the
   V-loop stability, which is computed only when t->var > 17 and reported as
   0 otherwise. */
void trna_score(FILE *f, gene *t) { int *s,*tpos,tarm,varbp; double ea,eta,evls; static double bem[6][6] = { { -2.144,-0.428,-2.144, ATBOND, 0.000, 0.000 }, { -0.428,-2.144, 3.000,-2.144, 0.000, 0.000 }, { -2.144, 3.000,-2.144, 1.286, 0.000, 0.000 }, { ATBOND,-2.144, 1.286,-0.428, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; static double A[6] = { 1.0,0.0,0.0,0.0,0.0,0.0 }; static double C[6] = { 0.0,1.0,0.0,0.0,0.0,0.0 }; static double G[6] = { 0.0,0.0,1.0,0.0,0.0,0.0 }; static double T[6] = { 0.0,0.0,0.0,1.0,0.0,0.0 }; if (t->genetype != tRNA) return; tarm = 2*t->tstem + t->tloop; tpos = t->seq + t->astem1 + t->spacer1 + t->dloop + 2*t->dstem + 1 + 2*t->cstem + t->cloop + t->var; s = tpos + t->tstem - 1; eta = 6.0*(G[s[0]] + T[s[1]] + T[s[2]] + C[s[3]]) + 3.0*A[s[1]]; s += t->tloop - 3; eta += 2.0*(G[*s] + A[s[1]] + T[s[3]] + C[s[4]] + C[s[5]]); eta += astem_energy(tpos,tpos+tarm,t->tstem); eta += bem[tpos[t->tstem]][tpos[t->tstem + 4]]; eta -= 3.0*(double)(5 - t->tstem); if (t->tloop > 7) eta -= 3.0*(double)(t->tloop - 7); else eta -= 3.0*(double)(7 - t->tloop); s = t->seq; if (t->astem1 > 7) s++; ea = astem_energy(s,tpos+tarm+7,7); if (t->var > 17) evls = vloop_stability(tpos-t->var,t->var,&varbp); else evls = 0.0; fprintf(f,"\n"); fprintf(f," T-arm score: %g\n",eta); fprintf(f," A-stem score: %g\n",ea); fprintf(f," V-loop stability: %g\n",evls); fprintf(f,"\n"); }
/* tmrna_score: print the individual score components of tmRNA gene t to f,
   followed by their total and the normalised score from nenergy(). Returns
   without output for any other gene type. With sw->tmstrict clear, the tag
   end alanine and the two hairpin scores are fixed constants instead of
   being computed. */
void tmrna_score(FILE *f, gene *t, csw *sw) { int r,j,te,*s,*sb,*se,*tpos,tarm; double e,er,et,eal,esp,ed,ec,ea,egga,etcca,egg,eta,edgg; double ehairpin,euhairpin; static int gtem[6] = { 0x00,0x00,0x11,0x00,0x00,0x00 }; static double tagend_score[4] = { 36.0, 66.0, 62.0, 72.0 }; static int nps[126] = { 0,0,0,0, 0,0,0,0, 0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1, 1,1,1,1, 1,1,1,1, 2,1,2,1, 0,0,0,0, 2,1,1,1,
1,1,1,1, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 }; static double bem[6][6] = { { -2.144,-0.428,-2.144, ATBOND, 0.000, 0.000 }, { -0.428,-2.144, 3.000,-2.144, 0.000, 0.000 }, { -2.144, 3.000,-2.144, 1.286, 0.000, 0.000 }, { ATBOND,-2.144, 1.286,-0.428, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } };
/* Indicator tables indexed by base code: the single-letter tables are 1.0
   for the listed base code(s); the n-prefixed tables are 1.0 for codes that
   are NOT in the named class (used as penalties below). */
static double A[6] = { 1.0,0.0,0.0,0.0,0.0,0.0 }; static double C[6] = { 0.0,1.0,0.0,0.0,0.0,0.0 }; static double G[6] = { 0.0,0.0,1.0,0.0,0.0,0.0 }; static double K[6] = { 0.0,0.0,1.0,1.0,0.0,0.0 }; static double R[6] = { 1.0,0.0,1.0,0.0,0.0,0.0 }; static double T[6] = { 0.0,0.0,0.0,1.0,0.0,0.0 }; static double Y[6] = { 0.0,1.0,0.0,1.0,0.0,0.0 }; static double nA[6] = { 0,1.0,1.0,1.0,1.0,1.0 }; static double nV[6] = { 0,0,0,1.0,1.0,1.0 }; static double nM[6] = { 0,0,1.0,1.0,1.0,1.0 }; if (t->genetype != tmRNA) return; tarm = 2*t->tstem + t->tloop;
/* er: resume-sequence score over the bases starting at t->tps - 7; the
   strict mode subtracts penalties at offsets 10, 11, 14 and 17. */
s = t->eseq + t->tps - 7; er = A[s[1]]+2.0*T[s[2]]+C[s[2]]+3.0*A[s[3]]+R[s[4]]+Y[s[6]]+ 3.0*G[s[7]]+C[s[8]]; if (sw->tmstrict) er -= (nV[s[10]] + nV[s[11]] + nM[s[14]] + nA[s[17]]); er *= 4.0;
/* et: tag-end score, selected by the low bit of nps[] for each of the two
   triplets starting at t->tpe - 8. */
s = t->eseq + t->tpe - 8; te = ((nps[(s[0]<<4) + (s[1]<<2) + s[2]] & 1) << 1) | (nps[(s[3]<<4) + (s[4]<<2) + s[5]] & 1); et = tagend_score[te]; if (sw->tmstrict) { eal = 0.0; j = -3; while (j < 6) { te = s[j++]; te = (te << 2) | s[j++]; if (te == 9) eal = (double)(11 + 2*((j + 1)/3)); j++; } ehairpin = find_taghairpin(s + 8); euhairpin = find_tag_upstream_hairpin(t->eseq + t->tps - 10); } else { eal = 15.0; ehairpin = 16.0; euhairpin = 15.0; }
/* tpos: start of the T-arm; the offset is computed differently for the
   permuted layout (t->asst > 0), which also has no distance score. */
tpos = t->eseq; if (t->asst > 0) { tpos += t->cstem + t->var + 54; ed = 0.0; } else { tpos += t->astem1 + t->dloop + 2*t->cstem + t->nintron + t->var; ed = 0.001*(double)(t->tps - (long)(tpos - t->eseq)); } s = tpos + t->tstem - 10; e = K[s[0]] + G[s[1]] + A[s[2]]; egga = K[s[1]] + G[s[2]] + A[s[3]]; if (e > egga) egga = e; egga *= 6.0; if (egga < 18.0) egga = 0.0; s = tpos + tarm + 4; etcca = 10.0*(T[s[0]] + C[s[1]] +
C[s[2]] + A[s[3]]); s = t->eseq + t->asst; egg = 7.0*(G[s[1]] + G[s[2]]); edgg = 0.0; s = t->eseq + t->asst + t->astem1; sb = s + 3; se = s + 7; r = gtem[*sb++]; while (sb < se) { r = (r >> 4) + gtem[*sb++]; if ((r & 3) == 2) { edgg = 14.0; break; }} s = tpos + t->tstem - 1; if (sw->tmstrict && (t->asst == 0)) eta = 6.0*(G[s[0]] + T[s[1]] + T[s[2]] + C[s[3]]) + 3.0*A[s[1]]; else eta = 6.0*(G[s[0]] + (G[s[1]] + T[s[1]]) + (G[s[2]] + T[s[2]]) + C[s[3]]) + 3.0*A[s[1]]; s += t->tloop - 3; eta += 2.0*(G[*s] + A[s[1]] + T[s[3]] + C[s[4]] + C[s[5]]); eta += astem_energy(tpos,tpos+tarm,t->tstem); eta += bem[tpos[t->tstem]][tpos[t->tstem + 4]]; eta -= 3.0*(double)(5 - t->tstem); if (t->tloop > 7) eta -= 3.0*(double)(t->tloop - 7); else eta -= 3.0*(double)(7 - t->tloop); eta *= 1.59; s = t->eseq + t->asst + t->astem1 + t->dloop; ec = stem_energy(s,tpos-t->var,t->cstem); s = t->eseq + t->asst; ea = astem_energy(s,tpos+tarm+t->astem1,t->astem1); esp = ((t->tpe - t->tps) < 24)?-15.0:0.0; e = er + et + ed + eal + esp + egga + egg + etcca + eta + ec + ea + edgg + ehairpin + euhairpin; fprintf(f,"\n"); fprintf(f," Resume sequence score: %g\n",er); fprintf(f,"Resume-Tarm distance score: %g\n",ed); fprintf(f," Tag peptide score: %g\n",et); fprintf(f," Tag end alanine score: %g\n",eal); fprintf(f," Short tag penalty: %g\n",esp); fprintf(f," Tag hairpin score: %g\n",ehairpin); fprintf(f,"Tag upstream hairpin score: %g\n",euhairpin); fprintf(f," V-loop GGA score: %g\n",egga); fprintf(f," A-stem GG score: %g\n",egg); fprintf(f," A-stem TCCA score: %g\n",etcca); fprintf(f," D-loop GG score: %g\n",edgg); fprintf(f," T-arm score: %g\n",eta); fprintf(f," C-stem score: %g\n",ec); fprintf(f," A-stem score: %g\n",ea); fprintf(f," C-stem + A-stem score: %g\n",ea + ec); fprintf(f," Total score: %g\n",e); fprintf(f," Normalised score: %g\n",nenergy(t,sw)); fprintf(f,"\n"); }
/* find_tstems: scan the ls bases at s (between sw->loffset and
   sw->roffset) for candidate T-arms. A 4-base rolling template score
   (tem_tmrna when sw->tmrna is set or sw->threshlevel < 1.0, else tem_trna)
   must reach sw->ttscanthresh; stems of 4-5 pairs and loops of 5-9 bases
   are then scored, and each combination whose energy reaches
   sw->ttarmthresh is stored in hit[] as (pos, loop, stem, energy).
   Returns the number of hits stored; stops with a message on stderr once
   nh hits have been recorded. */
int find_tstems(int *s, int ls, trna_loop hit[], int nh, csw *sw) { int i,r,c,tstem,tloop,ithresh1; int
*s1,*s2,*se,*ss,*si,*sb,*sc,*sf,*sl,*sx,*tem; double ec,energy,penalty,thresh2; static double bem[6][6] = { { -2.144,-0.428,-2.144, ATBOND, 0.000, 0.000 }, { -0.428,-2.144, 3.000,-2.144, 0.000, 0.000 }, { -2.144, 3.000,-2.144, 1.286, 0.000, 0.000 }, { ATBOND,-2.144, 1.286,-0.428, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; static double A[6] = { 2.0,0.0,0.0,0.0,0.0,0.0 }; static double C[6] = { 0.0,2.0,0.0,0.0,0.0,0.0 }; static double G[6] = { 0.0,0.0,2.0,0.0,0.0,0.0 }; static double T[6] = { 0.0,0.0,0.0,2.0,0.0,0.0 }; static int tem_trna[6] = { 0x0100, 0x0002, 0x2000, 0x0220, 0x0000, 0x0000 }; static int tem_tmrna[6] = { 0x0100, 0x0002, 0x2220, 0x0220, 0x0000, 0x0000 }; i = 0; tem = (sw->tmrna || (sw->threshlevel < 1.0))?tem_tmrna:tem_trna; ithresh1 = (int)sw->ttscanthresh; thresh2 = sw->ttarmthresh; ss = s + sw->loffset; si = ss + 4 - 1; sl = s + ls - sw->roffset + 5 + 3;
/* Prime the rolling score with three bases; the low nibble of r is the
   template score once the fourth base has been added in the loop. */
r = tem[*si++]; r = (r >> 4) + tem[*si++]; r = (r >> 4) + tem[*si++]; while (si < sl) { r = (r >> 4) + tem[*si++]; if ((c = (r & 0xF)) < ithresh1) continue; sb = si - 7; sf = sb + 13; ec = (double)(3*c); for (tstem = 4; tstem <= 5; tstem++) { if (sb >= (sl-8)) goto NX; sc = sf; sx = si - 2; for (tloop = 5; tloop <= 9; tloop++) { if (tloop > 7) penalty = 3.0*(double)(tloop - tstem - 2); else penalty = 3.0*(double)(12 - tloop - tstem); s1 = sb; s2 = sc; se = s1 + tstem; energy = ec + bem[*se][se[4]] + bem[*s1++][*--s2] - penalty; while (s1 < se) energy += bem[*s1++][*--s2]; energy += G[*sx] + A[sx[1]] + T[sx[3]] + C[sx[4]] + C[sx[5]]; if (energy >= thresh2) { if (i >= nh) { fprintf(stderr,"Too many tstem hits\n"); goto FN; } hit[i].pos = sb; hit[i].loop = tloop; hit[i].stem = tstem; hit[i].energy = energy; i++; } sx++; sc++; } NX: if (--sb < ss) break; sf++; }} FN: return(i); }
/* find_astem5: scan the bases from si up to sl + n3 for 5' acceptor-stem
   candidates that pair with the n3 bases at astem3. Positions whose rolling
   template score reaches sw->tascanthresh are rescored with abem[][], and
   those reaching sw->tastemthresh are stored in hit[] as (pos, energy).
   Returns the number of hits stored; stops with a message on stderr once
   nh hits have been recorded. */
int find_astem5(int *si, int *sl, int *astem3, int n3, trna_loop hit[], int nh, csw *sw) { int i,k; int *s1,*s2,*se; unsigned int r,tascanthresh; double
tastemthresh,energy; static unsigned int tem[6] = { 0,0,0,0,0,0 }; static unsigned int A[6] = { 0,0,0,2,0,0 }; static unsigned int C[6] = { 0,0,2,0,0,0 }; static unsigned int G[6] = { 0,2,0,1,0,0 }; static unsigned int T[6] = { 2,0,1,0,0,0 }; static double abem[6][6] = { { -2.144,-0.428,-2.144, ATBOND, 0.000, 0.000 }, { -0.428,-2.144, 3.000,-2.144, 0.000, 0.000 }, { -2.144, 3.000,-2.144, 1.286, 0.000, 0.000 }, { ATBOND,-2.144, 1.286,-0.428, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; tascanthresh = (unsigned int)sw->tascanthresh; tastemthresh = sw->tastemthresh; i = 0; sl += n3;
/* Build tem[b]: one 4-bit field per astem3 position holding the A/C/G/T
   table value for base code b against that astem3 base (2 for the pairs
   A-T, C-G, G-C, T-A and 1 for G-T, T-G). tem[] is static, so it is
   rebuilt on every call. */
se = astem3 + n3 - 1; tem[0] = A[*se]; tem[1] = C[*se]; tem[2] = G[*se]; tem[3] = T[*se]; while (--se >= astem3) { tem[0] = (tem[0] << 4) + A[*se]; tem[1] = (tem[1] << 4) + C[*se]; tem[2] = (tem[2] << 4) + G[*se]; tem[3] = (tem[3] << 4) + T[*se]; }
/* Prime the rolling sum with n3 - 1 bases; each loop step adds one more
   base and tests the low nibble against the scan threshold. */
r = tem[*si++]; k = 1; while (++k < n3) r = (r >> 4) + tem[*si++]; while (si < sl) { r = (r >> 4) + tem[*si++]; if ((r & 15) >= tascanthresh) { s1 = astem3; s2 = si; se = s1 + n3; energy = abem[*s1++][*--s2]; while (s1 < se) energy += abem[*s1++][*--s2]; if (energy >= tastemthresh) { if (i >= nh) { fprintf(stderr,"Too many astem5 hits\n"); goto FN; } hit[i].pos = si - n3; hit[i].energy = energy; i++; }}} FN: return(i); } /* Resume consensus sequence is: WAUARNYGCNAANNANNA Williams, K. P., Martindale, K. A. & Bartel, D. P. (1999) EMBO J.
18, 5423-5433 A more general consensus sequence is NATARNYGCNRVNNMNNH aragorn strict search uses NATARNYGCNRVNNMNNA aragorn relaxed search uses NATARNYGC R = A or G Y = C or T W = A or T V = A or C or G M = A or C H = A or C or T K = G or T */
/* find_resume_seq: scan the ls bases at s for resume-sequence candidates.
   An 8-base rolling template score must reach sw->tmrthresh. A stop codon
   (TAA, TAG or TGA, as coded by the base tests in the inner loop) is then
   looked for, stepping 3 bases at a time up to MAXTAGDIST from the
   candidate; candidates without one are skipped. Each hit stores the
   candidate position, the distance to the base after the stop codon (in
   .stem) and an energy. With sw->tmstrict set, the extra consensus
   positions are penalised, the stop codon must not lie before
   MINTAGDIST + 2, and the tag-end, alanine and hairpin scores are added;
   the relaxed branch searches from MINTAGDIST and adds a fixed 46.0
   instead. Returns the number of hits stored; stops with a message on
   stderr once nh hits have been recorded. */
int find_resume_seq(int *s, int ls, trna_loop hit[], int nh, csw *sw) { int e,i,j,k,a,aa[3],*si,*sb,*sf,*st,*sl; double al; unsigned int r,c,thresh; static int nps[105] = { 0,0,0,0, 0,0,0,0, 0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1, 1,1,1,1, 1,1,1,1, 0,1,0,1, 0,0,0,0, 0,1,1,1, 1,1,1,1, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,0 }; static double score[4] = { 36.0, 66.0, 62.0, 72.0 }; static unsigned int tem[6] = { 0x10310000, 0x01000101, 0x00010030, 0x02000100, 0x00000000, 0x00000000 }; static int A[6] = { 0,1,1,1,1,1 }; static int V[6] = { 0,0,0,1,1,1 }; static int M[6] = { 0,0,1,1,1,1 }; thresh = (unsigned int)sw->tmrthresh; i = 0; sl = s + ls; r = tem[*s++]; r = (r >> 4) + tem[*s++]; r = (r >> 4) + tem[*s++]; r = (r >> 4) + tem[*s++]; r = (r >> 4) + tem[*s++]; r = (r >> 4) + tem[*s++]; r = (r >> 4) + tem[*s++]; if (sw->tmstrict) while (s < sl) { r = (r >> 4) + tem[*s++]; if ((c = (r & 0xF)) < thresh) continue; c -= (V[s[1]] + V[s[2]] + M[s[5]] + A[s[8]]); if (c < thresh) continue; if (i >= nh) goto FL; st = s - 2; si = st; sb = st + MINTAGDIST + 2; sf = st + MAXTAGDIST; while (si < sf) { if (*si++ != Thymine) si++; else if (*si == Adenine) { if (!(*++si & 5)) goto ST1; } else if (*si == Guanine) { if (*++si == Adenine) goto ST1; } else si++; si++; } continue; ST1: if (si < sb) continue; al = 0.0; k = 0; j = -11; while (j < -2) { a = si[j++]; a = (a << 2) | si[j++]; if (a == 9) al = (double)(11 + 2*((j + 9)/3)); a = (a << 2) | si[j++]; aa[k++] = a; } hit[i].pos = st; hit[i].stem = (int)(si - st); e = (nps[aa[1]] << 1) | (nps[aa[2]]); hit[i].energy = (double)(c << 2) + score[e] + al + find_taghairpin(si) + find_tag_upstream_hairpin(st-10); i++; }
else while (s < sl) { r = (r >> 4) + tem[*s++]; if ((c = (r & 0xF)) < thresh) continue; if (i >= nh) goto FL; st = s - 2; si = st + MINTAGDIST; sf = st + MAXTAGDIST; while (si < sf) { if (*si++ != Thymine) si++; else if (*si == Adenine) { if (!(*++si & 5)) goto ST2; } else if (*si == Guanine) { if (*++si == Adenine) goto ST2; } else si++; si++; } continue; ST2: hit[i].pos = st; hit[i].stem = (int)(si - st); e = (nps[(si[-8] << 4) | (si[-7] << 2) | si[-6]] << 1) | (nps[(si[-5] << 4) | (si[-4] << 2) | si[-3]]); hit[i].energy = 46.0 + (double)(c << 2) + score[e]; i++; } FN: return(i); FL: fprintf(stderr,"Too many resume sequence hits\n"); goto FN; }
/* base_copy3: copy n base codes from `from` to `to`, append TERM, and
   return a pointer to the TERM element. `to` must have room for n + 1
   elements. */
int *base_copy3(int *from, int *to, int n) { while (n-- > 0) *to++ = *from++; *to = TERM; return(to); }
/* remove_intron: copy the nbase-base sequence at s1 into s2, skipping the
   nintron bases that start at offset `intron`, and terminate s2 with TERM.
   nbase is the length without the intron, so s1 holds nbase + nintron
   bases and s2 needs room for nbase + 1. */
void remove_intron(int *s1, int *s2, int nbase, int intron, int nintron) { int *s1e; s1e = s1 + intron; nbase -= intron; while (s1 < s1e) *s2++ = *s1++; s1 += nintron; s1e = s1 + nbase; while (s1 < s1e) *s2++ = *s1++; *s2 = TERM; }
/* nearest_trna_gene: among the first nt entries of the global ts[] array,
   find a stored tRNA gene that overlaps t by at least `proximity` bases
   and return the one with the lowest energy, or NULL if there is none.
   Candidates on the other strand are considered only when sw->mtrna is set
   and sw->mtcompov is clear. When sw->maxintronlen > 0, candidates whose
   span differs from t's by more than a factor of 2.5 are skipped. Genes
   that wrap past the origin (stop < start) are compared in both unwrapped
   positions using d->psmax as the sequence length. */
gene *nearest_trna_gene(data_set *d, int nt, gene *t, csw *sw) { int n,i,comp,mtrna,mtcompov,maxintronlen,ilength; long a,b,c,e,score,thresh,psmax; static long proximity = 7*MINCTRNALEN/10; double energy; psmax = d->psmax; comp = t->comp; mtrna = sw->mtrna; mtcompov = sw->mtcompov; maxintronlen = sw->maxintronlen; n = -1; energy = INACTIVE; a = t->start; b = t->stop; thresh = b-a; if (b < a) { b += psmax; thresh += psmax; for (i = 0; i < nt; i++) { c = ts[i].start; e = ts[i].stop; if (e < c) { e += psmax; if (a > e) goto NXTW; if (b < c) goto NXTW; if (ts[i].genetype != tRNA) continue; if (ts[i].comp != comp) { if (!mtrna) continue; if (mtcompov) continue; } if (maxintronlen > 0) { ilength = e - c; if ((2*thresh) > (5*ilength)) continue; if ((2*ilength) > (5*thresh)) continue; } score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?e-c:b-c); if (score >= proximity) if (ts[i].energy < energy) { n = i; energy = ts[i].energy; } NXTW: c -= psmax; e -= psmax; } if (a > e) continue; if (b < c) continue;
if (ts[i].genetype != tRNA) continue; if (ts[i].comp != comp) { if (!mtrna) continue; if (mtcompov) continue; } if (maxintronlen > 0) { ilength = e - c; if ((2*thresh) > (5*ilength)) continue; if ((2*ilength) > (5*thresh)) continue; } score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?e-c:b-c); if (score >= proximity) if (ts[i].energy < energy) { n = i; energy = ts[i].energy; } } a -= psmax; b -= psmax; } for (i = 0; i < nt; i++) { c = ts[i].start; e = ts[i].stop; if (e < c) { e += psmax; if (a > e) goto NXTN; if (b < c) goto NXTN; if (ts[i].genetype != tRNA) continue; if (ts[i].comp != comp) { if (!mtrna) continue; if (mtcompov) continue; } if (maxintronlen > 0) { ilength = e - c; if ((2*thresh) > (5*ilength)) continue; if ((2*ilength) > (5*thresh)) continue; } score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?e-c:b-c); if (score >= proximity) if (ts[i].energy < energy) { n = i; energy = ts[i].energy; } NXTN: c -= psmax; e -= psmax; } if (a > e) continue; if (b < c) continue; if (ts[i].genetype != tRNA) continue; if (ts[i].comp != comp) { if (!mtrna) continue; if (mtcompov) continue; } if (maxintronlen > 0) { ilength = e - c; if ((2*thresh) > (5*ilength)) continue; if ((2*ilength) > (5*thresh)) continue; } score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?e-c:b-c); if (score >= proximity) if (ts[i].energy < energy) { n = i; energy = ts[i].energy; } } if (n >= 0) return(ts + n); return(NULL); }
/* nearest_tmrna_gene: among the first nt entries of the global ts[] array,
   find the same-strand tmRNA gene with the largest overlap with t (ties go
   to the lower energy). It is returned only when the overlap exceeds 90% of
   t's span (10*smax > 9*thresh); otherwise NULL. Genes that wrap past the
   origin are compared in both unwrapped positions using d->psmax. */
gene *nearest_tmrna_gene(data_set *d, int nt, gene *t) { int n,i,comp; long a,b,c,e,score,smax,thresh,psmax; psmax = d->psmax; comp = t->comp; smax = -1; n = -1; a = t->start; b = t->stop; thresh = b-a; if (b < a) { b += psmax; thresh += psmax; for (i = 0; i < nt; i++) { c = ts[i].start; e = ts[i].stop; if (e < c) { e += psmax; if (a > e) goto NXTW; if (b < c) goto NXTW; if (ts[i].genetype != tmRNA) continue; if (ts[i].comp != comp) continue; score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?e-c:b-c); if (score >= smax) if (score > smax) { n = i; smax = score; } else if
(ts[i].energy < ts[n].energy) n = i; NXTW: c -= psmax; e -= psmax; } if (a > e) continue; if (b < c) continue; if (ts[i].genetype != tmRNA) continue; if (ts[i].comp != comp) continue; score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?e-c:b-c); if (score >= smax) if (score > smax) { n = i; smax = score; } else if (ts[i].energy < ts[n].energy) n = i; } a -= psmax; b -= psmax; } for (i = 0; i < nt; i++) { c = ts[i].start; e = ts[i].stop; if (e < c) { e += psmax; if (a > e) goto NXTN; if (b < c) goto NXTN; if (ts[i].genetype != tmRNA) continue; if (ts[i].comp != comp) continue; score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?e-c:b-c); if (score >= smax) if (score > smax) { n = i; smax = score; } else if (ts[i].energy < ts[n].energy) n = i; NXTN: c -= psmax; e -= psmax; } if (a > e) continue; if (b < c) continue; if (ts[i].genetype != tmRNA) continue; if (ts[i].comp != comp) continue; score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?e-c:b-c); if (score >= smax) if (score > smax) { n = i; smax = score; } else if (ts[i].energy < ts[n].energy) n = i; } if ((10*smax) > (9*thresh)) return(ts + n); return(NULL); } void overlap(data_set *d, int sort[], int n, int it, csw *sw) { int i,j,flag,cross,crosstoo; long a,b,e,f,a2,b2,e2,f2,psmax; char sname[100],s[100]; flag = 0; cross = 0; psmax = d->psmax; a = ts[it].start; b = ts[it].stop; if (b < a) { a2 = a - psmax; b2 = b; b += psmax; cross = 1; } j = -1; while (++j < n) { i = sort[j]; if (i == it) continue; e = ts[i].start; f = ts[i].stop; crosstoo = 0; if (f < e) { e2 = e - psmax; f2 = f; f += psmax; crosstoo = 1; } if (a <= f) if (b >= e) goto OV; if (crosstoo) if (a <= f2) if (b >= e2) goto OV; if (cross) { if (a2 <= f) if (b2 >= e) goto OV; if (crosstoo) if (a2 <= f2) if (b2 >= e2) goto OV; } continue; OV: if (!flag) fputc('\n',sw->f); name(ts+i,sname,1,sw); location(s,ts+i,sw,sname); fprintf(sw->f,"Overlap with %d: %s\n", j+1,s); flag = 1; } if (flag) fputc('\n',sw->f); } void init_gene(int nstart, int nstop) { int i; for 
(i = nstart; i < nstop; i++) { ts[i].energy = -1.0; ts[i].genetype = noGENE; ts[i].tps = 0; *(ts[i].name) = '\0'; }} gene *find_slot(data_set *d, gene *t, int *nts, csw *sw) { int i,newspace; char s1[80],s2[80],s3[80],s4[80]; gene *tn,*tsn; if (sw->comp) { t->stop = sw->start - t->start - 1; t->start = t->stop - t->nbase - t->nintron + 1; t->comp = 1; } else { t->start += sw->start; t->stop = t->start + t->nbase + t->nintron - 1; t->comp = 0; } if (!sw->linear) { t->start = sq(t->start); t->stop = sq(t->stop); } if (t->genetype == tRNA) tn = nearest_trna_gene(d,*nts,t,sw); else if (t->genetype == tmRNA) tn = nearest_tmrna_gene(d,*nts,t); else tn = NULL; if (tn) { if (t->energy <= tn->energy) return(NULL); copy(tn->name,t->name); if (sw->verbose) { fprintf(stderr,"%s %s ",name(t,s1,0,sw),position(s3,t,sw)); if (sw->energydisp) fprintf(stderr,"(%g) ",nenergy(t,sw)); fprintf(stderr,"replacing %s %s",name(tn,s2,1,sw), position(s4,tn,sw)); if (sw->energydisp) fprintf(stderr," (%g)",nenergy(tn,sw)); fprintf(stderr,"\n"); }} else { if (*nts >= sw->genespace) { newspace = (d->ps > 0)?(sw->genespace*(1 + d->psmax/d->ps)): (sw->genespace + NT); tsn = (gene *)realloc((void *)ts,newspace*sizeof(gene)); if (tsn == NULL) { fprintf(stderr,"No more memory to store detected genes\n"); fprintf(stderr,"Gene lost\n"); return(NULL); } if (sw->verbose) fprintf(stderr, "Expanding detected gene store from %d genes to %d genes\n", sw->genespace,newspace); ts = tsn; init_gene(sw->genespace,newspace); sw->genespace = newspace; } copy3cr(d->seqname,t->name,99); tn = ts + (*nts); *nts = (*nts) + 1; if (sw->verbose) { fprintf(stderr,"%s at %s",name(t,s1,0,sw),position(s2,t,sw)); if (sw->energydisp) fprintf(stderr," (%g)",nenergy(t,sw)); fprintf(stderr,"\n"); }} return(tn); } int aatail(int *s, int *ext, csw *sw) { int score,e; static int A[6] = { 1,0,0,0,0,0 }; static int C[6] = { 0,1,0,0,0,0 }; if (sw->aataildiv) { score = 0; e = 0; if (A[s[3]]) { score++; e = 3; } if (C[s[2]]) { score++; if 
(!e) e = 2; } if (C[s[1]]) { score++; if (!e) e = 1; } if (score < 2) if (A[*s]) score++; *ext = ++e; return(score); } else { score = 1; e = 1; if (C[s[1]]) { score++; e = 2; if (C[s[2]]) { score++; e = 3; if (A[s[3]]) { score++; e = 4; }}} *ext = e; return(score); }} int find_mt_trna(data_set *d, int *seq, int lseq, int nts, csw *sw) { int nah,ndh,nch,nth,ncdsh,h,i,j,k,n,p,y,av,gcc,cgcc,catc,athresh; int igc,nbase,b8,b9,b48,b57,nc,na,nt,nti,nd,ndi,dposmap[32]; int dl,tl,extastem,astem8,astem8d,ti,di,ser,tastem,tastem8,tastem8d; int astem,asteme,as,as8,aext,aext8,nbasefext,cloop,dloop,tloop,tc; int carm,cstem,darm,dstem,tarm,tstem,var,varbp,spacer1,spacer2,anticodon; int ds,dstemmotif,cloop7,mtxdetect,incds; int *s,*sl,*s1,*s2,*s4,*sa,*sb,*sc,*se,*sf,*sg,*si; int *slm,*slm1,*sle,*slb,*sld,*sge; int *dpos,*cpos,*cend,*tpos,*tend,*apos1,*apos2,*aend1,*aend2; int *clooppos,*cloopend; unsigned int bondtype,abondtype,mabondtype,acbondtype,cbondtype; unsigned int agcat,cgcat,tgcat,dbondtype,dtbondtype,tbondtype; unsigned int r,ct[6],cm,cv,q,tendmap[63]; double gcv,e,ec,ea,eas,ed,et,ev,energy,stem_energy; double darmthresh,tarmthresh,tthresh,dthresh,dtthresh,thresh; mt_trna_cloop chit[6]; static mt_trna_loop dhit[mtND+1]; static mt_trna_tloop thit[mtNTH+1]; static mt_trna_astem ahit[mtNA+1]; static mt_cds cdshit[mtNCDS]; gene *tn; static gene te = { "",{TERM},{TERM},NULL,0,0,0L,0L,7,7,1,2,1,4,7,5,7,0,0,0,5,0,5,7, tRNA,0.0,0,0,0 }; static int cAI[6] = { 8,0,0,0,8,0 }; static int cfCI[6] = { 0,16,0,0,16,0 }; static int cRI[6] = { 8,0,4,0,8,0 }; static int cTI[6] = { 0,0,0,16,16,0 }; static int cYI[6] = { 0,8,0,4,8,0 }; static int AI[6] = { 1,0,0,0,1,0 }; static int CI[6] = { 0,1,0,0,1,0 }; static int GI[6] = { 0,0,1,0,1,0 }; static int TI[6] = { 0,0,0,1,1,0 }; static int RI[6] = { 1,0,1,0,1,0 }; static int YI[6] = { 0,1,0,1,1,0 }; static int WI[6] = { 1,0,0,1,1,0 }; static unsigned int tem[6] = { 0,0,0,0,0,0 }; static unsigned int At[6] = { 0,0,0,1,1,0 }; static unsigned 
int Ct[6] = { 0,0,1,0,1,0 }; static unsigned int Gt[6] = { 0,1,0,1,1,0 }; static unsigned int Tt[6] = { 1,0,1,0,1,0 }; static unsigned int cAt[6] = { 0,0,0,2,2,0 }; static unsigned int cCt[6] = { 0,0,2,0,2,0 }; static unsigned int cGt[6] = { 0,2,0,1,2,0 }; static unsigned int cTt[6] = { 2,0,1,0,2,0 }; static unsigned int aAt[6] = { 0,0,1,2,2,0 }; static unsigned int aCt[6] = { 0,0,2,0,2,0 }; static unsigned int aGt[6] = { 1,2,0,1,2,0 }; static unsigned int aTt[6] = { 2,0,1,1,2,0 }; static unsigned int dAt[6] = { 0,0,1,2,2,0 }; static unsigned int dCt[6] = { 0,0,2,0,2,0 }; static unsigned int dGt[6] = { 1,2,0,2,2,0 }; static unsigned int dTt[6] = { 2,0,2,1,2,0 }; static unsigned int clmotif[mtNCLM] = { 0x1321300,0x3321300,0x1323002 }; static int dloopi[mt_DRLmaxlength+1][4] = { { -1 }, { -1 }, { -1 }, { -1 }, { -1 }, { -1 }, { -1 }, { 0,2,-1 }, { 0,2,-1 }, { 0,2,3,-1 }, { 0,3,-1 }, { 0,3,-1 }, { 0,3,4,-1 }, { 0,4,-1 }, { 0,5,-1 }, { 0,5,6,-1 }, { 0,5,6,-1 } }; static int tloopa[12][4] = { { -1 }, { -1 }, { -1 }, { 0,1,-1 }, { 0,2,1,-1 }, { 4,3,2,-1 }, { 4,3,-1 }, { 4,3,-1 }, { 4,3,-1 }, { 5,4,3,-1 }, { 5,4,-1 }, { 5,-1 } }; static double dA[6] = { 1.0,0.0,0.0,0.0,1.0,0.0 }; static double dT[6] = { 0.0,0.0,0.0,1.0,1.0,0.0 }; static double C[6] = { 0.0,1.0,0.0,0.0,1.0,0.0 }; static double G[6] = { 0.0,0.0,1.0,0.0,1.0,0.0 }; static double T[6] = { 0.0,0.0,0.0,1.0,1.0,0.0 }; static double AX[6] = { 0.0,-1.0,-1.0,-1.0,0.0,-1.0 }; static double AX37[6] = { 0.0,-4.0,-1.0,-4.0,0.0,-4.0 }; static double AXX[6] = { 0.0,-3.0,-1.5,-3.0,0.0,-3.0 }; static double AXX37[6] = { 0.0,-4.0,-4.0,-4.0,0.0,-4.0 }; static double AX7[6] = { 0.0,-0.7,-0.7,-0.7,0.0,-0.7 }; static double CX[6] = { -2.0,0.0,-2.0,-1.0,0.0,-2.0 }; static double CXX[6] = { -4.0,0.0,-4.0,-2.0,0.0,-4.0 }; static double CX7[6] = { -0.7,0.0,-0.7,-0.7,0.0,-0.7 }; static double TX[6] = { -1.0,-1.0,-1.0,0.0,0.0,-1.0 }; static double TXX[6] = { -2.0,-2.0,-2.0,0.0,0.0,-2.0 }; static double YX[6] = { 
-1.0,0.0,-1.0,0.0,0.0,-1.0 }; static double tC[6] = { 0.0,0.01,0.0,0.0,0.01,0.0 }; static double tG[6] = { 0.0,0.0,0.01,0.0,0.01,0.0 }; static double tT[6] = { 0.0,0.0,0.0,0.01,0.01,0.0 }; static double cA[6] = { 0.8,0.0,0.0,0.0,0.8,0.0 }; static double cfC[6] = { 0.0,2.6,0.0,0.0,2.6,0.0 }; static double cR[6] = { 0.8,-2.0,0.8,-0.8,0.8,-0.8 }; static double cT[6] = { -0.8,0.0,-0.8,2.6,2.6,-0.8 }; static double cY[6] = { -0.8,0.8,-0.8,0.8,0.8,-0.8 }; static double loop_stab[41] = { 10.0,2.0,1.0,0.4,0.3,0.2,0.1,0.0,0.1,0.2,0.3,0.4,0.5,1.6,1.7,1.8, 1.9,2.0,2.1,2.2,2.3,3.9,4.0,4.1,4.2,4.3,4.4,4.5,4.6,4.7,4.8,4.9, 5.0,5.1,5.2,5.3,5.4,5.5,5.6,5.7 }; static double bem[6][6] = { { mtNOBOND, mtNOBOND, mtGABOND, mtATBOND, mtATBOND, mtNOBOND }, { mtNOBOND, mtNOBOND, mtGCBOND, mtNOBOND, mtGCBOND, mtNOBOND }, { mtGABOND, mtGCBOND, mtGGBOND, mtGTBOND, mtGCBOND, mtNOBOND }, { mtATBOND, mtNOBOND, mtGTBOND, mtTTBOND, mtATBOND, mtNOBOND }, { mtATBOND, mtGCBOND, mtGCBOND, mtATBOND, mtGCBOND, mtNOBOND }, { mtNOBOND, mtNOBOND, mtNOBOND, mtNOBOND, mtNOBOND, mtNOBOND } }; static double hbem[5][5] = { { 0.0,0.0,0.0,mtBONDSTAB+0.5*mtATBOND,mtBONDSTAB+0.5*mtATBOND }, { 0.0,0.0,mtBONDSTAB+0.5*mtGCBOND,0.0,mtBONDSTAB+0.5*mtGCBOND }, { 0.0,mtBONDSTAB+0.5*mtGCBOND,0.0,mtBONDSTAB+0.5*mtGTBOND, mtBONDSTAB+0.5*mtGCBOND }, { mtBONDSTAB+0.5*mtATBOND,0.0,mtBONDSTAB+0.5*mtGTBOND,0.0, mtBONDSTAB+0.5*mtATBOND }, { mtBONDSTAB+0.5*mtATBOND,mtBONDSTAB+0.5*mtGCBOND, mtBONDSTAB+0.5*mtGCBOND, mtBONDSTAB+0.5*mtATBOND, mtBONDSTAB+0.5*mtGCBOND } }; tarmthresh = sw->mttarmthresh; tthresh = sw->mttthresh; dthresh = sw->mtdthresh; dtthresh = sw->mtdtthresh; ds = sw->discrim; extastem = sw->extastem; cloop7 = sw->cloop7; mtxdetect = sw->mtxdetect; /* find coding sequences */ ncdsh = 0; /* find cstems */ sc = seq + sw->loffset; sl = seq + lseq - sw->roffset; h = sc[16]; p = sc[15]; j = sc[14]; k = sc[13]; n = sc[12]; y = sc[11]; ct[0] = cAt[h]|(cAt[p]<<4)|(cAt[j]<<8)|(cAt[k]<<12)| (cAt[n]<<16)|(cAt[y]<<20); ct[1] = 
cCt[h]|(cCt[p]<<4)|(cCt[j]<<8)|(cCt[k]<<12)| (cCt[n]<<16)|(cCt[y]<<20); ct[2] = cGt[h]|(cGt[p]<<4)|(cGt[j]<<8)|(cGt[k]<<12)| (cGt[n]<<16)|(cGt[y]<<20); ct[3] = cTt[h]|(cTt[p]<<4)|(cTt[j]<<8)|(cTt[k]<<12)| (cTt[n]<<16)|(cTt[y]<<20); ct[4] = 0; ct[5] = 0; for (; sc < sl; sc++) { p = sc[17]; ct[0] = (ct[0] << 4) | cAt[p]; ct[1] = (ct[1] << 4) | cCt[p]; ct[2] = (ct[2] << 4) | cGt[p]; ct[3] = (ct[3] << 4) | cTt[p]; cm = (ct[sc[4]] >> 16) + (ct[sc[3]] >> 12) + (ct[sc[2]] >> 8) + (ct[sc[1]] >> 4) + ct[*sc]; /* 7 base cloop */ cv = (cm & 0xf0); athresh = 12; nch = 0; /* exclude the following cloops */ /* RRnnnNN, NRnnnYN */ /* NRnnnNN with cstem < 3 Watson-Crick basepairs or equivalent */ /* RYnnnYN */ /* NYnnnNN with cstem < 1 Watson-Crick basepair or equivalent */ /* NYnnnNN with cstem < 2 Watson-Crick basepairs or equivalent */ /* unless cloop = CTnnnAN */ if (RI[sc[6]]) { if (RI[sc[5]]) goto CLOOP6; if (YI[sc[10]]) goto CLOOP6; if (cv < 0x60) goto CLOOP6; } else { if (YI[sc[10]]) if (RI[sc[5]]) goto CLOOP6; if (cv < 0x40) { if (cv < 0x20) goto CLOOP6; if (sc[5] != Cytosine) goto CLOOP6; if (sc[6] != Thymine) goto CLOOP6; if (sc[10] != Adenine) goto CLOOP6; athresh = 11; } else if (cv < 0x70) { athresh = 11; k = cYI[sc[5]] + cTI[sc[6]] + cRI[sc[10]] + cAI[sc[11]]; if (sc[6] == Cytosine) if (sc[5] == Cytosine) k += 16; else if (sc[5] == Thymine) if (sc[11] == Adenine) k += 16; if (cv == 0x40) { if (k < 40) goto CLOOP6; } else if (cv == 0x50) { if (k < 28) goto CLOOP6; } else { if (k < 20) goto CLOOP6; athresh = 9; }} else athresh = (cv < 10)?9:8; } chit[0].pos = sc; chit[0].stem = 5; chit[0].loop = 7; chit[0].looppos = sc + 5; chit[0].arm = 17; chit[0].end = sc + 17; chit[0].anticodon = (sc[7] << 4) + (sc[8] << 2) + sc[9]; if (bp[sc[-1]][sc[17]]) { chit[1].pos = sc-1; chit[1].stem = 6; chit[1].loop = 7; chit[1].looppos = sc + 5; chit[1].arm = 19; chit[1].end = sc + 18; chit[1].anticodon = chit[0].anticodon; nch = 2; } else nch = 1; /* 6 base cloop */ /* exclude cstem < 4 
Watson-Crick basepairs or equivalent */ /* exclude cloop = RRnnNN */ /* exclude cloop = NNnnYY */ CLOOP6: if (cloop7) goto CLOOPE; if ((cm & 0xf00) >= 0x800) { if (!YI[sc[6]]) if (!YI[sc[5]]) goto CLOOP8; if (!RI[sc[9]]) if (!RI[sc[10]]) goto CLOOP8; se = sc + 20; sg = sc; while (sg < se) { sf = sg + 5; while (sf < (sg + 11)) { if (*sf == *sg) if (sf[1] == sg[1]) if (sf[2] == sg[2]) if (sf[3] == sg[3]) if (sf[4] == sg[4]) { sb = sg + 5; s = sf + 5; i = 0; while (sb < sf) if (*sb++ != *s++) if (++i > 1) goto NXSEG6; goto CLOOPE; } NXSEG6: sf++; } sg++; } chit[nch].pos = sc; chit[nch].stem = 5; chit[nch].loop = 6; chit[nch].looppos = sc + 5; chit[nch].arm = 16; chit[nch].end = sc + 16; chit[nch++].anticodon = 0; if (athresh > 10) athresh = 10; if (bp[sc[-1]][sc[16]]) { chit[nch].pos = sc-1; chit[nch].stem = 6; chit[nch].loop = 6; chit[nch].looppos = sc + 5; chit[nch].arm = 18; chit[nch].end = sc + 17; chit[nch++].anticodon = 0; }} /* 8 base cloop */ /* exclude cstem < 4 Watson-Crick basepairs or equivalent */ /* exclude cloop = RRnnnnNN */ /* exclude cloop = NNnnnnYY */ CLOOP8: if ((cm & 0xf) >= 0x8) { if (!YI[sc[5]]) if (!YI[sc[6]]) goto CLOOPE; if (!RI[sc[12]]) if (!RI[sc[11]]) goto CLOOPE; se = sc + 20; sg = sc; while (sg < se) { sf = sg + 5; while (sf < (sg + 11)) { if (*sf == *sg) if (sf[1] == sg[1]) if (sf[2] == sg[2]) if (sf[3] == sg[3]) if (sf[4] == sg[4]) { sb = sg + 5; s = sf + 5; i = 0; while (sb < sf) if (*sb++ != *s++) if (++i > 1) goto NXSEG8; goto CLOOPE; } NXSEG8: sf++; } sg++; } chit[nch].pos = sc; chit[nch].stem = 5; chit[nch].loop = 8; chit[nch].looppos = sc + 5; chit[nch].arm = 18; chit[nch].end = sc + 18; chit[nch++].anticodon = 0; if (athresh > 10) athresh = 10; if (bp[sc[-1]][sc[18]]) { chit[nch].pos = sc-1; chit[nch].stem = 6; chit[nch].loop = 8; chit[nch].looppos = sc + 5; chit[nch].arm = 20; chit[nch].end = sc + 19; chit[nch++].anticodon = 0; }} /* calculate carm energy */ CLOOPE: if (nch < 1) continue; for (nc = 0; nc < nch; nc++) { s1 = 
chit[nc].pos; cstem = chit[nc].stem; cloop = chit[nc].loop; s4 = s1 + cstem; s2 = s4 + cloop; energy = (cloop == 7)?0.0:-4.0; energy += cY[*s4] + cT[s4[1]] + cR[s2[-2]] + cA[s2[-1]]; if (s4[1] == Cytosine) if (*s4 == Cytosine) energy += 2.6; else if (*s4 == Thymine) if (s2[-1] == Adenine) energy += 2.6; s2 += cstem; stem_energy = bem[*s1][*--s2]; k = neighbour_map[*s1][*s2]; stem_energy += neighbour_em[k][s1[1]][s2[-1]]; bondtype = btmap[*s1][*s2]; if (bp[*s1][*s2]) { if (assymst[s2[1]][s1[-1]]) stem_energy += mtTERMSTAB; else stem_energy += send_em[*s2][*s1]; } else { if (assymst[*s2][*s1]) stem_energy += mtTERMSTAB; else stem_energy += send_em[s2[-1]][s1[1]]; } while (++s1 < s4) { if (!wcbp[*s1][*--s2]) { if (!wcbp[s1[-1]][s2[1]]) { for (j = 0; j < mtNTM; j++) if (*s1 == tandemid[j][1]) if (*s2 == tandemid[j][3]) if (s1[-1] == tandemid[j][0]) if (s2[1] == tandemid[j][2]) { stem_energy += tandem_em[j]; break; } if (s1 < (s4-1)) if (!bp[s1[1]][s2[-1]]) stem_energy -= mt3MMSTAB; } k = neighbour_map[*s1][*s2]; stem_energy += (neighbour_em[k][s1[-1]][s2[1]] + neighbour_em[k][s1[1]][s2[-1]]); } bondtype += btmap[*s1][*s2]; stem_energy += bem[*s1][*s2]; } if (!bp[*--s1][*s2]) { s1--; s2++; } if (assymst[s1[1]][s2[-1]]) stem_energy += mtTERMSTAB; else stem_energy += send_em[*s1][*s2]; cgcc = bondtype & 0xf; if (cgcc <= 0) { catc = (bondtype & 0xf0) >> 4; if (catc < cstem) energy -= mtGCPENALTY; } if (cstem == 6) energy += 1.0; chit[nc].bondtype = bondtype; chit[nc].stem_energy = stem_energy; chit[nc].energy = energy + stem_energy; } /* find tarms */ nth = 0; slm = sc + 61; sle = sc + 57; sb = sc + 21; sg = sc + 16; sge = sg + 30; slb = sg + 32; tem[0] = At[*slm]; tem[1] = Ct[*slm]; tem[2] = Gt[*slm]; tem[3] = Tt[*slm]; while (--slm > sle) { tem[0] = (tem[0] << 4) | At[*slm]; tem[1] = (tem[1] << 4) | Ct[*slm]; tem[2] = (tem[2] << 4) | Gt[*slm]; tem[3] = (tem[3] << 4) | Tt[*slm]; } while (slm >= sb) { tem[0] = ((tem[0] << 4) | At[*slm]) & 0xfffff; tem[1] = ((tem[1] << 4) | 
Ct[*slm]) & 0xfffff; tem[2] = ((tem[2] << 4) | Gt[*slm]) & 0xfffff; tem[3] = ((tem[3] << 4) | Tt[*slm]) & 0xfffff; sf = slm + 3; if (sf > sge) sf = sge; apos2 = slm + 5; si = sg; s = si + 4; r = tem[*si]; while (++si < s) r = (r >> 4) + tem[*si]; while (si <= sf) { if (si < slm) r = (r >> 4) + tem[*si++]; else { si++; r = r >> 4; } q = r & 0xf; if (slm > slb) { if (q < 5) continue; tloop = (int)(slm - si); } else { if (q < 2) continue; if (q < 3) { if (!wcbp[si[-5]][apos2[-1]]) continue; if (!wcbp[si[-4]][apos2[-2]]) continue; tloop = (int)(slm - si); if (tloop > 5) continue; } else { tloop = (int)(slm - si); if (q < 4) if (!bp[si[-4]][apos2[-2]]) if (!bp[si[-2]][apos2[-4]]) { if (tloop < 4) continue; if (si[-1] != Guanine) continue; if (*si != Thymine) continue; if (si[1] != Thymine) continue; }}} if (tloop < 7) { if (tloop < 2) if (tloop <= 0) { if (tloop <= -2) { if (!wcbp[si[-5]][apos2[-1]]) continue; if (!wcbp[si[-4]][apos2[-2]]) continue; tstem = 2; tloop += 6; } else if (bp[si[-3]][apos2[-3]]) { tstem = 3; tloop += 4; } else { if (!wcbp[si[-5]][apos2[-1]]) continue; if (!wcbp[si[-4]][apos2[-2]]) continue; tstem = 2; tloop += 6; }} else { if (bp[si[-2]][apos2[-4]]) { tstem = 4; tloop += 2; } else if (bp[si[-3]][apos2[-3]]) { tstem = 3; tloop += 4; } else { if (!wcbp[si[-5]][apos2[-1]]) continue; if (!wcbp[si[-4]][apos2[-2]]) continue; tstem = 2; tloop += 6; }} else { if (bp[si[-1]][apos2[-5]]) { if (q != 4) tstem = 5; else { if (bp[si[-2]][apos2[-4]]) tstem = 5; else { k = GI[si[-3]] + TI[si[-2]] + TI[si[-1]] + CI[*si]; if (k >= 2) { tstem = 3; tloop += 4; } else tstem = 5; }}} else { if (bp[si[-2]][apos2[-4]]) { tstem = 4; tloop += 2; } else if (bp[si[-3]][apos2[-3]]) { tstem = 3; tloop += 4; } else { if (!wcbp[si[-5]][apos2[-1]]) continue; if (!wcbp[si[-4]][apos2[-2]]) continue; tstem = 2; tloop += 6; } }} if (tloop < 3) if (tstem > 3) { tstem--; tloop += 2; }} else { if (!bp[si[-1]][apos2[-5]]) if (!bp[si[-2]][apos2[-4]]) { tstem = 3; tloop += 4; } else { 
tstem = 4; tloop += 2; } else tstem = 5; } if (tloop > 17) if (tstem < 5) continue; /* calculate tarm energy */ s1 = si - 5; tpos = s1; s4 = s1 + tstem; s2 = apos2; if (tt[*s1][*--s2]) { energy = mtTSTTSTAB; if (tt[*++s1][*--s2]) { energy += mtTSTTSTAB; bondtype = btmap[*s1++][*s2--]; } else bondtype = 0; } else { energy = 0.0; bondtype = 0; } /* calculate tstem energy */ stem_energy = bem[*s1][*s2]; k = neighbour_map[*s1][*s2]; stem_energy += neighbour_em[k][s1[1]][s2[-1]]; bondtype += btmap[*s1][*s2]; while (++s1 < s4) { if (!wcbp[*s1][*--s2]) { if (!wcbp[s1[-1]][s2[1]]) { for (j = 0; j < mtNTM; j++) if (*s1 == tandemid[j][1]) if (*s2 == tandemid[j][3]) if (s1[-1] == tandemid[j][0]) if (s2[1] == tandemid[j][2]) { stem_energy += tandem_em[j]; break; } if (s1 < (s4-1)) if (!bp[s1[1]][s2[-1]]) stem_energy -= mt3MMSTAB; } k = neighbour_map[*s1][*s2]; stem_energy += (neighbour_em[k][s1[-1]][s2[1]] + neighbour_em[k][s1[1]][s2[-1]]); } bondtype += btmap[*s1][*s2]; stem_energy += bem[*s1][*s2]; } s1--; if (tloop < 4) stem_energy += ssend_em[*s1][*s2]; else if (assymst[s1[1]][s2[-1]]) stem_energy += mtTERMSTAB; else stem_energy += send_em[*s1][*s2]; /* compile possible tarms */ energy += (stem_energy - mtBONDSTAB*(double)(5-tstem)); if (energy >= tarmthresh) { thit[nth].pos = tpos; s1 = tpos + tstem; s2 = apos2 - tstem; thit[nth].energy = energy - loop_stab[tloop] + tG[s1[-1]] + tT[*s1] + tT[s1[1]] + tC[s1[2]]; thit[nth].stem_energy = stem_energy; thit[nth].bondtype = bondtype; thit[nth].stem = tstem; thit[nth].loop = tloop; thit[nth].end = tpos + 2*tstem + tloop; if (++nth >= mtNTH) { fprintf(stderr,"Too many mt-tstem hits\n"); break; } if (tstem > 2) if (tloop < 10) if (gt[s1[-1]][*s2]) { thit[nth].pos = tpos; thit[nth].energy = energy - mtBONDSTAB - mtGTBOND - loop_stab[tloop+2] + tG[s1[-2]] + tT[s1[-1]] + tT[*s1] + tC[s1[1]]; thit[nth].stem_energy = stem_energy - mtGTBOND; thit[nth].bondtype = bondtype - 0x100; thit[nth].stem = tstem - 1; thit[nth].loop = tloop + 2; 
thit[nth].end = thit[nth-1].end; if (++nth >= mtNTH) { fprintf(stderr,"Too many mt-tstem hits\n"); break; } if (tstem > 3) if (tloop < 8) if (gt[s1[-2]][s2[1]]) { thit[nth].pos = tpos; thit[nth].energy = energy - 2.0*mtBONDSTAB - 2.0*mtGTBOND - loop_stab[tloop+4] + tG[s1[-3]] + tT[s1[-2]] + tT[s1[-1]] + tC[*s1]; thit[nth].stem_energy = stem_energy - 2.0*mtGTBOND; thit[nth].bondtype = bondtype - 0x200; thit[nth].stem = tstem - 2; thit[nth].loop = tloop + 4; thit[nth].end = thit[nth-1].end; if (++nth >= mtNTH) { fprintf(stderr,"Too many mt-tstem hits\n"); break; }}} if (tstem < 5) { if (tloop < 11) continue; if (tloop > 16) continue; if (!wcbp[s1[1]][s2[-2]]) continue; bondtype += btmap[*s1][s2[-1]] + btmap[s1[1]][s2[-2]]; tstem += 2; tloop -= 4; } else { if (tloop < 9) continue; if (wcbp[*s1][s2[-1]]) { if (tloop > 14) continue; tstem++; tloop -= 2; bondtype += btmap[*s1][s2[-1]]; } else { if (tloop < 11) continue; if (tloop > 16) continue; if (!wcbp[s1[1]][s2[-2]]) continue; bondtype += btmap[*s1][s2[-1]] + btmap[s1[1]][s2[-2]]; tstem += 2; tloop -= 4; }} thit[nth].pos = tpos; s1 = tpos + tstem; thit[nth].energy = energy - loop_stab[tloop] + tG[s1[-1]] + tT[*s1] + tT[s1[1]] + tC[s1[2]]; thit[nth].stem_energy = stem_energy; thit[nth].bondtype = bondtype; thit[nth].stem = tstem; thit[nth].loop = tloop; thit[nth].end = thit[nth-1].end; if (++nth >= mtNTH) { fprintf(stderr,"Too many mt-tstem hits\n"); break; } if (tloop < 9) continue; if (!wcbp[*s1][apos2[-tstem-1]]) continue; if (++tstem > 7) continue; if (tloop > 14) continue; tloop -= 2; thit[nth].pos = tpos; s1 = tpos + tstem; thit[nth].energy = energy - loop_stab[tloop] + tG[s1[-1]] + tT[*s1] + tT[s1[1]] + tC[s1[2]]; thit[nth].stem_energy = stem_energy; thit[nth].bondtype = bondtype; thit[nth].stem = tstem; thit[nth].loop = tloop; thit[nth].end = thit[nth-1].end; if (++nth >= mtNTH) { fprintf(stderr,"Too many mt-tstem hits\n"); break; }}} slm--; } /* find darms */ ndh = 0; sle = sc - 4; slb = sc - 8; slm = sc - 1; 
tem[0] = dAt[*slm]; tem[1] = dCt[*slm]; tem[2] = dGt[*slm]; tem[3] = dTt[*slm]; while (--slm > sle) { tem[0] = (tem[0] << 4) | dAt[*slm]; tem[1] = (tem[1] << 4) | dCt[*slm]; tem[2] = (tem[2] << 4) | dGt[*slm]; tem[3] = (tem[3] << 4) | dTt[*slm]; } slm1 = slm; while (slm > slb) { tem[0] = ((tem[0] << 4) | dAt[*slm]) & 0xffff; tem[1] = ((tem[1] << 4) | dCt[*slm]) & 0xffff; tem[2] = ((tem[2] << 4) | dGt[*slm]) & 0xffff; tem[3] = ((tem[3] << 4) | dTt[*slm]) & 0xffff; slm--; si = slm - 18; s = si + 3; r = tem[*si]; while (++si < s) r = (r >> 4) + tem[*si]; while (si <= slm1) { if (si < slm) r = (r >> 4) + tem[*si++]; else { r = r >> 4; si++; } if ((q = (r & 0xf)) < 6) { q += (unsigned int)(TI[si[-6]] + RI[si[-5]]); if (q < 6) continue; } /* calculate darm energy */ s1 = si - 4; dhit[ndh].pos = s1; energy = dT[s1[-2]] + dA[s1[-1]]; dloop = (int)(slm1 - si); if (dloop > 2) if (bp[si[-1]][*slm1]) { dstem = 4; goto EC; } if (dloop > 0) if ((ggstembp[si[-2]][slm[2]]) || (gabp[si[-1]][*slm1])) { dstem = 3; dloop += 2; energy += mtNOBOND; goto EC; } if (!wcbp[si[-3]][slm[3]]) continue; if (!gc[si[-4]][slm[4]]) continue; dstem = 2; dloop += 4; if (dloop > 5) energy += mtNOBOND; energy += mtNOBOND; EC: s2 = slm + 4; s4 = s1 + dstem; if (!wcbp[s1[1]][s2[-1]]) if (stemterm[s1[1]][s2[-1]]) energy -= 1.0; else if (bp[s1[1]][s2[-1]]) energy -= 1.5; else energy -= 2.0; /* calculate dstem energy */ stem_energy = bem[*s1][*s2]; k = neighbour_map[*s1][*s2]; stem_energy += neighbour_em[k][s1[1]][s2[-1]]; bondtype = btmap[*s1][*s2]; if (bp[*s1][*s2]) { if (assymst[s2[1]][s1[-1]]) stem_energy += mtTERMSTAB; else stem_energy += send_em[*s2][*s1]; s1++; s2--; } else { s1++; s2--; if (assymst[s2[1]][s1[-1]]) stem_energy += mtTERMSTAB; else stem_energy += send_em[*s2][*s1]; } stem_energy += bem[*s1][*s2]; k = neighbour_map[*s1][*s2]; stem_energy += (neighbour_em[k][s1[-1]][s2[1]] + neighbour_em[k][s1[1]][s2[-1]]); bondtype += btmap[*s1][*s2]; while (++s1 < s4) { if (!wcbp[*s1][*--s2]) { if 
(!wcbp[s1[-1]][s2[1]]) { for (j = 0; j < mtNTM; j++) if (*s1 == tandemid[j][1]) if (*s2 == tandemid[j][3]) if (s1[-1] == tandemid[j][0]) if (s2[1] == tandemid[j][2]) { stem_energy += tandem_em[j]; break; } if (s1 < (s4-1)) if (!bp[s1[1]][s2[-1]]) stem_energy -= mt3MMSTAB; } k = neighbour_map[*s1][*s2]; stem_energy += (neighbour_em[k][s1[-1]][s2[1]] + neighbour_em[k][s1[1]][s2[-1]]); } bondtype += btmap[*s1][*s2]; stem_energy += bem[*s1][*s2]; } if (!bp[*--s1][*s2]) { s1--; s2++; } if (dloop < 4) stem_energy += ssend_em[*s1][*s2]; else if (assymst[s1[1]][s2[-1]]) stem_energy += mtTERMSTAB; else stem_energy += send_em[*s1][*s2]; /* compile possible darms */ energy += stem_energy; dhit[ndh].energy = energy; dhit[ndh].stem_energy = stem_energy; dhit[ndh].bondtype = bondtype; dhit[ndh].stem = dstem; dhit[ndh].loop = dloop; if (++ndh >= mtND) { fprintf(stderr,"Too many mt-dstem hits\n"); break; } if (dstem == 4) { if (dloop >= 6) if (bondtype < 0x1000) { s1 = si - 5; s2 = slm + 5; if (bp[*s1][*s2]) { dhit[ndh].pos = s1; e = 0.5 + bem[*s1][*s2]; dhit[ndh].energy = energy + e; if (wcbp[*s1][*s2]) dhit[ndh].energy += (dT[s1[-2]] + dA[s1[-1]] - dT[s1[-1]] - dA[*s1]); dhit[ndh].stem_energy = stem_energy + e; dhit[ndh].bondtype = bondtype + btmap[*s1][*s2]; dhit[ndh].stem = 5; dhit[ndh].loop = dloop; if (++ndh >= mtND) { fprintf(stderr,"Too many mt-dstem hits\n"); break; }}}} else if (dloop >= 6) { s1 = si - 1; s2 = slm1; if (stemterm[*s1][*s2]) { dhit[ndh].pos = si - 4; dhit[ndh].energy = energy; dhit[ndh].stem_energy = stem_energy; dhit[ndh].bondtype = bondtype; dhit[ndh].stem = 4; dhit[ndh].loop = dloop - 2; if (++ndh >= mtND) { fprintf(stderr,"Too many mt-dstem hits\n"); break; }}} if (dloop >= 4) continue; s1 = si - 4 + dstem - 1; s2 = s1 + dloop + 1; if (bp[*s1][*s2]) continue; dhit[ndh].pos = si - 4; dhit[ndh].energy = energy + 0.001; dhit[ndh].stem_energy = stem_energy; dhit[ndh].bondtype = bondtype; dhit[ndh].stem = dstem - 1; dhit[ndh].loop = dloop + 2; if (++ndh >= 
mtND) { fprintf(stderr,"Too many mt-dstem hits\n"); break; } } slm1--; } /* build darm exclusion map */ /* 5' astems further from carm than */ /* mt_DRLmaxlength must match a darm */ for (i = 3; i <= 30; i++) dposmap[i] = 0; sf = sc - mt_DRLmaxlength - 1; sld = sf; if (ndh > 0) { s = dhit[0].pos; for (nd = 0; nd < ndh; nd++) { se = dhit[nd].pos; if (se < s) s = se; i = (int)(sc - se); if (dposmap[++i] < 1) dposmap[i] = 1; dposmap[++i] = 2; if (dposmap[++i] < 1) dposmap[i] = 1; } s -= 4; if (s < sf) sf = s; } /* build tarm exclusion map */ /* 3' astems further from carm than */ /* mt_TVRLmaxlength must match a tarm */ for (i = 17; i <= 62; i++) tendmap[i] = 0; s2 = sc + mt_TVRLmaxlength + 17; sle = s2; if (nth > 0) { s = thit[0].end; for (nt = 0; nt < nth; nt++) { se = thit[nt].end; if (se > s) s = se; i = (int)(se - sc); bondtype = thit[nt].bondtype; if (tendmap[i]) { if (bondtype < tendmap[i]) tendmap[i] = bondtype; } else tendmap[i] = bondtype; } if (s > s2) s2 = s; } /* find astems in 3 categories: */ /* high energy astems close to carm */ /* high energy astems matching a high energy tarm far from carm */ /* low energy astem matching a darm and tarm */ nah = 0; sa = sc - 3; sg = sf - 6; sb = sc + 17; se = s2 + 6; tem[0] = aAt[*se]; tem[1] = aCt[*se]; tem[2] = aGt[*se]; tem[3] = aTt[*se]; while (--se > s2) { tem[0] = (tem[0] << 4) | aAt[*se]; tem[1] = (tem[1] << 4) | aCt[*se]; tem[2] = (tem[2] << 4) | aGt[*se]; tem[3] = (tem[3] << 4) | aTt[*se]; } ti = (int)(se - sc); while (se >= sb) { tem[0] = ((tem[0] << 4) | aAt[*se]) & 0xfffffff; tem[1] = ((tem[1] << 4) | aCt[*se]) & 0xfffffff; tem[2] = ((tem[2] << 4) | aGt[*se]) & 0xfffffff; tem[3] = ((tem[3] << 4) | aTt[*se]) & 0xfffffff; if (tendmap[ti]) { nti = (tendmap[ti] < 0x2000)?1:0; } else { if (se > sle) goto ANX; nti = -1; } si = sg; r = tem[*si]; while (++si < sf) r = (r >> 4) + tem[*si]; di = (int)(sc - si); while (si < sa) { r = (r >> 4) + tem[*si++]; if (dposmap[--di]) { if (nti <= 0) { if (nti < 0) if 
(dposmap[di] < 2) continue; if ((av = (r & 0xf)) < athresh) continue; }} else { if (si < sld) continue; if (nti < 0) continue; if ((av = (r & 0xf)) < athresh) continue; } if (nah >= mtNA) { fprintf(stderr,"Too many mt-astem hits\n"); break; } /* predict astem length and calculate astem energy */ s1 = si - 7; s2 = se + 6; if (bp[*s1][*s2]) { astem = 7; energy = 0.0; ahit[nah].pos1 = s1; ahit[nah].pos2 = se; } else if (ggstemterm[*s1][*s2]) { astem = 7; ahit[nah].pos1 = s1; ahit[nah].pos2 = se; energy = bem[*s1++][*s2--]; } else { energy = bem[*s1++][*s2--]; if (bp[*s1][*s2]) { astem = 6; ahit[nah].pos1 = s1; ahit[nah].pos2 = se; } else if (ggstemterm[*s1][*s2]) { astem = 6; ahit[nah].pos1 = s1; ahit[nah].pos2 = se; energy += bem[*s1++][*s2--]; } else { astem = 5; energy += bem[*s1++][*s2--]; ahit[nah].pos1 = s1; ahit[nah].pos2 = se; }} ahit[nah].stem = astem; bondtype = btmap[*s1][*s2]; energy += bem[*s1][*s2]; k = neighbour_map[*s1][*s2]; energy += neighbour_em[k][s1[1]][s2[-1]]; energy += bem[*++s1][*--s2]; k = neighbour_map[*s1][*s2]; energy += (neighbour_em[k][s1[-1]][s2[1]] + neighbour_em[k][s1[1]][s2[-1]]); bondtype += btmap[*s1][*s2]; while (++s1 < si) { if (!wcbp[*s1][*--s2]) { if (!wcbp[s1[-1]][s2[1]]) { for (j = 0; j < mtNTM; j++) if (*s1 == tandemid[j][1]) if (*s2 == tandemid[j][3]) if (s1[-1] == tandemid[j][0]) if (s2[1] == tandemid[j][2]) { energy += tandem_em[j]; break; } if (s1 < (si-1)) if (!bp[s1[1]][s2[-1]]) energy -= mt3MMSTAB; } k = neighbour_map[*s1][*s2]; energy += (neighbour_em[k][s1[-1]][s2[1]] + neighbour_em[k][s1[1]][s2[-1]]); } bondtype += btmap[*s1][*s2]; energy += bem[*s1][*s2]; } if (!bp[*--s1][*s2]) if (!bp[*--s1][*++s2]) if (!bp[*--s1][*++s2]) if (!bp[*--s1][*++s2]) goto NOST; if (assymst[s1[1]][s2[-1]]) energy += mtTERMSTAB; NOST: ahit[nah].energy = energy; ahit[nah].bondtype = bondtype; nah++; } ANX: se--; ti--; } if (nah <= 0) continue; /* build mttrna genes */ /* cycle through astems first so that */ /* GC content is only 
calculated once per astem */ thresh = -INACTIVE; te.ps = NULL; for (na = 0; na < nah; na++) { apos2 = ahit[na].pos2; apos1 = ahit[na].pos1; astem = ahit[na].stem; aend1 = apos1 + astem; astem8 = (astem == 7)?(wcbp[apos1[-1]][apos2[7]]):0; asteme = 0; ea = ahit[na].energy; abondtype = ahit[na].bondtype; agcat = ((abondtype >> 4) + abondtype) & 0xf; /* GC content */ s = apos1; aend2 = apos2 + astem; nbase = (int)(aend2 - apos1) + 1; igc = 0; while (s <= aend2) { k = *s++; if (k >= Cytosine) if (k <= Guanine) igc++; } gcv = 10.0*(double)igc/(double)nbase; if (gcv < 1.0) { if (gcv < 0.55) continue; ea -= 0.5; } if (nbase > 60) { if (gcv > 6.0) ea -= 2.0*(gcv - 6.0); } else { if (gcv > 5.0) ea -= 2.0*(gcv - 5.0); } if (gcv > 6.6) { ea -= 6.0; if (gcv > 7.0) ea -= 6.0; } /* findout if inside a coding sequence */ incds = 0; i = -1; while (++i < ncdsh) if (apos1 > cdshit[i].pos1) if (aend2 <= cdshit[i].pos2) { incds = 1; ea -= 2.0; break; } /* cycle through carms that fall between astem */ nc = -1; while (++nc < nch) { cpos = chit[nc].pos; dloop = (int)(cpos - aend1); if (dloop < 3) continue; if (dloop > 26) continue; cend = chit[nc].end; tloop = (int)(apos2 - cend); if (tloop < 5) continue; cloop = chit[nc].loop; cstem = chit[nc].stem; clooppos = chit[nc].looppos; cloopend = clooppos + cloop; carm = chit[nc].arm; anticodon = chit[nc].anticodon; cbondtype = chit[nc].bondtype; acbondtype = abondtype + cbondtype; cgcat = ((cbondtype >> 4) + cbondtype) & 0xf; ec = ea + chit[nc].energy; /* astem,cstem stability (GC bond count) */ if ((abondtype & 0xf) <= 0) if ((cbondtype & 0xf) <= 0) { ec -= mtGCPENALTYD; if (((cbondtype & 0xf0) >> 4) >= 5) ec += 0.5; } /* anticodon to astem discriminator base match */ astem8d = 0; if (cloop == 7) { if (!mt_discrim[ds][anticodon][apos2[astem]]) if (astem8) if (mt_discrim[ds][anticodon][apos2[8]]) astem8d = 1; else ec -= 3.0; else if (astem <= 6) { if (!mt_discrim[ds][anticodon][apos2[7]]) if (astem == 5) { if 
(!mt_discrim[ds][anticodon][apos2[6]]) ec -= 3.0; } else ec -= 3.0; } else ec -= 3.0; } /* build TV-replacement loop mttrna genes */ if (tloop <= mt_TVRLmaxlength) { if (!sw->tvloop) goto TVN; /* astem termination */ /* (only need to calculate once per astem) */ if (!asteme) { asteme = 1; s = aend1 - 1; se = apos2; while (!bp[*s][*se]) { if (--s <= apos1) { eas = 0.0; goto NOST2; } se++; } if (!aastemterm[s[1]][se[-1]]) eas = -0.5; else { eas = 0.0; while (se >= apos2) { s++; se--; if (aastemterm[*s][*se]) eas += 1.0; }}} /* choose darm */ NOST2: energy = 94.0 + ec + eas; nd = -1; ndi = -1; ed = -INACTIVE; while (++nd < ndh) { dpos = dhit[nd].pos; spacer1 = (int)(dpos - aend1); if (spacer1 != 2) continue; dl = dhit[nd].loop; dstem = dhit[nd].stem; if (dstem > 4) continue; darm = 2*dstem + dl; spacer2 = (int)(cpos - dpos) - darm; /* astem,darm,cstem interspacing */ if (spacer2 < 1) continue; e = dhit[nd].energy; if (spacer2 > 1) { if (spacer2 > 2) continue; if (!stembp[*cpos][cend[-1]]) continue; if (tloop > 12) e -= 2.0; if ((dhit[nd].bondtype & 0xf) < 1) if ((agcat + cgcat + 1) < (cstem + astem)) e -= 3.0; } else if (dl > 11) { if (!RI[cpos[-1]]) e -= 2.0; } else { if (cpos[-1] == Cytosine) e -= 2.0; } /* small,large dloop, dstem R motif */ if (dl < 3) e -= 2.0; if (dl > 12) e -= 2.0; if (!RI[*dpos]) e -= 1.0; /* darm,tloop tertiary interaction */ k = 0; di = ((dl >= 12)?3:((dl >= 9)?2:1)); tl = (tloop >= 14)?5:((dl >= 9)?((tloop >= 10)?4:3):3); if (!ggstackbp[dpos[dstem+di]][cend[tl]]) { if (tl > 3) { if (!ggstackbp[dpos[dstem+di]][cend[tl-1]]) e -= 1.5; else k++; } else if (di > 1) { if (!ggstackbp[dpos[dstem+di-1]][cend[tl]]) e -= 1.5; else k++; } else e -= 1.5; } else k++; if (stemterm[dpos[dstem-1]][dpos[darm-dstem]]) { e -= 0.5; if (cend[2] == dpos[dstem-2]) { if (bp[cend[2]][dpos[darm-dstem+1]]) k++; } else { if (cend[2] == dpos[darm-dstem+1]) if (bp[cend[2]][dpos[dstem-2]]) k++; }} else { if (cend[2] == dpos[dstem-1]) { if (!bp[cend[2]][dpos[darm-dstem]]) 
e -= 0.5; else k++; } else { if (cend[2] != dpos[darm-dstem]) e -= 0.5; else if (!bp[cend[2]][dpos[dstem-1]]) e -= 0.5; else k++; }} if (cend[1] == *dpos) { if (!stackbp[cend[1]][dpos[darm-1]]) e -= 0.5; else k++; } else { if (cend[1] != dpos[darm-1]) e -= 0.5; else if (!bp[cend[1]][*dpos]) e -= 0.5; else k++; } /* darm stability */ dstemmotif = wcbp[dpos[1]][dpos[darm-2]]; if (spacer2 == 2) if ((k < 3) || (dhit[nd].bondtype > 0x200) || (!dstemmotif)) { if (abondtype >= 0x10000) e -= 2.0; if (dstem > 3) e -= 1.0; e -= 0.5; } /* darm tertiary interactions */ j = 0; b8 = dpos[-2]; b9 = dpos[-1]; if (!bp[b8][dpos[dstem]]) e -= 1.0; else if (wcbp[b8][dpos[dstem]]) j++; if (!bp[b8][dpos[darm-dstem-1]]) e-= 1.0; else if (wcbp[b8][dpos[darm-dstem-1]]) j++; if (!wcbp[dpos[2]][dpos[darm-3]]) { if (!gastembp[b8][dpos[dstem]]) e -= 2.0; else if (!gastembp[b8][dpos[darm-dstem-1]]) e -= 2.0; if (!ggstembp[dpos[2]][dpos[darm-3]]) e -= 1.0; } else j++; if (!bp[b9][dpos[2]]) { if (!bp[b9][dpos[darm-3]]) e -= 1.0; else j++; } else j++; /* more extensive tertiary interaction between darm,tloop */ if (dstemmotif) { if (k >= 3) if (bp[dpos[2]][dpos[darm-3]]) { if (b8 != Thymine) e += 0.5; if (dl > 3) if (bp[dpos[dstem+2]][cend[tl+1]]) e += 0.7; else if (gabp[dpos[dstem+2]][cend[tl+1]]) e += 0.5; if (tloop >= 6) if (spacer2 < 2) if (dl >= 3) { di = (dl > 11)?2:1; if (bp[dpos[dstem+di]][cend[tl]]) { if (chit[nc].stem_energy > -4.8) e += 0.5; if (wcbp[dpos[dstem+di]][cend[tl]]) if (gcv > 1.2) if (clooppos[1] == Thymine) if (cbondtype < 0x200) if ((cbondtype & 0xf) > 0) if (abondtype < 0x2000) { e += 1.5; if (dl > 3) if (wcbp[dpos[dstem+di+1]][cend[tl+1]]) e += 1.0; }}}} if (j >= 4) e += 0.25; } if (e > ed) { ed = e; ndi = nd; ti = k; }} if (ndi < 0) goto TVN; energy += ed; dpos = dhit[ndi].pos; dstem = dhit[ndi].stem; dl = dhit[ndi].loop; darm = 2*dstem + dl; dbondtype = dhit[ndi].bondtype; spacer2 = (int)(cpos - dpos) - darm; spacer1 = (int)(dpos - aend1); b8 = *aend1; b9 = aend1[1]; /* 
false positive suppression */ if (dloop < 15) energy -= 2.0; if (cstem > 5) energy -= 1.0; if (tloop < 6) energy -= 1.0; if (tloop > 12) { energy -= 1.0; if (agcat < 6) energy -= 2.0; if (tloop > 15) energy -= 2.5; } if (!stackbp[*dpos][dpos[darm-1]]) energy -= 1.0; if (dstem < 4) if (gcv > 1.2) if ((dbondtype & 0xf0f) == 0) energy -= 1.5; if (b8 != Thymine) { if (dl < 4) if (abondtype > 0x10000) energy -= 1.5; if (b8 == Adenine) if (YI[cloopend[-2]]) energy -= 1.0; } if (dl > 10) { if (tloop < 7) energy -= 2.0; if (spacer2 > 1) energy -= 2.0; if (dhit[ndi].stem_energy < -3.4) energy -= 2.0; } if (gcv < 2.0) if (dbondtype > 0x10000) energy -= 2.0; if ((cbondtype & 0xf) < 1) if (abondtype > 0x100) { if (cgcat < 4) energy -= 1.5; if (!wcbp[dpos[2]][dpos[darm-3]]) energy -= 1.0; } if (b8 != Thymine) if ((clooppos[1] != Thymine) || (*clooppos != Cytosine)) if (dl > 3) if (dbondtype > 0x10000) energy -= 1.0; if (!RI[cend[1]]) if (b9 != Guanine) energy -= 1.0; else energy -= 0.5; if (b9 == Guanine) { if (!RI[*cend]) energy -= 1.0; if (spacer2 != 1) energy -= 3.0; else { tl = (tloop >= 14)?5:((dl >= 9)?((tloop >= 7)?4:3):3); s = dpos + dstem; if (!wcbp[s[1]][cend[tl]]) { energy -= 2.5; if (dl >= 5) if (chit[nc].energy > 2.0) if (wcbp[s[2]][cend[tl]]) if (wcbp[s[3]][cend[tl+1]]) energy += 6.0; } else if (b8 == Thymine) if (dl >= 5) if (chit[nc].energy > 2.0) if (wcbp[s[2]][cend[tl+1]]) energy += 3.5; }} else if (b9 != Adenine) energy -= 3.0; if (b8 != Thymine) if (b8 == Guanine) { if (!RI[dpos[dstem]]) energy -= 1.0; else if (RI[dpos[darm-dstem-1]]) energy += 2.0; } else energy -= 1.0; /* carm termination */ if (assymst[cend[-1]][*cpos]) energy += 1.0; /* CTnnnAA cloop motif */ energy += CX7[*clooppos] + AX7[cloopend[-2]]; if (clooppos[1] == Cytosine) energy -= 2.0; /* NNnnnAA cloop motif */ if (cloopend[-2] == Adenine) if (cloopend[-1] == Adenine) if (spacer1 == 2) if (dbondtype < 0x1000) { if (abondtype < 0x100) energy += 1.0; else if (cbondtype < 0x100) energy += 1.0; } 
/* global stem damage level */ bondtype = acbondtype + dbondtype; i = (int)((bondtype >> 16) & 0xf); j = (int)((bondtype >> 12) & 0xf); k = (int)((bondtype >> 8) & 0xf); if (k > 0) if (i > 0) { k += (i + j); if (k > 5) energy -= 1.0*(double)(k - 5); } /* global stem stability (GC bond count) */ gcc = bondtype & 0xf; if (gcc < 2) { if (ti >= 2) { if (cbondtype < 0x100) if ((cbondtype & 0xf) > 0) goto NGCC1; if (ti >= 3) if (cgcat >= 4) { if ((cbondtype & 0xf) > 0) goto NGCC1; if (cbondtype < 0x100) goto NGCC2; }} energy -= (double)(3 - gcc); NGCC2: if (gcc < 1) { if (agcat < 5) energy -= 2.0; if (bondtype > 0x10000) energy -= 1.5; }} NGCC1: /* global stability */ /* (stem stability,dloop-tloop tertiary interaction,dloop size) */ if (abondtype > 0x1000) if (ti < 3) { if (chit[nc].stem_energy < -6.0) energy -= 1.5; if (dl > 9) if (((dbondtype + cbondtype) & 0xf) < 1) energy -= 1.0; } /* tloop,dloop tertiary interaction */ /* (alternative dloop position) */ if (bondtype < 0x1000) if (b8 == Thymine) if (RI[b9]) if (dl > 4) if (!bp[cend[3]][dpos[dstem+1]]) if (bp[cend[3]][dpos[dstem+2]]) energy += 0.5; /* "near perfect" TV-loop mttRNA: */ /* darm-tloop tertiary interaction,low global stem damage, */ /* TR motif at b8-9, good astem,darm,carm interspacing */ if (ti >= 2) if (agcat >= 6) if (cbondtype < 0x100) if (dbondtype < 0x100) if (RI[b9]) if (b8 == Thymine) if ((abondtype & 0xf) > 0) if ((dbondtype & 0xf) > 0) if (spacer1 == 2) if (spacer2 == 1) energy += 1.5; /* find exceptions */ if (energy < dthresh) { if (!mtxdetect) goto TVN; if (incds) goto TVN; if (energy < (thresh - 7.0)) goto TVN; if (energy < (dthresh - 7.0)) goto TVN; if (nbase > 68) goto TVN; if (abondtype > 0x20100) goto TVN; if (dl > 9) { if (dl > 10) goto TVN; if (dstem < 4) goto TVN; if (dbondtype > 0x100) goto TVN; } if (dstem > 4) goto TVN; if (b9 != Adenine) { if (b9 != Guanine) goto TVN; if (cbondtype > 0x100) goto TVN; if (dbondtype > 0x200) goto TVN; } if (cloop != 7) goto TVN; if 
(YI[cloopend[-2]]) goto TVN; if (b8 == Thymine) { if (apos2[-1] == Thymine) if (apos2[-2] == Thymine) if (tloop < 8) if (tt[aend1[-1]][*apos2]) if (wcbp[dpos[2]][dpos[darm-3]]) if (((dbondtype + cbondtype) & 0xf) > 0) energy += 3.0; } else if (b8 == Adenine) { if (apos2[-1] == Adenine) if (apos2[-2] == Adenine) { if (assymat[aend1[-1]][*apos2]) if (assymat[apos2[1]][aend1[-2]]) energy += 2.0; if (agcat >= 5) if (cgcat >= 4) if (dbondtype < 0x100) if (at[aend1[-1]][*apos2]) if (at[apos2[1]][aend1[-2]]) energy += 1.0; } if (ti >= 3) if (cgcat >= 4) if (agcat >= 4) if ((cbondtype & 0xf) > 0) if ((abondtype & 0xf) > 1) if (dbondtype < 0x200) if (wcbp[dpos[1]][dpos[darm-2]]) if (clooppos[1] == Thymine) if (YI[*clooppos]) if (RI[cloopend[-2]]) if (RI[cloopend[-1]]) energy += 5.0; } if (bondtype < 0x100) { if (spacer2 == 1) if (*clooppos == Cytosine) if (clooppos[1] == Thymine) if (cloopend[-2] == Adenine) if (cloopend[-1] == Adenine) energy += 2.0; } else { if (spacer2 == 1) { if (b8 == Thymine) if (dl > 3) if (dbondtype < 0x200) { if (cbondtype < 0x100) { if (!bp[dpos[dstem+1]][cend[3]]) if (bp[dpos[dstem+1]][cend[4]]) energy += 2.0; if (dbondtype < 0x100) if (abondtype < 0x20000) if (ti >= 2) if (dstem >= 3) if (tloop < 13) if ((cbondtype & 0xf) > 0) energy += 4.0; }} else if (dstem > 3) if (dbondtype < 0x300) { if (bondtype < 0x10000) if (ti >= 3) if ((acbondtype & 0xf) > 0) if (wcbp[dpos[2]][dpos[darm-3]]) energy += 4.0; } if (tloop < 8) { if (dbondtype < 0x200) { if (cbondtype < 0x100) if (ti >= 2) { if (wcbp[dpos[dstem+1]][cend[3]]) { if (b8 == Thymine) if (abondtype < 0x3000) energy += 5.0; if (agcat >= 5) if (gcv > 1.2) if (RI[cloopend[-1]]) energy += 7.0; } if (dbondtype < 0x100) if (agcat >= 6) if (YI[*clooppos]) if (clooppos[1] == Thymine) if (RI[cloopend[-2]]) if (RI[cloopend[-1]]) energy += 2.0; } if (cbondtype < 0x300) if (ti >= 3) if (abondtype < 0x2000) if ((dbondtype & 0xf) > 0) if ((acbondtype & 0xf) > 0) if (ahit[na].energy >= -7.0) if (dstem >= 4) 
energy += 3.0; } if (dbondtype < 0x300) if (cgcat >= 4) if (abondtype < 0x2000) if (ahit[na].energy >= -7.0) if (cbondtype < 0x10000) if ((cbondtype & 0xf) > 0) if (cstem < 6) if (ti >= 3) energy += 4.0; }} if (tloop > 8) if (agcat >= 6) if (cbondtype < 0x100) if ((cbondtype & 0xf) > 0) if (b8 == Thymine) if (wcbp[dpos[dstem+1]][cend[3]]) if (wcbp[dpos[1]][dpos[darm-2]]) energy += 7.0; } if (dbondtype < 0x100) if (cgcat >= 4) if (agcat >= 5) if (wcbp[dpos[1]][dpos[darm-2]]) if ((cbondtype & 0xf) > 0) if ((abondtype & 0xf) > 0) if ((dbondtype & 0xf) > 0) energy += 0.5; if (cbondtype < 0x100) if (dbondtype < 0x200) if (agcat >= 5) if (b8 == Thymine) if (tloop < 8) if (wcbp[dpos[1]][dpos[darm-2]]) if (wcbp[dpos[2]][dpos[darm-3]]) if ((cbondtype & 0xf) > 0) if ((abondtype & 0xf) > 0) if ((dbondtype & 0xf) > 0) if (clooppos[1] == Thymine) if (YI[*clooppos]) if (RI[cloopend[-2]]) energy += 3.0; if (energy < dthresh) goto TVN; energy -= (0.9*(energy - dthresh) + 5.0); } /* remember fully formed TV-loop replacement mttRNA gene */ /* if threshold reached */ if (energy < thresh) goto TVN; te.energy = energy; thresh = energy; te.ps = apos1; te.dstem = dstem; te.dloop = dl; te.spacer1 = spacer1; te.spacer2 = spacer2; te.cstem = cstem; te.cloop = cloop; k = astem + spacer1 + darm + spacer2; te.anticodon = k + cstem + 2; te.nintron = 0; te.intron = 0; te.var = 0; te.varbp = 0; te.tstem = 0; te.tloop = tloop; te.nbase = k + carm + tloop; tastem = astem; tastem8 = astem8; tastem8d = astem8d; /* build D-replacement loop mttrna genes */ TVN: if (tloop < 10) continue; } if (dloop > mt_DRLmaxlength) goto DN; if (gcv < 1.2) goto DN; energy = 91.0 + ec; /* CCnnnAA cloop */ if (clooppos[1] == Cytosine) { if (*clooppos != Cytosine) goto DN; if (cloopend[-2] != Adenine) goto DN; if (cloopend[-1] != Adenine) goto DN; energy -= 1.0; } /* choose tarm */ nt = -1; nti = -1; et = -INACTIVE; while (++nt < nth) { tl = thit[nt].loop; if (tl > 11) continue; if (thit[nt].end != apos2) continue; tpos 
= thit[nt].pos; tstem = thit[nt].stem; /* var loop (3-7 bases long) */ var = (int)(tpos - cend); if (var < 3) continue; e = thit[nt].energy; if (var > 5) { if (var > 7) continue; if (tl < 7) continue; e -= 1.0; if ((dloop < 10) || (tstem < 4)) e -= 2.0*(double)(var - 5); } /* tloop RA or RG motif */ s = tpos + tstem; k = 0; n = 0; i = 0; while ((j = tloopa[tl][i++]) >= 0) if (s[j] == Adenine) { k = 1; if (dloop >= 3) if (tl > 3) { b57 = s[j-1]; if (RI[b57] || (tl < 5)) { if (bp[b57][aend1[0]]) { e += 1.5; n = 1; break; } if (bp[b57][aend1[1]]) { e += 1.5; n = 1; break; } if (dloop > 10) if (bp[b57][aend1[2]]) { e += 1.5; n = 1; break; }}}} if (!k) { i = 0; while ((j = tloopa[tl][i++]) >= 0) if (s[j] == Guanine) if (RI[s[j-1]]) { k = 1; break; } if ( j < 0) e -= ((tl > 5)?2.0:1.0); } /* tertiary interaction between tloop and start of dloop */ ti = (tl > 5)?1:((dloop > 5)?1:0); di = (dloop > 5)?2:1; if (stackbp[aend1[di]][s[ti]]) e += 1.0; /* tloop GTTC motif */ i = (s[-1] == Guanine)?1:0; if (tl >= 5) { ti = i + TI[*s] + TI[s[1]] + CI[s[2]]; if (n) if (!i) if (TI[*s]) if (TI[s[1]]) if (AI[s[2]]) if (tl >= 7) ti++; if ((i > 0) || (ti >= 3)) e += (double)ti; } else { ti = i + TI[*s] + TI[s[1]]; if ((i > 0) || (ti >= 2)) e += (double)ti; } if (e > et) { et = e; nti = nt; tc = k; }} if (nti < 0) goto DN; energy += et; tpos = thit[nti].pos; tstem = thit[nti].stem; tl = thit[nti].loop; tbondtype = thit[nti].bondtype; var = (int)(tpos - cend); /* tertiary interaction between b48(=tpos[-1]) and dloop */ b48 = tpos[-1]; if (dloop <= 7) { if (YI[b48]) tc++; else energy -= 1.0; } else { i = 0; while ((j = dloopi[dloop][i++]) >= 0) if (assymagbp[b48][aend1[j]]) { tc++; break; } if (j < 0) energy -= 1.0; } /* large dloop, large tloop */ if (dloop > 7) { if (tl >= 6) if (tc < 2) energy -= 2.0; if (tstem < 3) energy -= 1.0; } /* carm termination */ s = cpos - 1; se = cend; if (cstem > 5) { s++; se--; } if (!stackbp[*s][*se]) energy -= 1.0; se = cpos - 3; if (!bp[cend[-1]][*cpos]) 
{ if (assymst[cend[-1]][*cpos]) { if (dloop < 5) se++; energy += 1.5; } else if (dloop < 13) se++; } else { if (cstem > 5) { if (dloop < 13) se++; } else if (dloop < 5) se++; } /* tertiary interaction between tloop and dloop near carm */ s = tpos + tstem; if (tl >= 5) { ti = (tl >= 10)?4:((tl >= 7)?3:2); b57 = s[ti]; if (!gabp[*se][b57]) energy -= 2.0; else { k = (var > 3)?2:((var > 1)?1:0); if (bp[cend[k]][b57]) energy += 1.0; }} /* R motif at end of tstem */ if (!RI[s[-1]]) energy -= 2.0; /* large tloop */ if (tl > 9) if (tbondtype > 0x200) energy -= 2.0; /* dloop,var,tloop T repeat motif */ /* present in some nematode D-loop replacement tRNA-Ser genes */ if (dloop >= 4) { k = 1; se = aend1; while (se < cpos) if (*se++ == Thymine) k++; if (k >= dloop) { if (var >= 3) { se = cend; while (se < tpos) if (*se++ == Thymine) k++; if (k >= (var + dloop)) { energy += 3.0; se = s + ((tl > 5)?5:tl); while (s < se) if (*s++ != Thymine) break; if (s >= se) energy += 5.5; }}}} /* astem stability */ if (ea < -6.1) if (tl > 4) { if (*s == Thymine) if (s[-1] == Guanine) if (s[1] == Thymine) goto NASI; if (ea > -8.3) if (*clooppos == Cytosine) if (clooppos[1] == Thymine) if (cloopend[-2] == Adenine) if (cloopend[-1] == Adenine) goto NASI; energy -= 3.0; } NASI: /* cstem stability (GC bond count) */ bondtype = acbondtype + tbondtype; if ((cbondtype & 0xf) < 1) if ((bondtype & 0xf) < 3) energy -= 1.0; /* cloop CTnnnAA motif */ if (bondtype >= 0x400) energy += CX[*clooppos] + TX[clooppos[1]] + AXX[cloopend[-1]] + AXX37[cloopend[-2]]; else energy += CX[*clooppos] + TX[clooppos[1]] + AX[cloopend[-1]] + AX37[cloopend[-2]]; /* large dloop */ if (dloop >= 9) { k = tloop - dloop - 4; if (k < 0) if (bondtype >= 0x1000) energy += (double)k; if (dloop >= 12) { if (dloop >= 14) energy -= 2.0; else if (tstem < 6) energy -= ((dloop >= 13)?2.0:1.0); }} /* small dloop, small tarm */ if (dloop <= 10) if (tstem < 3) if (ea > -2.6) if (tl <= 7) if (cgcat >= 4) if (gc[*tpos][apos2[-1]]) if 
(gc[tpos[1]][apos2[-2]]) if (gcv > 1.2) if ((abondtype & 0xf) > 0) if ((cbondtype & 0xf) > 0) energy += (4.5 + (mtBONDSTAB - 0.5)*(double)(5 - tstem)); /* global stem damage level */ i = (int)((bondtype >> 16) & 0xf); j = (int)((bondtype >> 12) & 0xf) + i; k = (int)((bondtype >> 8) & 0xf); if (tstem > 3) { if ((k > 0) || (tl > 9)) if ((j > 0) || (k > 5)) { n = j + k; if ((s[-1] != Guanine) || (*s != Thymine) || (s[1] != Thymine) || (tstem < 5)) if (n > 4) energy -= 2.0*(double)(n - 4); }} else { n = j + k; if (n > 3) energy -= 2.0*(double)(n - 3); } /* long tstem with tloop GTT motif */ if (s[-1] == Guanine) if (*s == Thymine) if (s[1] == Thymine) if (tstem >= 6) if (tbondtype < 0x100) energy += 1.5; /* find exceptions */ if (energy < tthresh) { if (!mtxdetect) goto DN; if (incds) goto DN; if (energy < (thresh - 13.5)) goto DN; if (energy < (tthresh - 13.5)) goto DN; if (k > 1) { if (i > 2) goto DN; if (k > 4) if (i > 1) goto DN; } if (nbase > 70) goto DN; if (var > 4) { if (var > 5) goto DN; if (var > tl) goto DN; } if (tstem < 4) if ((agcat + cgcat + 2) < (astem + cstem)) goto DN; if (tl > 9) goto DN; if (dloop > 13) goto DN; if (!YI[*clooppos]) goto DN; if ((abondtype & 0xf) < 2) { if ((abondtype & 0xf) < 1) goto DN; if (cbondtype > 0x200) if (tbondtype > 0x100) if (abondtype > 0x200) goto DN; } if ((tbondtype & 0xf) < 1) { if ((acbondtype & 0xf) < 1) goto DN; if (acbondtype > 0x200) goto DN; } if ((dloop + 19) < tloop) goto DN; if (gcv > 5.5) goto DN; tgcat = ((tbondtype >> 4) + tbondtype) & 0xf; if ((tgcat + 2) < tstem) goto DN; if (cloop != 7) goto DN; if (bp[*cpos][cend[-1]]) if (bp[cpos[-1]][*cend]) if (bp[cpos[-2]][cend[1]]) energy += 2.0; if (bondtype < 0x20000) if (thit[nti].stem_energy > -4.6) if (tstem >= 4) if ((tstem >= 5) || (s[-1] == Guanine)) if (stackbp[cpos[1]][cend[-2]]) if (stackbp[*cpos][cend[-1]]) if (stackbp[cpos[-1]][*cend]) { energy += 1.5; if (s[-1] == Guanine) if (*s == Thymine) if (s[1] == Thymine) energy += 1.0; if (agcat >= 6) energy 
+= 0.5; } if (tc > 0) if (tstem >= 5) if (var < 6) if (var > 2) { if (acbondtype < 0x100) energy += 5.0; else if ((abondtype + tbondtype) < 0x100) energy += 3.0; else if (cloopend[-2] == Thymine) if (cloopend[-1] == Thymine) if (dloop > 7) if (tbondtype < 0x100) if (!tt[*tpos][apos2[-1]]) if ((agcat+cgcat) >= 10) energy += 13.5; } if (s[-1] == Guanine) if (*s == Thymine) if (s[1] == Thymine) if ((tstem >= 5) || (s[2] == Cytosine)) { energy += 1.5; if (tstem >= 5) if (tbondtype < 0x1000) if (s[2] == Cytosine) { if (abondtype < 0x10000) { if (*clooppos == Cytosine) if (clooppos[1] == Thymine) if (cloopend[-2] == Adenine) if (cloopend[-1] == Adenine) energy += 3.0; if (tbondtype < 0x200) if (bondtype < 0x10000) if (tl == 7) if (s[4] == Adenine) energy += 4.0; }} else if (tbondtype < 0x200) if ((tbondtype & 0xf) >= 2) if (*clooppos == Cytosine) if (clooppos[1] == Thymine) if (cloopend[-2] == Adenine) if (cloopend[-1] == Adenine) energy += 1.0; } if (tstem >= 4) if (tbondtype < 0x100) if (cbondtype < 0x200) if (agcat >= 5) energy += 1.5; if (energy > tthresh) energy = tthresh; if (ea > -1.8) energy += 3.0; else if (abondtype < 0x60) energy += 1.5; else if (acbondtype < 0x200) energy += 0.75; if (*clooppos == Cytosine) if (cloopend[-2] == Adenine) if (cloopend[-1] == Adenine) { if (tstem >= 5) if (tbondtype < 0x100) if (clooppos[1] == Thymine) { energy += 3.0; if (tstem >= 6) energy += 1.0; } else if (clooppos[1] == Cytosine) energy += 1.0; if (tc >= 2) if (clooppos[1] == Thymine) if (bondtype < 0x1000) if (tstem >= 4) if (var < 6) if (var > 2) energy += 3.0; } if (cbondtype < 0x100) if (agcat >= 5) if (tc > 0) if (clooppos[1] == Thymine) if (YI[*clooppos]) if (RI[cloopend[-2]]) if (RI[cloopend[-1]]) if (tbondtype < 0x100) energy += 4.0; else if (agcat >= 6) if ((tgcat + 1) >= tstem) if (tstem >= 4) energy += 4.0; if (bondtype < 0x1000) { energy += 0.5; if (bondtype < 0x200) energy += 0.75; } if (energy < tthresh) goto DN; energy -= (3.0 + 0.9*(energy - tthresh)); } /* 
mammalian cloop motif constraint */ if (ds == MAMMAL_MT) { s1 = clooppos; s2 = s1 + cloop; r = *s1++; while (s1 < s2) r = (r << 4) + *s1++; if (r != clmotif[0]) if (r != clmotif[1]) if (r != clmotif[2]) energy -= 5.0; } /* remember fully formed D-loop replacement mttRNA gene */ /* if threshold reached */ if (energy < thresh) goto DN; te.energy = energy; thresh = energy; te.ps = apos1; te.spacer1 = 0; te.spacer2 = 0; te.dstem = 0; te.dloop = dloop; te.cstem = cstem; te.cloop = cloop; te.anticodon = astem + dloop + cstem + 2; te.nintron = 0; te.intron = 0; te.var = var; te.varbp = 0; te.tstem = tstem; te.tloop = tl; te.nbase = astem + dloop + carm + var + 2*tstem + tl; tastem = astem; tastem8 = astem8; tastem8d = astem8d; /* build fully formed cloverleaf mttRNA genes */ DN: if (dloop < 10) continue; /* choose tarm */ nt = -1; nti = -1; et = -INACTIVE; while (++nt < nth) { tend = thit[nt].end; if (tend != apos2) continue; e = thit[nt].energy; tpos = thit[nt].pos; tstem = thit[nt].stem; /* GT motif on tloop */ s = tpos + tstem; if (*s == Thymine) if (s[-1] == Guanine) if (tstem >= 5) if (!stackbp[*tpos][tend[-1]]) { e += 0.5; if (!bp[tpos[1]][tend[-2]]) e += 0.5; } /* large var loop */ var = (int)(tpos - cend); if (var > 5) { ev = (double)(var - 5); if (tstem < 5) e -= 3.0*ev; else e -= (0.5 + 0.5*ev); /* allow large var loop if tarm looks nuclear */ /* (GTTC motif, very large var loop base-pairing) */ if (var > 9) { if ((thit[nt].bondtype & 0xf) < 1) e -= 1.0; e -= (0.25*(double)(var - 8)); if (*s == Thymine) if (s[-1] == Guanine) if (s[1] == Thymine) if (s[2] == Cytosine) e += 4.0; if (var > 17) { if (var > 25) continue; e += 0.5*vloop_stability(cend,var,&varbp); }}} /* small var loop */ if (var < 3) { if (tstem > 5) if (s[-1] != Guanine) e -= 0.5; if (var < 2) { if (var < 1) { if (var < 0) continue; if (tstem < 4) if (thit[nt].stem_energy < -4.0) continue; } e -= 3.0; }} if (e > et) { et = e; nti = nt; }} if (nti < 0) continue; tpos = thit[nti].pos; tstem = 
thit[nti].stem; tl = thit[nti].loop; tarm = 2*tstem + tl; var = (int)(tpos - cend); b48 = tpos[-1]; tbondtype = thit[nti].bondtype; bondtype = acbondtype + tbondtype; ti = (int)(((bondtype >> 16) & 0xf) + ((bondtype >> 12) & 0xf) + ((bondtype >> 8) & 0xf)); /* choose darm */ nd = -1; ndi = -1; ed = -INACTIVE; while (++nd < ndh) { dl = dhit[nd].loop; dstem = dhit[nd].stem; darm = 2*dstem + dl; dpos = dhit[nd].pos; e = dhit[nd].energy; /* spacing between astem,darm,carm */ spacer1 = (int)(dpos - aend1); spacer2 = (int)(cpos - dpos) - darm; if (spacer1 < 2) { if (spacer1 < 1) continue; if (dstem < 3) continue; if (dl > 12) e -= 2.0; if (astem < 7) e -= 1.0; if (spacer2 != 2) { if (spacer2 < 1) continue; if (spacer2 > 2) continue; if ((abondtype & 0xf) < 1) if ((dhit[nd].bondtype & 0xf) < 1) e -= 0.5; if (var > 7) e -= 1.0; if (dl > 12) e -= 1.0; if (cloop != 7) e-= 2.0; if (cstem < 6) e -= 3.6; else e -= 0.5; } else { if (cstem > 5) continue; s = cpos; se = cend-1; while (!bp[*s][*se]) { s++; se--; } if (!stemterm[s[-1]][se[1]]) e -= 0.5; e -= 0.8; }} else { if (spacer1 > 2) { if (spacer1 > 3) continue; if (dstem > 4) continue; if (dstem < 3) continue; if (tl > 15) continue; if (astem < 7) e -= 1.0; if (ti > 4) e -= 1.0; if (cloop != 7) e-= 2.0; if (tbondtype > 0x2000) if (!RI[tpos[tstem-1]]) e -= 2.0; e -= 1.0; if (spacer2 != 1) e -= 0.5; else if (dhit[nd].bondtype < 0x100) if (var >= 3) if (var <= 5) if (tstem >= 3) { e += 1.0; if (agcat >= 5) if (wcbp[*aend1][*apos2]) if (!bp[aend1[-1]][*apos2]) if (bp[b48][dpos[dstem+1]]) e += 0.5; } } if (spacer2 > 1) { if (spacer2 > 2) continue; if (astem < 7) if (spacer1 == 2) e -= 1.0; if (cloop != 7) e -= 2.0; if (ea < -5.8) e -= 2.0; e -= 2.5; if (bp[b48][dpos[dstem+1]]) { if (dhit[nd].bondtype < 0x1000) if (wcbp[dpos[1]][dpos[darm-2]]) if (wcbp[dpos[2]][dpos[darm-3]]) if (var < 6) if (dl > 3) e += 2.0; } else e -= 1.0; } else if (spacer2 < 1) { if (spacer2 < 0) continue; if (var > 6) continue; if (dstem > 4) continue; if 
(dhit[nd].stem_energy < -4.3) continue; if (astem < 7) if (spacer1 == 2) e -= 1.0; if (cloop != 7) e-= 2.0; e -= mtBONDSTAB; } if (cstem > 5) if ((!gt[*cpos][cend[-1]]) || astem8) e-= mtBONDSTAB; } /* very large or very small dloop */ if (dl < 3) e -= 2.0; if (dl > 11) { if (dl > 14) e -= 2.0; else if (dl > 13) { if (dhit[nd].bondtype >= 0x100) e -= 2.0; else e -= 1.0; } else if (dl > 12) { if (dhit[nd].bondtype >= 0x1000) e -= 2.0; else e -= 1.0; } else if (dhit[nd].bondtype >= 0x10000) e -= 2.0; } /* tertiary interactions in darm */ b8 = dpos[-2]; b9 = dpos[-1]; if (dl > 2) { if (dl > 5) if (!stackbp[dpos[dstem+1]][b48]) e -= 1.0; if (!stackbp[b8][dpos[dstem]]) e-= 0.25; if (!stackbp[b8][dpos[dstem+dl-1]]) e -= 0.25; } if (!bp[b9][dpos[2]]) if (!bp[b9][dpos[darm-3]]) e -= 1.0; /* TR motif at b8-9 */ if (RI[b9]) { if (b8 == Thymine) if (spacer1 == 2) if (ti < 6) if (((bondtype & 0xf) > 2) || (bondtype < 0x1000) || ((tbondtype < 0x100) && (tstem > 3))) if ((cbondtype & 0xf) < 5) if (stembp[dpos[1]][dpos[darm-2]]) if (var < 6) if (var > 2) e += 1.5; else if (tstem > 3) if (cloopend[-2] == Adenine) e += 1.5; } else { e -= 1.0; if (b9 == Thymine) if (spacer1 == 2) e -= 2.0; } if (e > ed) { ed = e; ndi = nd; }} if (ndi < 0) continue; energy = 100.0 + ec + ed + et; dl = dhit[ndi].loop; dstem = dhit[ndi].stem; darm = 2*dstem + dl; dpos = dhit[ndi].pos; dbondtype = dhit[ndi].bondtype; spacer1 = (int)(dpos - aend1); spacer2 = (int)(cpos - dpos) - darm; b8 = dpos[-2]; /* tertiary structure interaction between tloop and dloop */ if (tl >= 3) if (dl >= 4) { di = (dl < 7)?(darm-dstem-2):(darm-dstem-3); ti = (tl < 9)?(tstem+2):((tl < 13)?(tstem+3):(tstem+5)); if (ggbp[dpos[di]][tpos[ti]]) if (ggbp[dpos[di-1]][tpos[ti-1]]) { energy += 2.0; if (spacer1 != 2) if (spacer2 != 2) if (dstem < 4) if (tl > 7) if (bp[dpos[di+1]][tpos[ti+1]]) energy += 4.0; if (ea > -2.5) if (wcbp[dpos[1]][dpos[darm-2]]) if (wcbp[dpos[2]][dpos[darm-3]]) energy += 3.0; } if (tl > 10) if (dl > 10) energy -= 
1.0; } else if (dl == 3) if (wcbp[dpos[dstem+1]][b48]) energy += 1.0; /* small darm and tarm */ if (tloop <= 18) if (tarm <= 13) if (dl <= 8) if (spacer1 == 2) if (spacer2 == 1) if (abondtype < 0x1000) if (tbondtype < 0x100) if (dbondtype < 0x200) { et = (mtBONDSTAB - 0.5)*(double)(5 - tstem) + 0.1*(double)(7-tl); ed = mtBONDSTAB*(double)(4 - dstem); energy += (0.8*(et + ed)); } /* GTTC motif on tloop */ s = tpos + tstem; if (tl < 5) if (tl < 2) energy += G[s[-1]]; else { et = (G[s[-1]] + T[*s] + T[s[1]]); if (tl > 3) if (bp[*s][s[tl-1]]) { e = (G[*s] + T[s[1]] + T[s[2]]); if (e > et) et = e; } if (tstem < 5) { e = (G[s[-2]] + T[s[-1]] + T[*s] + C[s[1]]); if (e > et) et = e; } energy += et; } else energy += (G[s[-1]] + T[*s] + T[s[1]] + C[s[2]]); /* long astem */ if (astem8) if (bp[apos1[0]][apos2[6]]) if (bp[apos1[1]][apos2[5]]) if (bp[apos1[2]][apos2[4]]) if (bp[apos1[3]][apos2[3]]) energy += hbem[apos1[-1]][apos2[7]]; /* false positive supression */ if (!RI[cend[0]]) energy -= 1.0; if (!RI[cpos[-1]]) energy -= 1.0; if (tarm < (var + 3)) energy -= 2.0; if (gcv < 1.5) if (dbondtype > 0x10000) energy -= 2.0; if (tarm > 27) { energy -= 1.0; if (spacer2 != 1) energy -= 1.0; } if (dstem < 3) { if (var > 5) energy -= 1.0; if (tloop > (dloop + 8)) energy -= 0.5; } if (b8 != Thymine) if (dl > 3) if (dbondtype > 0x100) if ((b8 == Cytosine) || (dbondtype > 0x10000)) if (*clooppos != Cytosine) if (!wcbp[dpos[dstem+1]][b48]) energy -= 1.0; /* high GC false positive suppression */ if (gcv >= 5.1) { if ((abondtype & 0xf) >= 4) { s1 = apos1; s2 = apos2 + astem; n = 0; while (--s2 >= apos2) if (gc[*s1++][*s2]) { if (++n >= 4) { energy -= 2.0; break; }} else n = 0; } if ((dbondtype & 0xf) >= 4) energy -= 3.0; if ((cbondtype & 0xf) >= 5) energy -= 3.5; if ((tbondtype & 0xf) >= tstem) energy -= 4.0; } /* global stem damage level */ tc = tstem + dstem; dtbondtype = dbondtype + tbondtype; mabondtype = dtbondtype + cbondtype; bondtype = acbondtype + dtbondtype; if (bondtype < 0x100) 
energy += 0.5; if ((dtbondtype & 0xf) < 1) { energy -= 1.0; if (tc >= 10) energy -= 2.0; if ((bondtype & 0xf) < 3) if (nbase > 75) energy -= 1.0; } i = (int)((bondtype >> 16) & 0xf); j = (int)((bondtype >> 12) & 0xf) + i; k = (int)((bondtype >> 8) & 0xf) + j; ti = (tc > 6)?5:((tc > 5)?4:3); if (k > ti) { ev = (double)(k - ti); energy -= 0.5*ev; if (cbondtype > 0x10000) if (tstem < 5) energy -= ev; if (i > 0) if (k > 8) energy -= 1.5*(double)(k - 8); } /* low GC false positive supression */ if (gcv < 3.5) if ((bondtype & 0xf) < 2) { if ((bondtype & 0xf) < 1) energy -= 1.0; if (dl > 3) if (var > 2) if (!wcbp[dpos[dstem+1]][b48]) energy -= 1.0; } /* small variable loop */ if (var < 3) { if (dloop > 18) { if (dloop > (tloop + 2)) energy -= 1.0; if (tloop > 20) if ((((dtbondtype >> 4) + dtbondtype) & 0xf) < 6) energy -= 2.0; } if (astem < 7) { energy -= 1.0; if (agcat >= 5) if (bondtype < 0x300) if (gcv > 1.2) if (gcv < 5.0) energy += 2.0; }} else /* NNNNNAA cloop */ if (cloopend[-2] == Adenine) if (cloopend[-1] == Adenine) if (spacer1 > 1) if ((dbondtype < 0x2000) || (dloop > mt_DRLmaxlength)) { if (abondtype < 0x100) energy += 1.0; else if (cbondtype < 0x100) energy += 1.0; else if (tstem >= 5) if (tbondtype < 0x100) { energy += 1.0; if (*clooppos == Cytosine) if (clooppos[1] == Thymine) if (dbondtype < 0x100) energy += 0.5; if (cgcat >= 3) if ((tbondtype & 0xf) > 0) if (ggbp[dpos[dstem+1]][b48]) if (wcbp[dpos[1]][dpos[darm-2]]) if (tl < 10) if (spacer1 == 2) if (spacer2 == 1) if (dl > 2) if (var >= 2) if (var < 6) { if (agcat >= 6) energy += 3.0; else if (agcat >= 5) if (cgcat >= 4) if (dbondtype < 0x100) if (*s == Thymine) if (s[-1] == Guanine) if (s[1] == Thymine) energy += 3.0; }}} /* large tloop */ if (tl > 12) { if (tbondtype > 0x10000) energy -= 2.0; if (agcat < 5) if (spacer1 != 2) if (spacer2 != 1) energy -= 1.0; } /* find exceptions */ if (energy < dtthresh) { if (!mtxdetect) continue; if (incds) continue; if (energy < (thresh - 12.0)) continue; if (energy < 
(dtthresh - 12.0)) continue; if (nbase > 75) continue; if (dstem > 4) continue; if (dstem < 3) continue; if (astem < 7) if (acbondtype > 0x21000) continue; if (var > 5) { if (var > 6) continue; if (tarm < 12) continue; } if (gcv <= 1.2) { if (gcv < 0.9) continue; if ((mabondtype & 0xf) < 1) continue; } if (tl > 9) { if (tl > 13) continue; if (!wcbp[dpos[1]][dpos[darm-2]]) continue; } if (dl > 7) { if (bondtype > 0x20000) if (dloop > (tloop + 4)) continue; if (dl > 10) { if (dl > 12) if (abondtype > 0x1000) continue; if (tbondtype > 0x200) continue; if (tt[*tpos][apos2[-1]]) continue; if (var > 5) continue; if (dloop > (tloop + 8)) if (bondtype > 0x10000) continue; if (astem < 7) continue; }} if (RI[clooppos[1]]) continue; b9 = dpos[-1]; if (cstem >= 6) { if (cbondtype > 0x200) continue; if (var < 3) continue; if (YI[b9]) continue; } if (cloop != 7) continue; if (ds == MAMMAL_MT) continue; if (mabondtype < 0x400) { if ((b8 == Thymine) || (mabondtype < 0x300)) if (ea < -5.45) if (chit[nc].stem_energy > -3.2) if (dbondtype < 0x200) if (spacer1 > 1) if ((spacer2 == 1) || (mabondtype < 0x100)) if ((spacer1 < 3) || (tstem > 3) || (tbondtype < 0x100)) if ((spacer1 < 3) || ((var > 2) && (var < 6) && (tbondtype < 0x2000) && (tl < 10))) if (dstem < 5) if (var >= 2) if (dl > 2) if (tl < 15) if ((b8 != Cytosine) || (*clooppos == Cytosine)) if (RI[b9]) if (*clooppos != Adenine) if (clooppos[1] == Thymine) if (RI[cloopend[-2]]) { s1 = apos1; s2 = apos2 + astem; n = 0; while (--s2 >= apos2) if (wcbp[*s1++][*s2]) { if (++n >= 3) break; } else n = 0; if (n >= 3) { energy += 3.0; if ((abondtype & 0xf) > 0) energy += 2.0; if (bp[dpos[dstem+1]][b48]) if (wcbp[dpos[1]][dpos[darm-2]]) if (var <= 5) energy += 1.0; } if (dtbondtype < 0x200) if (agcat < 2) if (wcbp[dpos[dstem+1]][b48]) if (wcbp[dpos[1]][dpos[darm-2]]) if (wcbp[dpos[2]][dpos[darm-3]]) if (gcv > 1.2) if (var <= 5) if (tstem >= 3) if (dstem >= 3) if (tl > 3) if (tl < 9) if (dl < 9) if (spacer1 == 2) energy += 10.0; } if 
((tbondtype & 0xf) > 0) if (mabondtype < 0x300) { if (mabondtype < 0x100) { if ((spacer1 < 3) || (tstem > 2)) if (var > 0) if (YI[*clooppos]) if ((spacer2 > 0) || (clooppos[1] == Thymine)) energy += 2.5; } else if ((dbondtype & 0xf) > 0) if (b9 != Cytosine) if (var <= 7) if (spacer2 == 1) if (tarm < 22) if (gcv > 1.2) if (dstem >= 4) { if (tstem >= 5) energy += 5.0; else if (tstem >= 3) if (tbondtype < 0x100) energy += 1.0; } else if (tstem >= 5) energy += 1.0; } else if ((dbondtype & 0xf) > 0) { if (tstem >= 5) if (s[-1] == Guanine) if (*s == Thymine) if (s[1] == Thymine) if (*clooppos == Cytosine) if (clooppos[1] == Thymine) if (cloopend[-2] == Adenine) if (cloopend[-1] == Adenine) energy += 1.0; if (bondtype < 0x1000) if (cbondtype < 0x100) energy += 1.0; }} if (tstem >= 5) if (*clooppos == Cytosine) { if (dl > 3) if (dtbondtype < 0x200) if ((tbondtype & 0xf) > 0) if (clooppos[1] == Thymine) { if (clooppos[2] == Thymine) if (clooppos[3] == Adenine) if (clooppos[4] == Cytosine) if (clooppos[5] == Adenine) if (cloop == 7) energy += 0.5; if (cgcat >= 4) if (wcbp[dpos[1]][dpos[darm-2]]) if (bp[dpos[dstem+1]][b48]) if (tl < 10) if (var < 6) if (spacer1 == 2) if (spacer2 == 1) if (dstem >= 3) energy += 3.0; } if (clooppos[1] == Cytosine) if (clooppos[2] == Cytosine) if (clooppos[3] == Adenine) if (clooppos[4] == Thymine) if (s[-1] == Guanine) if (*s == Thymine) if (s[1] == Thymine) energy += 1.0; } if (RI[b9]) { if (b8 == Thymine) { if (clooppos[1] == Thymine) { if (cloopend[-2] == Adenine) { if (wcbp[dpos[1]][dpos[darm-2]]) { if (*clooppos == Cytosine) { if (abondtype < 0x200) energy += 1.0; if (bondtype < 0x10000) if (dtbondtype < 0x200) if (agcat >= 3) if (cgcat >= 4) if (tl < 10) if (var < 6) if (spacer1 == 2) if (spacer2 == 1) if (tstem >= 3) energy += 3.0; } if (tstem >= 5) if (s[-1] == Guanine) if (*s == Thymine) if (s[1] == Thymine) { energy += 1.0; if (tl >= 5) if (spacer1 == 2) if (spacer2 == 1) if (tbondtype < 0x100) if (wcbp[dpos[dstem+1]][b48]) energy += 
3.0; } if (tstem >= 3) if (tl < 10) if (spacer1 == 2) if (spacer2 == 1) if (RI[cloopend[-1]]) if (dl > 2) if (var >= 2) if (var < 6) if (ggbp[dpos[dstem+1]][b48]) { if (dtbondtype < 0x100) { energy += 3.5; if ((bondtype & 0xf00) == 0) if (*clooppos == Cytosine) energy += 1.5; } if (bondtype < 0x10000) if (tstem > 2) if (tbondtype < 0x200) energy += 2.5; if (abondtype < 0x100) if (wcbp[dpos[2]][dpos[darm-3]]) energy += 3.0; if (tbondtype < 0x100) if (agcat >= 6) if (tstem >= 5) if ((tbondtype & 0xf) > 0) if (RI[cloopend[-1]]) if (cgcat >= 4) energy += 2.0; } else if (!ggstembp[*tpos][apos2[-1]]) if (wcbp[dpos[dstem+1]][*tpos]) energy += 1.5; } if ((abondtype & 0xf) < 1) if (abondtype < 0x100) if (gcv > 1.2) if (dl > 3) if (bp[dpos[dstem+1]][b48]) if (spacer1 == 2) if (spacer2 == 1) if (*clooppos == Cytosine) energy += 5.0; if (cbondtype < 0x100) if (tbondtype < 0x100) if (tstem >= 3) if (dl > 3) if (var < 6) if (bp[dpos[dstem+1]][b48]) if (spacer1 == 2) if (spacer2 == 1) energy += 2.5; } if (stembp[dpos[dstem+1]][b48]) { if (*clooppos == Thymine) if (cloopend[-2] == Guanine) if (clooppos[2] == Guanine) if (clooppos[3] == Thymine) if (clooppos[4] == Guanine) if (dl > 2) energy += 1.0; if (cbondtype < 0x100) if (dbondtype < 0x10000) if (wcbp[dpos[1]][dpos[darm-2]]) if (var < 6) if (tstem >= 3) if (gcv >= 1.2) if (dl > 3) energy += 1.0; if (tstem >= 5) if (dtbondtype < 0x200) if (*clooppos == Cytosine) if (spacer1 == 2) if (spacer2 == 1) if (RI[cloopend[-2]]) energy += 0.5; } } if (tstem > 2) if (tarm < 28) if (spacer1 == 2) if (spacer2 == 1) if (dl > 3) if (j < 1) if (k > ti) if (ggstembp[dpos[dstem+1]][b48]) energy += 2.5; if (dtbondtype < 0x100) if ((tbondtype & 0xf) > 0) if (bp[dpos[dstem+1]][b48]) if (b9 == Adenine) if ((dbondtype & 0xf) > 0) energy += 2.0; else if (spacer2 == 1) energy += 0.5; if (cloopend[-2] == Adenine) if (cloopend[-1] == Adenine) if (cbondtype < 0x2000) if (spacer1 > 1) if (dl > 2) if (var < 6) energy += 0.75; } if (var > 2) if (dl > 2) { if 
(cbondtype < 0x200) if (((mabondtype & 0xf) > 3) || (bondtype < 0x1000)) { if (bp[dpos[dstem+1]][b48]) energy += 1.0; if (cbondtype < 0x100) if (dbondtype < 0x100) if (bp[b8][dpos[dstem]]) if (bp[b8][dpos[darm-dstem-1]]) if (var < 6) if (tstem >= 3) if (tl < 10) if (spacer1 == 2) if (spacer2 == 1) if (clooppos[1] == Thymine) if (cloopend[-2] == Adenine) energy += 3.0; } if (clooppos[1] == Thymine) if (RI[cloopend[-2]]) { if (*clooppos == Cytosine) { if (dtbondtype < 0x200) if (agcat >= 3) if (cgcat >= 4) if (var < 6) if (tstem >= 3) if (tl < 10) if (spacer1 == 2) if (spacer2 == 1) { if (abondtype > 0x20000) if (bp[dpos[dstem+1]][b48]) energy += 7.0; if (agcat >= 6) energy += 2.0; } if ((bondtype & 0xf00) == 0) if (gcv > 5.0) if (s[-1] == Guanine) if (*s == Thymine) if (tstem >= 5) if (var < 6) if (tl < 10) if (spacer1 == 2) if (spacer2 == 1) energy += 2.0; if (abondtype < 0x100) if (cbondtype < 0x10000) if (bp[dpos[dstem+1]][b48]) if (cgcat >= 4) if (tstem >= 3) if (var < 6) if (tstem >= 3) if (tl < 10) if (spacer1 == 2) if (spacer2 == 1) energy += 1.5; } if (dtbondtype < 0x100) if (agcat >= 4) if (cgcat >= 4) if (var < 6) if (tstem >= 3) if (tl < 10) { if (spacer1 == 2) { if (abondtype < 0x3000) if (stackbp[dpos[dstem+1]][b48]) energy += 3.0; if (b8 == Thymine) if (s[-1] == Guanine) if (*s == Thymine) if (s[1] == Thymine) energy += 3.5; } if (agcat >= 6) if (YI[*clooppos]) if (s[-1] == Guanine) if (*s == Thymine) if ((dtbondtype & 0xf) > 0) energy += 3.0; } if (mabondtype < 0x10000) if (dtbondtype < 0x400) if (agcat >= 5) if (cgcat >= 3) if (tl < 10) if (var < 6) if (spacer1 == 2) if (spacer2 == 1) { if (dtbondtype < 0x200) if (cbondtype < 0x300) if (bondtype < 0x10000) if (tstem >= 3) energy += 1.0; if (tstem >= 5) if (s[-1] == Guanine) energy += 4.0; } } } } else if (bondtype < 0x10000) if (mabondtype < 0x500) if (dbondtype < 0x100) if (b8 == Thymine) if (agcat >= 4) if (clooppos[1] == Thymine) if (cloopend[-2] == Adenine) if (cloopend[-1] == Adenine) if 
(spacer1 == 2) if (spacer2 == 1) if (dstem >= 3) if (tstem >= 5) if (dl > 2) if (tl < 10) if (var < 6) energy += 7.0; if (agcat >= 5) if (cgcat >= 4) if ((acbondtype & 0xf) >= 3) { if (tbondtype < 0x100) if ((dbondtype & 0xf) > 0) if ((((dbondtype >> 4) + dbondtype) & 0xf) >= 3) if (wcbp[dpos[dstem+1]][b48]) if (b8 == Thymine) if (RI[b9]) if (clooppos[1] == Thymine) if (YI[*clooppos]) if (RI[cloopend[-2]]) if (RI[cloopend[-1]]) if (spacer1 == 2) if (spacer2 == 1) if (dl > 2) if (tl < 10) if (var < 6) if (var > 2) energy += 6.0; if (cgcat >= 5) if (abondtype < 0x10000) if (bp[dpos[dstem+1]][b48]) if (clooppos[1] == Thymine) if (YI[*clooppos]) if (RI[cloopend[-2]]) if (dl > 2) if (tl < 10) if (var < 6) if (var > 2) energy += 6.0; } if (energy >= dtthresh) energy -= (0.9*(energy - dtthresh) + 5.0); else continue; } /* remember fully formed mttRNA gene if threshold reached */ if (energy < thresh) continue; te.energy = energy; thresh = energy; te.ps = apos1; te.spacer1 = spacer1; te.dstem = dstem; te.dloop = dl; te.spacer2 = spacer2; te.cstem = cstem; te.cloop = cloop; te.var = var; te.varbp = (var > 17)?varbp:0; te.tstem = tstem; te.tloop = tl; k = astem + spacer1 + darm + spacer2; te.anticodon = k + cstem + 2; te.nintron = 0; te.intron = 0; te.nbase = k + carm + var + 2*tstem + tl; tastem = astem; tastem8 = astem8; tastem8d = astem8d; } } /* for highest energy mttRNA gene */ /* decide astem length, look for NCCA acceptor tail */ /* and calculate total length */ if (te.ps) { apos2 = te.ps + te.nbase; if (extastem) if (tastem8d) { te.astem1 = 8; te.astem2 = 8; te.ps--; te.nbase++; te.anticodon++; as = aatail(apos2+8,&aext,sw); } else { te.astem1 = tastem; te.astem2 = tastem; as = aatail(apos2+tastem,&aext,sw); if (tastem8) { as8 = aatail(apos2+8,&aext8,sw); if (as8 >= as) { te.ps--; te.nbase++; te.anticodon++; te.astem1 = 8; te.astem2 = 8; as = as8; aext = aext8; }}} else { te.astem1 = tastem; te.astem2 = tastem; as = aatail(apos2+tastem,&aext,sw); } if (as < 2) aext = 
1; te.nbase += te.astem2; nbasefext = te.nbase + ASTEM2_EXT; te.nbase += aext; /* store mttRNA gene if there are no */ /* higher energy overlapping mttRNA genes */ te.start = (long)(te.ps - seq); if (tn = find_slot(d,&te,&nts,sw)) { base_copy3(te.ps,te.seq,nbasefext); base_copy3(te.ps,te.eseq,nbasefext); te.aatail = aext; *tn = te; }} } return(nts); }
/*
 * tmopt: try to build a tmRNA gene whose 3' end is the T-arm hit `th`.
 *
 *   d     data set, handed through to find_slot()
 *   th    T-stem hit; th->pos, th->stem and th->loop are read
 *   tarm  T-arm length (the caller passes 2*tstem + tloop)
 *   the   T-arm energy, weighted by 1.59 in the total
 *   ahit  candidate acceptor-stem hits, nah entries; the scan breaks at
 *         the first hit beyond the window, so the array is presumably
 *         ordered by position -- TODO confirm against find_astem5()
 *   nts   current gene count; its address is given to find_slot()
 *   seq   start of the sequence buffer, used to compute te.start
 *   sw    switches; tmrnathresh, tmathresh, tmcthresh, tmcathresh are read
 *
 * Returns nts after the find_slot() call, or unchanged when no candidate
 * scored above sw->tmrnathresh.
 */
int tmopt(data_set *d, trna_loop *th, int tarm, double the, trna_loop *ahit, int nah, int nts,int *seq, csw *sw) { int r,na,nr,nrh,ibase,flag,as,aext,nbasefext; int *s,*v,*s1,*s2,*sa,*sb,*se,*sf,*ps,*tpos,pseq[MAXETRNALEN+1]; static int gtem[6] = { 0x00,0x00,0x11,0x00,0x00,0x00 }; static double A[6] = { 6.0,0.0,0.0,0.0,0.0,0.0 }; static double Ar[6] = { 10.0,0.0,0.0,0.0,0.0,0.0 }; static double Cr[6] = { 0.0,10.0,0.0,0.0,0.0,0.0 }; static double G[6] = { 0.0,0.0,6.0,0.0,0.0,0.0 }; static double Ga[6] = { 0.0,0.0,7.0,0.0,0.0,0.0 }; static double K[6] = { 0.0,0.0,6.0,6.0,0.0,0.0 }; static double Tr[6] = { 0.0,0.0,0.0,10.0,0.0,0.0 }; double e,energy,penergy,tenergy,aenergy,athresh,cthresh,cathresh; static double bem[6][6] = { { -1.072,-0.214,-1.072, ATBOND, 0.000, 0.000 }, { -0.214,-1.072, 3.000,-1.072, 0.000, 0.000 }, { -1.072, 3.000,-1.072, 1.286, 0.000, 0.000 }, { ATBOND,-1.072, 1.286,-0.214, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; static trna_loop rhit[NH]; gene te,*tn; static gene t = { "",{TERM},{TERM},NULL,0,0,0L,0L,7,7,1,0,0,0,13,8,0,28,0,0,3,0,5,7, tmRNA,0.0,0,0,0 };
/* te holds the best candidate so far; it starts at the reporting threshold so only better candidates replace it */
tpos = th->pos; flag = 0; te.energy = sw->tmrnathresh; athresh = sw->tmathresh; cthresh = sw->tmcthresh; cathresh = sw->tmcathresh;
/* T-arm context score: a K,G,A table match at either of two offsets near the 3' side of the stem (18.0 is the sum of the three table maxima, so only a full match survives), plus Tr,Cr,Cr,Ar on the four bases starting at tpos+tarm+4 */
s = tpos + tarm + 4; v = tpos + th->stem - 10; energy = K[*v] + G[v[1]] + A[v[2]]; e = K[v[1]] + G[v[2]] + A[v[3]]; if (e > energy) energy = e; if (energy < 18.0) energy = 0.0; tenergy = Tr[*s]+Cr[s[1]]+Cr[s[2]]+Ar[s[3]] + energy + 1.59*the;
/* one pass per hit returned by find_resume_seq() for the window ending before the T-arm */
nrh = find_resume_seq(tpos-MAXTPTSDIST,TPWINDOW,rhit,NH,sw); nr = -1; while (++nr < nrh) { ps = rhit[nr].pos; 
penergy = tenergy + rhit[nr].energy - 0.001*((double)(tpos - ps)); if (rhit[nr].stem < 24) penergy -= 15.0; na = -1; while (++na < nah) { aenergy = ahit[na].energy; if (aenergy < athresh) continue; t.ps = ahit[na].pos; if (t.ps < (ps - MAXTPDIST)) continue; if (t.ps > (ps - MINTPDIST)) break; energy = -INACTIVE; sa = t.ps + t.astem1; for (sb=sa+9, se=sb+t.cstem; sb <= (sa+16); sb++,se++) for (sf = tpos-3; sf >= (tpos-7); sf--) { s1 = sb; s2 = sf; e = bem[*s1++][*--s2]; while (s1 < se) e += bem[*s1++][*--s2]; if (e > energy) { energy = e; t.var = (int)(tpos - sf); t.dloop = (int)(sb - sa); }} if (energy < cthresh) continue; energy += aenergy; if (energy < cathresh) continue; /* +14.0 when two adjacent bases among sa+3..sa+6 both have base code 2 (the only non-zero gtem entry) */ sb = sa + 3; sf = sa + 7; r = gtem[*sb++]; while (sb < sf) { r = (r >> 4) + gtem[*sb++]; if ((r & 3) == 2) { energy += 14.0; break; }} t.energy = penergy + Ga[t.ps[1]] + Ga[t.ps[2]] + energy; if (t.energy > te.energy) { flag = 1; t.tstem = th->stem; t.tloop = th->loop; t.tps = (int)(ps - t.ps); t.tpe = t.tps + rhit[nr].stem; ibase = (int)(tpos - t.ps); t.nintron = ibase - t.var - 2*t.cstem - t.dloop - t.astem1; t.nbase = ibase + tarm + t.astem2 - t.nintron; te = t; }}} /* store the best candidate: eseq receives the full span, seq the span with the intron removed */ if (flag) { te.start = (long)(te.ps - seq); s = te.ps + te.nbase + te.nintron; as = aatail(s,&aext,sw); nbasefext = te.nbase + ASTEM2_EXT; te.nbase += aext; tn = find_slot(d,&te,&nts,sw); if (tn) { te.intron = te.astem1 + te.dloop + te.cstem; te.asst = 0; base_copy3(te.ps,te.eseq,nbasefext+te.nintron); remove_intron(te.ps,pseq,nbasefext, te.intron,te.nintron); base_copy3(pseq,te.seq,te.nbase); te.aatail = aext; *tn = te; }} return(nts); }
/*
 * tmopt_perm: variant of tmopt() in which the acceptor-stem hits lie
 * downstream of the T-arm (presumably permuted tmRNA genes -- confirm).
 *
 * Only ahit entries positioned between tpos + MINTSTEM_DIST and
 * tpos + MAXTSTEM_DIST + MAXPPINTRONDIST are used, and the
 * find_resume_seq() search starts at apos + MINTPDIST.  The stored record
 * is shifted by 54 bases: te.start and the eseq copy begin at te.ps - 54,
 * and intron/tps/tpe/asst are increased by 54 before find_slot().
 *
 * Parameters are the same as for tmopt(); returns nts after find_slot(),
 * or unchanged when no candidate beats sw->tmrnathresh.
 */
int tmopt_perm(data_set *d, trna_loop *th, int tarm, double the, trna_loop *ahit, int nah, int nts, int *seq, csw *sw) { int r,na,nr,nrh,flag,as,aext; int *s,*v,*s1,*s2,*sa,*sb,*se,*sf,*ps,*apos,*tpos; static int gtem[6] = { 0x00,0x00,0x11,0x00,0x00,0x00 }; double e,energy,penergy,tenergy,aenergy,athresh,cthresh,cathresh; static double A[6] = { 6.0,0.0,0.0,0.0,0.0,0.0 }; static double Ar[6] = { 
10.0,0.0,0.0,0.0,0.0,0.0 }; static double Cr[6] = { 0.0,10.0,0.0,0.0,0.0,0.0 }; static double G[6] = { 0.0,0.0,6.0,0.0,0.0,0.0 }; static double Ga[6] = { 0.0,0.0,7.0,0.0,0.0,0.0 }; static double K[6] = { 0.0,0.0,6.0,6.0,0.0,0.0 }; static double Tr[6] = { 0.0,0.0,0.0,10.0,0.0,0.0 }; static double bem[6][6] = { { -1.072,-0.214,-1.072, ATBOND, 0.000, 0.000 }, { -0.214,-1.072, 3.000,-1.072, 0.000, 0.000 }, { -1.072, 3.000,-1.072, 1.286, 0.000, 0.000 }, { ATBOND,-1.072, 1.286,-0.214, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; static trna_loop rhit[NH]; gene te,*tn; static gene t = { "",{TERM},{TERM},NULL,0,0,0L,0L,7,7,1,0,0,0,13,8,0,28,0,0,3,0,5,7, tmRNA,0.0,0,0,0 }; tpos = th->pos; flag = 0; te.energy = sw->tmrnathresh; athresh = sw->tmathresh; cthresh = sw->tmcthresh; cathresh = sw->tmcathresh; /* same T-arm context score as in tmopt() */ s = tpos + tarm + 4; v = tpos + th->stem - 10; energy = K[*v] + G[v[1]] + A[v[2]]; e = K[v[1]] + G[v[2]] + A[v[3]]; if (e > energy) energy = e; if (energy < 18.0) energy = 0.0; tenergy = Tr[*s]+Cr[s[1]]+Cr[s[2]]+Ar[s[3]]+ energy + 1.59*the; na = -1; while (++na < nah) { aenergy = ahit[na].energy; if (aenergy < athresh) continue; apos = ahit[na].pos; if (apos < (tpos + MINTSTEM_DIST)) continue; if (apos > (tpos + MAXTSTEM_DIST + MAXPPINTRONDIST)) break; energy = -INACTIVE; sa = apos + t.astem1; for (sb=sa+9, se=sb+t.cstem; sb <= (sa+16); sb++,se++) for (sf = tpos-3; sf >= (tpos-7); sf--) { s1 = sb; s2 = sf; e = bem[*s1++][*--s2]; while (s1 < se) e += bem[*s1++][*--s2]; if (e > energy) { energy = e; t.var = (int)(tpos - sf); t.dloop = (int)(sb - sa); }} if (energy < cthresh) continue; energy += aenergy; if (energy < cathresh) continue; sb = sa + 3; sf = sa + 7; r = gtem[*sb++]; while (sb < sf) { r = (r >> 4) + gtem[*sb++]; if ((r & 3) == 2) { energy += 14.0; break; }} penergy = tenergy + Ga[apos[1]] + Ga[apos[2]] + energy; nrh = find_resume_seq(apos+MINTPDIST,TPWINDOW,rhit,NH,sw); nr = -1; while (++nr < nrh) { ps 
= rhit[nr].pos; t.energy = penergy + rhit[nr].energy; if (rhit[nr].stem < 24) t.energy -= 15.0; if (t.energy > te.energy) { flag = 1; t.tstem = th->stem; t.tloop = th->loop; t.asst = (long)(apos - tpos) + t.var + t.cstem; t.ps = tpos - t.var - t.cstem; t.tps = (int)(ps - t.ps); t.tpe = t.tps + rhit[nr].stem; te = t; }}} if (flag) { te.start = (long)(te.ps - seq) - 54; te.intron = te.cstem + te.var + 2*te.tstem + te.tloop + te.astem2; as = aatail(te.ps + te.intron,&aext,sw); te.aatail = aext; base_copy3(te.ps-54,te.eseq,te.tpe+1+TMPTRAILER); te.nbase = te.astem1 + te.dloop + te.cstem; base_copy3(te.ps+te.asst,te.seq,te.nbase); base_copy3(te.ps,te.seq+te.nbase,te.intron + ASTEM2_EXT); te.intron += aext; te.nbase += te.intron; te.nintron = te.tpe - te.nbase + 1 + TMPTRAILER; te.intron += 54; te.tps += 54; te.tpe += 54; te.asst += 54; tn = find_slot(d,&te,&nts,sw); if (tn) *tn = te; } return(nts); }
/*
 * ti_genedetected: finalise the tRNA candidate *te and store it.
 *
 * Recomputes te->nbase as the sum of all stem, loop and spacer lengths,
 * calls aatail() on the bases after the gene, and -- when sw->extastem
 * is set, the acceptor stem is 7 long and bp[][] pairs the base before
 * te->ps with the base after the gene -- tries an 8-base acceptor stem,
 * keeping it when its aatail() result is at least as large.  The gene is
 * then offered to find_slot(); if a slot is returned the sequence is
 * copied into te->seq (with the intron removed via remove_intron() when
 * te->nintron is non-zero, the unspliced copy going to te->eseq) and *te
 * is written to the slot.
 *
 * Note that *te itself is modified.  Returns nts after find_slot().
 */
int ti_genedetected(data_set *d, int nts, int *seq, gene *te, csw *sw) { int as,aext,as8,aext8,nbasefext,*s; int pseq[2*MAXETRNALEN+1]; gene *tn; te->nbase = te->astem1 + te->spacer1 + te->spacer2 + 2*te->dstem + te->dloop + 2*te->cstem + te->cloop + te->var + 2*te->tstem + te->tloop + te->astem2; s = te->ps + te->nbase + te->nintron; as = aatail(s,&aext,sw); if (sw->extastem) if (te->astem1 == 7) if (bp[te->ps[-1]][*s]) { as8 = aatail(s+1,&aext8,sw); if (as8 >= as) { te->ps--; te->nbase += 2; te->anticodon++; if (te->nintron > 0) te->intron++; te->astem1 = 8; te->astem2 = 8; as = as8; aext = aext8; }} nbasefext = te->nbase + ASTEM2_EXT; te->nbase += aext; te->start = (long)(te->ps - seq); tn = find_slot(d,te,&nts,sw); if (tn) { if (te->nintron == 0) base_copy3(te->ps,te->seq,nbasefext); else { base_copy3(te->ps,te->eseq,nbasefext + te->nintron); remove_intron(te->ps,pseq,nbasefext, te->intron,te->nintron); base_copy3(pseq,te->seq,nbasefext); } te->aatail = aext; *tn = *te; } return(nts); }
/*
 * tmioptimise: main search over one sequence of lseq bases at seq.
 *
 * If sw->mtrna is set, find_mt_trna() is run first, and the function
 * returns right after it unless sw->tmrna is also set.  Otherwise every
 * T-stem hit from find_tstems() is combined with acceptor-stem hits from
 * find_astem5(): tmopt() and tmopt_perm() are tried when sw->tmrna is
 * set, and when sw->trna is set D-stems and C-stems are searched between
 * the two and the best-scoring tRNA per T-stem is passed to
 * ti_genedetected().  Intron-containing candidates longer than
 * MAXTRNALEN are passed to ti_genedetected() immediately.
 *
 * Returns the updated gene count nts.
 */
int tmioptimise(data_set *d, int *seq, int lseq, int nts, csw *sw) { int 
i,j,k,intron,nt,nth,nd1,nd2,ndx,ndh,na,nah,nppah,nc,nch,tfold,tarm; int dstem,dloop,flag,mindist,maxdist,tmindist,tmaxdist,tmmindist,tmmaxdist; int tarmthresh,tmstrict,sp2min,sp2max,ige[7]; int *se,*sc,*sb,*si,*tpos,*tend,*apos,*dpos,*tloopfold,*tmv,*cend; int *s1,*s2,*sd,*sf,*sl,*sg1,*sg2,*cposmin,*cposmax,*cpos; unsigned int r,q,c; double e,ec,he,the,thet,ethresh,energy,cenergy,denergy,ienergy; double tdarmthresh,genergy,energy2,energyf,energyf6;
/* Lookup tables indexed by base code (6 entries each).  TT and GG hold a 0x11 pattern: with the "(x >> 4) + table[base]" scans below, bit 1 of the running value is set when two adjacent bases both hit the table.  cA/cC/cG/cT give the score of each base against A, C, G and T and are packed into ct[] for the C-stem scan. */
static unsigned int TT[6] = { 0x00, 0x00, 0x00, 0x11, 0x00, 0x00 }; static unsigned int GG[6] = { 0x00, 0x00, 0x11, 0x00, 0x00, 0x00 }; static unsigned int ct[6] = { 0,0,0,0,0,0 }; static unsigned int cA[6] = { 0,0,0,2,0,0 }; static unsigned int cC[6] = { 0,0,2,0,0,0 }; static unsigned int cG[6] = { 0,2,0,1,0,0 }; static unsigned int cT[6] = { 2,0,1,0,0,0 }; static int yic[9] = { 1,0,0,0,0,0,0,0,0 }; static int tic[9] = { 1,1,0,0,0,0,0,0,0 }; static int a1ic[9] = { 1,1,1,0,0,0,0,0,0 }; static int a2ic[9] = { 1,1,1,1,0,0,0,0,0 }; static int a3ic[9] = { 1,1,1,1,1,0,0,0,0 }; static int ric[9] = { 1,1,1,1,1,1,0,0,0 }; static int goffb[13] = { 0,0,0,0,1,2,2,2,2,2,2,2,2 }; static int goffe[13] = { 0,0,0,0,2,3,4,4,5,6,6,6,6 }; static int cY[6] = { 0,1,0,1,0,0 }; static int cR[6] = { 1,0,1,0,0,0 }; static double ilw = 0.002; static double G[6] = { 0.0,0.0,6.0,0.0,0.0,0.0 }; static double T[6] = { 0.0,0.0,0.0,7.0,0.0,0.0 }; static double Y[6] = { 0.0,3.0,0.0,3.0,0.0,0.0 }; static double R[6] = { 2.0,0.0,2.0,0.0,0.0,0.0 }; static double YP[6] = { 0.0,3.0,0.0,3.0,0.0,0.0 }; static double RP[6] = { 2.0,0.0,2.0,0.0,0.0,0.0 }; static double RI[6] = { 0.1,0.0,0.05,0.0,0.0,0.0 }; static double GI[6] = { 0.0,0.0,0.1,0.0,0.0,0.0 }; static double YI[6] = { 0.0,0.1,0.0,0.1,0.0,0.0 }; static double AI[6] = { 1.0,0.0,0.0,0.0,0.0,0.0 }; static double GC[6] = { 0.0,1.5,6.0,0.0,0.0,0.0 }; static double G3[6] = { 0.0,6.0,12.0,12.0,0.0,0.0 }; static double dR[6] = { 6.0,0.0,6.0,0.0,0.0,0.0 }; static double RH[6] = { 3.0,0.0,3.0,0.0,0.0,0.0 }; 
static double AGT[6] = { 6.0,0.0,6.0,6.0,0.0,0.0 }; static double dT[6] = { 0.0,0.0,0.0,6.0,0.0,0.0 }; static double dbem[6][6] = { { -2.144,-0.428,-2.144, ATBOND, 0.000, 0.000 }, { -0.428,-2.144, 3.000,-2.144, 0.000, 0.000 }, { -2.144, 3.000,-2.144, 1.286, 0.000, 0.000 }, { ATBOND,-2.144, 1.286,-0.428, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; static double dfem[6][6] = { { -4.000,-4.000,-4.000, ATBOND, 0.000, 0.000 }, { -4.000,-4.000, 3.000,-4.000, 0.000, 0.000 }, { -4.000, 3.000,-4.000, 1.286, 0.000, 0.000 }, { ATBOND,-4.000, 1.286,-4.000, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; static double cbem[6][6] = { { -1.072,-0.214,-1.072,2.0*ATBOND, 0.000, 0.000 }, { -0.214,-1.072, 6.000,-1.072, 0.000, 0.000 }, { -1.072, 6.000,-1.072, 3.400, 0.000, 0.000 }, { 2.0*ATBOND,-1.072, 3.400,-0.214, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; static trna_loop thit[NTH],chit[NC],ahit[NA]; static trna_dloop dhit[ND]; gene te; static gene t = { "",{TERM},{TERM},NULL,0,0,0L,0L,7,7,1,2,1,3,9,5,7,0,0,0,15,0,5,7, tRNA,0.0,0,0,0 };
/* the mitochondrial search is delegated to find_mt_trna(); carry on only if a tmRNA search was requested as well */
if (sw->mtrna) { nts = find_mt_trna(d,seq,lseq,nts,sw); if (!sw->tmrna) return(nts); }
/* mindist..maxdist: how far before the T-stem acceptor stems are searched; the tRNA and tmRNA windows are merged when both searches are enabled */
ethresh = sw->trnathresh; tmmindist = MINTPTSDIST + MINTPDIST; tmmaxdist = MAXTPTSDIST + MAXTPDIST; tmindist = (MINTRNALEN + sw->minintronlen - MAXTSTEM_DIST); tmaxdist = (MAXTRNALEN + sw->maxintronlen - MINTSTEM_DIST); if (sw->trna) { if (sw->tmrna) { mindist = (tmindist < tmmindist)?tmindist:tmmindist; maxdist = (tmaxdist > tmmaxdist)?tmaxdist:tmmaxdist; } else { mindist = tmindist; maxdist = tmaxdist; }} else { mindist = tmmindist; maxdist = tmmaxdist; } tarmthresh = sw->ttarmthresh; tdarmthresh = sw->tdarmthresh; tmstrict = sw->tmstrict; sp2min = sw->sp2min; sp2max = sw->sp2max;
/* outer loop: one iteration per T-stem hit */
nth = find_tstems(seq,lseq,thit,NTH,sw); nt = -1; while (++nt < nth) { tpos = thit[nt].pos; 
t.tloop = thit[nt].loop; t.tstem = thit[nt].stem; tfold = tpos[-1]; tloopfold = tpos + t.tstem + 1; tarm = 2*t.tstem + t.tloop; tend = tpos + tarm; tmv = tpos - VARMIN; flag = 0; te.energy = ethresh; the = thit[nt].energy; nah = find_astem5(tpos-maxdist,tpos-mindist,tend,7,ahit,NA,sw);
/* tmRNA candidates come first; the second find_astem5() call appends its hits after the first nah entries of ahit[] for tmopt_perm() */
if (sw->tmrna) { thet = the - G[tpos[t.tstem]] - G[tpos[t.tstem+1]]; if (tmstrict) { if (thet >= tarmthresh) nts = tmopt(d,thit+nt,tarm,thet,ahit,nah,nts,seq,sw); } else nts = tmopt(d,thit+nt,tarm,the,ahit,nah,nts,seq,sw); nppah = find_astem5(tpos+MINPPASDIST,tpos+MAXPPASDIST, tend,7,ahit+nah,NA-nah,sw); nts = tmopt_perm(d,thit+nt,tarm,the,ahit+nah,nppah,nts,seq,sw); if (thet < tarmthresh) continue; the = thet; } else if (sw->threshlevel < 1.0) /* find_tstems is generating extra tstems */ { the -= (G[tpos[t.tstem]] + G[tpos[t.tstem+1]]); if (the < tarmthresh) continue; } if (!sw->trna) continue; na = -1; while (++na < nah) { apos = ahit[na].pos; if (apos < (tpos - tmaxdist)) continue; if (apos > (tpos - tmindist)) break; he = the + ahit[na].energy; /* find dstems */ ndh = 0; sc = apos + 8; energyf = dfem[sc[5]][tfold]; sl = sc + sw->sp1max; while (sc < sl) { energy2 = dT[sc[-2]] + RH[*(sc-1)] + GC[*sc] + dfem[sc[-2]][sc[4]]; energyf6 = dfem[sc[6]][tfold]; for (dstem = 3; dstem <= 4; dstem++) { sd = sc + dstem; dloop = 3; se = sd + dloop; energy = energy2 + 6.0 + dR[*(se-1)] + energyf; if (dstem == 3) if (energyf < 0.0) energyf = energyf6; se += dstem; s1 = sc; s2 = se; sf = s1 + dstem; while (s1 < sf) energy += dbem[*s1++][*--s2]; if (energy >= tdarmthresh) { if (ndh >= ND) goto DFL; dhit[ndh].pos = sc; dhit[ndh].end = se; dhit[ndh].loop = dloop; dhit[ndh].stem = dstem; dhit[ndh].energy = energy; ndh++; } sg1 = sd + 1; sg2 = sd + 6; q = GG[*sg1++]; ige[1] = q & 3; j = 2; while (sg1 <= sg2) { q = (q >> 4) + GG[*sg1++]; ige[j++] = q & 3; } for (dloop = 4; dloop <= 11; dloop++) { j = goffb[dloop]; k = goffe[dloop]; c = ige[j++]; while (j <= k) c = c | ige[j++]; genergy = G3[c]; se = sd 
+ dloop; energy = energy2 + genergy + dR[*(se-1)] + energyf; se += dstem; s1 = sc; s2 = se; sf = s1 + dstem; while (s1 < sf) energy += dbem[*s1++][*--s2]; if (energy >= tdarmthresh) { if (ndh >= ND) goto DFL; dhit[ndh].pos = sc; dhit[ndh].end = se; dhit[ndh].loop = dloop; dhit[ndh].stem = dstem; dhit[ndh].energy = energy; ndh++; }}} s1 = sc; s2 = sc + 16; sd = sc + 6; j = bp[*s1][*--s2]; while (++s1 < sd) j += bp[*s1][*--s2]; if (j >= 6) { energy = dT[sc[-1]] + RH[*sc] + GC[*(sc+1)] + energyf6; energy += G[*++sd]; energy += G[*++sd]; energy += AGT[*++sd] + dfem[sc[-1]][sc[4]]; sd += 7; s1 = sc; s2 = sd; sf = s1 + 6; while (s1 < sf) energy += dbem[*s1++][*--s2]; if (energy >= tdarmthresh) { if (ndh >= ND) goto DFL; dhit[ndh].pos = sc; dhit[ndh].end = sd; dhit[ndh].loop = 4; dhit[ndh].stem = 6; dhit[ndh].energy = energy; ndh++; }} s1 = sc; s2 = sc + 18; sd = sc + 7; j = bp[*s1][*--s2]; while (++s1 < sd) j += bp[*s1][*--s2]; if (j >= 7) { energy = energy2 + dfem[sc[7]][tfold]; energy += G[*++sd]; energy += G[*++sd]; energy += AGT[*++sd]; sd += 8; s1 = sc; s2 = sd; sf = s1 + 7; while (s1 < sf) energy += dbem[*s1++][*--s2]; if (energy >= tdarmthresh) { if (ndh >= ND) goto DFL; dhit[ndh].pos = sc; dhit[ndh].end = sd; dhit[ndh].loop = 4; dhit[ndh].stem = 7; dhit[ndh].energy = energy; ndh++; }} energyf = energyf6; sc++; } goto DFN; DFL: fprintf(stderr,"Too many D-stem hits\n"); DFN: /* End of find dstems routine */
/* adjust D-stem energies: -3.0 when the stem starts fewer than 9 bases after apos; when *tloopfold is Guanine, +10.0 for two adjacent TT-table hits and -12.0 for two adjacent GG-table hits inside the scanned part of the D-loop */
nd1 = ndh; while (--nd1 >= 0) { dstem = dhit[nd1].stem; dpos = dhit[nd1].pos; if ((int)(dpos - apos) < 9) dhit[nd1].energy -= 3.0; if (*tloopfold == Guanine) { sb = dpos + dstem + 2; sc = sb; se = sb + dhit[nd1].loop - 3; r = TT[*sb++]; while (sb < se) { r = (r >> 4) + TT[*sb++]; if (r & 2) { dhit[nd1].energy += 10.0; break; }} r = GG[*sc++]; while (sc < se) { r = (r >> 4) + GG[*sc++]; if (r & 2) { dhit[nd1].energy -= 12.0; break; }}}}
/* prune: a hit is disabled (end = NULL) when a lower-indexed hit with the same end position has strictly higher energy */
nd1 = ndh; while (--nd1 >= 0) { if (!dhit[nd1].end) continue; cpos = dhit[nd1].end; denergy = dhit[nd1].energy; ndx = nd1; nd2 = 
nd1; while (--nd2 >= 0) { if (dhit[nd2].end != cpos) continue; e = dhit[nd2].energy; if (e > denergy) { denergy = e; dhit[ndx].end = NULL; ndx = nd2; }}}
/* cposmin..cposmax: range of end positions over the surviving D-stem hits (both stay 0 when there are none) */
cposmin = 0; cposmax = 0; nd1 = ndh; while (--nd1 >= 0) { if (!dhit[nd1].end) continue; cposmin = dhit[nd1].end; cposmax = cposmin; break; } nd2 = nd1; while (--nd2 >= 0) { if (!(cpos = dhit[nd2].end)) continue; if (cpos < cposmin) cposmin = cpos; if (cpos > cposmax) cposmax = cpos; }
/* try every C-stem start lying sp2min..sp2max bases after some D-stem end, using the highest-energy D-stem that can reach it */
for (cpos = cposmin + sp2min; cpos <= (cposmax + sp2max); cpos++) { denergy = -INACTIVE; ndx = -1; nd1 = ndh; while (--nd1 >= 0) { if (!dhit[nd1].end) continue; if ((dhit[nd1].end + sp2max) < cpos) continue; if ((dhit[nd1].end + sp2min) > cpos) continue; e = dhit[nd1].energy; if (e > denergy) { denergy = e; ndx = nd1; }} if (ndx < 0) continue; denergy += he; if (denergy < (te.energy - 49.0)) continue; /* find cstems */ /* ct[b] packs, one nibble per position, the score of base b against cpos[0..4]; the shifting scan below leaves in the low nibble of r the summed score of the last five bases read against cpos[4]..cpos[0] */ nch = 0; si = cpos; sc = cpos + 5; se = cpos + 4; ct[0] = cA[*se]; ct[1] = cC[*se]; ct[2] = cG[*se]; ct[3] = cT[*se]; while (--se >= cpos) { ct[0] = (ct[0] << 4) + cA[*se]; ct[1] = (ct[1] << 4) + cC[*se]; ct[2] = (ct[2] << 4) + cG[*se]; ct[3] = (ct[3] << 4) + cT[*se]; } si += 11; se = tmv - VARDIFF - 5; if (si < se) si = se; r = ct[*si++]; r = (r >> 4) + ct[*si++]; r = (r >> 4) + ct[*si++]; r = (r >> 4) + ct[*si++]; while (si < tmv) { r = (r >> 4) + ct[*si++]; if ((r & 0xf) >= 5) { if (nch >= NC) { fprintf(stderr,"Too many cstem hits\n"); goto FN; } chit[nch].pos = si; chit[nch].stem = 5; chit[nch].loop = (int)(si - sc - 5); if (chit[nch].loop == 9) if (bp[*sc][si[-6]]) if (cY[sc[2]]) if (cR[sc[6]]) if (cY[sc[1]]) { chit[nch].stem = 6; chit[nch].loop = 7; } s1 = cpos; s2 = si; se = s1 + chit[nch].stem; chit[nch].energy = cbem[*s1++][*--s2]; while (s1 < se) chit[nch].energy += cbem[*s1++][*--s2]; nch++; }} FN: /* end of find cstems routine */ nc = -1; while (++nc < nch) { energy = denergy + chit[nc].energy; if (energy < (te.energy - 19.0)) continue; cend = chit[nc].pos; t.var = (int)(tpos - cend); t.cloop = 
chit[nc].loop; t.cstem = chit[nc].stem; intron = 0; /* a C-loop shorter than 9 is scored as intron-free; anything longer is treated as a 7-base loop plus an intron of (cloop - 7) bases */ if (t.cloop < 9) { if (sw->minintronlen > 0) continue; if (sw->cloop7) if (t.cloop != 7) continue; t.nintron = 0; if (t.var > 17) energy += vloop_stability(cend,t.var,&t.varbp); sb = cpos + t.cstem; energy += T[*(sb + 1)] + Y[*(sb)] + R[*(sb + 5)] - 0.05*t.var - ((t.cloop == 7)?0.0:6.0); } else { t.nintron = t.cloop - 7; if (t.nintron > sw->maxintronlen) continue; if (t.nintron < sw->minintronlen) continue; if (t.var > 17) energy += vloop_stability(cend,t.var,&t.varbp); if (energy < (te.energy - 9.0)) continue; t.cloop = 7; sb = cpos + t.cstem; se = sb + t.nintron; if (sw->ifixedpos) { intron = 6; cenergy = YP[*sb] + T[sb[1]] + RP[sb[5]]; } else { cenergy = YP[*se] + T[*(se+1)] + RP[*(se+5)]; ienergy = cenergy + RI[*sb] + GI[*(se-1)] + AI[se[-2]]*YI[se[-1]]; for (j = 1; j <= 7; j++) { si = se + j - 1; ec = YP[*(sb + yic[j]*t.nintron)] + T[*(sb + tic[j]*t.nintron + 1)] + RP[*(sb + ric[j]*t.nintron + 5)]; e = ec + RI[*(sb + j)] + GI[*si] + AI[si[-1]]*YI[*si]; if (j == 6) e += 0.01; if (e > ienergy) { ienergy = e; cenergy = ec; intron = j; }}} energy += cenergy - 10.0 - ilw*(t.nintron + 1.1*t.var); if (t.nintron >= 130) { si = se + intron; j = si[-1]; if (j != Guanine) { if (si[-2] != Adenine) energy -= 4.0; if (j != Cytosine) if (j != Thymine) energy -= 8.0; }}} dstem = dhit[ndx].stem; dpos = dhit[ndx].pos; if (dstem >= 6) { if (sb[2 + a1ic[intron]*t.nintron] != Thymine) continue; if (sb[3 + a2ic[intron]*t.nintron] != Cytosine) continue; if (sb[4 + a3ic[intron]*t.nintron] != Adenine) continue; energy += 3.0; } else if (!(dpos[-1] & 5)) { i = 0; si = cend; se = cend + 4; while (si < se) { if (!(*si++ & 5)) { if (++i >= 2) { energy += 3.0; break; }} else i = 0; }} if (t.cstem >= 6) { if (sb[2 + a1ic[intron]*t.nintron] == Cytosine) if (sb[3 + a2ic[intron]*t.nintron] == Thymine) if (sb[4 + a3ic[intron]*t.nintron] == Adenine) energy += 4.0; } if (energy < ethresh) continue; /* candidate accepted: fill in the gene record */ t.energy = energy; t.dstem = dstem; t.astem1 = 
(t.dstem < 6)?7:((t.tstem < 5)?9:8); t.astem2 = t.astem1; t.ps = apos + 7 - t.astem1; t.nbase = (int)(tend - t.ps) + t.astem2; t.dloop = dhit[ndx].loop; t.spacer1 = (int)(dpos - apos) - 7; t.spacer2 = (int)(cpos - dhit[ndx].end); j = (int)(cpos - t.ps) + t.cstem; t.anticodon = j + 2; if (t.nintron > 0) { t.intron = j + intron; if ((t.nbase + t.nintron) > MAXTRNALEN) { nts = ti_genedetected(d,nts,seq,&t,sw); continue; }} if (energy < te.energy) continue; flag = 1; te = t; } }} if (flag) nts = ti_genedetected(d,nts,seq,&te,sw); } return(nts); } void disp_ftable_entry(FILE *f, int n[], int i, int m, csw *sw) { if (m > 0) switch(sw->geneticcode) { case METAZOAN_MT: fprintf(f," %-18s %-4d",aa(n,sw),m); break; case STANDARD: case VERTEBRATE_MT: default: fprintf(f," %-4s %-5d",aa(n,sw),m); break; } else switch(sw->geneticcode) { case METAZOAN_MT: fprintf(f," %-18s ",aa(n,sw)); break; case STANDARD: case VERTEBRATE_MT: default: fprintf(f," %-4s ",aa(n,sw)); break; }} void disp_freq_table(int nt, csw *sw) { int i,j,k,m,ambig,*s,c1,c2,c3,c[3],a[3],table[4][4][4]; static int cgflip[4] = { 0,2,1,3 }; static int codonorder[4] = { 3,1,0,2 }; FILE *f = sw->f; ambig = 0; for (i = 0; i < 4; i++) for (j = 0; j < 4; j++) for (k = 0; k < 4; k++) table[i][j][k] = 0; for (i = 0; i < nt; i++) if (ts[i].energy >= 0.0) if (ts[i].genetype == tRNA) if (ts[i].cloop == 7) { s = ts[i].seq + ts[i].anticodon; c1 = *s; c2 = s[1]; c3 = s[2]; if ((c1 >= Adenine) && (c1 <= Thymine)) if ((c2 >= Adenine) && (c2 <= Thymine)) if ((c3 >= Adenine) && (c3 <= Thymine)) table[*s][s[1]][s[2]]++; else ambig++; else ambig++; else ambig++; } else ambig++; fprintf(f,"tRNA anticodon frequency\n"); for (i = 0; i < 4; i++) { c[0] = codonorder[i]; a[2] = 3 - c[0]; for (j = 0; j < 4; j++) { c[2] = codonorder[j]; a[0] = 3 - c[2]; for (k = 0; k < 4; k++) { c[1] = codonorder[k]; a[1] = 3 - c[1]; fprintf(f,"%c%c%c",cpbase(a[0]),cpbase(a[1]),cpbase(a[2])); m = table[a[0]][a[1]][a[2]]; disp_ftable_entry(f,a,k,m,sw); } 
fputc('\n',f); } if (i < 3) fputc('\n',f); } if (ambig > 0) fprintf(f,"Ambiguous: %d\n",ambig); fprintf(f,"\ntRNA codon frequency\n"); for (i = 0; i < 4; i++) { c[0] = codonorder[i]; a[2] = 3 - c[0]; for (j = 0; j < 4; j++) { c[2] = codonorder[j]; a[0] = 3 - c[2]; for (k = 0; k < 4; k++) { c[1] = codonorder[k]; a[1] = 3 - c[1]; fprintf(f,"%c%c%c",cpbase(c[0]),cpbase(c[1]),cpbase(c[2])); m = table[a[0]][a[1]][a[2]]; disp_ftable_entry(f,a,k,m,sw); } fputc('\n',f); } if (i < 3) fputc('\n',f); } if (ambig > 0) fprintf(f,"Ambiguous: %d\n",ambig); fputc('\n',f); } void disp_energy_stats(data_set *d, int nt, csw *sw) { int i,n[NS],genetype,introns,nintron,trna,mtrna,ntv,nd,nps; double gc,gcmin[NS],gcmax[NS]; FILE *f = sw->f; mtrna = sw->mtrna; trna = sw->trna | mtrna; nps = 0; if (mtrna) { ntv = 0; nd = 0; } if ((sw->trna) && (sw->maxintronlen > 0)) { introns = 1; nintron = 0; } else introns = 0; for (i = 0; i < NS; i++) { n[i] = 0; gcmin[i] = 1.0; gcmax[i] = 0.0; } for (i = 0; i < nt; i++) if (ts[i].energy >= 0.0) { n[NS-1]++; genetype = ts[i].genetype; n[genetype]++; if (pseudogene(ts + i,sw)) nps++; if (genetype == tRNA) { if (mtrna) { if (ts[i].tstem == 0) ntv++; if (ts[i].dstem == 0) nd++; } if (introns) if (ts[i].nintron > 0) nintron++; gc = gc_content(ts+i); if (gc < gcmin[genetype]) gcmin[genetype] = gc; if (gc > gcmax[genetype]) gcmax[genetype] = gc; }} fputc('\n',f); fputc('\n',f); if (sw->repeatsn) if ((n[tRNA] + n[tmRNA]) > 0) fprintf(f,"%s\n\n",d->seqname); if (trna) { sw->ngene[tRNA] += n[tRNA]; if (n[tRNA] > 3) disp_freq_table(nt,sw); if ((n[tRNA] > 1) || ((sw->tmrna) && (n[tmRNA] > 0))) { if (introns) { if (sw->minintronlen == 0) fprintf(f,"Number of tRNA genes with no introns = %d\n", n[0]-nintron); fprintf(f,"Number of tRNA genes with C-loop introns = %d\n", nintron); } else fprintf(f,"Number of %s genes = %d\n",sw->genetypename[tRNA],n[tRNA]); if (mtrna) { if (sw->tvloop) fprintf(f,"Number of TV replacement loop tRNA genes = %d\n", ntv); 
fprintf(f,"Number of D replacement loop tRNA genes = %d\n", nd); } if (n[tRNA] > 1) fprintf(f,"tRNA GC range = %2.1f%% to %2.1f%%\n", gcmin[0]*100.0,gcmax[0]*100.0); }} if (sw->tmrna) { sw->ngene[tmRNA] += n[tmRNA]; if ((n[tmRNA] > 1) || (trna && (n[tRNA] > 0))) fprintf(f,"Number of %s genes = %d\n",sw->genetypename[tmRNA],n[tmRNA]); } sw->nps += nps; if (sw->reportpseudogenes) if (nps > 0) if (n[NS-1] > 1) fprintf(f,"Number of possible pseudogenes = %d\n",nps); fputc('\n',f); fputc('\n',f); } void batch_energy_stats(data_set *d, int nt, csw *sw) { int i,n[NS],genetype,introns,nintron,trna,mtrna,ntv,nd,nps; double gc,gcmin[NS],gcmax[NS]; FILE *f = sw->f; mtrna = sw->mtrna; trna = sw->trna | mtrna; nps = 0; if (mtrna) { ntv = 0; nd = 0; } if ((sw->trna) && (sw->maxintronlen > 0)) { introns = 1; nintron = 0; } else introns = 0; for (i = 0; i < NS; i++) { n[i] = 0; gcmin[i] = 1.0; gcmax[i] = 0.0; } for (i = 0; i < nt; i++) if (ts[i].energy >= 0.0) { n[NS-1]++; genetype = ts[i].genetype; n[genetype]++; if (ts[i].energy < 100.0) nps++; if (genetype == tRNA) { if (mtrna) { if (ts[i].tstem == 0) ntv++; if (ts[i].dstem == 0) nd++; } if (introns) if (ts[i].nintron > 0) nintron++; gc = gc_content(ts+i); if (gc < gcmin[genetype]) gcmin[genetype] = gc; if (gc > gcmax[genetype]) gcmax[genetype] = gc; }} if (trna) sw->ngene[tRNA] += n[tRNA]; if (sw->tmrna) sw->ngene[tmRNA] += n[tmRNA]; sw->nps += nps; } int gene_sort(data_set *d, int nt, int sort[], csw *sw) { int i,n,j,k; long starti,startj,stopi,stopj,psmax; psmax = d->psmax; n = 0; for (i = 0; i < nt; i++) if (ts[i].energy >= 0.0) { if (sw->ireportminintronlen == 1) if (ts[i].genetype == tRNA) if (ts[i].nintron < sw->minintronlenreport) continue; sort[n++] = i; } i = -1; while (++i < (n-1)) { j = i; while (++j < n) { starti = ts[sort[i]].start; startj = ts[sort[j]].start; stopi = ts[sort[i]].stop; stopj = ts[sort[j]].stop; if (stopi < starti) if ((psmax - starti) < stopi) starti -= psmax; else stopi += psmax; if (stopj < 
startj) if ((psmax - startj) < stopj) startj -= psmax; else stopj += psmax; if (starti > startj) { k = sort[i]; sort[i] = sort[j]; sort[j] = k; } else if (starti == startj) if (stopi < stopj) { k = sort[i]; sort[i] = sort[j]; sort[j] = k; }}} return(n); } int iamatch(data_set *d, gene *t, csw *sw) { char key[5],*k,s[100]; if (k = softstrpos(d->seqname,"TRNA-")) k += 5; else if (k = wildstrpos(d->seqname,"|***|")) k++; else return(-1); copy3cr(k,key,3); name(t,s,1,sw); if (softstrpos(s,key)) return(1); return(0); } int gene_mismatch(data_set *d, annotated_gene *agene, gene *t, csw *sw) { int w,alen,dlen; char *s; w = 0; dlen = seqlen(t); alen = aseqlen(d,agene); switch(t->genetype) { case tRNA: s = aa(t->seq + t->anticodon,sw); if (!softstrpos(s,agene->species+5)) { if (t->cloop == 8) { s = aa(t->seq + t->anticodon + 1,sw); if (!softstrpos(s,agene->species+5)) w += 1; } else if (t->cloop == 6) { s = aa(t->seq + t->anticodon - 1,sw); if (!softstrpos(s,agene->species+5)) w += 1; } else w += 1; } if (agene->comp != t->comp) w += 2; if (alen <= (dlen - sw->trnalenmisthresh)) w += 4; else if (alen >= (dlen + sw->trnalenmisthresh)) w += 4; break; case tmRNA: if (agene->comp != t->comp) w += 2; if (alen <= (dlen - sw->tmrnalenmisthresh)) w += 4; else if (alen >= (dlen + sw->tmrnalenmisthresh)) w += 4; break; } return(w); } int gene_mismatch_report(data_set *d, annotated_gene *agene, gene *t, char *report, csw *sw) { int w; char *s; w = gene_mismatch(d,agene,t,sw); s = report; if (w & 1) s = copy("amino acceptor",s); if (w & 2) { if (w & 1) if (w & 4) s = copy(", ",s); else s = copy(" and ",s); s = copy("sense",s); } if (w & 4) { if ((w & 3) > 0) s = copy(" and ",s); s = copy("sequence length",s); } if (w > 0) s = copy(" mismatch",s); *s = '\0'; return(w); } int nearest_annotated_gene(data_set *d, gene *t, int list[], int score[], int nmax, csw *sw) { int n,i,j,k,q,w,nagene; long a,b,c,e,thresh,psmax; char *s; annotated_gene *ta; psmax = d->psmax; nagene = d->nagene[NS-1]; 
ta = d->gene; n = 0; a = t->start; b = t->stop; thresh = b-a; if (b < a) { b += psmax; thresh += psmax; for (i = 0; i < nagene; i++) { c = ta[i].start; e = ta[i].stop; if (e < c) { e += psmax; if (a > e) goto NXTW; if (b < c) goto NXTW; if (n >= nmax) break; list[n] = i; score[n] = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?e-c:b-c); n++; NXTW: c -= psmax; e -= psmax; } if (a > e) continue; if (b < c) continue; if (n >= nmax) break; list[n] = i; score[n] = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?e-c:b-c); n++; } a -= psmax; b -= psmax; } for (i = 0; i < nagene; i++) { c = ta[i].start; e = ta[i].stop; if (e < c) { e += psmax; if (a > e) goto NXTN; if (b < c) goto NXTN; if (n >= nmax) break; list[n] = i; score[n] = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?e-c:b-c); n++; NXTN: c -= psmax; e -= psmax; } if (a > e) continue; if (b < c) continue; if (n >= nmax) break; list[n] = i; score[n] = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?e-c:b-c); n++; } for (i = 0; i < n; i++) { k = list[i]; if (ta[k].genetype == t->genetype) { score[i] += 5000; w = gene_mismatch(d,ta + k,t,sw); if (w & 1) score[i] -= 2; if (w & 2) score[i] -= 1; }} if (n > 1) { for (i = 0; i < (n-1); i++) for (j = i+1; j < n; j++) if (score[j] > score[i]) { k = list[i]; list[i] = list[j]; list[j] = k; k = score[i]; score[i] = score[j]; score[j] = k; }} return(n); } int proximity_compare(data_set *d, int is, long prox, long dlen, long alen, annotated_gene *a, csw *sw) { int w,score; long diff; char nm[200]; gene *t; t = ts + is; w = gene_mismatch(d,a,t,sw); if (prox >= alen) { diff = dlen - alen; if (prox >= (2L*diff)) score = (int)(prox - diff); else score = (int)(prox/2L); } else if (prox >= dlen) { diff = alen - dlen; if (prox >= (2L*diff)) score = (int)(prox - diff); else score = (int)(prox/2L); } else { score = (int)prox; } if (w & 1) score -= 10; if (w & 2) score -= 2; if (score < 0) score = 0; if (t->annotation >= 0) if (t->annosc >= score) return(-1); return(score); } int nearest_detected_gene(data_set 
*d, int sort[], int nd, int *scorep, annotated_gene *ag, csw *sw) { int n,i,is; long a,b,c,e,score,alen,scoremax,psmax; long prox,proximity; psmax = d->psmax; n = -1; scoremax = -1; a = ag->start; b = ag->stop; alen = b - a; if (b < a) alen += psmax; proximity = 1 + alen/2; if (proximity > 30) proximity = 30; if (b < a) { b += psmax; for (i = 0; i < nd; i++) { is = sort[i]; if (ag->genetype != ts[is].genetype) continue; c = ts[is].start; e = ts[is].stop; if (e < c) { e += psmax; if (a > e) goto NXTW; if (b < c) goto NXTW; prox = (a >= c)?((b >= e)?e-a:alen):((b >= e)?e-c:b-c); if (prox >= proximity) if ((score = proximity_compare(d,is,prox,e-c,alen,ag,sw)) > scoremax) { n = i; scoremax = score; } NXTW: c -= psmax; e -= psmax; } if (a > e) continue; if (b < c) continue; prox = (a >= c)?((b >= e)?e-a:alen):((b >= e)?e-c:b-c); if (prox >= proximity) if ((score = proximity_compare(d,is,prox,e-c,alen,ag,sw)) > scoremax) { n = i; scoremax = score; }} a -= psmax; b -= psmax; } for (i = 0; i < nd; i++) { is = sort[i]; if (ag->genetype != ts[is].genetype) continue; c = ts[is].start; e = ts[is].stop; if (e < c) { e += psmax; if (a > e) goto NXTN; if (b < c) goto NXTN; prox = (a >= c)?((b >= e)?e-a:alen):((b >= e)?e-c:b-c); if (prox >= proximity) if ((score = proximity_compare(d,is,prox,e-c,alen,ag,sw)) > scoremax) { n = is; scoremax = score; } NXTN: c -= psmax; e -= psmax; } if (a > e) continue; if (b < c) continue; prox = (a >= c)?((b >= e)?e-a:alen):((b >= e)?e-c:b-c); if (prox >= proximity) if ((score = proximity_compare(d,is,prox,e-c,alen,ag,sw)) > scoremax) { n = is; scoremax = score; }} *scorep = scoremax; return(n); } void disp_match(data_set *d, int *sort, int nd, csw *sw) { int i,ld,fn[NS],fp[NS],fpd,fptv,w,score,detect,n[NS]; int prevannoted,nl,k,csort[NGFT],*msort; long start; char tag[52],nm[100],anm[100],ps[100],mreport[100],*s; FILE *f = sw->f; gene *t; annotated_gene *agene,*a; static char gp[2][7] = { "genes","gene" }; static char comp[3] = " c"; static char 
aps[2][5] = { " ","PS" }; nl = nd; if (sw->trna | sw->mtrna) nl += d->nagene[tRNA]; if (sw->tmrna) nl += d->nagene[tmRNA]; if (nl < NGFT) msort = csort; else { msort = (int *)malloc(nl*sizeof(int)); if (msort == NULL) { fprintf(stderr,"Not enough memory to match genes\n"); return; }} fprintf(f,"\n%s\n",d->seqname); fprintf(f,"%ld nucleotides in sequence\n",d->psmax); fprintf(f,"Mean G+C content = %2.1f%%\n",100.0*d->gc); fprintf(f,"\nGenBank to Aragorn comparison\n\n"); sw->dispmatch = 1; for (i = 0; i < NS; i++) { n[i] = 0; fn[i] = 0; fp[i] = 0; } for (i = 0; i < nd; i++) { w = sort[i]; if (ts[w].energy >= 0.0) { n[NS-1]++; n[ts[w].genetype]++; } ts[w].annotation = -1; ts[w].annosc = -1; } if (sw->trna | sw->mtrna | sw->tmrna) { fpd = 0; fptv = 0; if (sw->trna | sw->mtrna) { fprintf(f,"%d annotated tRNA %s\n",d->nagene[tRNA],gp[(d->nagene[tRNA]==1)?1:0]); fprintf(f,"%d detected tRNA %s\n",n[tRNA],gp[(n[tRNA]==1)?1:0]); } if (sw->tmrna) { fprintf(f,"%d annotated tmRNA %s\n",d->nagene[tmRNA],gp[(d->nagene[tmRNA]==1)?1:0]); fprintf(f,"%d detected tmRNA %s\n",n[tmRNA],gp[(n[tmRNA]==1)?1:0]); } fprintf(f,"\n GenBank Aragorn\n"); nl = 0; for (i = 0; i < d->nagene[NS-1]; i++) { agene = d->gene + i; agene->detected = -1; if (agene->genetype != tRNA) { if (agene->genetype != tmRNA) continue; else if (!sw->tmrna) continue; } else if (!sw->trna) if (!sw->mtrna) continue; a = agene; k = i; while ((a->detected = nearest_detected_gene(d,sort,nd,&score,a,sw)) >= 0) { t = ts + a->detected; prevannoted = t->annotation; t->annotation = k; t->annosc = score; if (prevannoted < 0) break; if (prevannoted == k) break; if (prevannoted == i) break; a = d->gene + prevannoted; k = prevannoted; } k = nl; while (--k >= 0) { if (agene->start >= d->gene[msort[k]].start) break; msort[k+1] = msort[k]; } msort[++k] = i; nl++; } for (i = 0; i < nd; i++) { t = ts + sort[i]; if (t->annotation >= 0) continue; if (t->genetype != tRNA) { if (t->genetype != tmRNA) continue; else if (!sw->tmrna) continue; 
} else if (!sw->trna) if (!sw->mtrna) continue; k = nl; while (--k >= 0) { if (msort[k] >= 0) start = d->gene[msort[k]].start; else start = ts[-1-msort[k]].start; if (t->start >= start) break; msort[k+1] = msort[k]; } msort[++k] = -(sort[i] + 1); nl++; } for (i = 0; i < nl; i++) { if (msort[i] >= 0) { agene = d->gene + msort[i]; detect = agene->detected; if (detect >= 0) { t = ts + detect; w = gene_mismatch_report(d,agene,t,mreport,sw); if (w > 0) fputc('*',f); else fputc(' ',f); } else fputc('*',f); sprintf(anm," %-11s%c(%ld,%ld) %s", agene->species,comp[agene->comp], sq(agene->start),sq(agene->stop),aps[agene->pseudogene]); fprintf(f,"%-45s ",anm); if (detect >= 0) { fprintf(f,"%s ",name(t,nm,1,sw)); if (t->comp == 0) fputc(' ',f); fprintf(f,"%s",position(ps,t,sw)); if (sw->energydisp) fprintf(f," %7.3lf",t->energy); if (t->genetype == tmRNA) { peptide_tag(tag,50,t,sw); fprintf(f," %s",tag); } if (sw->reportpseudogenes) if (pseudogene(t,sw)) fprintf(f," PS"); if (w > 0) fprintf(f," %s",mreport); fputc('\n',f); } else { fprintf(f,"Not detected\n"); fn[agene->genetype]++; }} else { t = ts - (msort[i] + 1); fprintf(f,"* Not annotated %s ",name(t,nm,1,sw)); if (t->comp == 0) fputc(' ',f); fprintf(f,"%s",position(ps,t,sw)); if (sw->energydisp) fprintf(f," %7.3lf",t->energy); if (t->genetype == tmRNA) { peptide_tag(tag,50,t,sw); fprintf(f," %s",tag); } if (sw->reportpseudogenes) if (pseudogene(t,sw)) fprintf(f," PS"); fputc('\n',f); fp[t->genetype]++; if (t->genetype == tRNA) { if (t->dstem == 0) fpd++; if (t->tstem == 0) fptv++; }}} fprintf(f,"\n"); if (sw->trna | sw->mtrna) { fprintf(f,"Number of annotated tRNA genes not detected = %d\n",fn[tRNA]); fprintf(f,"Number of unannotated tRNA genes detected = %d\n",fp[tRNA]); } if (sw->mtrna) { fprintf(f,"Number of unannotated D-replacement tRNA genes detected = %d\n",fpd); fprintf(f,"Number of unannotated TV-replacement tRNA genes detected = %d\n",fptv); } if (sw->tmrna) { fprintf(f,"Number of annotated tmRNA genes not 
detected = %d\n",fn[tmRNA]); fprintf(f,"Number of unannotated tmRNA genes detected = %d\n",fp[tmRNA]); } fprintf(f,"\n\n"); for (i = tRNA; i <= tmRNA; i++) { sw->nagene[i] += d->nagene[i]; sw->nafn[i] += fn[i]; sw->nafp[i] += fp[i]; } if (sw->mtrna) { sw->natfpd += fpd; sw->natfptv += fptv; }} sw->nabase += d->psmax; sw->dispmatch = 0; if (nl >= NGFT) free((void *)msort); } void annotation_overlap_check(data_set *d, gene *t, char *margin, csw *sw) { int a,m,n,w,list[20],score[20]; char mreport[100]; static char comp[3] = " c"; n = nearest_annotated_gene(d,t,list,score,20,sw); if (n < 1) m = -1; else { m = 0; a = list[m]; if (d->gene[a].genetype != t->genetype) m = -1; else { w = gene_mismatch_report(d,d->gene+a,t,mreport,sw); if (w & 1) { if ((score[m] - 5000) < (3*seqlen(t)/4)) m = -1; } else { if ((score[m] - 5000) < (seqlen(t)/3)) m = -1; }}} if (m < 0) fprintf(sw->f,"%sNot annotated\n",margin); else { a = list[m]; fprintf(sw->f,"%sMatch with annotated %s %c(%ld,%ld)", margin,d->gene[a].species,comp[d->gene[a].comp], d->gene[a].start,d->gene[a].stop); w = gene_mismatch_report(d,d->gene+a,t,mreport,sw); if (w > 0) fprintf(sw->f," * %s",mreport); fputc('\n',sw->f); } while (++m < n) { a = list[m]; fprintf(sw->f,"%sOverlap with annotated %s %c(%ld,%ld)\n", margin,d->gene[a].species,comp[d->gene[a].comp], d->gene[a].start,d->gene[a].stop); } fputc('\n',sw->f); } void disp_gene_set(data_set *d, int nt, csw *sw) { int i,j,n,vsort[NT],*sort; char m[MATX][MATY],s[20]; gene *t; FILE *f = sw->f; if (nt <= NT) sort = vsort; else { sort = (int *)malloc(nt*sizeof(int)); if (sort == NULL) { fprintf(stderr,"Not enough memory to sort genes\n"); exit(1); }} n = gene_sort(d,nt,sort,sw); j = sw->tmrna_struct[54]; for (i = 55; i <= 60; i++) j += sw->tmrna_struct[i]; if (j != ((sw->tmrna_struct[0] << 4) + 9)) return; if (sw->libflag < 2) { if (n > 0) for (j = 0; j < n;) { i = sort[j++]; t = ts + i; t->energy = nenergy(t,sw); switch(t->genetype) { case tRNA: init_matrix(m); 
disp_gene(t,m,sw); sprintf(s,"%d.",j); xcopy(m,0,32,s,length(s)); disp_matrix(f,m,MATY); if (sw->matchacceptor) if (iamatch(d,t,sw) == 0) { fprintf(f," Iso-acceptor mismatch\n"); sw->iamismatch++; } if (sw->annotated) annotation_overlap_check(d,t," ",sw); overlap(d,sort,n,i,sw); if (sw->seqdisp) disp_seq(f,t,sw); if (t->nintron > 0) disp_intron(f,t,sw); if (sw->energydisp > 1) trna_score(f,t); break; case tmRNA: if (sw->secstructdisp == 1) { init_matrix(m); disp_gene(t,m,sw); sprintf(s,"%d.",j); xcopy(m,0,32,s,length(s)); disp_matrix(f,m,MATY); if (sw->annotated) annotation_overlap_check(d,t," ",sw); } else { fprintf(f,"\n%d.\n",j); disp_location(t,sw,"Location"); if (sw->reportpseudogenes) if (pseudogene(t,sw)) fprintf(f,"Possible Pseudogene\n"); if (sw->energydisp) fprintf(f,"Score = %g\n",t->energy); if (sw->annotated) annotation_overlap_check(d,t,"",sw); } overlap(d,sort,n,i,sw); if (t->asst == 0) disp_tmrna_seq(f,t,sw); else disp_tmrna_perm_seq(f,t,sw); if (sw->energydisp > 1) tmrna_score(f,t,sw); break; case CDS: fprintf(f,"\n%d.\nCDS gene\n",j); disp_location(t,sw,"Location"); if (sw->annotated) annotation_overlap_check(d,t,"",sw); overlap(d,sort,n,i,sw); disp_cds(f,t,sw); break; } if (sw->libflag > 0) write_to_library(f,t,sw); } else if (*(d->seqname) != '\0') fprintf(f,"\nNothing found in %s\n\n\n",d->seqname); else fprintf(f,"\nNothing found\n\n\n"); } else { if (n > 0) for (i = 0; i < n; i++) write_to_library(f,ts + sort[i],sw); } disp_energy_stats(d,nt,sw); if (d->datatype == GENBANK) disp_match(d,sort,n,sw); if (nt > NT) free((void *)sort); } void batch_gene_set(data_set *d, int nt, csw *sw) { int i,j,n,vsort[NT],nspaces,caps,*sort; gene *t; FILE *f = sw->f; if (nt <= NT) sort = vsort; else { sort = (int *)malloc(nt*sizeof(int)); if (sort == NULL) { fprintf(stderr,"Not enough memory to sort genes\n"); exit(1); }} n = gene_sort(d,nt,sort,sw); j = sw->tmrna_struct[54]; for (i = 55; i <= 60; i++) j += sw->tmrna_struct[i]; if (j != ((sw->tmrna_struct[0] << 
4) + 9)) return; if (sw->libflag < 2) if (sw->batch >= 2) { nspaces = (sw->batch & 0x4); caps = (sw->batch & 0x10); if (sw->batch & 0x8) for (i = 0; i < n; i++) disp_fasta_seq(f,ts + sort[i],d->ns+1,i+1,nspaces,caps,sw); else for (i = 0; i < n; i++) disp_fasta_seq(f,ts + sort[i],0,0,nspaces,caps,sw); } else { fprintf(f,"%d genes found\n",n); for (j = 0; j < n; j++) { fprintf(f,"%-3d ",j+1); t = ts + sort[j]; t->energy = nenergy(t,sw); switch(t->genetype) { case tRNA: disp_batch_trna(f,t,sw); break; case tmRNA: disp_batch_tmrna(f,t,sw); break; case srpRNA:disp_batch_srprna(f,t,sw); break; case CDS: disp_batch_cds(f,t,sw); break; default: break; }}} if (sw->libflag > 0) { for (i = 0; i < n; i++) write_to_library(f,ts + sort[i],sw); } batch_energy_stats(d,nt,sw); if (nt > NT) free((void *)sort); } void remove_overlapping_trna(data_set *d, int nt, csw *sw) { int i,n,ioverlay; long a,b,c,e,len,leni,overlap,psmax; char s1[80],s2[80]; gene *t,*ti; static long proximity = 7*MINCTRNALEN/10; psmax = d->psmax; ioverlay = sw->ioverlay; for (n = 0; n < nt; n++) { t = ts + n; if (t->genetype != tRNA) continue; if (t->energy < 0.0) continue; if (t->nintron <= 0) continue; a = t->start; b = t->stop; if (b < a) b += psmax; len = b - a; for (i = 0; i < nt; i++) { if (i == n) continue; ti = ts + i; if (ti->genetype != tRNA) continue; if (ti->comp != t->comp) continue; if (ti->energy < 0.0) continue; c = ti->start; e = ti->stop; if (e < c) e += psmax; leni = e - c; if (ioverlay) { if ((2*len) > (5*leni)) continue; if ((2*leni) > (5*len)) continue; } overlap = (a >= c)?((b >= e)?e-a:len):((b >= e)?len:b-c); if (overlap >= proximity) if (t->energy < ti->energy) { if (sw->verbose) { fprintf(stderr,"Removing %s at %s",name(t,s1,0,sw),position(s2,t,sw)); if (sw->energydisp) fprintf(stderr," (%g)",nenergy(t,sw)); fprintf(stderr,"\n"); } t->energy = -1.0; break; }}} for (n = 0; n < (nt-1); n++) { t = ts + n; if (t->genetype != tRNA) continue; if (t->energy < 0.0) continue; a = t->start; b = 
t->stop; if (b < a) b += psmax; len = b - a; for (i = n + 1; i < nt; i++) { ti = ts + i; if (ti->genetype != tRNA) continue; if (ti->comp != t->comp) continue; if (ti->energy < 0.0) continue; c = ti->start; e = ti->stop; if (e < c) e += psmax; leni = e - c; if (ioverlay) { if ((2*len) > (5*leni)) continue; if ((2*leni) > (5*len)) continue; } overlap = (a >= c)?((b >= e)?e-a:len):((b >= e)?len:b-c); if (overlap >= proximity) if (t->energy < ti->energy) { if (sw->verbose) { fprintf(stderr,"Removing %s at %s",name(t,s1,0,sw),position(s2,t,sw)); if (sw->energydisp) fprintf(stderr," (%g)",nenergy(t,sw)); fprintf(stderr,"\n"); } t->energy = -1.0; break; } else if (ti->energy < t->energy) { if (sw->verbose) { fprintf(stderr,"Removing %s at %s",name(ti,s1,0,sw),position(s2,ti,sw)); if (sw->energydisp) fprintf(stderr," (%g)",nenergy(ti,sw)); fprintf(stderr,"\n"); } ti->energy = -1.0; }}}} void iopt_fastafile(data_set *d, csw *sw) { int i,nt,flag,len,aragorn,anticodon; int *s,*sf,*se,*sc,*swrap; int seq[2*LSEQ+WRAP+1],cseq[2*LSEQ+WRAP+1],wseq[2*WRAP+1]; long gap,start,rewind,drewind,psmax,tmaxlen,vstart,vstop; double sens,sel1,sel2; char c1,c2,c3; static char trnatypename[3][25] = { "Metazoan mitochondrial","Cytosolic","Mammalian mitochondrial" }; static char genecodename[NGENECODE][50] = { "composite Metazoan Mitochondrial", "standard", "Vertebrate Mitochondrial", "Yeast Mitochondrial", "Mold/Protozoan/Coelenterate Mitochondrial", "Invertebrate Mitochondrial", "Ciliate", "deleted -> standard", "deleted -> standard", "Echinoderm/Flatworm Mitochondrial", "Euplotid", "Bacterial/Plant Chloroplast", "Alternative Yeast", "Ascidian Mitochondrial", "Alternative Flatworm Mitochondrial", "Blepharisma", "Chlorophycean Mitochondrial", "deleted -> standard", "deleted -> standard", "deleted -> standard", "deleted -> standard", "Trematode Mitochondrial", "Scenedesmus obliquus Mitochondrial", "Thraustochytrium Mitochondrial", "Pterobranchia mitochondrial", "Gracilibacteria" }; FILE *f = 
sw->f; init_tmrna(f,sw); aragorn = (sw->trna || sw->tmrna || sw->cds || sw->srprna); fprintf(f,"\nPlease reference the following paper"); if (aragorn && sw->mtrna) fputc('s',f); fprintf(f," if you use this\n"); fprintf(f,"program as part of any published research.\n\n"); if (aragorn) { fprintf(f,"Laslett, D. and Canback, B. (2004) ARAGORN, a\n"); fprintf(f,"program for the detection of transfer RNA and\n"); fprintf(f,"transfer-messenger RNA genes in nucleotide sequences.\n"); fprintf(f,"Nucleic Acids Research, 32;11-16.\n\n"); } if (sw->mtrna) { fprintf(f,"Laslett, D. and Canback, B. (2008) ARWEN: a\n"); fprintf(f,"program to detect tRNA genes in metazoan mitochondrial\n"); fprintf(f,"nucleotide sequences\n"); fprintf(f,"Bioinformatics, 24(2); 172-175.\n\n\n"); } fputc('\n',f); if (sw->mtrna) { fprintf(f,"Searching for %s tRNA genes\n",trnatypename[sw->discrim]); if (!sw->tvloop) fprintf(f,"TV replacement loop tRNA genes not detected\n"); } else if (sw->trna) { fprintf(f,"Searching for tRNA genes"); if (sw->maxintronlen > 0) fprintf(f," with introns in anticodon loop"); else fprintf(f," with no introns"); fputc('\n',f); if (sw->maxintronlen > 0) { fprintf(f,"Intron length from %d to %d bases\n", sw->minintronlen,sw->maxintronlen); if (sw->ifixedpos) { fprintf(f,"Intron position fixed between positions 37 and 38\n"); fprintf(f,"on C-loop (one base after anticodon)\n"); } if (sw->ioverlay) fprintf(f,"Allowing overlay of long tRNA genes\n"); }} if (sw->tmrna) fprintf(f,"Searching for tmRNA genes\n"); if (sw->linear) fprintf(f,"Assuming linear topology, search will not wrap around ends\n"); else fprintf(f,"Assuming circular topology, search wraps around ends\n"); if (sw->both == 2) fprintf(f,"Searching both strands\n"); else if (sw->both == 1) fprintf(f,"Searching complementary (antisense) strand only\n"); else fprintf(f,"Searching single (sense) strand only\n"); if (sw->mtrna) if (sw->mtcompov) fprintf(f,"Reporting overlapping candidates on opposite strands\n"); if 
((sw->mtrna) || (sw->trna) || (sw->tmrna)) { fprintf(f,"Using %s genetic code\n",genecodename[sw->geneticcode]); if (sw->ngcmod > 0) { fprintf(f,"Specified modifications:\n"); for (i = 0; i < sw->ngcmod; i++) { anticodon = sw->gcmod[i]; c1 = cpbase(Thymine - (anticodon & 0x3)); c2 = cpbase(Thymine - ((anticodon >> 2) & 0x3)); c3 = cpbase(Thymine - ((anticodon >> 4) & 0x3)); fprintf(f,"%c%c%c = %s\n",c1,c2,c3, aaname[aamap[sw->geneticcode][anticodon]]); }}} fputc('\n',f); fputc('\n',f); rewind = MAXTAGDIST + 20; if (sw->trna | sw->mtrna) { tmaxlen = MAXTRNALEN + sw->maxintronlen; if (rewind < tmaxlen) rewind = tmaxlen; } if (sw->tmrna) if (rewind < MAXTMRNALEN) rewind = MAXTMRNALEN; if (sw->peptide) if (sw->tagthresh >= 5) if (rewind < TSWEEP) rewind = TSWEEP; sw->loffset = rewind; sw->roffset = rewind; drewind = 2*rewind; d->ns = 0; d->nf = 0; d->nextseq = 0L; d->nextseqoff = 0L; while (d->nextseq >= 0L) { d->seqstart = d->nextseq; d->seqstartoff = d->nextseqoff; if (!seq_init(d,sw)) break; psmax = d->psmax; if (sw->verbose) { fprintf(stderr,"%s\n",d->seqname); fprintf(stderr,"%ld nucleotides in sequence\n",psmax); fprintf(stderr,"Mean G+C content = %2.1f%%\n",100.0*d->gc); if ((sw->mtrna) || (sw->trna) || (sw->tmrna)) { fprintf(stderr,"Using %s genetic code\n",genecodename[sw->geneticcode]); if (sw->ngcmod > 0) { fprintf(stderr,"Specified modifications:\n"); for (i = 0; i < sw->ngcmod; i++) { anticodon = sw->gcmod[i]; c1 = cpbase(Thymine - (anticodon & 0x3)); c2 = cpbase(Thymine - ((anticodon >> 2) & 0x3)); c3 = cpbase(Thymine - ((anticodon >> 4) & 0x3)); fprintf(stderr,"%c%c%c = %s\n",c1,c2,c3, aaname[aamap[sw->geneticcode][anticodon]]); }}}} fprintf(f,"%s\n",d->seqname); fprintf(f,"%ld nucleotides in sequence\n",psmax); fprintf(f,"Mean G+C content = %2.1f%%\n",100.0*d->gc); init_gene(0,NT); nt = 0; flag = 0; start = 1L; se = seq; if (sw->linear) { for (i = 0; i < rewind; i++) *se++ = NOBASE; start -= rewind; } else { if (psmax <= drewind) { gap = drewind - 
psmax; sc = se + gap; while (se < sc) *se++ = NOBASE; swrap = wseq; sc = se + psmax; while (se < sc) { *se = move_forward(d); *swrap++ = *se++; } sc = swrap + gap; while (swrap < sc) *swrap++ = NOBASE; swrap = wseq; sc = swrap + psmax; while (swrap < sc) *se++ = *swrap++; swrap = wseq; sc = swrap + drewind; while (swrap < sc) *se++ = *swrap++; sw->loffset = drewind; sw->roffset = drewind; start -= drewind; flag = 1; goto SH; } else { swrap = wseq; sc = seq + drewind; while (se < sc) { *se = move_forward(d); *swrap++ = *se++; }}} sc = seq + LSEQ; NX: while (se < sc) { if (d->ps >= psmax) { if (sw->linear) for (i = 0; i < rewind; i++) *se++ = NOBASE; else { sc = wseq + drewind; swrap = wseq; while (swrap < sc) *se++ = *swrap++; } flag = 1; break; } else *se++ = move_forward(d); } SH: len = (int)(se - seq); if (sw->verbose) { vstart = sq(start + sw->loffset); vstop = sq(start + len - sw->roffset - 1); if (vstop < vstart) { fprintf(stderr,"Searching from %ld to %ld\n",vstart,psmax); fprintf(stderr,"Searching from 1 to %ld\n",vstop); } else fprintf(stderr,"Searching from %ld to %ld\n",vstart,vstop); } if (sw->both != 1) { sw->start = start; sw->comp = 0; nt = tmioptimise(d,seq,len,nt,sw); } if (sw->both > 0) { sense_switch(seq,cseq,len); sw->start = start+len; sw->comp = 1; nt = tmioptimise(d,cseq,len,nt,sw); } if (!flag) { s = seq; sf = se - drewind; se = seq + drewind; while (s < se) *s++ = *sf++; start += len - drewind; goto NX; } if (nt < 1) d->nf++; if (sw->maxintronlen > 0) remove_overlapping_trna(d,nt,sw); if (sw->updatetmrnatags) update_tmrna_tag_database(ts,nt,sw); disp_gene_set(d,nt,sw); if (sw->verbose) fprintf(stderr,"%s\nSearch Finished\n\n",d->seqname); d->ns++; } if (d->ns > 1) { fprintf(f,"\n\n%d sequences searched\n",d->ns); if (sw->trna | sw->mtrna) { fprintf(f,"Total tRNA genes = %d\n",sw->ngene[tRNA]); if (sw->matchacceptor) fprintf(f,"Total iso-acceptor mismatches = %d\n",sw->iamismatch); } if (sw->tmrna) fprintf(f,"Total tmRNA genes = 
%d\n",sw->ngene[tmRNA]); if (sw->reportpseudogenes) if (sw->nps > 0) fprintf(f,"Total number of possible pseudogenes = %d\n",sw->nps); if (d->nf > 0) { sens = 100.0*(d->ns - d->nf)/d->ns; fprintf(f,"Nothing found in %d sequences (%.2lf%% sensitivity)\n",d->nf,sens); } if (sw->annotated) { if (sw->trna | sw->mtrna) { fprintf(f,"\nTotal number of annotated tRNA genes = %d\n", sw->nagene[tRNA]); fprintf(f,"Total number of annotated tRNA genes not detected = %d\n",sw->nafn[tRNA]); fprintf(f,"Total number of unannotated tRNA genes detected = %d\n",sw->nafp[tRNA]); fprintf(f,"Total number of unannotated DRL tRNA genes detected = %d\n", sw->natfpd); fprintf(f,"Total number of unannotated TVRL tRNA genes detected = %d\n", sw->natfptv); fprintf(f,"Total annotated sequence length = %ld bases\n",sw->nabase); sens = (sw->nagene[tRNA] > 0)? 100.0*(double)(sw->nagene[tRNA] - sw->nafn[tRNA])/ (double)sw->nagene[tRNA]:0.0; sel1 = (sw->nagene[tRNA] > 0)? 100.0*(double)(sw->nafp[tRNA])/ (double)sw->nagene[tRNA]:0.0; sel2 = (sw->nabase > 0)? 1000000.0*(double)(sw->nafp[tRNA])/ (double)sw->nabase:0.0; fprintf(f,"Sensitivity = %lg%%\n",sens); fprintf(f,"Selectivity = %lg%% or %lg per Megabase\n\n",sel1,sel2); } if (sw->tmrna) { fprintf(f,"\nTotal number of annotated tmRNA genes = %d\n", sw->nagene[tmRNA]); fprintf(f,"Total number of annotated tmRNA genes not detected = %d\n",sw->nafn[tmRNA]); fprintf(f,"Total number of unannotated tmRNA genes detected = %d\n",sw->nafp[tmRNA]); fprintf(f,"Total annotated sequence length = %ld bases\n",sw->nabase); sens = (sw->nagene[tmRNA] > 0)? 100.0*(double)(sw->nagene[tmRNA] - sw->nafn[tmRNA])/ (double)sw->nagene[tmRNA]:0.0; sel1 = (sw->nagene[tmRNA] > 0)? 100.0*(double)(sw->nafp[tmRNA])/ (double)sw->nagene[tmRNA]:0.0; sel2 = (sw->nabase > 0)? 
1000000.0*(double)(sw->nafp[tmRNA])/ (double)sw->nabase:0.0; fprintf(f,"Sensitivity = %lg%%\n",sens); fprintf(f,"Selectivity = %lg%% or %lg per Megabase\n\n",sel1,sel2); } if (sw->cds) { fprintf(f,"\nTotal number of annotated CDS genes = %d\n", sw->nagene[CDS]); fprintf(f,"Total number of annotated CDS genes not detected = %d\n",sw->nafn[CDS]); fprintf(f,"Total number of unannotated CDS genes detected = %d\n",sw->nafp[CDS]); fprintf(f,"Total annotated sequence length = %ld bases\n",sw->nabase); sens = (sw->nagene[CDS] > 0)? 100.0*(double)(sw->nagene[CDS] - sw->nafn[CDS])/ (double)sw->nagene[CDS]:0.0; sel1 = (sw->nagene[CDS] > 0)? 100.0*(double)(sw->nafp[CDS])/ (double)sw->nagene[CDS]:0.0; sel2 = (sw->nabase > 0)? 1000000.0*(double)(sw->nafp[CDS])/ (double)sw->nabase:0.0; fprintf(f,"Sensitivity = %lg%%\n",sens); fprintf(f,"Selectivity = %lg%% or %lg per Megabase\n",sel1,sel2); sens = (sw->lacds > 0)? 100.0*(double)sw->ldcds/(double)sw->lacds:0.0; fprintf(f,"Length sensitivity = %lg%%\n\n",sens); } } } if (sw->updatetmrnatags) report_new_tmrna_tags(sw); } void bopt_fastafile(data_set *d, csw *sw) { int i,nt,flag,len; int *s,*sf,*se,*sc,*swrap; int seq[2*LSEQ+WRAP+1],cseq[2*LSEQ+WRAP+1],wseq[2*WRAP+1]; long gap,start,rewind,drewind,psmax,tmaxlen,vstart,vstop; double sens; FILE *f = sw->f; rewind = MAXTAGDIST + 20; if (sw->trna | sw->mtrna) { tmaxlen = MAXTRNALEN + sw->maxintronlen; if (rewind < tmaxlen) rewind = tmaxlen; } if (sw->tmrna) if (rewind < MAXTMRNALEN) rewind = MAXTMRNALEN; if (sw->peptide) if (sw->tagthresh >= 5) if (rewind < TSWEEP) rewind = TSWEEP; sw->loffset = rewind; sw->roffset = rewind; drewind = 2*rewind; d->ns = 0; d->nf = 0; d->nextseq = 0L; d->nextseqoff = 0L; while (d->nextseq >= 0L) { d->seqstart = d->nextseq; d->seqstartoff = d->nextseqoff; if (!seq_init(d,sw)) break; psmax = d->psmax; if (sw->verbose) { fprintf(stderr,"%s\n",d->seqname); fprintf(stderr,"%ld nucleotides in sequence\n",psmax); fprintf(stderr,"Mean G+C content = 
%2.1f%%\n",100.0*d->gc); } if (sw->batch < 2) fprintf(f,">%s\n",d->seqname); init_gene(0,NT); nt = 0; flag = 0; start = 1L; se = seq; if (sw->linear) { for (i = 0; i < rewind; i++) *se++ = NOBASE; start -= rewind; } else { if (psmax <= drewind) { gap = drewind - psmax; sc = se + gap; while (se < sc) *se++ = NOBASE; swrap = wseq; sc = se + psmax; while (se < sc) { *se = move_forward(d); *swrap++ = *se++; } sc = swrap + gap; while (swrap < sc) *swrap++ = NOBASE; swrap = wseq; sc = swrap + psmax; while (swrap < sc) *se++ = *swrap++; swrap = wseq; sc = swrap + drewind; while (swrap < sc) *se++ = *swrap++; sw->loffset = drewind; sw->roffset = drewind; start -= drewind; flag = 1; goto SH; } else { swrap = wseq; sc = seq + drewind; while (se < sc) { *se = move_forward(d); *swrap++ = *se++; }}} sc = seq + LSEQ; NX: while (se < sc) { *se++ = move_forward(d); if (d->ps >= psmax) { if (sw->linear) for (i = 0; i < rewind; i++) *se++ = NOBASE; else { sc = wseq + drewind; swrap = wseq; while (swrap < sc) *se++ = *swrap++; } flag = 1; break; }} SH: len = (int)(se - seq); if (sw->verbose) { vstart = sq(start + sw->loffset); vstop = sq(start + len - sw->roffset - 1); if (vstop < vstart) { fprintf(stderr,"Searching from %ld to %ld\n",vstart,psmax); fprintf(stderr,"Searching from 1 to %ld\n",vstop); } else fprintf(stderr,"Searching from %ld to %ld\n",vstart,vstop); } if (sw->both != 1) { sw->start = start; sw->comp = 0; nt = tmioptimise(d,seq,len,nt,sw); } if (sw->both > 0) { sense_switch(seq,cseq,len); sw->start = start+len; sw->comp = 1; nt = tmioptimise(d,cseq,len,nt,sw); } if (!flag) { s = seq; sf = se - drewind; se = seq + drewind; while (s < se) *s++ = *sf++; start += len - drewind; goto NX; } if (nt < 1) d->nf++; if (sw->maxintronlen > 0) remove_overlapping_trna(d,nt,sw); if (sw->updatetmrnatags) update_tmrna_tag_database(ts,nt,sw); batch_gene_set(d,nt,sw); if (sw->verbose) fprintf(stderr,"%s\nSearch Finished\n\n",d->seqname); d->ns++; } if ((d->ns > 1) && (sw->batch < 2)) { 
fprintf(f,">end \t%d sequences",d->ns); if (sw->trna || sw->mtrna) fprintf(f," %d tRNA genes",sw->ngene[tRNA]); if (sw->tmrna) fprintf(f," %d tmRNA genes",sw->ngene[tmRNA]); if (d->nf > 0) { sens = 100.0*(d->ns - d->nf)/d->ns; fprintf(f,", nothing found in %d sequences, (%.2lf%% sensitivity)",d->nf,sens); } fputc('\n',f); } if (sw->updatetmrnatags) report_new_tmrna_tags(sw); } void aragorn_help_menu() { int h; for (h = 0; h < NHELPLINE; h++) printf("%s\n",helpmenu[h]); } void error_report(int n, char *s) { switch(n) { case 0: fprintf(stderr, "-%s not recognised, type aragorn -h for help\n",s); break; case 1: fprintf(stderr, "-%s not understood, type aragorn -h for help\n",s); break; case 2: fprintf(stderr,"Could not open %s\n",s); break; case 3: fprintf(stderr, "No sequence file specified, type aragorn -h for help\n"); break; case 4: fprintf(stderr,"Don't know genetic code %s\n",s); break; case 5: fprintf(stderr,"Too many genetic code modifications (max=%d)\n", MAXGCMOD); break; default: break; } exit(0); } void process_genecode_switch(char *s, csw *sw) { int i,m,lmax,len[NGENECODE],anticodon,b[3]; long l; char c,*ss,*se; static char genecodetag[NGENECODE][10] = { "MET", "STD","VERT","YEAST","PROT","INVERT", "CILIATE","DELETED","DELETED","FLATWORM","EUPLOT", "BACT","ALTYEAST","ASCID","ALTFLAT","BLEP", "CHLOROPH","DELETED","DELETED","DELETED","DELETED", "TREM","SCEN","THRAUST","PTERO","GRAC" }; sw->geneticcode = STANDARD; sw->gcfix = 1; c = *s; if (c >= '0') if (c <= '9') { lconvert(s,&l); i = (int)l; if ((i >= 0) && (i < NGENECODE)) sw->geneticcode = i; goto MOD; } for (i = 0; i < NGENECODE; i++) { len[i] = 0; ss = s; se = genecodetag[i]; while (c == *ss++) { if (upcasec(c) != *se++) break; len[i]++; }} m = -1; lmax = 0; i = -1; while (++i < NGENECODE) if (len[i] > lmax) { m = i; lmax = len[i]; } if (m >= 0) sw->geneticcode = m; else error_report(4,s); MOD: sw->ngcmod = 0; ss = s; while (ss = strpos(ss,",")) { if (sw->ngcmod >= MAXGCMOD) error_report(5,NULL); ss++; 
for (i = 0; i < 3; i++) { b[i] = Adenine; c = upcasec(ss[i]); if (c == 'C') b[i] = Cytosine; if (c == 'G') b[i] = Guanine; if (c == 'T') b[i] = Thymine; if (c == 'U') b[i] = Thymine; } anticodon = ((Thymine - b[2])<<4) + ((Thymine - b[1])<<2) + (Thymine - b[0]); if (!(se = strpos(ss,"="))) break; se++; for (i = 0; i < NAMINOACID; i++) if (upcasec(se[0]) == upcasec(aaname[i][0])) if (upcasec(se[1]) == upcasec(aaname[i][1])) if (upcasec(se[2]) == upcasec(aaname[i][2])) { aamap[sw->geneticcode][anticodon] = i; sw->gcmod[sw->ngcmod] = anticodon; sw->ngcmod++; break; } }} void change_thresholds(csw *sw, double psthresh) { sw->threshlevel = psthresh; sw->cdsthresh *= psthresh; sw->srpthresh *= psthresh; sw->tmrnathresh *= psthresh; sw->mtdtthresh *= psthresh; sw->mttthresh *= psthresh; sw->mtdthresh *= psthresh; sw->trnathresh *= psthresh; } int main(int z, char *v[]) { int i,lv,filecounter; long l; double psthresh; char c1,c2,c3,c4,*s; data_set d; static csw sw = { {"tRNA","tmRNA","","","CDS","overall"}, NULL,0,0,0,0,0,0,0,0,1,0,0, STANDARD,0,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},0,METAZOAN_MT, 1,0,5,5,1,0,0,0,2,0,0,0,0,0,0,3,0,2,1,1,0,0,0,0,0,0,0,0,1, 0,0,0,0,0,0,0,{0,0,0,0,0,0},0,0,0,0,NTAG,10,30, {0,0,0,0,0,0},{0,0,0,0,0,0},{0,0,0,0,0,0},0,0,0,0,0L, 100.0,1.0,tRNAthresh,4.0,29.0,26.0,7.5,8.0, mtRNAtthresh,mtRNAdthresh,mtRNAdtthresh,-7.9,-6.0, tmRNAthresh,14.0,10.0,25.0,9.0,srpRNAthresh,CDSthresh, {tRNAthresh,tmRNAthresh,srpRNAthresh,0.0,CDSthresh }, { 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 10, 65, 82, 65, 71, 79, 82, 78, 32, 118, 49, 46, 50, 46, 51, 56, 32, 32, 32, 68, 101, 97,110, 32, 76, 97, 115, 108, 101, 116, 116, 10, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 10, TERM }}; sw.f = stdout; d.bugmode = 0; filecounter = 0; i = 0; while (++i < z) if (*(v[i]) == '-') { lv = length(v[i]); if (lv < 2) continue; s = v[i] 
+ 1; c1 = upcasec(*s); c2 = (lv > 2)?upcasec(s[1]):' '; c3 = (lv > 3)?upcasec(s[2]):' '; c4 = (lv > 4)?upcasec(s[3]):' '; switch(c1) { case 'E': sw.energydisp = (c2 == 'S')?2:1; break; case 'A': if (c2 == '7') sw.extastem = 0; else if (c2 == 'A') sw.matchacceptor = 1; else if (c2 == 'M') { l = 1L; if (c3 == 'T') { if (lv > 4) { s = lconvert(s+3,&l); if (l < 1L) l = 1L; sw.trnalenmisthresh = (int)l; } else sw.trnalenmisthresh = 1; } else if (c3 == 'M') { if (lv > 4) { s = lconvert(s+3,&l); if (l < 1L) l = 1L; sw.tmrnalenmisthresh = (int)l; } else sw.tmrnalenmisthresh = 1; } else if (lv > 3) { s = lconvert(s+2,&l); if (l < 1L) l = 1L; sw.trnalenmisthresh = (int)l; sw.tmrnalenmisthresh = (int)l; } else { sw.trnalenmisthresh = 1; sw.tmrnalenmisthresh = 1; }} else sw.secstructdisp = 1; break; case 'B': if (c2 == 'R') sw.seqdisp = 2; else sw.libflag = 1; break; case 'X': sw.libflag = 2; break; case 'W': if (c2 == 'U') if (c3 == 'N') if (c4 == 'I') { d.bugmode = 1; break; } if (sw.batch < 1) sw.batch = 1; if (c2 == 'A') sw.batchfullspecies = 1; break; case 'V': sw.verbose = 1; break; case 'S': if (c2 == 'S') { sw.sp1max = 2; sw.sp2min = 1; sw.sp2max = 1; break; } if (c2 == 'E') { if (sw.seqdisp < 1) sw.seqdisp = 1; break; } if ((c2 == 'C') || (c2 == '-')) { sw.both = 1; break; } sw.both = 0; break; case 'F': if (softstrpos(s,"O")) { sw.batch = 2; if (softstrpos(s,"S")) sw.batch |= 0x4; if (softstrpos(s,"N")) sw.batch |= 0x8; if (softstrpos(s,"C")) sw.batch |= 0x10; } else { if (softstrpos(s,"C")) sw.seqdisp = 4; else sw.seqdisp = 3; } break; case 'D': sw.both = 2; break; case 'L': sw.linear = 1; break; case 'C': if (c2 == '7') sw.cloop7 = 1; else sw.linear = 0; break; case 'J': if (lv > 2) { if (c2 == 'R') sw.aataildiv = 1; if (c3 == '4') sw.aataildisp = 1; } else sw.aataildisp = 1; break; case '1': sw.minintronlen = 10; break; case 'I': if (c2 == 'O') { sw.ioverlay = 1; s++; lv--; } else if (c2 == 'F') { sw.ifixedpos = 1; s++; lv--; } else if (c2 == 'R') { 
sw.ireportminintronlen = 1; s++; lv--; } if (c3 == 'O') { sw.ioverlay = 1; s++; lv--; } else if (c3 == 'F') { sw.ifixedpos = 1; s++; lv--; } else if (c3 == 'R') { sw.ireportminintronlen = 1; s++; lv--; } if (c4 == 'O') { sw.ioverlay = 1; s++; lv--; } else if (c4 == 'F') { sw.ifixedpos = 1; s++; lv--; } else if (c4 == 'R') { sw.ireportminintronlen = 1; s++; lv--; } if (lv > 2) s = lconvert(s+1,&l); else goto IMAX; if (*s == ',') { if (sw.ireportminintronlen == 1) sw.minintronlenreport = (int)l; else sw.minintronlen = (int)l; lconvert(s+1,&l); sw.maxintronlen = (int)l; } else sw.maxintronlen = (int)l; if (sw.maxintronlen > (LSEQ - MAXTRNALEN)) sw.maxintronlen = (LSEQ - MAXTRNALEN); if (sw.maxintronlen > MAXINTRONLEN) sw.maxintronlen = MAXINTRONLEN; if ((sw.minintronlen < 0) || (sw.maxintronlen < sw.minintronlen)) error_report(1,v[i]); if ((sw.minintronlenreport < 0) || (sw.maxintronlen < sw.minintronlenreport)) error_report(1,v[i]); break; IMAX: sw.maxintronlen = MAXINTRONLEN; break; case 'T': if (c2 == 'V') { sw.tvloop = 0; break; } sw.trna = 1; if (lv > 2) { s = dconvert(s+1,&sw.trnathresh); if (*s == ',') dconvert(s+1,&sw.ttarmthresh); } break; case 'M': if (c2 == 'T') { sw.mtrna = 1; if (!sw.gcfix) sw.geneticcode = METAZOAN_MT; if (lv > 3) { s += 2; c3 = upcasec(*s); if (c3 == 'M') { do c3 = upcasec(*++s); while ((c3 == 'A') || (c3 == 'M') || (c3 == 'L')); sw.tvloop = 0; sw.geneticcode = VERTEBRATE_MT; sw.discrim = MAMMAL_MT; } MTNXTC: if (c3 == 'X') { c3 = upcasec(*++s); sw.mtxdetect = 0; goto MTNXTC; } if (c3 == 'C') { c3 = upcasec(*++s); sw.mtcdsscan = 0; goto MTNXTC; } if (c3 == 'D') { c3 = upcasec(*++s); sw.mtcompov = 1; goto MTNXTC; } if (c3 != '-') if (c3 != '.') if ((c3 < '0') || (c3 > '9')) break; s = dconvert(s,&sw.mtdtthresh); if (*s == ',') s = dconvert(s+1,&sw.mttthresh); if (*s == ',') s = dconvert(s+1,&sw.mtdthresh); if (*s == ',') s = dconvert(s+1,&sw.mttarmthresh); if (*s == ',') dconvert(s+1,&sw.mtdarmthresh); }} else { sw.tmrna = 1; if (c2 == 
'U') if (c3 == 'T') { sw.updatetmrnatags = 1; lv -= 2; s += 2; } if (lv > 2) dconvert(s+1,&sw.tmrnathresh); } break; case 'P': if (c2 == 'S') { if (c3 != '-') if (c3 != '.') if ((c3 < '0') || (c3 > '9')) { change_thresholds(&sw,PSEUDOGENElevel); break; } psthresh = 1.0; dconvert(s+2,&psthresh); change_thresholds(&sw,psthresh); break; } break; case 'G': if (c2 != 'C') break; process_genecode_switch(s+2,&sw); break; case 'R': if (c2 == 'N') sw.repeatsn = 1; else if (c2 == 'P') { sw.reportpseudogenes = 1; if (lv > 3) dconvert(s+2,&sw.reportpsthresh); } else sw.tmstrict = 0; break; case 'Q': sw.showconfig = 0; break; case 'H': aragorn_help_menu(); exit(0); case 'O': if (lv > 2) s++; else { if (++i >= z) break; s = v[i]; } sw.f = fopen(s,"w"); if (!sw.f) error_report(2,s); break; default: error_report(0,s); }} else if (filecounter < 1) { d.f = fopen(v[i],"r"); if (d.f) filecounter++; else error_report(2,v[i]); } else if (filecounter < 2) { sw.f = fopen(v[i],"w"); if (!sw.f) error_report(2,v[i]); filecounter++; } else error_report(0,v[i]); if (filecounter < 1) error_report(3,NULL); if ((!sw.trna) & (!sw.tmrna)) { sw.trna = 1; sw.tmrna = 1; } if (sw.mtrna) sw.trna = 0; ts = (gene *)malloc(NT*sizeof(gene)); if (ts == NULL) { fprintf(stderr,"Not enough memory available to store detected genes\n"); exit(1); } sw.genespace = NT; if (sw.libflag) fprintf(sw.f,"Library\n"); if (sw.batch) bopt_fastafile(&d,&sw); else iopt_fastafile(&d,&sw); free((void *)ts); fclose(d.f); if (!sw.batch && sw.showconfig) { fprintf(sw.f,"Configuration: "); i = -1; while (++i < z) fprintf(sw.f,"%s ",v[i]); fputc('\n',sw.f); } if (sw.f != stdout) fclose(sw.f); return(0); }