predictprotein-1.1.07/0000755015077201507720000000000012504021475011656 500000000000000predictprotein-1.1.07/perl/0000755015077201507720000000000012504021475012620 500000000000000predictprotein-1.1.07/perl/lib/0000755015077201507720000000000012504021475013366 500000000000000predictprotein-1.1.07/perl/lib/RG/0000755015077201507720000000000012504021475013676 500000000000000predictprotein-1.1.07/perl/lib/RG/PP/0000755015077201507720000000000012504021475014215 500000000000000predictprotein-1.1.07/perl/lib/RG/PP/ACL.pm0000644015077201507720000000245412504014770015077 00000000000000
# Copyright 2010 Laszlo Kajan Technical University of Munich, Germany
# This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
package RG::PP::ACL;

=pod

=head1 NAME

RG::PP::ACL - PredictProtein access control list methods

=head1 SYNOPSIS

  use RG::PP::ACL;

=head1 DESCRIPTION

=head2 Methods

  static acl2hash throw( RG::Exception )

Parses a comma-separated ACL string into a nested hash reference. Each
entry has the form C<class:id:perm>, where C<class> is one of C<u>, C<g>
or C<o>, C<id> is any (possibly empty) string without colons, and C<perm>
is a single digit. The result maps class to id to perm. An entry that does
not have this form makes the function die with an RG::Exception object.

C<acl2hash> is exported on request only.

=head2 Properties

=head2 Package variables

=cut

use strict;
use warnings;
use Carp qw| cluck :DEFAULT |;
use RG::Exception;

require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(acl2hash);  # symbols to export on request

# acl2hash( $setacl )
#   $setacl - ACL string of the shape [u:uid:perms]*,[g:gid:perms]*,[o::perms]?
#   returns - hash reference: { [ugo] => { lkajan => 7, ... }, ... }
#   dies    - with an RG::Exception object on the first malformed entry
sub acl2hash
{
  my( $setacl ) = @_;

  my %perm_of;
  for my $acl ( split( /,/o, $setacl ) )
  {
    # A failed list-context match yields an empty list, so the assignment
    # is false and we fall through to die.
    my( $class, $id, $perm ) = $acl =~ /^([ugo]):([^:]*):([[:digit:]])$/o
      or die RG::Exception->new( msg => "invalid ACL: '$acl'" );

    # A repeated class/id pair overwrites the earlier permission.
    $perm_of{$class}->{$id} = $perm;
  }

  return \%perm_of;
}

1;

=pod

=head1 AUTHOR

Laszlo Kajan

=head1 COPYRIGHT AND LICENSE

Copyright 2010 by Laszlo Kajan

This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=head1 SEE ALSO predictprotein(1) =cut # vim:et:ts=2:ai: predictprotein-1.1.07/perl/Build.PL.in0000644015077201507720000000062512504014770014444 00000000000000
use warnings;
use strict;
use Module::Build;

# Distribution metadata handed to Module::Build, kept as an ordered
# key/value list. NOTE(review): the @PACKAGE_NAME@ and @VERSION@ tokens are
# presumably filled in when Build.PL is generated from this .in template --
# confirm against the configure setup.
my @dist_meta = (
  module_name        => 'RG::PP::ACL',
  license            => 'perl',
  configure_requires => { 'Module::Build' => 0.38 },
  dist_name          => '@PACKAGE_NAME@',
  dist_version       => '@VERSION@',
  dist_author        => [ 'Laszlo Kajan ' ],
  dist_abstract      => 'This package provides the RG::PP::ACL module.',
);

# Construct the builder, then have it write out the Build script.
my $builder = Module::Build->new( @dist_meta );
$builder->create_build_script;

# vim:et:ts=2:ai:
predictprotein-1.1.07/perl/MANIFEST0000644015077201507720000000002112504014770013662 00000000000000lib/RG/PP/ACL.pm predictprotein-1.1.07/perl/MANIFEST.SKIP0000644015077201507720000000146212504014770014441 00000000000000\bdebian/ #!start included /usr/share/perl/5.10/ExtUtils/MANIFEST.SKIP # Avoid version control files. \bRCS\b \bCVS\b \bSCCS\b ,v$ \B\.svn\b \B\.git\b \B\.gitignore\b \b_darcs\b # Avoid Makemaker generated and utility files. \bMANIFEST\.bak \bMakefile$ \bblib/ \bMakeMaker-\d \bpm_to_blib\.ts$ \bpm_to_blib$ \bblibdirs\.ts$ # 6.18 through 6.25 generated this # Avoid Module::Build generated and utility files. \bBuild$ \b_build/ # Avoid temp and backup files. ~$ \.old$ \#$ \b\.# \.bak$ # Avoid Devel::Cover files. \bcover_db\b #!end included /usr/share/perl/5.10/ExtUtils/MANIFEST.SKIP # Avoid Module::Build generated and utility files. \bBuild$ \bBuild.bat$ \b_build \bBuild.COM$ \bBUILD.COM$ \bbuild.com$ # Avoid archives of this distribution \bpredictprotein-[\d\.\_]+ ^MYMETA\.yml$ ^MYMETA\.json$ predictprotein-1.1.07/INSTALL0000644015077201507720000003633212504014771012637 00000000000000Installation Instructions ************************* Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. 
This file is offered as-is, without warranty of any kind. Basic Installation ================== Briefly, the shell commands `./configure; make; make install' should configure, build, and install this package. The following more-detailed instructions are generic; see the `README' file for instructions specific to this package. Some packages provide this `INSTALL' file but do not implement all of the features documented below. The lack of an optional feature in a given package is not necessarily a bug. More recommendations for GNU packages can be found in *note Makefile Conventions: (standards)Makefile Conventions. The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses those values to create a `Makefile' in each directory of the package. It may also create one or more `.h' files containing system-dependent definitions. Finally, it creates a shell script `config.status' that you can run in the future to recreate the current configuration, and a file `config.log' containing compiler output (useful mainly for debugging `configure'). It can also use an optional file (typically called `config.cache' and enabled with `--cache-file=config.cache' or simply `-C') that saves the results of its tests to speed up reconfiguring. Caching is disabled by default to prevent problems with accidental use of stale cache files. If you need to do unusual things to compile the package, please try to figure out how `configure' could check whether to do them, and mail diffs or instructions to the address given in the `README' so they can be considered for the next release. If you are using the cache, and at some point `config.cache' contains results you don't want to keep, you may remove or edit it. The file `configure.ac' (or `configure.in') is used to create `configure' by a program called `autoconf'. You need `configure.ac' if you want to change it or regenerate `configure' using a newer version of `autoconf'. 
The simplest way to compile this package is: 1. `cd' to the directory containing the package's source code and type `./configure' to configure the package for your system. Running `configure' might take a while. While running, it prints some messages telling which features it is checking for. 2. Type `make' to compile the package. 3. Optionally, type `make check' to run any self-tests that come with the package, generally using the just-built uninstalled binaries. 4. Type `make install' to install the programs and any data files and documentation. When installing into a prefix owned by root, it is recommended that the package be configured and built as a regular user, and only the `make install' phase executed with root privileges. 5. Optionally, type `make installcheck' to repeat any self-tests, but this time using the binaries in their final installed location. This target does not install anything. Running this target as a regular user, particularly if the prior `make install' required root privileges, verifies that the installation completed correctly. 6. You can remove the program binaries and object files from the source code directory by typing `make clean'. To also remove the files that `configure' created (so you can compile the package for a different kind of computer), type `make distclean'. There is also a `make maintainer-clean' target, but that is intended mainly for the package's developers. If you use it, you may have to get all sorts of other programs in order to regenerate files that came with the distribution. 7. Often, you can also type `make uninstall' to remove the installed files again. In practice, not all packages have tested that uninstallation works correctly, even though it is required by the GNU Coding Standards. 8. Some packages, particularly those that use Automake, provide `make distcheck', which can be used by developers to test that all other targets like `make install' and `make uninstall' work correctly. 
This target is generally not run by end users. Compilers and Options ===================== Some systems require unusual options for compilation or linking that the `configure' script does not know about. Run `./configure --help' for details on some of the pertinent environment variables. You can give `configure' initial values for configuration parameters by setting variables in the command line or in the environment. Here is an example: ./configure CC=c99 CFLAGS=-g LIBS=-lposix *Note Defining Variables::, for more details. Compiling For Multiple Architectures ==================================== You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. To do this, you can use GNU `make'. `cd' to the directory where you want the object files and executables to go and run the `configure' script. `configure' automatically checks for the source code in the directory that `configure' is in and in `..'. This is known as a "VPATH" build. With a non-GNU `make', it is safer to compile the package for one architecture at a time in the source code directory. After you have installed the package for one architecture, use `make distclean' before reconfiguring for another architecture. On MacOS X 10.5 and later systems, you can create libraries and executables that work on multiple system types--known as "fat" or "universal" binaries--by specifying multiple `-arch' options to the compiler but only a single `-arch' option to the preprocessor. Like this: ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ CPP="gcc -E" CXXCPP="g++ -E" This is not guaranteed to produce working output in all cases, you may have to build one architecture at a time and combine the results using the `lipo' tool if you have problems. 
Installation Names ================== By default, `make install' installs the package's commands under `/usr/local/bin', include files under `/usr/local/include', etc. You can specify an installation prefix other than `/usr/local' by giving `configure' the option `--prefix=PREFIX', where PREFIX must be an absolute file name. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. If you pass the option `--exec-prefix=PREFIX' to `configure', the package uses PREFIX as the prefix for installing programs and libraries. Documentation and other data files still use the regular prefix. In addition, if you use an unusual directory layout you can give options like `--bindir=DIR' to specify different values for particular kinds of files. Run `configure --help' for a list of the directories you can set and what kinds of files go in them. In general, the default for these options is expressed in terms of `${prefix}', so that specifying just `--prefix' will affect all of the other directory specifications that were not explicitly provided. The most portable way to affect installation locations is to pass the correct locations to `configure'; however, many packages provide one or both of the following shortcuts of passing variable assignments to the `make install' command line to change installation locations without having to reconfigure or recompile. The first method involves providing an override variable for each affected directory. For example, `make install prefix=/alternate/directory' will choose an alternate location for all directory configuration variables that were expressed in terms of `${prefix}'. Any directories that were specified during `configure', but not in terms of `${prefix}', must each be overridden at install time for the entire installation to be relocated. 
The approach of makefile variable overrides for each directory variable is required by the GNU Coding Standards, and ideally causes no recompilation. However, some platforms have known limitations with the semantics of shared libraries that end up requiring recompilation when using this method, particularly noticeable in packages that use GNU Libtool. The second method involves providing the `DESTDIR' variable. For example, `make install DESTDIR=/alternate/directory' will prepend `/alternate/directory' before all installation names. The approach of `DESTDIR' overrides is not required by the GNU Coding Standards, and does not work on platforms that have drive letters. On the other hand, it does better at avoiding recompilation issues, and works well even when some directory options were not specified in terms of `${prefix}' at `configure' time. Optional Features ================= If the package supports it, you can cause programs to be installed with an extra prefix or suffix on their names by giving `configure' the option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The `README' should mention any `--enable-' and `--with-' options that the package recognizes. For packages that use the X Window System, `configure' can usually find the X include and library files automatically, but if it doesn't, you can use the `configure' options `--x-includes=DIR' and `--x-libraries=DIR' to specify their locations. Some packages offer the ability to configure how verbose the execution of `make' will be. 
For these packages, running `./configure --enable-silent-rules' sets the default to minimal output, which can be overridden with `make V=1'; while running `./configure --disable-silent-rules' sets the default to verbose, which can be overridden with `make V=0'. Particular systems ================== On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC is not installed, it is recommended to use the following options in order to use an ANSI C compiler: ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" and if that doesn't work, install pre-built binaries of GCC for HP-UX. On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot parse its `<wchar.h>' header file. The option `-nodtk' can be used as a workaround. If GNU CC is not installed, it is therefore recommended to try ./configure CC="cc" and if that doesn't work, try ./configure CC="cc -nodtk" On Solaris, don't put `/usr/ucb' early in your `PATH'. This directory contains several dysfunctional programs; working variants of these programs are available in `/usr/bin'. So, if you need `/usr/ucb' in your `PATH', put it _after_ `/usr/bin'. On Haiku, software installed for all users goes in `/boot/common', not `/usr/local'. It is recommended to use the following options: ./configure --prefix=/boot/common Specifying the System Type ========================== There may be some features `configure' cannot figure out automatically, but needs to determine by the type of machine the package will run on. Usually, assuming the package is built to be run on the _same_ architectures, `configure' can figure that out, but if it prints a message saying it cannot guess the machine type, give it the `--build=TYPE' option. TYPE can either be a short name for the system type, such as `sun4', or a canonical name which has the form: CPU-COMPANY-SYSTEM where SYSTEM can have one of these forms: OS KERNEL-OS See the file `config.sub' for the possible values of each field. 
If `config.sub' isn't included in this package, then this package doesn't need to know the machine type. If you are _building_ compiler tools for cross-compiling, you should use the option `--target=TYPE' to select the type of system they will produce code for. If you want to _use_ a cross compiler, that generates code for a platform different from the build platform, you should specify the "host" platform (i.e., that on which the generated programs will eventually be run) with `--host=TYPE'. Sharing Defaults ================ If you want to set default values for `configure' scripts to share, you can create a site shell script called `config.site' that gives default values for variables like `CC', `cache_file', and `prefix'. `configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. A warning: not all `configure' scripts look for a site script. Defining Variables ================== Variables not defined in a site shell script can be set in the environment passed to `configure'. However, some packages may run configure again during the build, and the customized values of these variables may be lost. In order to avoid this problem, you should set them in the `configure' command line, using `VAR=value'. For example: ./configure CC=/usr/local2/bin/gcc causes the specified `gcc' to be used as the C compiler (unless it is overridden in the site shell script). Unfortunately, this technique does not work for `CONFIG_SHELL' due to an Autoconf bug. Until the bug is fixed you can use this workaround: CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash `configure' Invocation ====================== `configure' recognizes the following options to control how it operates. `--help' `-h' Print a summary of all of the options to `configure', and exit. 
`--help=short' `--help=recursive' Print a summary of the options unique to this package's `configure', and exit. The `short' variant lists options used only in the top level, while the `recursive' variant lists options also present in any nested packages. `--version' `-V' Print the version of Autoconf used to generate the `configure' script, and exit. `--cache-file=FILE' Enable the cache: use and save the results of the tests in FILE, traditionally `config.cache'. FILE defaults to `/dev/null' to disable caching. `--config-cache' `-C' Alias for `--cache-file=config.cache'. `--quiet' `--silent' `-q' Do not print messages saying which checks are being made. To suppress all normal output, redirect it to `/dev/null' (any error messages will still be shown). `--srcdir=DIR' Look for the package's source code in directory DIR. Usually `configure' can determine that directory automatically. `--prefix=DIR' Use DIR as the installation prefix. *note Installation Names:: for more details, including other options available for fine-tuning the installation locations. `--no-create' `-n' Run the configure checks, but stop before creating any output files. `configure' also accepts some other, not widely useful, options. Run `configure --help' for more details. predictprotein-1.1.07/NEWS0000644015077201507720000000006612504014771012300 000000000000002012-07 PredictProtein is now released under the GPL. predictprotein-1.1.07/README0000644015077201507720000016420412504014770012465 00000000000000 PP Help 01: Introduction WHAT IS IT? HOW TO USE IT? What is PredictProtein (PP)? PP is an automatic service for protein database searches and the prediction of aspects of protein structure and function. Given an amino acid sequence or an alignment input, PP returns: 1. a multiple sequence alignment (i.e. database search), 2. ProSite sequence motifs (more info), 3. low-complexity regions (SEG) (more info), 4. ProDom domain assignments (more info), 5. Nuclear localisation signals (more info), 6. 
and predictions of 1. secondary structure (more info), 2. solvent accessibility (more info), 3. globular regions (more info), 4. transmembrane helices (more info), 5. coiled-coil regions (more info), 6. structural switch regions (more info), 7. b-value (more info), 8. disorder (more info), 9. inter-residue contacts (more info), 10. protein-protein and protein/DNA binding sites (more info), 11. sub-cellular localization (more info), 12. domain assignment, 13. beta barrels, 14. cysteine predictions and disulphide bridges. The following features are available upon request: 1. Fold recognition by prediction-based threading (more info): PDB is searched for possible remote homologues (sequence identity 0-25%) to your sequence, 2. Evaluation of prediction accuracy (more info): For a given predicted and observed secondary structure (for one or several proteins), per-residue and per-segment scores are compiled. For all services, you can submit your query over the Web. How does PredictProtein work? Generating an alignment. The following steps are performed. 1. The sequence database (compiled from SWISSPROT+TrEMBL+PDB) is scanned for similar sequences (by BLASTP). 2. A multiple sequence alignment is generated by iterative BLAST searches (PSI-BLAST). 3. ProSite motifs are retrieved from the ProSite database, 4. low-complexity regions (e.g. composition bias) are marked by the program SEG, 5. and your protein is compared to a domain database (ProDom). Prediction of protein structure in 1D. The multiple alignment is used as input for profile-based neural network predictions (PROF methods). The following levels of prediction accuracy have been evaluated in cross-validation experiments: 1. Secondary structure prediction (PHDsec or PROFsec): expected three-state (helix, strand, rest) overall accuracy >72% (PHD) >76% (PROF) for water-soluble globular proteins. For an automatic, continuous comparison of prediction accuracy to other programs see EVA. 
You may find details about accuracy in graphs, on tables, and in the literature: Rost 1997 (paper) and 1996 (paper); Rost & Sander 1993 (abstract) and 1994 (abstract). 2. Solvent accessibility prediction (PHDacc or PROFacc): Expected correlation between observed and predicted relative accessibility > 0.5. You may find details about accuracy in graphs, on tables, and in the literature: Rost 1997 (paper) and 1996 (paper), Rost & Sander 1994 (abstract). 3. Transmembrane helix prediction (PHDhtm): Expected overall two-state accuracy (transmembrane, non-transmembrane) > 95%; refined prediction of transmembrane helices and topology & expected likelihood of predicting all helices correctly about 89%, expected accuracy of topology prediction > 86%. You may find details about accuracy on tables, and in the literature: Rost, Casadio & Fariselli 1996 (abstract), and Rost, Casadio, Fariselli & Sander 1995 (abstract). 4. Other predictions reference to literature Fold recognition by prediction-based threading. Predictions of secondary structure and accessibility are aligned against PDB to detect remote homologues (prediction-based threading). As for other threading methods, results should be taken with caution. * The first hit of the prediction-based threading is correct in about 30% of the cases on average. * Hits with z-scores above 3.0 are more reliable (accuracy > 60%). * For exceptional cases the resulting alignments suffice for building correct homology-based models. You may find details about accuracy in the literature: Rost, Schneider & Sander, 1996 (paper), Rost 1995 (abstract) and 1994 (abstract). Evaluation of prediction accuracy. If you opt for 'evaluate prediction accuracy', we evaluate the accuracy of the secondary structure prediction provided. 
The following per-residue and per-segment scores are returned: overall three-state accuracy, single state accuracy, correlation coefficients, information entropy, fractional segment overlap, and finally the accuracy of predicting secondary structure content and structural class (Rost et al., JMB, 1994, 235, 13-26, example for output). What is META-PP? META-PP provides a single-page interface to various World Wide Web services for sequence analysis (list of servers available at the moment). 'Single-page interface' means that you fill in your sequence only once, and can select any number of services from a list. For each selected service, you will receive the results by email. Currently, the following features of sequence analysis are covered by META-PP: 1. signal peptides 2. cleavage sites 3. O-glycosylation sites 4. cleavage sites of picornaviral proteases 5. chloroplast transit peptides and cleavage sites 6. secondary structure prediction 7. membrane helix prediction 8. threading, or remote homology modelling (searching for proteins of known 3D structure that appear structurally similar to your protein) 9. database searches 10. homology modelling (prediction of protein 3D structure by homology to a sequence-similar protein of known structure) NOTE: this will only work if there is a protein of known structure that has sufficient sequence similarity to your protein! How to use PP and META-PP? Use of the PredictProtein server is free for academics. Commercial users may want to apply for a license. The use of META-PredictProtein is currently restricted to academic users. Using the web: 1. Home page: http://www.predictprotein.org 2. Help page (this): http://www.predictprotein.org/doc/help_hello.html 3. Submit request to PP: http://www.predictprotein.org/submit.php Submit request to META-PP: http://www.predictprotein.org/meta.php Questions, feedback: http://www.predictprotein.org/feedback.php What can we do for you? 
* You have a protein sequence and want to find out anything we can say about structure and function? In general, we can provide multiple sequence alignments and predictions of secondary structure, residue solvent accessibility and the location of transmembrane helices (examples for: request; and output). * You have a helical transmembrane protein sequence and want a refined prediction of the helix locations and topology? We provide multiple sequence alignments and refined predictions for the location of transmembrane helices and for the topology, i.e. the orientation of the N-term with respect to the membrane (examples for: request; and output). * You have a protein sequence and search for remote homologues (i.e., homologues with <25% sequence identity)? We find secondary structure and accessibility motifs similar between a known structure and your protein by prediction-based threading (examples for: request; and output). * You have a multiple sequence alignment and want to obtain a prediction of 1D structure based on that alignment? We use your alignment as input to the methods predicting secondary structure, solvent accessibility and transmembrane helices (examples for: request; and output). * You have a list of sequences not in current databases and want it to be used for 1D predictions? We align your sequences and use the resulting alignment as input to the structure and function (examples for: request; and output). * You have a prediction of secondary structure and accessibility and search similar motifs in known structures? We base the threading procedure on your prediction (examples for: request; and output). * You have a prediction and an observation of secondary structure and you want to compile the prediction accuracy? We compile per-residue and per-segment based score for the evaluation of prediction accuracy (examples for: request; and output). QUOTE and COPYRIGHT Who are we? 
* Current team: o Burkhard Rost (CUBIC, Columbia Univ, New York / LION, Heidelberg WWW, rost@columbia.edu) pioneered the PredictProtein service; wrote the PHD prediction programs: PHDsec, PHDacc, PHDhtm, and PHDtopology; developed the prediction-based threading method PHDthreader/TOPITS; programmed, and proposed the scores compiled by EvalSec; hacked some of the PredictProtein scripts; is helping to keep the service up and running, and is responsible for the documentation. o Guy Yachdav (CUBIC, Columbia Univ, New York WWW, yachdav@rostlab.org) is currently maintaining the PredictProtein server and providing on-going support. o Laszlo Kajan - visualPP o Joe Karlin - web master * Others who contributed (in the past): o Reinhard Schneider (now EMBL, Heidelberg) wrote the program MaxHom for multiple sequence, and 1D structure alignments, and helped the service take off. o Antoine de Daruvar (now Univ. Bordeaux and LION, Heidelberg) rewrote the scripts managing requests, and maintained the service running for twelve months. o Volker Eyrich (Chemistry Dept, Columbia Univ, New York) wrote the scripts for the META server which allows you to access a large variety of selected servers world-wide from a single-page interface. o Jinfeng Liu provided scientific support for the PredictProtein server. o Chris Sander (now Memorial Sloan Kettering Medical Center) organized resources, contributed ideas, and simulated the grand guru. * Authors of other programs used: o Stephen F Altschul and Samuel Karlin (NCBI, WWW, altschul@ncbi.nlm.nih.gov) wrote the initial database search bestseller BLAST. o Stephen F Altschul and colleagues (NCBI, WWW, altschul@ncbi.nlm.nih.gov) wrote the recent hit PSI-BLAST. o Amos Bairoch (Geneva, WWW, bairoch@cmu.unige.ch) maintains SWISS-PROT, and initialised ProSite (as well as numerous other services!). 
o Amos Bairoch, Philip Bucher, Kay Hofmann (Geneva, WWW, bairoch@cmu.unige.ch) maintain ProSite, and wrote the scripts returning the ProSite output. o Nigel Brown (London, WWW,