cinfony-1.2/ 0000775 0001750 0001750 00000000000 12061452073 012737 5 ustar noel noel 0000000 0000000 cinfony-1.2/cinfony/ 0000775 0001750 0001750 00000000000 12061452073 014404 5 ustar noel noel 0000000 0000000 cinfony-1.2/cinfony/rdk.py 0000664 0001750 0001750 00000052502 12061452051 015536 0 ustar noel noel 0000000 0000000 #-*. coding: utf-8 -*-
## Copyright (c) 2008-2011, Noel O'Boyle; 2012, Adrià Cereto-Massagué
## All rights reserved.
##
## This file is part of Cinfony.
## The contents are covered by the terms of the BSD license
## which is included in the file LICENSE_BSD.txt.
"""
rdkit - A Cinfony module for accessing the RDKit from CPython
Global variables:
Chem and AllChem - the underlying RDKit Python bindings
informats - a dictionary of supported input formats
outformats - a dictionary of supported output formats
descs - a list of supported descriptors
fps - a list of supported fingerprint types
forcefields - a list of supported forcefields
"""
import os
from rdkit import Chem
from rdkit.Chem import AllChem, Draw
from rdkit.Chem import Descriptors
_descDict = dict(Descriptors.descList)
import rdkit.DataStructs
import rdkit.Chem.MACCSkeys
import rdkit.Chem.AtomPairs.Pairs
import rdkit.Chem.AtomPairs.Torsions
# PIL and Tkinter are optional: they are only needed to display a depiction
# on screen. If any of the three imports fails, all three names are set to
# None so that later code can test either `tk` or `PILtk` without running
# into a NameError.
try:
    import Tkinter as tk
    import Image as PIL
    import ImageTk as PILtk
except ImportError:
    tk = PIL = PILtk = None
# Aggdraw is an optional drawing backend.
try:
    import aggdraw
    from rdkit.Chem.Draw import aggCanvas
except ImportError:
    aggdraw = aggCanvas = None
fps = ['rdkit', 'layered', 'maccs', 'atompairs', 'torsions', 'morgan']
"""A list of supported fingerprint types"""
# list() so that this is a real list (as documented) on every Python version
descs = list(_descDict.keys())
"""A list of supported descriptors"""

# Every format code known to this module, with a human-readable description.
_formats = {'smi': "SMILES",
            'can': "Canonical SMILES",
            'mol': "MDL MOL file",
            'mol2': "Tripos MOL2 file",
            'sdf': "MDL SDF file",
            'inchi': "InChI",
            'inchikey': "InChIKey"}
# Format codes that are excluded from informats / outformats respectively.
_notinformats = ['can', 'inchikey']
_notoutformats = ['mol2']
if not Chem.INCHI_AVAILABLE:
    # Without InChI support in this RDKit build, hide the InChI formats.
    _notinformats += ['inchi']
    _notoutformats += ['inchi', 'inchikey']

informats = dict([(_x, _formats[_x]) for _x in _formats
                  if _x not in _notinformats])
"""A dictionary of supported input formats"""
outformats = dict([(_x, _formats[_x]) for _x in _formats
                   if _x not in _notoutformats])
"""A dictionary of supported output formats"""

# Maps a lower-case forcefield name to the RDKit function that optimizes with it.
_forcefields = {'uff': AllChem.UFFOptimizeMolecule}
forcefields = list(_forcefields.keys())
"""A list of supported forcefields"""
def readfile(format, filename):
    """Iterate over the molecules in a file.

    Required parameters:
       format - see the informats variable for a list of available
                input formats
       filename

    Raises IOError if the file does not exist and ValueError if the
    format is not recognised. Both checks happen when readfile() is
    called, not when the returned iterator is first advanced.

    You can access the first molecule in a file using the next() method
    of the iterator:
        mol = readfile("smi", "myfile.smi").next()

    You can make a list of the molecules in a file using:
        mols = list(readfile("smi", "myfile.smi"))

    You can iterate over the molecules in a file as shown in the
    following code snippet:
    >>> atomtotal = 0
    >>> for mol in readfile("sdf", "head.sdf"):
    ...     atomtotal += len(mol.atoms)
    ...
    >>> print atomtotal
    43
    """
    if not os.path.isfile(filename):
        raise IOError("No such file: '%s'" % filename)
    format = format.lower()
    # The SDF and SMILES suppliers are created eagerly (outside the
    # generators) so that errors in opening the file are reported here.
    # Only the iteration itself is deferred to the generator.
    if format == "sdf":
        iterator = Chem.SDMolSupplier(filename)

        def sdf_reader():
            for mol in iterator:
                yield Molecule(mol)
        return sdf_reader()
    elif format == "mol":
        def mol_reader():
            yield Molecule(Chem.MolFromMolFile(filename))
        return mol_reader()
    elif format == "mol2":
        def mol2_reader():
            yield Molecule(Chem.MolFromMol2File(filename))
        return mol2_reader()
    elif format == "smi":
        iterator = Chem.SmilesMolSupplier(filename, delimiter=" \t",
                                          titleLine=False)

        def smi_reader():
            for mol in iterator:
                yield Molecule(mol)
        return smi_reader()
    elif format == 'inchi' and Chem.INCHI_AVAILABLE:
        def inchi_reader():
            # One InChI string per line. The 'with' block guarantees the
            # file handle is released once the generator finishes or is
            # closed.
            with open(filename, 'r') as inchifile:
                for line in inchifile:
                    inchi = line.strip()
                    if not inchi:
                        continue  # a blank line is not a molecule
                    yield Molecule(Chem.inchi.MolFromInchi(inchi))
        return inchi_reader()
    else:
        raise ValueError("%s is not a recognised RDKit format" % format)
def readstring(format, string):
    """Read in a molecule from a string.

    Required parameters:
       format - see the informats variable for a list of available
                input formats
       string

    Raises ValueError if the format is not recognised and IOError if
    the string cannot be converted to a molecule.

    Example:
    >>> input = "C1=CC=CS1"
    >>> mymol = readstring("smi", input)
    >>> len(mymol.atoms)
    5
    """
    format = format.lower()
    if format == "mol":
        mol = Chem.MolFromMolBlock(string)
    elif format == "mol2":
        mol = Chem.MolFromMol2Block(string)
    elif format == "smi":
        mol = Chem.MolFromSmiles(string)
    elif format == 'inchi' and Chem.INCHI_AVAILABLE:
        mol = Chem.inchi.MolFromInchi(string)
    else:
        raise ValueError("%s is not a recognised RDKit format" % format)
    if not mol:
        # The parser produced no usable molecule for this input.
        raise IOError("Failed to convert '%s' to format '%s'" % (
            string, format))
    return Molecule(mol)
class Outputfile(object):
    """Represent a file to which *output* is to be sent.

    Required parameters:
       format - see the outformats variable for a list of available
                output formats (case-insensitive)
       filename

    Optional parameters:
       overwrite -- if the output file already exists, should it
                    be overwritten? (default is False)

    Methods:
       write(molecule)
       close()
    """
    def __init__(self, format, filename, overwrite=False):
        # Lower-cased for consistency with readfile() and Molecule.write()
        self.format = format.lower()
        self.filename = filename
        if not overwrite and os.path.isfile(self.filename):
            raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % self.filename)
        if self.format == "sdf":
            self._writer = Chem.SDWriter(self.filename)
        elif self.format == "smi":
            self._writer = Chem.SmilesWriter(self.filename, isomericSmiles=True)
        elif self.format in ('inchi', 'inchikey') and Chem.INCHI_AVAILABLE:
            # No RDKit writer is used for these: plain text, one per line
            self._writer = open(filename, 'w')
        else:
            raise ValueError("%s is not a recognised RDKit format" % format)
        self.total = 0  # The total number of molecules written to the file

    def write(self, molecule):
        """Write a molecule to the output file.

        Required parameters:
           molecule

        Raises IOError if the Outputfile has already been closed.
        """
        if not self.filename:
            raise IOError("Outputfile instance is closed.")
        if self.format in ('inchi', 'inchikey'):
            self._writer.write(molecule.write(self.format) + '\n')
        else:
            self._writer.write(molecule.Mol)
        self.total += 1

    def close(self):
        """Close the Outputfile to further writing.

        Calling close() on an already closed Outputfile does nothing.
        """
        if self.filename is None:
            return
        self.filename = None
        writer = self._writer
        del self._writer
        try:
            writer.flush()
        finally:
            # Release the underlying file explicitly instead of relying on
            # garbage collection. getattr() guards against a writer object
            # that does not offer close().
            closer = getattr(writer, "close", None)
            if closer is not None:
                closer()
class Molecule(object):
    """Represent an rdkit Molecule.

    Required parameter:
       Mol -- an RDKit Mol or any type of cinfony Molecule

    Attributes:
       atoms, data, formula, molwt, title

    Methods:
       addh(), calcfp(), calcdesc(), draw(), localopt(), make3D(), removeh(),
       write()

    The underlying RDKit Mol can be accessed using the attribute:
       Mol
    """
    # Marker attribute used by __init__ to recognise cinfony Molecules.
    _cinfony = True

    def __init__(self, Mol):
        if hasattr(Mol, "_cinfony"):
            # Another cinfony Molecule: rebuild it from its exchange
            # representation (a SMILES string or a MOL block).
            a, b = Mol._exchange
            if a == 0:
                molecule = readstring("smi", b)
            else:
                molecule = readstring("mol", b)
            Mol = molecule.Mol
        self.Mol = Mol

    @property
    def atoms(self):
        return [Atom(rdkatom) for rdkatom in self.Mol.GetAtoms()]

    @property
    def data(self):
        return MoleculeData(self.Mol)

    @property
    def molwt(self):
        return Descriptors.MolWt(self.Mol)

    @property
    def formula(self):
        return Descriptors.MolecularFormula(self.Mol)

    def _gettitle(self):
        # Note to self: maybe should implement the get() method for self.data
        if "_Name" in self.data:
            return self.data["_Name"]
        else:
            return ""

    def _settitle(self, val):
        self.Mol.SetProp("_Name", val)
    title = property(_gettitle, _settitle)

    @property
    def _exchange(self):
        # (0, SMILES) when there are no conformers, otherwise (1, MOL block)
        if self.Mol.GetNumConformers() == 0:
            return (0, self.write("smi"))
        else:
            return (1, self.write("mol"))

    def addh(self):
        """Add hydrogens."""
        self.Mol = Chem.AddHs(self.Mol)

    def removeh(self):
        """Remove hydrogens."""
        self.Mol = Chem.RemoveHs(self.Mol)

    def write(self, format="smi", filename=None, overwrite=False):
        """Write the molecule to a file or return a string.

        Optional parameters:
           format -- see the outformats variable for a list of available
                     output formats (default is "smi")
           filename -- default is None
           overwrite -- if the output file already exists, should it
                        be overwritten? (default is False)

        If a filename is specified, the result is written to a file.
        Otherwise, a string is returned containing the result.

        Raises IOError if the file exists and overwrite is False, and
        ValueError if the format is not recognised.

        To write multiple molecules to the same file you should use
        the Outputfile class.
        """
        format = format.lower()
        if filename:
            if not overwrite and os.path.isfile(filename):
                raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % filename)
        if format == "smi":
            result = Chem.MolToSmiles(self.Mol, isomericSmiles=True, canonical=False)
        elif format == "can":
            result = Chem.MolToSmiles(self.Mol, isomericSmiles=True, canonical=True)
        elif format == "mol":
            result = Chem.MolToMolBlock(self.Mol)
        elif format in ('inchi', 'inchikey') and Chem.INCHI_AVAILABLE:
            result = Chem.inchi.MolToInchi(self.Mol)
            if format == 'inchikey':
                result = Chem.inchi.InchiToInchiKey(result)
        else:
            raise ValueError("%s is not a recognised RDKit format" % format)
        if filename:
            # The 'with' block closes the file deterministically. A newline
            # is appended after the result.
            with open(filename, "w") as outputfile:
                outputfile.write(result + "\n")
        else:
            return result

    def __iter__(self):
        """Iterate over the Atoms of the Molecule.

        This allows constructions such as the following:
           for atom in mymol:
               print atom
        """
        return iter(self.atoms)

    def __str__(self):
        return self.write()

    def calcdesc(self, descnames=None):
        """Calculate descriptor values.

        Optional parameter:
           descnames -- a list of names of descriptors

        If descnames is not specified, all available descriptors are
        calculated. See the descs variable for a list of available
        descriptors.

        Returns a dictionary mapping descriptor name to value. Raises
        ValueError for an unrecognised descriptor name.
        """
        if not descnames:
            descnames = descs
        ans = {}
        for descname in descnames:
            try:
                desc = _descDict[descname]
            except KeyError:
                raise ValueError("%s is not a recognised RDKit descriptor type" % descname)
            ans[descname] = desc(self.Mol)
        return ans

    def calcfp(self, fptype="rdkit", opt=None):
        """Calculate a molecular fingerprint.

        Optional parameters:
           fptype -- the fingerprint type (default is "rdkit"). See the
                     fps variable for a list of of available fingerprint
                     types.
           opt -- a dictionary of options for fingerprints. Currently only used
                  for radius and bitInfo in Morgan fingerprints.

        Raises ValueError for an unrecognised fingerprint type.
        """
        if opt is None:
            opt = {}
        fptype = fptype.lower()
        if fptype == "rdkit":
            fp = Fingerprint(Chem.RDKFingerprint(self.Mol))
        elif fptype == "layered":
            fp = Fingerprint(Chem.LayeredFingerprint(self.Mol))
        elif fptype == "maccs":
            fp = Fingerprint(Chem.MACCSkeys.GenMACCSKeys(self.Mol))
        elif fptype == "atompairs":
            # Going to leave as-is. See Atom Pairs documentation.
            fp = Chem.AtomPairs.Pairs.GetAtomPairFingerprintAsIntVect(self.Mol)
        elif fptype == "torsions":
            # Going to leave as-is.
            fp = Chem.AtomPairs.Torsions.GetTopologicalTorsionFingerprintAsIntVect(self.Mol)
        elif fptype == "morgan":
            info = opt.get('bitInfo', None)
            radius = opt.get('radius', 4)
            fp = Fingerprint(Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect(self.Mol, radius, bitInfo=info))
        else:
            raise ValueError("%s is not a recognised RDKit Fingerprint type" % fptype)
        return fp

    def draw(self, show=True, filename=None, update=False, usecoords=False):
        """Create a 2D depiction of the molecule.

        Optional parameters:
          show -- display on screen (default is True)
          filename -- write to file (default is None)
          update -- update the coordinates of the atoms to those
                    determined by the structure diagram generator
                    (default is False)
          usecoords -- don't calculate 2D coordinates, just use
                       the current coordinates (default is False)

        Aggdraw or Cairo is used for 2D depiction. Tkinter and
        Python Imaging Library are required for image display.
        """
        if not usecoords and update:
            AllChem.Compute2DCoords(self.Mol)
            usecoords = True

        mol = Chem.Mol(self.Mol.ToBinary())  # Clone
        if not usecoords:
            AllChem.Compute2DCoords(mol)
        if filename:  # Note: overwrite is allowed
            Draw.MolToFile(mol, filename)
        if show:
            # PILtk is the name the module's import block sets to None when
            # the Tkinter/PIL imports fail, so it is the one tested here.
            if not PILtk:
                errormessage = ("Tkinter or Python Imaging "
                                "Library not found, but is required for image "
                                "display. See installation instructions for "
                                "more information.")
                raise ImportError(errormessage)
            img = Draw.MolToImage(mol)

            root = tk.Tk()
            root.title((hasattr(self, "title") and self.title)
                       or self.__str__().rstrip())
            # pack() returns None, so each widget is created first and
            # packed in a separate statement.
            frame = tk.Frame(root, colormap="new", visual='truecolor')
            frame.pack()
            imagedata = PILtk.PhotoImage(img)
            label = tk.Label(frame, image=imagedata)
            label.pack()
            quitbutton = tk.Button(root, text="Close", command=root.destroy)
            quitbutton.pack(fill=tk.X)
            root.mainloop()

    def localopt(self, forcefield="uff", steps=500):
        """Locally optimize the coordinates.

        Optional parameters:
           forcefield -- default is "uff". See the forcefields variable
                         for a list of available forcefields.
           steps -- default is 500

        If the molecule does not have any coordinates, make3D() is
        called before the optimization.
        """
        forcefield = forcefield.lower()
        if self.Mol.GetNumConformers() == 0:
            self.make3D(forcefield)
        _forcefields[forcefield](self.Mol, maxIters=steps)

    def make3D(self, forcefield="uff", steps=50):
        """Generate 3D coordinates.

        Optional parameters:
           forcefield -- default is "uff". See the forcefields variable
                         for a list of available forcefields.
           steps -- default is 50

        Once coordinates are generated, a quick
        local optimization is carried out with 50 steps and the
        UFF forcefield. Call localopt() if you want
        to improve the coordinates further.

        Raises RuntimeError if the embedding fails.
        """
        forcefield = forcefield.lower()
        success = AllChem.EmbedMolecule(self.Mol)
        if success == -1:  # Failed
            # Retry once, starting from random coordinates
            success = AllChem.EmbedMolecule(self.Mol,
                                            useRandomCoords=True)
            if success == -1:
                raise RuntimeError("Embedding failed!")

        self.localopt(forcefield, steps)
class Atom(object):
    """Represent an rdkit Atom.

    Required parameters:
       Atom -- an RDKit Atom

    Attributes:
       atomicnum, coords, formalcharge

    The original RDKit Atom can be accessed using the attribute:
       Atom
    """

    def __init__(self, Atom):
        self.Atom = Atom

    @property
    def atomicnum(self):
        return self.Atom.GetAtomicNum()

    @property
    def coords(self):
        """The (x, y, z) position of the atom in the owning molecule.

        Raises AttributeError if the owning molecule has no conformers.
        """
        owningmol = self.Atom.GetOwningMol()
        if owningmol.GetNumConformers() == 0:
            raise AttributeError("Atom has no coordinates (0D structure)")
        idx = self.Atom.GetIdx()
        atomcoords = owningmol.GetConformer().GetAtomPosition(idx)
        return (atomcoords[0], atomcoords[1], atomcoords[2])

    @property
    def formalcharge(self):
        return self.Atom.GetFormalCharge()

    def __str__(self):
        # Evaluate the coords property once; it signals a missing
        # conformer by raising AttributeError.
        try:
            x, y, z = self.coords
        except AttributeError:
            return "Atom: %d (no coords)" % (self.atomicnum)
        return "Atom: %d (%.2f %.2f %.2f)" % (self.atomicnum, x, y, z)
class Smarts(object):
    """A Smarts Pattern Matcher

    Required parameters:
       smartspattern

    Methods:
       findall(molecule)

    Example:
    >>> mol = readstring("smi","CCN(CC)CC") # triethylamine
    >>> smarts = Smarts("[#6][#6]") # Matches an ethyl group
    >>> print smarts.findall(mol)
    [(0, 1), (3, 4), (5, 6)]

    The numbers returned are the indices (starting from 0) of the atoms
    that match the SMARTS pattern. In this case, there are three matches
    for each of the three ethyl groups in the molecule.
    """
    def __init__(self, smartspattern):
        """Initialise with a SMARTS pattern.

        Raises IOError if the pattern cannot be parsed.
        """
        self.rdksmarts = Chem.MolFromSmarts(smartspattern)
        if not self.rdksmarts:
            raise IOError("Invalid SMARTS pattern.")

    def findall(self, molecule):
        """Find all matches of the SMARTS pattern to a particular molecule.

        Required parameters:
           molecule
        """
        return molecule.Mol.GetSubstructMatches(self.rdksmarts)
class MoleculeData(object):
    """Store molecule data in a dictionary-type object

    Required parameters:
      Mol -- an RDKit Mol

    Methods and accessor methods are like those of a dictionary except
    that the data is retrieved on-the-fly from the underlying Mol.

    Example:
    >>> mol = readfile("sdf", 'head.sdf').next()
    >>> data = mol.data
    >>> print data
    {'Comment': 'CORINA 2.61 0041  25.10.2001', 'NSC': '1'}
    >>> print len(data), data.keys(), data.has_key("NSC")
    2 ['Comment', 'NSC'] True
    >>> print data['Comment']
    CORINA 2.61 0041  25.10.2001
    >>> data['Comment'] = 'This is a new comment'
    >>> for k,v in data.iteritems():
    ...    print k, "-->", v
    Comment --> This is a new comment
    NSC --> 1
    >>> del data['NSC']
    >>> print len(data), data.keys(), data.has_key("NSC")
    1 ['Comment'] False
    """
    def __init__(self, Mol):
        self._mol = Mol

    def _testforkey(self, key):
        # Raise KeyError (like a dict would) when the property is missing.
        if key not in self:
            raise KeyError("'%s'" % key)

    def keys(self):
        # list() so that callers always get a real list of names
        return list(self._mol.GetPropNames())

    def values(self):
        return [self._mol.GetProp(x) for x in self.keys()]

    def items(self):
        return list(zip(self.keys(), self.values()))

    def __iter__(self):
        return iter(self.keys())

    def iteritems(self):
        return iter(self.items())

    def __len__(self):
        return len(self.keys())

    def __contains__(self, key):
        return self._mol.HasProp(key)

    def __delitem__(self, key):
        self._testforkey(key)
        self._mol.ClearProp(key)

    def clear(self):
        # Iterate over a snapshot of the keys, since each deletion
        # changes the underlying property list.
        for key in self.keys():
            del self[key]

    def has_key(self, key):
        return key in self

    def update(self, dictionary):
        # items() is available on plain dicts (Python 2 and 3) as well as
        # on MoleculeData objects.
        for k, v in dictionary.items():
            self[k] = v

    def __getitem__(self, key):
        self._testforkey(key)
        return self._mol.GetProp(key)

    def __setitem__(self, key, value):
        # Values are stored as strings
        self._mol.SetProp(key, str(value))

    def __repr__(self):
        return dict(self.iteritems()).__repr__()
class Fingerprint(object):
    """A Molecular Fingerprint.

    Required parameters:
       fingerprint -- a vector calculated by one of the fingerprint methods

    Attributes:
       fp -- the underlying fingerprint object
       bits -- a list of bits set in the Fingerprint

    Methods:
       The "|" operator can be used to calculate the Tanimoto coeff. For example,
       given two Fingerprints 'a', and 'b', the Tanimoto coefficient is given by:
          tanimoto = a | b
    """
    def __init__(self, fingerprint):
        self.fp = fingerprint

    def __or__(self, other):
        return rdkit.DataStructs.FingerprintSimilarity(self.fp, other.fp)

    def __getattr__(self, attr):
        # Only called for attributes not found by normal lookup.
        if attr == "bits":
            # Create a bits attribute on-the-fly
            return list(self.fp.GetOnBits())
        raise AttributeError("Fingerprint has no attribute %s" % attr)

    def __str__(self):
        return ", ".join([str(x) for x in _compressbits(self.fp)])
def _compressbits(bitvector, wordsize=32):
"""Compress binary vector into vector of long ints.
This function is used by the Fingerprint class.
>>> _compressbits([0, 1, 0, 0, 0, 1], 2)
[2, 0, 2]
"""
ans = []
for start in range(0, len(bitvector), wordsize):
compressed = 0
for i in range(wordsize):
if i + start < len(bitvector) and bitvector[i + start]:
compressed += 2**i
ans.append(compressed)
return ans
if __name__=="__main__": #pragma: no cover
    # When run as a script, execute the doctests embedded in the
    # docstrings of this module.
    import doctest
    doctest.testmod()
cinfony-1.2/cinfony/pybel.py 0000664 0001750 0001750 00000071325 12061452051 016075 0 ustar noel noel 0000000 0000000 #-*. coding: utf-8 -*-
## Copyright (c) 2008-2012, Noel O'Boyle; 2012, Adrià Cereto-Massagué
## All rights reserved.
##
## This file is part of Cinfony.
## The contents are covered by the terms of the GPL v2 license
## which is included in the file LICENSE_GPLv2.txt.
"""
pybel - A Cinfony module for accessing Open Babel
Global variables:
ob - the underlying SWIG bindings for Open Babel
informats - a dictionary of supported input formats
outformats - a dictionary of supported output formats
descs - a list of supported descriptors
fps - a list of supported fingerprint types
forcefields - a list of supported forcefields
"""
import sys
import math
import os.path
import tempfile
if sys.platform[:4] == "java":
import org.openbabel as ob
import java.lang.System
java.lang.System.loadLibrary("openbabel_java")
_obfuncs = ob.openbabel_java
_obconsts = ob.openbabel_javaConstants
import javax
elif sys.platform[:3] == "cli":
import System
import clr
clr.AddReference('System.Windows.Forms')
clr.AddReference('System.Drawing')
from System.Windows.Forms import (
Application, DockStyle, Form, PictureBox, PictureBoxSizeMode
)
from System.Drawing import Image, Size
_obdotnet = os.environ["OBDOTNET"]
if _obdotnet[0] == '"': # Remove trailing quotes
_obdotnet = _obdotnet[1:-1]
clr.AddReferenceToFileAndPath(os.path.join(_obdotnet, "OBDotNet.dll"))
import OpenBabel as ob
_obfuncs = ob.openbabel_csharp
_obconsts = ob.openbabel_csharp
else:
import openbabel as ob
_obfuncs = _obconsts = ob
try:
import Tkinter as tk
import Image as PIL
import ImageTk as piltk
except ImportError: #pragma: no cover
tk = None
def _formatstodict(list):
if sys.platform[:4] == "java":
list = [list.get(i) for i in range(list.size())]
broken = [x.replace("[Read-only]", "").replace("[Write-only]","").split(" -- ") for x in list]
broken = [(x,y.strip()) for x,y in broken]
return dict(broken)
# Module-level OBConversion instance; used below to query the formats that
# this Open Babel build supports.
_obconv = ob.OBConversion()
# Module-level OBBuilder instance, shared by all Molecules.
_builder = ob.OBBuilder()
# Both dictionaries are built by _formatstodict() from the listings that
# Open Babel reports.
informats = _formatstodict(_obconv.GetSupportedInputFormat())
"""A dictionary of supported input formats"""
outformats = _formatstodict(_obconv.GetSupportedOutputFormat())
"""A dictionary of supported output formats"""
def _getplugins(findplugin, names):
plugins = dict([(x, findplugin(x)) for x in names if findplugin(x)])
return plugins
def _getpluginnames(ptype):
    """Return the names of the Open Babel plugins of the given type.

    ptype is the plugin type as passed to OBPlugin.ListAsVector, e.g.
    "descriptors", "fingerprints", "forcefields" or "ops". The name of
    a plugin is the first whitespace-delimited token of its listing.
    """
    # The string-vector class is spelled differently in the .NET bindings.
    # The [:3] slice matches the other "cli" checks in this module.
    if sys.platform[:3] == "cli":
        plugins = ob.VectorString()
    else:
        plugins = ob.vectorString()
    ob.OBPlugin.ListAsVector(ptype, None, plugins)
    if sys.platform[:4] == "java":
        # Under Jython the result is a Java vector rather than a sequence
        plugins = [plugins.get(i) for i in range(plugins.size())]
    return [x.split()[0] for x in plugins]
# For each plugin type there is a public list of names and a private
# dictionary mapping name -> plugin object. Each dictionary holds only the
# names for which the corresponding Find function returned a true value.
descs = _getpluginnames("descriptors")
"""A list of supported descriptors"""
_descdict = _getplugins(ob.OBDescriptor.FindType, descs)
# Fingerprint and forcefield names are stored lower-cased.
fps = [_x.lower() for _x in _getpluginnames("fingerprints")]
"""A list of supported fingerprint types"""
_fingerprinters = _getplugins(ob.OBFingerprint.FindFingerprint, fps)
forcefields = [_x.lower() for _x in _getpluginnames("forcefields")]
"""A list of supported forcefields"""
_forcefields = _getplugins(ob.OBForceField.FindType, forcefields)
operations = _getpluginnames("ops")
"""A list of supported operations"""
_operations = _getplugins(ob.OBOp.FindType, operations)
def readfile(format, filename, opt=None):
    """Iterate over the molecules in a file.

    Required parameters:
       format - see the informats variable for a list of available
                input formats
       filename

    Optional parameters:
       opt    - a dictionary of format-specific options
                For format options with no parameters, specify the
                value as None.

    Raises ValueError if the format is not recognised and IOError if
    the file does not exist.

    You can access the first molecule in a file using the next() method
    of the iterator (or the next() built-in function in Python 3):
        mol = readfile("smi", "myfile.smi").next() # Python 2
        mol = next(readfile("smi", "myfile.smi"))  # Python 3

    You can make a list of the molecules in a file using:
        mols = list(readfile("smi", "myfile.smi"))

    You can iterate over the molecules in a file as shown in the
    following code snippet:
    >>> atomtotal = 0
    >>> for mol in readfile("sdf", "head.sdf"):
    ...     atomtotal += len(mol.atoms)
    ...
    >>> print atomtotal
    43
    """
    if opt is None:
        opt = {}
    obconversion = ob.OBConversion()
    formatok = obconversion.SetInFormat(format)
    # Validate the arguments before applying any options, as readstring()
    # does.
    if not formatok:
        raise ValueError("%s is not a recognised Open Babel format" % format)
    if not os.path.isfile(filename):
        raise IOError("No such file: '%s'" % filename)
    for k, v in opt.items():
        if v is None:
            obconversion.AddOption(k, obconversion.INOPTIONS)
        else:
            obconversion.AddOption(k, obconversion.INOPTIONS, str(v))

    def filereader():
        # ReadFile() reads the first molecule; Read() reads each
        # subsequent one. Reading stops when either returns a false value.
        obmol = ob.OBMol()
        notatend = obconversion.ReadFile(obmol, filename)
        while notatend:
            yield Molecule(obmol)
            # A fresh OBMol per molecule, so that yielded Molecules do not
            # share state.
            obmol = ob.OBMol()
            notatend = obconversion.Read(obmol)
    return filereader()
def readstring(format, string, opt=None):
    """Read in a molecule from a string.

    Required parameters:
       format - see the informats variable for a list of available
                input formats
       string

    Optional parameters:
       opt    - a dictionary of format-specific options
                For format options with no parameters, specify the
                value as None.

    Raises ValueError if the format is not recognised and IOError if
    the string cannot be converted to a molecule.

    Example:
    >>> input = "C1=CC=CS1"
    >>> mymol = readstring("smi", input)
    >>> len(mymol.atoms)
    5
    """
    if opt is None:
        opt = {}

    obmol = ob.OBMol()
    obconversion = ob.OBConversion()

    formatok = obconversion.SetInFormat(format)
    if not formatok:
        raise ValueError("%s is not a recognised Open Babel format" % format)
    for k, v in opt.items():
        if v is None:
            obconversion.AddOption(k, obconversion.INOPTIONS)
        else:
            obconversion.AddOption(k, obconversion.INOPTIONS, str(v))

    success = obconversion.ReadString(obmol, string)
    if not success:
        raise IOError("Failed to convert '%s' to format '%s'" % (
            string, format))
    return Molecule(obmol)
class Outputfile(object):
    """Represent a file to which *output* is to be sent.

    Although it's possible to write a single molecule to a file by
    calling the write() method of a molecule, if multiple molecules
    are to be written to the same file you should use the Outputfile
    class.

    Required parameters:
       format - see the outformats variable for a list of available
                output formats
       filename

    Optional parameters:
       overwrite -- if the output file already exists, should it
                    be overwritten? (default is False)
       opt -- a dictionary of format-specific options
              For format options with no parameters, specify the
              value as None.

    Methods:
       write(molecule)
       close()
    """
    def __init__(self, format, filename, overwrite=False, opt=None):
        if opt is None:
            opt = {}
        self.format = format
        self.filename = filename
        if not overwrite and os.path.isfile(self.filename):
            raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % self.filename)

        self.obConversion = ob.OBConversion()
        formatok = self.obConversion.SetOutFormat(self.format)
        if not formatok:
            raise ValueError("%s is not a recognised Open Babel format" % format)

        for k, v in opt.items():
            if v is None:
                self.obConversion.AddOption(k, self.obConversion.OUTOPTIONS)
            else:
                self.obConversion.AddOption(k, self.obConversion.OUTOPTIONS, str(v))
        self.total = 0  # The total number of molecules written to the file

    def write(self, molecule):
        """Write a molecule to the output file.

        Required parameters:
           molecule

        Raises IOError if the Outputfile has already been closed.
        """
        if not self.filename:
            raise IOError("Outputfile instance is closed.")

        if self.total == 0:
            # The first molecule is written with WriteFile(), which takes
            # the filename; later ones are written with Write().
            self.obConversion.WriteFile(molecule.OBMol, self.filename)
        else:
            self.obConversion.Write(molecule.OBMol)
        self.total += 1

    def close(self):
        """Close the Outputfile to further writing.

        Calling close() on an already closed Outputfile does nothing.
        """
        if self.filename is None:
            return
        self.obConversion.CloseOutFile()
        self.filename = None
class Molecule(object):
"""Represent a Pybel Molecule.
Required parameter:
OBMol -- an Open Babel OBMol or any type of cinfony Molecule
Attributes:
atoms, charge, conformers, data, dim, energy, exactmass, formula,
molwt, spin, sssr, title, unitcell.
(refer to the Open Babel library documentation for more info).
Methods:
addh(), calcfp(), calcdesc(), draw(), localopt(), make3D(), removeh(),
write()
The underlying Open Babel molecule can be accessed using the attribute:
OBMol
"""
_cinfony = True
def __init__(self, OBMol):
if hasattr(OBMol, "_cinfony"):
a, b = OBMol._exchange
if a == 0:
mol = readstring("smi", b)
else:
mol = readstring("mol", b)
OBMol = mol.OBMol
self.OBMol = OBMol
@property
def atoms(self):
return [ Atom(self.OBMol.GetAtom(i+1)) for i in range(self.OBMol.NumAtoms()) ]
@property
def charge(self): return self.OBMol.GetTotalCharge()
@property
def conformers(self): return self.OBMol.GetConformers()
@property
def data(self): return MoleculeData(self.OBMol)
@property
def dim(self): return self.OBMol.GetDimension()
@property
def energy(self): return self.OBMol.GetEnergy()
@property
def exactmass(self): return self.OBMol.GetExactMass()
@property
def formula(self): return self.OBMol.GetFormula()
@property
def molwt(self): return self.OBMol.GetMolWt()
@property
def spin(self): return self.OBMol.GetTotalSpinMultiplicity()
@property
def sssr(self): return self.OBMol.GetSSSR()
def _gettitle(self): return self.OBMol.GetTitle()
def _settitle(self, val): self.OBMol.SetTitle(val)
title = property(_gettitle, _settitle)
@property
def unitcell(self):
unitcell_index = _obconsts.UnitCell
if sys.platform[:3] == "cli":
unitcell_index = System.UInt32(unitcell_index)
unitcell = self.OBMol.GetData(unitcell_index)
if unitcell:
if sys.platform[:3] != "cli":
return _obfuncs.toUnitCell(unitcell)
else:
return unitcell.Downcast[ob.OBUnitCell]()
else:
raise AttributeError("Molecule has no attribute 'unitcell'")
@property
def _exchange(self):
if self.OBMol.HasNonZeroCoords():
return (1, self.write("mol"))
else:
return (0, self.write("can").split()[0])
def __iter__(self):
"""Iterate over the Atoms of the Molecule.
This allows constructions such as the following:
for atom in mymol:
print atom
"""
return iter(self.atoms)
def calcdesc(self, descnames=[]):
"""Calculate descriptor values.
Optional parameter:
descnames -- a list of names of descriptors
If descnames is not specified, all available descriptors are
calculated. See the descs variable for a list of available
descriptors.
"""
if not descnames:
descnames = descs
ans = {}
for descname in descnames:
try:
desc = _descdict[descname]
except KeyError:
raise ValueError("%s is not a recognised Open Babel descriptor type" % descname)
ans[descname] = desc.Predict(self.OBMol)
return ans
def calcfp(self, fptype="FP2"):
"""Calculate a molecular fingerprint.
Optional parameters:
fptype -- the fingerprint type (default is "FP2"). See the
fps variable for a list of of available fingerprint
types.
"""
if sys.platform[:3] == "cli":
fp = ob.VectorUInt()
else:
fp = ob.vectorUnsignedInt()
fptype = fptype.lower()
try:
fingerprinter = _fingerprinters[fptype]
except KeyError:
raise ValueError("%s is not a recognised Open Babel Fingerprint type" % fptype)
fingerprinter.GetFingerprint(self.OBMol, fp)
return Fingerprint(fp)
def write(self, format="smi", filename=None, overwrite=False, opt=None):
"""Write the molecule to a file or return a string.
Optional parameters:
format -- see the informats variable for a list of available
output formats (default is "smi")
filename -- default is None
overwite -- if the output file already exists, should it
be overwritten? (default is False)
opt -- a dictionary of format specific options
For format options with no parameters, specify the
value as None.
If a filename is specified, the result is written to a file.
Otherwise, a string is returned containing the result.
To write multiple molecules to the same file you should use
the Outputfile class.
"""
if opt == None:
opt = {}
obconversion = ob.OBConversion()
formatok = obconversion.SetOutFormat(format)
if not formatok:
raise ValueError("%s is not a recognised Open Babel format" % format)
for k, v in opt.items():
if v == None:
obconversion.AddOption(k, obconversion.OUTOPTIONS)
else:
obconversion.AddOption(k, obconversion.OUTOPTIONS, str(v))
if filename:
if not overwrite and os.path.isfile(filename):
raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % filename)
obconversion.WriteFile(self.OBMol,filename)
obconversion.CloseOutFile()
else:
return obconversion.WriteString(self.OBMol)
def localopt(self, forcefield="mmff94", steps=500):
"""Locally optimize the coordinates.
Optional parameters:
forcefield -- default is "mmff94". See the forcefields variable
for a list of available forcefields.
steps -- default is 500
If the molecule does not have any coordinates, make3D() is
called before the optimization. Note that the molecule needs
to have explicit hydrogens. If not, call addh().
"""
forcefield = forcefield.lower()
if self.dim != 3:
self.make3D(forcefield)
ff = _forcefields[forcefield]
success = ff.Setup(self.OBMol)
if not success:
return
ff.SteepestDescent(steps)
ff.GetCoordinates(self.OBMol)
## def globalopt(self, forcefield="MMFF94", steps=1000):
## if not (self.OBMol.Has2D() or self.OBMol.Has3D()):
## self.make3D()
## self.localopt(forcefield, 250)
## ff = _forcefields[forcefield]
## numrots = self.OBMol.NumRotors()
## if numrots > 0:
## ff.WeightedRotorSearch(numrots, int(math.log(numrots + 1) * steps))
## ff.GetCoordinates(self.OBMol)
def make3D(self, forcefield = "mmff94", steps = 50):
"""Generate 3D coordinates.
Optional parameters:
forcefield -- default is "mmff94". See the forcefields variable
for a list of available forcefields.
steps -- default is 50
Once coordinates are generated, hydrogens are added and a quick
local optimization is carried out with 50 steps and the
MMFF94 forcefield. Call localopt() if you want
to improve the coordinates further.
"""
forcefield = forcefield.lower()
_builder.Build(self.OBMol)
self.addh()
self.localopt(forcefield, steps)
def addh(self):
"""Add hydrogens."""
self.OBMol.AddHydrogens()
def removeh(self):
"""Remove hydrogens."""
self.OBMol.DeleteHydrogens()
def __str__(self):
return self.write()
def draw(self, show=True, filename=None, update=False, usecoords=False):
    """Create a 2D depiction of the molecule.

    Optional parameters:
      show -- display on screen (default is True)
      filename -- write to file (default is None)
      update -- update the coordinates of the atoms to those
                determined by the structure diagram generator
                (default is False)
      usecoords -- don't calculate 2D coordinates, just use
                   the current coordinates (default is False)

    Raises:
      ImportError -- the "_png2" output format is not available, or
                     Tkinter is needed for display but was not imported
      RuntimeError -- update was requested but removing hydrogens
                      changed the atom count, or (IronPython only) show
                      was requested without a filename

    When no filename is given the image goes to a temporary file, which
    is always removed again, even if writing or displaying fails.

    Tkinter and Python Imaging Library are required for image display.
    """
    obconversion = ob.OBConversion()
    formatok = obconversion.SetOutFormat("_png2")
    if not formatok:
        errormessage = ("PNG depiction support not found. You should compile "
                        "Open Babel with support for Cairo. See installation "
                        "instructions for more information.")
        raise ImportError(errormessage)

    # Need to copy to avoid removing hydrogens from self
    workingmol = Molecule(ob.OBMol(self.OBMol))
    workingmol.removeh()

    if not usecoords:
        _operations['gen2D'].Do(workingmol.OBMol)
    if update:
        if workingmol.OBMol.NumAtoms() != self.OBMol.NumAtoms():
            errormessage = ("It is not possible to update the original molecule "
                            "with the calculated coordinates, as the original "
                            "molecule contains explicit hydrogens for which no "
                            "coordinates have been calculated.")
            raise RuntimeError(errormessage)
        for i in range(workingmol.OBMol.NumAtoms()):
            # Open Babel atom indices start at 1
            self.OBMol.GetAtom(i + 1).SetVector(workingmol.OBMol.GetAtom(i + 1).GetVector())

    if filename:
        filedes = None
    else:
        if sys.platform[:3] == "cli" and show:
            errormessage = ("It is only possible to show the molecule if you "
                            "provide a filename. The reason for this is that I kept "
                            "having problems when using temporary files.")
            raise RuntimeError(errormessage)
        filedes, filename = tempfile.mkstemp()

    try:
        workingmol.write("_png2", filename=filename, overwrite=True)

        if show:
            if sys.platform[:4] == "java":
                image = javax.imageio.ImageIO.read(java.io.File(filename))
                frame = javax.swing.JFrame(visible=1)
                frame.getContentPane().add(javax.swing.JLabel(javax.swing.ImageIcon(image)))
                frame.setSize(300,300)
                frame.setDefaultCloseOperation(javax.swing.WindowConstants.DISPOSE_ON_CLOSE)
                frame.show()
            elif sys.platform[:3] == "cli":
                form = _MyForm()
                form.setup(filename, self.title)
                Application.Run(form)
            else:
                if not tk:
                    errormessage = ("Tkinter or Python Imaging "
                                    "Library not found, but is required for image "
                                    "display. See installation instructions for "
                                    "more information.")
                    raise ImportError(errormessage)
                root = tk.Tk()
                root.title((hasattr(self, "title") and self.title)
                           or self.__str__().rstrip())
                # pack() returns None, so keep the widget itself; otherwise
                # the label is parented to Tk's default root rather than
                # to this window.
                frame = tk.Frame(root, colormap="new", visual='truecolor')
                frame.pack()
                image = PIL.open(filename)
                # Must stay referenced while the window is open
                imagedata = piltk.PhotoImage(image)
                tk.Label(frame, image=imagedata).pack()
                tk.Button(root, text="Close", command=root.destroy).pack(fill=tk.X)
                root.mainloop()
    finally:
        # 0 is a valid descriptor, so compare against None explicitly
        if filedes is not None:
            os.close(filedes)
            os.remove(filename)
class Atom(object):
    """Represent a Pybel atom.

    Required parameter:
       OBAtom -- an Open Babel OBAtom

    Attributes:
       atomicmass, atomicnum, cidx, coords, coordidx, exactmass,
       formalcharge, heavyvalence, heterovalence, hyb, idx,
       implicitvalence, isotope, partialcharge, spin, type,
       valence, vector.

    (refer to the Open Babel library documentation for more info).

    Every attribute is read-only and is fetched from the wrapped OBAtom
    each time it is accessed.

    The original Open Babel atom can be accessed using the attribute:
       OBAtom
    """

    def __init__(self, OBAtom):
        self.OBAtom = OBAtom

    @property
    def coords(self):
        """(x, y, z) tuple from GetX(), GetY() and GetZ()."""
        wrapped = self.OBAtom
        return (wrapped.GetX(), wrapped.GetY(), wrapped.GetZ())

    @property
    def atomicmass(self):
        """Result of OBAtom.GetAtomicMass()."""
        return self.OBAtom.GetAtomicMass()

    @property
    def atomicnum(self):
        """Result of OBAtom.GetAtomicNum()."""
        return self.OBAtom.GetAtomicNum()

    @property
    def cidx(self):
        """Result of OBAtom.GetCIdx()."""
        return self.OBAtom.GetCIdx()

    @property
    def coordidx(self):
        """Result of OBAtom.GetCoordinateIdx()."""
        return self.OBAtom.GetCoordinateIdx()

    @property
    def exactmass(self):
        """Result of OBAtom.GetExactMass()."""
        return self.OBAtom.GetExactMass()

    @property
    def formalcharge(self):
        """Result of OBAtom.GetFormalCharge()."""
        return self.OBAtom.GetFormalCharge()

    @property
    def heavyvalence(self):
        """Result of OBAtom.GetHvyValence()."""
        return self.OBAtom.GetHvyValence()

    @property
    def heterovalence(self):
        """Result of OBAtom.GetHeteroValence()."""
        return self.OBAtom.GetHeteroValence()

    @property
    def hyb(self):
        """Result of OBAtom.GetHyb()."""
        return self.OBAtom.GetHyb()

    @property
    def idx(self):
        """Result of OBAtom.GetIdx()."""
        return self.OBAtom.GetIdx()

    @property
    def implicitvalence(self):
        """Result of OBAtom.GetImplicitValence()."""
        return self.OBAtom.GetImplicitValence()

    @property
    def isotope(self):
        """Result of OBAtom.GetIsotope()."""
        return self.OBAtom.GetIsotope()

    @property
    def partialcharge(self):
        """Result of OBAtom.GetPartialCharge()."""
        return self.OBAtom.GetPartialCharge()

    @property
    def spin(self):
        """Result of OBAtom.GetSpinMultiplicity()."""
        return self.OBAtom.GetSpinMultiplicity()

    @property
    def type(self):
        """Result of OBAtom.GetType()."""
        return self.OBAtom.GetType()

    @property
    def valence(self):
        """Result of OBAtom.GetValence()."""
        return self.OBAtom.GetValence()

    @property
    def vector(self):
        """Result of OBAtom.GetVector()."""
        return self.OBAtom.GetVector()

    def __str__(self):
        """Atomic number followed by the coordinates to two decimals."""
        x, y, z = self.coords
        return "Atom: %d (%.2f %.2f %.2f)" % (self.atomicnum, x, y, z)
def _findbits(fp, bitsperint):
    """Return the 1-based positions of the bits set in a list/vector.

    Each element of fp contributes bitsperint consecutive positions,
    with its least significant bit first. Under Jython the vector is
    first copied into a list through its get()/size() methods.

    This function is used by the Fingerprint class.

    >>> _findbits([13, 71], 8)
    [1, 3, 4, 9, 10, 11, 15]
    """
    if sys.platform[:4] == "java":
        fp = [fp.get(i) for i in range(fp.size())]
    setbits = []
    for wordindex, word in enumerate(fp):
        position = 1 + wordindex * bitsperint
        while word > 0:
            if word % 2:
                setbits.append(position)
            word >>= 1
            position += 1
    return setbits
class Fingerprint(object):
    """A Molecular Fingerprint.

    Required parameters:
       fingerprint -- a vector calculated by OBFingerprint.FindFingerprint()

    Attributes:
       fp -- the underlying fingerprint object
       bits -- a list of bits set in the Fingerprint

    Methods:
       The "|" operator can be used to calculate the Tanimoto coeff. For example,
       given two Fingerprints 'a', and 'b', the Tanimoto coefficient is given by:
          tanimoto = a | b
    """

    def __init__(self, fingerprint):
        self.fp = fingerprint

    def __or__(self, other):
        """Tanimoto coefficient between this fingerprint and other."""
        return ob.OBFingerprint.Tanimoto(self.fp, other.fp)

    @property
    def bits(self):
        """Positions (starting from 1) of the bits that are set."""
        wordsize = ob.OBFingerprint.Getbitsperint()
        return _findbits(self.fp, wordsize)

    def __str__(self):
        """The fingerprint's elements, joined with ", "."""
        if sys.platform[:4] == "java":
            # Jython vectors are read through get()/size()
            words = [self.fp.get(i) for i in range(self.fp.size())]
        else:
            words = self.fp
        return ", ".join(map(str, words))
class Smarts(object):
    """A Smarts Pattern Matcher

    Required parameters:
       smartspattern

    Methods:
       findall(molecule)

    Example:
    >>> mol = readstring("smi","CCN(CC)CC") # triethylamine
    >>> smarts = Smarts("[#6][#6]") # Matches an ethyl group
    >>> print smarts.findall(mol)
    [(1, 2), (4, 5), (6, 7)]

    The numbers returned are the indices (starting from 1) of the atoms
    that match the SMARTS pattern. In this case, there are three matches
    for each of the three ethyl groups in the molecule.
    """

    def __init__(self, smartspattern):
        """Initialise with a SMARTS pattern.

        Raises IOError if Open Babel rejects the pattern.
        """
        pattern = ob.OBSmartsPattern()
        self.obsmarts = pattern
        if not pattern.Init(smartspattern):
            raise IOError("Invalid SMARTS pattern")

    def findall(self, molecule):
        """Find all matches of the SMARTS pattern to a particular molecule.

        Required parameters:
           molecule

        Returns a list built from the pattern's unique match list.
        """
        self.obsmarts.Match(molecule.OBMol)
        matches = self.obsmarts.GetUMapList()
        if sys.platform[:4] == "java":
            # Jython vectors are read through get()/size()
            return [matches.get(i) for i in range(matches.size())]
        return list(matches)
class MoleculeData(object):
    """Store molecule data in a dictionary-type object

    Required parameters:
      obmol -- an Open Babel OBMol

    Methods and accessor methods are like those of a dictionary except
    that the data is retrieved on-the-fly from the underlying OBMol.

    Example:
    >>> mol = readfile("sdf", 'head.sdf').next() # Python 2
    >>> # mol = next(readfile("sdf", 'head.sdf')) # Python 3
    >>> data = mol.data
    >>> print data
    {'Comment': 'CORINA 2.61 0041 25.10.2001', 'NSC': '1'}
    >>> print len(data), data.keys(), data.has_key("NSC")
    2 ['Comment', 'NSC'] True
    >>> print data['Comment']
    CORINA 2.61 0041 25.10.2001
    >>> data['Comment'] = 'This is a new comment'
    >>> for k,v in data.items():
    ...    print k, "-->", v
    Comment --> This is a new comment
    NSC --> 1
    >>> del data['NSC']
    >>> print len(data), data.keys(), data.has_key("NSC")
    1 ['Comment'] False
    """

    def __init__(self, obmol):
        self._mol = obmol

    def _data(self):
        """Return the molecule's pair-data and comment-data entries."""
        entries = self._mol.GetData()
        if sys.platform[:4] == "java":
            entries = [entries.get(i) for i in range(entries.size())]
        selected = []
        for entry in entries:
            if (entry.GetDataType() == _obconsts.PairData or
                    entry.GetDataType() == _obconsts.CommentData):
                selected.append(entry)
        if sys.platform[:3] == "cli":
            return selected
        # Outside IronPython the generic entries are converted first
        return [_obfuncs.toPairData(entry) for entry in selected]

    def _testforkey(self, key):
        """Raise KeyError unless key is present."""
        if key in self:
            return
        raise KeyError("'%s'" % key)

    def keys(self):
        """List of the entries' attribute names."""
        return [entry.GetAttribute() for entry in self._data()]

    def values(self):
        """List of the entries' values."""
        return [entry.GetValue() for entry in self._data()]

    def items(self):
        """Iterator over (key, value) pairs."""
        pairs = zip(self.keys(), self.values())
        return iter(pairs)

    def __iter__(self):
        return iter(self.keys())

    def iteritems(self): # Can remove for Python 3
        return self.items()

    def __len__(self):
        return len(self._data())

    def __contains__(self, key):
        return self._mol.HasData(key)

    def __delitem__(self, key):
        self._testforkey(key)
        entry = self._mol.GetData(key)
        self._mol.DeleteData(entry)

    def clear(self):
        """Delete every entry."""
        for key in self:
            del self[key]

    def has_key(self, key):
        return key in self

    def update(self, dictionary):
        """Copy every item of dictionary into the molecule's data."""
        for key, value in dictionary.items():
            self[key] = value

    def __getitem__(self, key):
        self._testforkey(key)
        entry = self._mol.GetData(key)
        if sys.platform[:3] != "cli":
            entry = _obfuncs.toPairData(entry)
        return entry.GetValue()

    def __setitem__(self, key, value):
        """Store str(value) under key, reusing an existing entry if any."""
        text = str(value)
        if key not in self:
            pairdata = ob.OBPairData()
            pairdata.SetAttribute(key)
            pairdata.SetValue(text)
            self._mol.CloneData(pairdata)
            return
        if sys.platform[:3] != "cli":
            pairdata = _obfuncs.toPairData(self._mol.GetData(key))
        else:
            pairdata = self._mol.GetData(key).Downcast[ob.OBPairData]()
        pairdata.SetValue(text)

    def __repr__(self):
        return repr(dict(self.items()))
if sys.platform[:3] == "cli":
    class _MyForm(Form):
        """Windows Forms window that shows one image file (IronPython only)."""

        def __init__(self):
            Form.__init__(self)

        def setup(self, filename, title):
            """Load the image in filename, fill the form with it and show it."""
            # adjust the form's client area size to the picture
            self.ClientSize = Size(300, 300)
            self.Text = title

            self.filename = filename
            self.image = Image.FromFile(self.filename)
            picture = PictureBox()

            # this will fit the image to the form
            picture.SizeMode = PictureBoxSizeMode.StretchImage
            picture.Image = self.image
            # fit the picture box to the frame
            picture.Dock = DockStyle.Fill

            self.Controls.Add(picture)
            self.Show()
# When run as a script, execute the doctests embedded in this module's
# docstrings and report each one (verbose mode).
if __name__=="__main__": #pragma: no cover
    import doctest
    doctest.testmod(verbose=True)
cinfony-1.2/cinfony/indy.py 0000664 0001750 0001750 00000053737 12061452051 015734 0 ustar noel noel 0000000 0000000 #-*. coding: utf-8 -*-
## Copyright (c) 2011, Noel O'Boyle; 2012, Adrià Cereto-Massagué
## All rights reserved.
##
## This file is part of Cinfony.
## The contents are covered by the terms of the GPL v3 license
## which is included in the file LICENSE_GPLv3.txt.
"""
indy - A Cinfony module for accessing Indigo from CPython, Jython or IronPython
Global variables:
indigo - the underlying Indigo() object
informats - a dictionary of supported input formats
outformats - a dictionary of supported output formats
fps - a list of supported fingerprint types
"""
import os
import sys
import tempfile
if sys.platform[:3] == "cli":
_indigonet = os.environ["INDIGONET"]
import clr
clr.AddReference('System.Windows.Forms')
clr.AddReference('System.Drawing')
clr.AddReferenceToFileAndPath(_indigonet + "\\indigo-dotnet.dll")
clr.AddReferenceToFileAndPath(_indigonet + "\\indigo-inchi-dotnet.dll")
clr.AddReferenceToFileAndPath(_indigonet + "\\indigo-renderer-dotnet.dll")
from System.Windows.Forms import (
Application, DockStyle, Form, PictureBox, PictureBoxSizeMode
)
from System.Drawing import Image, Size
elif sys.platform[:4] == "java":
import java, javax
if sys.platform[:3] == "cli" or sys.platform[:4] == "java":
from com.ggasoftware.indigo import Indigo, IndigoException, IndigoRenderer, IndigoInchi
else:
from indigo import Indigo, IndigoException
from indigo_renderer import IndigoRenderer
from indigo_inchi import IndigoInchi
# Module-wide Indigo session; the functions and classes below all go
# through this one object.
indigo = Indigo()
# InChI support bound to the same session (used for the "inchi" and
# "inchikey" formats).
indigoInchi = IndigoInchi(indigo)
# PIL and Tkinter
try:
import Tkinter as tk
import Image as PIL
import ImageTk as PILtk
except:
PILtk = None
# Names accepted by Molecule.calcfp()
fps = ["sim", "sub", "sub-res", "sub-tau", "full"]
"""A list of supported fingerprint types"""
# Format code -> human-readable description, for every format mentioned
# in informats/outformats below.
_formats = {'smi': "SMILES", 'can': "Canonical SMILES", "rdf": "MDL RDF file",
'mol': "MDL MOL file", 'sdf': "MDL SDF file",
'cml': "Chemical Markup Language",
'inchi': "InChI", 'inchikey': "InChIKey"}
# Subset of _formats advertised for reading
informats = dict([(_x, _formats[_x]) for _x in ['mol', 'sdf', 'rdf', 'smi',
'cml', 'inchi']])
"""A dictionary of supported input formats"""
# Subset of _formats advertised for writing
outformats = dict([(_x, _formats[_x]) for _x in ['mol', 'sdf', 'smi', 'can',
'cml', 'inchi', 'inchikey']])
"""A dictionary of supported output formats"""
def readfile(format, filename):
    """Iterate over the molecules in a file.

    Required parameters:
       format - one of "sdf", "rdf", "mol", "smi" or "cml"
                (case-insensitive)
       filename

    Raises:
       IOError -- filename is not an existing file
       ValueError -- format is not one of the formats listed above

    You can access the first molecule in a file using the next() method
    of the iterator:
        mol = readfile("smi", "myfile.smi").next()

    You can make a list of the molecules in a file using:
        mols = list(readfile("smi", "myfile.smi"))

    You can iterate over the molecules in a file as shown in the
    following code snippet:
    >>> atomtotal = 0
    >>> for mol in readfile("sdf", "head.sdf"):
    ...     atomtotal += len(mol.atoms)
    ...
    >>> print atomtotal
    43
    """
    if not os.path.isfile(filename):
        raise IOError("No such file: '%s'" % filename)
    format = format.lower()

    if format == "mol":
        # A MOL file holds one molecule; it is loaded when the returned
        # generator is first advanced.
        def mol_reader():
            yield Molecule(indigo.loadMoleculeFromFile(filename))
        return mol_reader()

    # Eagerly create the Indigo iterator in order to report errors in
    # the format and errors in opening the file right away, then hand
    # back a generator over it. All four iterator factories are methods
    # of the shared indigo session.
    if format == "sdf":
        iterator = indigo.iterateSDFile(filename)
    elif format == "rdf":
        iterator = indigo.iterateRDFile(filename)
    elif format == "smi":
        iterator = indigo.iterateSmilesFile(filename)
    elif format == "cml":
        iterator = indigo.iterateCMLFile(filename)
    else:
        raise ValueError("%s is not a recognised Indigo format" % format)

    def reader():
        for mol in iterator:
            yield Molecule(mol)
    return reader()
def readstring(format, string):
    """Read in a molecule from a string.

    Required parameters:
       format - see the informats variable for a list of available
                input formats (case-insensitive)
       string

    Raises ValueError for a format that is not in informats, and
    IOError when Indigo fails to load the string.

    Example:
    >>> input = "C1=CC=CS1"
    >>> mymol = readstring("smi", input)
    >>> len(mymol.atoms)
    5
    """
    format = format.lower()
    if format not in informats:
        raise ValueError("%s is not a recognised Indigo format" % format)

    # InChI strings are loaded through the InChI plugin, everything
    # else through the main Indigo session.
    if format == "inchi":
        loader = indigoInchi
    else:
        loader = indigo

    try:
        mol = loader.loadMolecule(string)
    except IndigoException:
        raise IOError("Failed to convert '%s' to format '%s'" % (
            string, format))
    return Molecule(mol)
class Outputfile(object):
    """Represent a file to which *output* is to be sent.

    Required parameters:
       format - one of "sdf", "cml", "rdf" or "smi" (case-insensitive)
       filename

    Optional parameters:
       overwrite -- if the output file already exists, should it
                   be overwritten? (default is False)

    Attributes:
       total -- the number of molecules written so far

    Methods:
       write(molecule)
       close()
    """

    def __init__(self, format, filename, overwrite=False):
        # Lower-cased like every other format argument in this module
        self.format = format.lower()
        self.filename = filename
        if not overwrite and os.path.isfile(self.filename):
            raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % self.filename)
        if self.format not in ("sdf", "cml", "rdf", "smi"):
            raise ValueError("%s is not supported for multimolecule output" % format)
        self._writer = indigo.writeFile(self.filename)
        self.total = 0 # The total number of molecules written to the file

        # CML and RDF output start with a header
        if self.format == "cml":
            self._writer.cmlHeader()
        elif self.format == "rdf":
            self._writer.rdfHeader()

    def write(self, molecule):
        """Write a molecule to the output file.

        Required parameters:
           molecule

        Raises IOError if the Outputfile has already been closed.
        """
        if not self.filename:
            raise IOError("Outputfile instance is closed.")
        if self.format == "sdf":
            self._writer.sdfAppend(molecule.Mol)
        elif self.format == "rdf":
            self._writer.rdfAppend(molecule.Mol)
        elif self.format == "cml":
            self._writer.cmlAppend(molecule.Mol)
        elif self.format == "smi":
            self._writer.smilesAppend(molecule.Mol)
        self.total += 1

    def close(self):
        """Close the Outputfile to further writing.

        Calling close() again on a closed Outputfile does nothing.
        """
        if not self.filename:
            # Already closed: the writer has been discarded
            return
        if self.format == "cml":
            self._writer.cmlFooter()
        self._writer.close()
        self.filename = None
        del self._writer
class Molecule(object):
    """Represent an Indigo Molecule.

    Required parameter:
       Mol -- an Indigo Mol or any type of cinfony Molecule

    Attributes:
       atoms, data, formula, molwt, title

    Methods:
       addh(), calcfp(), draw(), removeh(), write()

    The underlying Indigo Molecule can be accessed using the attribute:
       Mol
    """
    # Marker that lets other cinfony modules recognise this object
    _cinfony = True

    def __init__(self, Mol):
        if hasattr(Mol, "_cinfony"):
            # Another cinfony Molecule: rebuild it from its exchange
            # string (SMILES when the tag is 0, MOL file otherwise).
            a, b = Mol._exchange
            if a == 0:
                molecule = readstring("smi", b)
            else:
                molecule = readstring("mol", b)
            Mol = molecule.Mol

        self.Mol = Mol

    @property
    def atoms(self):
        """List of Atom objects, one per atom of the molecule."""
        return [Atom(atom) for atom in self.Mol.iterateAtoms()]

    @property
    def data(self):
        """Dictionary-like view (MoleculeData) of the molecule's properties."""
        return MoleculeData(self.Mol)

    @property
    def formula(self):
        """Result of Mol.grossFormula()."""
        return self.Mol.grossFormula()

    @property
    def molwt(self):
        """Result of Mol.molecularWeight()."""
        return self.Mol.molecularWeight()

    def _gettitle(self):
        return self.Mol.name()

    def _settitle(self, val):
        self.Mol.setName(val)

    title = property(_gettitle, _settitle)

    @property
    def _exchange(self):
        """(0, canonical SMILES) without Z coordinates, else (1, MOL file)."""
        if not self.Mol.hasZCoord():
            return (0, self.write("can"))
        else: # If 3D
            return (1, self.write("mol"))

    def addh(self):
        """Add hydrogens."""
        self.Mol.unfoldHydrogens()

    def removeh(self):
        """Remove hydrogens."""
        self.Mol.foldHydrogens()

    def write(self, format="smi", filename=None, overwrite=False):
        """Write the molecule to a file or return a string.

        Optional parameters:
           format -- see the outformats variable for a list of available
                     output formats (default is "smi")
           filename -- default is None
           overwrite -- if the output file already exists, should it
                       be overwritten? (default is False)

        Raises:
           IOError -- filename exists and overwrite is False
           ValueError -- format is not a supported output format

        If a filename is specified, the result is written to a file.
        Otherwise, a string is returned containing the result.

        To write multiple molecules to the same file you should use
        the Outputfile class.
        """
        format = format.lower()
        if filename:
            if not overwrite and os.path.isfile(filename):
                raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % filename)
        if format == "smi":
            result = self.Mol.smiles()
        elif format == "can":
            result = self.Mol.canonicalSmiles()
        elif format == "mol":
            result = self.Mol.molfile()
        elif format == "inchi":
            result = indigoInchi.getInchi(self.Mol)
        elif format == "inchikey":
            result = indigoInchi.getInchiKey(self.write("inchi"))
        elif format == "cml":
            result = self.Mol.cml()
        elif format == "sdf":
            # No sdf method so use a writeBuffer() as described by Dmitry
            buf = indigo.writeBuffer()
            buf.sdfAppend(self.Mol)
            result = buf.toString()
        else:
            raise ValueError("%s is not a recognised Indigo format" % format)
        if filename:
            # The with-block closes the file even if the write fails
            with open(filename, "w") as output:
                output.write(result)
        else:
            return result

    def __iter__(self):
        """Iterate over the Atoms of the Molecule.

        This allows constructions such as the following:
           for atom in mymol:
               print atom
        """
        return iter(self.atoms)

    def __str__(self):
        return self.write()

##    def calcdesc(self, descnames=[]):
##        """Calculate descriptor values.
##
##        Optional parameter:
##           descnames -- a list of names of descriptors
##
##        If descnames is not specified, all available descriptors are
##        calculated. See the descs variable for a list of available
##        descriptors.
##        """
##        if not descnames:
##            descnames = descs
##        ans = {}
##        for descname in descnames:
##            try:
##                desc = descDict[descname]
##            except KeyError:
##                raise ValueError, "%s is not a recognised RDKit descriptor type" % descname
##            ans[descname] = desc(self.Mol)
##        return ans

    def calcfp(self, fptype="sim"):
        """Calculate a molecular fingerprint.

        Optional parameters:
           fptype -- the fingerprint type (default is "sim"). See the
                     fps variable for a list of of available fingerprint
                     types.

        Raises ValueError for an unrecognised fingerprint type.
        """
        fptype = fptype.lower()
        if fptype not in ["sim", "sub", "sub-res", "sub-tau", "full"]:
            raise ValueError("%s is not a recognised Indigo Fingerprint type" % fptype)
        return Fingerprint(self.Mol.fingerprint(fptype))

    def draw(self, show=True, filename=None, update=False, usecoords=False):
        """Create a 2D depiction of the molecule.

        Optional parameters:
          show -- display on screen (default is True)
          filename -- write to file (default is None)
          update -- update the coordinates of the atoms to those
                    determined by the structure diagram generator
                    (default is False)
          usecoords -- don't calculate 2D coordinates, just use
                       the current coordinates (default is False)

        Raises:
          ImportError -- Tkinter or the Python Imaging Library is needed
                         for display but was not imported
          RuntimeError -- (IronPython only) show was requested without
                          a filename

        When no filename is given the image goes to a temporary file,
        which is always removed again, even if rendering or displaying
        fails.

        Tkinter and Python Imaging Library are required for image display.
        """
        if update:
            mol = self.Mol
        else:
            mol = self.Mol.clone()
        if not usecoords:
            mol.layout()
        if not (show or filename):
            return

        renderer = IndigoRenderer(indigo)
        indigo.setOption("render-output-format", "png")
        indigo.setOption("render-margins", 10, 10)
        indigo.setOption("render-coloring", "True")
        indigo.setOption("render-image-size", 300, 300)
        indigo.setOption("render-background-color", "1.0, 1.0, 1.0")
        # Options live on the shared indigo session, so always set the
        # comment: otherwise an untitled molecule would be drawn with
        # the title of whichever molecule was drawn before it.
        indigo.setOption("render-comment", self.title or "")

        if filename:
            filedes = None
        else:
            filedes, filename = tempfile.mkstemp()

        try:
            renderer.renderToFile(mol, filename)
            if show:
                self._show_image(filename, filedes is not None)
        finally:
            # 0 is a valid descriptor, so compare against None explicitly
            if filedes is not None:
                os.close(filedes)
                os.remove(filename)

    def _show_image(self, filename, temporary):
        """Display the image file in a window using the platform's toolkit.

        temporary tells whether filename is a temporary file created by
        draw(); displaying such a file is refused under IronPython.
        """
        if sys.platform[:4] == "java":
            image = javax.imageio.ImageIO.read(java.io.File(filename))
            frame = javax.swing.JFrame(visible=1)
            frame.getContentPane().add(javax.swing.JLabel(javax.swing.ImageIcon(image)))
            frame.setSize(300,300)
            frame.setDefaultCloseOperation(javax.swing.WindowConstants.DISPOSE_ON_CLOSE)
            frame.show()
        elif sys.platform[:3] == "cli":
            if temporary:
                errormessage = ("It is only possible to show the molecule if you "
                                "provide a filename. The reason for this is that I kept "
                                "having problems when using temporary files.")
                raise RuntimeError(errormessage)
            form = Form()
            form.ClientSize = Size(300, 300)
            form.Text = self.title
            image = Image.FromFile(filename)
            box = PictureBox()
            box.SizeMode = PictureBoxSizeMode.StretchImage
            box.Image = image
            box.Dock = DockStyle.Fill
            form.Controls.Add(box)
            form.Show()
            Application.Run(form)
        else:
            if not PILtk:
                errormessage = ("Tkinter or Python Imaging "
                                "Library not found, but is required for image "
                                "display. See installation instructions for "
                                "more information.")
                raise ImportError(errormessage)
            root = tk.Tk()
            root.title((hasattr(self, "title") and self.title)
                       or self.__str__().rstrip())
            # pack() returns None, so keep the widget itself; otherwise
            # the label is parented to Tk's default root rather than to
            # this window.
            frame = tk.Frame(root, colormap="new", visual='truecolor')
            frame.pack()
            image = PIL.open(filename)
            # Must stay referenced while the window is open
            imagedata = PILtk.PhotoImage(image)
            tk.Label(frame, image=imagedata).pack()
            tk.Button(root, text="Close", command=root.destroy).pack(fill=tk.X)
            root.mainloop()
class Atom(object):
    """Represent an Indigo Atom.

    Required parameters:
       Atom -- an Indigo Atom

    Attributes:
       atomicnum, coords, formalcharge

    The attributes are read-only and are fetched from the wrapped atom
    each time they are accessed.

    The original Indigo Atom can be accessed using the attribute:
       Atom
    """

    def __init__(self, Atom):
        self.Atom = Atom

    @property
    def atomicnum(self):
        """Result of Atom.atomicNumber()."""
        return self.Atom.atomicNumber()

    @property
    def coords(self):
        """Result of Atom.xyz() as a tuple."""
        return tuple(self.Atom.xyz())

    @property
    def formalcharge(self):
        """Result of Atom.charge()."""
        return self.Atom.charge()

    def __str__(self):
        """Atomic number plus, when available, the coordinates to two decimals."""
        if not hasattr(self, "coords"):
            return "Atom: %d (no coords)" % (self.atomicnum)
        number = self.atomicnum
        return "Atom: %d (%.2f %.2f %.2f)" % (number, self.coords[0],
                                              self.coords[1], self.coords[2])
class Smarts(object):
    """A Smarts Pattern Matcher

    Required parameters:
       smartspattern

    Methods:
       findall(molecule)

    Example:
    >>> mol = readstring("smi","CCN(CC)CC") # triethylamine
    >>> smarts = Smarts("[#6][#6]") # Matches an ethyl group
    >>> print smarts.findall(mol)
    [(0, 1), (3, 4), (5, 6)]

    The numbers returned are the indices (starting from 0) of the atoms
    that match the SMARTS pattern. In this case, there are three matches
    for each of the three ethyl groups in the molecule.
    """

    def __init__(self, smartspattern):
        """Initialise with a SMARTS pattern.

        Raises IOError if Indigo cannot load the pattern.
        """
        try:
            self.smarts = indigo.loadSmarts(smartspattern)
        except IndigoException:
            raise IOError("Invalid SMARTS pattern.")

    def findall(self, molecule):
        """Find all matches of the SMARTS pattern to a particular molecule.

        Required parameters:
           molecule

        Returns a list with one tuple per match; each tuple holds the
        index of the target atom mapped to each atom of the pattern.
        """
        matcher = indigo.substructureMatcher(molecule.Mol)
        # Collect all matches before walking the query atoms
        found = list(matcher.iterateMatches(self.smarts))
        return [tuple([hit.mapAtom(queryatom).index()
                       for queryatom in self.smarts.iterateAtoms()])
                for hit in found]
class MoleculeData(object):
    """Store molecule data in a dictionary-type object

    Required parameters:
      Mol -- an Indigo Mol

    Methods and accessor methods are like those of a dictionary except
    that the data is retrieved on-the-fly from the underlying Mol.
    Values are stored as strings (str() is applied on assignment).

    Example:
    >>> mol = readfile("sdf", 'head.sdf').next()
    >>> data = mol.data
    >>> print data
    {'Comment': 'CORINA 2.61 0041 25.10.2001', 'NSC': '1'}
    >>> print len(data), data.keys(), data.has_key("NSC")
    2 ['Comment', 'NSC'] True
    >>> print data['Comment']
    CORINA 2.61 0041 25.10.2001
    >>> data['Comment'] = 'This is a new comment'
    >>> for k,v in data.iteritems():
    ...    print k, "-->", v
    Comment --> This is a new comment
    NSC --> 1
    >>> del data['NSC']
    >>> print len(data), data.keys(), data.has_key("NSC")
    1 ['Comment'] False
    """

    def __init__(self, Mol):
        self._mol = Mol

    def _testforkey(self, key):
        """Raise KeyError unless the molecule has a property named key."""
        if not self._mol.hasProperty(key):
            raise KeyError("'%s'" % key)

    def keys(self):
        """List of property names."""
        return [prop.name() for prop in self._mol.iterateProperties()]

    def values(self):
        """List of property values."""
        return [prop.rawData() for prop in self._mol.iterateProperties()]

    def items(self):
        """List of (name, value) pairs."""
        return [(prop.name(), prop.rawData())
                for prop in self._mol.iterateProperties()]

    def __iter__(self):
        return iter(self.keys())

    def iteritems(self):
        return iter(self.items())

    def __len__(self):
        return len(self.keys())

    def __contains__(self, key):
        return self._mol.hasProperty(key)

    def __delitem__(self, key):
        self._testforkey(key)
        self._mol.removeProperty(key)

    def clear(self):
        """Remove every property."""
        # __iter__ walks a list snapshot of the keys, so deleting while
        # looping is safe here.
        for key in self:
            del self[key]

    def has_key(self, key):
        return key in self

    def update(self, dictionary):
        """Copy every item of dictionary into the molecule's properties.

        Only items() is required of dictionary, so plain dicts on any
        Python version and other dictionary-like objects all work.
        """
        for k, v in dictionary.items():
            self[k] = v

    def __getitem__(self, key):
        self._testforkey(key)
        return self._mol.getProperty(key)

    def __setitem__(self, key, value):
        self._mol.setProperty(key, str(value))

    def __repr__(self):
        return dict(self.iteritems()).__repr__()
class Fingerprint(object):
    """A Molecular Fingerprint.

    Required parameters:
       fingerprint -- a vector calculated by one of the fingerprint methods

    Attributes:
       fp -- the underlying fingerprint object
       bits -- a list of bits set in the Fingerprint

    Methods:
       The "|" operator can be used to calculate the Tanimoto coeff. For example,
       given two Fingerprints 'a', and 'b', the Tanimoto coefficient is given by:
          tanimoto = a | b
    """

    def __init__(self, fingerprint):
        self.fp = fingerprint

    def __or__(self, other):
        """Tanimoto similarity between this fingerprint and other."""
        return indigo.similarity(self.fp, other.fp, "tanimoto")

    def _buffer_to_int(self):
        """Return the fingerprint as a list of ints, one per hex digit.

        The fingerprint's string form is hexadecimal, so it contains the
        letters a-f as well as decimal digits; every character is
        therefore parsed in base 16 and yields a value from 0 to 15.
        """
        stringrep = self.fp.toString()
        return [int(digit, 16) for digit in stringrep]

    @property
    def bits(self):
        """Positions (starting from 1) of the bits that are set.

        Each hex digit contributes four consecutive positions, least
        significant bit first.
        NOTE(review): the numbering is internally consistent but has not
        been checked against Indigo's own bit ordering -- confirm if the
        absolute positions matter.
        """
        return _findbits(self._buffer_to_int(), 4)

    def __str__(self):
        return str(self._buffer_to_int())
def _toint(string):
    """Convert a string of digits to an int, mapping anything else to 0.

    Fingerprint strings sometimes contain characters that are not
    decimal digits; this helper turns such a character into 0 instead
    of failing on it.
    """
    return int(string) if string.isdigit() else 0
def _findbits(fp, bitsperint):
    """Return the 1-based positions of the bits set in a list/vector.

    Each element of fp contributes bitsperint consecutive positions,
    with its least significant bit first.

    This function is used by the Fingerprint class.

    >>> _findbits([13, 71], 8)
    [1, 3, 4, 9, 10, 11, 15]
    """
    setbits = []
    for wordindex, word in enumerate(fp):
        position = 1 + wordindex * bitsperint
        while word > 0:
            if word % 2:
                setbits.append(position)
            word >>= 1
            position += 1
    return setbits
def _compressbits(bitvector, wordsize=32):
    """Compress binary vector into vector of long ints.

    The vector is cut into consecutive groups of wordsize entries; in
    each group, entry i (counting from 0) adds 2**i to the group's
    value when it is true. A final, shorter group is treated as if it
    were padded with zeros.

    This function is used by the Fingerprint class.

    >>> _compressbits([0, 1, 0, 0, 0, 1], 2)
    [2, 0, 2]
    """
    total = len(bitvector)
    words = []
    for offset in range(0, total, wordsize):
        span = min(wordsize, total - offset)
        words.append(sum(2**i for i in range(span) if bitvector[offset + i]))
    return words
# When run as a script, execute the doctests embedded in this module's
# docstrings.
if __name__=="__main__": #pragma: no cover
    import doctest
    doctest.testmod()
cinfony-1.2/cinfony/__init__.py 0000664 0001750 0001750 00000000024 12061452051 016505 0 ustar noel noel 0000000 0000000 __version__ = "1.2"
cinfony-1.2/cinfony/webel.py 0000664 0001750 0001750 00000031314 12061452051 016052 0 ustar noel noel 0000000 0000000 ## Copyright (c) 2009-2011, Noel O'Boyle
## All rights reserved.
##
## This file is part of Cinfony.
## The contents are covered by the terms of the BSD license
## which is included in the file LICENSE_BSD.txt.
"""
webel - A Cinfony module that runs entirely on web services
webel can be used from all of CPython, Jython and IronPython.
Global variables:
informats - a dictionary of supported input formats
outformats - a dictionary of supported output formats
fps - a list of supported fingerprint types
"""
import re
import os
import urllib2
import StringIO
try:
import Tkinter as tk
import Image as PIL
import ImageTk as piltk
except ImportError:
tk = None
# Format code -> description for everything readstring() accepts
informats = {
    "smi": "SMILES",
    "inchikey": "InChIKey",
    "inchi": "InChI",
    "name": "Common name",
}
"""A dictionary of supported input formats"""
# Format code -> description for the output formats
outformats = {
    "smi": "SMILES",
    "cdxml": "ChemDraw XML",
    "inchi": "InChI",
    "sdf": "Symyx SDF",
    "names": "Common names",
    "inchikey": "InChIKey",
    "alc": "Alchemy",
    "cerius": "MSI Cerius II",
    "charmm": "CHARMM",
    "cif": "Crystallographic Information File",
    "cml": "Chemical Markup Language",
    "ctx": "Gasteiger Clear Text",
    "gjf": "Gaussian job file",
    "gromacs": "GROMACS",
    "hyperchem": "HyperChem",
    "jme": "Java Molecule Editor",
    "maestro": "Schrodinger MacroModel",
    "mol": "Symyx mol",
    "mol2": "Tripos Sybyl MOL2",
    "mrv": "ChemAxon MRV",
    "pdb": "Protein Data Bank",
    "sdf3000": "Symyx SDF3000",
    "sln": "Sybl line notation",
    "xyz": "XYZ",
    "iupac": "IUPAC name",
}
"""A dictionary of supported output formats"""
# Fingerprint type names
fps = [
    "std",
    "maccs",
    "estate",
]
"""A list of supported fingerprint types"""
# The following function is taken from urllib.py in the IronPython dist
# Translation tables already built by _quo(), keyed by
# (safe, always_safe). Kept at module level so that they are reused
# between calls instead of being rebuilt every time.
_quo_safemaps = {}
def _quo(text, safe="/"):
    """Percent-encode text for use as a URL path component.

    Letters, digits, '_', '.', '-' and every character in safe are left
    as they are; any other character with a code below 256 becomes
    '%XX' (two upper-case hex digits).

    Required parameters:
       text -- the string to encode
    Optional parameters:
       safe -- extra characters to leave unescaped (default is "/")

    Raises KeyError for a character whose code is 256 or above.
    """
    always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                   'abcdefghijklmnopqrstuvwxyz'
                   '0123456789' '_.-')
    cachekey = (safe, always_safe)
    try:
        safe_map = _quo_safemaps[cachekey]
    except KeyError:
        unescaped = safe + always_safe
        safe_map = {}
        for i in range(256):
            c = chr(i)
            safe_map[c] = c if c in unescaped else '%%%02X' % i
        _quo_safemaps[cachekey] = safe_map
    return ''.join(map(safe_map.__getitem__, text))
def _makeserver(serverurl):
    """Curry the name of the server

    Returns a function that joins its arguments with "/" onto
    serverurl, fetches the resulting URL and returns the response body.
    """
    def server(*urlcomponents):
        url = "%s/" % serverurl + "/".join(urlcomponents)
        resp = urllib2.urlopen(url)
        try:
            return resp.read()
        finally:
            # Release the connection even if reading fails
            resp.close()
    return server
# CDK web service: used below for descriptors, molecular formula and
# molecular weight.
rajweb = _makeserver("http://ws1.bmc.uu.se:8182/cdk")
# NCI chemical structure resolver: used by readstring() to turn
# non-SMILES input into SMILES.
nci = _makeserver("http://cactus.nci.nih.gov/chemical/structure")
_descs = None # Cache the list of descriptors
def getdescs():
    """Return a list of supported descriptor types

    The list is requested from the rajweb service the first time it is
    needed and then kept in the module-level _descs cache. Each name is
    the part after the last "." of one line of the response.
    """
    global _descs
    if _descs:
        return _descs
    listing = rajweb("descriptors").rstrip()
    _descs = [line.split(".")[-1] for line in listing.split("\n")]
    return _descs
def readstring(format, string):
    """Read in a molecule from a string.

    Required parameters:
       format - see the informats variable for a list of available
                input formats (case-insensitive)
       string

    Raises ValueError for a format that is not in informats.

    SMILES input is used as it is; every other format is first
    converted to SMILES by the nci web service. For the "name" format
    the molecule's title is set to the given string.

    Note: For InChIKeys a list of molecules is returned.

    Example:
    >>> input = "C1=CC=CS1"
    >>> mymol = readstring("smi", input)
    """
    format = format.lower()
    if format not in informats:
        raise ValueError("%s is not a recognised Webel format" % format)

    if format == "smi":
        smiles = string
    else:
        smiles = nci(_quo(string), "smiles").rstrip()

    if format == "inchikey":
        # One molecule per line of the response
        return [Molecule(smile) for smile in smiles.split("\n")]

    mol = Molecule(smiles)
    if format == "name":
        mol.title = string
    return mol
class Outputfile(object):
    """Represent a file to which *output* is to be sent.

    Although it's possible to write a single molecule to a file by
    calling the write() method of a molecule, if multiple molecules
    are to be written to the same file you should use the Outputfile
    class.

    Required parameters:
       format - see the outformats variable for a list of available
                output formats (case-insensitive)
       filename

    Optional parameters:
       overwrite -- if the output file already exists, should it
                    be overwritten? (default is False)

    Raises IOError if the file exists and overwrite is False, and
    ValueError if the format is not in outformats.

    Methods:
       write(molecule)
       close()
    """

    def __init__(self, format, filename, overwrite=False):
        self.format = format.lower()
        self.filename = filename
        if not overwrite and os.path.isfile(self.filename):
            raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % self.filename)
        # Validate the lower-cased name: that is the one stored and
        # later passed to molecule.write(), so "SMI" must be accepted.
        if self.format not in outformats:
            raise ValueError("%s is not a recognised Webel format" % format)
        self.file = open(filename, "w")

    def write(self, molecule):
        """Write a molecule to the output file.

        Required parameters:
           molecule

        The molecule's text in this file's format is written followed
        by a newline. Raises IOError if the Outputfile has been closed.
        """
        if self.file.closed:
            raise IOError("Outputfile instance is closed.")
        output = molecule.write(self.format)
        # Same text as "print >> self.file, output", without relying on
        # the Python-2-only print statement
        self.file.write("%s\n" % (output,))

    def close(self):
        """Close the Outputfile to further writing."""
        self.file.close()
class Molecule(object):
    """Represent a Webel Molecule.

    Required parameter:
       smiles -- a SMILES string or any type of cinfony Molecule

    Attributes:
       formula, molwt, title

    Methods:
       calcfp(), calcdesc(), draw(), write()

    The underlying SMILES string can be accessed using the attribute:
       smiles
    """
    _cinfony = True

    # Matches one descriptor entry in a rajweb "descriptor" response.
    # The value group accepts a leading minus sign and an exponent, so
    # negative values and values such as "1.0E-4" are read in full.
    _descpattern = re.compile(
        r'Descriptor parent="(\w*)" name="([\w\-\+\d]*)" '
        r'value="(-?[\d\.]+(?:[eE][-+]?\d+)?)')

    def __init__(self, smiles):
        if hasattr(smiles, "_cinfony"):
            # Another cinfony Molecule: _exchange is a (kind, data) pair,
            # and kind 0 means data is already a SMILES string.
            a, b = smiles._exchange
            if a == 0:
                smiles = b
            else:
                # Must convert to SMILES
                smiles = smiles.write("smi").split()[0]
        self.smiles = smiles
        self.title = ""

    @property
    def formula(self):
        """The molecular formula as returned by the rajweb service."""
        return rajweb("mf", _quo(self.smiles))

    @property
    def molwt(self):
        """The molecular weight as returned by the rajweb service."""
        return float(rajweb("mw", _quo(self.smiles)))

    @property
    def _exchange(self):
        return (0, self.smiles)

    @staticmethod
    def _parsedescriptors(response):
        """Return {"<parent>_<name>": float(value)} for each entry in response.

        Entries whose value is not numeric are left out.
        """
        ans = {}
        for parent, name, value in Molecule._descpattern.findall(response):
            ans["%s_%s" % (parent, name)] = float(value)
        return ans

    def _lookup(self, representation):
        """Return the nci representation of this molecule, or None on HTTP 404.

        Any other HTTP error, and any non-HTTP failure, is propagated.
        """
        import sys
        try:
            return nci(_quo(self.smiles), representation).rstrip()
        except urllib2.HTTPError:
            # Only HTTPError carries a status code. sys.exc_info() is used
            # so the syntax is valid on old and new interpreters alike.
            if sys.exc_info()[1].code != 404:
                raise
            return None

    def calcdesc(self, descnames=[]):
        """Calculate descriptor values.

        Optional parameter:
           descnames -- a list of names of descriptors

        If descnames is not specified, all available descriptors are
        calculated. See getdescs() for a list of available descriptors.

        Returns a dictionary mapping "<parent>_<name>" to a float.
        Raises ValueError for an unrecognised descriptor name.
        """
        if not descnames:
            descnames = getdescs()
        else:
            for descname in descnames:
                if descname not in getdescs():
                    raise ValueError("%s is not a recognised Webel descriptor type" % descname)
        ans = {}
        for descname in descnames:
            longname = "org.openscience.cdk.qsar.descriptors.molecular." + descname
            response = rajweb("descriptor", longname, _quo(self.smiles))
            ans.update(self._parsedescriptors(response))
        return ans

    def calcfp(self, fptype="std"):
        """Calculate a molecular fingerprint.

        Optional parameters:
           fptype -- the fingerprint type (default is "std"). See the
                     fps variable for a list of of available fingerprint
                     types.

        Raises ValueError for an unrecognised fingerprint type.
        """
        fptype = fptype.lower()
        if fptype not in fps:
            raise ValueError("%s is not a recognised Webel Fingerprint type" % fptype)
        fp = rajweb("fingerprint/%s/%s" % (fptype, _quo(self.smiles))).rstrip()
        return Fingerprint(fp)

    def write(self, format="smi", filename=None, overwrite=False):
        """Write the molecule to a file or return a string.

        Optional parameters:
           format -- see the outformats variable for a list of available
                     output formats (default is "smi")
           filename -- default is None
           overwrite -- if the output file already exists, should it
                        be overwritten? (default is False)

        If a filename is specified, the result is written to a file.
        Otherwise, a string is returned containing the result (a list of
        strings for the "names" format). When the service has no name for
        the molecule, "names" gives [] and "iupac" gives "".

        To write multiple molecules to the same file you should use
        the Outputfile class.
        """
        format = format.lower()
        if format not in outformats:
            raise ValueError("%s is not a recognised Webel format" % format)
        if format == "smi":
            output = self.smiles
        elif format == "names":
            found = self._lookup("names")
            if found is None:
                output = []
            else:
                output = found.split("\n")
        elif format in ['inchi', 'inchikey']:
            output = nci(_quo(self.smiles), "std" + format).rstrip()
        elif format == 'iupac':
            output = self._lookup("iupac_name")
            if output is None:
                output = ""
        else:
            output = nci(_quo(self.smiles), "file?format=%s" % format).rstrip()

        if filename:
            if not overwrite and os.path.isfile(filename):
                raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % filename)
            outputfile = open(filename, "w")
            try:
                outputfile.write("%s\n" % (output,))
            finally:
                outputfile.close()
        else:
            return output

    def __str__(self):
        return self.write()

    def draw(self, show=True, filename=None):
        """Create a 2D depiction of the molecule.

        Optional parameters:
          show -- display on screen (default is True)
          filename -- write to file (default is None)

        Tkinter and Python Imaging Library are required for
        image display.
        """
        imagedata = nci(_quo(self.smiles), "image")
        if filename:
            # Write the image bytes exactly as received (no trailing
            # newline) and always release the handle.
            imagefile = open(filename, "wb")
            try:
                imagefile.write(imagedata)
            finally:
                imagefile.close()
        if show:
            if not tk:
                errormessage = ("Tkinter or Python Imaging "
                                "Library not found, but is required for image "
                                "display. See installation instructions for "
                                "more information.")
                raise ImportError(errormessage)
            root = tk.Tk()
            root.title(self.smiles)
            # pack() returns None, so keep the widget before packing it;
            # otherwise the label below would not be placed in the frame.
            frame = tk.Frame(root, colormap="new", visual='truecolor')
            frame.pack()
            image = PIL.open(StringIO.StringIO(imagedata))
            # Keep a reference to the PhotoImage while the window is shown
            imagedata = piltk.PhotoImage(image)
            tk.Label(frame, image=imagedata).pack()
            tk.Button(root, text="Close", command=root.destroy).pack(fill=tk.X)
            root.mainloop()
class Fingerprint(object):
    """A Molecular Fingerprint.

    Required parameters:
       fingerprint -- a string of 0's and 1's representing a binary fingerprint

    Attributes:
       fp -- the underlying fingerprint object
       bits -- a list of bits set in the Fingerprint

    Methods:
       The "|" operator can be used to calculate the Tanimoto coeff. For example,
       given two Fingerprints 'a', and 'b', the Tanimoto coefficient is given by:
          tanimoto = a | b
    """
    def __init__(self, fingerprint):
        self.fp = fingerprint

    def __or__(self, other):
        # Tanimoto: bits set in both / bits set in either
        mine = frozenset(self.bits)
        theirs = frozenset(other.bits)
        shared = len(mine.intersection(theirs))
        either = len(mine.union(theirs))
        return shared / float(either)

    @property
    def bits(self):
        positions = []
        for index, char in enumerate(self.fp):
            if char == "1":
                positions.append(index)
        return positions

    def __str__(self):
        return self.fp
class Smarts(object):
    """A Smarts Pattern Matcher

    Required parameters:
       smartspattern

    Methods:
       match(molecule)

    Example:
    >>> mol = readstring("smi","CCN(CC)CC") # triethylamine
    >>> smarts = Smarts("[#6][#6]") # Matches an ethyl group
    >>> smarts.match(mol)
    True
    """
    def __init__(self, smartspattern):
        """Initialise with a SMARTS pattern."""
        self.pat = smartspattern

    def match(self, molecule):
        """Does a SMARTS pattern match a particular molecule?

        Required parameters:
           molecule

        The question is put to the rajweb "substruct" service; the
        result is True only when it answers "true".
        """
        target = _quo(molecule.smiles)
        query = _quo(self.pat)
        answer = rajweb("substruct", target, query)
        return answer.rstrip() == "true"
if __name__=="__main__": #pragma: no cover
    # When run as a script, execute the doctest examples found in the
    # docstring of rajweb, using this module's globals as the namespace.
    import doctest
    doctest.run_docstring_examples(rajweb, globals())
cinfony-1.2/cinfony/silverwebel.py 0000664 0001750 0001750 00000027765 12061452051 017316 0 ustar noel noel 0000000 0000000 ## Copyright (c) 2009-2011, Noel O'Boyle
## All rights reserved.
##
## This file is part of Cinfony.
## The contents are covered by the terms of the BSD license
## which is included in the file LICENSE_BSD.txt.
"""
silverwebel - A Cinfony module for Silverlight that runs on web services
Global variables:
informats - a dictionary of supported input formats
outformats - a dictionary of supported output formats
fps - a list of supported fingerprint types
"""
import re
from time import sleep
# .NET classes
from System.Net import WebClient
from System import Uri, UriKind
# NOTE(review): this module-level client is not referenced by the code
# visible below; _makeserver() creates its own WebClient per request.
_webclient = WebClient()
# presumably kept so code shared with the Tkinter-based module can test
# "if not tk" -- TODO confirm
tk = None
# Format code -> human-readable description. readstring() only accepts
# the codes listed here.
informats = {"smi":"SMILES", "inchikey":"InChIKey", "inchi":"InChI",
             "name":"Common name"}
"""A dictionary of supported input formats"""
# Format code -> human-readable description. Molecule.write() and
# Outputfile only accept the codes listed here.
outformats = {"smi":"SMILES", "cdxml":"ChemDraw XML", "inchi":"InChI",
              "sdf":"Symyx SDF", "names":"Common names", "inchikey":"InChIKey",
              "alc":"Alchemy", "cerius":"MSI Cerius II", "charmm":"CHARMM",
              "cif":"Crystallographic Information File",
              "cml":"Chemical Markup Language", "ctx":"Gasteiger Clear Text",
              "gjf":"Gaussian job file", "gromacs":"GROMACS",
              "hyperchem":"HyperChem", "jme":"Java Molecule Editor",
              "maestro":"Schrodinger MacroModel",
              "mol":"Symyx mol", "mol2":"Tripos Sybyl MOL2",
              "mrv":"ChemAxon MRV", "pdb":"Protein Data Bank",
              "sdf3000":"Symyx SDF3000", "sln":"Sybl line notation",
              "xyz":"XYZ", "iupac":"IUPAC name"}
"""A dictionary of supported output formats"""
# Fingerprint type names accepted by Molecule.calcfp()
fps = ["std", "maccs", "estate"]
"""A list of supported fingerprint types"""
# The following function is taken from urllib.py in the IronPython dist
def _quo(text, safe="/"):
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'abcdefghijklmnopqrstuvwxyz'
'0123456789' '_.-')
_safemaps = {}
cachekey = (safe, always_safe)
try:
safe_map = _safemaps[cachekey]
except KeyError:
safe += always_safe
safe_map = {}
for i in range(256):
c = chr(i)
safe_map[c] = (c in safe) and c or ('%%%02X' % i)
_safemaps[cachekey] = safe_map
res = map(safe_map.__getitem__, text)
return ''.join(res)
def _makeserver(serverurl):
    """Curry the name of the server.

    Returns a function server(*urlcomponents) which downloads the
    site-relative URL "/<serverurl>/<component>/<component>/..." and
    returns the downloaded text. IOError is raised if the download
    reports an error.
    """
    def server(*urlcomponents):
        url = "/%s/" % serverurl + "/".join(urlcomponents)
        # Shared with callback(): [finished?, error, downloaded text]
        result = [False, None, None]
        def callback(s, e):
            result[0] = True
            result[1] = e.Error
            if not result[1]:
                result[2] = e.Result
        # The download is started asynchronously; callback() is invoked
        # through the DownloadStringCompleted event.
        webclient = WebClient()
        webclient.DownloadStringCompleted += callback
        webclient.DownloadStringAsync(Uri(url, UriKind.Relative))
        # Poll every 0.5 s until callback() has marked the download finished
        while not result[0]:
            sleep(0.5)
        if result[1]:
            raise IOError, "Problem accessing web server\n%s" % result[1]
        return result[2]
    return server
# Web-service endpoints. _makeserver() turns these names into the
# site-relative URL prefixes "/rajweb/" and "/nci/".
# rajweb is used below for descriptors, fingerprints, formula, molecular
# weight and substructure queries.
rajweb = _makeserver("rajweb")
# nci is used below for format conversion and names.
nci = _makeserver("nci")
_descs = None # Cache the list of descriptors
def getdescs():
    """Return a list of supported descriptor types.

    The list is requested from the rajweb service the first time it is
    needed and then kept in the module-level ``_descs`` cache. Each line
    of the response is reduced to the text after its last "."
    """
    global _descs
    if _descs:
        return _descs
    listing = rajweb("descriptors").rstrip()
    _descs = [line.rsplit(".", 1)[-1] for line in listing.split("\n")]
    return _descs
def readstring(format, string):
    """Read in a molecule from a string.

    Required parameters:
       format - see the informats variable for a list of available
                input formats
       string

    Note: For InChIKeys a list of molecules is returned.

    Raises ValueError if the format is not listed in informats.

    Example:
    >>> input = "C1=CC=CS1"
    >>> mymol = readstring("smi", input)
    """
    format = format.lower()
    if format not in informats:
        raise ValueError("%s is not a recognised Webel format" % format)
    # SMILES input is used directly; anything else is resolved to SMILES
    # through the nci service.
    if format == "smi":
        smiles = string
    else:
        smiles = nci(_quo(string), "smiles").rstrip()
    if format == "inchikey":
        # One molecule per line of the response
        return [Molecule(entry) for entry in smiles.split("\n")]
    mol = Molecule(smiles)
    if format == "name":
        mol.title = string
    return mol
class Outputfile(object):
    """Represent a file to which *output* is to be sent.

    Although it's possible to write a single molecule to a file by
    calling the write() method of a molecule, if multiple molecules
    are to be written to the same file you should use the Outputfile
    class.

    Required parameters:
       format - see the outformats variable for a list of available
                output formats (case-insensitive)
       filename

    Optional parameters:
       overwrite -- if the output file already exists, should it
                    be overwritten? (default is False)

    Raises IOError if the file exists and overwrite is False, and
    ValueError if the format is not listed in outformats.

    Methods:
       write(molecule)
       close()
    """
    def __init__(self, format, filename, overwrite=False):
        # This module does not import os at the top, so bring it in here;
        # the existence check below would otherwise fail with NameError.
        import os
        self.format = format.lower()
        self.filename = filename
        if not overwrite and os.path.isfile(self.filename):
            raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % self.filename)
        # Check the normalised name: the raw argument may differ in case
        # ("SMI") and used to be rejected even though it is lower-cased above.
        if self.format not in outformats:
            raise ValueError("%s is not a recognised Webel format" % format)
        self.file = open(filename, "w")

    def write(self, molecule):
        """Write a molecule to the output file, followed by a newline.

        Required parameters:
           molecule

        Raises IOError if the Outputfile has already been closed.
        """
        if self.file.closed:
            raise IOError("Outputfile instance is closed.")
        output = molecule.write(self.format)
        # The one-element tuple keeps list results (e.g. "names") formatting
        # as a single value.
        self.file.write("%s\n" % (output,))

    def close(self):
        """Close the Outputfile to further writing."""
        self.file.close()
class Molecule(object):
    """Represent a Webel Molecule.

    Required parameter:
       smiles -- a SMILES string or any type of cinfony Molecule

    Attributes:
       formula, molwt, title

    Methods:
       calcfp(), calcdesc(), draw(), write()

    The underlying SMILES string can be accessed using the attribute:
       smiles
    """
    _cinfony = True

    # Matches one descriptor entry in a rajweb "descriptor" response.
    # The value group accepts a leading minus sign and an exponent, so
    # negative values and values such as "1.0E-4" are read in full.
    _descpattern = re.compile(
        r'Descriptor parent="(\w*)" name="([\w\-\+\d]*)" '
        r'value="(-?[\d\.]+(?:[eE][-+]?\d+)?)')

    def __init__(self, smiles):
        if hasattr(smiles, "_cinfony"):
            # Another cinfony Molecule: _exchange is a (kind, data) pair,
            # and kind 0 means data is already a SMILES string.
            a, b = smiles._exchange
            if a == 0:
                smiles = b
            else:
                # Must convert to SMILES
                smiles = smiles.write("smi").split()[0]
        self.smiles = smiles
        self.title = ""

    @property
    def formula(self):
        """The molecular formula as returned by the rajweb service."""
        return rajweb("mf", _quo(self.smiles))

    @property
    def molwt(self):
        """The molecular weight as returned by the rajweb service."""
        return float(rajweb("mw", _quo(self.smiles)))

    @property
    def _exchange(self):
        return (0, self.smiles)

    @staticmethod
    def _parsedescriptors(response):
        """Return {"<parent>_<name>": float(value)} for each entry in response.

        Entries whose value is not numeric are left out.
        """
        ans = {}
        for parent, name, value in Molecule._descpattern.findall(response):
            ans["%s_%s" % (parent, name)] = float(value)
        return ans

    def _lookup(self, representation):
        """Return the nci representation of this molecule, or None on failure.

        The server wrapper in this module reports every failed request as
        IOError, so a missing entry cannot be told apart from other
        failures; all of them give None.
        """
        try:
            return nci(_quo(self.smiles), representation).rstrip()
        except IOError:
            # urllib2 is not available in this module, so the error raised
            # by the server wrapper is caught directly.
            return None

    def calcdesc(self, descnames=[]):
        """Calculate descriptor values.

        Optional parameter:
           descnames -- a list of names of descriptors

        If descnames is not specified, all available descriptors are
        calculated. See getdescs() for a list of available descriptors.

        Returns a dictionary mapping "<parent>_<name>" to a float.
        Raises ValueError for an unrecognised descriptor name.
        """
        if not descnames:
            descnames = getdescs()
        else:
            for descname in descnames:
                if descname not in getdescs():
                    raise ValueError("%s is not a recognised Webel descriptor type" % descname)
        ans = {}
        for descname in descnames:
            longname = "org.openscience.cdk.qsar.descriptors.molecular." + descname
            response = rajweb("descriptor", longname, _quo(self.smiles))
            ans.update(self._parsedescriptors(response))
        return ans

    def calcfp(self, fptype="std"):
        """Calculate a molecular fingerprint.

        Optional parameters:
           fptype -- the fingerprint type (default is "std"). See the
                     fps variable for a list of of available fingerprint
                     types.

        Raises ValueError for an unrecognised fingerprint type.
        """
        fptype = fptype.lower()
        if fptype not in fps:
            raise ValueError("%s is not a recognised Webel Fingerprint type" % fptype)
        fp = rajweb("fingerprint/%s/%s" % (fptype, _quo(self.smiles))).rstrip()
        return Fingerprint(fp)

    def write(self, format="smi", filename=None, overwrite=False):
        """Write the molecule to a file or return a string.

        Optional parameters:
           format -- see the outformats variable for a list of available
                     output formats (default is "smi")
           filename -- default is None
           overwrite -- if the output file already exists, should it
                        be overwritten? (default is False)

        If a filename is specified, the result is written to a file.
        Otherwise, a string is returned containing the result (a list of
        strings for the "names" format). When the name lookup fails,
        "names" gives [] and "iupac" gives "".

        To write multiple molecules to the same file you should use
        the Outputfile class.
        """
        format = format.lower()
        if format not in outformats:
            raise ValueError("%s is not a recognised Webel format" % format)
        if format == "smi":
            output = self.smiles
        elif format == "names":
            found = self._lookup("names")
            if found is None:
                output = []
            else:
                output = found.split("\n")
        elif format in ['inchi', 'inchikey']:
            output = nci(_quo(self.smiles), "std" + format).rstrip()
        elif format == 'iupac':
            output = self._lookup("iupac_name")
            if output is None:
                output = ""
        else:
            output = nci(_quo(self.smiles), "file?format=%s" % format).rstrip()

        if filename:
            # os is not imported at the top of this module
            import os
            if not overwrite and os.path.isfile(filename):
                raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % filename)
            outputfile = open(filename, "w")
            try:
                outputfile.write("%s\n" % (output,))
            finally:
                outputfile.close()
        else:
            return output

    def __str__(self):
        return self.write()

    def draw(self):
        """Create a 2D depiction of the molecule."""
        # NOTE(review): showimage is not defined anywhere in the visible
        # part of this module; presumably the hosting page injects it into
        # the module globals -- confirm.
        global showimage
        url = "http://cactus.nci.nih.gov/chemical/structure/%s/image" % _quo(self.smiles)
        showimage(url)
class Fingerprint(object):
    """A Molecular Fingerprint.

    Required parameters:
       fingerprint -- a string of 0's and 1's representing a binary fingerprint

    Attributes:
       fp -- the underlying fingerprint object
       bits -- a list of bits set in the Fingerprint

    Methods:
       The "|" operator can be used to calculate the Tanimoto coeff. For example,
       given two Fingerprints 'a', and 'b', the Tanimoto coefficient is given by:
          tanimoto = a | b
    """
    def __init__(self, fingerprint):
        self.fp = fingerprint

    def __or__(self, other):
        # Tanimoto: bits set in both / bits set in either
        ours = set(self.bits)
        theirs = set(other.bits)
        common = ours.intersection(theirs)
        combined = ours.union(theirs)
        return len(common) / float(len(combined))

    @property
    def bits(self):
        setbits = []
        position = 0
        for flag in self.fp:
            if flag == "1":
                setbits.append(position)
            position += 1
        return setbits

    def __str__(self):
        return self.fp
class Smarts(object):
    """A Smarts Pattern Matcher

    Required parameters:
       smartspattern

    Methods:
       match(molecule)

    Example:
    >>> mol = readstring("smi","CCN(CC)CC") # triethylamine
    >>> smarts = Smarts("[#6][#6]") # Matches an ethyl group
    >>> smarts.match(mol)
    True
    """
    def __init__(self, smartspattern):
        """Initialise with a SMARTS pattern."""
        self.pat = smartspattern

    def match(self, molecule):
        """Does a SMARTS pattern match a particular molecule?

        Required parameters:
           molecule

        The question is put to the rajweb "substruct" service; the
        result is True only when it answers "true".
        """
        target = _quo(molecule.smiles)
        query = _quo(self.pat)
        answer = rajweb("substruct", target, query)
        return answer.rstrip() == "true"
if __name__=="__main__": #pragma: no cover
    # When run as a script, execute the doctest examples found in the
    # docstring of rajweb, using this module's globals as the namespace.
    import doctest
    doctest.run_docstring_examples(rajweb, globals())
cinfony-1.2/cinfony/opsin.py 0000664 0001750 0001750 00000010074 12061452051 016104 0 ustar noel noel 0000000 0000000 ## Copyright (c) 2011, Noel O'Boyle
## All rights reserved.
##
## This file is part of Cinfony.
## The contents are covered by the terms of the BSD license
## which is included in the file LICENSE_BSD.txt.
"""
opsin - A Cinfony module for accessing OPSIN from CPython and Jython
Global variables:
opsin - the underlying OPSIN library (uk.ac.cam.ch.wwmm.opsin)
informats - a dictionary of supported input formats
outformats - a dictionary of supported output formats
"""
import os
import sys
# Bind the name "opsin" to the OPSIN Java package, either natively under
# Jython or through a JPype-managed JVM under CPython.
if sys.platform[:4] == "java": # Jython
    import uk.ac.cam.ch.wwmm.opsin as opsin
else: # CPython
    import jpype
    if not jpype.isJVMStarted():
        # Both environment variables are required; a missing one raises
        # KeyError here.
        _jvm = os.environ['JPYPE_JVM']
        if _jvm[0] == '"': # Strip the enclosing quotes (first and last character)
            _jvm = _jvm[1:-1]
        _cp = os.environ['CLASSPATH']
        jpype.startJVM(_jvm, "-Djava.class.path=" + _cp)
    opsin = jpype.JPackage("uk").ac.cam.ch.wwmm.opsin
try:
    # Parser instance and InChI converter used by readstring() and
    # Molecule.write() below.
    _nametostruct = opsin.NameToStructure.getInstance()
    _restoinchi = opsin.NameToInchi.convertResultToInChI
except TypeError:
    # Treated as "the OPSIN classes could not be resolved" and reported
    # as an import failure.
    raise ImportError("The OPSIN Jar file cannot be found.")
# Format code -> human-readable description
informats = {'iupac': 'IUPAC name'}
"""A dictionary of supported input formats"""
# Format code -> human-readable description
outformats = {'cml': "Chemical Markup Language", 'inchi': "InChI",
              'smi': "SMILES"}
"""A dictionary of supported output formats"""
def readstring(format, string):
    """Read in a molecule from a string.

    Required parameters:
       format - see the informats variable for a list of available
                input formats
       string

    Raises ValueError for any format other than "iupac" and IOError
    when OPSIN reports that it failed to parse the name.

    Example:
    >>> input = "propane"
    >>> mymol = readstring("iupac", input)
    """
    if format != "iupac":
        raise ValueError("%s is not a recognised OPSIN format" % format)
    parsed = _nametostruct.parseChemicalName(string)
    status = str(parsed.getStatus())
    if status == "FAILURE":
        details = (string, format, parsed.getMessage())
        raise IOError("Failed to convert '%s' to format '%s'\n%s" % details)
    return Molecule(parsed)
class Molecule(object):
    """Represent a opsinjpype Molecule.

    Required parameters:
       OpsinResult -- the result of using OPSIN to parse an IUPAC string

    Methods:
       write()

    The underlying OpsinResult can be accessed using the attribute:
       OpsinResult
    """
    _cinfony = True

    def __init__(self, OpsinResult):
        # Conversion from other cinfony toolkits is not supported
        if hasattr(OpsinResult, "_cinfony"):
            raise IOError("An opsin Molecule cannot be created from another Cinfony Molecule")
        self.OpsinResult = OpsinResult

    def __str__(self):
        return self.write()

    @property
    def _exchange(self):
        return (0, self.write("smi"))

    def write(self, format="smi", filename=None, overwrite=False):
        """Write the molecule to a file or return a string.

        Optional parameters:
           format -- see the outformats variable for a list of available
                     output formats (default is "smi")
           filename -- default is None
           overwrite -- if the output file already exists, should it
                        be overwritten? (default is False)

        If a filename is specified, the result is written to a file.
        Otherwise, a string is returned containing the result.

        Raises ValueError for an unrecognised format and IOError when
        the file exists and overwrite is False.
        """
        if format not in outformats:
            raise ValueError("%s is not a recognised OPSIN format" % format)
        exists = filename is not None and os.path.isfile(filename)
        if exists and not overwrite:
            raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % filename)
        if format == "smi":
            result = str(self.OpsinResult.getSmiles())
        elif format == "inchi":
            result = str(_restoinchi(self.OpsinResult))
        elif format == "cml":
            result = str(self.OpsinResult.getCml().toXML())
        if not filename:
            return result
        outputfile = open(filename, "w")
        outputfile.write(result + "\n")
        outputfile.close()
if __name__=="__main__": #pragma: no cover
    # Smoke test when run as a script: parse a name and print its InChI
    mol = readstring("iupac", "propane")
    print mol.write("inchi")
cinfony-1.2/cinfony/cdk.py 0000664 0001750 0001750 00000061443 12061452051 015523 0 ustar noel noel 0000000 0000000 #-*. coding: utf-8 -*-
## Copyright (c) 2008-2011, Noel O'Boyle; 2012, Adrià Cereto-Massagué
## All rights reserved.
##
## This file is part of Cinfony.
## The contents are covered by the terms of the BSD license
## which is included in the file LICENSE_BSD.txt.
"""
cdk - A Cinfony module for accessing the CDK from CPython and Jython
Global variables:
cdk - the underlying CDK Java library (org.openscience.cdk)
informats - a dictionary of supported input formats
outformats - a dictionary of supported output formats
descs - a list of supported descriptors
fps - a list of supported fingerprint types
forcefields - a list of supported forcefields
"""
import sys
import os
# Bind the name "cdk" to the CDK Java package, either natively under
# Jython or through a JPype-managed JVM under CPython, and define the
# exception names used in the except clauses below.
if sys.platform[:4] == "java":
    import org.openscience.cdk as cdk
    import java
    import javax
    #Exceptions are handled differently in jpype and jython. We need to wrap them:
    InvalidSmilesException = cdk.exception.InvalidSmilesException
    CDKException = cdk.exception.CDKException
    NullPointerException = java.lang.NullPointerException
else:
    # NOTE(review): the java/javax names used later in this module are not
    # imported explicitly on this branch; presumably they come from this
    # star import -- confirm.
    from jpype import *
    if not isJVMStarted():
        # Both environment variables are required; a missing one raises
        # KeyError here.
        _jvm = os.environ['JPYPE_JVM']
        if _jvm[0] == '"': # Strip the enclosing quotes (first and last character)
            _jvm = _jvm[1:-1]
        _cp = os.environ['CLASSPATH']
        startJVM(_jvm, "-Djava.class.path=" + _cp)
    cdk = JPackage("org").openscience.cdk
    try:
        # Instantiating a CDK class is used as the availability check
        _testmol = cdk.Molecule()
    except TypeError:
        raise ImportError, "The CDK Jar file cannot be found."
    #Exception wrappers for Jpype
    # All three names are the same class here, so an except clause
    # naming any one of them catches every JavaException.
    InvalidSmilesException = JavaException
    CDKException = JavaException
    NullPointerException = JavaException
def _getdescdict():
    """Return a dictionary mapping descriptor name to descriptor instance.

    The instances come from a CDK DescriptorEngine created for molecular
    descriptors. Descriptors whose dictionary class contains
    "proteinDescriptor" are left out. The name is the part of the
    specification reference after the last "#".
    """
    engine = cdk.qsar.DescriptorEngine(cdk.qsar.DescriptorEngine.MOLECULAR)
    found = {}
    for descriptor in engine.getDescriptorInstances():
        spec = descriptor.getSpecification()
        if "proteinDescriptor" in engine.getDictionaryClass(spec):
            continue
        reference = spec.getSpecificationReference()
        # Using str() for unicode conversion
        found[str(reference.split("#")[-1])] = descriptor
    return found
# Descriptor name -> CDK descriptor instance, built once at import time
_descdict = _getdescdict()
descs = _descdict.keys()
"""A list of supported descriptors"""
# Fingerprint type name -> CDK fingerprinter class; calcfp() instantiates
# the class on each call.
_fingerprinters = {"daylight":cdk.fingerprint.Fingerprinter
                   , "graph":cdk.fingerprint.GraphOnlyFingerprinter
                   , "maccs":cdk.fingerprint.MACCSFingerprinter
                   , "estate":cdk.fingerprint.EStateFingerprinter
                   , "extended":cdk.fingerprint.ExtendedFingerprinter
                   , "hybridization":cdk.fingerprint.HybridizationFingerprinter
                   , "klekota-roth":cdk.fingerprint.KlekotaRothFingerprinter
                   , "pubchem":cdk.fingerprint.PubchemFingerprinter
                   , "substructure":cdk.fingerprint.SubstructureFingerprinter
                   }
fps = _fingerprinters.keys()
"""A list of supported fingerprint types"""
# Format code -> human-readable description, shared by the input and
# output tables below.
_formats = {'smi': "SMILES" , 'sdf': "MDL SDF",
            'mol2': "MOL2", 'mol': "MDL MOL",
            "inchi":"InChI",
            "inchikey":"InChIKey"}
# Formats that are read through a CDK reader class; 'smi' and 'inchi'
# are handled separately in readfile() and readstring().
_informats = {'sdf': cdk.io.MDLV2000Reader, 'mol': cdk.io.MDLV2000Reader}
informats = dict([(_x, _formats[_x]) for _x in ['smi', 'sdf', 'mol', 'inchi']])
"""A dictionary of supported input formats"""
# Formats that are written through a CDK writer class; 'smi', 'inchi'
# and 'inchikey' are produced as plain text elsewhere in this module.
_outformats = {'mol': cdk.io.MDLV2000Writer,
               'mol2': cdk.io.Mol2Writer,
               'sdf': cdk.io.SDFWriter}
outformats = dict([(_x, _formats[_x]) for _x in _outformats.keys() + ['smi', 'inchi', 'inchikey']])
"""A dictionary of supported output formats"""
forcefields = list(cdk.modeling.builder3d.ModelBuilder3D.getInstance().getFfTypes())
"""A list of supported forcefields"""
_isofact = cdk.config.IsotopeFactory.getInstance(cdk.ChemObject().getBuilder())
# Integer bond order -> CDK bond order constant, and the reverse mapping
_bondtypes = {1: cdk.CDKConstants.BONDORDER_SINGLE,
              2: cdk.CDKConstants.BONDORDER_DOUBLE,
              3: cdk.CDKConstants.BONDORDER_TRIPLE}
_revbondtypes = dict([(_y,_x) for (_x,_y) in _bondtypes.iteritems()])
def _intvalue(integer):
"""Paper over some differences between JPype and Jython"""
# Jython automagically converts Integer to ints
if type(integer) != type(42): # Is it a Python int?
integer = integer.intValue()
return integer
def _iterinchifile(inputfile):
    """Yield a Molecule per line of an open InChI file, then close the file."""
    try:
        for line in inputfile:
            yield readstring('inchi', line.rstrip())
    finally:
        # Runs when the file is exhausted, on error, or when the
        # generator is discarded.
        inputfile.close()

def readfile(format, filename):
    """Iterate over the molecules in a file.

    Required parameters:
       format - see the informats variable for a list of available
                input formats
       filename

    Raises IOError if the file does not exist and ValueError if the
    format is not recognised.

    You can access the first molecule in a file using the next() method
    of the iterator:
        mol = readfile("smi", "myfile.smi").next()

    You can make a list of the molecules in a file using:
        mols = list(readfile("smi", "myfile.smi"))

    You can iterate over the molecules in a file as shown in the
    following code snippet:
    >>> atomtotal = 0
    >>> for mol in readfile("sdf", "head.sdf"):
    ...     atomtotal += len(mol.atoms)
    ...
    >>> print atomtotal
    43
    """
    format = format.lower()
    if not os.path.isfile(filename):
        raise IOError("No such file: '%s'" % filename)
    builder = cdk.DefaultChemObjectBuilder.getInstance()
    if format=="sdf":
        return (Molecule(mol) for mol in cdk.io.iterator.IteratingMDLReader(
            java.io.FileInputStream(java.io.File(filename)),
            builder)
            )
    elif format=="smi":
        return (Molecule(mol) for mol in cdk.io.iterator.IteratingSmilesReader(
            java.io.FileInputStream(java.io.File(filename)),
            builder
            ))
    elif format == 'inchi':
        # Opened here so that open errors surface at call time; the
        # helper closes the file once iteration ends.
        return _iterinchifile(open(filename, 'rb'))
    elif format in informats:
        reader = _informats[format](java.io.FileInputStream(java.io.File(filename)))
        chemfile = reader.read(cdk.ChemFile())
        manip = cdk.tools.manipulator.ChemFileManipulator
        # Wrap the single molecule in a list: iter(Molecule) would iterate
        # over the molecule's atoms instead of yielding the molecule.
        return iter([Molecule(manip.getAllAtomContainers(chemfile)[0])])
    else:
        raise ValueError("%s is not a recognised CDK format" % format)
def readstring(format, string):
"""Read in a molecule from a string.
Required parameters:
format - see the informats variable for a list of available
input formats
string
Example:
>>> input = "C1=CC=CS1"
>>> mymol = readstring("smi", input)
>>> len(mymol.atoms)
5
"""
format = format.lower()
if format=="smi":
sp = cdk.smiles.SmilesParser(cdk.DefaultChemObjectBuilder.getInstance())
try:
ans = sp.parseSmiles(string)
except InvalidSmilesException, ex:
if sys.platform[:4] != "java":
#Jpype exception
ex = ex.message()
raise IOError, ex
return Molecule(ans)
elif format == 'inchi':
factory = cdk.inchi.InChIGeneratorFactory.getInstance()
intostruct = factory.getInChIToStructure(string,cdk.DefaultChemObjectBuilder.getInstance())
return Molecule(intostruct.getAtomContainer())
elif format in informats:
reader = _informats[format](java.io.StringReader(string))
chemfile = reader.read(cdk.ChemFile())
manip = cdk.tools.manipulator.ChemFileManipulator
return Molecule(manip.getAllAtomContainers(chemfile)[0])
else:
raise ValueError,"%s is not a recognised CDK format" % format
class Outputfile(object):
    """Represent a file to which *output* is to be sent.

    Required parameters:
       format - see the outformats variable for a list of available
                output formats (case-insensitive)
       filename

    Optional parameters:
       overwrite -- if the output file already exists, should it
                    be overwritten? (default is False)

    Raises IOError if the file exists and overwrite is False, and
    ValueError if the format is not listed in outformats.

    Attributes:
       total -- the number of molecules written so far

    Methods:
       write(molecule)
       close()
    """
    def __init__(self, format, filename, overwrite=False):
        self.format = format.lower()
        self.filename = filename
        if not overwrite and os.path.isfile(self.filename):
            raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % self.filename)
        # Check the normalised name: the raw argument may differ in case
        # ("SDF") and used to be rejected even though it is lower-cased above.
        if self.format not in outformats:
            raise ValueError("%s is not a recognised CDK format" % format)
        if self.format in ('smi','inchi', 'inchikey'):
            # Text formats are written line by line to a Python file
            self._outputfile = open(self.filename, "w")
        else:
            self._writer = java.io.FileWriter(java.io.File(self.filename))
            self._molwriter = _outformats[self.format](self._writer)
        self.total = 0 # The total number of molecules written to the file

    def write(self, molecule):
        """Write a molecule to the output file.

        Required parameters:
           molecule

        Raises IOError if the Outputfile has already been closed.
        """
        # close() sets filename to None to mark the instance as closed
        if not self.filename:
            raise IOError("Outputfile instance is closed.")
        if self.format in ('smi','inchi', 'inchikey'):
            # Pass this file's format; the bare name "format" would refer
            # to the Python builtin here.
            self._outputfile.write("%s\n" % molecule.write(self.format))
        else:
            self._molwriter.write(molecule.Molecule)
        self.total += 1

    def close(self):
        """Close the Outputfile to further writing."""
        self.filename = None
        if self.format in ('smi','inchi', 'inchikey'):
            self._outputfile.close()
        else:
            self._molwriter.close()
            self._writer.close()
class Molecule(object):
"""Represent a cdkjpype Molecule.
Required parameters:
Molecule -- a CDK Molecule or any type of cinfony Molecule
Attributes:
atoms, data, exactmass, formula, molwt, title
Methods:
addh(), calcfp(), calcdesc(), draw(), removeh(), write()
The underlying CDK Molecule can be accessed using the attribute:
Molecule
"""
_cinfony = True
def __init__(self, Molecule):
if hasattr(Molecule, "_cinfony"):
a, b = Molecule._exchange
if a == 0:
mol = readstring("smi", b)
else:
mol = readstring("sdf", b)
Molecule = mol.Molecule
self.Molecule = Molecule
@property
def atoms(self): return [Atom(self.Molecule.getAtom(i)) for i in range(self.Molecule.getAtomCount())]
@property
def data(self):
    """A MoleculeData object built on the underlying molecule."""
    wrapped = self.Molecule
    return MoleculeData(wrapped)
@property
def formula(self):
    """The molecular formula as a string."""
    formulatools = cdk.tools.manipulator.MolecularFormulaManipulator
    molformula = formulatools.getMolecularFormula(self.Molecule)
    # GetHillString
    return formulatools.getString(molformula)
@property
def exactmass(self):
    """The major-isotope mass, calculated on a copy with explicit hydrogens."""
    # Work on a clone so that adding hydrogens leaves this molecule untouched
    withh = Molecule(self.Molecule.clone())
    withh.addh()
    formulatools = cdk.tools.manipulator.MolecularFormulaManipulator
    molformula = formulatools.getMolecularFormula(withh.Molecule)
    return formulatools.getMajorIsotopeMass(molformula)
@property
def molwt(self):
    """The molecular weight, calculated on a copy with explicit hydrogens."""
    # Work on a clone so that adding hydrogens leaves this molecule untouched
    withh = Molecule(self.Molecule.clone())
    withh.addh()
    containertools = cdk.tools.manipulator.AtomContainerManipulator
    return containertools.getNaturalExactMass(withh.Molecule)
# The title is stored on the CDK molecule under the CDKConstants.TITLE
# property and is exposed as a read/write attribute.
def _gettitle(self): return self.Molecule.getProperty(cdk.CDKConstants.TITLE)
def _settitle(self, val): self.Molecule.setProperty(cdk.CDKConstants.TITLE, val)
title = property(_gettitle, _settitle)
@property
def _exchange(self):
    """A (kind, data) pair used to convert to other cinfony Molecules.

    With 2D or 3D coordinates present this is (1, MOL file text),
    otherwise (0, SMILES).
    """
    geometry = cdk.geometry.GeometryTools
    hascoords = (geometry.has2DCoordinates(self.Molecule) or
                 geometry.has3DCoordinates(self.Molecule))
    if not hascoords:
        return (0, self.write("smi"))
    return (1, self.write("mol"))
def __iter__(self):
"""Iterate over the Atoms of the Molecule.
This allows constructions such as the following:
for atom in mymol:
print atom
"""
return iter(self.atoms)
def __str__(self):
    """Return what write() produces with its default arguments."""
    text = self.write()
    return text
def addh(self):
    """Add hydrogens."""
    # Converts implicit hydrogens to explicit ones on the wrapped molecule
    cdk.tools.manipulator.AtomContainerManipulator.convertImplicitToExplicitHydrogens(
        self.Molecule)
def removeh(self):
    """Remove hydrogens."""
    # removeHydrogens returns a molecule, which replaces the wrapped one
    stripped = cdk.tools.manipulator.AtomContainerManipulator.removeHydrogens(
        self.Molecule)
    self.Molecule = stripped
def write(self, format="smi", filename=None, overwrite=False):
    """Write the molecule to a file or return a string.

    Optional parameters:
       format -- see the outformats variable for a list of available
                 output formats (default is "smi")
       filename -- default is None
       overwrite -- if the output file already exists, should it
                    be overwritten? (default is False)

    If a filename is specified, the result is written to a file.
    Otherwise, a string is returned containing the result.

    Raises ValueError for an unrecognised format and IOError when the
    file exists and overwrite is False.

    To write multiple molecules to the same file you should use
    the Outputfile class.
    """
    format = format.lower()
    if format not in outformats:
        raise ValueError("%s is not a recognised CDK format" % format)
    if filename is not None and not overwrite and os.path.isfile(filename):
        raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % filename)
    if format in ('smi', 'inchi', 'inchikey'):
        # Single-line text formats
        if format == "smi":
            sg = cdk.smiles.SmilesGenerator()
            # Set flag or else c1ccccc1 will be written as C1CCCCC1
            sg.setUseAromaticityFlag(True)
            text = sg.createSMILES(self.Molecule)
        else:
            factory = cdk.inchi.InChIGeneratorFactory.getInstance()
            gen = factory.getInChIGenerator(self.Molecule)
            if format == 'inchi':
                text = gen.getInchi()
            else:
                text = gen.getInchiKey()
        if not filename:
            return text
        # The filename is honoured for InChI and InChIKey too; it used
        # to be ignored and the string returned instead.
        output = open(filename, "w")
        try:
            output.write("%s\n" % text)
        finally:
            output.close()
        return
    else:
        # All other formats go through a CDK writer class
        if filename is None:
            writer = java.io.StringWriter()
        else:
            writer = java.io.FileWriter(java.io.File(filename))
        molwriter = _outformats[format](writer)
        molwriter.write(self.Molecule)
        molwriter.close()
        writer.close()
        if filename is None:
            return str(writer.toString())
def calcfp(self, fp="daylight"):
    """Calculate a molecular fingerprint.

    Optional parameters:
       fp -- the fingerprint type (default is "daylight"). See the
             fps variable for a list of of available fingerprint
             types.

    Raises ValueError for an unrecognised fingerprint type.
    """
    fp = fp.lower()
    if fp not in _fingerprinters:
        raise ValueError("%s is not a recognised CDK Fingerprint type" % fp)
    # A fresh fingerprinter is created for every call
    fingerprinter = _fingerprinters[fp]()
    return Fingerprint(fingerprinter.getFingerprint(self.Molecule))
def calcdesc(self, descnames=[]):
    """Calculate descriptor values.

    Optional parameter:
       descnames -- a list of names of descriptors

    If descnames is not specified, all available descriptors are
    calculated. See the descs variable for a list of available
    descriptors.

    Returns a dictionary mapping descriptor names to values. A
    descriptor that yields several values contributes one entry per
    value, keyed "<name>.<index>". Descriptors whose calculation fails
    inside the CDK are left out of the result. Raises ValueError for an
    unrecognised descriptor name.
    """
    names = descnames or descs
    results = {}
    for name in names:
        try:
            descriptor = _descdict[name]
        except KeyError:
            raise ValueError("%s is not a recognised CDK descriptor type" % name)
        try:
            value = descriptor.calculate(self.Molecule).getValue()
            if hasattr(value, "get"):  # Instead of array
                for index in range(value.length()):
                    results[name + ".%d" % index] = value.get(index)
            elif hasattr(value, "doubleValue"):
                results[name] = value.doubleValue()
            else:
                results[name] = _intvalue(value)
        except CDKException:
            # Can happen if molecule has no 3D coordinates
            pass
        except NullPointerException:
            # Happens with moment of inertia descriptor
            pass
    return results
def draw(self, show=True, filename=None, update=False,
         usecoords=False):
    """Create a 2D depiction of the molecule.

    Optional parameters:
      show -- display the depiction in a window (default is True)
      filename -- write the depiction to this image file
                  (default is None)
      update -- copy the generated 2D coordinates back onto the
                atoms of this molecule (default is False)
      usecoords -- depict using the current coordinates instead of
                   generating a new layout (default is False)

    Displaying or writing an image is only done under Jython. On any
    other platform (i.e. when using jpype) the arguments are ignored
    and the call is equivalent to calling the draw() method of one of
    the other Cinfony modules with parameters:
      show=False, filename=None, update=True, usecoords=False
    """
    on_jython = sys.platform[:4] == "java"
    if not on_jython:
        show, filename, update, usecoords = False, None, True, False

    # Work on a copy so that aromaticity detection and layout do not
    # touch the wrapped molecule.
    mol = Molecule(self.Molecule.clone())
    cdk.aromaticity.CDKHueckelAromaticityDetector.detectAromaticity(mol.Molecule)

    if usecoords:
        if self.atoms[0].Atom.getPoint2d() is None:
            # Use the 3D coords to set the 2D coords
            for atom, newatom in zip(self.atoms, mol.atoms):
                point = atom.Atom.getPoint3d()
                newatom.Atom.setPoint2d(javax.vecmath.Point2d(
                    point.x, point.y))
    else:
        # Do the SDG
        sdg = cdk.layout.StructureDiagramGenerator()
        sdg.setMolecule(mol.Molecule)
        sdg.generateCoordinates()
        mol = Molecule(sdg.getMolecule())
        if update:
            for atom, newatom in zip(self.atoms, mol.atoms):
                point = newatom.Atom.getPoint2d()
                atom.Atom.setPoint3d(javax.vecmath.Point3d(
                    point.x, point.y, 0.0))

    if not on_jython:
        # We are done in jpype
        return

    mol.removeh()
    canvas = _Canvas(mol.Molecule)
    if filename:
        canvas.writetofile(filename)
    if show:
        canvas.popup()
    else:
        canvas.frame.dispose()
if sys.platform[:4] == "java":
    class _Canvas(javax.swing.JPanel):
        """
        Class used by Molecule.draw() in jython

        A Swing panel that renders a CDK molecule into a fixed-size
        square and can either be shown in a window or saved as a PNG.
        """
        # Side length, in pixels, of the square drawing area.
        _SIZE = 300

        def __init__(self, mol):
            """Set up the renderer and the (initially hidden) frame for mol."""
            self.mol = mol
            self.frame = javax.swing.JFrame()
            generators = [
                cdk.renderer.generators.BasicSceneGenerator(),
                cdk.renderer.generators.BasicBondGenerator(),
                cdk.renderer.generators.RingGenerator(),
                cdk.renderer.generators.BasicAtomGenerator(),
            ]
            self.renderer = cdk.renderer.AtomContainerRenderer(
                generators, cdk.renderer.font.AWTFontManager())
            self.renderer.setup(
                mol, java.awt.Rectangle(self._SIZE, self._SIZE))
            self.setPreferredSize(
                java.awt.Dimension(self._SIZE, self._SIZE))
            self.setBackground(java.awt.Color.WHITE)
            self.frame.getContentPane().add(self)
            self.frame.pack()
            self.frame.setDefaultCloseOperation(
                javax.swing.WindowConstants.DISPOSE_ON_CLOSE)

        def paint(self, g):
            """Paint the panel background, then the molecule on top of it."""
            javax.swing.JPanel.paint(self, g)
            self.renderer.paint(
                self.mol, cdk.renderer.visitor.AWTDrawVisitor(g),
                java.awt.Rectangle(self._SIZE, self._SIZE), True)

        def popup(self):
            """Make the frame holding this panel visible."""
            self.frame.visible = True

        def writetofile(self, filename):
            """Render the molecule onto a white background and save it as PNG."""
            img = self.createImage(self._SIZE, self._SIZE)
            g2 = img.getGraphics()  # Graphics2D
            g2.setColor(java.awt.Color.WHITE)
            g2.fillRect(0, 0, self._SIZE, self._SIZE)
            self.paint(g2)
            javax.imageio.ImageIO.write(img, "png", java.io.File(filename))
class Fingerprint(object):
    """A Molecular Fingerprint.

    Required parameters:
       fingerprint -- a vector calculated by one of the fingerprint methods

    Attributes:
       fp -- the underlying fingerprint object
       bits -- a list of bits set in the Fingerprint

    Methods:
       The "|" operator can be used to calculate the Tanimoto coeff. For example,
       given two Fingerprints 'a', and 'b', the Tanimoto coefficient is given by:
          tanimoto = a | b
    """
    def __init__(self, fingerprint):
        self.fp = fingerprint

    def __or__(self, other):
        """Return the Tanimoto coefficient between this and another Fingerprint."""
        return cdk.similarity.Tanimoto.calculate(self.fp, other.fp)

    def __getattr__(self, attr):
        """Compute the 'bits' attribute on demand; reject any other name."""
        if attr != "bits":
            raise AttributeError("Fingerprint has no attribute %s" % attr)
        # Walk the set bits of the underlying fingerprint in order.
        setbits = []
        position = self.fp.nextSetBit(0)
        while position >= 0:
            setbits.append(position)
            position = self.fp.nextSetBit(position + 1)
        return setbits

    def __str__(self):
        return self.fp.toString()
class Atom(object):
    """Represent a cdkjpype Atom.

    Required parameters:
       Atom -- a CDK Atom

    Attributes:
       atomicnum, coords, formalcharge

    The original CDK Atom can be accessed using the attribute:
       Atom
    """
    def __init__(self, Atom):
        self.Atom = Atom

    @property
    def atomicnum(self):
        """The atomic number, read after configuring the atom via _isofact."""
        _isofact.configure(self.Atom)
        return _intvalue(self.Atom.getAtomicNumber())

    @property
    def coords(self):
        """The (x, y, z) coordinates of the atom.

        3D coordinates are preferred. If only 2D coordinates are set
        they are returned with z = 0.0 (a 2D point has no z component),
        and (0., 0., 0.) is returned if the atom has no coordinates.
        """
        point = self.Atom.point3d
        if point:
            return (point.x, point.y, point.z)
        point = self.Atom.point2d
        if point:
            return (point.x, point.y, 0.0)
        return (0., 0., 0.)

    @property
    def formalcharge(self):
        """The formal charge, read after configuring the atom via _isofact."""
        _isofact.configure(self.Atom)
        return _intvalue(self.Atom.getFormalCharge())

    def __str__(self):
        c = self.coords
        return "Atom: %d (%.2f %.2f %.2f)" % (self.atomicnum, c[0], c[1], c[2])
class Smarts(object):
    """A Smarts Pattern Matcher

    Required parameters:
       smartspattern

    Methods:
       findall()

    Example:
    >>> mol = readstring("smi","CCN(CC)CC") # triethylamine
    >>> smarts = Smarts("[#6][#6]") # Matches an ethyl group
    >>> print smarts.findall(mol)
    [(1, 2), (4, 5), (6, 7)]
    """
    def __init__(self, smartspattern):
        """Initialise with a SMARTS pattern."""
        self.smarts = cdk.smiles.smarts.SMARTSQueryTool(smartspattern)

    def findall(self, molecule):
        """Find all matches of the SMARTS pattern to a particular molecule.

        Required parameters:
           molecule
        """
        # Run the query first; the matching atoms are read back from
        # the query tool afterwards.
        query = self.smarts
        query.matches(molecule.Molecule)
        return list(query.getUniqueMatchingAtoms())
class MoleculeData(object):
    """Store molecule data in a dictionary-type object

    Required parameters:
      Molecule -- a CDK Molecule

    Methods and accessor methods are like those of a dictionary except
    that the data is retrieved on-the-fly from the underlying Molecule.

    Example:
    >>> mol = readfile("sdf", 'head.sdf').next()
    >>> data = mol.data
    >>> print data
    {'Comment': 'CORINA 2.61 0041 25.10.2001', 'NSC': '1'}
    >>> print len(data), data.keys(), data.has_key("NSC")
    2 ['Comment', 'NSC'] True
    >>> print data['Comment']
    CORINA 2.61 0041 25.10.2001
    >>> data['Comment'] = 'This is a new comment'
    >>> for k,v in data.iteritems():
    ...    print k, "-->", v
    Comment --> This is a new comment
    NSC --> 1
    >>> del data['NSC']
    >>> print len(data), data.keys(), data.has_key("NSC")
    1 ['Comment'] False
    """
    def __init__(self, Molecule):
        self._mol = Molecule

    def _data(self):
        """Return the property map of the underlying molecule."""
        return self._mol.getProperties()

    def _testforkey(self, key):
        """Raise KeyError unless key is a property of the molecule."""
        if key in self:
            return
        raise KeyError("'%s'" % key)

    def keys(self):
        return list(self._data().keySet())

    def values(self):
        return list(self._data().values())

    def items(self):
        return [(name, self[name]) for name in self._data().keySet()]

    def __iter__(self):
        return iter(self.keys())

    def iteritems(self):
        return iter(self.items())

    def __len__(self):
        return len(self._data())

    def __contains__(self, key):
        return key in self._data()

    def __delitem__(self, key):
        self._testforkey(key)
        self._mol.removeProperty(key)

    def clear(self):
        # Iterating self goes through keys(), which returns a new list,
        # so deleting while looping is safe.
        for name in self:
            del self[name]

    def has_key(self, key):
        return key in self

    def update(self, dictionary):
        for name, value in dictionary.iteritems():
            self[name] = value

    def __getitem__(self, key):
        self._testforkey(key)
        return self._mol.getProperty(key)

    def __setitem__(self, key, value):
        # Values are always stored as strings.
        self._mol.setProperty(key, str(value))

    def __repr__(self):
        return dict(self.iteritems()).__repr__()
# Manual smoke test, run only when the module is executed as a script:
# build a molecule from SMILES, set its title, depict it, then iterate
# over every molecule in head.sdf (expected in the current directory).
if __name__=="__main__": #pragma: no cover
mol = readstring("smi", "CC(=O)Cl")
mol.title = "Noel"
mol.draw()
# The loop body is empty on purpose: only the reading itself is exercised.
for mol in readfile("sdf", "head.sdf"):
pass
cinfony-1.2/cinfony/jchem.py 0000664 0001750 0001750 00000052510 12061452051 016043 0 ustar noel noel 0000000 0000000 #-*. coding: utf-8 -*-
## Copyright (c) 2012, Adrià Cereto-Massagué, Noel O'Boyle
## All rights reserved.
##
## This file is part of Cinfony.
## The contents are covered by the terms of the BSD license
## which is included in the file LICENSE_BSD.txt.
"""
jchem - A Cinfony module for accessing ChemAxon's JChem from CPython and Jython
Global variables:
chemaxon - the underlying JChem Java library
informats - a dictionary of supported input formats
outformats - a dictionary of supported output formats
descs - a list of supported descriptors
fps - a list of supported fingerprint types
forcefields - a list of supported forcefields
"""
import sys
import os
from glob import glob
# Jar files found in $JCHEMDIR/lib. The list is defined on every platform
# because the Jython/IronPython branch below reads it unconditionally.
classpath = []
if sys.platform[:4] == "java":
    if 'JCHEMDIR' in os.environ:
        assert os.path.isdir(os.path.join(os.environ['JCHEMDIR'], 'lib'))
        for jar in glob(os.path.join(os.path.join(os.environ['JCHEMDIR'],'lib'), '*.jar')):
            classpath.append(jar)

if sys.platform[:4] == "java" or sys.platform[:3] == "cli":
    import sys
    # Make the JChem jars importable as Java packages.
    sys.path = classpath + sys.path
    import java, javax
    import chemaxon
    from chemaxon.util import MolHandler
    #Exceptions are handled differently in jpype and jython. We need to wrap them:
    MolExportException = chemaxon.marvin.io.MolExportException
    MolFormatException = chemaxon.formats.MolFormatException
else:
    from jpype import *
    if not isJVMStarted():
        _jvm = os.environ['JPYPE_JVM']
        if _jvm.startswith('"'):  # Remove surrounding quotes
            _jvm = _jvm[1:-1]
        _cp = os.pathsep.join(os.environ.get('CLASSPATH', '').split(os.pathsep))
        startJVM(_jvm, "-Djava.class.path=" + _cp)
    chemaxon = JPackage("chemaxon")
    MolHandler = chemaxon.util.MolHandler
    try:
        # Instantiating a class is how a missing jchem.jar is detected:
        # the TypeError raised here is reported as an ImportError.
        _testmol = MolHandler()
    except TypeError:
        raise ImportError("jchem.jar file cannot be found.")
    # Exception wrappers for JPype
    MolExportException = JavaException
    MolFormatException = JavaException
# Candidate descriptor names: a fixed starting set plus every name exposed
# by the chemaxon.descriptors.scalars package.
_descset = set(['HAcc', 'HDon', 'Heavy', 'LogD', 'LogP', 'Mass', 'TPSA'])
_descset.update(dir(chemaxon.descriptors.scalars))
# Keep only the names whose object has a 'generate' attribute, drop 'LogD',
# and append 'RotatableBondsCount', which Molecule.calcdesc computes with a
# TopologyAnalyser instead of a scalar descriptor class.
descs = [cls for cls in _descset if hasattr(getattr(chemaxon.descriptors.scalars, cls),'generate') and cls != 'LogD'] + ['RotatableBondsCount']
"""A list of supported descriptors"""
# 'ecfp' is the only fingerprint type Molecule.calcfp knows how to build.
fps = ['ecfp']
"""A list of supported fingerprint types"""
forcefields = ["mmff94"]
"""A list of supported forcefields"""
# Maps each accepted input format code to a human-readable description.
informats = {
    'smi': "SMILES",
    'cxsmi': "ChemAxon extended SMILES",
    'mol': "MDL MOL",
    'sdf': "MDL SDF",
    'inchi': "InChI",
    'cml': "Chemical Markup Language",
    'mrv': 'Marvin Documents',
    'skc': 'ISIS/Draw sketch file',
    'cdx': 'ChemDraw sketch file',
    'cdxml': 'ChemDraw sketch file',
    "name": "Common name",
    "peptide": "Aminoacid sequence",
    "sybyl": "Tripos SYBYL",
    "pdb": "PDB",
    "xyz": "XYZ",
    'cube': 'Gaussian cube',
    'gout': 'Gaussian output format',
}
"""A dictionary of supported input formats"""
# Maps each accepted output format code to a human-readable description.
outformats = {
    'smi': "SMILES",
    'cxsmi': "ChemAxon extended SMILES",
    'mol': "MDL MOL",
    'sdf': "MDL SDF",
    'inchi': "InChI",
    'inchikey': "InChIKey",
    'cml': "CML",
    'mrv': 'Marvin Documents',
    'skc': 'ISIS/Draw sketch file',
    'cdx': 'ChemDraw sketch file',
    'cdxml': 'ChemDraw sketch file',
    "name": "Common name",
    "peptide": "Aminoacid sequence",
    "sybyl": "Tripos SYBYL",
    "pdb": "PDB",
    "xyz": "XYZ",
    'cube': 'Gaussian cube',
    'gjf': 'Gaussian input format',
}
"""A dictionary of supported output formats"""
def readfile(format, filename):
    """Iterate over the molecules in a file.

    Required parameters:
       format - see the informats variable for a list of available
                input formats. The value is validated but is not
                passed on to JChem; it is kept for compatibility
                with other cinfony modules and for readability.
       filename

    Raises IOError if the file does not exist and ValueError if the
    format is not recognised or JChem reports a format error. As this
    is a generator, the errors are raised when the first molecule is
    requested.

    You can access the first molecule in a file using the next() method
    of the iterator:
        mol = readfile("smi", "myfile.smi").next()

    You can make a list of the molecules in a file using:
        mols = list(readfile("smi", "myfile.smi"))

    You can iterate over the molecules in a file as shown in the
    following code snippet:
    >>> atomtotal = 0
    >>> for mol in readfile("sdf", "head.sdf"):
    ...     atomtotal += len(mol.atoms)
    ...
    >>> print atomtotal
    43
    """
    if not os.path.isfile(filename):
        raise IOError("No such file: '%s'" % filename)
    format = format.lower()
    # Input formats are what matter when reading (not outformats).
    if format not in informats:
        raise ValueError("%s is not a recognised JChem format" % format)
    try:
        mi = chemaxon.formats.MolImporter(filename)
        try:
            mol = mi.read()
            while mol:
                mol.aromatize()
                yield Molecule(mol)
                mol = mi.read()
        finally:
            # Release the file even if the caller stops iterating early.
            mi.close()
    except MolFormatException:
        # MolFormatException is the module-level wrapper, so the same
        # clause works under both Jython and JPype.
        raise ValueError("%s is not a recognised JChem format" % format)
def readstring(format, string):
    """Read in a molecule from a string.

    Required parameters:
       format - see the informats variable for a list of available
                input formats. The value is validated but is not
                passed on to JChem; it is kept for compatibility
                with other cinfony modules and for readability.
       string

    Raises ValueError for an unrecognised format and IOError if the
    string cannot be read.

    Example:
    >>> input = "C1=CC=CS1"
    >>> mymol = readstring("smi", input)
    >>> len(mymol.atoms)
    5
    """
    format = format.lower()
    if format not in informats:
        raise ValueError("%s is not a recognised JChem format" % format)
    try:
        handler = MolHandler(string)
        return Molecule(handler.molecule)
    except MolFormatException:
        if sys.platform[:4] == "java":
            raise IOError("Problem reading the supplied string")
        # Jpype exception: forward the message of the wrapped exception.
        # sys.exc_info() keeps the clause valid on every Python version.
        raise IOError(sys.exc_info()[1].message())
class Outputfile(object):
    """Represent a file to which *output* is to be sent.

    Required parameters:
       format - see the outformats variable for a list of available
                output formats. Export options may be appended after
                a colon, e.g. "smi:a-H".
       filename

    Optional parameters:
       overwrite -- if the output file already exists, should it
                    be overwritten? (default is False)

    Raises IOError if the file exists and overwrite is False, and
    ValueError if JChem cannot create an exporter for the format.

    Methods:
       write(molecule)
       close()
    """
    def __init__(self, format, filename, overwrite=False):
        options = ''
        if ':' in format:
            # Only the first colon separates the format from its options.
            format, options = format.split(':', 1)
            if options:
                options = ':' + options
        self.format = format.lower()
        self.filename = filename
        if not overwrite and os.path.isfile(self.filename):
            raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % self.filename)
        exportformat = self.format
        if self.format in ("smi", "cxsmi"):
            # Same conventions as Molecule.write(): JChem is given
            # "smiles"/"cxsmiles", with ":a-H" as the default options.
            if not options:
                options = ':a-H'
            exportformat = self.format + 'les'
        try:
            self._writer = chemaxon.formats.MolExporter(filename, exportformat + options)
        except MolExportException:
            raise ValueError(sys.exc_info()[1])
        self.total = 0  # The total number of molecules written to the file

    def write(self, molecule):
        """Write a molecule to the output file.

        Required parameters:
           molecule
        """
        if not self.filename:
            raise IOError("Outputfile instance is closed.")
        self._writer.write(molecule.Molecule)
        self.total += 1

    def close(self):
        """Close the Outputfile to further writing."""
        self.filename = None
        self._writer.close()
class Molecule(object):
    """Represent a JChem Molecule.

    Required parameters:
       Molecule -- a JChem Molecule or any type of cinfony Molecule

    Attributes:
       atoms, data, exactmass, formula, molwt, title

    Methods:
       addh(), calcfp(), calcdesc(), draw(), make3D(), removeh(), write()

    The underlying JChem Molecule can be accessed using the attribute:
       Molecule
    The associated JChem MolHandler can be accessed using the attribute:
       MolHandler
    """
    _cinfony = True

    def __init__(self, Molecule):
        if hasattr(Molecule, "_cinfony"):
            # Another cinfony Molecule: convert it through its exchange
            # string (SMILES when the code is 0, otherwise read as SDF).
            a, b = Molecule._exchange
            if a == 0:
                mol = readstring("smi", b)
            else:
                mol = readstring("sdf", b)
            Molecule = mol.Molecule
        self.Molecule = Molecule
        self.MolHandler = chemaxon.util.MolHandler(self.Molecule)
        self.MolHandler.aromatize()

    @property
    def atoms(self):
        """A list of Atom wrappers, one per atom of the molecule."""
        return [Atom(atom) for atom in self.Molecule.atomArray]

    @property
    def data(self):
        """A dictionary-like view of the molecule's properties."""
        return MoleculeData(self)

    @property
    def formula(self):
        return self.MolHandler.calcMolFormula()

    @property
    def exactmass(self):
        # NOTE(review): calcMolWeightInDouble looks like a molecular
        # weight call rather than a monoisotopic mass -- confirm.
        return self.MolHandler.calcMolWeightInDouble()

    @property
    def molwt(self):
        return self.MolHandler.calcMolWeight()

    def _gettitle(self):
        return self.Molecule.getName()

    def _settitle(self, val):
        self.Molecule.setName(val)

    title = property(_gettitle, _settitle)

    @property
    def _exchange(self):
        """A (code, string) pair used to pass this molecule to other toolkits.

        The pair is (1, MOL block) when the molecule's dim is greater
        than 1 and (0, SMILES) otherwise.
        """
        if self.Molecule.dim > 1:
            return (1, self.write("mol"))
        else:
            return (0, self.write("smi"))

    def __iter__(self):
        """Iterate over the Atoms of the Molecule.

        This allows constructions such as the following:
           for atom in mymol:
               print atom
        """
        return iter(self.atoms)

    def __str__(self):
        return self.write()

    def addh(self):
        """Add hydrogens."""
        self.MolHandler.addHydrogens()

    def removeh(self):
        """Remove hydrogens."""
        self.MolHandler.removeHydrogens()

    def write(self, format="smi", filename=None, overwrite=False):
        """Write the molecule to a file or return a string.

        Optional parameters:
           format -- see the outformats variable for a list of available
                     output formats (default is "smi"). Export options
                     may be appended after a colon, e.g. "smi:a-H".
           filename -- default is None
           overwrite -- if the output file already exists, should it
                        be overwritten? (default is False)

        If a filename is specified, the result is written to a file and
        None is returned. Otherwise, a string is returned containing the
        result.

        Raises ValueError for an unrecognised format and IOError if the
        file exists and overwrite is False.

        To write multiple molecules to the same file you should use
        the Outputfile class.
        """
        options = ''
        if ':' in format:
            # Only the first colon separates the format from its options.
            format, options = format.split(':', 1)
            if options:
                options = ':' + options
        format = format.lower()
        if format not in outformats:
            raise ValueError("%s is not a recognised format" % format)
        if filename is not None and not overwrite and os.path.isfile(filename):
            raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % filename)

        exporter = chemaxon.formats.MolExporter
        if format in ("smi", 'cxsmi'):
            # JChem is given "smiles"/"cxsmiles"; ":a-H" is the default.
            if not options:
                options = ':a-H'
            out = exporter.exportToFormat(self.Molecule, format + 'les' + options)
        elif format == 'inchikey':
            out = exporter.exportToFormat(self.Molecule, 'inchikey').replace('InChIKey=', '')
        else:
            out = exporter.exportToFormat(self.Molecule, format + options)
            if format == 'inchi':
                # Keep only the InChI itself, not the auxiliary info.
                out = out.split('AuxInfo=')[0]

        if not filename:
            return out
        output = open(filename, "w")
        try:
            output.write("%s\n" % out)
        finally:
            output.close()
        return None

    def calcfp(self, fp="ecfp"):
        """Calculate a molecular fingerprint.

        Optional parameters:
           fp -- the fingerprint type (default is "ecfp"). See the
                 fps variable for a list of available fingerprint
                 types.

        Raises ValueError if the fingerprint type is not recognised.
        """
        fp = fp.lower()
        if fp not in fps:
            raise ValueError("%s is not a recognised fingerprint type" % fp)
        # 'ecfp' is the only fingerprint type listed in fps.
        fingerprint = chemaxon.descriptors.ECFP(ECFPConfiguration)
        fingerprint.generate(self.Molecule)
        return Fingerprint(fingerprint)

    def calcdesc(self, descnames=[]):
        """Calculate descriptor values.

        Optional parameter:
           descnames -- a list of names of descriptors

        If descnames is not specified, all available descriptors are
        calculated. See the descs variable for a list of available
        descriptors.

        Returns a dictionary mapping descriptor names to values. Raises
        ValueError for an unrecognised descriptor name.
        """
        if not descnames:
            descnames = descs
        ans = {}
        for descname in descnames:
            if descname not in descs:
                raise ValueError("%s is not a recognised descriptor type" % descname)
            if descname == 'RotatableBondsCount':
                # Not a scalar descriptor class: use the topology analyser.
                ta = chemaxon.calculations.TopologyAnalyser()
                ta.setMolecule(self.Molecule)
                ans[descname] = ta.rotatableBondCount()
            else:
                desc = getattr(chemaxon.descriptors.scalars, descname)('')
                desc.generate(self.Molecule)
                ans[descname] = desc.toFloatArray()[0]
        return ans

    def make3D(self):
        """Generate 3D coordinates.

        Hydrogens are added, and a low energy conformer is found
        using the MMFF94 forcefield.
        """
        self.addh()
        cp = chemaxon.marvin.calculations.ConformerPlugin()
        cp.setMolecule(self.Molecule)
        cp.setLowestEnergyConformerCalculation(True)
        cp.setMMFF94Optimization(True)
        cp.run()
        optmol = cp.getMMFF94OptimizedStrucutre()
        # Replace the wrapped molecule and rebuild its handler.
        self.Molecule = optmol
        self.MolHandler = chemaxon.util.MolHandler(self.Molecule)
        self.MolHandler.aromatize()

    def draw(self, show=True, filename=None, update=False,
             usecoords=False):
        """Create a 2D depiction of the molecule.

        Optional parameters:
          show -- display the depiction in a window (default is True)
          filename -- write the PNG depiction to this file
                      (default is None)
          update -- replace this object's molecule with the depicted
                    one, re-read from a MOL block (default is False)
          usecoords -- depict the molecule as it is instead of a copy
                       whose dimension has been reset to 0
                       (default is False)
        """
        if not usecoords:
            molecule = self.Molecule.clone()
            molecule.setDim(0)
        else:
            molecule = self.Molecule
        if update:
            myMolecule = readstring("mol", Molecule(molecule).write("mol"))
            self.Molecule = myMolecule.Molecule
            self.MolHandler = myMolecule.MolHandler
        png = chemaxon.formats.MolExporter.exportToBinFormat(molecule, 'png')
        if filename:
            of = java.io.FileOutputStream(filename)
            try:
                of.write(png)
            finally:
                of.close()
        if show:
            # Decode the PNG bytes and show them in a Swing window.
            source = java.io.ByteArrayInputStream(png)
            reader = javax.imageio.ImageIO.getImageReadersByFormatName('png').next()
            iis = javax.imageio.ImageIO.createImageInputStream(source)
            reader.setInput(iis, True)
            param = reader.getDefaultReadParam()
            image = reader.read(0, param)
            frame = javax.swing.JFrame()
            imageIcon = javax.swing.ImageIcon(image)
            label = javax.swing.JLabel()
            label.setIcon(imageIcon)
            frame.getContentPane().add(label, java.awt.BorderLayout.CENTER)
            frame.pack()
            frame.setVisible(True)
class Fingerprint(object):
    """A Molecular Fingerprint.

    Required parameters:
       fingerprint -- a vector calculated by one of the fingerprint methods

    Attributes:
       fp -- the underlying fingerprint object
       bits -- a list of bits set in the Fingerprint

    Methods:
       The "|" operator can be used to calculate the Tanimoto coeff. For example,
       given two Fingerprints 'a', and 'b', the Tanimoto coefficient is given by:
          tanimoto = a | b
    """
    def __init__(self, fingerprint):
        self.fp = fingerprint

    def __or__(self, other):
        """Return one minus the value of the fingerprint's getTanimoto()."""
        dissimilarity = self.fp.getTanimoto(other.fp)
        return 1 - dissimilarity

    def __getattr__(self, attr):
        """Compute the 'bits' attribute on demand; reject any other name."""
        if attr != "bits":
            raise AttributeError("Fingerprint has no attribute %s" % attr)
        # Walk the set bits of the fingerprint's bit set until
        # nextSetBit() reports -1.
        bitset = self.fp.toBitSet()
        found = []
        current = bitset.nextSetBit(0)
        while current != -1:
            found.append(current)
            current = bitset.nextSetBit(current + 1)
        return found

    def __str__(self):
        values = self.fp.toIntArray()
        return ", ".join([str(value) for value in values])
class Atom(object):
    """Represent an Atom.

    Required parameters:
       Atom -- a JChem Atom

    Attributes:
       atomicnum, coords, formalcharge

    The original JChem Atom can be accessed using the attribute:
       Atom
    """
    def __init__(self, Atom):
        self.Atom = Atom

    @property
    def atomicnum(self):
        """The atomic number reported by the wrapped atom."""
        return self.Atom.getAtno()

    @property
    def coords(self):
        """The (x, y, z) coordinates of the wrapped atom."""
        wrapped = self.Atom
        return (wrapped.x, wrapped.y, wrapped.z)

    @property
    def formalcharge(self):
        """The charge reported by the wrapped atom."""
        return self.Atom.charge

    def __str__(self):
        x, y, z = self.coords
        return "Atom: %d (%.2f %.2f %.2f)" % (self.atomicnum, x, y, z)
class Smarts(object):
    """A Smarts Pattern Matcher

    Required parameters:
       smartspattern

    Methods:
       findall()

    Example:
    >>> mol = readstring("smi","CCN(CC)CC") # triethylamine
    >>> smarts = Smarts("[#6][#6]") # Matches an ethyl group
    >>> print smarts.findall(mol)
    [(1, 2), (4, 5), (6, 7)]
    """
    def __init__(self, smartspattern):
        """Initialise with a SMARTS pattern."""
        self.search = chemaxon.sss.search.MolSearch()
        smarts = MolHandler(smartspattern)
        smarts.setQueryMode(True)
        smarts.aromatize()
        self.search.setQuery(smarts.molecule)

    def findall(self, molecule):
        """Find all matches of the SMARTS pattern to a particular molecule.

        Required parameters:
           molecule

        Returns a list of tuples of atom indices, shifted to start at 1.
        An empty list is returned when the search yields nothing.
        """
        self.search.setTarget(molecule.Molecule)
        hits = self.search.findAll()
        if hits is None:
            # No hit array at all: treat it as "no matches" instead of
            # failing on len(None).
            return []
        return [tuple([index + 1 for index in hit]) for hit in hits]
class MoleculeData(object):
    """Store molecule data in a dictionary-type object

    Required parameters:
      Molecule -- a cinfony jchem Molecule (its underlying JChem
                  molecule is read from the .Molecule attribute)

    Methods and accessor methods are like those of a dictionary except
    that the data is retrieved on-the-fly from the underlying Molecule.

    Example:
    >>> mol = readfile("sdf", 'head.sdf').next()
    >>> data = mol.data
    >>> print data
    {'Comment': 'CORINA 2.61 0041 25.10.2001', 'NSC': '1'}
    >>> print len(data), data.keys(), data.has_key("NSC")
    2 ['Comment', 'NSC'] True
    >>> print data['Comment']
    CORINA 2.61 0041 25.10.2001
    >>> data['Comment'] = 'This is a new comment'
    >>> for k,v in data.iteritems():
    ...    print k, "-->", v
    Comment --> This is a new comment
    NSC --> 1
    >>> del data['NSC']
    >>> print len(data), data.keys(), data.has_key("NSC")
    1 ['Comment'] False
    """
    def __init__(self, Molecule):
        # The property container of the wrapped JChem molecule.
        self._data = Molecule.Molecule.properties()

    def _testforkey(self, key):
        """Raise KeyError unless key is a property of the molecule."""
        if key in self:
            return
        raise KeyError("'%s'" % key)

    def keys(self):
        return list(self._data.keys)

    def values(self):
        return [self[name] for name in self._data.keys]

    def items(self):
        return [(name, self[name]) for name in self._data.keys]

    def __iter__(self):
        return iter(self.keys())

    def iteritems(self):
        return iter(self.items())

    def __len__(self):
        return len(self._data.keys)

    def __contains__(self, key):
        return key in self.keys()

    def __delitem__(self, key):
        self._testforkey(key)
        # Deletion is done by setting the property to None.
        self._data.setString(key, None)

    def clear(self):
        # Iterating self goes through keys(), which returns a new list,
        # so deleting while looping is safe.
        for name in self:
            del self[name]

    def has_key(self, key):
        return key in self

    def update(self, dictionary):
        for name, value in dictionary.iteritems():
            self[name] = value

    def __getitem__(self, key):
        self._testforkey(key)
        return self._data.get(key).propValue

    def __setitem__(self, key, value):
        # Values are always stored as strings.
        self._data.setString(key, str(value))

    def __repr__(self):
        return dict(self.iteritems()).__repr__()
# Configuration string handed to chemaxon.descriptors.ECFP by
# Molecule.calcfp().
# NOTE(review): the literal only contains whitespace here -- confirm whether
# the configuration content was lost from this copy of the file.
ECFPConfiguration = """
"""
# Manual smoke test, run only when the module is executed as a script:
# build a molecule from SMILES, give it a non-ASCII title, depict it, then
# iterate over every molecule in head.sdf (expected in the current directory).
if __name__=="__main__": #pragma: no cover
mol = readstring("smi", "CC(=O)Cl")
mol.title = u"Adrià"
mol.draw()
# The loop body is empty on purpose: only the reading itself is exercised.
for mol in readfile("sdf", "head.sdf"):
pass
cinfony-1.2/PKG-INFO 0000664 0001750 0001750 00000001657 12061452073 014045 0 ustar noel noel 0000000 0000000 Metadata-Version: 1.0
Name: cinfony
Version: 1.2
Summary: cinfony: a common API to several cheminformatics toolkits
Home-page: http://cinfony.googlecode.com
Author: Noel O'Boyle
Author-email: baoilleach@gmail.com
License: BSD
Description: cinfony is a Python library that provides a common API to several
open source cheminformatics toolkits.
Platform: Any.
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Intended Audience :: Science/Research
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: BSD License
Classifier: License :: OSI Approved :: GNU General Public License (GPL)
Classifier: Natural Language :: English
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Topic :: Scientific/Engineering :: Chemistry
Classifier: Topic :: Software Development :: Libraries :: Python Modules
cinfony-1.2/test/ 0000775 0001750 0001750 00000000000 12061452073 013716 5 ustar noel noel 0000000 0000000 cinfony-1.2/test/testall.py 0000664 0001750 0001750 00000057162 12061452051 015747 0 ustar noel noel 0000000 0000000 import pdb
import os
import sys
import unittest
pybel = indy = ironable = rdk = cdk = webel = opsin = jchem = None
try:
import pybel # From Open Babel
except ImportError:
pass
try:
from cinfony import cdk
except (RuntimeError, ImportError, KeyError):
pass
try:
from cinfony import pybel
except (ImportError, AttributeError, KeyError):
pass
try:
from cinfony import rdk
except ImportError:
pass
try:
from cinfony import opsin
except (ImportError, KeyError):
pass
try:
from cinfony import indy
except (IOError, ImportError, KeyError):
pass
try:
from cinfony import webel
except ImportError:
pass
try:
from cinfony import jchem
except (NameError, RuntimeError, ImportError, KeyError):
pass
try: # Define next() for Jython 2.5
next
except (NameError):
next = lambda x: x.next()
class myTestCase(unittest.TestCase):
    """Additional methods not present in Jython 2.2"""
    # Taken from unittest.py in Python 2.5 distribution

    def assertFalse(self, expr, msg=None):
        "Fail the test if the expression is true."
        if not expr:
            return
        raise self.failureException(msg)

    def assertTrue(self, expr, msg=None):
        """Fail the test unless the expression is true."""
        if expr:
            return
        raise self.failureException(msg)

    def assertAlmostEqual(self, first, second, places=7, msg=None):
        """Fail if the two objects are unequal as determined by their
           difference rounded to the given number of decimal places
           (default 7) and comparing to zero.

           Note that decimal places (from zero) are usually not the same
           as significant digits (measured from the most signficant digit).
        """
        difference = round(second - first, places)
        if difference == 0:
            return
        if not msg:
            msg = '%r != %r within %r places' % (first, second, places)
        raise self.failureException(msg)
class TestOpsin(myTestCase):
    """Tests for the opsin module, which reads molecules from IUPAC names."""
    toolkit = opsin

    def testconversion(self):
        """Convert from the IUPAC name of benzene to other formats"""
        benzene = self.toolkit.readstring("iupac", "benzene")
        self.assertEqual(benzene.write("smi"), "C1=CC=CC=C1")
        self.assertEqual(benzene.write("inchi"),
                         "InChI=1/C6H6/c1-2-4-6-5-3-1/h1-6H")
        # The CML output is not inspected; writing it must not raise.
        benzene.write("cml")

    def testnoconversion(self):
        """A failed conversion - should raise IOError"""
        read = self.toolkit.readstring
        self.assertRaises(IOError, read, "iupac", "Nosuchname")

    def testnoformats(self):
        """No such format - should raise ValueError"""
        read = self.toolkit.readstring
        self.assertRaises(ValueError, read, "noel", "benzene")

    def testwritefile(self):
        """Test writing a file"""
        target = "tmp.cml"
        if os.path.isfile(target):
            os.remove(target)
        benzene = self.toolkit.readstring("iupac", "benzene")
        benzene.write("cml", target)
        self.assertTrue(os.path.isfile(target))
        # A second write without overwrite=True must be refused.
        self.assertRaises(IOError, benzene.write, "cml", target)
        benzene.write("cml", target, overwrite=True)
        os.remove(target)
class TestToolkit(myTestCase):
def setUp(self):
    """Create the fixtures: two molecules from SMILES and those in head.sdf."""
    read = self.toolkit.readstring
    self.mols = [read("smi", "CCCC"), read("smi", "CCCN")]
    self.head = list(self.toolkit.readfile("sdf", "head.sdf"))
    first = self.head[0]
    self.atom = first.atoms[1]
def testattributes(self):
    """Test attributes like informats, descs and so on"""
    toolkit = self.toolkit
    informats, outformats = toolkit.informats, toolkit.outformats
    for formats in (informats, outformats):
        self.assertNotEqual(len(formats.keys()), 0)
    for name in ("descs", "forcefields", "fps"):
        self.assertNotEqual(len(getattr(toolkit, name)), 0)
def testInChI(self):
    """Test InChI generation"""
    butane = self.mols[0]
    inchi = butane.write("inchi").rstrip()
    inchikey = butane.write("inchikey").rstrip()
    self.assertEqual(inchi, "InChI=1S/C4H10/c1-3-4-2/h3-4H2,1-2H3")
    self.assertEqual(inchikey, "IJDNQMDRQITEOD-UHFFFAOYSA-N")
    # Reading the InChI back must give butane again.
    roundtrip = self.toolkit.readstring("inchi", inchi)
    self.assertEqual("CCCC", roundtrip.write("smi").rstrip())
def FPaccesstest(self):
    """Helper: access a non-existent fingerprint attribute."""
    # Should raise AttributeError
    fingerprint = self.mols[0].calcfp()
    return fingerprint.nosuchname
def testFPTanimoto(self):
    """Test the calculation of the Tanimoto coefficient"""
    fingerprints = []
    for mol in self.mols:
        fingerprints.append(mol.calcfp())
    coefficient = fingerprints[0] | fingerprints[1]
    self.assertAlmostEqual(coefficient, self.tanimotoresult, 3)
def testFPstringrepr(self):
    """Test the string representation and corner cases."""
    butane = self.mols[0]
    self.assertRaises(ValueError, butane.calcfp, "Nosuchname")
    self.assertRaises(AttributeError, self.FPaccesstest)
    # The string form is a ", "-separated list with Nfpbits entries.
    fields = str(butane.calcfp()).split(", ")
    self.assertEqual(len(fields), self.Nfpbits)
def testFPbits(self):
    """Test whether the bits are set correctly."""
    bitlists = [mol.calcfp().bits for mol in self.mols]
    self.assertNotEqual(len(bitlists[0]), 0)
    bitsets = [set(bitlist) for bitlist in bitlists]
    # Calculate the Tanimoto coefficient the old-fashioned way
    shared = len(bitsets[0] & bitsets[1])
    combined = len(bitsets[0] | bitsets[1])
    tanimoto = shared / float(combined)
    self.assertAlmostEqual(tanimoto, self.tanimotoresult, 3)
def RSaccesstest(self):
    """Helper: access a non-existent molecule attribute."""
    # Should raise AttributeError
    butane = self.mols[0]
    return butane.nosuchname
def testRSformaterror(self):
    """Test that invalid formats raise an error"""
    read = self.toolkit.readstring
    # Unknown format name
    self.assertRaises(ValueError, read, "noel", "jkjk")
    # Known format, unparseable content
    self.assertRaises(IOError, read, "smi", "&*)(%)($)")
def testselfconversion(self):
    """Test that the toolkit can eat its own dog-food."""
    # Once for a molecule read from head.sdf, once for one read from SMILES.
    for original in (self.head[0], self.mols[0]):
        newmol = self.toolkit.Molecule(original)
        self.assertEqual(newmol._exchange,
                         original._exchange)
def testLocalOpt(self):
    """Test that local optimisation affects the coordinates"""
    mol = self.head[0]
    before = mol.atoms[0].coords
    mol.localopt()
    after = mol.atoms[0].coords
    self.assertNotEqual(before, after)
def testMake3D(self):
    """Test that 3D coordinate generation does something"""
    butane = self.mols[0]
    butane.make3D()
    origin = (0., 0., 0.)
    self.assertNotEqual(butane.atoms[3].coords, origin)
def testDraw(self):
    """Create a 2D depiction"""
    butane = self.mols[0]
    butane.draw(show=False,
                filename="%s.png" % self.toolkit.__name__)
    butane.draw(show=False)  # Just making sure that it doesn't raise an Error
    butane.draw(show=False, update=True)
    # After update=True the first two atoms must not both sit at the origin.
    coords = [atom.coords for atom in butane.atoms[0:2]]
    origin = (0., 0., 0.)
    self.assertNotEqual(coords, [origin, origin])
    butane.draw(show=False, usecoords=True,
                filename="%s_b.png" % self.toolkit.__name__)
def testRSgetprops(self):
    """Get the values of the properties."""
    butane = self.mols[0]
    # self.assertAlmostEqual(self.mols[0].exactmass, 58.078, 3)
    # Only OpenBabel has a working exactmass
    self.assertAlmostEqual(butane.molwt, 58.12, 2)
    self.assertEqual(len(butane.atoms), 4)
    self.assertRaises(AttributeError, self.RSaccesstest)
def testRoundTripSMILES(self):
    """Read the SMILES of benzene and check it is written back unchanged."""
    expected = "c1ccccc1"
    written = self.toolkit.readstring("smi", expected).write("smi")
    self.assertEqual(written.rstrip(), expected)
def testRSconversiontoMOL(self):
    """Write the first molecule as a MOL file and compare with a reference.

    Only the number of lines and the second-to-last line (the "M END"
    terminator) are compared with the reference text.
    """
    reference = """
OpenBabel04220815032D
4 3 0 0 0 0 0 0 0 0999 V2000
0.0000 0.0000 0.0000 C 0 0 0 0 0
0.0000 0.0000 0.0000 C 0 0 0 0 0
0.0000 0.0000 0.0000 C 0 0 0 0 0
0.0000 0.0000 0.0000 C 0 0 0 0 0
1 2 1 0 0 0
2 3 1 0 0 0
3 4 1 0 0 0
M END
"""
    written = self.mols[0].write("mol")
    reference_lines = reference.split("\n")
    written_lines = written.split("\n")
    self.assertEqual(len(reference_lines), len(written_lines))
    # M END
    self.assertEqual(reference_lines[-2], written_lines[-2].rstrip())
def testRSstringrepr(self):
    """Check that str() of the first molecule, stripped, is "CCCC"."""
    as_text = str(self.mols[0])
    self.assertEqual(as_text.strip(), "CCCC")
def testRFread(self):
    """Check that exactly two molecules are held in ``self.mols``."""
    molecule_count = len(self.mols)
    self.assertEqual(molecule_count, 2)
def RFreaderror(self):
    """Try to read the first molecule from a file that does not exist.

    Helper passed to assertRaises by testRFmissingfile.
    """
    reader = self.toolkit.readfile("sdf", "nosuchfile.sdf")
    next(reader)
def testRFmissingfile(self):
    """Check that reading from a non-existent file raises IOError."""
    attempt_read = self.RFreaderror
    self.assertRaises(IOError, attempt_read)
def RFformaterror(self):
    """Try to read "head.sdf" using the unknown format name "noel".

    Helper passed to assertRaises by testRFformaterror.
    """
    reader = self.toolkit.readfile("noel", "head.sdf")
    next(reader)
def testRFformaterror(self):
    """Check that readfile with an invalid format raises ValueError."""
    attempt_read = self.RFformaterror
    self.assertRaises(ValueError, attempt_read)
def RFunitcellerror(self):
    """Access ``unitcell`` on the first molecule.

    Helper passed to assertRaises by testRFunitcellerror.
    """
    first_mol = self.mols[0]
    first_mol.unitcell
def testRFunitcellerror(self):
    """Check that accessing the unitcell raises AttributeError."""
    access_unitcell = self.RFunitcellerror
    self.assertRaises(AttributeError, access_unitcell)
def testRFconversion(self):
    """Write each molecule as SMILES and compare with the expected strings.

    Only the text before the first tab of each SMILES output is used,
    and its characters are sorted before comparison so that the order
    in which atoms are written does not matter.
    """
    smiles = [mol.write("smi").split("\t")[0] for mol in self.mols]
    normalised = ["".join(sorted(smi)) for smi in smiles]
    self.assertEqual(normalised, ['CCCC', 'CCCN'])
def testRFsingletofile(self):
    """Test the molecule.write() method when writing to a file.

    Writes the first molecule as SMILES to "testoutput.txt", checks the
    first tab-separated field of the first line, checks that a second
    write to the same (now existing) file raises IOError, and finally
    that an unknown format name raises ValueError.

    The output file is removed even when one of the checks fails, so a
    failure here cannot leave a stale file behind for later tests.
    """
    mol = self.mols[0]
    outname = "testoutput.txt"
    try:
        mol.write("smi", outname)
        handle = open(outname, "r")
        try:
            first_line = handle.readlines()[0]
        finally:
            handle.close()
        filecontents = first_line.split("\t")[0].strip()
        self.assertEqual(filecontents, 'CCCC')
        # The file exists at this point, so writing again must fail
        self.assertRaises(IOError, mol.write, "smi", outname)
    finally:
        if os.path.isfile(outname):
            os.remove(outname)
    self.assertRaises(ValueError, mol.write, "noel", outname)
def testRFoutputfile(self):
    """Test the Outputfile class.

    Checks that an unknown format raises ValueError, writes every
    molecule in ``self.head`` to an SDF file, checks that writing after
    close() and re-opening the existing file both raise IOError, and
    counts the "$$$$" record terminators in the file (two expected).

    The output file is removed even when one of the checks fails, so a
    failure here cannot leave a stale file behind for later tests.
    """
    outname = "testoutput.txt"
    try:
        self.assertRaises(ValueError, self.toolkit.Outputfile, "noel", outname)
        outputfile = self.toolkit.Outputfile("sdf", outname)
        for mol in self.head:
            outputfile.write(mol)
        outputfile.close()
        # Writing to a closed Outputfile must fail
        self.assertRaises(IOError, outputfile.write, mol)
        # The file exists at this point, so opening it again must fail
        self.assertRaises(IOError, self.toolkit.Outputfile, "sdf", outname)
        handle = open(outname, "r")
        try:
            numdollar = len([line for line in handle.readlines()
                             if line.rstrip() == "$$$$"])
        finally:
            handle.close()
    finally:
        if os.path.isfile(outname):
            os.remove(outname)
    self.assertEqual(numdollar, 2)
def RFdesctest(self):
    """Ask the first molecule for a descriptor name that does not exist.

    Helper passed to assertRaises by testRFdesc, which expects a
    ValueError.
    """
    first_mol = self.mols[0]
    first_mol.calcdesc("BadDescName")
def testRFdesc(self):
    """Check descriptor calculation on the second molecule.

    calcdesc() with no arguments must return more than one entry, the
    entry named by ``self.tpsaname`` must be 26.02 (2 decimal places),
    and an unknown descriptor name must raise ValueError.
    """
    descriptors = self.mols[1].calcdesc()
    self.assertTrue(len(descriptors) > 1)
    tpsa = descriptors[self.tpsaname]
    self.assertAlmostEqual(tpsa, 26.02, 2)
    self.assertRaises(ValueError, self.RFdesctest)
def MDaccesstest(self):
    """Look up the missing key 'noel' in the first molecule's data.

    Helper passed to assertRaises by testMDaccess, which expects a
    KeyError.
    """
    data = self.head[0].data
    return data['noel']
def testMDaccess(self):
    """Add and update fields in the first molecule's data.

    A missing key must raise KeyError; a value that has been set must be
    readable again; update() must accept a dict, and the integer 1 that
    it stores is expected to be read back as the string '1'.
    """
    fields = self.head[0].data
    self.assertRaises(KeyError, self.MDaccesstest)
    fields['noel'] = 'testvalue'
    self.assertEqual(fields['noel'], 'testvalue')
    fields.update({'hey': 'there', 'yo': 1})
    self.assertEqual(fields['yo'], '1')
    self.assertTrue('there' in fields.values())
def testMDglobalaccess(self):
    """Check the keys and the repr of the first molecule's data.

    'Noel' must be absent, the number of keys must match
    ``self.datakeys``, every key must be listed in ``self.datakeys``,
    and the repr must start with "{" and end with "'}".
    """
    fields = self.head[0].data
    self.assertFalse(fields.has_key('Noel'))
    self.assertEqual(len(fields), len(self.datakeys))
    for name in fields:
        self.assertTrue(name in self.datakeys)
    text = repr(fields)
    looks_like_dict = text[0] == "{" and text[-2:] == "'}"
    # The repr is passed as the failure message
    self.assertTrue(looks_like_dict, text)
def testMDdelete(self):
    """Delete one key from the first molecule's data, then clear it.

    'NSC' must be present before ``del`` and absent afterwards; after
    clear() the data must be empty.
    """
    fields = self.head[0].data
    key = 'NSC'
    self.assertTrue(fields.has_key(key))
    del fields[key]
    self.assertFalse(fields.has_key(key))
    fields.clear()
    self.assertEqual(len(fields), 0)
def testAiteration(self):
    """Check that iterating over a molecule yields ``self.Natoms`` atoms."""
    atom_count = 0
    for _atom in self.head[0]:
        atom_count += 1
    self.assertEqual(atom_count, self.Natoms)
def Atomaccesstest(self):
    """Read a non-existent attribute of ``self.atom``.

    Helper passed to assertRaises by testAattributes, which expects an
    AttributeError.
    """
    atom = self.atom
    return atom.nosuchname
def testAattributes(self):
    """Check atom attribute access.

    A missing attribute must raise AttributeError, and the first
    coordinate of ``self.atom`` must be -0.0691 (4 decimal places).
    """
    self.assertRaises(AttributeError, self.Atomaccesstest)
    first_coord = self.atom.coords[0]
    self.assertAlmostEqual(first_coord, -0.0691, 4)
def testAstringrepr(self):
    """Check the string representation of ``self.atom``."""
    as_text = str(self.atom)
    self.assertEqual(as_text, "Atom: 8 (-0.07 5.24 0.03)")
def testSMARTS(self):
    """Search for ethyl groups in triethylamine.

    The pattern "[#6][#6]" is expected to be found three times in
    "CCN(CC)CC".
    """
    triethylamine = self.toolkit.readstring("smi", "CCN(CC)CC")
    pattern = self.toolkit.Smarts("[#6][#6]")
    matches = pattern.findall(triethylamine)
    self.assertEqual(len(matches), 3)
def testAddh(self):
    """Add and then remove hydrogens on the first molecule.

    The atom count is expected to go from 4 to 14 after addh() and back
    to 4 after removeh().
    """
    mol = self.mols[0]
    self.assertEqual(len(mol.atoms), 4)
    mol.addh()
    self.assertEqual(len(mol.atoms), 14)
    mol.removeh()
    self.assertEqual(len(mol.atoms), 4)
class TestOBabel(TestToolkit):
    """Run the shared toolkit tests against ``pybel``.

    Adds checks for the FP3 fingerprint, unit cell access and the
    'Comment' data field, and replaces testRSgetprops with a version
    that also checks exactmass.
    """
    toolkit = pybel
    tanimotoresult = 1/3.
    Natoms = 15
    tpsaname = "TPSA"
    Nfpbits = 32
    datakeys = ['NSC', 'Comment', 'OpenBabel Symmetry Classes',
                'MOL Chiral Flag']

    def testFP_FP3(self):
        "Checking the results from FP3"
        fingerprints = [mol.calcfp("FP3") for mol in self.mols]
        # "|" on two fingerprints is compared against 0. here
        similarity = fingerprints[0] | fingerprints[1]
        self.assertEqual(similarity, 0.)

    def testunitcell(self):
        """Read "hashizume.cif" and check the alpha angle of its unit cell."""
        reader = self.toolkit.readfile("cif", "hashizume.cif")
        cell = next(reader).unitcell
        self.assertAlmostEqual(cell.GetAlpha(), 93.0, 1)

    def testMDcomment(self):
        """Read and then overwrite the 'Comment' data field."""
        fields = self.head[0].data
        self.assertTrue('Comment' in fields)
        self.assertEqual(fields['Comment'], 'CORINA 2.61 0041 25.10.2001')
        fields['Comment'] = 'New comment'
        self.assertEqual(fields['Comment'], 'New comment')

    def importtest(self):
        """Draw the first molecule on screen using its stored coordinates.

        The name does not start with "test", so the default unittest
        loader does not collect it.
        """
        first_mol = self.mols[0]
        first_mol.draw(show=True, usecoords=True)

    def testRSgetprops(self):
        """Check exactmass, molwt, the atom count and a missing attribute."""
        mol = self.mols[0]
        self.assertAlmostEqual(mol.exactmass, 58.078, 3)
        self.assertAlmostEqual(mol.molwt, 58.122, 3)
        self.assertEqual(len(mol.atoms), 4)
        self.assertRaises(AttributeError, self.RSaccesstest)
class TestJybel(TestOBabel):
    """Same tests as TestOBabel; selected when sys.platform starts with "java"."""
class TestIronable(TestJybel):
    """TestJybel with the depiction test disabled.

    Selected when sys.platform starts with "cli".
    """

    def testDraw(self):
        """No creating a 2D depiction"""
class TestPybel(TestOBabel):
    """TestOBabel bound explicitly to ``pybel``.

    The script at the bottom of the file only runs this case when it is
    named on the command line.
    """
    toolkit = pybel
class TestRDKit(TestToolkit):
    """Run the shared toolkit tests against ``rdk``."""
    toolkit = rdk
    tanimotoresult = 1/3.
    Natoms = 9
    tpsaname = "TPSA"
    Nfpbits = 64
    datakeys = ['NSC']

    def testRSconversiontoMOL2(self):
        """No conversion to MOL2 done"""
class TestIndigo(TestToolkit):
    """Run the shared toolkit tests against ``indy``.

    The MOL2, descriptor, local optimisation and 3D generation tests
    are replaced by no-ops.
    """
    toolkit = indy
    tanimotoresult = 1/3.
    Natoms = 15
    tpsaname = "TPSA"
    Nfpbits = 934
    datakeys = ['NSC']

    def testRSconversiontoMOL2(self):
        """No conversion to MOL2 done"""

    def testRFdesc(self):
        """No descriptors"""

    def testattributes(self):
        """Check that informats, outformats and fps are all non-empty."""
        toolkit = self.toolkit
        for formats in (toolkit.informats, toolkit.outformats):
            self.assertNotEqual(len(formats.keys()), 0)
        self.assertNotEqual(len(toolkit.fps), 0)

    def testLocalOpt(self):
        """No forcefields"""

    def testMake3D(self):
        """No forcefields"""
class TestWebel(TestToolkit):
    """Run the shared toolkit tests against ``webel``.

    setUp only creates ``self.mols`` (from SMILES strings), so every
    inherited test that needs ``self.head`` or ``self.atom`` is replaced
    either by a no-op or by a version that works on ``self.mols``.
    """
    toolkit = webel
    tanimotoresult = 0.375
    Natoms = 9
    tpsaname = "TPSADescriptor_TopoPSA"
    Nfpbits = 1
    datakeys = ['NSC']

    def setUp(self):
        """Create the two test molecules from SMILES strings."""
        self.mols = [self.toolkit.readstring("smi", "CCCC"),
                     self.toolkit.readstring("smi", "CCCN")]

    def testselfconversion(self):
        """Test that the toolkit can eat its own dog-food."""
        # The self.head[0] half of the inherited test is not run here:
        # setUp does not create self.head.
        newmol = self.toolkit.Molecule(self.mols[0])
        self.assertEqual(newmol._exchange,
                         self.mols[0]._exchange)

    def testAattributes(self):
        """Not testing atom attributes"""

    def testAstringrepr(self):
        """Not testing atom repr"""

    def testAiteration(self):
        """Not testing the ability to iterate over the atoms"""

    def testAddh(self):
        """Not testing adding/removing hydrogens"""

    def testLocalOpt(self):
        """Not testing local opt"""

    def testMake3D(self):
        """Not generating 3D coordinates"""

    def testMDaccess(self):
        """Not changing the value of a field"""

    def testMDglobalaccess(self):
        """Not checking out the keys"""

    def testMDdelete(self):
        """Not deleting some keys"""

    def testRFmissingfile(self):
        """Not testing that reading from a non-existent file raises an error."""

    def testRFformaterror(self):
        """Not testing that invalid formats raise an error"""

    def testRSgetprops(self):
        """Get the values of the properties."""
        self.assertAlmostEqual(self.mols[0].molwt, 58.12, 2)
        self.assertEqual(self.mols[1].formula, "C3H9N")
        self.assertRaises(AttributeError, self.RSaccesstest)

    def testDraw(self):
        """Create a 2D depiction"""
        self.mols[0].draw(show=False,
                          filename="%s.png" % self.toolkit.__name__)
        # Just making sure that it doesn't raise an Error
        self.mols[0].draw(show=False)

    def testRSformaterror(self):
        """Test that invalid formats raise an error"""
        self.assertRaises(ValueError, self.toolkit.readstring, "noel", "jkjk")

    def testSMARTS(self):
        """Searching for ethyl groups in triethylamine"""
        mol = self.toolkit.readstring("smi", "CCN(CC)CC")
        smarts = self.toolkit.Smarts("[#6][#6]")
        # Only match() is used here (the inherited test uses findall())
        ans = smarts.match(mol)
        self.assertTrue(ans)

    def testRFoutputfile(self):
        """Test the Outputfile class.

        Same checks as the inherited test, but the molecules written
        come from ``self.mols``. The output file is removed even when
        one of the checks fails, so a failure here cannot leave a stale
        file behind for later tests.
        """
        outname = "testoutput.txt"
        try:
            self.assertRaises(ValueError, self.toolkit.Outputfile,
                              "noel", outname)
            outputfile = self.toolkit.Outputfile("sdf", outname)
            for mol in self.mols:
                outputfile.write(mol)
            outputfile.close()
            # Writing to a closed Outputfile must fail
            self.assertRaises(IOError, outputfile.write, mol)
            # The file exists at this point, so opening it again must fail
            self.assertRaises(IOError, self.toolkit.Outputfile,
                              "sdf", outname)
            handle = open(outname, "r")
            try:
                numdollar = len([line for line in handle.readlines()
                                 if line.rstrip() == "$$$$"])
            finally:
                handle.close()
        finally:
            if os.path.isfile(outname):
                os.remove(outname)
        self.assertEqual(numdollar, 2)

    def testattributes(self):
        """Test attributes like informats, descs and so on"""
        self.assertNotEqual(len(self.toolkit.informats.keys()), 0)
        self.assertNotEqual(len(self.toolkit.outformats.keys()), 0)
        self.assertNotEqual(len(self.toolkit.getdescs()), 0)
        self.assertNotEqual(len(self.toolkit.fps), 0)

    def testRSconversiontoMOL(self):
        """Convert to mol.

        Only the number of lines and the second-to-last line (the
        "M END" terminator) are compared with the reference text.
        """
        as_mol = self.mols[0].write("mol")
        test = """C4H10
APtclcactv01251010412D 0 0.00000 0.00000
14 13 0 0 0 0 0 0 0 0999 V2000
2.8660 -0.2500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.7321 0.2500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.0000 0.2500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.5981 -0.2500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.3100 0.7869 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
1.4631 0.5600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
1.6900 -0.2869 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
2.4675 -0.7249 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
3.2646 -0.7249 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
4.1306 0.7249 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
3.3335 0.7249 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
4.2881 -0.7869 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
5.1350 -0.5600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
4.9081 0.2869 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
1 3 1 0 0 0 0
1 2 1 0 0 0 0
2 4 1 0 0 0 0
3 5 1 0 0 0 0
3 6 1 0 0 0 0
3 7 1 0 0 0 0
1 8 1 0 0 0 0
1 9 1 0 0 0 0
2 10 1 0 0 0 0
2 11 1 0 0 0 0
4 12 1 0 0 0 0
4 13 1 0 0 0 0
4 14 1 0 0 0 0
M END
$$$$"""
        data, result = test.split("\n"), as_mol.split("\n")
        self.assertEqual(len(data), len(result))
        self.assertEqual(data[-2], result[-2].rstrip()) # M END
class TestCDK(TestToolkit):
    """Run the shared toolkit tests against ``cdk``.

    The local optimisation and 3D generation tests are replaced by
    no-ops.
    """
    toolkit = cdk
    tanimotoresult = 0.375
    Natoms = 15
    tpsaname = "tpsa"
    # The CDK uses a true java.util.Bitset
    Nfpbits = 4
    datakeys = ['NSC', 'cdk:Remark', 'cdk:Title']

    def testLocalOpt(self):
        """No local opt testing done"""

    def testMake3D(self):
        """No 3D coordinate generation done"""

    def testRSgetprops(self):
        """Check molwt, the atom count and access to a missing attribute.

        exactmass is deliberately not checked here (expected value would
        be 58.078): only OpenBabel has a working exactmass.
        """
        mol = self.mols[0]
        self.assertAlmostEqual(mol.molwt, 58.12, 2)
        self.assertEqual(len(mol.atoms), 4)
        self.assertRaises(AttributeError, self.RSaccesstest)
class TestJchem(TestToolkit):
    """Run the shared toolkit tests against ``jchem``.

    The local optimisation test is replaced by a no-op.
    """
    toolkit = jchem
    tanimotoresult = 0.444
    Natoms = 15
    tpsaname = "TPSA"
    Nfpbits = 5
    datakeys = ['NSC']

    def testLocalOpt(self):
        """No local opt testing done"""

    def testRSgetprops(self):
        """Check molwt, the atom count and access to a missing attribute.

        exactmass is deliberately not checked here (expected value would
        be 58.078): only OpenBabel has a working exactmass.
        """
        mol = self.mols[0]
        self.assertAlmostEqual(mol.molwt, 58.12, 2)
        self.assertEqual(len(mol.atoms), 4)
        self.assertRaises(AttributeError, self.RSaccesstest)
class TestCDKJPype(TestCDK):
    """TestCDK with the depiction test disabled.

    Used for 'cdk' when the platform is neither "java" nor "cli".
    """

    def testDraw(self):
        """No depiction supported I'm afraid"""
if __name__=="__main__":
    # Remove any output file left behind by an earlier, aborted run
    if os.path.isfile("testoutput.txt"):
        os.remove("testoutput.txt")

    # Command-line name -> test case class
    lookup = {'cdk': TestCDK, 'obabel':TestOBabel, 'rdk':TestRDKit,
              'webel': TestWebel, 'opsin': TestOpsin, 'indy': TestIndigo,
              'pybel':TestPybel, 'jchem':TestJchem}
    # Adjust the table for the interpreter the tests are running on
    if sys.platform[:4] == "java":
        lookup['obabel'] = TestJybel
        del lookup['rdk']
    elif sys.platform[:3] == "cli":
        lookup['obabel'] = TestIronable
        del lookup['rdk']
        del lookup['cdk']
        del lookup['jchem']
        del lookup['opsin']
    else:
        lookup['cdk'] = TestCDKJPype

    if len(sys.argv) > 1:
        # Run exactly the test cases named on the command line
        testcases = [lookup[x] for x in sys.argv[1:]]
    else:
        # Only run Pybel tests if specifically asked.
        # list.remove() returns None, so the filtered list has to be
        # built explicitly rather than taken from its return value.
        testcases = [case for case in lookup.values()
                     if case is not TestPybel]

    for testcase in testcases:
        print("\n\n\nTESTING %s\n%s\n\n" % (testcase.__name__, "== "*10))
        myunittest = unittest.defaultTestLoader.loadTestsFromTestCase(testcase)
        unittest.TextTestRunner(verbosity=1).run(myunittest)
cinfony-1.2/LICENSE_BSD.txt 0000664 0001750 0001750 00000002460 12061452051 015250 0 ustar noel noel 0000000 0000000 Copyright (c) 2008-2011, Noel O'Boyle
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cinfony-1.2/LICENSE_GPLv3.txt 0000664 0001750 0001750 00000104513 12061452051 015535 0 ustar noel noel 0000000 0000000 GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc.
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<http://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
cinfony-1.2/LICENSE_GPLv2.txt 0000664 0001750 0001750 00000043254 12061452051 015540 0 ustar noel noel 0000000 0000000 GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year>  <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.
cinfony-1.2/setup.py 0000664 0001750 0001750 00000002613 12061452051 014447 0 ustar noel noel 0000000 0000000 #-*. coding: utf-8 -*-
"""cinfony: a common API to several cheminformatics toolkits

cinfony is a Python library that provides a common API to several
open source cheminformatics toolkits.
"""
from distutils.core import setup

# The module docstring doubles as the package metadata: line 0 is the short
# description, line 1 is the blank separator, and lines 2+ form the long
# description.  The blank separator line is required for the [2:] slice
# below to capture the whole body paragraph.
doclines = __doc__.split("\n")

# My code is BSD
# Open Babel is GPL v2
# OPSIN is Artistic License v2.0 (not viral)
# CDK is LGPL (not viral)
# RDKit is BSD
# Indigo is GPL v3

# Chosen from http://www.python.org/pypi?:action=list_classifiers
classifiers = """\
Development Status :: 5 - Production/Stable
Environment :: Console
Intended Audience :: Science/Research
Intended Audience :: Developers
License :: OSI Approved :: BSD License
License :: OSI Approved :: GNU General Public License (GPL)
Natural Language :: English
Operating System :: OS Independent
Programming Language :: Python
Topic :: Scientific/Engineering :: Chemistry
Topic :: Software Development :: Libraries :: Python Modules
"""

setup(
    name="cinfony",
    version="1.2",
    url="http://cinfony.googlecode.com",
    author="Noel O'Boyle, Adrià Cereto-Massagué",
    author_email="baoilleach@gmail.com",
    maintainer="Noel O'Boyle",
    maintainer_email="baoilleach@gmail.com",
    license="BSD",
    description=doclines[0],
    long_description="\n".join(doclines[2:]),
    # Build a real list (dropping the empty string left by the trailing
    # newline): on Python 3 ``filter`` returns a lazy iterator, not a list.
    classifiers=[line for line in classifiers.split("\n") if line],
    platforms=["Any."],
    packages=['cinfony'],
)
cinfony-1.2/README.txt 0000664 0001750 0001750 00000000750 12061452051 014433 0 ustar noel noel 0000000 0000000 =======
Cinfony
=======
Cinfony is a Python library that makes it easy to access several
cheminformatics toolkits and resources. It currently supports:
Open Babel
Chemistry Development Kit (CDK)
RDKit
Indigo
JChem
OPSIN
On-line chemical webservices
For more information please see http://cinfony.googlecode.com.
Cinfony is 100% Open Source. Portions are licensed under the BSD, GPLv2
or GPLv3. See individual source files for license information.
- Noel O'Boyle, 2012.