from org.openscience.cdk import *
from org.openscience.cdk.tools import MFAnalyser
from org.openscience.cdk.io.iterator import IteratingMDLReader


def readfile(format, filename):
    """Iterate over the molecules in a file.

    Required parameters:
       format   -- accepted for API compatibility; currently ignored
       filename -- path of the file to read

    NOTE: whatever value is passed as 'format', the file is always parsed
    with CDK's IteratingMDLReader, so only MDL SD files can be read.

    The whole file is read eagerly and the underlying file handle is
    closed before this function returns; the returned object is an
    iterator over the already-parsed molecules.

    You can access the first molecule in a file using:
        mol = readfile("sdf", "myfile.sdf").next()

    You can make a list of the molecules in a file using:
        mols = [mol for mol in readfile("sdf", "myfile.sdf")]

    You can iterate over the molecules in a file as shown in the
    following code snippet...

    >>> atomtotal = 0
    >>> for mol in readfile("sdf","head.sdf"):
    ...     atomtotal += len(mol.atoms)
    ...
    >>> print(atomtotal)
    43
    """
    infile = open(filename)
    try:
        # Materialise the list while the file is still open so that the
        # handle can be released deterministically afterwards.
        mols = [Molecule(mol) for mol in IteratingMDLReader(
            infile,
            DefaultChemObjectBuilder.getInstance()
        )]
    finally:
        infile.close()
    return iter(mols)


class Molecule(object):
    """Represent a molecule backed by a CDK molecule object.

    Optional parameters:
       CDKMol -- a CDK molecule (default is None)

    An empty CDK molecule is created if one is not provided.

    Attributes computed by this class:
       atoms, exactmass, formula, molwt

    Iterating over a Molecule yields its atoms, and str() returns the
    toString() of the wrapped CDK object.

    The original CDK molecule can be accessed using the attribute:
       CDKMol
    """
    # Attribute-name -> getter-name table inherited from the Open Babel
    # flavour of this API. __getattr__ below does not consult it; it is
    # kept because code outside this class may still refer to it.
    # Entries for 'coords', 'data' and 'internalcoord' were already
    # disabled in the original table.
    _getmethods = {
        'conformers':'GetConformers',
        'dim':'GetDimension',
        'energy':'GetEnergy',
        'exactmass':'GetExactMass',
        'flags':'GetFlags',
        'formula':'GetFormula',
        'mod':'GetMod',
        'molwt':'GetMolWt',
        'sssr':'GetSSSR',
        'title':'GetTitle',
        'charge':'GetTotalCharge',
        'spin':'GetTotalSpinMultiplicity'
    }

    def __init__(self, CDKMol=None):
        """Wrap CDKMol, or a new empty CDK molecule if CDKMol is None."""
        if CDKMol is None:
            # The CDK 'Molecule' class name is shadowed by this Python
            # class, so the empty molecule is obtained from the builder.
            # NOTE(review): newMolecule() is the builder call of the CDK
            # generation that ships MFAnalyser -- confirm against the CDK
            # version actually deployed.
            CDKMol = DefaultChemObjectBuilder.getInstance().newMolecule()
        self.CDKMol = CDKMol

    def __getattr__(self, attr):
        """Return the value of an attribute

        Supported names are 'atoms', 'exactmass', 'molwt' and 'formula';
        any other name raises AttributeError.

        Note: The values are calculated on-the-fly. You may want to store
        the value in a variable if you repeatedly access the same
        attribute.
        """
        if attr == "atoms":
            return [self.CDKMol.getAtom(i)
                    for i in range(self.CDKMol.getAtomCount())]
        elif attr == 'exactmass':
            # NOTE (original author): exact, canonical and natural masses
            # have probably been confused here.
            return MFAnalyser(self.CDKMol).getCanonicalMass()
        elif attr == 'molwt':
            # NOTE (original author): exact, canonical and natural masses
            # have probably been confused here.
            return MFAnalyser(self.CDKMol).getMass()
        elif attr == 'formula':
            return MFAnalyser(self.CDKMol).getMolecularFormula()
        else:
            raise AttributeError("Molecule has no attribute '%s'" % attr)

    def __iter__(self):
        """Iterate over the atoms of the molecule."""
        return iter(self.atoms)

    def __str__(self):
        """Return the toString() representation of the wrapped CDK object."""
        return self.CDKMol.toString()


if __name__=="__main__": #pragma: no cover
    for mol in readfile("sdf", "head.sdf"):
        print("Molecule has molwt of %.2f and %d atoms" % (mol.molwt, len(mol.atoms)))