The Chemistry Development Kit (CDK) is an opensource Java framework for cheminformatics. In an article for the CDK News, I describe how to use Jython (a Java implementation of Python) to access the CDK.
Here I provide supplementary material for this article.
The code for the Python and Java implementations is available below. Here is a short example of their use to calculate the BCUT descriptors for β-estradiol.
javac calculateBCUT.java java calculateBCUT BetaEstradiol.sd
or
jython pycalculateBCUT.txt BetaEstradiol.sd
gives
11.996163377263015 15.998263783541644 -0.41661438592771444 \ 0.08657868420569534 5.618366876046048 11.845146625965969
import org.openscience.cdk.*; import org.openscience.cdk.qsar.*; import org.openscience.cdk.io.iterator. IteratingMDLReader; import org.openscience.cdk.exception. CDKException; import org.openscience.cdk.qsar.result.*; import java.io.*; class calculateBCUT { public static void main(String[] args) throws CDKException { FileReader sdfile=null; try { sdfile=new FileReader(new File(args[0])); } catch (FileNotFoundException e) { System.err.println("File not found: " + args[0]); System.exit(1); } catch (ArrayIndexOutOfBoundsException e) { System.err.println("You need to give" + "the name of an .sd file"); System.exit(1); } Descriptor bcut=new BCUTDescriptor(); bcut.setParameters(new Object[] {1,1}); IteratingMDLReader myiter=new IteratingMDLReader(sdfile); Molecule mol=null; while (myiter.hasNext()) { mol=(Molecule)myiter.next(); DoubleArrayResult BCUTvalue= (DoubleArrayResult) bcut.calculate(mol) .getValue(); System.out.print(BCUTvalue.get(0)); for (int i=1; i<6; i++) { System.out.print("\t"+BCUTvalue.get(i)); } System.out.print("\n"); } } }
[pycalculateBCUT.txt] (I had to rename this file to .txt because of problems with the web server.) This uses the CVS version of the CDK on 21/Nov/05.
from org.openscience.cdk import * from org.openscience.cdk.qsar.descriptors.molecular import BCUTDescriptor from org.openscience.cdk.io.iterator import IteratingMDLReader from java.io import InputStreamReader import sys try: sdfile=open(sys.argv[1],"r") except (IOError,IndexError): print "You need to give the name of an existing .sd file" sys.exit(1) bcut=BCUTDescriptor() bcut.setParameters([1,1]) for mol in IteratingMDLReader( InputStreamReader(sdfile)): BCUTvalue=bcut.calculate(mol).getValue() # Create list of values in BCUTvalue BCUTlist=[str(BCUTvalue.get(i)) for i in range(6)] # Concatenate BCUTlist around tabs print "\t".join(BCUTlist)
# Using CDK-20050125.jar and JDK1.5 # (problems with Iterating MDLReader so # left it out of this one) # TO DO: Catch the Exceptions properly from jpype import * startJVM("/home/no228/Tools/Java/jdk1.5.0_03/jre/lib/i386/server/libjvm.so","-Djava.class.path=/home/no228/Tools/CDK/cdk-20050125.jar") cdk = JPackage("org").openscience.cdk BCUTDescriptor = cdk.qsar.BCUTDescriptor MDLReader = cdk.io.MDLReader FileReader = java.io.FileReader import sys try: sdfile=FileReader(sys.argv[1],"r") except (IOError,IndexError): print "You need to give the name of an \ existing .sd file" sys.exit(1) bcut=BCUTDescriptor() one = java.lang.Integer(1) objarr = JArray(java.lang.Object)([one,one]) bcut.setParameters(objarr) mol = MDLReader(sdfile).read(Molecule()) BCUTvalue = bcut.calculate(mol) # Create list of values in BCUTvalue BCUTlist=[str(BCUTvalue.get(i)) \ for i in range(6)] # Concatenate BCUTlist around tabs print "\t".join(BCUTlist)
One of the peculiarities of this program is that, after use, you need to exit Jython using CTRL+D, followed by CTRL+C (the order is important!). I am not sure of the cause of this, or whether there is any workaround.
[viewmol.txt] (I had to rename this file to .txt to avoid problems with the web server. You need to rename it to .py to use it.)
# Java stuff from java.awt import Dimension from java.awt import Rectangle from javax.swing import JPanel,JFrame from java.awt.event import WindowAdapter # Jmol stuff from org.jmol.api import JmolAdapter,JmolViewer from org.jmol.adapter.cdk import CdkJmolAdapter # CDK stuff from org.openscience.cdk import Molecule from org.openscience.cdk.io import MDLReader from org.openscience.cdk.exception import CDKException from org.openscience.cdk.layout import StructureDiagramGenerator from org.openscience.cdk.applications.swing import MoleculeViewer2D # Finally...Python stuff import sys class JmolPanel(JPanel): def __init__(self): self.adapter=CdkJmolAdapter(None) self.viewer=JmolViewer.allocateViewer(self,self.adapter) self.currentSize=Dimension() self.rectClip=Rectangle() def getViewer(self): return self.viewer def paint(self,g): self.viewer.setScreenDimension(self.getSize(self.currentSize)) g.getClipBounds(self.rectClip) self.viewer.renderScreenImage(g,self.currentSize,self.rectClip) class ApplicationCloser(WindowAdapter): def windowClosing(self,e): if __name__=="__main__": sys.exit(0) class ViewMol3D: def __init__(self,mol=None,file=None): if file and not mol: try: infile=open(file,"r") mol=MDLReader(infile).read(Molecule()) infile.close() except IOError,CDKException: print "Problem reading molecule from %s" % file infile.close() # No finally clause in Jython (unlike CPython) if mol: self.frame=JFrame("viewmol CDK Jmol") self.frame.setSize(300,300) self.frame.addWindowListener(ApplicationCloser()) contentPane=self.frame.getContentPane() jmolPanel=JmolPanel() contentPane.add(jmolPanel) self.viewer=jmolPanel.getViewer() self.viewer.openClientFile("","",mol) # The previous line causes 15 "indexInt: null"s to be printed self.frame.setVisible(1) def script(self,text): self.viewer.evalString(text) def close(self): self.frame.remove() class ViewMol2D: def __init__(self,mol=None,file=None): if file and not mol: try: infile=open(file,"r") mol=MDLReader(infile).read(Molecule()) infile.close() except IOError,CDKException: print "Problem reading molecule from %s" % file infile.close() if mol: sdg=StructureDiagramGenerator(mol) try: sdg.generateCoordinates() except CDKException: print "Strange coordinates..." else: self.mv=MoleculeViewer2D(sdg.getMolecule()) self.mv.display() if __name__=="__main__": mol=MDLReader(open("mymol.sd","r")).read(Molecule()) ViewMol2D(mol) firstview=ViewMol3D(mol) secondview=ViewMol3D(file="anothermol.sd") secondview.script("spacefill on; spin")