>> Noel O'Boyle

Accessing the CDK using Jython

The Chemistry Development Kit (CDK) is an opensource Java framework for cheminformatics. In an article for the CDK News, I describe how to use Jython (a Java implementation of Python) to access the CDK.

Here I provide supplementary material for this article.

Example 1

The code for the Python and Java implementations is available below. Here is a short example of their use to calculate the BCUT descriptors for β-estradiol.

javac calculateBCUT.java
java calculateBCUT BetaEstradiol.sd

or

jython pycalculateBCUT.txt BetaEstradiol.sd

gives

11.996163377263015      15.998263783541644      -0.41661438592771444 \
0.08657868420569534     5.618366876046048       11.845146625965969

[calculateBCUT.java]

import org.openscience.cdk.*;
import org.openscience.cdk.qsar.*;
import org.openscience.cdk.io.iterator.
 IteratingMDLReader;
import org.openscience.cdk.exception.
 CDKException;
import org.openscience.cdk.qsar.result.*;

import java.io.*;

class calculateBCUT {
 public static void main(String[] args)
  throws CDKException {
  FileReader sdfile=null;
  try {
   sdfile=new FileReader(new File(args[0]));
  }
  catch (FileNotFoundException e) {
   System.err.println("File not found: " +
    args[0]);
   System.exit(1);
  }
  catch (ArrayIndexOutOfBoundsException e) {
   System.err.println("You need to give" +
    "the name of an .sd file");
   System.exit(1);
  }

  Descriptor bcut=new BCUTDescriptor();
  bcut.setParameters(new Object[] {1,1});
  IteratingMDLReader myiter=new
   IteratingMDLReader(sdfile);
  Molecule mol=null;

  while (myiter.hasNext()) {
   mol=(Molecule)myiter.next();
   DoubleArrayResult BCUTvalue=
    (DoubleArrayResult) bcut.calculate(mol)
    .getValue();
   System.out.print(BCUTvalue.get(0));
   for (int i=1; i<6; i++) {
    System.out.print("\t"+BCUTvalue.get(i));
   }
   System.out.print("\n");
  }
 }
}

[pycalculateBCUT.txt] (I had to rename this file to .txt because of problems with the web server.) This uses the CVS version of the CDK on 21/Nov/05.

from org.openscience.cdk import *
from org.openscience.cdk.qsar.descriptors.molecular import BCUTDescriptor
from org.openscience.cdk.io.iterator import IteratingMDLReader

from java.io import InputStreamReader

import sys

try:
    sdfile=open(sys.argv[1],"r")
except (IOError,IndexError):
    print "You need to give the name of an existing .sd file"
    sys.exit(1)

bcut=BCUTDescriptor()
bcut.setParameters([1,1])
for mol in IteratingMDLReader( InputStreamReader(sdfile)):
    BCUTvalue=bcut.calculate(mol).getValue()
    # Create list of values in BCUTvalue
    BCUTlist=[str(BCUTvalue.get(i)) for i in range(6)]
    # Concatenate BCUTlist around tabs
    print "\t".join(BCUTlist)

Implementation in JPype

# Using CDK-20050125.jar and JDK1.5
# (problems with Iterating MDLReader so
# left it out of this one)
# TO DO: Catch the Exceptions properly

from jpype import *
startJVM("/home/no228/Tools/Java/jdk1.5.0_03/jre/lib/i386/server/libjvm.so","-Djava.class.path=/home/no228/Tools/CDK/cdk-20050125.jar")

cdk = JPackage("org").openscience.cdk
BCUTDescriptor = cdk.qsar.BCUTDescriptor
MDLReader = cdk.io.MDLReader

FileReader = java.io.FileReader

import sys

try:
    sdfile=FileReader(sys.argv[1],"r")
except (IOError,IndexError):
    print "You need to give the name of an \
existing .sd file"
    sys.exit(1)

bcut=BCUTDescriptor()
one = java.lang.Integer(1)
objarr = JArray(java.lang.Object)([one,one])

bcut.setParameters(objarr)

mol = MDLReader(sdfile).read(Molecule())
BCUTvalue = bcut.calculate(mol)

# Create list of values in BCUTvalue
BCUTlist=[str(BCUTvalue.get(i)) \
  for i in range(6)]
# Concatenate BCUTlist around tabs
print "\t".join(BCUTlist)

Example 2

One of the peculiarities of this program is that, after use, you need to exit Jython using CTRL+D, followed by CTRL+C (the order is important!). I am not sure of the cause of this, or whether there is any workaround.

[viewmol.txt] (I had to rename this file to .txt to avoid problems with the web server. You need to rename it to .py to use it.)

# Java stuff
from java.awt import Dimension
from java.awt import Rectangle
from javax.swing import JPanel,JFrame
from java.awt.event import WindowAdapter

# Jmol stuff
from org.jmol.api import JmolAdapter,JmolViewer
from org.jmol.adapter.cdk import CdkJmolAdapter

# CDK stuff
from org.openscience.cdk import Molecule
from org.openscience.cdk.io import MDLReader
from org.openscience.cdk.exception import CDKException
from org.openscience.cdk.layout import StructureDiagramGenerator
from org.openscience.cdk.applications.swing import MoleculeViewer2D

# Finally...Python stuff
import sys


class JmolPanel(JPanel):
    def __init__(self):
        self.adapter=CdkJmolAdapter(None)
        self.viewer=JmolViewer.allocateViewer(self,self.adapter)
        self.currentSize=Dimension()
        self.rectClip=Rectangle()
    def getViewer(self):
        return self.viewer
    def paint(self,g):
        self.viewer.setScreenDimension(self.getSize(self.currentSize))
        g.getClipBounds(self.rectClip)
        self.viewer.renderScreenImage(g,self.currentSize,self.rectClip)

class ApplicationCloser(WindowAdapter):
    def windowClosing(self,e):
        if __name__=="__main__":
            sys.exit(0)

class ViewMol3D:
    def __init__(self,mol=None,file=None):
        if file and not mol:
            try:
                infile=open(file,"r")
                mol=MDLReader(infile).read(Molecule())
                infile.close()
            except IOError,CDKException:
                print "Problem reading molecule from %s" % file
                infile.close() # No finally clause in Jython (unlike CPython)
        if mol:
            self.frame=JFrame("viewmol CDK Jmol")
            self.frame.setSize(300,300)
            self.frame.addWindowListener(ApplicationCloser())
            contentPane=self.frame.getContentPane()
            jmolPanel=JmolPanel()
            contentPane.add(jmolPanel)
            self.viewer=jmolPanel.getViewer()
            self.viewer.openClientFile("","",mol)
# The previous line causes 15 "indexInt: null"s to be printed
            self.frame.setVisible(1)
    def script(self,text):
        self.viewer.evalString(text)
    def close(self):
        self.frame.remove()

class ViewMol2D:
    def __init__(self,mol=None,file=None):
        if file and not mol:
            try:
                infile=open(file,"r")
                mol=MDLReader(infile).read(Molecule())
                infile.close()
            except IOError,CDKException:
                print "Problem reading molecule from %s" % file
                infile.close()

        if mol:
            sdg=StructureDiagramGenerator(mol)
            try:
                sdg.generateCoordinates()
            except CDKException:
                print "Strange coordinates..."
            else:
                self.mv=MoleculeViewer2D(sdg.getMolecule())
                self.mv.display()

if __name__=="__main__":
    mol=MDLReader(open("mymol.sd","r")).read(Molecule())
    ViewMol2D(mol)
    firstview=ViewMol3D(mol)
    secondview=ViewMol3D(file="anothermol.sd")
    secondview.script("spacefill on; spin")