forked from christhorpe/grauniady
-
Notifications
You must be signed in to change notification settings - Fork 1
/
ChemSpiPy.py
143 lines (97 loc) · 4.43 KB
/
ChemSpiPy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import unittest
import urllib

try:  # Python 2 keeps these helpers directly on ``urllib``
    from urllib import quote, urlopen
except ImportError:  # Python 3 split them into submodules
    from urllib.parse import quote
    from urllib.request import urlopen

try:
    from xml.etree import cElementTree as ET
except ImportError:  # cElementTree was removed in Python 3.9
    from xml.etree import ElementTree as ET
class ChemSpiderId(str):
    """A ChemSpider ID string with helper methods bound to the ChemSpider API.

    The object is itself a ``str`` holding the decimal ID, so it compares
    equal to (and formats like) the plain ID string.  On top of that it
    offers ``imageurl()`` and ``molweight()``; both cache their result on
    the instance (``self.image`` / ``self.molwt``) after the first call.
    """

    # Endpoint root and API token used to build request URLs.
    # NOTE(review): the token is a credential committed to source control;
    # consider loading it from configuration instead.
    _BASEURL = 'http://www.chemspider.com/'
    _TOKEN = '3a19d00d-874f-4879-adc0-3013dbecbbc9'

    def __init__(self, csid):
        """Initialize the ChemSpiderId object with a value.

        Args:
            csid: the ID, either an ``int`` or a ``str`` made only of digits.

        Raises:
            TypeError: if ``csid`` is anything else (a float, a bool, a
                string containing non-digit characters, ...).
        """
        self.id = ''
        self.image = ''  # cached image URL; '' until imageurl() is called
        self.molwt = ''  # cached molecular weight; '' until molweight() runs
        if isinstance(csid, str) and csid.isdigit():
            self.id = str(csid)
        elif isinstance(csid, int) and not isinstance(csid, bool):
            # bool is a subclass of int but is not a meaningful ID.
            self.id = str(csid)
        else:
            raise TypeError('ChemSpiderId needs to be intialised with an int or a str')

    def __str__(self):
        """Return the ID as a plain string."""
        return self.id

    # The method was originally spelled ``__string__``, a name Python never
    # calls implicitly.  Keep it as an alias for any explicit callers.
    __string__ = __str__

    def imageurl(self):
        """Return the URL of a PNG image for this ChemSpider ID.

        A URL is built (rather than image data fetched) so that the address
        can be dropped straight into web pages or services.  No network
        access happens here; the URL is cached in ``self.image``.
        """
        assert self != '', 'ChemSpiderId not initialised with value'
        if self.image == '':
            self.image = self._BASEURL + 'ImagesHandler.ashx?id=%s' % self.id
        return self.image

    def molweight(self):
        """Return the molecular weight for this ChemSpider ID as a float.

        The first call polls the ChemSpider MassSpec API
        (``GetExtendedCompoundInfo``), parses the XML reply and caches the
        value in ``self.molwt``.  Later calls return the cached value
        without touching the network.

        Raises:
            IndexError: if the reply contains no ``MolecularWeight`` element.
        """
        assert self != '', 'ChemSpiderID not initialised with value'
        if self.molwt != '':
            # Already fetched (the original fell through and returned None).
            return self.molwt

        searchurl = (self._BASEURL
                     + 'MassSpecAPI.asmx/GetExtendedCompoundInfo?CSID='
                     + self.id + '&token=' + self._TOKEN)
        response = urlopen(searchurl)
        try:
            root = ET.parse(response).getroot()
        finally:
            # Release the connection even when the XML cannot be parsed.
            response.close()

        tag = '{http://www.chemspider.com/}MolecularWeight'
        weights = [node.text for node in root.iter(tag)]
        if not weights:
            raise IndexError(
                'no MolecularWeight element in ChemSpider response for CSID %s'
                % self.id)
        self.molwt = float(weights[0])
        return self.molwt
def simplesearch(query):
    """Return the ChemSpiderId of the first hit of a simple search for query.

    The ChemSpider ``SimpleSearch`` service replies with a list of IDs; all
    of them are collected here but only the first is returned, wrapped in a
    ``ChemSpiderId``.

    Args:
        query: the raw (not percent-encoded) search text, e.g. a name.
            It is URL-escaped here before being sent.

    Raises:
        AssertionError: if ``query`` is not a string.
        IndexError: if the search returns no IDs.
    """
    try:
        string_types = (str, unicode)  # Python 2
    except NameError:
        string_types = (str,)  # Python 3 has no separate unicode type
    assert isinstance(query, string_types), 'query not a string object'

    baseurl = 'http://www.chemspider.com/'
    # NOTE(review): credential committed to source control; consider moving
    # it to configuration.
    token = '3a19d00d-874f-4879-adc0-3013dbecbbc9'

    if not isinstance(query, str):
        # Python 2 unicode text: quote() needs bytes for non-ASCII input.
        query = query.encode('utf-8')
    # Escape everything so characters such as '#', '&' or '+' (common in
    # SMILES/InChI queries) cannot alter the structure of the URL.
    searchurl = (baseurl + 'Search.asmx/SimpleSearch?query='
                 + quote(query, safe='') + '&token=' + token)

    response = urlopen(searchurl)
    try:
        root = ET.parse(response).getroot()
    finally:
        # Release the connection even when the XML cannot be parsed.
        response.close()

    csids = [node.text for node in root.iter('{http://www.chemspider.com/}int')]
    if not csids:
        raise IndexError(
            'ChemSpider SimpleSearch returned no results for %r' % (query,))
    return ChemSpiderId(csids[0])
########################################
#
# Unit tests
#
########################################
class TestChemSpiPy(unittest.TestCase):
    """Checks for ChemSpiderId and simplesearch using benzene (CSID 236).

    The molecular-weight and search checks call the live ChemSpider
    service, so they need network access.
    """

    def setUp(self):
        """Prepare the benzene reference values shared by the tests."""
        self.testint = 236
        self.teststring = '236'
        self.testquery = 'benzene'
        self.testimageurl = 'http://www.chemspider.com/ImagesHandler.ashx?id=236'
        self.testmolwt = 78.1118

    def testchemspiderid(self):
        """Construction, image URL and molecular weight of a ChemSpiderId."""
        # A float is neither an int nor a digit string.
        self.assertRaises(TypeError, ChemSpiderId, 1.2)

        from_string = ChemSpiderId(self.teststring)
        self.assertEqual(from_string, self.teststring)

        from_int = ChemSpiderId(self.testint)
        self.assertEqual(from_int, self.teststring)

        image_address = ChemSpiderId(self.teststring).imageurl()
        self.assertEqual(image_address, self.testimageurl)

        weight = ChemSpiderId(self.teststring).molweight()
        self.assertEqual(weight, self.testmolwt)

    def testsimplesearch(self):
        """A name search returns the expected ChemSpider ID."""
        found = simplesearch(self.testquery)
        self.assertEqual(found, self.teststring)
# Run the unit tests when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()