1
2 """ATC/DDD handling code.
3
4 http://who.no
5
6 license: GPL
7 """
8
9
10
11 __version__ = "$Revision: 1.6 $"
12 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
13
14 import sys, codecs, logging, csv, re as regex, os.path
15
16
17 if __name__ == '__main__':
18 sys.path.insert(0, '../../')
19 from Gnumed.pycommon import gmPG2, gmTools, gmCfg2
20
21
# Module-level logger; the module revision string is logged once at import time.
_log = logging.getLogger('gm.atc')
_log.info(__version__)

# Configuration accessor shared by this module; the ATC import code registers
# its config file with it as source 'atc' and reads the import settings from it.
_cfg = gmCfg2.gmCfgData()
26
28
# NOTE(review): the "def" line of this function is missing from the reviewed
# listing. The parameters are the names the body reads; the function name and
# the None defaults are reconstructed -- confirm against the repository.
def propagate_atc(substance=None, atc=None):
    """Store an ATC code on substance/drug rows which do not have one yet.

    substance -- name of the substance/drug; matched (after stripping
        whitespace) against the "description" column
    atc -- ATC code to store; if None or blank the code is looked up from
        the substance name via text2atc() (exact, non-fuzzy match)

    Updates ref.substance_in_brand, clin.consumed_substance and
    ref.branded_drug, touching only rows whose atc_code IS NULL.

    Returns the ATC code that was propagated, or None if no code was given
    and the lookup found no match or more than one match (in which case
    nothing is written).
    """
    _log.debug('substance <%s>, ATC <%s>', substance, atc)

    # A blank code counts as "no code". Strip once so that a code passed in
    # with surrounding whitespace is not stored padded (codes found by the
    # lookup below are stripped as well).
    if atc is not None:
        atc = atc.strip()
        if atc == u'':
            atc = None

    if atc is None:
        atcs = text2atc(text = substance, fuzzy = False)
        if not atcs:
            _log.debug(u'no ATC found, aborting')
            return None
        if len(atcs) > 1:
            # refuse to guess between several candidate codes
            _log.debug(u'non-unique ATC mapping, aborting')
            return None
        atc = atcs[0][0].strip()

    args = {'atc': atc, 'term': substance.strip()}
    update_template = u"UPDATE %s SET atc_code = %%(atc)s WHERE description = %%(term)s AND atc_code IS NULL"
    queries = [
        {'cmd': update_template % table, 'args': args}
        for table in (
            u'ref.substance_in_brand',
            u'clin.consumed_substance',
            u'ref.branded_drug',
        )
    ]
    gmPG2.run_rw_queries(queries = queries)

    return atc
57
def text2atc(text=None, fuzzy=False):
    """Find ATC codes for a substance or drug name.

    The name is matched case-insensitively against the terms in ref.v_atc
    and against the descriptions of those rows in ref.substance_in_brand,
    ref.branded_drug and clin.consumed_substance which carry an ATC code.

    text -- name to search for; surrounding whitespace is ignored
    fuzzy -- False: the whole name must match;
        True: the name may match anywhere inside the term (ILIKE '%name%')

    Returns the rows delivered by gmPG2.run_ro_queries(): one row per
    distinct ATC code, ordered by code, with the columns atc_code,
    is_group_code and pk_data_source (the latter two may be NULL).
    Returns an empty list if text is None.
    """
    if text is None:
        # nothing to search for -- previously this crashed on None.strip()
        return []

    text = text.strip()

    if fuzzy:
        args = {'term': u'%%%s%%' % text}
        condition = u'%s ilike %%(term)s'
    else:
        args = {'term': text.lower()}
        condition = u'lower(%s) = %%(term)s'

    # Both search modes run the same query and differ only in how a column
    # is compared to the search term. The %(term)s placeholder is left in
    # place for the database layer to fill in from args.
    cmd = u"""
        SELECT DISTINCT ON (atc_code) *
        FROM (
            SELECT atc as atc_code, is_group_code, pk_data_source
            FROM ref.v_atc
            WHERE {term_match} AND atc IS NOT NULL
                UNION
            SELECT atc_code, null, null
            FROM ref.substance_in_brand
            WHERE {desc_match} AND atc_code IS NOT NULL
                UNION
            SELECT atc_code, null, null
            FROM ref.branded_drug
            WHERE {desc_match} AND atc_code IS NOT NULL
                UNION
            SELECT atc_code, null, null
            FROM clin.consumed_substance
            WHERE {desc_match} AND atc_code IS NOT NULL
        ) as tmp
        ORDER BY atc_code
    """.format (
        term_match = condition % u'term',
        desc_match = condition % u'description'
    )

    rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)

    _log.debug(u'term: %s => ATCs: %s (fuzzy: %s)', text, rows, fuzzy)

    return rows
114
116
117
# Known layouts of the DDD column, tried in this order. Each entry is
# (compiled pattern, whether a free-text comment follows the route).
_DDD_FORMATS = (
    (regex.compile(r'\d{,3},\d{,3}\s.{1,2}\s.(,.)*$'), False),      # "0,5 g O"
    (regex.compile(r'\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$'), True),   # "0,5 g O comment"
    (regex.compile(r'\d{,3}\s.{1,2}\s.(,.)*$'), False),             # "10 mg O,P"
    (regex.compile(r'\d{,3}\s.{1,2}\s.(,.)*\s.+$'), True),          # "10 mg O,P comment"
)


def _parse_ddd(field):
    """Split the raw DDD column into (ddd value, unit, route, comment).

    Text matching none of the known layouts is returned as the comment,
    with the other three parts empty.
    """
    for pattern, has_comment in _DDD_FORMATS:
        if not pattern.match(field):
            continue
        if has_comment:
            # everything after the third whitespace is the comment
            ddd_val, unit, adro, comment = regex.split(r'\s', field, maxsplit = 3)
        else:
            ddd_val, unit, adro = regex.split(r'\s', field)
            comment = u''
        return ddd_val, unit, adro, comment
    return u'', u'', u'', field


def _empty_atc_staging(conn):
    """Delete all rows from ref.atc_staging and commit."""
    curs = conn.cursor()
    gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': u"""delete from ref.atc_staging"""}])
    curs.close()
    conn.commit()
    _log.debug('ATC staging table emptied')


# NOTE(review): the "def" line of this function is missing from the reviewed
# listing. The parameters are the names the body reads; the function name and
# the None defaults are reconstructed -- confirm against the repository.
def atc_import(cfg_fname=None, conn=None):
    """Import an ATC data file into ref.atc.

    cfg_fname -- config file describing the data set; group "atc" must
        provide the options "data file" (taken relative to the directory
        of cfg_fname), "version", "language", "description", "url",
        "long name" and "short name"
    conn -- database connection; must offer cursor() and commit()

    Steps:
    1) (re)create the ref.data_source row for this name/version
    2) load the CSV data file into ref.atc_staging, skipping the first
       line (column headers) and empty lines
    3) copy the staged rows into ref.atc, linked to the data source
    4) empty the staging table again

    Returns True. Errors raised by the config, file or database layers
    are not caught here.
    """
    # read meta data
    _cfg.add_file_source(source = 'atc', file = cfg_fname, encoding = 'utf8')

    def _option(name):
        return _cfg.get(group = 'atc', option = name, source_order = [('atc', 'return')])

    data_fname = os.path.join(os.path.dirname(cfg_fname), _option('data file'))
    version = _option('version')
    lang = _option('language')
    desc = _option('description')
    url = _option('url')
    name_long = _option('long name')
    name_short = _option('short name')

    _cfg.remove_source(source = 'atc')

    _log.debug('importing ATC version [%s] (%s) from [%s]', version, lang, data_fname)

    # (re)create the data source record: drop any previous record for the
    # same name/version, insert afresh, then read back the primary key
    args = {'ver': version, 'desc': desc, 'url': url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
    queries = [
        {
            'cmd': u"""delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
            'args': args
        }, {
            'cmd': u"""
                insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
                    %(name_long)s,
                    %(name_short)s,
                    %(ver)s,
                    %(desc)s,
                    %(lang)s,
                    %(url)s
                )""",
            'args': args
        }, {
            'cmd': u"""select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
            'args': args
        }
    ]
    rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    _log.debug('ATC data source record created, pk is #%s', data_src_pk)

    # load the data file into the staging table
    csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
    try:
        atc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = ",", quotechar = '"')

        _empty_atc_staging(conn)

        curs = conn.cursor()
        cmd = u"""insert into ref.atc_staging values (%s, %s, %s, %s, %s, %s)"""
        atc_lines = iter(atc_reader)
        next(atc_lines, None)       # the first line holds the column headers
        for atc_line in atc_lines:

            # skip lines without any content in the first five columns
            # (this includes entirely blank lines, which used to raise)
            if not any(atc_line[:5]):
                continue

            ddd_val, unit, adro, comment = _parse_ddd(atc_line[4])

            args = [
                atc_line[0].strip(),
                atc_line[2],
                ddd_val.replace(',', '.'),      # decimal comma -> decimal point
                unit,
                adro,
                comment
            ]
            gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])

        curs.close()
        conn.commit()
    finally:
        # do not leak the file handle if reading or a query fails
        csv_file.close()
    _log.debug('ATC staging table loaded')

    # transfer the staged rows into the real table; empty strings become NULL
    curs = conn.cursor()
    args = {'src_pk': data_src_pk}
    cmd = u"""
        insert into ref.atc (
            fk_data_source,
            code,
            term,
            comment,
            ddd,
            unit,
            administration_route
        ) select
            %(src_pk)s,
            atc,
            name,
            nullif(comment, ''),
            nullif(ddd, '')::numeric,
            nullif(unit, ''),
            nullif(adro, '')

        from
            ref.atc_staging
    """
    gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])
    curs.close()
    conn.commit()
    _log.debug('transfer from ATC staging table to real ATC table done')

    _empty_atc_staging(conn)

    return True
264
265
266
if __name__ == "__main__":

    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    gmI18N.activate_locale()

    def test_text2atc():
        """Print exact and fuzzy ATC matches for the term given as 2nd argument."""
        term = sys.argv[2]
        # %-formatting keeps the output identical to the former print
        # statements while staying valid syntax on newer interpreters
        print('searching ATC code for: %s' % term)
        print('  %s' % (text2atc(term),))
        print('  %s' % (text2atc(term, True),))

    if len(sys.argv) > 1 and sys.argv[1] == 'test':
        # the test reads sys.argv[2], so insist on the search term being there
        if len(sys.argv) > 2:
            test_text2atc()
        else:
            print('usage: %s test <search term>' % sys.argv[0])
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309