Package Gnumed :: Package business :: Module gmATC
[frames | no frames]

Source Code for Module Gnumed.business.gmATC

  1  # -*- coding: utf8 -*- 
  2  """ATC/DDD handling code. 
  3   
  4  http://who.no 
  5   
  6  license: GPL 
  7  """ 
  8  #============================================================ 
  9  # $Source: /cvsroot/gnumed/gnumed/gnumed/client/business/gmATC.py,v $ 
 10  # $Id: gmATC.py,v 1.6 2009/12/01 21:47:02 ncq Exp $ 
 11  __version__ = "$Revision: 1.6 $" 
 12  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 13   
 14  import sys, codecs, logging, csv, re as regex, os.path 
 15   
 16   
 17  if __name__ == '__main__': 
 18          sys.path.insert(0, '../../') 
 19  from Gnumed.pycommon import gmPG2, gmTools, gmCfg2 
 20   
 21   
 22  _log = logging.getLogger('gm.atc') 
 23  _log.info(__version__) 
 24   
 25  _cfg = gmCfg2.gmCfgData() 
 26  #============================================================ 
def propagate_atc(substance=None, atc=None):
    """Store an ATC code on all not-yet-coded rows describing *substance*.

    If *atc* is None or blank, the code is looked up with an exact
    (non-fuzzy) text2atc() search on *substance*. The lookup must yield
    exactly one row, otherwise nothing is written.

    The code is then set on every row of ref.substance_in_brand,
    clin.consumed_substance and ref.branded_drug whose description equals
    the stripped *substance* and whose atc_code IS NULL.

    Args:
        substance: name of the substance / drug to be coded.
        atc: ATC code to propagate, or None / blank to have it looked up.

    Returns:
        The ATC code that was propagated. If propagation was aborted (no
        substance, or no unique ATC found), *atc* as passed in, with a blank
        value normalised to None.
    """
    _log.debug('substance <%s>, ATC <%s>', substance, atc)

    # a blank ATC is the same as no ATC at all
    if atc is not None and atc.strip() == u'':
        atc = None

    # Without a substance name there is nothing to look up and no row to
    # update; previously this crashed on None.strip().
    if substance is None or substance.strip() == u'':
        _log.debug(u'no substance given, aborting')
        return atc

    if atc is None:
        atcs = text2atc(text = substance, fuzzy = False)
        if len(atcs) == 0:
            _log.debug(u'no ATC found, aborting')
            return atc
        if len(atcs) > 1:
            _log.debug(u'non-unique ATC mapping, aborting')
            return atc
        # first column of the single result row is the ATC code
        atc = atcs[0][0].strip()

    args = {'atc': atc, 'term': substance.strip()}
    # never overwrite a code that is already there (atc_code IS NULL)
    queries = [
        {'cmd': u"UPDATE ref.substance_in_brand SET atc_code = %(atc)s WHERE description = %(term)s AND atc_code IS NULL",
         'args': args},
        {'cmd': u"UPDATE clin.consumed_substance SET atc_code = %(atc)s WHERE description = %(term)s AND atc_code IS NULL",
         'args': args},
        {'cmd': u"UPDATE ref.branded_drug SET atc_code = %(atc)s WHERE description = %(term)s AND atc_code IS NULL",
         'args': args}
    ]
    gmPG2.run_rw_queries(queries = queries)

    return atc
57 #============================================================
def text2atc(text=None, fuzzy=False):
    """Look up the ATC codes known for a substance / drug name.

    The search covers ref.v_atc (term), ref.substance_in_brand,
    ref.branded_drug and clin.consumed_substance (description). Rows
    without an ATC code are ignored and each ATC code is returned once.

    Args:
        text: the name to search for; surrounding whitespace is ignored.
        fuzzy: if true, match *text* as a case-insensitive substring
            (ILIKE '%text%'); otherwise require a case-insensitive exact
            match.

    Returns:
        The rows delivered by gmPG2.run_ro_queries(), ordered by ATC code,
        with the columns (atc_code, is_group_code, pk_data_source); the
        latter two are NULL for codes not found via ref.v_atc. An empty
        list if *text* is None.
    """
    # None cannot be searched for; previously this crashed on None.strip().
    if text is None:
        return []

    text = text.strip()

    if fuzzy:
        args = {'term': u'%%%s%%' % text}
        cmd = u"""
            SELECT DISTINCT ON (atc_code) *
            FROM (
                SELECT atc as atc_code, is_group_code, pk_data_source
                FROM ref.v_atc
                WHERE term ilike %(term)s AND atc IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.substance_in_brand
                WHERE description ilike %(term)s AND atc_code IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.branded_drug
                WHERE description ilike %(term)s AND atc_code IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM clin.consumed_substance
                WHERE description ilike %(term)s AND atc_code IS NOT NULL
            ) as tmp
            ORDER BY atc_code
        """
    else:
        args = {'term': text.lower()}
        cmd = u"""
            SELECT DISTINCT ON (atc_code) *
            FROM (
                SELECT atc as atc_code, is_group_code, pk_data_source
                FROM ref.v_atc
                WHERE lower(term) = %(term)s AND atc IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.substance_in_brand
                WHERE lower(description) = %(term)s AND atc_code IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.branded_drug
                WHERE lower(description) = %(term)s AND atc_code IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM clin.consumed_substance
                WHERE lower(description) = %(term)s AND atc_code IS NOT NULL
            ) as tmp
            ORDER BY atc_code
        """

    # column index not requested, so the second return value is of no use
    rows, _ = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)

    _log.debug(u'term: %s => ATCs: %s (fuzzy: %s)', text, rows, fuzzy)

    return rows
114 #============================================================
# Single whitespace character separating the parts of the DDD column.
_DDD_SEPARATOR = regex.compile(r'\s')

# Layouts of the DDD column understood by the importer, tried in this order.
# Each entry is (pattern, maxsplit): a layout without trailing comment is
# split at every whitespace (maxsplit 0), a layout with trailing comment is
# split at the first three only so the comment stays in one piece.
_DDD_FORMATS = (
    # "1,1 mg O,P,R,..."
    (regex.compile(r'\d{,3},\d{,3}\s.{1,2}\s.(,.)*$'), 0),
    # "1,1 mg O,P,R bezogen auf ..." (German: "relative to ...")
    (regex.compile(r'\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$'), 3),
    # "20 mg O"
    (regex.compile(r'\d{,3}\s.{1,2}\s.(,.)*$'), 0),
    # "20 mg O bezogen auf ..." (German: "relative to ...")
    (regex.compile(r'\d{,3}\s.{1,2}\s.(,.)*\s.+$'), 3),
)


def _split_ddd(ddd_text):
    """Split the DDD column into a (ddd_val, unit, adro, comment) tuple.

    Text matching none of the known layouts is returned as the comment,
    with the other three parts empty.
    """
    for pattern, maxsplit in _DDD_FORMATS:
        if pattern.match(ddd_text) is None:
            continue
        parts = _DDD_SEPARATOR.split(ddd_text, maxsplit)
        if maxsplit == 0:
            ddd_val, unit, adro = parts
            return ddd_val, unit, adro, u''
        ddd_val, unit, adro, comment = parts
        return ddd_val, unit, adro, comment
    # e.g. "Standarddosis: 1 Tablette oder 30 ml Mixtur"
    # (German: "standard dose: 1 tablet or 30 ml mixture")
    return u'', u'', u'', ddd_text


def _empty_atc_staging(conn):
    """Delete all rows from ref.atc_staging via *conn* and commit."""
    curs = conn.cursor()
    gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': u"""delete from ref.atc_staging"""}])
    curs.close()
    conn.commit()
    _log.debug('ATC staging table emptied')


def atc_import(cfg_fname=None, conn=None):
    """Import an ATC data file into ref.atc.

    The config file names the data file (which must live in the same
    directory) and supplies the meta data for the ref.data_source record.
    An existing data source record with the same short name and version is
    deleted and re-created. The data file is read as CSV, its first row is
    skipped as header, and the remaining rows are loaded into
    ref.atc_staging and from there into ref.atc.

    Args:
        cfg_fname: name of the config file; the options 'data file',
            'version', 'language', 'description', 'url', 'long name' and
            'short name' are read from its group 'atc'.
        conn: connection used for the staging and import steps; it is
            committed after each step.

    Returns:
        True

    Raises:
        IndexError: on a non-blank row with fewer than five columns.
        ValueError: if the DDD column matches a known layout but does not
            split into the expected number of parts.
    """
    # read meta data
    _cfg.add_file_source(source = 'atc', file = cfg_fname, encoding = 'utf8')

    def _get_option(option):
        return _cfg.get(group = 'atc', option = option, source_order = [('atc', 'return')])

    # must be in same dir as conf file
    data_fname = os.path.join(os.path.dirname(cfg_fname), _get_option('data file'))
    version = _get_option('version')
    lang = _get_option('language')
    desc = _get_option('description')
    url = _get_option('url')
    name_long = _get_option('long name')
    name_short = _get_option('short name')

    _cfg.remove_source(source = 'atc')

    _log.debug('importing ATC version [%s] (%s) from [%s]', version, lang, data_fname)

    args = {'ver': version, 'desc': desc, 'url': url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}

    # create data source record
    queries = [
        {
            'cmd': u"""delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
            'args': args
        }, {
            'cmd': u"""
insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
    %(name_long)s,
    %(name_short)s,
    %(ver)s,
    %(desc)s,
    %(lang)s,
    %(url)s
)""",
            'args': args
        }, {
            'cmd': u"""select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
            'args': args
        }
    ]
    rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    _log.debug('ATC data source record created, pk is #%s', data_src_pk)

    # import data
    csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
    try:
        atc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = ",", quotechar = '"')

        # clean out staging area
        _empty_atc_staging(conn)

        # from file into staging table
        curs = conn.cursor()
        cmd = u"""insert into ref.atc_staging values (%s, %s, %s, %s, %s, %s)"""
        header_skipped = False
        for atc_line in atc_reader:
            # skip first (header) row
            if not header_skipped:
                header_skipped = True
                continue
            # Skip blanks. Slicing rather than indexing so that a row with
            # fewer than five columns (presumably what the reader yields for
            # an empty line, as csv.reader does) is skipped, too.
            if not any(atc_line[:5]):
                continue

            ddd_val, unit, adro, comment = _split_ddd(atc_line[4])

            args = [
                atc_line[0].strip(),
                atc_line[2],
                ddd_val.replace(',', '.'),        # decimal comma -> decimal point
                unit,
                adro,
                comment
            ]

            gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])

        curs.close()
        conn.commit()
    finally:
        # do not leak the file handle if reading or inserting a row fails
        csv_file.close()
    _log.debug('ATC staging table loaded')

    # from staging table to real table
    curs = conn.cursor()
    args = {'src_pk': data_src_pk}
    cmd = u"""
insert into ref.atc (
    fk_data_source,
    code,
    term,
    comment,
    ddd,
    unit,
    administration_route
) select
    %(src_pk)s,
    atc,
    name,
    nullif(comment, ''),
    nullif(ddd, '')::numeric,
    nullif(unit, ''),
    nullif(adro, '')

from
    ref.atc_staging
"""

    gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])

    curs.close()
    conn.commit()
    _log.debug('transfer from ATC staging table to real ATC table done')

    # clean out staging area
    _empty_atc_staging(conn)

    return True
#============================================================
# main
#------------------------------------------------------------
if __name__ == "__main__":

    # NOTE(review): gmLog2 is not referenced below - presumably imported
    # for its side effect of setting up logging; confirm before removing.
    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    gmI18N.activate_locale()
#   gmDateTime.init()

    #--------------------------------------------------------
    def test_atc_import():
        """Import the ATC data described by the config file given as 2nd argument."""
        atc_import(cfg_fname = sys.argv[2], conn = gmPG2.get_connection(readonly = False))
    #--------------------------------------------------------
    def test_text2atc():
        """Search for the 2nd argument, first exactly, then fuzzily."""
        # single-argument print(...) yields the same output as the former
        # print statements under Python 2 and also parses under Python 3
        print('searching ATC code for: %s' % sys.argv[2])
        print('  %s' % (text2atc(sys.argv[2]),))
        print('  %s' % (text2atc(sys.argv[2], True),))
    #--------------------------------------------------------
    if (len(sys.argv) > 1) and (sys.argv[1] == 'test'):
        # both tests read sys.argv[2], so insist on it being there rather
        # than failing with an IndexError
        if len(sys.argv) < 3:
            sys.exit('usage: %s test <argument>' % sys.argv[0])
        #test_atc_import()
        test_text2atc()

#============================================================
# $Log: gmATC.py,v $
# Revision 1.6  2009/12/01 21:47:02  ncq
# - make ATC propatation smarter
#
# Revision 1.5  2009/11/29 19:58:36  ncq
# - propagate-atc
#
# Revision 1.4  2009/11/28 18:12:02  ncq
# - text2atc() and test
#
# Revision 1.3  2009/10/21 20:32:45  ncq
# - cleanup
#
# Revision 1.2  2009/06/10 20:59:12  ncq
# - data file must be in the same directory as conf file
#
# Revision 1.1  2009/06/04 16:42:54  ncq
# - first version
#
#