Coverage for openhcs/processing/backends/analysis/cx5_format.py: 0.3%

442 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-04 02:09 +0000

1import copy 

2import xlsxwriter 

3import string 

4import numpy as np 

5import pandas as pd 

6import sys 

7 

8 

9 

10 

def read_results(results_path,scope=None):
    """Load the raw measurement table exported by a supported microscope.

    Args:
        results_path: Path to the Excel workbook holding the raw results.
        scope: Microscope identifier. ``"EDDU_CX5"`` reads the ``Rawdata``
            sheet; ``"EDDU_metaxpress"`` reads the first sheet.

    Returns:
        A ``pandas.DataFrame`` with the contents of the selected sheet.

    Raises:
        SystemExit: If ``scope`` is not one of the supported identifiers.
    """
    # Validate the scope before touching the file so an unsupported
    # microscope is reported without opening the workbook at all.
    if scope not in ("EDDU_CX5", "EDDU_metaxpress"):
        print("microscope "+str(scope)+" not known. Exiting")
        sys.exit()
    # The context manager releases the workbook handle once the sheet is read.
    with pd.ExcelFile(results_path) as xls:
        sheet = 'Rawdata' if scope == "EDDU_CX5" else xls.sheet_names[0]
        return pd.read_excel(xls, sheet)

21 

def get_features(raw_df,scope=None):
    """Return the feature names found in ``raw_df`` for the given microscope.

    Args:
        raw_df: Raw results table as returned by ``read_results``.
        scope: Microscope identifier (``"EDDU_CX5"`` or ``"EDDU_metaxpress"``).

    Returns:
        Whatever the microscope-specific extractor returns.

    Raises:
        SystemExit: If ``scope`` is not a supported identifier.
    """
    if scope == "EDDU_CX5":
        extractor = get_features_EDDU_CX5
    elif scope == "EDDU_metaxpress":
        extractor = get_features_EDDU_metaxpress
    else:
        # Unknown microscope: report and terminate, as the other dispatchers do.
        print("microscope "+str(scope)+" not known. Exiting")
        sys.exit()
    return extractor(raw_df)

30 

def is_N_row(row_name):
    """Tell whether a row label names the replicate-count row.

    The comparison is case-insensitive and accepts ``n``, ``ns``,
    ``replicate`` and ``replicates``.
    """
    return row_name.lower() in ("n", "ns", "replicate", "replicates")

39 

40 

def is_well_all_replicates_row(row_name):
    """Tell whether a row label is exactly ``well`` or ``wells`` (any case)."""
    return row_name.lower() in ("well", "wells")

44 

def is_well_specific_replicate_row(row_name):
    """Tell whether a row label contains ``well`` and ends with a digit.

    The check is case-insensitive; e.g. ``Wells1`` matches, ``Wells`` does not.
    """
    lowered = row_name.lower()
    # 'well' being present guarantees the label is non-empty before indexing.
    return 'well' in lowered and lowered[-1].isdigit()

50 

def read_plate_layout(config_path):
    """Parse the ``drug_curve_map`` sheet of a config workbook into a layout.

    The sheet is read with its first column as row labels and no header. Each
    row is interpreted by its label: replicate count, microscope, control
    wells, plate group, group N, condition, dose, or wells.

    Args:
        config_path: Path to the Excel configuration workbook.

    Returns:
        A 4-tuple ``(scope, layout, conditions, ctrl_positions)``:

        * ``scope``: first cell of the scope/microscope row, or ``None``.
        * ``layout``: ``"N<k>" -> condition -> dose -> [(well, plate_group)]``.
        * ``conditions``: condition names in the order they were read.
        * ``ctrl_positions``: ``"N<k>" -> [(well, plate_group)]``, or ``None``
          when the sheet defines no replicate-aligned control wells.

    Raises:
        ValueError: If rows appear in an unusable order (e.g. a condition
            before the replicate-count row) or related rows are too short.
    """
    # Close the workbook handle as soon as the sheet has been loaded.
    with pd.ExcelFile(config_path) as xls:
        df = pd.read_excel(xls, 'drug_curve_map',index_col=0,header=None)
    df = df.dropna(how='all')

    layout = {}
    condition = None
    doses = None
    wells = None
    N = None
    specific_N = None
    scope = None
    conditions = []
    ctrl_wells = None
    ctrl_wells_aligned = None
    ctrl_groups = None
    ctrl_positions_replicates = None
    ctrl_positions = None

    def sanitize_compare(string1,string2):
        """Compare labels ignoring case, spaces, underscores and a plural 's'."""
        def canonical(text):
            text = text.lower().replace('_', '').replace(' ', '')
            # endswith() is safe on labels that are empty after stripping.
            return text if text.endswith('s') else text + 's'
        return canonical(string1) == canonical(string2)

    for _, row in df.iterrows():
        # Row labels can be non-strings (e.g. NaN or numbers); use their text
        # form so the label helpers never fail on ``.lower()``.
        label = str(row.name)

        # Replicate count: creates one empty layout entry per replicate.
        if is_N_row(label):
            N = int(row.iloc[0])
            for rep in range(N):
                layout["N"+str(rep+1)] = {}

        # Microscope identifier.
        if sanitize_compare(label,'scope') or sanitize_compare(label,'microscope'):
            scope = row.iloc[0]

        # While the control section is open, plate-group rows belong to the
        # control wells rather than to a condition.
        if sanitize_compare(label,'plate group') and ctrl_wells is not None:
            if ctrl_groups is None:
                ctrl_groups = []
            ctrl_groups += row.dropna().tolist()
            continue

        # Control wells.
        if sanitize_compare(label,'control') or sanitize_compare(label,'control well'):
            if ctrl_wells is None:
                ctrl_wells = []
            ctrl_wells += row.dropna().tolist()
            continue

        # Replicate assignment for the control wells read so far.
        if sanitize_compare(label,'group n'):
            if ctrl_wells is None:
                raise ValueError("'group N' row found before any control wells row")
            if ctrl_positions_replicates is None:
                ctrl_positions_replicates = []
            if ctrl_wells_aligned is None:
                ctrl_wells_aligned = []
            ctrl_positions_replicates += row.dropna().tolist()
            ctrl_wells_aligned += ctrl_wells
            continue

        # A condition row ends the control section and starts a new condition.
        if sanitize_compare(label,'condition'):
            if N is None:
                raise ValueError("replicate-count row must precede the first condition row")
            if ctrl_wells_aligned is None or ctrl_positions_replicates is None:
                # No replicate-aligned controls were defined in the sheet.
                ctrl_positions = None
            else:
                n_ctrl = len(ctrl_wells_aligned)
                if (ctrl_groups is None or len(ctrl_groups) < n_ctrl
                        or len(ctrl_positions_replicates) < n_ctrl):
                    raise ValueError("control wells, plate groups and group N rows do not line up")
                ctrl_positions = {"N"+str(rep+1): [] for rep in range(N)}
                for well, group, rep in zip(ctrl_wells_aligned, ctrl_groups,
                                            ctrl_positions_replicates):
                    ctrl_positions["N"+str(rep)].append((well, group))
            # Later plate-group rows now describe condition wells.
            ctrl_wells = None

            # layout[replicate][condition] -> {dose: [...]}
            condition = row.iloc[0]
            for rep in range(N):
                layout["N"+str(rep+1)].setdefault(condition, {})
            conditions.append(condition)

        if sanitize_compare(label,'dose'):
            doses = row.dropna().tolist()

        # Wells shared by every replicate ...
        if is_well_all_replicates_row(label):
            wells = row.dropna().tolist()
            specific_N = None
        # ... or wells for the single replicate named by the trailing digit.
        if is_well_specific_replicate_row(label):
            specific_N = int(label[-1])
            wells = row.dropna().tolist()

        # A plate-group row completes the wells of the preceding row.
        if sanitize_compare(label,'plate group'):
            plate_groups = row.dropna().tolist()
            if N is None or condition is None or doses is None or wells is None:
                raise ValueError("plate group row found before N, condition, dose and wells rows")
            if len(wells) < len(doses) or len(plate_groups) < len(doses):
                raise ValueError("wells / plate group rows are shorter than the dose row")
            if specific_N is None:
                targets = ["N"+str(rep+1) for rep in range(N)]
            else:
                targets = ["N"+str(specific_N)]
            for target in targets:
                per_dose = layout[target][condition]
                for dose, well, group in zip(doses, wells, plate_groups):
                    per_dose.setdefault(dose, []).append((well, group))
    return scope, layout, conditions, ctrl_positions

170 

def get_features_EDDU_CX5(raw_df):
    """Return the feature columns of a CX5 results table.

    The features are the columns strictly after the one selected by
    ``argmax`` over ``columns.str.find("Replicate")``, up to but excluding
    the last column. The result is a pandas ``Index`` of column labels.
    """
    replicate_hits = raw_df.columns.str.find("Replicate")
    first_feature = replicate_hits.argmax() + 1
    return raw_df.iloc[:, first_feature:-1].columns

173 

def get_features_EDDU_metaxpress(raw_df):
    """Return the feature names of a MetaXpress results table.

    Takes the first row whose first cell is null and returns its cells from
    the third column onwards as a list.
    """
    header_rows = raw_df[pd.isnull(raw_df.iloc[:, 0])]
    first_header = header_rows.iloc[0]
    return first_header.tolist()[2:]

177 

def create_well_dict(raw_df, wells=None,scope=None):
    """Build an empty ``well -> {feature: None}`` mapping.

    Args:
        raw_df: Raw results table, used only to look up the feature names.
        wells: Well names to include. When ``None``, the 96 wells ``A01`` to
            ``H12`` (8 rows by 12 columns) are used.
        scope: Microscope identifier forwarded to ``get_features``.

    Returns:
        A dict with one independent ``{feature: None}`` dict per well.
    """
    # Identity check: ``== None`` is evaluated element-wise by array-likes.
    if wells is None:
        wells = [
            letter + str(col).zfill(2)
            for letter in string.ascii_uppercase[:8]
            for col in range(1, 13)
        ]
    features = get_features(raw_df,scope=scope)
    return {well:{feature:None for feature in features} for well in wells}

188 

def add_well_to_well_dict(wells,well_dict, raw_df, scope=None):
    """Add (or reset) entries for ``wells`` in ``well_dict``.

    Args:
        wells: Iterable of well names to add.
        well_dict: Mapping to update in place.
        raw_df: Raw results table, used only to look up the feature names.
        scope: Microscope identifier forwarded to ``get_features``. With the
            default ``None``, ``get_features`` reports an unknown microscope
            and exits, so callers should pass the real scope.

    Returns:
        The same ``well_dict`` object, with a fresh ``{feature: None}`` dict
        stored under every name in ``wells``.
    """
    # get_features already returns the feature names themselves; they have no
    # ``.columns`` attribute to take.
    features = get_features(raw_df, scope=scope)
    for well in wells:
        well_dict[well] = {feature: None for feature in features}
    return well_dict

194 

def create_plates_dict(raw_df,scope=None):
    """Create an empty plate dictionary for the given microscope.

    Args:
        raw_df: Raw results table.
        scope: Microscope identifier (``"EDDU_CX5"`` or ``"EDDU_metaxpress"``).

    Returns:
        Whatever the microscope-specific builder returns.

    Raises:
        SystemExit: If ``scope`` is not a supported identifier.
    """
    if scope == "EDDU_CX5":
        builder = create_plates_dict_EDDU_CX5
    elif scope == "EDDU_metaxpress":
        builder = create_plates_dict_EDDU_metaxpress
    else:
        print("microscope "+str(scope)+" not known. Exiting")
        sys.exit()
    return builder(raw_df)

203 

def create_plates_dict_EDDU_metaxpress(raw_df):
    """Create an empty well dictionary for every plate in a MetaXpress table.

    Plate names are read from the second column of each row that contains a
    cell equal to ``'Plate Name'``.
    """
    marker_rows = (raw_df == 'Plate Name').any(axis=1)
    plate_names = raw_df[marker_rows].iloc[:, 1].tolist()
    plate_dict = {}
    for plate_id in plate_names:
        plate_dict[plate_id] = create_well_dict(raw_df, scope="EDDU_metaxpress")
    return plate_dict

208 

def create_plates_dict_EDDU_CX5(raw_df):
    """Create an empty well dictionary for every plate in a CX5 table.

    Args:
        raw_df: Raw results table with a ``UniquePlateId`` column.

    Returns:
        A dict mapping each distinct plate id (in first-seen order) to an
        empty well dictionary built by ``create_well_dict``.
    """
    # The column can repeat a plate id on many rows; build the well dictionary
    # once per distinct id instead of once per row.
    unique_plate_ids = dict.fromkeys(raw_df['UniquePlateId'].tolist())
    return {
        plate_id: create_well_dict(raw_df, scope="EDDU_CX5")
        for plate_id in unique_plate_ids
    }

213 

def indices_to_well(row,col,dim):
    """Convert zero-based (row, col) indices to a well name such as ``B03``.

    Args:
        row: Zero-based row index.
        col: Zero-based column index.
        dim: Plate dimensions as ``(rows, columns)``; only the column count
            enters the computation.

    Returns:
        Row letter (``A`` for row 0) followed by the two-digit, one-based
        column number.
    """
    _n_rows, n_cols = dim[0], dim[1]
    # One-based running position of the well when counting row by row.
    position = row * n_cols + (col + 1)
    row_offset = int((position - 1) / n_cols)
    letter = chr(65 + row_offset)
    column = position - row_offset * n_cols
    return letter + str(column).zfill(2)

224 

def row_col_to_well(row,col):
    """Convert one-based row and column numbers to a well name.

    Row 1 maps to ``A``; the column is zero-padded to two digits.
    """
    return chr(row + 64) + str(col).zfill(2)

229 

def well_to_num(well,dim):
    """Convert a well name such as ``B03`` to its one-based running number.

    Wells are numbered row by row, so the result is
    ``row_index * columns + column`` with ``A`` as row index 0.

    Args:
        well: Well name made of one row letter followed by a column number.
        dim: Plate dimensions as ``(rows, columns)``.

    Returns:
        The one-based position of the well on the plate.

    Raises:
        ValueError: If ``well`` has no digits or not exactly one row letter.
    """
    n_cols = dim[1]
    for split_at, char in enumerate(well):
        if char.isdigit():
            row_label, col_label = well[:split_at], well[split_at:]
            break
    else:
        raise ValueError("well name %r contains no column number" % (well,))
    if len(row_label) != 1:
        raise ValueError("well name %r must start with exactly one row letter" % (well,))
    # The row stride is the number of columns. The previous ``rows + 4``
    # expression only matched it for 8x12 plates.
    return (ord(row_label) - 65) * n_cols + int(col_label)

240 

def fill_plates_dict(raw_df,plates_dict,scope=None):
    """Fill ``plates_dict`` with measured values for the given microscope.

    Args:
        raw_df: Raw results table.
        plates_dict: Plate dictionary as created by ``create_plates_dict``.
        scope: Microscope identifier (``"EDDU_CX5"`` or ``"EDDU_metaxpress"``).

    Returns:
        Whatever the microscope-specific filler returns.

    Raises:
        SystemExit: If ``scope`` is not a supported identifier.
    """
    features = get_features(raw_df, scope=scope)
    if scope == "EDDU_CX5":
        filler = fill_plates_dict_EDDU_CX5
    elif scope == "EDDU_metaxpress":
        filler = fill_plates_dict_EDDU_metaxpress
    else:
        print("microscope "+str(scope)+" not known. Exiting")
        sys.exit()
    return filler(raw_df, plates_dict, features)

250 

def fill_plates_dict_EDDU_CX5(raw_df,plates_dict,features):
    """Copy CX5 feature values from ``raw_df`` into ``plates_dict``.

    For every row, the cells at positions 1, 2 and 3 are used as plate key,
    row number and column number respectively.

    Args:
        raw_df: Raw CX5 results table.
        plates_dict: ``plate -> well -> {feature: value}`` mapping to fill.
        features: Feature column labels to copy.

    Returns:
        The same ``plates_dict`` object, updated in place.
    """
    for _, row in raw_df.iterrows():
        # Use .iloc for positional access: integer keys on a label-indexed
        # Series rely on a deprecated fallback.
        plate_id = row.iloc[1]
        well = row_col_to_well(row.iloc[2], row.iloc[3])
        for feature in features:
            plates_dict[plate_id][well][feature] = row[feature]
    return plates_dict

257 

def fill_plates_dict_EDDU_metaxpress(raw_df,plates_dict,features):
    """Copy MetaXpress feature values from ``raw_df`` into ``plates_dict``.

    The table is scanned top to bottom. A ``Plate Name`` row selects the
    current plate; a row whose first cell is null switches collection on for
    the rows that follow; a ``Barcode`` row switches it off again. While
    collecting, the first cell of each row is used as the well name.

    Args:
        raw_df: Raw MetaXpress table with ``2 + len(features)`` columns.
        plates_dict: ``plate -> well -> {feature: value}`` mapping to fill.
        features: Feature names, in column order.

    Returns:
        The same ``plates_dict`` object, updated in place.
    """
    # set_axis returns a relabelled copy; the ``inplace`` keyword no longer
    # exists in pandas 2.x.
    labelled = raw_df.set_axis(["Well","Laser Focus"]+list(features), axis=1)
    plate_name = None
    collecting = False
    for _, row in labelled.iterrows():
        # Positional access via .iloc; the row is indexed by string labels.
        first_cell = row.iloc[0]
        if first_cell == "Barcode":
            collecting = False
        if collecting:
            for feature in features:
                plates_dict[plate_name][first_cell][feature] = row[feature]
        if first_cell == "Plate Name":
            plate_name = row.iloc[1]
        elif pd.isnull(first_cell):
            collecting = True
    return plates_dict

273 

def average_plates(plates,raw_df,scope=None):
    """Average several plates well by well and feature by feature.

    NOTE: this file defines ``average_plates`` a second time further down;
    at import time the later definition replaces this one.

    Args:
        plates: Non-empty sequence of ``well -> {feature: value}`` mappings.
        raw_df: Raw results table, used to look up wells and feature names.
        scope: Microscope identifier forwarded to the lookup helpers.

    Returns:
        A new ``well -> {feature: mean}`` mapping.
    """
    average_plate = create_well_dict(raw_df, scope=scope)
    # Forward the scope: without it get_features reports an unknown
    # microscope and exits.
    features = get_features(raw_df, scope=scope)
    n_plates = len(plates)
    for well, well_values in average_plate.items():
        for feature in features:
            total = 0
            for plate in plates:
                total += plate[well][feature]
            well_values[feature] = total / n_plates
    return average_plate

285 

def average_plates_all_replicates(plate_groups,plates_dict,raw_df):
    """Average the plates of every replicate.

    Args:
        plate_groups: ``replicate -> {averaged plate name: plate names}``.
        plates_dict: ``plate name -> plate`` mapping.
        raw_df: Raw results table, passed through to the per-replicate helper.

    Returns:
        ``replicate -> result of average_plates_one_replicate``.
    """
    # NOTE(review): no microscope scope is forwarded from here - confirm the
    # downstream helpers can resolve the feature list without one.
    averaged = {}
    for replicate, groups in plate_groups.items():
        averaged[replicate] = average_plates_one_replicate(groups, plates_dict, raw_df)
    return averaged

292 

def average_plates_duplicate_rows(plate_groups,plates_dict,raw_df,wells_to_average=None,scope=None):
    """Average each listed well with the well directly below it, per plate.

    Args:
        plate_groups: Unused; kept for interface compatibility.
        plates_dict: ``plate name -> well -> {feature: value}`` mapping.
        raw_df: Raw results table, used to look up the feature names.
        wells_to_average: Wells heading each group of rows to average. When
            ``None``, the wells from ``create_duplicate_wells()`` are used
            (assumed to be the intended default - TODO confirm).
        scope: Microscope identifier forwarded to the lookup helpers.

    Returns:
        A new ``plate name -> well -> {feature: mean}`` mapping containing
        only the wells in ``wells_to_average``.
    """
    if wells_to_average is None:
        wells_to_average = create_duplicate_wells()
    features = get_features(raw_df,scope=scope)
    averaged_plates_dict = {}
    for plate_name, plate in plates_dict.items():
        average_plate = create_well_dict(raw_df,scope=scope,wells=wells_to_average)
        for well in wells_to_average:
            average_plate = average_rows(plate,average_plate,well,features)
        averaged_plates_dict[plate_name] = average_plate
    # Return the computed averages rather than the untouched input.
    return averaged_plates_dict

302 

def average_rows(plate_dict,average_plate,well,features,num_rows_average=2):
    """Average a well with the wells directly below it in the same column.

    Args:
        plate_dict: ``well -> {feature: value}`` mapping to read from.
        average_plate: ``well -> {feature: value}`` mapping to write into.
        well: Name of the topmost well of the group.
        features: Feature names to average.
        num_rows_average: Number of consecutive rows to average, counting
            ``well`` itself.

    Returns:
        ``average_plate``, with the means stored under ``well``.
    """
    wells_to_average = [well]
    current = well
    for _ in range(num_rows_average - 1):
        # Advance the cursor so each step moves one row further down.
        current = get_well_next_row(current)
        wells_to_average.append(current)
    for feature in features:
        total = 0
        for source_well in wells_to_average:
            total += plate_dict[source_well][feature]
        average_plate[well][feature] = total / num_rows_average
    return average_plate

318 

def get_well_next_row(well):
    """Return the well one row below ``well`` (same column), e.g. A01 -> B01."""
    row_letter, column = well[0], well[1:]
    return chr(ord(row_letter) + 1) + column

321 

322 

def average_plates(plates,raw_df,scope=None):
    """Average several plates well by well and feature by feature.

    NOTE: an earlier definition of ``average_plates`` exists in this file;
    this later one is the definition in effect after import.

    Args:
        plates: Non-empty sequence of ``well -> {feature: value}`` mappings.
        raw_df: Raw results table, used to look up wells and feature names.
        scope: Microscope identifier forwarded to the lookup helpers.

    Returns:
        A new ``well -> {feature: mean}`` mapping.
    """
    average_plate = create_well_dict(raw_df, scope=scope)
    # Forward the scope: without it get_features reports an unknown
    # microscope and exits.
    features = get_features(raw_df, scope=scope)
    n_plates = len(plates)
    for well, well_values in average_plate.items():
        for feature in features:
            total = 0
            for plate in plates:
                total += plate[well][feature]
            well_values[feature] = total / n_plates
    return average_plate

334 

335 

def average_plates_one_replicate(averaged_plates_names_dict,plates_dict,raw_df,scope=None):
    """Average the plates belonging to each named group of one replicate.

    Args:
        averaged_plates_names_dict: ``averaged plate name -> plate names``.
        plates_dict: ``plate name -> plate`` mapping.
        raw_df: Raw results table, passed through to ``average_plates``.
        scope: Microscope identifier forwarded to ``average_plates``; needed
            for the feature lookup to succeed.

    Returns:
        ``averaged plate name -> averaged plate``.
    """
    averaged = {}
    for average_name, plate_names in averaged_plates_names_dict.items():
        plates_to_average = [plates_dict[plate_name] for plate_name in plate_names]
        averaged[average_name] = average_plates(plates_to_average, raw_df, scope=scope)
    return averaged

343 

def load_plate_groups(config_path):
    """Read the ``plate_groups`` sheet of a config workbook.

    The sheet is read without a header and with its first column as the row
    index. The first row is skipped; every remaining row label is treated as
    a replicate. Groups are keyed by the string form of the column labels.

    Args:
        config_path: Path to the Excel configuration workbook.

    Returns:
        ``replicate -> {group: cell value}``.
    """
    # Close the workbook handle as soon as the sheet has been loaded.
    with pd.ExcelFile(config_path) as xls:
        df = pd.read_excel(xls, 'plate_groups',index_col=0,header=None)
    replicates = df.index.tolist()[1:]
    plate_groups = {}
    for replicate in replicates:
        replicate_row = df.loc[replicate]
        plate_groups[replicate] = {
            str(column): replicate_row[column] for column in df.columns.tolist()
        }
    return plate_groups

355 

def normalize_plate(plate,reference_wells,raw_df,ctrl_avg_name,scope=None):
    """Divide every well of a plate by the mean of its reference wells.

    Args:
        plate: ``well -> {feature: value}`` mapping to normalize.
        reference_wells: Wells whose mean is the normalization factor.
        raw_df: Raw results table, used to look up wells and feature names.
        ctrl_avg_name: Key under which the per-feature reference mean is
            stored in the result.
        scope: Microscope identifier forwarded to the lookup helpers; needed
            for the feature lookup to succeed.

    Returns:
        A new ``well -> {feature: value}`` mapping with normalized values plus
        the ``ctrl_avg_name`` entry. Values that cannot be divided are copied
        unchanged.
    """
    features = get_features(raw_df, scope=scope)
    normalized_plate = create_well_dict(raw_df, scope=scope)
    normalized_plate[ctrl_avg_name] = {feature: None for feature in features}
    for feature in features:
        control_values = [plate[well][feature] for well in reference_wells]
        control_avg = np.mean(np.array(control_values))
        normalized_plate[ctrl_avg_name][feature] = control_avg
        for well in normalized_plate:
            # Exact comparison: a substring test would also skip any well
            # whose name happens to occur inside ctrl_avg_name.
            if well == ctrl_avg_name:
                continue
            raw_value = plate[well][feature]
            try:
                normalized_plate[well][feature] = raw_value / control_avg
            except (TypeError, ZeroDivisionError):
                # Best effort: keep values that cannot be divided (e.g. None).
                normalized_plate[well][feature] = raw_value
    return normalized_plate

371 

372 

def normalize_all_plates(plates_dict,reference_wells,raw_df,ctrl_avg_name):
    """Apply ``normalize_plate`` to every plate of every replicate.

    Args:
        plates_dict: ``replicate -> {condition: plate}``.
        reference_wells: Reference wells passed to ``normalize_plate``.
        raw_df: Raw results table passed to ``normalize_plate``.
        ctrl_avg_name: Control-average key passed to ``normalize_plate``.

    Returns:
        ``replicate -> {condition: normalized plate}``.
    """
    return {
        replicate: {
            condition: normalize_plate(plate, reference_wells, raw_df, ctrl_avg_name)
            for condition, plate in condition_plates.items()
        }
        for replicate, condition_plates in plates_dict.items()
    }

379 

def create_table_for_feature(feature,plates_dict):
    """Build a dose-by-(condition, replicate) table for one feature.

    Replicates are taken from the first condition and doses from its first
    replicate. Missing entries become empty cells.

    Args:
        feature: Name of the feature to tabulate.
        plates_dict: ``condition -> replicate -> dose -> {feature: value}``.

    Returns:
        A ``pandas.DataFrame`` indexed by dose, with two-level columns
        ``(condition, replicate)``. An empty frame for empty input.
    """
    if not plates_dict:
        return pd.DataFrame()
    conditions = list(plates_dict)
    replicates = list(plates_dict[conditions[0]])
    doses = list(plates_dict[conditions[0]][replicates[0]])

    # Keep (condition, replicate) as a pair throughout. Joining the names with
    # "_" and splitting again breaks any name that itself contains "_".
    column_keys = [(condition, replicate)
                   for condition in conditions for replicate in replicates]
    columns = {}
    for position, (condition, replicate) in enumerate(column_keys):
        values = []
        for dose in doses:
            try:
                values.append(plates_dict[condition][replicate][dose][feature])
            except (KeyError, TypeError):
                values.append(None)
        columns[position] = values

    feature_table = pd.DataFrame(columns)
    feature_table.columns = pd.MultiIndex.from_tuples(
        [(str(condition), str(replicate)) for condition, replicate in column_keys]
    )
    feature_table.index = doses
    return feature_table

402 

403 

404 

def create_all_feature_tables(plates_dict,features):
    """Build one table per feature via ``create_table_for_feature``.

    Returns:
        ``feature -> table``.
    """
    return {
        feature: create_table_for_feature(feature, plates_dict)
        for feature in features
    }

410 

def feature_tables_to_excel(feature_tables,outpath):
    """Write every feature table to its own sheet of one Excel workbook.

    Sheet names are the first 31 characters of the feature name with the
    characters ``[ ] : * ? / \\`` removed.

    Args:
        feature_tables: ``feature name -> DataFrame``.
        outpath: Destination path of the workbook.
    """
    forbidden = str.maketrans("", "", "[]:*?/\\")

    def remove_inval_chars(name):
        # Drop every forbidden character in a single pass.
        return name.translate(forbidden)

    with pd.ExcelWriter(outpath) as writer:
        for feature, table in feature_tables.items():
            sheet_name = remove_inval_chars(feature[:31])
            table.to_excel(writer, sheet_name=sheet_name)

420 

def create_duplicate_wells():
    """List the wells of every second row (A, C, E, G), columns 01 to 12."""
    every_other_row = string.ascii_uppercase[0:8:2]
    return [
        str(letter) + str(column).zfill(2)
        for letter in every_other_row
        for column in range(1, 13)
    ]

429 

def make_experiment_dict_locations(plate_groups,plate_layout,conditions):
    """Re-key a plate layout from replicate-first to condition-first.

    Args:
        plate_groups: Not used by this function.
        plate_layout: ``replicate -> condition -> dose -> locations``.
        conditions: Condition names; each gets an entry in the result.

    Returns:
        ``condition -> replicate -> dose -> locations``. The per-dose mapping
        is a shallow copy; the location lists themselves are shared.
    """
    experiment_dict = {}
    for name in conditions:
        experiment_dict[name] = {}
    for replicate, per_condition in plate_layout.items():
        for condition, doses in per_condition.items():
            experiment_dict[condition][replicate] = dict(doses)
    return experiment_dict

437 

def make_experiment_dict_values(plates,experiment_dict_locations,features,plate_groups=None):
    """Replace well locations by averaged feature values.

    Args:
        plates: ``plate name -> well -> {feature: value}``.
        experiment_dict_locations: ``condition -> replicate -> dose ->
            locations`` as built by ``make_experiment_dict_locations``.
        features: Feature names to average.
        plate_groups: ``replicate -> {group: plate name}``. When omitted, a
            module-level ``plate_groups`` variable is used if one exists,
            which is how this function obtained it before the parameter was
            added.

    Returns:
        ``condition -> replicate -> dose -> {feature: mean value}``.
    """
    if plate_groups is None:
        # Backward compatibility with callers relying on the module global.
        plate_groups = globals().get("plate_groups")
    experiment_dict_values = copy.deepcopy(experiment_dict_locations)
    for condition, replicates in experiment_dict_locations.items():
        for replicate, doses in replicates.items():
            for dose, locations in doses.items():
                experiment_dict_values[condition][replicate][dose] = {
                    feature: average_wells(locations, replicate, feature, plates, plate_groups)
                    for feature in features
                }
    return experiment_dict_values

446 

def average_wells(locations,replicate,feature,plates,plate_groups):
    """Return the mean of one feature over several well locations.

    Args:
        locations: Non-empty sequence of ``(well, plate_group)`` pairs.
        replicate: Replicate key used to resolve the plate of each location.
        feature: Feature name to read.
        plates: ``plate name -> well -> {feature: value}``.
        plate_groups: ``replicate -> {group: plate name}``.

    Raises:
        ZeroDivisionError: If ``locations`` is empty.
    """
    total = sum(
        location_to_value(location, replicate, feature, plates, plate_groups)
        for location in locations
    )
    return total / float(len(locations))

452 

def location_to_value(location,replicate,feature,plates,plate_groups):
    """Look up the value of one feature at one ``(well, plate_group)`` location.

    The plate is resolved through ``plate_groups[replicate][str(plate_group)]``.
    """
    well, plate_group = location
    resolved_plate = plate_groups[replicate][str(plate_group)]
    return plates[resolved_plate][well][feature]

458 

def normalize_experiment(experiment_dict_values,ctrl_positions,features,plates,plate_groups=None):
    """Divide every experiment value by the control mean of its replicate.

    Args:
        experiment_dict_values: ``condition -> replicate -> dose ->
            {feature: value}``.
        ctrl_positions: ``replicate -> [(well, plate_group), ...]`` control
            locations.
        features: Feature names to normalize.
        plates: ``plate name -> well -> {feature: value}``.
        plate_groups: ``replicate -> {group: plate name}``. When omitted, a
            module-level ``plate_groups`` variable is used if one exists,
            which is how this function obtained it before the parameter was
            added.

    Returns:
        A new mapping of the same shape holding ``value / control mean``. A
        control mean of 0 is treated as 1, leaving the value unchanged.
    """
    if plate_groups is None:
        # Backward compatibility with callers relying on the module global.
        plate_groups = globals().get("plate_groups")
    normalized = copy.deepcopy(experiment_dict_values)
    for condition, replicates in experiment_dict_values.items():
        for replicate, doses in replicates.items():
            control_wells = ctrl_positions[replicate]
            control_means = {
                feature: average_wells(control_wells, replicate, feature, plates, plate_groups)
                for feature in features
            }
            for dose, values in doses.items():
                normalized_values = {}
                for feature in features:
                    ctrl_value = control_means[feature]
                    if ctrl_value == 0:
                        # Avoid dividing by zero; the raw value passes through.
                        ctrl_value = 1
                    normalized_values[feature] = values[feature] / ctrl_value
                normalized[condition][replicate][dose] = normalized_values
    return normalized

475 

def write_values_heat_map(plates_dict,features,outpath):
    """Write one sheet per feature laying out every plate as an 8x12 grid.

    Each sheet stacks, for every plate, a row holding the plate name, eight
    rows (A to H) of twelve values, and a spacer row.

    Args:
        plates_dict: ``plate name -> well -> {feature: value}``.
        features: Feature names; each becomes a sheet.
        outpath: Destination path of the workbook.
    """
    # Only pandas' writer is used. The extra xlsxwriter.Workbook previously
    # opened on the same path was never written to nor closed.
    row_letters = [chr(code) for code in range(65, 65 + 8)]
    with pd.ExcelWriter(outpath) as writer:
        for feature in features:
            sheet_rows = []
            for plate in plates_dict:
                sheet_rows.append([plate])
                for letter in row_letters:
                    sheet_rows.append([
                        plates_dict[plate][letter + str(column + 1).zfill(2)][feature]
                        for column in range(12)
                    ])
                sheet_rows.append([""])
            pd.DataFrame(sheet_rows).to_excel(
                writer, sheet_name=remove_inval_chars(feature[:31])
            )

493 

def create_reference_wells():
    """List ``(well, 2)`` pairs for rows A to H, columns 07 to 12."""
    return [
        (str(letter) + str(column).zfill(2), 2)
        for letter in string.ascii_uppercase[:8]
        for column in range(7, 13)
    ]

502 

def remove_inval_chars(name):
    """Strip the characters ``[ ] : * ? / \\`` from ``name``."""
    return name.translate(str.maketrans("", "", "[]:*?/\\"))

508 

# NOTE(review): everything below runs at import time and reads files from the
# current working directory - consider an ``if __name__ == "__main__":`` guard.

# Well names of an 8x12 plate. ``conditions`` is replaced further down by the
# conditions read from the config workbook.
rows = list(string.ascii_uppercase[:8])
cols = list(range(1, 13))
conditions = []
for row in rows:
    for col in cols:
        conditions.append(str(row) + str(col).zfill(2))

# Input and output locations.
results_path = "mx_results.xlsx"
config_file = "./config.xlsx"
compiled_results_path = "./compiled_results_normalized.xlsx"
heatmap_path = "./heatmaps.xlsx"

# Read the experiment description from the config workbook.
scope, plate_layout, conditions, ctrl_positions = read_plate_layout(config_file)
plate_groups = load_plate_groups(config_file)
experiment_dict_locations = make_experiment_dict_locations(
    plate_groups, plate_layout, conditions
)

# Load the raw measurements and arrange them per plate and well.
df = read_results(results_path, scope=scope)
features = get_features(df, scope=scope)
well_dict = create_well_dict(df, scope=scope)
plates_dict = create_plates_dict(df, scope=scope)
plates_dict = fill_plates_dict(df, plates_dict, scope=scope)

# Average per condition/replicate/dose and normalize when controls exist.
experiment_dict_values = make_experiment_dict_values(
    plates_dict, experiment_dict_locations, features
)
if ctrl_positions is not None:
    experiment_dict_values = normalize_experiment(
        experiment_dict_values, ctrl_positions, features, plates_dict
    )

# Write the per-feature tables and the raw heat maps.
feature_tables = create_all_feature_tables(experiment_dict_values, features)
write_values_heat_map(plates_dict, features, heatmap_path)
feature_tables_to_excel(feature_tables, compiled_results_path)