Coverage for openhcs/processing/backends/analysis/cx5_format.py: 0.4%

465 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-14 05:57 +0000

1from skimage import io 

2import copy 

3import xlsxwriter 

4import string 

5import os 

6import numpy as np 

7import matplotlib.pyplot as plt 

8import re 

9import pandas as pd 

10import pickle 

11import pudb 

12import sys 

13 

14 

15 

16 

def read_results(results_path, scope=None):
    """Load the raw measurement sheet of a results workbook.

    Args:
        results_path: Path of the Excel workbook holding the results.
        scope: Microscope identifier. ``"EDDU_CX5"`` reads the sheet named
            ``Rawdata``; ``"EDDU_metaxpress"`` reads the first sheet.

    Returns:
        The selected sheet as a ``pandas.DataFrame``.

    Raises:
        SystemExit: If ``scope`` is not one of the identifiers above (a
            message is printed first).
    """
    # The context manager releases the workbook handle on every path,
    # including the SystemExit raised for an unknown scope.
    with pd.ExcelFile(results_path) as xls:
        if scope == "EDDU_CX5":
            raw_df = pd.read_excel(xls, 'Rawdata')
        elif scope == "EDDU_metaxpress":
            raw_df = pd.read_excel(xls, xls.sheet_names[0])
        else:
            print("microscope "+str(scope)+" not known. Exiting")
            sys.exit()
    return raw_df

27 

def get_features(raw_df, scope=None):
    """Return the feature names of ``raw_df`` for the given microscope.

    Delegates to ``get_features_EDDU_CX5`` or
    ``get_features_EDDU_metaxpress`` depending on ``scope``. For any other
    value a message is printed and the interpreter exits via ``sys.exit()``.
    """
    extractor = None
    if scope == "EDDU_CX5":
        extractor = get_features_EDDU_CX5
    elif scope == "EDDU_metaxpress":
        extractor = get_features_EDDU_metaxpress

    if extractor is None:
        print("microscope "+str(scope)+" not known. Exiting")
        sys.exit()
    return extractor(raw_df)

36 

def is_N_row(row_name):
    """Tell whether a row label names the replicate-count row.

    The comparison is case-insensitive; accepted labels are ``n``, ``ns``,
    ``replicate`` and ``replicates``.
    """
    return row_name.lower() in ("n", "ns", "replicate", "replicates")

45 

46 

def is_well_all_replicates_row(row_name):
    """Tell whether a row label is exactly ``well`` or ``wells`` (any case)."""
    return row_name.lower() in ("well", "wells")

50 

def is_well_specific_replicate_row(row_name):
    """Tell whether a row label contains ``well`` and ends with a digit.

    The check is case-insensitive; only the last character is inspected.
    """
    lowered = row_name.lower()
    return 'well' in lowered and lowered[-1].isdigit()

56 

def read_plate_layout(config_path):
    """Parse the ``drug_curve_map`` sheet of the configuration workbook.

    The sheet is read without a header row and with its first column as the
    row label. Rows are processed top to bottom; the label decides how a row
    is interpreted. Label matching (except for the replicate-count and well
    rows, which use the module's ``is_*_row`` helpers) ignores case, spaces,
    underscores and a trailing ``s``:

    * replicate count (see ``is_N_row``): creates ``layout["N1"]`` ...
      ``layout["N<count>"]``.
    * ``scope`` / ``microscope``: microscope identifier (first cell).
    * ``control`` / ``control well``: control wells. While control wells are
      being collected, ``plate group`` rows are stored as control groups.
    * ``group n``: replicate number of each control position.
    * ``condition``: closes the control section and starts a new condition
      named by the first cell.
    * ``dose``: doses of the current condition.
    * well rows: wells for all replicates, or for one replicate when the
      label ends in that replicate's digit.
    * ``plate group`` (outside the control section): plate group of each
      well; registers ``(well, plate_group)`` under every dose.

    Args:
        config_path: Path of the Excel configuration workbook.

    Returns:
        Tuple ``(scope, layout, conditions, ctrl_positions)`` where
        ``layout[replicate][condition][dose]`` is a list of
        ``(well, plate_group)`` tuples, ``conditions`` lists condition names
        in the order read, and ``ctrl_positions`` maps each replicate key to
        a list of ``(well, plate_group)`` control tuples, or is ``None``
        when the sheet has no complete control section.
    """
    # Read the sheet and release the workbook handle immediately.
    with pd.ExcelFile(config_path) as xls:
        df = pd.read_excel(xls, 'drug_curve_map', index_col=0, header=None)
    df = df.dropna(how='all')

    layout = {}
    conditions = []
    scope = None
    N = None
    condition = None
    doses = None
    wells = None
    specific_N = None
    ctrl_wells = None
    ctrl_wells_aligned = None
    ctrl_groups = None
    ctrl_positions_replicates = None
    ctrl_positions = None

    def sanitize_compare(string1, string2):
        """Compare two labels ignoring case, spaces, '_' and a final 's'."""
        def canonical(text):
            text = text.lower().replace('_', '').replace(' ', '')
            # endswith() also copes with an empty label.
            return text if text.endswith('s') else text + 's'
        return canonical(string1) == canonical(string2)

    def replicate_keys():
        """Keys ``N1`` ... ``N<count>`` for the configured replicate count."""
        return ["N" + str(number) for number in range(1, N + 1)]

    for _, row in df.iterrows():
        label = row.name
        # Rows without a textual label (blank or numeric first cell) cannot
        # match any keyword, so they are skipped.
        if not isinstance(label, str):
            continue

        # Number of replicates.
        if is_N_row(label):
            N = int(row.iloc[0])
            for key in replicate_keys():
                layout[key] = {}

        # Microscope identifier.
        if sanitize_compare(label, 'scope') or sanitize_compare(label, 'microscope'):
            scope = row.iloc[0]

        is_plate_group = sanitize_compare(label, 'plate group')

        # Inside the control section a plate-group row describes controls.
        if is_plate_group and ctrl_wells is not None:
            if ctrl_groups is None:
                ctrl_groups = []
            ctrl_groups += row.dropna().tolist()
            continue

        # Control wells.
        if sanitize_compare(label, 'control') or sanitize_compare(label, 'control well'):
            if ctrl_wells is None:
                ctrl_wells = []
            ctrl_wells += row.dropna().tolist()
            continue

        # Replicate number for each control position.
        if sanitize_compare(label, 'group n'):
            if ctrl_positions_replicates is None:
                ctrl_positions_replicates = []
            if ctrl_wells_aligned is None:
                ctrl_wells_aligned = []
            ctrl_positions_replicates += row.dropna().tolist()
            ctrl_wells_aligned += ctrl_wells
            continue

        # A condition row ends the control section and opens a condition.
        if sanitize_compare(label, 'condition'):
            if ctrl_wells_aligned is None or ctrl_positions_replicates is None:
                # No complete control section: callers test for None.
                ctrl_positions = None
            else:
                ctrl_positions = {key: [] for key in replicate_keys()}
                for position, ctrl_well in enumerate(ctrl_wells_aligned):
                    replicate_key = "N" + str(ctrl_positions_replicates[position])
                    ctrl_positions[replicate_key].append(
                        (ctrl_well, ctrl_groups[position]))
            # From here on, plate-group rows belong to conditions.
            ctrl_wells = None

            condition = row.iloc[0]
            for key in replicate_keys():
                layout[key].setdefault(condition, {})
            conditions.append(condition)

        if sanitize_compare(label, 'dose'):
            doses = row.dropna().tolist()

        # Wells shared by every replicate ...
        if is_well_all_replicates_row(label):
            wells = row.dropna().tolist()
            specific_N = None
        # ... or wells of the single replicate named by the trailing digit.
        if is_well_specific_replicate_row(label):
            specific_N = int(label[-1])
            wells = row.dropna().tolist()

        # The plate-group row completes the wells read on the previous row.
        if is_plate_group:
            plate_groups = row.dropna().tolist()
            if specific_N is None:
                targets = replicate_keys()
            else:
                targets = ["N" + str(specific_N)]
            for key in targets:
                per_dose = layout[key][condition]
                for position, dose in enumerate(doses):
                    per_dose.setdefault(dose, []).append(
                        (wells[position], plate_groups[position]))

    return scope, layout, conditions, ctrl_positions

176 

def get_features_EDDU_CX5(raw_df):
    """Return the feature column labels of a CX5 raw-data frame.

    The position of ``"Replicate"`` is looked up in every column label; the
    slice starts right after the first column with the highest match
    position and stops before the last column.
    """
    labels = raw_df.columns
    anchor = labels.str.find("Replicate").argmax()
    return labels[anchor + 1:-1]

179 

def get_features_EDDU_metaxpress(raw_df):
    """Return the feature names of a MetaXpress export.

    The first row whose first cell is null is taken as the header row; its
    values from the third column onwards are returned as a list.
    """
    first_cell_missing = raw_df.iloc[:, 0].isnull()
    header_row = raw_df.loc[first_cell_missing].iloc[0]
    return header_row.tolist()[2:]

183 

def create_well_dict(raw_df, wells=None, scope=None):
    """Create an empty ``{well: {feature: None}}`` template.

    Args:
        raw_df: Raw results frame, used only to look up the feature names.
        wells: Well labels to include. When ``None`` the 96 labels
            ``A01`` ... ``H12`` (rows A-H, columns 1-12) are generated.
        scope: Microscope identifier forwarded to ``get_features``.

    Returns:
        Dict mapping every well to a dict with each feature set to ``None``.
    """
    # Identity check: ``wells == None`` misbehaves for array-like inputs.
    if wells is None:
        wells = [
            letter + str(column).zfill(2)
            for letter in string.ascii_uppercase[:8]
            for column in range(1, 13)
        ]
    features = get_features(raw_df, scope=scope)
    return {well: {feature: None for feature in features} for well in wells}

194 

def add_well_to_well_dict(wells, well_dict, raw_df, scope=None):
    """Add (or reset) entries for ``wells`` in an existing well template.

    Args:
        wells: Well labels to add.
        well_dict: Dict to update in place.
        raw_df: Raw results frame, used only to look up the feature names.
        scope: Microscope identifier forwarded to ``get_features``. It must
            be supplied: ``get_features`` exits for an unknown scope.

    Returns:
        ``well_dict``, with each given well mapped to ``{feature: None}``.
    """
    # get_features already returns the feature names themselves (an Index
    # or a list), so there is no ``.columns`` attribute to read from it.
    features = get_features(raw_df, scope=scope)
    for well in wells:
        well_dict[well] = {feature: None for feature in features}
    return well_dict

200 

def create_plates_dict(raw_df, scope=None):
    """Create the empty per-plate well templates for the given microscope.

    Delegates to ``create_plates_dict_EDDU_CX5`` or
    ``create_plates_dict_EDDU_metaxpress``. For any other ``scope`` a
    message is printed and the interpreter exits via ``sys.exit()``.
    """
    builder = None
    if scope == "EDDU_CX5":
        builder = create_plates_dict_EDDU_CX5
    elif scope == "EDDU_metaxpress":
        builder = create_plates_dict_EDDU_metaxpress

    if builder is None:
        print("microscope "+str(scope)+" not known. Exiting")
        sys.exit()
    return builder(raw_df)

209 

def create_plates_dict_EDDU_metaxpress(raw_df):
    """Create an empty well template for every plate of a MetaXpress export.

    Plate names are taken from the second column of each row that contains
    the text ``Plate Name`` in any cell.
    """
    has_plate_name = (raw_df == 'Plate Name').any(axis=1)
    plate_names = raw_df[has_plate_name].iloc[:, 1].tolist()
    plate_dict = {}
    for plate_id in plate_names:
        plate_dict[plate_id] = create_well_dict(raw_df, scope="EDDU_metaxpress")
    return plate_dict

214 

def create_plates_dict_EDDU_CX5(raw_df):
    """Create an empty well template for every plate of a CX5 export.

    Args:
        raw_df: Raw results frame with a ``UniquePlateId`` column.

    Returns:
        Dict mapping each distinct plate id (in order of first appearance)
        to a fresh template from ``create_well_dict``.
    """
    # The id column repeats once per data row; de-duplicate first so the
    # template is built once per plate instead of once per row.
    unique_plate_ids = dict.fromkeys(raw_df['UniquePlateId'].tolist())
    return {
        plate_id: create_well_dict(raw_df, scope="EDDU_CX5")
        for plate_id in unique_plate_ids
    }

219 

def indices_to_well(row, col, dim):
    """Convert zero-based ``(row, col)`` indices to a well label.

    ``dim`` is ``(rows, columns)``; only the column count is used. The row
    becomes a letter starting at ``A`` and the column a 1-based number
    zero-padded to two digits, e.g. ``(1, 0)`` on 12 columns -> ``"B01"``.
    """
    columns_per_row = dim[1]
    # 1-based position of the well when counting row by row.
    linear = row * columns_per_row + (col + 1)
    row_offset = int((linear - 1) / columns_per_row)
    column_number = linear - row_offset * columns_per_row
    return chr(65 + row_offset) + str(column_number).zfill(2)

230 

def row_col_to_well(row, col):
    """Build a well label from a 1-based row number and a column number.

    Row 1 maps to ``A``; the column is zero-padded to two characters.
    """
    return chr(64 + row) + str(col).zfill(2)

235 

def well_to_num(well, dim):
    """Convert a well label such as ``"B03"`` to its 1-based plate position.

    Wells are numbered row by row: ``(row_index * columns) + column`` with
    ``A`` as row index 0.

    Args:
        well: Label made of one row letter followed by the column digits.
        dim: ``(rows, columns)`` of the plate; only the column count is used.

    Returns:
        The position as an int (``"A01"`` -> 1).

    Raises:
        ValueError: If ``well`` is not one row character followed by digits.
    """
    columns_per_row = dim[1]
    # Position of the first digit separates the row letter from the column.
    split_at = next(
        (position for position, char in enumerate(well) if char.isdigit()),
        None,
    )
    if split_at != 1:
        raise ValueError("cannot parse well label %r" % (well,))
    row_letter, column_text = well[:split_at], well[split_at:]
    # The row stride is the number of columns. The earlier ``rMax + 4``
    # stride only happened to equal it on 8x12 plates.
    return (ord(row_letter) - 65) * columns_per_row + int(column_text)

246 

def fill_plates_dict(raw_df, plates_dict, scope=None):
    """Fill the per-plate well templates with the measured values.

    The feature names are looked up first through ``get_features``; the
    work is then delegated to ``fill_plates_dict_EDDU_CX5`` or
    ``fill_plates_dict_EDDU_metaxpress``. For any other ``scope`` a message
    is printed and the interpreter exits via ``sys.exit()``.
    """
    features = get_features(raw_df, scope=scope)

    filler = None
    if scope == "EDDU_CX5":
        filler = fill_plates_dict_EDDU_CX5
    elif scope == "EDDU_metaxpress":
        filler = fill_plates_dict_EDDU_metaxpress

    if filler is None:
        print("microscope "+str(scope)+" not known. Exiting")
        sys.exit()
    return filler(raw_df, plates_dict, features)

256 

def fill_plates_dict_EDDU_CX5(raw_df, plates_dict, features):
    """Copy the feature values of a CX5 export into ``plates_dict``.

    Each data row is addressed by position: column 1 is used as the plate
    key, columns 2 and 3 as the row and column numbers passed to
    ``row_col_to_well``.

    Args:
        raw_df: Raw results frame, one row per well.
        plates_dict: ``{plate: {well: {feature: value}}}`` to update in place.
        features: Column labels of the features to copy.

    Returns:
        ``plates_dict`` with the values filled in.
    """
    for _, row in raw_df.iterrows():
        # ``.iloc`` keeps the access positional; plain ``row[1]`` on a
        # label-indexed Series is deprecated integer-key behaviour.
        plate_id = row.iloc[1]
        well = row_col_to_well(row.iloc[2], row.iloc[3])
        for feature in features:
            plates_dict[plate_id][well][feature] = row[feature]
    return plates_dict

263 

def fill_plates_dict_EDDU_metaxpress(raw_df, plates_dict, features):
    """Copy the feature values of a MetaXpress export into ``plates_dict``.

    The export is scanned row by row, driven by the first cell:

    * ``Barcode`` stops value collection (start of a new plate section);
    * ``Plate Name`` sets the current plate from the second cell;
    * a null first cell (the feature header row) starts collection;
    * while collecting, the first cell is the well label and the feature
      columns hold its values.

    Args:
        raw_df: Raw export; its columns are treated as well, laser focus,
            then one column per feature.
        plates_dict: ``{plate: {well: {feature: value}}}`` to update in place.
        features: Feature names, in column order.

    Returns:
        ``plates_dict`` with the values filled in.
    """
    feature_names = list(features)
    # Relabel a copy; this avoids ``set_axis(..., inplace=False)``, whose
    # ``inplace`` argument no longer exists in pandas 2.x.
    table = raw_df.copy()
    table.columns = ["Well", "Laser Focus"] + feature_names

    plate_name = None
    collecting = False
    for _, row in table.iterrows():
        first_cell = row.iloc[0]
        if first_cell == "Barcode":
            collecting = False
        if collecting:
            for feature in feature_names:
                plates_dict[plate_name][first_cell][feature] = row[feature]
        if first_cell == "Plate Name":
            plate_name = row.iloc[1]
        elif pd.isnull(first_cell):
            collecting = True
    return plates_dict

279 

def average_plates(plates, raw_df, scope=None):
    """Average several plates well by well and feature by feature.

    NOTE: a second definition with the same name appears later in this
    module and replaces this one when the module is loaded.

    Args:
        plates: Sequence of ``{well: {feature: value}}`` dicts.
        raw_df: Raw results frame, used to look up wells and features.
        scope: Microscope identifier forwarded to ``create_well_dict`` and
            ``get_features``.

    Returns:
        A ``{well: {feature: mean}}`` dict covering the default well grid.

    Raises:
        ZeroDivisionError: If ``plates`` is empty.
    """
    average_plate = create_well_dict(raw_df, scope=scope)
    # The scope must be forwarded; get_features exits without one.
    features = get_features(raw_df, scope=scope)
    plate_count = len(plates)
    for well, averaged in average_plate.items():
        for feature in features:
            total = sum(plate[well][feature] for plate in plates)
            averaged[feature] = total / plate_count
    return average_plate

291 

def average_plates_all_replicates(plate_groups, plates_dict, raw_df):
    """Average the plates of every replicate.

    For each replicate key of ``plate_groups`` the matching entry is handed
    to ``average_plates_one_replicate``; the results are returned keyed by
    replicate. No microscope scope is forwarded by this function.
    """
    return {
        replicate: average_plates_one_replicate(groups, plates_dict, raw_df)
        for replicate, groups in plate_groups.items()
    }

298 

def average_plates_duplicate_rows(plate_groups, plates_dict, raw_df, wells_to_average=None, scope=None):
    """Average each listed well with the well in the row below it.

    Args:
        plate_groups: Unused; kept for interface compatibility.
        plates_dict: ``{plate_name: {well: {feature: value}}}``.
        raw_df: Raw results frame, used to look up the feature names.
        wells_to_average: Labels of the upper well of each pair. When
            ``None`` the wells from ``create_duplicate_wells()`` are used.
            NOTE(review): this default is an assumption (previously ``None``
            raised a TypeError) -- confirm it matches the intended layout.
        scope: Microscope identifier forwarded to the feature lookup.

    Returns:
        ``{plate_name: {well: {feature: mean}}}`` restricted to
        ``wells_to_average``.
    """
    if wells_to_average is None:
        wells_to_average = create_duplicate_wells()
    features = get_features(raw_df, scope=scope)
    averaged_plates_dict = {}
    for plate_name, plate in plates_dict.items():
        average_plate = create_well_dict(raw_df, scope=scope, wells=wells_to_average)
        for well in wells_to_average:
            average_plate = average_rows(plate, average_plate, well, features)
        averaged_plates_dict[plate_name] = average_plate
    # Return the computed averages, not the untouched input.
    return averaged_plates_dict

308 

def average_rows(plate_dict, average_plate, well, features, num_rows_average=2):
    """Average a well with the wells directly below it in the same column.

    Args:
        plate_dict: ``{well: {feature: value}}`` source plate.
        average_plate: ``{well: {feature: value}}`` dict updated in place.
        well: Label of the topmost well; the result is stored under it.
        features: Feature names to average.
        num_rows_average: Number of consecutive rows to include.

    Returns:
        ``average_plate`` with ``average_plate[well]`` filled in.
    """
    wells_to_average = [well]
    current = well
    for _ in range(num_rows_average - 1):
        # Advance the cursor so every iteration moves one row further down;
        # otherwise more than two rows would re-add the same well.
        current = get_well_next_row(current)
        wells_to_average.append(current)

    for feature in features:
        total = sum(plate_dict[member][feature] for member in wells_to_average)
        average_plate[well][feature] = total / num_rows_average
    return average_plate

324 

def get_well_next_row(well):
    """Return the label of the well one row below (next letter, same column)."""
    row_letter, column_part = well[0], well[1:]
    return chr(ord(row_letter) + 1) + column_part

327 

328 

def average_plates(plates, raw_df, scope=None):
    """Average several plates well by well and feature by feature.

    NOTE: an earlier definition with the same name exists in this module;
    this later one is the definition in effect once the module is loaded.

    Args:
        plates: Sequence of ``{well: {feature: value}}`` dicts.
        raw_df: Raw results frame, used to look up wells and features.
        scope: Microscope identifier forwarded to ``create_well_dict`` and
            ``get_features``.

    Returns:
        A ``{well: {feature: mean}}`` dict covering the default well grid.

    Raises:
        ZeroDivisionError: If ``plates`` is empty.
    """
    average_plate = create_well_dict(raw_df, scope=scope)
    # The scope must be forwarded; get_features exits without one.
    features = get_features(raw_df, scope=scope)
    plate_count = len(plates)
    for well, averaged in average_plate.items():
        for feature in features:
            total = sum(plate[well][feature] for plate in plates)
            averaged[feature] = total / plate_count
    return average_plate

340 

341 

def average_plates_one_replicate(averaged_plates_names_dict, plates_dict, raw_df, scope=None):
    """Average the plates of each named group within one replicate.

    Args:
        averaged_plates_names_dict: Maps the name of each averaged plate to
            the names of the plates to average.
        plates_dict: ``{plate_name: {well: {feature: value}}}``.
        raw_df: Raw results frame, forwarded to ``average_plates``.
        scope: Microscope identifier forwarded to ``average_plates``, which
            needs it to look up the feature names.

    Returns:
        Dict mapping each averaged-plate name to its averaged plate.
    """
    averaged_plates_dict = {}
    for plate_average_name, plate_names in averaged_plates_names_dict.items():
        plates_to_average = [plates_dict[plate_name] for plate_name in plate_names]
        averaged_plates_dict[plate_average_name] = average_plates(
            plates_to_average, raw_df, scope=scope)
    return averaged_plates_dict

349 

def load_plate_groups(config_path):
    """Read the ``plate_groups`` sheet of the configuration workbook.

    The sheet is read without a header row and with its first column as the
    row label. The first row is skipped; every following row is one
    replicate. Groups are keyed by the string form of the column labels
    assigned by pandas.

    Args:
        config_path: Path of the Excel configuration workbook.

    Returns:
        ``{replicate: {group: cell value}}``.
    """
    # Read the sheet and release the workbook handle immediately.
    with pd.ExcelFile(config_path) as xls:
        df = pd.read_excel(xls, 'plate_groups', index_col=0, header=None)

    replicates = df.index.tolist()[1:]
    plate_groups = {}
    for replicate in replicates:
        plate_groups[replicate] = {
            str(column): df.loc[replicate, column] for column in df.columns
        }
    return plate_groups

361 

def normalize_plate(plate, reference_wells, raw_df, ctrl_avg_name, scope=None):
    """Divide every well of a plate by the mean of its reference wells.

    Args:
        plate: ``{well: {feature: value}}`` to normalize.
        reference_wells: Keys of ``plate`` whose values form the control.
        raw_df: Raw results frame, used to look up wells and features.
        ctrl_avg_name: Key under which the control mean of each feature is
            stored in the result.
        scope: Microscope identifier forwarded to the feature lookup, which
            exits when it is missing.

    Returns:
        ``{well: {feature: value / control_mean}}`` plus the
        ``ctrl_avg_name`` entry. A value that cannot be divided is copied
        unchanged.
    """
    features = get_features(raw_df, scope=scope)
    normalized_plate = create_well_dict(raw_df, scope=scope)
    normalized_plate[ctrl_avg_name] = {feature: None for feature in features}

    for feature in features:
        control_values = [plate[well][feature] for well in reference_wells]
        control_avg = np.mean(np.array(control_values))
        normalized_plate[ctrl_avg_name][feature] = control_avg
        for well in normalized_plate:
            # Skip only the control-average entry itself (exact match, not
            # a substring test against its name).
            if well == ctrl_avg_name:
                continue
            try:
                normalized_plate[well][feature] = plate[well][feature] / control_avg
            except (TypeError, ZeroDivisionError):
                # Non-numeric value or zero control: keep the raw value.
                normalized_plate[well][feature] = plate[well][feature]
    return normalized_plate

377 

378 

def normalize_all_plates(plates_dict, reference_wells, raw_df, ctrl_avg_name):
    """Apply ``normalize_plate`` to every plate of a two-level dict.

    ``plates_dict`` is ``{replicate: {condition: plate}}``; the result has
    the same keys with each plate replaced by its normalized version.
    """
    return {
        replicate: {
            condition: normalize_plate(plate, reference_wells, raw_df, ctrl_avg_name)
            for condition, plate in condition_plates.items()
        }
        for replicate, condition_plates in plates_dict.items()
    }

385 

def create_table_for_feature(feature, plates_dict):
    """Build the dose-by-(condition, replicate) table of one feature.

    Args:
        feature: Name of the feature to tabulate.
        plates_dict: ``{condition: {replicate: {dose: {feature: value}}}}``.
            Replicates are taken from the first condition and doses from
            its first replicate.

    Returns:
        DataFrame indexed by dose with a two-level column index
        ``(condition, replicate)`` (both as strings). Missing entries are
        ``None``/NaN. An empty frame is returned for empty input.
    """
    if not plates_dict:
        return pd.DataFrame()
    conditions = list(plates_dict)
    replicates = list(plates_dict[conditions[0]])
    if not replicates:
        return pd.DataFrame()
    doses = list(plates_dict[conditions[0]][replicates[0]])

    column_labels = []
    column_values = []
    for condition in conditions:
        for replicate in replicates:
            values = []
            for dose in doses:
                try:
                    values.append(plates_dict[condition][replicate][dose][feature])
                except (KeyError, TypeError):
                    # Combination not measured for this condition/replicate.
                    values.append(None)
            # Keep the label as a tuple: joining with "_" and splitting it
            # again breaks names that contain an underscore themselves.
            column_labels.append((str(condition), str(replicate)))
            column_values.append(values)

    feature_table = pd.DataFrame(dict(enumerate(column_values)))
    feature_table.columns = pd.MultiIndex.from_tuples(column_labels)
    feature_table.index = doses
    return feature_table

408 

def create_feature_results_table(feature, experiment_dict):
    """Build the dose-by-(replicate, condition) table of one feature.

    NOTE(review): the previous body could not run. It read names that are
    not parameters (``experiment_dict_values``, ``doses``, ``plates_dict``),
    called ``.keys()`` on a list, and built its column keys as
    ``replicate_condition`` while looking them up as
    ``condition_replicate``. This version works only on its arguments and
    assumes the layout
    ``experiment_dict[replicate][condition][dose][feature]`` -- TODO confirm
    against the intended caller (none is visible in this module).

    Args:
        feature: Name of the feature to tabulate.
        experiment_dict: Nested dict in the layout described above.
            Conditions are taken from the first replicate and doses from
            its first condition.

    Returns:
        DataFrame indexed by dose with a two-level column index
        ``(replicate, condition)`` (both as strings); an empty frame for
        empty input.

    Raises:
        KeyError: If a replicate/condition/dose/feature entry is missing.
    """
    if not experiment_dict:
        return pd.DataFrame()
    replicates = list(experiment_dict)
    conditions = list(experiment_dict[replicates[0]])
    if not conditions:
        return pd.DataFrame()
    doses = list(experiment_dict[replicates[0]][conditions[0]])

    column_labels = []
    column_values = []
    for replicate in replicates:
        for condition in conditions:
            column_labels.append((str(replicate), str(condition)))
            column_values.append(
                [experiment_dict[replicate][condition][dose][feature] for dose in doses])

    feature_table = pd.DataFrame(dict(enumerate(column_values)))
    feature_table.columns = pd.MultiIndex.from_tuples(column_labels)
    # One row per dose, so the doses (not the replicates) label the rows.
    feature_table.index = doses
    return feature_table

426 

def create_all_feature_tables(plates_dict, features):
    """Return ``{feature: table}`` using ``create_table_for_feature``."""
    return {
        feature: create_table_for_feature(feature, plates_dict)
        for feature in features
    }

432 

def feature_tables_to_excel(feature_tables, outpath):
    """Write every feature table to its own sheet of one Excel workbook.

    The sheet name is the feature name cut to 31 characters, with the
    characters ``[ ] : * ? / \\`` removed afterwards.
    """
    def remove_inval_chars(name):
        # Strip each forbidden character in turn.
        for forbidden in '[]:*?/\\':
            name = name.replace(forbidden, "")
        return name

    with pd.ExcelWriter(outpath) as writer:
        for feature, table in feature_tables.items():
            table.to_excel(writer, sheet_name=remove_inval_chars(feature[:31]))

442 

def create_duplicate_wells():
    """List the wells of every second row (A, C, E, G), columns 01-12."""
    return [
        letter + str(column).zfill(2)
        for letter in string.ascii_uppercase[0:8:2]
        for column in range(1, 13)
    ]

451 

def make_experiment_dict_locations(plate_groups, plate_layout, conditions):
    """Re-key the plate layout from replicate-first to condition-first.

    ``plate_layout`` is ``{replicate: {condition: {dose: locations}}}``; the
    result is ``{condition: {replicate: {dose: locations}}}`` with one entry
    per name in ``conditions``. ``plate_groups`` is not used.
    """
    experiment_dict = {name: {} for name in conditions}
    for replicate, replicate_layout in plate_layout.items():
        for condition, dose_map in replicate_layout.items():
            # Shallow copy: the location lists themselves are shared.
            experiment_dict[condition][replicate] = dict(dose_map)
    return experiment_dict

459 

def make_experiment_dict_values(plates, experiment_dict_locations, features, plate_groups=None):
    """Replace the well locations of an experiment dict by averaged values.

    Args:
        plates: ``{plate_name: {well: {feature: value}}}``.
        experiment_dict_locations:
            ``{condition: {replicate: {dose: locations}}}``.
        features: Feature names to evaluate.
        plate_groups: ``{replicate: {group: plate_name}}`` passed on to
            ``average_wells``. When omitted, the module-level
            ``plate_groups`` is used, as this function did before the
            parameter existed.

    Returns:
        ``{condition: {replicate: {dose: {feature: mean value}}}}``.

    Raises:
        NameError: If ``plate_groups`` is omitted and no module-level
            ``plate_groups`` exists.
    """
    if plate_groups is None:
        plate_groups = globals().get("plate_groups")
        if plate_groups is None:
            raise NameError("plate_groups was not passed and is not defined at module level")

    experiment_dict_values = copy.deepcopy(experiment_dict_locations)
    for condition, replicates in experiment_dict_locations.items():
        for replicate, doses in replicates.items():
            for dose, locations in doses.items():
                experiment_dict_values[condition][replicate][dose] = {
                    feature: average_wells(locations, replicate, feature, plates, plate_groups)
                    for feature in features
                }
    return experiment_dict_values

468 

def average_wells(locations, replicate, feature, plates, plate_groups):
    """Return the mean of one feature over the given well locations.

    Each location is resolved through ``location_to_value``. An empty
    ``locations`` raises ``ZeroDivisionError``.
    """
    total = sum(
        location_to_value(location, replicate, feature, plates, plate_groups)
        for location in locations
    )
    return total / float(len(locations))

474 

def location_to_value(location, replicate, feature, plates, plate_groups):
    """Look up one feature value for a ``(well, plate_group)`` location.

    The plate name is ``plate_groups[replicate][str(plate_group)]``; the
    value is then read from ``plates[plate_name][well][feature]``.
    """
    well, plate_group = location
    return plates[plate_groups[replicate][str(plate_group)]][well][feature]

480 

def normalize_experiment(experiment_dict_values, ctrl_positions, features, plates, plate_groups=None):
    """Divide every value of an experiment dict by its replicate's control.

    Args:
        experiment_dict_values:
            ``{condition: {replicate: {dose: {feature: value}}}}``.
        ctrl_positions: ``{replicate: [(well, plate_group), ...]}`` control
            locations.
        features: Feature names to normalize.
        plates: ``{plate_name: {well: {feature: value}}}``.
        plate_groups: ``{replicate: {group: plate_name}}`` passed on to
            ``average_wells``. When omitted, the module-level
            ``plate_groups`` is used, as this function did before the
            parameter existed.

    Returns:
        A new dict of the same shape holding ``value / control_mean``; a
        control mean of 0 is treated as 1.

    Raises:
        NameError: If ``plate_groups`` is omitted and no module-level
            ``plate_groups`` exists.
    """
    if plate_groups is None:
        plate_groups = globals().get("plate_groups")
        if plate_groups is None:
            raise NameError("plate_groups was not passed and is not defined at module level")

    # Control means depend only on the replicate, so compute them once per
    # replicate instead of once per condition.
    control_means_by_replicate = {}

    experiment_dict_values_normalized = copy.deepcopy(experiment_dict_values)
    for condition, replicates in experiment_dict_values.items():
        for replicate, doses in replicates.items():
            if replicate not in control_means_by_replicate:
                control_wells = ctrl_positions[replicate]
                control_means_by_replicate[replicate] = {
                    feature: average_wells(control_wells, replicate, feature, plates, plate_groups)
                    for feature in features
                }
            control_means = control_means_by_replicate[replicate]
            for dose, values in doses.items():
                normalized_values = {}
                for feature in features:
                    ctrl_value = control_means[feature]
                    if ctrl_value == 0:
                        # Avoid dividing by zero: leave the value as is.
                        ctrl_value = 1
                    normalized_values[feature] = values[feature] / ctrl_value
                experiment_dict_values_normalized[condition][replicate][dose] = normalized_values
    return experiment_dict_values_normalized

497 

def write_values_heat_map(plates_dict, features, outpath):
    """Write one sheet per feature showing every plate as an 8x12 grid.

    For each plate the sheet holds a row with the plate key, eight rows
    (A-H) of twelve values (columns 01-12) and a separator row.

    Args:
        plates_dict: ``{plate: {well: {feature: value}}}``; every plate must
            contain the wells ``A01`` ... ``H12``.
        features: Feature names; each becomes a sheet named after the first
            31 characters of the feature with invalid characters removed.
        outpath: Path of the workbook to create.
    """
    # Only the pandas writer is opened: a second, never-closed
    # xlsxwriter.Workbook on the same path served no purpose.
    with pd.ExcelWriter(outpath) as writer:
        for feature in features:
            sheet_rows = []
            for plate_name, plate in plates_dict.items():
                sheet_rows.append([plate_name])
                for row_letter in string.ascii_uppercase[:8]:
                    sheet_rows.append([
                        plate[row_letter + str(column).zfill(2)][feature]
                        for column in range(1, 13)
                    ])
                sheet_rows.append([""])
            pd.DataFrame(sheet_rows).to_excel(
                writer, sheet_name=remove_inval_chars(feature[:31]))

515 

def create_reference_wells():
    """List ``(well, 2)`` tuples for rows A-H, columns 07-12."""
    return [
        (letter + str(column).zfill(2), 2)
        for letter in string.ascii_uppercase[:8]
        for column in range(7, 13)
    ]

524 

def remove_inval_chars(name):
    """Return ``name`` without the characters ``[ ] : * ? / \\``."""
    for forbidden in '[]:*?/\\':
        name = name.replace(forbidden, "")
    return name

530 

# Well labels A01 ... H12. Note that ``conditions`` is reassigned below by
# the values read from the configuration workbook.
rows = list(string.ascii_uppercase[:8])
cols = list(range(1, 13))
conditions = []
for row in rows:
    for col in cols:
        conditions.append(row + str(col).zfill(2))

# Input and output locations, relative to the working directory.
results_path = "mx_results.xlsx"
config_file = "./config.xlsx"
compiled_results_path = "./compiled_results_normalized.xlsx"
heatmap_path = "./heatmaps.xlsx"

# Read the configuration: microscope, layout, conditions and controls.
scope, plate_layout, conditions, ctrl_positions = read_plate_layout(config_file)
plate_groups = load_plate_groups(config_file)
experiment_dict_locations = make_experiment_dict_locations(plate_groups, plate_layout, conditions)

# Load the raw results and collect the values of every plate.
df = read_results(results_path, scope=scope)
features = get_features(df, scope=scope)
well_dict = create_well_dict(df, scope=scope)
plates_dict = create_plates_dict(df, scope=scope)
plates_dict = fill_plates_dict(df, plates_dict, scope=scope)

# Average the wells of each condition/replicate/dose; normalize to the
# controls only when the configuration defines any.
experiment_dict_values = make_experiment_dict_values(plates_dict, experiment_dict_locations, features)
if ctrl_positions is not None:
    experiment_dict_values = normalize_experiment(experiment_dict_values, ctrl_positions, features, plates_dict)

# Write the heat maps and the compiled per-feature tables.
feature_tables = create_all_feature_tables(experiment_dict_values, features)
write_values_heat_map(plates_dict, features, heatmap_path)
feature_tables_to_excel(feature_tables, compiled_results_path)