Coverage for openhcs/processing/backends/analysis/cx5_format.py: 0.4%
465 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 05:57 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 05:57 +0000
1from skimage import io
2import copy
3import xlsxwriter
4import string
5import os
6import numpy as np
7import matplotlib.pyplot as plt
8import re
9import pandas as pd
10import pickle
11import pudb
12import sys
def read_results(results_path, scope=None):
    """Load the raw measurement sheet written by the given microscope.

    Args:
        results_path: Path of the Excel workbook holding the results.
        scope: "EDDU_CX5" (the 'Rawdata' sheet is read) or "EDDU_metaxpress"
            (the first sheet of the workbook is read).

    Returns:
        The selected sheet as a pandas DataFrame.

    Raises:
        SystemExit: If ``scope`` is not one of the two supported values.
    """
    if scope not in ("EDDU_CX5", "EDDU_metaxpress"):
        # Fail before opening the workbook, and with a non-zero exit status
        # (a bare sys.exit() would report success to the calling shell).
        sys.exit("microscope "+str(scope)+" not known. Exiting")
    # The context manager closes the workbook handle once the sheet is parsed.
    with pd.ExcelFile(results_path) as xls:
        sheet = 'Rawdata' if scope == "EDDU_CX5" else xls.sheet_names[0]
        return pd.read_excel(xls, sheet)
def get_features(raw_df, scope=None):
    """Return the feature names found in ``raw_df`` for the given microscope.

    Args:
        raw_df: Raw results DataFrame.
        scope: "EDDU_CX5" or "EDDU_metaxpress"; selects the extraction helper.

    Returns:
        Whatever the scope-specific helper returns (a pandas Index for
        EDDU_CX5, a list for EDDU_metaxpress).

    Raises:
        SystemExit: If ``scope`` is not a supported microscope.
    """
    if scope == "EDDU_CX5":
        return get_features_EDDU_CX5(raw_df)
    if scope == "EDDU_metaxpress":
        return get_features_EDDU_metaxpress(raw_df)
    # Passing the message to sys.exit gives a non-zero exit status.
    sys.exit("microscope "+str(scope)+" not known. Exiting")
def is_N_row(row_name):
    """Tell whether a row label announces the number of replicates.

    The comparison is case-insensitive and accepts "n", "ns", "replicate"
    and "replicates".
    """
    return row_name.lower() in ("n", "ns", "replicate", "replicates")
def is_well_all_replicates_row(row_name):
    """Tell whether a row label is exactly "well" or "wells" (any case)."""
    label = row_name.lower()
    return label in ("well", "wells")
def is_well_specific_replicate_row(row_name):
    """Tell whether a row label contains "well" and ends with a digit.

    The comparison is case-insensitive.
    """
    label = row_name.lower()
    # "well" being a substring guarantees the label is non-empty, so the
    # last-character lookup cannot fail.
    return 'well' in label and label[-1].isdigit()
def read_plate_layout(config_path):
    """Parse the 'drug_curve_map' sheet of the configuration workbook.

    The sheet is read without a header row and with its first column as row
    labels; the label of each row decides how the row's values are used
    (number of replicates, microscope, control wells with their plate groups
    and replicates, condition name, doses, wells, plate groups).

    Args:
        config_path: Path of the Excel configuration workbook.

    Returns:
        A tuple ``(scope, layout, conditions, ctrl_positions)``:
        ``layout["N<k>"][condition][dose]`` is a list of
        ``(well, plate_group)`` tuples, ``conditions`` lists the condition
        names in reading order, and ``ctrl_positions`` maps ``"N<k>"`` to a
        list of ``(well, plate_group)`` control tuples, or is None when no
        control replicate rows were read before a condition row.
    """
    # Close the workbook handle as soon as the sheet has been parsed.
    with pd.ExcelFile(config_path) as xls:
        df = pd.read_excel(xls, 'drug_curve_map', index_col=0, header=None)
    df = df.dropna(how='all')

    layout = {}
    condition = None
    doses = None
    wells = None
    N = None
    specific_N = None
    scope = None
    conditions = []
    ctrl_wells = None
    ctrl_wells_aligned = None
    ctrl_groups = None
    ctrl_positions_replicates = None
    ctrl_positions = None

    def normalize(label):
        # Lower-case, drop underscores/spaces and force a trailing "s" so
        # singular and plural spellings compare equal.  endswith() also
        # copes with an empty label.
        label = label.lower().replace('_', '').replace(' ', '')
        return label if label.endswith('s') else label + 's'

    def sanitize_compare(string1, string2):
        return normalize(string1) == normalize(string2)

    def replicate_key(value):
        # iterrows() hands back one Series per row, so integer cells can
        # arrive as floats (1.0); avoid building the key "N1.0".
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        return "N"+str(value)

    for _, row in df.iterrows():
        label = row.name
        if not isinstance(label, str):
            # Blank or numeric labels cannot match any keyword below.
            continue

        # check max number of replicates
        if is_N_row(label):
            N = int(row.iloc[0])
            for n in range(N):
                layout["N"+str(n+1)] = {}

        # load microscope
        if sanitize_compare(label, 'scope') or sanitize_compare(label, 'microscope'):
            scope = row.iloc[0]

        # plate groups that belong to the control wells read just before
        if sanitize_compare(label, 'plate group') and ctrl_wells is not None:
            if ctrl_groups is None:
                ctrl_groups = []
            ctrl_groups += row.dropna().tolist()
            continue

        # get control wells
        if sanitize_compare(label, 'control') or sanitize_compare(label, 'control well'):
            if ctrl_wells is None:
                ctrl_wells = []
            ctrl_wells += row.dropna().tolist()
            continue

        # get replicate for ctrl position
        if sanitize_compare(label, 'group n'):
            if ctrl_positions_replicates is None:
                ctrl_positions_replicates = []
            if ctrl_wells_aligned is None:
                ctrl_wells_aligned = []
            ctrl_positions_replicates += row.dropna().tolist()
            ctrl_wells_aligned += ctrl_wells
            continue

        # new condition name: the control section is finished
        if sanitize_compare(label, 'condition'):
            # NOTE(review): the original nesting of this section was lost
            # with the indentation; it is rebuilt as "build the control
            # dict when control replicates were read, otherwise None" -
            # confirm against the original source.
            if ctrl_wells_aligned is None or ctrl_positions_replicates is None:
                ctrl_positions = None
            else:
                ctrl_positions = {"N"+str(n+1): [] for n in range(N)}
                for idx, ctrl_well in enumerate(ctrl_wells_aligned):
                    key = replicate_key(ctrl_positions_replicates[idx])
                    ctrl_positions[key].append((ctrl_well, ctrl_groups[idx]))
            # From here on, 'plate group' rows belong to conditions.
            ctrl_wells = None

            # make dict[replicate][condition][dose]
            condition = row.iloc[0]
            for n in range(N):
                layout["N"+str(n+1)].setdefault(condition, {})
            conditions.append(condition)

        if sanitize_compare(label, 'dose'):
            doses = row.dropna().tolist()

        # wells shared by all replicates ...
        if is_well_all_replicates_row(label):
            wells = row.dropna().tolist()
            specific_N = None
        # ... or wells of one specific replicate
        if is_well_specific_replicate_row(label):
            specific_N = int(label[-1])
            wells = row.dropna().tolist()

        # add plate group to the wells from the previous row
        if sanitize_compare(label, 'plate group'):
            plate_groups = row.dropna().tolist()
            if specific_N is None:
                targets = ["N"+str(n+1) for n in range(N)]
            else:
                targets = ["N"+str(specific_N)]
            for target in targets:
                per_dose = layout[target][condition]
                # Index wells/plate_groups explicitly so a row shorter than
                # the dose row still raises instead of being truncated.
                for y, dose in enumerate(doses):
                    per_dose.setdefault(dose, []).append((wells[y], plate_groups[y]))

    return scope, layout, conditions, ctrl_positions
def get_features_EDDU_CX5(raw_df):
    """Return the feature column labels of a CX5 'Rawdata' sheet.

    The features are the columns that follow the column whose
    ``str.find("Replicate")`` position is the highest, excluding the last
    column of the frame.

    Returns:
        A pandas Index with the selected column labels.
    """
    columns = raw_df.columns
    first_feature = columns.str.find("Replicate").argmax() + 1
    return columns[first_feature:-1]
def get_features_EDDU_metaxpress(raw_df):
    """Return the feature names of a MetaXpress export.

    The names are taken from the first row whose first cell is empty,
    skipping that row's first two cells.

    Returns:
        A list with the remaining cell values of that row.
    """
    blank_first_cell = pd.isnull(raw_df.iloc[:, 0])
    header_row = raw_df[blank_first_cell].iloc[0]
    return header_row.tolist()[2:]
def create_well_dict(raw_df, wells=None, scope=None):
    """Build an empty ``{well: {feature: None}}`` mapping.

    Args:
        raw_df: Raw results DataFrame, used to look up the feature names.
        wells: Well names to include; defaults to the 96 names A01..H12.
        scope: Microscope identifier forwarded to ``get_features``.

    Returns:
        A dict with one fresh ``{feature: None}`` dict per well.
    """
    # Identity check: "== None" would be evaluated element-wise if an
    # array-like were passed for wells.
    if wells is None:
        wells = [
            letter + str(col).zfill(2)
            for letter in string.ascii_uppercase[:8]
            for col in range(1, 13)
        ]
    features = get_features(raw_df, scope=scope)
    return {well: {feature: None for feature in features} for well in wells}
def add_well_to_well_dict(wells, well_dict, raw_df, scope=None):
    """Add (or reset) entries of ``well_dict`` for the given wells.

    Args:
        wells: Iterable of well names to add.
        well_dict: Mapping to update in place.
        raw_df: Raw results DataFrame, used to look up the feature names.
        scope: Microscope identifier forwarded to ``get_features``.  The
            previous version passed no scope, which makes ``get_features``
            exit, and then read ``.columns`` on its result.

    Returns:
        The same ``well_dict`` object, with a ``{feature: None}`` dict stored
        under every name in ``wells``.
    """
    features = get_features(raw_df, scope=scope)
    for well in wells:
        well_dict[well] = {feature: None for feature in features}
    return well_dict
def create_plates_dict(raw_df, scope=None):
    """Build the empty ``{plate: {well: {feature: None}}}`` structure.

    Args:
        raw_df: Raw results DataFrame.
        scope: "EDDU_CX5" or "EDDU_metaxpress"; selects the builder.

    Returns:
        The dict produced by the scope-specific builder.

    Raises:
        SystemExit: If ``scope`` is not a supported microscope.
    """
    if scope == "EDDU_CX5":
        return create_plates_dict_EDDU_CX5(raw_df)
    if scope == "EDDU_metaxpress":
        return create_plates_dict_EDDU_metaxpress(raw_df)
    # Passing the message to sys.exit gives a non-zero exit status.
    sys.exit("microscope "+str(scope)+" not known. Exiting")
def create_plates_dict_EDDU_metaxpress(raw_df):
    """Create one empty well dict per plate of a MetaXpress export.

    Plate names are read from the second column of every row that contains
    the cell value 'Plate Name'.
    """
    name_rows = raw_df[(raw_df == 'Plate Name').any(axis=1)]
    plate_dict = {}
    for plate_id in name_rows.iloc[:, 1].tolist():
        plate_dict[plate_id] = create_well_dict(raw_df, scope="EDDU_metaxpress")
    return plate_dict
def create_plates_dict_EDDU_CX5(raw_df):
    """Create one empty well dict per plate of a CX5 export.

    Plate ids come from the 'UniquePlateId' column.

    Returns:
        ``{plate_id: {well: {feature: None}}}`` keyed in order of first
        appearance of each plate id.
    """
    # Deduplicate first: the id repeats on every raw row, and building a
    # well dict for each repetition only to overwrite it is wasted work.
    plate_ids = raw_df['UniquePlateId'].drop_duplicates().tolist()
    return {plate_id: create_well_dict(raw_df, scope="EDDU_CX5") for plate_id in plate_ids}
def indices_to_well(row, col, dim):
    """Convert zero-based (row, col) indices into a well name such as "B03".

    Args:
        row: Zero-based row index.
        col: Zero-based column index.
        dim: Sequence whose second item is the number of columns per row.

    Returns:
        A letter (chr(65 + row offset)) followed by the two-digit,
        zero-padded one-based column number.
    """
    cMax = dim[1]
    # One-based running number of the well when counting row by row.
    total = row*cMax + (col + 1)
    offset = int((total-1)/cMax)
    return chr(65 + offset) + str(total - offset*cMax).zfill(2)
def row_col_to_well(row, col):
    """Convert one-based (row, col) numbers into a well name such as "A01".

    Row 1 maps to "A"; the column is zero-padded to two digits.
    """
    return chr(64 + row) + str(col).zfill(2)
def well_to_num(well, dim):
    """Convert a well name such as "B03" into its one-based running number.

    Wells are counted row by row, so the result is
    ``row_index * columns + column`` - the inverse of ``indices_to_well``.
    The previous row stride (``rMax + 4*i``) only equalled the number of
    columns for 8x12 plates.

    Args:
        well: Well name made of one row letter followed by the column number.
        dim: Sequence ``(rows, columns)`` describing the plate.

    Returns:
        The one-based running number of the well.

    Raises:
        ValueError: If ``well`` is not one letter followed by digits.
    """
    cMax = dim[1]
    first_digit = next((pos for pos, char in enumerate(well) if char.isdigit()), None)
    # first_digit is None without any digit and 0 without a row letter.
    if first_digit != 1:
        raise ValueError("cannot parse well name: "+repr(well))
    row_index = ord(well[0].upper()) - 65
    return row_index*cMax + int(well[first_digit:])
def fill_plates_dict(raw_df, plates_dict, scope=None):
    """Copy the measured values from ``raw_df`` into ``plates_dict``.

    Args:
        raw_df: Raw results DataFrame.
        plates_dict: ``{plate: {well: {feature: value}}}`` structure to fill.
        scope: "EDDU_CX5" or "EDDU_metaxpress"; selects the filler.

    Returns:
        The dict returned by the scope-specific filler.

    Raises:
        SystemExit: If ``scope`` is not a supported microscope.
    """
    features = get_features(raw_df, scope=scope)
    if scope == "EDDU_CX5":
        return fill_plates_dict_EDDU_CX5(raw_df, plates_dict, features)
    if scope == "EDDU_metaxpress":
        return fill_plates_dict_EDDU_metaxpress(raw_df, plates_dict, features)
    # Passing the message to sys.exit gives a non-zero exit status.
    sys.exit("microscope "+str(scope)+" not known. Exiting")
def fill_plates_dict_EDDU_CX5(raw_df, plates_dict, features):
    """Fill ``plates_dict`` from a CX5 'Rawdata' sheet.

    For every row, the second column is used as the plate key and the third
    and fourth columns as the row/column numbers passed to
    ``row_col_to_well``.

    Args:
        raw_df: CX5 raw results DataFrame.
        plates_dict: ``{plate: {well: {feature: value}}}`` to update in place.
        features: Column labels whose values are copied.

    Returns:
        The updated ``plates_dict``.
    """
    for _, row in raw_df.iterrows():
        # Explicit .iloc: integer keys on a label-indexed Series are
        # deprecated as positional lookups in recent pandas.
        plate_id = row.iloc[1]
        well = row_col_to_well(row.iloc[2], row.iloc[3])
        for feature in features:
            plates_dict[plate_id][well][feature] = row[feature]
    return plates_dict
def fill_plates_dict_EDDU_metaxpress(raw_df, plates_dict, features):
    """Fill ``plates_dict`` from a MetaXpress export.

    The frame is scanned top to bottom: a 'Plate Name' row sets the current
    plate (from its second cell), a row with an empty first cell starts data
    collection, and a 'Barcode' row stops it.  While collecting, the first
    cell of each row is used as the well name.

    Args:
        raw_df: MetaXpress raw results DataFrame.
        plates_dict: ``{plate: {well: {feature: value}}}`` to update in place.
        features: Feature names, used as labels for the third and later
            columns.

    Returns:
        The updated ``plates_dict``.
    """
    # list() so that concatenation also works when an Index is passed.
    features = list(features)
    # set_axis returns a relabelled copy; the "inplace" keyword no longer
    # exists in pandas 2.x.
    df_col_names = raw_df.set_axis(["Well", "Laser Focus"]+features, axis=1)
    plate_name = None
    start_collect = False
    for _, row in df_col_names.iterrows():
        first_cell = row.iloc[0]
        if first_cell == "Barcode":
            start_collect = False
        if start_collect:
            for feature in features:
                plates_dict[plate_name][first_cell][feature] = row[feature]
        if first_cell == "Plate Name":
            plate_name = row.iloc[1]
        elif pd.isnull(first_cell):
            start_collect = True
    return plates_dict
def average_plates(plates, raw_df, scope=None):
    """Average several plates well by well and feature by feature.

    NOTE: a second definition of ``average_plates`` further down this file
    replaces this one at import time.

    Args:
        plates: Sequence of ``{well: {feature: value}}`` dicts.
        raw_df: Raw results DataFrame, used to look up the feature names.
        scope: Microscope identifier forwarded to the helpers.

    Returns:
        A ``{well: {feature: mean}}`` dict covering the default wells of
        ``create_well_dict``.
    """
    average_plate = create_well_dict(raw_df, scope=scope)
    # Forward the scope: without it get_features does not know the
    # microscope and exits.
    features = get_features(raw_df, scope=scope)
    plate_count = len(plates)
    for well, well_values in average_plate.items():
        for feature in features:
            total = sum(plate[well][feature] for plate in plates)
            well_values[feature] = total/plate_count
    return average_plate
def average_plates_all_replicates(plate_groups, plates_dict, raw_df):
    """Average the plates of every replicate.

    Returns:
        ``{replicate: result}`` where ``result`` is what
        ``average_plates_one_replicate`` returns for that replicate's groups.
    """
    return {
        replicate: average_plates_one_replicate(groups, plates_dict, raw_df)
        for replicate, groups in plate_groups.items()
    }
def average_plates_duplicate_rows(plate_groups, plates_dict, raw_df, wells_to_average=None, scope=None):
    """Average each listed well with the well in the row below, per plate.

    Args:
        plate_groups: Not used by this function.
        plates_dict: ``{plate_name: {well: {feature: value}}}``.
        raw_df: Raw results DataFrame, used to look up the feature names.
        wells_to_average: Wells to average with the following row.
            NOTE(review): defaults to ``create_duplicate_wells()`` because
            ``None`` used to crash - confirm this is the intended default.
        scope: Microscope identifier forwarded to the helpers.

    Returns:
        ``{plate_name: {well: {feature: mean}}}`` for the listed wells.  The
        previous version built this dict but returned the untouched input.
    """
    features = get_features(raw_df, scope=scope)
    if wells_to_average is None:
        wells_to_average = create_duplicate_wells()
    averaged_plates_dict = {}
    for plate_name, plate in plates_dict.items():
        average_plate = create_well_dict(raw_df, scope=scope, wells=wells_to_average)
        for well in wells_to_average:
            average_plate = average_rows(plate, average_plate, well, features)
        averaged_plates_dict[plate_name] = average_plate
    return averaged_plates_dict
def average_rows(plate_dict, average_plate, well, features, num_rows_average=2):
    """Average a well with the wells directly below it.

    Args:
        plate_dict: ``{well: {feature: value}}`` of one plate.
        average_plate: Dict receiving the result under ``well``.
        well: Name of the top well.
        features: Feature names to average.
        num_rows_average: Number of consecutive rows to average.

    Returns:
        ``average_plate`` with ``average_plate[well][feature]`` set to the
        mean over the ``num_rows_average`` wells.
    """
    wells_to_average = [well]
    current = well
    for _ in range(num_rows_average-1):
        # Step down from the well reached so far; the previous code always
        # stepped from the top well, so more than two rows repeated one row.
        current = get_well_next_row(current)
        wells_to_average.append(current)
    for feature in features:
        total = sum(plate_dict[name][feature] for name in wells_to_average)
        average_plate[well][feature] = total/num_rows_average
    return average_plate
def get_well_next_row(well):
    """Return the name of the well one row below ("A01" -> "B01").

    The first character is advanced by one code point; the rest is kept.
    """
    row_letter, column = well[0], well[1:]
    return chr(ord(row_letter)+1)+column
def average_plates(plates, raw_df, scope=None):
    """Average several plates well by well and feature by feature.

    Args:
        plates: Sequence of ``{well: {feature: value}}`` dicts.
        raw_df: Raw results DataFrame, used to look up the feature names.
        scope: Microscope identifier forwarded to the helpers.

    Returns:
        A ``{well: {feature: mean}}`` dict covering the default wells of
        ``create_well_dict``.
    """
    average_plate = create_well_dict(raw_df, scope=scope)
    # Forward the scope: without it get_features does not know the
    # microscope and exits.
    features = get_features(raw_df, scope=scope)
    plate_count = len(plates)
    for well, well_values in average_plate.items():
        for feature in features:
            total = sum(plate[well][feature] for plate in plates)
            well_values[feature] = total/plate_count
    return average_plate
def average_plates_one_replicate(averaged_plates_names_dict, plates_dict, raw_df, scope=None):
    """Average the plates of each named group of one replicate.

    Args:
        averaged_plates_names_dict: ``{group_name: [plate_name, ...]}``.
        plates_dict: ``{plate_name: {well: {feature: value}}}``.
        raw_df: Raw results DataFrame, forwarded to ``average_plates``.
        scope: Microscope identifier forwarded to ``average_plates``; the
            previous version could not pass one on.

    Returns:
        ``{group_name: averaged plate}``.
    """
    averaged_plates_dict = {}
    for plate_average_name, plate_names in averaged_plates_names_dict.items():
        plates_to_average = [plates_dict[plate_name] for plate_name in plate_names]
        averaged_plates_dict[plate_average_name] = average_plates(plates_to_average, raw_df, scope=scope)
    return averaged_plates_dict
def load_plate_groups(config_path):
    """Read the 'plate_groups' sheet of the configuration workbook.

    The sheet is read without a header row and with its first column as row
    labels.  The first row is skipped (presumably a heading row - confirm
    against the workbook template).

    Args:
        config_path: Path of the Excel configuration workbook.

    Returns:
        ``{replicate: {str(column label): cell value}}`` for every remaining
        row.
    """
    # Close the workbook handle as soon as the sheet has been parsed.
    with pd.ExcelFile(config_path) as xls:
        df = pd.read_excel(xls, 'plate_groups', index_col=0, header=None)
    replicates = df.index.tolist()[1:]
    columns = df.columns.tolist()
    return {
        replicate: {str(column): df.loc[replicate, column] for column in columns}
        for replicate in replicates
    }
def normalize_plate(plate, reference_wells, raw_df, ctrl_avg_name, scope=None):
    """Divide every well of a plate by the mean of its reference wells.

    Args:
        plate: ``{well: {feature: value}}`` of one plate.
        reference_wells: Keys of ``plate`` used as controls.
        raw_df: Raw results DataFrame, used to look up the feature names.
        ctrl_avg_name: Key under which the control mean is stored.
        scope: Microscope identifier forwarded to the helpers; without it
            ``get_features`` exits.

    Returns:
        ``{well: {feature: value / control mean}}`` plus the
        ``ctrl_avg_name`` entry holding the control means.  When the
        division fails the raw value is kept.
    """
    features = get_features(raw_df, scope=scope)
    normalized_plate = create_well_dict(raw_df, scope=scope)
    normalized_plate[ctrl_avg_name] = {feature: None for feature in features}
    for feature in features:
        control_values = [plate[well][feature] for well in reference_wells]
        control_avg = np.mean(np.array(control_values))
        normalized_plate[ctrl_avg_name][feature] = control_avg
        for well in normalized_plate:
            # Equality, not substring membership: "A01" must not be skipped
            # just because it occurs inside the control entry's name.
            if well == ctrl_avg_name:
                continue
            try:
                normalized_plate[well][feature] = plate[well][feature]/control_avg
            except (TypeError, ZeroDivisionError):
                # e.g. a missing (None) measurement: keep the raw value.
                normalized_plate[well][feature] = plate[well][feature]
    return normalized_plate
def normalize_all_plates(plates_dict, reference_wells, raw_df, ctrl_avg_name):
    """Apply ``normalize_plate`` to every plate of every replicate.

    Args:
        plates_dict: ``{replicate: {condition: plate}}``.

    Returns:
        A new dict of the same shape holding the normalized plates.
    """
    return {
        replicate: {
            condition: normalize_plate(plate, reference_wells, raw_df, ctrl_avg_name)
            for condition, plate in condition_plates.items()
        }
        for replicate, condition_plates in plates_dict.items()
    }
def create_table_for_feature(feature, plates_dict):
    """Build a dose x (condition, replicate) table for one feature.

    Args:
        feature: Name of the feature to tabulate.
        plates_dict: ``{condition: {replicate: {dose: {feature: value}}}}``.

    Returns:
        A DataFrame indexed by the doses of the first condition/replicate,
        with MultiIndex columns ``(str(condition), str(replicate))``.
        Combinations missing from ``plates_dict`` are left empty.
    """
    conditions = list(plates_dict.keys())
    replicates = list(list(plates_dict.values())[0].keys())
    doses = list(plates_dict[conditions[0]][replicates[0]].keys())

    # Keep the column keys as tuples: rebuilding them by splitting a
    # "condition_replicate" string breaks for names containing "_".
    column_keys = [(condition, replicate) for condition in conditions for replicate in replicates]
    column_values = []
    for condition, replicate in column_keys:
        values = []
        for dose in doses:
            try:
                values.append(plates_dict[condition][replicate][dose][feature])
            except (KeyError, TypeError, IndexError):
                # Dose or feature not available for this combination.
                values.append(None)
        column_values.append(values)

    feature_table = pd.DataFrame({position: values for position, values in enumerate(column_values)})
    feature_table.columns = pd.MultiIndex.from_tuples(
        [(str(condition), str(replicate)) for condition, replicate in column_keys]
    )
    feature_table.index = doses
    return feature_table
def create_feature_results_table(feature, experiment_dict):
    """Build a dose x (replicate, condition) table for one feature.

    NOTE(review): the previous body could not run (it read the undefined
    names ``experiment_dict_values``, ``doses`` and ``plates_dict`` and
    called ``.keys()`` on a list).  This version assumes the nesting
    ``{replicate: {condition: {dose: {feature: value}}}}`` suggested by its
    first two statements - confirm the intended layout.

    Args:
        feature: Name of the feature to tabulate.
        experiment_dict: Nested dict as described above.

    Returns:
        A DataFrame indexed by the doses of the first replicate/condition,
        with MultiIndex columns ``(str(replicate), str(condition))``; an
        empty DataFrame when ``experiment_dict`` holds no condition.
    """
    replicates = list(experiment_dict.keys())
    conditions = list(experiment_dict[replicates[0]].keys()) if replicates else []
    if not conditions:
        return pd.DataFrame()
    doses = list(experiment_dict[replicates[0]][conditions[0]].keys())

    column_keys = [(replicate, condition) for replicate in replicates for condition in conditions]
    column_values = []
    for replicate, condition in column_keys:
        values = []
        for dose in doses:
            try:
                values.append(experiment_dict[replicate][condition][dose][feature])
            except (KeyError, TypeError):
                # Combination not measured: leave the cell empty.
                values.append(None)
        column_values.append(values)

    feature_table = pd.DataFrame({position: values for position, values in enumerate(column_values)})
    feature_table.columns = pd.MultiIndex.from_tuples(
        [(str(replicate), str(condition)) for replicate, condition in column_keys]
    )
    feature_table.index = doses
    return feature_table
def create_all_feature_tables(plates_dict, features):
    """Build one table per feature with ``create_table_for_feature``.

    Returns:
        ``{feature: table}`` in the order of ``features``.
    """
    return {feature: create_table_for_feature(feature, plates_dict) for feature in features}
def feature_tables_to_excel(feature_tables, outpath):
    """Write every feature table to its own sheet of one workbook.

    Args:
        feature_tables: ``{feature name: DataFrame}``.
        outpath: Path of the workbook to write.

    The sheet name is the first 31 characters of the feature name with the
    characters ``[ ] : * ? / \\`` removed.
    """
    forbidden = str.maketrans('', '', '[]:*?/\\')
    with pd.ExcelWriter(outpath) as writer:
        for feature, table in feature_tables.items():
            table.to_excel(writer, sheet_name=feature[:31].translate(forbidden))
def create_duplicate_wells():
    """List the wells of rows A, C, E and G, columns 01 to 12.

    Returns:
        48 well names ordered row by row ("A01", ..., "G12").
    """
    every_other_row = string.ascii_uppercase[0:8:2]
    return [
        letter + str(col).zfill(2)
        for letter in every_other_row
        for col in range(1, 13)
    ]
def make_experiment_dict_locations(plate_groups, plate_layout, conditions):
    """Re-key the plate layout from replicate-first to condition-first.

    Args:
        plate_groups: Not used by this function.
        plate_layout: ``{replicate: {condition: {dose: locations}}}``.
        conditions: Condition names; each gets an entry in the result.

    Returns:
        ``{condition: {replicate: {dose: locations}}}``.
    """
    experiment_dict = dict((name, {}) for name in conditions)
    for replicate, layout_conditions in plate_layout.items():
        for condition, doses in layout_conditions.items():
            experiment_dict[condition][replicate] = dict(doses.items())
    return experiment_dict
def make_experiment_dict_values(plates, experiment_dict_locations, features, plate_groups=None):
    """Replace the well locations of an experiment dict by averaged values.

    Args:
        plates: ``{plate_name: {well: {feature: value}}}``.
        experiment_dict_locations:
            ``{condition: {replicate: {dose: [(well, plate_group), ...]}}}``.
        features: Feature names to average.
        plate_groups: ``{replicate: {group: plate_name}}``.  When omitted,
            the module-level ``plate_groups`` variable is used, which is
            what the previous version silently relied on.

    Returns:
        ``{condition: {replicate: {dose: {feature: mean value}}}}``.

    Raises:
        NameError: If ``plate_groups`` is omitted and no module-level
            ``plate_groups`` exists.
    """
    if plate_groups is None:
        try:
            plate_groups = globals()["plate_groups"]
        except KeyError:
            raise NameError("name 'plate_groups' is not defined")
    experiment_dict_values = {}
    for condition, replicates in experiment_dict_locations.items():
        experiment_dict_values[condition] = {}
        for replicate, doses in replicates.items():
            experiment_dict_values[condition][replicate] = {
                dose: {
                    feature: average_wells(locations, replicate, feature, plates, plate_groups)
                    for feature in features
                }
                for dose, locations in doses.items()
            }
    return experiment_dict_values
def average_wells(locations, replicate, feature, plates, plate_groups):
    """Return the mean value of one feature over several well locations.

    Each location is resolved with ``location_to_value``.
    """
    total = sum(
        location_to_value(location, replicate, feature, plates, plate_groups)
        for location in locations
    )
    return total/float(len(locations))
def location_to_value(location, replicate, feature, plates, plate_groups):
    """Look up the value of one feature at a ``(well, plate_group)`` location.

    Args:
        location: ``(well, plate_group)`` tuple.
        replicate: Key of ``plate_groups`` selecting the replicate.
        feature: Feature name.
        plates: ``{plate_name: {well: {feature: value}}}``.
        plate_groups: ``{replicate: {str(group): plate_name}}``.

    Returns:
        ``plates[plate_name][well][feature]`` for the plate assigned to the
        group in this replicate.
    """
    well, plate_group = location
    groups = plate_groups[replicate]
    key = str(plate_group)
    # A group number read from a spreadsheet row may arrive as 1.0; fall
    # back to its integer spelling so it still matches the "1" key.
    if key not in groups and isinstance(plate_group, float) and plate_group.is_integer():
        key = str(int(plate_group))
    plate_name = groups[key]
    return plates[plate_name][well][feature]
def normalize_experiment(experiment_dict_values, ctrl_positions, features, plates, plate_groups=None):
    """Divide every experiment value by the control mean of its replicate.

    Args:
        experiment_dict_values:
            ``{condition: {replicate: {dose: {feature: value}}}}``.
        ctrl_positions: ``{replicate: [(well, plate_group), ...]}``.
        features: Feature names to normalize.
        plates: ``{plate_name: {well: {feature: value}}}``.
        plate_groups: ``{replicate: {group: plate_name}}``.  When omitted,
            the module-level ``plate_groups`` variable is used, which is
            what the previous version silently relied on.

    Returns:
        A new dict of the same shape holding ``value / control mean``; a
        control mean of 0 is replaced by 1 so the value is kept unchanged.

    Raises:
        NameError: If ``plate_groups`` is omitted and no module-level
            ``plate_groups`` exists.
    """
    if plate_groups is None:
        try:
            plate_groups = globals()["plate_groups"]
        except KeyError:
            raise NameError("name 'plate_groups' is not defined")

    # The control means depend only on the replicate, so compute them once
    # per replicate instead of once per condition.
    controls_by_replicate = {}
    normalized = {}
    for condition, replicates in experiment_dict_values.items():
        normalized[condition] = {}
        for replicate, doses in replicates.items():
            if replicate not in controls_by_replicate:
                controls = {}
                for feature in features:
                    ctrl_value = average_wells(ctrl_positions[replicate], replicate, feature, plates, plate_groups)
                    controls[feature] = 1 if ctrl_value == 0 else ctrl_value
                controls_by_replicate[replicate] = controls
            controls = controls_by_replicate[replicate]
            normalized[condition][replicate] = {
                dose: {feature: values[feature]/controls[feature] for feature in features}
                for dose, values in doses.items()
            }
    return normalized
def write_values_heat_map(plates_dict, features, outpath):
    """Write one sheet per feature showing every plate as an 8 x 12 grid.

    For each plate the sheet holds a row with the plate key, eight rows
    (A..H) of twelve values (columns 01..12) and a separator row.

    Args:
        plates_dict: ``{plate: {well: {feature: value}}}``.
        features: Feature names; each becomes a sheet (name cut to 31
            characters and passed through ``remove_inval_chars``).
        outpath: Path of the workbook to write.
    """
    # The ExcelWriter is the only handle on outpath: the previous version
    # also opened an xlsxwriter.Workbook on the same path that was never
    # used or closed.
    with pd.ExcelWriter(outpath) as writer:
        for feature in features:
            sheet_rows = []
            for plate_name, plate in plates_dict.items():
                sheet_rows.append([plate_name])
                for letter in string.ascii_uppercase[:8]:
                    sheet_rows.append(
                        [plate[letter + str(col).zfill(2)][feature] for col in range(1, 13)]
                    )
                sheet_rows.append([""])
            pd.DataFrame(sheet_rows).to_excel(writer, sheet_name=remove_inval_chars(feature[:31]))
def create_reference_wells():
    """List ``(well, 2)`` tuples for rows A..H, columns 07 to 12.

    Returns:
        48 tuples ordered row by row, each pairing a well name with the
        constant 2.
    """
    return [
        (letter + str(col).zfill(2), 2)
        for letter in string.ascii_uppercase[:8]
        for col in range(7, 13)
    ]
def remove_inval_chars(name):
    """Return ``name`` without the characters ``[ ] : * ? / \\``."""
    return name.translate(str.maketrans('', '', '[]:*?/\\'))
# NOTE(review): everything below runs when the module is imported and reads
# files relative to the working directory; consider an
# `if __name__ == "__main__":` guard.  The names stay at module level here
# because functions above read `plate_groups` as a global.

# Well names of an 8 x 12 grid; `conditions` is replaced by the value
# returned from read_plate_layout() further down.
rows = list(string.ascii_uppercase[:8])
cols = list(range(1, 13))
conditions = []
for row in rows:
    for col in cols:
        conditions.append(row + str(col).zfill(2))

# Input and output locations.
results_path = "mx_results.xlsx"
config_file = "./config.xlsx"
compiled_results_path = "./compiled_results_normalized.xlsx"
heatmap_path = "./heatmaps.xlsx"

# Read the configuration.
scope, plate_layout, conditions, ctrl_positions = read_plate_layout(config_file)
plate_groups = load_plate_groups(config_file)
experiment_dict_locations = make_experiment_dict_locations(plate_groups, plate_layout, conditions)

# Read the raw results and arrange them per plate and well.
df = read_results(results_path, scope=scope)
features = get_features(df, scope=scope)
well_dict = create_well_dict(df, scope=scope)
plates_dict = create_plates_dict(df, scope=scope)
plates_dict = fill_plates_dict(df, plates_dict, scope=scope)

# Average per condition / replicate / dose; normalize when controls exist.
experiment_dict_values = make_experiment_dict_values(plates_dict, experiment_dict_locations, features)
if ctrl_positions is not None:
    experiment_dict_values = normalize_experiment(experiment_dict_values, ctrl_positions, features, plates_dict)

# Write the outputs.
feature_tables = create_all_feature_tables(experiment_dict_values, features)
write_values_heat_map(plates_dict, features, heatmap_path)
feature_tables_to_excel(feature_tables, compiled_results_path)