Coverage for openhcs/processing/backends/analysis/cx5_format.py: 0.3%
442 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-04 02:09 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-04 02:09 +0000
1import copy
2import xlsxwriter
3import string
4import numpy as np
5import pandas as pd
6import sys
def read_results(results_path, scope=None):
    """Load the raw measurement sheet exported by a supported microscope.

    Args:
        results_path: Path to the Excel workbook holding the results.
        scope: Microscope identifier. ``"EDDU_CX5"`` reads the sheet named
            ``Rawdata``; ``"EDDU_metaxpress"`` reads the first sheet.

    Returns:
        The selected sheet as a ``pandas.DataFrame``.

    Raises:
        SystemExit: If ``scope`` is not one of the supported identifiers.
    """
    if scope != "EDDU_CX5" and scope != "EDDU_metaxpress":
        # Reject an unknown microscope before touching the file system.
        print("microscope "+str(scope)+" not known. Exiting")
        sys.exit()
    # The context manager closes the workbook handle once the sheet is read.
    with pd.ExcelFile(results_path) as xls:
        sheet = 'Rawdata' if scope == "EDDU_CX5" else xls.sheet_names[0]
        return pd.read_excel(xls, sheet)
def get_features(raw_df, scope=None):
    """Return the feature names found in ``raw_df`` for the given microscope.

    Args:
        raw_df: Raw results table as returned by ``read_results``.
        scope: ``"EDDU_CX5"`` or ``"EDDU_metaxpress"``.

    Returns:
        Whatever the microscope-specific extractor returns.

    Raises:
        SystemExit: If ``scope`` is not a supported identifier.
    """
    if scope != "EDDU_CX5" and scope != "EDDU_metaxpress":
        print("microscope "+str(scope)+" not known. Exiting")
        sys.exit()
    if scope == "EDDU_CX5":
        return get_features_EDDU_CX5(raw_df)
    return get_features_EDDU_metaxpress(raw_df)
def is_N_row(row_name):
    """Tell whether a config row label declares the number of replicates.

    The comparison is case-insensitive; accepted labels are ``n``, ``ns``,
    ``replicate`` and ``replicates``.
    """
    return row_name.lower() in ("n", "ns", "replicate", "replicates")
def is_well_all_replicates_row(row_name):
    """Tell whether a row label is exactly ``well`` or ``wells`` (any case)."""
    return row_name.lower() in ("well", "wells")
def is_well_specific_replicate_row(row_name):
    """Tell whether a row label names wells for one specific replicate.

    True when the lower-cased label contains ``well`` and its last character
    is a digit (for example ``Well2``).
    """
    lowered = row_name.lower()
    if 'well' not in lowered:
        return False
    return lowered[-1].isdigit()
def read_plate_layout(config_path):
    """Parse the ``drug_curve_map`` sheet of the experiment config workbook.

    The sheet is read row by row. The first column of each row is a keyword
    that selects how the remaining cells are interpreted: number of
    replicates, microscope, control wells, control plate groups, control
    replicate numbers, condition name, doses, wells and plate groups.

    Args:
        config_path: Path to the Excel config workbook.

    Returns:
        A tuple ``(scope, layout, conditions, ctrl_positions)`` where

        * ``scope`` is the value of the scope/microscope row (``None`` if the
          sheet has no such row),
        * ``layout`` maps ``"N<k>"`` -> condition -> dose -> list of
          ``(well, plate_group)`` tuples,
        * ``conditions`` lists the condition names in order of appearance,
        * ``ctrl_positions`` maps ``"N<k>"`` -> list of ``(well, plate_group)``
          control locations, or is ``None`` when the sheet declares no
          controls with replicate numbers.
    """
    # Close the workbook handle as soon as the sheet has been loaded.
    with pd.ExcelFile(config_path) as xls:
        df = pd.read_excel(xls, 'drug_curve_map', index_col=0, header=None)
    df = df.dropna(how='all')

    layout = {}
    condition = None
    doses = None
    wells = None
    plate_groups = None
    N = None
    specific_N = None
    scope = None
    conditions = []
    ctrl_wells = None
    ctrl_wells_aligned = None
    ctrl_groups = None
    ctrl_positions_replicates = None
    ctrl_positions = None

    def sanitize_compare(string1, string2):
        """Compare two labels ignoring case, spaces, underscores and a final 's'."""
        def canonical(text):
            text = text.lower().replace('_', '').replace(' ', '')
            # endswith() is safe on an empty string, unlike indexing [-1].
            return text if text.endswith('s') else text + 's'
        return canonical(string1) == canonical(string2)

    for _, row in df.iterrows():
        # The label is NaN/numeric when the keyword cell is empty or holds a
        # number; normalise to text so the keyword helpers can call .lower().
        label = str(row.name)

        # Maximum number of replicates: create one layout entry per replicate.
        if is_N_row(label):
            N = int(row.iloc[0])
            for n in range(N):
                layout["N"+str(n+1)] = {}

        # Microscope identifier.
        if sanitize_compare(label, 'scope') or sanitize_compare(label, 'microscope'):
            scope = row.iloc[0]

        # While control wells are being read, a plate-group row belongs to
        # the controls rather than to a condition.
        if sanitize_compare(label, 'plate group') and ctrl_wells is not None:
            if ctrl_groups is None:
                ctrl_groups = []
            ctrl_groups += row.dropna().tolist()
            continue

        # Control wells.
        if sanitize_compare(label, 'control') or sanitize_compare(label, 'control well'):
            if ctrl_wells is None:
                ctrl_wells = []
            ctrl_wells += row.dropna().tolist()
            continue

        # Replicate number of each control position.
        if sanitize_compare(label, 'group n'):
            if ctrl_positions_replicates is None:
                ctrl_positions_replicates = []
            if ctrl_wells_aligned is None:
                ctrl_wells_aligned = []
            ctrl_positions_replicates += row.dropna().tolist()
            ctrl_wells_aligned += ctrl_wells
            continue

        # A condition row ends the control section and starts a new condition.
        if sanitize_compare(label, 'condition'):
            if ctrl_wells_aligned is None or ctrl_positions_replicates is None:
                # No controls (or no replicate numbers for them) were given;
                # previously this path crashed on len(None).
                ctrl_positions = None
            else:
                ctrl_positions = {"N"+str(n+1): [] for n in range(N)}
                for idx, ctrl_well in enumerate(ctrl_wells_aligned):
                    replicate_key = "N"+str(ctrl_positions_replicates[idx])
                    ctrl_positions[replicate_key].append((ctrl_well, ctrl_groups[idx]))
            # Leaving control mode: later plate-group rows describe conditions.
            ctrl_wells = None

            # Make sure layout[replicate][condition] exists for every replicate.
            condition = row.iloc[0]
            for n in range(N):
                layout["N"+str(n+1)].setdefault(condition, {})
            conditions.append(condition)

        if sanitize_compare(label, 'dose'):
            doses = row.dropna().tolist()

        # Wells shared by all replicates ...
        if is_well_all_replicates_row(label):
            wells = row.dropna().tolist()
            specific_N = None
        # ... or wells for one replicate only (label ends in its number).
        if is_well_specific_replicate_row(label):
            specific_N = int(label[-1])
            wells = row.dropna().tolist()

        # A plate-group row completes the wells read from the previous row.
        if sanitize_compare(label, 'plate group'):
            plate_groups = row.dropna().tolist()
            if specific_N is None:
                target_replicates = ["N"+str(n+1) for n in range(N)]
            else:
                target_replicates = ["N"+str(specific_N)]
            for replicate_key in target_replicates:
                by_dose = layout[replicate_key][condition]
                for y, dose in enumerate(doses):
                    by_dose.setdefault(dose, []).append((wells[y], plate_groups[y]))
    return scope, layout, conditions, ctrl_positions
def get_features_EDDU_CX5(raw_df):
    """Return the feature column labels of a CX5 raw-data table.

    Features are the columns after the first column whose name starts with
    ``Replicate``, excluding the final column of the table.
    """
    column_labels = raw_df.columns
    # str.find gives 0 for names starting with "Replicate" and -1 when the
    # text is absent, so argmax picks the first such column.
    replicate_pos = column_labels.str.find("Replicate").argmax()
    return raw_df.iloc[:, replicate_pos + 1:-1].columns
def get_features_EDDU_metaxpress(raw_df):
    """Return the feature names of a MetaXpress export as a list.

    The names are taken from the first row whose first cell is null,
    skipping that row's first two cells.
    """
    header_mask = pd.isnull(raw_df.iloc[:, 0])
    header_row = raw_df[header_mask].iloc[0]
    return header_row.tolist()[2:]
def create_well_dict(raw_df, wells=None, scope=None):
    """Build an empty ``{well: {feature: None}}`` mapping.

    Args:
        raw_df: Raw results table used to discover the feature names.
        wells: Iterable of well names. When ``None``, the 96 wells
            ``A01`` .. ``H12`` (8 rows x 12 columns) are used.
        scope: Microscope identifier forwarded to ``get_features``.

    Returns:
        A dict with one entry per well, each mapping every feature to ``None``.
    """
    # Identity check: "== None" is evaluated elementwise on array-likes and
    # cannot be used as a truth value.
    if wells is None:
        wells = [
            letter + str(col).zfill(2)
            for letter in string.ascii_uppercase[:8]
            for col in range(1, 13)
        ]
    features = get_features(raw_df, scope=scope)
    return {well: {feature: None for feature in features} for well in wells}
def add_well_to_well_dict(wells, well_dict, raw_df, scope=None):
    """Add (or reset) entries for ``wells`` in an existing well dict.

    Args:
        wells: Iterable of well names to add.
        well_dict: Mapping to update in place.
        raw_df: Raw results table used to discover the feature names.
        scope: Microscope identifier forwarded to ``get_features``. It was
            previously not forwarded at all, so the lookup always exited.

    Returns:
        ``well_dict``, with each given well mapped to ``{feature: None}``.
    """
    # get_features already returns the feature names; its result has no
    # ``.columns`` attribute.
    features = get_features(raw_df, scope=scope)
    for well in wells:
        well_dict[well] = {feature: None for feature in features}
    return well_dict
def create_plates_dict(raw_df, scope=None):
    """Create an empty per-plate well dict for the given microscope.

    Raises:
        SystemExit: If ``scope`` is not ``"EDDU_CX5"`` or ``"EDDU_metaxpress"``.
    """
    if scope != "EDDU_CX5" and scope != "EDDU_metaxpress":
        print("microscope "+str(scope)+" not known. Exiting")
        sys.exit()
    if scope == "EDDU_CX5":
        return create_plates_dict_EDDU_CX5(raw_df)
    return create_plates_dict_EDDU_metaxpress(raw_df)
def create_plates_dict_EDDU_metaxpress(raw_df):
    """Create an empty well dict for every plate named in a MetaXpress export.

    Plate names are read from the second column of each row that contains
    the cell ``Plate Name``.
    """
    name_rows = raw_df[(raw_df == 'Plate Name').any(axis=1)]
    plate_dict = {}
    for plate_id in name_rows.iloc[:, 1].tolist():
        plate_dict[plate_id] = create_well_dict(raw_df, scope="EDDU_metaxpress")
    return plate_dict
def create_plates_dict_EDDU_CX5(raw_df):
    """Create an empty well dict for every plate id in a CX5 raw-data table.

    Plate ids come from the ``UniquePlateId`` column.

    Returns:
        ``{plate_id: {well: {feature: None}}}`` with one entry per distinct
        plate id, in order of first appearance.
    """
    # The column repeats the plate id on many rows. De-duplicate first so the
    # well dict is built once per plate instead of once per row; the
    # resulting keys and their order are unchanged.
    unique_ids = dict.fromkeys(raw_df['UniquePlateId'].tolist())
    return {plate_id: create_well_dict(raw_df, scope="EDDU_CX5") for plate_id in unique_ids}
def indices_to_well(row, col, dim):
    """Convert zero-based ``(row, col)`` indices to a well name such as ``B03``.

    Args:
        row: Zero-based row index.
        col: Zero-based column index.
        dim: ``(n_rows, n_cols)`` of the plate; only the column count is used.
    """
    _n_rows, n_cols = dim[0], dim[1]
    # One-based running number of the well, counted row by row.
    linear = row * n_cols + (col + 1)
    row_offset = int((linear - 1) / n_cols)
    letter = chr(65 + row_offset)
    number = str(linear - row_offset * n_cols).zfill(2)
    return letter + number
def row_col_to_well(row, col):
    """Convert one-based row and column numbers to a well name (1, 1 -> ``A01``)."""
    return f"{chr(row + 64)}{str(col).zfill(2)}"
def well_to_num(well, dim):
    """Convert a well name such as ``B01`` to its one-based running number.

    Wells are counted row by row, so on an 8x12 plate ``A01`` is 1, ``B01``
    is 13 and ``H12`` is 96.

    Args:
        well: Well name made of one upper-case row letter followed by the
            column number.
        dim: ``(n_rows, n_cols)`` of the plate.

    Returns:
        The one-based running number of the well.

    Raises:
        ValueError: If ``well`` is not a single row letter followed by digits.
    """
    n_cols = dim[1]
    split_at = next((idx for idx, char in enumerate(well) if char.isdigit()), None)
    # ``not split_at`` covers both "no digit found" and "no row letter".
    if not split_at or split_at != 1:
        raise ValueError("cannot parse well name: " + repr(well))
    row_label, col_label = well[:split_at], well[split_at:]
    # The stride between rows is the number of columns. The previous
    # expression (rows + 4 * label length) matched it only on 8x12 plates.
    return (ord(row_label) - 65) * n_cols + int(col_label)
def fill_plates_dict(raw_df, plates_dict, scope=None):
    """Fill ``plates_dict`` with measured values for the given microscope.

    Feature names are resolved through ``get_features`` first, then the
    microscope-specific filler is called.

    Raises:
        SystemExit: If ``scope`` is not a supported identifier.
    """
    features = get_features(raw_df, scope=scope)
    if scope != "EDDU_CX5" and scope != "EDDU_metaxpress":
        print("microscope "+str(scope)+" not known. Exiting")
        sys.exit()
    if scope == "EDDU_CX5":
        return fill_plates_dict_EDDU_CX5(raw_df, plates_dict, features)
    return fill_plates_dict_EDDU_metaxpress(raw_df, plates_dict, features)
def fill_plates_dict_EDDU_CX5(raw_df, plates_dict, features):
    """Copy feature values from a CX5 raw-data table into ``plates_dict``.

    For every row, the second column is used as the plate key and the third
    and fourth columns as the one-based row and column of the well.

    Args:
        raw_df: CX5 raw-data table.
        plates_dict: ``{plate: {well: {feature: value}}}``, updated in place.
        features: Column labels to copy.

    Returns:
        ``plates_dict``.
    """
    for _, row in raw_df.iterrows():
        # Explicit positional access: integer keys on a label-indexed Series
        # are deprecated in recent pandas.
        # NOTE(review): assumes raw_df has non-integer column labels, as the
        # old positional fallback did - confirm against the export format.
        plate_id = row.iloc[1]
        # iterrows() may upcast integers to float, so cast before building
        # the well name.
        well = row_col_to_well(int(row.iloc[2]), int(row.iloc[3]))
        well_values = plates_dict[plate_id][well]
        for feature in features:
            well_values[feature] = row[feature]
    return plates_dict
def fill_plates_dict_EDDU_metaxpress(raw_df, plates_dict, features):
    """Copy feature values from a MetaXpress export into ``plates_dict``.

    The export is scanned top to bottom:

    * a row starting with ``Plate Name`` sets the current plate (second cell),
    * a row whose first cell is null starts data collection,
    * a row starting with ``Barcode`` stops data collection,
    * while collecting, each row's first cell is the well name and its
      feature columns are copied into ``plates_dict[plate][well]``.

    Args:
        raw_df: MetaXpress export; must have ``2 + len(features)`` columns.
        plates_dict: ``{plate: {well: {feature: value}}}``, updated in place.
        features: Feature names, in column order.

    Returns:
        ``plates_dict``.
    """
    # set_axis returns a relabelled copy; its ``inplace`` argument was
    # removed in pandas 2.0, so it must not be passed.
    labelled = raw_df.set_axis(["Well", "Laser Focus"] + list(features), axis=1)
    plate_name = None
    collecting = False
    for _, row in labelled.iterrows():
        first_cell = row.iloc[0]
        if first_cell == "Barcode":
            collecting = False
        if collecting:
            well_values = plates_dict[plate_name][first_cell]
            for feature in features:
                well_values[feature] = row[feature]
        if first_cell == "Plate Name":
            plate_name = row.iloc[1]
        elif pd.isnull(first_cell):
            collecting = True
    return plates_dict
def average_plates(plates, raw_df, scope=None):
    """Average several plates well by well and feature by feature.

    NOTE: this module defines ``average_plates`` a second time further down;
    that later definition is the one bound at import time.

    Args:
        plates: Sequence of ``{well: {feature: value}}`` dicts.
        raw_df: Raw results table used to discover wells and features.
        scope: Microscope identifier forwarded to ``create_well_dict`` and
            ``get_features``.

    Returns:
        ``{well: {feature: mean value}}`` for the default plate wells.

    Raises:
        ZeroDivisionError: If ``plates`` is empty.
    """
    average_plate = create_well_dict(raw_df, scope=scope)
    # Forward the scope: without it get_features always terminates the run.
    features = get_features(raw_df, scope=scope)
    n_plates = len(plates)
    for well, well_values in average_plate.items():
        for feature in features:
            total = sum(plate[well][feature] for plate in plates)
            well_values[feature] = total / n_plates
    return average_plate
def average_plates_all_replicates(plate_groups, plates_dict, raw_df):
    """Average the plates of every replicate.

    Args:
        plate_groups: Mapping of replicate to the plate grouping that is
            handed to ``average_plates_one_replicate``.
        plates_dict: Mapping of plate name to plate data.
        raw_df: Raw results table.

    Returns:
        ``{replicate: result of average_plates_one_replicate}``.
    """
    return {
        replicate: average_plates_one_replicate(groups, plates_dict, raw_df)
        for replicate, groups in plate_groups.items()
    }
def average_plates_duplicate_rows(plate_groups, plates_dict, raw_df, wells_to_average=None, scope=None):
    """Average each listed well with the well in the row below, per plate.

    Args:
        plate_groups: Unused; kept for interface compatibility.
        plates_dict: ``{plate_name: {well: {feature: value}}}``.
        raw_df: Raw results table used to discover the feature names.
        wells_to_average: Wells that start a pair of rows. Defaults to the
            wells from ``create_duplicate_wells()``.
            NOTE(review): this default is an assumption; the old ``None``
            default could not be iterated and raised ``TypeError``.
        scope: Microscope identifier forwarded to the helpers.

    Returns:
        ``{plate_name: {well: {feature: averaged value}}}``.
    """
    if wells_to_average is None:
        wells_to_average = create_duplicate_wells()
    features = get_features(raw_df, scope=scope)
    averaged_plates_dict = {}
    for plate_name, plate in plates_dict.items():
        average_plate = create_well_dict(raw_df, scope=scope, wells=wells_to_average)
        for well in wells_to_average:
            average_plate = average_rows(plate, average_plate, well, features)
        averaged_plates_dict[plate_name] = average_plate
    # Return the averages; the untouched input used to be returned instead.
    return averaged_plates_dict
def average_rows(plate_dict, average_plate, well, features, num_rows_average=2):
    """Average a well with the wells directly below it in the same column.

    Args:
        plate_dict: ``{well: {feature: value}}`` holding the source values.
        average_plate: ``{well: {feature: value}}`` receiving the result;
            updated in place under the key ``well``.
        well: Name of the top well of the group.
        features: Feature names to average.
        num_rows_average: Number of consecutive rows to average, counting
            ``well`` itself.

    Returns:
        ``average_plate``.
    """
    wells_to_average = [well]
    current = well
    for _ in range(num_rows_average - 1):
        # Advance the cursor each step. It used to stay on ``well``, so
        # groups larger than two repeated the same row.
        current = get_well_next_row(current)
        wells_to_average.append(current)
    for feature in features:
        total = sum(plate_dict[name][feature] for name in wells_to_average)
        average_plate[well][feature] = total / num_rows_average
    return average_plate
def get_well_next_row(well):
    """Return the well one row below ``well`` in the same column (A01 -> B01)."""
    row_letter, column_part = well[0], well[1:]
    return chr(ord(row_letter) + 1) + column_part
def average_plates(plates, raw_df, scope=None):
    """Average several plates well by well and feature by feature.

    Args:
        plates: Sequence of ``{well: {feature: value}}`` dicts.
        raw_df: Raw results table used to discover wells and features.
        scope: Microscope identifier forwarded to ``create_well_dict`` and
            ``get_features``.

    Returns:
        ``{well: {feature: mean value}}`` for the default plate wells.

    Raises:
        ZeroDivisionError: If ``plates`` is empty.
    """
    average_plate = create_well_dict(raw_df, scope=scope)
    # Forward the scope: without it get_features always terminates the run.
    features = get_features(raw_df, scope=scope)
    n_plates = len(plates)
    for well, well_values in average_plate.items():
        for feature in features:
            total = sum(plate[well][feature] for plate in plates)
            well_values[feature] = total / n_plates
    return average_plate
def average_plates_one_replicate(averaged_plates_names_dict, plates_dict, raw_df, scope=None):
    """Average each named group of plates belonging to one replicate.

    Args:
        averaged_plates_names_dict: ``{group_name: [plate_name, ...]}``.
        plates_dict: ``{plate_name: {well: {feature: value}}}``.
        raw_df: Raw results table.
        scope: Microscope identifier forwarded to ``average_plates``, which
            needs it to discover the feature names.

    Returns:
        ``{group_name: averaged plate}``.
    """
    return {
        group_name: average_plates(
            [plates_dict[plate_name] for plate_name in member_names],
            raw_df,
            scope=scope,
        )
        for group_name, member_names in averaged_plates_names_dict.items()
    }
def load_plate_groups(config_path):
    """Read the ``plate_groups`` sheet of the config workbook.

    The first column of the sheet is used as the row index and the first row
    is skipped. Every remaining row is one replicate, and every remaining
    column is one plate group keyed by the string form of its column label.

    Args:
        config_path: Path to the Excel config workbook.

    Returns:
        ``{replicate: {group: cell value}}``.
    """
    # Close the workbook handle as soon as the sheet has been loaded.
    with pd.ExcelFile(config_path) as xls:
        df = pd.read_excel(xls, 'plate_groups', index_col=0, header=None)
    replicates = df.index.tolist()[1:]
    groups = [str(group) for group in df.columns.tolist()]
    return {
        replicate: {group: df.loc[replicate][int(group)] for group in groups}
        for replicate in replicates
    }
def normalize_plate(plate, reference_wells, raw_df, ctrl_avg_name, scope=None):
    """Divide every well of a plate by the mean of its reference wells.

    Args:
        plate: ``{well: {feature: value}}`` with the measured values.
        reference_wells: Wells whose values are averaged into the control.
        raw_df: Raw results table used to discover wells and features.
        ctrl_avg_name: Key under which the control mean of each feature is
            stored in the result.
        scope: Microscope identifier forwarded to ``get_features`` and
            ``create_well_dict``; both terminate the run without it.

    Returns:
        ``{well: {feature: value / control mean}}`` plus the
        ``ctrl_avg_name`` entry holding the control means. A value that
        cannot be divided (for example ``None``) is copied unchanged.
    """
    features = get_features(raw_df, scope=scope)
    normalized_plate = create_well_dict(raw_df, scope=scope)
    normalized_plate[ctrl_avg_name] = {feature: None for feature in features}
    for feature in features:
        control_values = [plate[well][feature] for well in reference_wells]
        control_avg = np.mean(np.array(control_values))
        normalized_plate[ctrl_avg_name][feature] = control_avg
        for well in normalized_plate:
            # Compare for equality: a substring test would also skip any well
            # whose name happens to occur inside ctrl_avg_name.
            if well == ctrl_avg_name:
                continue
            raw_value = plate[well][feature]
            try:
                normalized_plate[well][feature] = raw_value / control_avg
            except (TypeError, ZeroDivisionError):
                normalized_plate[well][feature] = raw_value
    return normalized_plate
def normalize_all_plates(plates_dict, reference_wells, raw_df, ctrl_avg_name):
    """Apply ``normalize_plate`` to every plate of every replicate.

    Args:
        plates_dict: ``{replicate: {condition: plate}}``.
        reference_wells: Wells forming the control of each plate.
        raw_df: Raw results table.
        ctrl_avg_name: Key for the control means in each normalized plate.

    Returns:
        ``{replicate: {condition: normalized plate}}``.
    """
    return {
        replicate: {
            condition: normalize_plate(plate, reference_wells, raw_df, ctrl_avg_name)
            for condition, plate in condition_plates.items()
        }
        for replicate, condition_plates in plates_dict.items()
    }
def create_table_for_feature(feature, plates_dict):
    """Build a dose x (condition, replicate) table for one feature.

    Args:
        feature: Name of the feature to tabulate.
        plates_dict: ``{condition: {replicate: {dose: {feature: value}}}}``.

    Returns:
        A ``pandas.DataFrame`` indexed by the doses of the first condition
        and replicate, with two-level columns ``(condition, replicate)``.
        Cells without a value are ``None``/NaN.
    """
    conditions = list(plates_dict.keys())
    replicates = list(list(plates_dict.values())[0].keys())
    doses = list(plates_dict[conditions[0]][replicates[0]].keys())

    # Keep (condition, replicate) as a tuple from the start. Joining with "_"
    # and splitting again broke names that contain an underscore.
    column_keys = []
    feature_table = {}
    for condition in conditions:
        for replicate in replicates:
            key = (str(condition), str(replicate))
            column_keys.append(key)
            values = []
            for dose in doses:
                try:
                    value = plates_dict[condition][replicate][dose][feature]
                except (KeyError, TypeError):
                    # Missing dose/feature or an empty slot: leave a gap.
                    value = None
                values.append(value)
            feature_table[key] = values

    feature_table = pd.DataFrame(feature_table)
    feature_table.columns = pd.MultiIndex.from_tuples(column_keys)
    feature_table.index = doses
    return feature_table
def create_all_feature_tables(plates_dict, features):
    """Return ``{feature: table}`` by calling ``create_table_for_feature`` per feature."""
    return {
        feature: create_table_for_feature(feature, plates_dict)
        for feature in features
    }
def feature_tables_to_excel(feature_tables, outpath):
    """Write every feature table to its own sheet of one Excel workbook.

    Sheet names are the first 31 characters of the feature name with the
    characters ``[ ] : * ? / \\`` removed.

    Args:
        feature_tables: ``{feature_name: DataFrame}``.
        outpath: Path of the workbook to write.
    """
    forbidden = str.maketrans('', '', '[]:*?/\\')
    with pd.ExcelWriter(outpath) as writer:
        for feature, table in feature_tables.items():
            sheet_name = feature[:31].translate(forbidden)
            table.to_excel(writer, sheet_name=sheet_name)
def create_duplicate_wells():
    """List the wells of rows A, C, E and G, columns 01 to 12, row by row."""
    return [
        letter + str(column).zfill(2)
        for letter in string.ascii_uppercase[0:8:2]
        for column in range(1, 13)
    ]
def make_experiment_dict_locations(plate_groups, plate_layout, conditions):
    """Re-key the plate layout from replicate-first to condition-first.

    Args:
        plate_groups: Not used by this function.
        plate_layout: ``{replicate: {condition: {dose: locations}}}``.
        conditions: Condition names; each gets an entry in the result.

    Returns:
        ``{condition: {replicate: {dose: locations}}}``.
    """
    experiment_dict = {name: {} for name in conditions}
    for replicate, layout_for_replicate in plate_layout.items():
        for condition, doses in layout_for_replicate.items():
            experiment_dict[condition][replicate] = dict(doses.items())
    return experiment_dict
def make_experiment_dict_values(plates, experiment_dict_locations, features, plate_groups=None):
    """Replace every list of well locations by the averaged feature values.

    Args:
        plates: ``{plate_name: {well: {feature: value}}}``.
        experiment_dict_locations:
            ``{condition: {replicate: {dose: [(well, plate_group), ...]}}}``.
        features: Feature names to average.
        plate_groups: ``{replicate: {group: plate_name}}``. When omitted, the
            module-level ``plate_groups`` variable is used, which is what
            this function silently depended on before.

    Returns:
        ``{condition: {replicate: {dose: {feature: mean value}}}}``; the
        input mapping is not modified.

    Raises:
        NameError: If ``plate_groups`` is omitted and no module-level
            ``plate_groups`` exists.
    """
    if plate_groups is None:
        try:
            plate_groups = globals()["plate_groups"]
        except KeyError:
            raise NameError("name 'plate_groups' is not defined") from None
    experiment_dict_values = copy.deepcopy(experiment_dict_locations)
    for condition, replicates in experiment_dict_locations.items():
        for replicate, doses in replicates.items():
            for dose, locations in doses.items():
                experiment_dict_values[condition][replicate][dose] = {
                    feature: average_wells(locations, replicate, feature, plates, plate_groups)
                    for feature in features
                }
    return experiment_dict_values
def average_wells(locations, replicate, feature, plates, plate_groups):
    """Return the mean value of ``feature`` over the given well locations.

    Each location is resolved with ``location_to_value``. An empty
    ``locations`` raises ``ZeroDivisionError``.
    """
    total = sum(
        location_to_value(location, replicate, feature, plates, plate_groups)
        for location in locations
    )
    return total / float(len(locations))
def location_to_value(location, replicate, feature, plates, plate_groups):
    """Look up one feature value for a ``(well, plate_group)`` location.

    The plate is found through ``plate_groups[replicate][str(plate_group)]``.
    """
    well_name, group_id = location
    plate_key = plate_groups[replicate][str(group_id)]
    return plates[plate_key][well_name][feature]
def normalize_experiment(experiment_dict_values, ctrl_positions, features, plates, plate_groups=None):
    """Divide every experiment value by the control mean of its replicate.

    Args:
        experiment_dict_values:
            ``{condition: {replicate: {dose: {feature: value}}}}``.
        ctrl_positions: ``{replicate: [(well, plate_group), ...]}`` listing
            the control wells of each replicate.
        features: Feature names to normalize.
        plates: ``{plate_name: {well: {feature: value}}}``.
        plate_groups: ``{replicate: {group: plate_name}}``. When omitted, the
            module-level ``plate_groups`` variable is used, which is what
            this function silently depended on before.

    Returns:
        A new mapping shaped like ``experiment_dict_values`` with normalized
        values. A control mean of 0 is treated as 1, leaving the value as is.

    Raises:
        NameError: If ``plate_groups`` is omitted and no module-level
            ``plate_groups`` exists.
    """
    if plate_groups is None:
        try:
            plate_groups = globals()["plate_groups"]
        except KeyError:
            raise NameError("name 'plate_groups' is not defined") from None
    experiment_dict_values_normalized = copy.deepcopy(experiment_dict_values)
    # Control means depend only on the replicate; compute them once each
    # instead of once per condition.
    controls_by_replicate = {}
    for condition, replicates in experiment_dict_values.items():
        for replicate, doses in replicates.items():
            if replicate not in controls_by_replicate:
                ctrl_positions_replicate = ctrl_positions[replicate]
                controls_by_replicate[replicate] = {
                    feature: average_wells(ctrl_positions_replicate, replicate, feature, plates, plate_groups)
                    for feature in features
                }
            feature_control_vals = controls_by_replicate[replicate]
            for dose, values in doses.items():
                feature_value_dict = {}
                for feature in features:
                    ctrl_value = feature_control_vals[feature]
                    if ctrl_value == 0:
                        # Avoid dividing by zero: keep the raw value.
                        ctrl_value = 1
                    feature_value_dict[feature] = values[feature] / ctrl_value
                experiment_dict_values_normalized[condition][replicate][dose] = feature_value_dict
    return experiment_dict_values_normalized
def write_values_heat_map(plates_dict, features, outpath):
    """Write one sheet per feature laying out each plate as an 8x12 grid.

    For every plate the sheet contains a row with the plate name, eight rows
    (A to H) of twelve values (columns 01 to 12) and a separator row.

    Args:
        plates_dict: ``{plate_name: {well: {feature: value}}}``.
        features: Feature names; each becomes a sheet whose name is the first
            31 characters of the feature passed through ``remove_inval_chars``.
        outpath: Path of the workbook to write.
    """
    # Only the pandas writer opens the output file. A second, never-closed
    # xlsxwriter.Workbook used to be created on the same path.
    with pd.ExcelWriter(outpath) as writer:
        for feature in features:
            sheet_rows = []
            for plate_name, plate in plates_dict.items():
                sheet_rows.append([plate_name])
                for row_code in range(65, 65 + 8):
                    sheet_rows.append([
                        plate[chr(row_code) + str(column + 1).zfill(2)][feature]
                        for column in range(12)
                    ])
                sheet_rows.append([""])
            pd.DataFrame(sheet_rows).to_excel(writer, sheet_name=remove_inval_chars(feature[:31]))
def create_reference_wells():
    """List ``(well, 2)`` tuples for rows A to H, columns 07 to 12, row by row."""
    return [
        (letter + str(column).zfill(2), 2)
        for letter in string.ascii_uppercase[:8]
        for column in range(7, 13)
    ]
def remove_inval_chars(name):
    """Return ``name`` without the characters ``[ ] : * ? / \\``."""
    return name.translate(str.maketrans('', '', '[]:*?/\\'))
# Run the analysis only when this file is executed as a script. Without the
# guard, merely importing the module ran the whole pipeline against the
# hard-coded paths below. The names assigned here remain module-level
# variables, so functions that read ``plate_groups`` as a global still work
# when the file is run directly.
if __name__ == "__main__":
    # Input and output locations, relative to the working directory.
    results_path = "mx_results.xlsx"
    config_file = "./config.xlsx"
    compiled_results_path = "./compiled_results_normalized.xlsx"
    heatmap_path = "./heatmaps.xlsx"

    # Experiment description from the config workbook. A well grid that used
    # to be built into ``conditions`` beforehand was dropped: it was
    # overwritten by this call before being read.
    scope, plate_layout, conditions, ctrl_positions = read_plate_layout(config_file)
    plate_groups = load_plate_groups(config_file)
    experiment_dict_locations = make_experiment_dict_locations(plate_groups, plate_layout, conditions)

    # Raw measurements arranged per plate and well.
    df = read_results(results_path, scope=scope)
    features = get_features(df, scope=scope)
    well_dict = create_well_dict(df, scope=scope)
    plates_dict = create_plates_dict(df, scope=scope)
    plates_dict = fill_plates_dict(df, plates_dict, scope=scope)

    # Per-condition values, normalized to the controls when controls exist.
    experiment_dict_values = make_experiment_dict_values(plates_dict, experiment_dict_locations, features)
    if ctrl_positions is not None:
        experiment_dict_values = normalize_experiment(experiment_dict_values, ctrl_positions, features, plates_dict)

    # Output workbooks.
    feature_tables = create_all_feature_tables(experiment_dict_values, features)
    write_values_heat_map(plates_dict, features, heatmap_path)
    feature_tables_to_excel(feature_tables, compiled_results_path)