Coverage for openhcs/processing/backends/experimental_analysis/unified_analysis_engine.py: 21.9%

82 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-01 18:33 +0000

1""" 

2Unified experimental analysis engine. 

3 

4This module provides a unified analysis engine that uses the format registry 

5system to process experimental data from multiple microscope formats following 

6OpenHCS architectural principles. 

7""" 

8 

9import copy 

10from pathlib import Path 

11from typing import Dict, List, Any, Optional, Tuple 

12import pandas as pd 

13 

14from openhcs.core.config import ExperimentalAnalysisConfig, PlateMetadataConfig 

15from .format_registry_service import FormatRegistryService 

16from .format_registry import FormatDetectionError, DataProcessingError 

17 

18 

class ExperimentalAnalysisEngine:
    """
    Unified analysis engine using format registry system.

    This engine eliminates code duplication by using the registry pattern
    to handle different microscope formats through a unified interface.
    The heavy lifting (layout parsing, value mapping, normalization, table
    creation and Excel export) is delegated to the functions in
    ``openhcs.formats.experimental_analysis``; this class only orchestrates
    them.
    """

    def __init__(self, config: ExperimentalAnalysisConfig):
        """
        Initialize analysis engine with configuration.

        Args:
            config: Experimental analysis configuration. The engine reads
                ``auto_detect_format``, ``default_format``,
                ``export_raw_results`` and ``export_heatmaps`` from it.
        """
        self.config = config
        self.format_service = FormatRegistryService()

    def run_analysis(
        self,
        results_path: str,
        config_file: str,
        compiled_results_path: str,
        heatmap_path: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Run complete experimental analysis with automatic format detection.

        Args:
            results_path: Path to microscope results file
            config_file: Path to experimental configuration Excel file
            compiled_results_path: Output path for compiled results
            heatmap_path: Optional output path for heatmap visualization

        Returns:
            Dictionary with the keys ``format_name``, ``features``,
            ``conditions``, ``feature_tables``, ``experiment_config`` and
            ``processed_data``.

        Raises:
            DataProcessingError: If any step of the pipeline fails. Every
                exception raised inside the pipeline (including
                FormatDetectionError from format detection and
                FileNotFoundError for a missing configuration file) is
                re-raised as DataProcessingError; the original exception
                is available as ``__cause__``.
        """
        try:
            # Step 1: Detect or determine format
            format_name = self._determine_format(results_path)

            # Step 2: Get format registry
            format_registry = self.format_service.get_registry_instance_for_format(format_name)

            # Step 3: Parse experimental configuration
            experiment_config = self._parse_experiment_config(config_file)

            # Step 4: Process microscope data
            processed_data = format_registry.process_data(results_path)

            # Step 5: Create experiment data structure
            experiment_dict_locations = self._make_experiment_dict_locations(
                experiment_config['plate_groups'],
                experiment_config['plate_layout'],
                experiment_config['conditions']
            )

            # Step 6: Map experimental design to measured values
            experiment_dict_values = self._make_experiment_dict_values(
                processed_data['plates_dict'],
                experiment_dict_locations,
                processed_data['features'],
                experiment_config['plate_groups'],
                experiment_config['per_well_datapoints']
            )

            # Step 7: Apply normalization if controls are defined
            if experiment_config['ctrl_positions'] is not None:
                experiment_dict_values_normalized = self._normalize_experiment(
                    experiment_dict_values,
                    experiment_config['ctrl_positions'],
                    processed_data['features'],
                    processed_data['plates_dict'],
                    experiment_config['plate_groups']
                )
            else:
                # No controls: the "normalized" tables are the raw values.
                experiment_dict_values_normalized = experiment_dict_values

            # Step 8: Generate results tables
            feature_tables = self._create_all_feature_tables(
                experiment_dict_values_normalized,
                processed_data['features'],
                experiment_config['per_well_datapoints']
            )

            # Step 9: Export results
            self._export_results(feature_tables, compiled_results_path)

            # Step 10: Export raw results if configured
            if self.config.export_raw_results:
                # The raw path is always distinct from compiled_results_path,
                # so the raw export can never overwrite the file from step 9.
                raw_results_path = self._raw_results_path(compiled_results_path)
                feature_tables_raw = self._create_all_feature_tables(
                    experiment_dict_values,
                    processed_data['features'],
                    experiment_config['per_well_datapoints']
                )
                self._export_results(feature_tables_raw, raw_results_path)

            # Step 11: Generate heatmaps if configured
            if self.config.export_heatmaps and heatmap_path:
                self._export_heatmaps(feature_tables, heatmap_path)

            return {
                'format_name': format_name,
                'features': processed_data['features'],
                'conditions': experiment_config['conditions'],
                'feature_tables': feature_tables,
                'experiment_config': experiment_config,
                'processed_data': processed_data
            }

        except Exception as e:
            # Single failure type for callers; the cause stays chained.
            raise DataProcessingError(f"Analysis failed: {e}") from e

    @staticmethod
    def _raw_results_path(compiled_results_path: str) -> str:
        """
        Derive the raw-results output path from the compiled-results path.

        ``_raw`` is inserted immediately before the file extension of the
        final path component. Unlike a plain ``str.replace('.xlsx', ...)``,
        this works for any extension (or none) and never touches directory
        names, so the returned path always differs from the input.

        Args:
            compiled_results_path: Output path for compiled results

        Returns:
            Path for the raw results file
        """
        import os

        root, extension = os.path.splitext(compiled_results_path)
        return f"{root}_raw{extension}"

    def _determine_format(self, results_path: str) -> str:
        """
        Determine microscope format for results file.

        With auto-detection enabled the format service is asked first and
        ``config.default_format`` is only used as a fallback when detection
        fails. With auto-detection disabled the default format is required.

        Args:
            results_path: Path to results file

        Returns:
            Format name

        Raises:
            FormatDetectionError: If format cannot be determined
        """
        if self.config.auto_detect_format:
            try:
                return self.format_service.detect_format_from_file(results_path)
            except FormatDetectionError:
                if self.config.default_format:
                    return self.config.default_format
                raise
        elif self.config.default_format:
            return self.config.default_format
        else:
            raise FormatDetectionError(
                "Auto-detection disabled and no default format specified"
            )

    def _parse_experiment_config(self, config_file: str) -> Dict[str, Any]:
        """
        Parse experimental configuration from Excel file.

        Args:
            config_file: Path to configuration file

        Returns:
            Dictionary with the keys ``scope``, ``plate_layout``,
            ``conditions``, ``ctrl_positions``, ``excluded_positions``,
            ``per_well_datapoints`` and ``plate_groups``.

        Raises:
            FileNotFoundError: If config file doesn't exist
            ValueError: If reading the plate layout or plate groups fails;
                the underlying exception is chained as ``__cause__``.
        """
        config_path = Path(config_file)
        if not config_path.exists():
            raise FileNotFoundError(f"Configuration file not found: {config_file}")

        try:
            # Parse experimental design
            scope, plate_layout, conditions, ctrl_positions, excluded_positions, per_well_datapoints = self._read_plate_layout(config_file)

            # Parse plate groups
            plate_groups = self._load_plate_groups(config_file)

            return {
                'scope': scope,
                'plate_layout': plate_layout,
                'conditions': conditions,
                'ctrl_positions': ctrl_positions,
                'excluded_positions': excluded_positions,
                'per_well_datapoints': per_well_datapoints,
                'plate_groups': plate_groups
            }

        except Exception as e:
            raise ValueError(f"Failed to parse configuration file {config_file}: {e}") from e

    def _read_plate_layout(self, config_path: str) -> Tuple[str, Dict, List, Optional[Dict], Optional[Dict], bool]:
        """
        Read plate layout from configuration file.

        This method maintains compatibility with existing configuration format
        while using the new architecture.

        Args:
            config_path: Path to configuration file

        Returns:
            Tuple of (scope, plate_layout, conditions, ctrl_positions, excluded_positions, per_well_datapoints)
        """
        # Import the existing function to maintain compatibility
        # This will be gradually refactored to use the new architecture
        from openhcs.formats.experimental_analysis import read_plate_layout
        return read_plate_layout(config_path)

    def _load_plate_groups(self, config_path: str) -> Dict:
        """
        Load plate groups from configuration file.

        Args:
            config_path: Path to configuration file

        Returns:
            Plate groups dictionary
        """
        # Import the existing function to maintain compatibility
        from openhcs.formats.experimental_analysis import load_plate_groups
        return load_plate_groups(config_path)

    def _make_experiment_dict_locations(self, plate_groups: Dict, plate_layout: Dict, conditions: List) -> Dict:
        """Create experiment location mapping (delegates to ``make_experiment_dict_locations``)."""
        from openhcs.formats.experimental_analysis import make_experiment_dict_locations
        return make_experiment_dict_locations(plate_groups, plate_layout, conditions)

    def _make_experiment_dict_values(self, plates_dict: Dict, experiment_dict_locations: Dict, features: List, plate_groups: Dict, per_well_datapoints: bool = False) -> Dict:
        """Map experimental design to measured values (delegates to ``make_experiment_dict_values``)."""
        from openhcs.formats.experimental_analysis import make_experiment_dict_values
        return make_experiment_dict_values(plates_dict, experiment_dict_locations, features, plate_groups, per_well_datapoints)

    def _normalize_experiment(self, experiment_dict_values: Dict, ctrl_positions: Dict, features: List, plates_dict: Dict, plate_groups: Dict) -> Dict:
        """Apply normalization using control wells (delegates to ``normalize_experiment``)."""
        from openhcs.formats.experimental_analysis import normalize_experiment
        return normalize_experiment(experiment_dict_values, ctrl_positions, features, plates_dict, plate_groups)

    def _create_all_feature_tables(self, experiment_dict_values: Dict, features: List, per_well_datapoints: bool = False) -> Dict:
        """Create feature tables for export (delegates to ``create_all_feature_tables``)."""
        from openhcs.formats.experimental_analysis import create_all_feature_tables
        return create_all_feature_tables(experiment_dict_values, features, per_well_datapoints)

    def _export_results(self, feature_tables: Dict, output_path: str):
        """Export results to Excel file (delegates to ``feature_tables_to_excel``)."""
        from openhcs.formats.experimental_analysis import feature_tables_to_excel
        feature_tables_to_excel(feature_tables, output_path)

    def _export_heatmaps(self, feature_tables: Dict, output_path: str):
        """
        Export heatmap visualizations.

        Placeholder: no heatmap rendering is implemented yet. The feature
        tables are written to ``output_path`` with the same exporter used
        for the regular results.
        """
        # This would be implemented to generate heatmaps
        # For now, use the same export as regular results
        self._export_results(feature_tables, output_path)

265 

266 

267# Backward compatibility function 

def run_experimental_analysis(
    results_path: str = "mx_results.xlsx",
    config_file: str = "./config.xlsx",
    compiled_results_path: str = "./compiled_results_normalized.xlsx",
    heatmap_path: str = "./heatmaps.xlsx"
) -> Dict[str, Any]:
    """
    Backward-compatible entry point for the experimental analysis pipeline.

    Builds an ``ExperimentalAnalysisEngine`` from a default-constructed
    ``ExperimentalAnalysisConfig`` and forwards all four paths to its
    ``run_analysis`` method.

    Args:
        results_path: Path to results Excel file (CX5 or MetaXpress format)
        config_file: Path to experimental configuration Excel file
        compiled_results_path: Output path for compiled results
        heatmap_path: Output path for heatmap visualization

    Returns:
        Analysis results dictionary, exactly as returned by
        ``ExperimentalAnalysisEngine.run_analysis``
    """
    from openhcs.core.config import ExperimentalAnalysisConfig

    # Default configuration keeps the legacy call signature working.
    forwarded_paths = {
        'results_path': results_path,
        'config_file': config_file,
        'compiled_results_path': compiled_results_path,
        'heatmap_path': heatmap_path,
    }
    return ExperimentalAnalysisEngine(ExperimentalAnalysisConfig()).run_analysis(**forwarded_paths)