Coverage for openhcs/processing/backends/experimental_analysis/unified_analysis_engine.py: 20.2%

80 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-04 02:09 +0000

1""" 

2Unified experimental analysis engine. 

3 

4This module provides a unified analysis engine that uses the format registry 

5system to process experimental data from multiple microscope formats following 

6OpenHCS architectural principles. 

7""" 

8 

9from pathlib import Path 

10from typing import Dict, List, Any, Optional, Tuple 

11 

12from openhcs.core.config import ExperimentalAnalysisConfig 

13from .format_registry_service import FormatRegistryService 

14from .format_registry import FormatDetectionError, DataProcessingError 

15 

16 

class ExperimentalAnalysisEngine:
    """
    Unified analysis engine using format registry system.

    The engine asks a FormatRegistryService for the registry matching the
    results file and hands the remaining pipeline steps (layout parsing,
    value mapping, normalization, table creation, export) to the functions
    in ``openhcs.formats.experimental_analysis``.
    """

    def __init__(self, config: "ExperimentalAnalysisConfig"):
        """
        Initialize analysis engine with configuration.

        Args:
            config: Experimental analysis configuration. The engine reads
                ``auto_detect_format``, ``default_format``,
                ``export_raw_results`` and ``export_heatmaps`` from it.
        """
        self.config = config
        self.format_service = FormatRegistryService()

    def run_analysis(
        self,
        results_path: str,
        config_file: str,
        compiled_results_path: str,
        heatmap_path: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Run complete experimental analysis with automatic format detection.

        Args:
            results_path: Path to microscope results file
            config_file: Path to experimental configuration Excel file
            compiled_results_path: Output path for compiled results
            heatmap_path: Optional output path for heatmap visualization

        Returns:
            Dictionary with the keys ``format_name``, ``features``,
            ``conditions``, ``feature_tables``, ``experiment_config`` and
            ``processed_data``.

        Raises:
            DataProcessingError: If any step fails. Every exception raised
                inside the pipeline (including FormatDetectionError from
                format detection and FileNotFoundError / ValueError from
                configuration parsing) is wrapped in DataProcessingError;
                the original exception is available as ``__cause__``.
        """
        try:
            # Step 1: Detect or determine format
            format_name = self._determine_format(results_path)

            # Step 2: Get format registry
            format_registry = self.format_service.get_registry_instance_for_format(format_name)

            # Step 3: Parse experimental configuration
            experiment_config = self._parse_experiment_config(config_file)

            # Step 4: Process microscope data
            processed_data = format_registry.process_data(results_path)

            # Step 5: Create experiment data structure
            experiment_dict_locations = self._make_experiment_dict_locations(
                experiment_config['plate_groups'],
                experiment_config['plate_layout'],
                experiment_config['conditions']
            )

            # Step 6: Map experimental design to measured values
            experiment_dict_values = self._make_experiment_dict_values(
                processed_data['plates_dict'],
                experiment_dict_locations,
                processed_data['features'],
                experiment_config['plate_groups'],
                experiment_config['per_well_datapoints']
            )

            # Step 7: Apply normalization if controls are defined
            if experiment_config['ctrl_positions'] is not None:
                experiment_dict_values_normalized = self._normalize_experiment(
                    experiment_dict_values,
                    experiment_config['ctrl_positions'],
                    processed_data['features'],
                    processed_data['plates_dict'],
                    experiment_config['plate_groups']
                )
            else:
                # No controls: the "normalized" tables are the raw values.
                experiment_dict_values_normalized = experiment_dict_values

            # Step 8: Generate results tables
            feature_tables = self._create_all_feature_tables(
                experiment_dict_values_normalized,
                processed_data['features'],
                experiment_config['per_well_datapoints']
            )

            # Step 9: Export results
            self._export_results(feature_tables, compiled_results_path)

            # Step 10: Export raw results if configured
            if self.config.export_raw_results:
                # Derive the sibling path from the file name only, so the raw
                # export can never land on the compiled results file itself.
                raw_results_path = self._raw_results_path(compiled_results_path)
                feature_tables_raw = self._create_all_feature_tables(
                    experiment_dict_values,
                    processed_data['features'],
                    experiment_config['per_well_datapoints']
                )
                self._export_results(feature_tables_raw, raw_results_path)

            # Step 11: Generate heatmaps if configured
            if self.config.export_heatmaps and heatmap_path:
                self._export_heatmaps(feature_tables, heatmap_path)

            return {
                'format_name': format_name,
                'features': processed_data['features'],
                'conditions': experiment_config['conditions'],
                'feature_tables': feature_tables,
                'experiment_config': experiment_config,
                'processed_data': processed_data
            }

        except Exception as e:
            # Single failure type for callers; the cause stays chained.
            raise DataProcessingError(f"Analysis failed: {e}") from e

    @staticmethod
    def _raw_results_path(compiled_results_path: str) -> str:
        """
        Build the output path for the raw (non-normalized) results.

        ``_raw`` is inserted between the file stem and its final suffix,
        e.g. ``out.xlsx`` -> ``out_raw.xlsx``. Only the last path component
        is touched, and a path without an ``.xlsx`` suffix still yields a
        distinct file name instead of the compiled results path itself.

        Args:
            compiled_results_path: Output path of the compiled results

        Returns:
            Path string for the raw results file
        """
        compiled = Path(compiled_results_path)
        return str(compiled.with_name(f"{compiled.stem}_raw{compiled.suffix}"))

    def _determine_format(self, results_path: str) -> str:
        """
        Determine microscope format for results file.

        With ``config.auto_detect_format`` enabled the format service is
        asked first and ``config.default_format`` is only a fallback when
        detection fails; otherwise ``config.default_format`` is used directly.

        Args:
            results_path: Path to results file

        Returns:
            Format name

        Raises:
            FormatDetectionError: If format cannot be determined
        """
        if self.config.auto_detect_format:
            try:
                return self.format_service.detect_format_from_file(results_path)
            except FormatDetectionError:
                if self.config.default_format:
                    return self.config.default_format.value
                # No fallback configured: surface the detection failure.
                raise
        elif self.config.default_format:
            return self.config.default_format.value
        else:
            raise FormatDetectionError(
                "Auto-detection disabled and no default format specified"
            )

    def _parse_experiment_config(self, config_file: str) -> Dict[str, Any]:
        """
        Parse experimental configuration from Excel file.

        Args:
            config_file: Path to configuration file

        Returns:
            Dictionary with the keys ``scope``, ``plate_layout``,
            ``conditions``, ``ctrl_positions``, ``excluded_positions``,
            ``per_well_datapoints`` and ``plate_groups``.

        Raises:
            FileNotFoundError: If config file doesn't exist
            ValueError: If config parsing fails (original error is chained)
        """
        config_path = Path(config_file)
        if not config_path.exists():
            raise FileNotFoundError(f"Configuration file not found: {config_file}")

        try:
            # Parse experimental design
            (
                scope,
                plate_layout,
                conditions,
                ctrl_positions,
                excluded_positions,
                per_well_datapoints,
            ) = self._read_plate_layout(config_file)

            # Parse plate groups
            plate_groups = self._load_plate_groups(config_file)

            return {
                'scope': scope,
                'plate_layout': plate_layout,
                'conditions': conditions,
                'ctrl_positions': ctrl_positions,
                'excluded_positions': excluded_positions,
                'per_well_datapoints': per_well_datapoints,
                'plate_groups': plate_groups
            }

        except Exception as e:
            raise ValueError(f"Failed to parse configuration file {config_file}: {e}") from e

    def _read_plate_layout(self, config_path: str) -> Tuple[str, Dict, List, Optional[Dict], Optional[Dict], bool]:
        """
        Read plate layout from configuration file.

        This method maintains compatibility with existing configuration format
        while using the new architecture.

        Args:
            config_path: Path to configuration file

        Returns:
            Tuple of (scope, plate_layout, conditions, ctrl_positions, excluded_positions, per_well_datapoints)
        """
        # Import the existing function to maintain compatibility
        # This will be gradually refactored to use the new architecture
        from openhcs.formats.experimental_analysis import read_plate_layout
        return read_plate_layout(config_path)

    def _load_plate_groups(self, config_path: str) -> Dict:
        """
        Load plate groups from configuration file.

        Args:
            config_path: Path to configuration file

        Returns:
            Plate groups dictionary
        """
        # Import the existing function to maintain compatibility
        from openhcs.formats.experimental_analysis import load_plate_groups
        return load_plate_groups(config_path)

    def _make_experiment_dict_locations(self, plate_groups: Dict, plate_layout: Dict, conditions: List) -> Dict:
        """Create experiment location mapping (delegates to make_experiment_dict_locations)."""
        from openhcs.formats.experimental_analysis import make_experiment_dict_locations
        return make_experiment_dict_locations(plate_groups, plate_layout, conditions)

    def _make_experiment_dict_values(self, plates_dict: Dict, experiment_dict_locations: Dict, features: List, plate_groups: Dict, per_well_datapoints: bool = False) -> Dict:
        """Map experimental design to measured values (delegates to make_experiment_dict_values)."""
        from openhcs.formats.experimental_analysis import make_experiment_dict_values
        return make_experiment_dict_values(plates_dict, experiment_dict_locations, features, plate_groups, per_well_datapoints)

    def _normalize_experiment(self, experiment_dict_values: Dict, ctrl_positions: Dict, features: List, plates_dict: Dict, plate_groups: Dict) -> Dict:
        """Apply normalization using control wells (delegates to normalize_experiment)."""
        from openhcs.formats.experimental_analysis import normalize_experiment
        return normalize_experiment(experiment_dict_values, ctrl_positions, features, plates_dict, plate_groups)

    def _create_all_feature_tables(self, experiment_dict_values: Dict, features: List, per_well_datapoints: bool = False) -> Dict:
        """Create feature tables for export (delegates to create_all_feature_tables)."""
        from openhcs.formats.experimental_analysis import create_all_feature_tables
        return create_all_feature_tables(experiment_dict_values, features, per_well_datapoints)

    def _export_results(self, feature_tables: Dict, output_path: str) -> None:
        """Export results to Excel file (delegates to feature_tables_to_excel)."""
        from openhcs.formats.experimental_analysis import feature_tables_to_excel
        feature_tables_to_excel(feature_tables, output_path)

    def _export_heatmaps(self, feature_tables: Dict, output_path: str) -> None:
        """Export heatmap visualizations.

        Placeholder: no heatmap rendering happens here yet. The feature
        tables are written to ``output_path`` with the regular exporter.
        """
        # This would be implemented to generate heatmaps
        # For now, use the same export as regular results
        self._export_results(feature_tables, output_path)

263 

264 

# Backward compatibility function
def run_experimental_analysis(
    results_path: str = "mx_results.xlsx",
    config_file: str = "./config.xlsx",
    compiled_results_path: str = "./compiled_results_normalized.xlsx",
    heatmap_path: str = "./heatmaps.xlsx"
) -> Dict[str, Any]:
    """
    Function-style entry point kept for backward compatibility.

    Builds an ExperimentalAnalysisEngine around a default-constructed
    ExperimentalAnalysisConfig and forwards all four paths to
    ``ExperimentalAnalysisEngine.run_analysis``.

    Args:
        results_path: Path to the microscope results file
        config_file: Path to experimental configuration Excel file
        compiled_results_path: Output path for compiled results
        heatmap_path: Output path for heatmap visualization

    Returns:
        The dictionary returned by ``ExperimentalAnalysisEngine.run_analysis``
    """
    from openhcs.core.config import ExperimentalAnalysisConfig

    # Default configuration: legacy callers have no way to pass one in.
    default_engine = ExperimentalAnalysisEngine(ExperimentalAnalysisConfig())

    forwarded_paths = {
        'results_path': results_path,
        'config_file': config_file,
        'compiled_results_path': compiled_results_path,
        'heatmap_path': heatmap_path,
    }
    return default_engine.run_analysis(**forwarded_paths)