Coverage for openhcs/processing/backends/experimental_analysis/format_registry.py: 41.4%

56 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-04 02:09 +0000

1""" 

2Base classes for microscope format registry system. 

3 

4This module provides the abstract base class and common functionality for 

5microscope format registries, following OpenHCS registry architecture patterns. 

6""" 

7 

8from abc import ABC, abstractmethod 

9from dataclasses import dataclass 

10from typing import Dict, List, Any, Optional, Tuple, Type 

11import pandas as pd 

12from pathlib import Path 

13 

14from openhcs.core.auto_register_meta import AutoRegisterMeta 

15 

16 

@dataclass(frozen=True)
class MicroscopeFormatConfig:
    """Immutable settings record describing one microscope format.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Name identifying the format.
    format_name: str
    # Sheet to read from a workbook; may be None.
    sheet_name: Optional[str]
    # File suffixes this format is declared to support.
    supported_extensions: Tuple[str, ...]
    # Name of the feature-extraction method, stored as a string.
    feature_extraction_method: str
    # Name of the plate-detection method, stored as a string.
    plate_detection_method: str

25 

26 

class MicroscopeFormatRegistryBase(ABC, metaclass=AutoRegisterMeta):
    """
    Abstract base class for microscope format registries.

    Following OpenHCS registry patterns, this provides a unified interface
    for processing different microscope data formats while eliminating
    code duplication and hardcoded format-specific logic.

    Registry auto-created and stored as MicroscopeFormatRegistryBase.__registry__.
    Subclasses auto-register by setting FORMAT_NAME class attribute.
    """
    __registry_key__ = 'FORMAT_NAME'

    # Abstract class attributes - each implementation must define these
    FORMAT_NAME: str
    SHEET_NAME: Optional[str]  # None means use first sheet
    SUPPORTED_EXTENSIONS: Tuple[str, ...]

    def __init__(self):
        """
        Initialize registry with format configuration.

        Builds a frozen MicroscopeFormatConfig from the class attributes.
        The two method-name fields are derived from the lower-cased
        FORMAT_NAME ("extract_features_<name>" / "extract_plates_<name>").
        """
        format_key = self.FORMAT_NAME.lower()
        self.config = MicroscopeFormatConfig(
            format_name=self.FORMAT_NAME,
            sheet_name=self.SHEET_NAME,
            supported_extensions=self.SUPPORTED_EXTENSIONS,
            feature_extraction_method=f"extract_features_{format_key}",
            plate_detection_method=f"extract_plates_{format_key}",
        )

    @property
    def format_name(self) -> str:
        """Get format name for this registry."""
        return self.FORMAT_NAME

    @abstractmethod
    def extract_features(self, raw_df: pd.DataFrame) -> List[str]:
        """
        Extract feature column names from raw microscope data.

        Args:
            raw_df: Raw data DataFrame from microscope

        Returns:
            List of feature column names

        Raises:
            ValueError: If feature extraction fails
        """

    @abstractmethod
    def extract_plate_names(self, raw_df: pd.DataFrame) -> List[str]:
        """
        Extract plate identifiers from raw microscope data.

        Args:
            raw_df: Raw data DataFrame from microscope

        Returns:
            List of plate identifiers

        Raises:
            ValueError: If plate extraction fails
        """

    @abstractmethod
    def create_plates_dict(self, raw_df: pd.DataFrame) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """
        Create nested dictionary structure for plate data.

        Args:
            raw_df: Raw data DataFrame from microscope

        Returns:
            Dictionary structure: {plate_id: {well_id: {feature: value}}}

        Raises:
            ValueError: If data structure creation fails
        """

    @abstractmethod
    def fill_plates_dict(
        self,
        raw_df: pd.DataFrame,
        plates_dict: Dict[str, Dict[str, Dict[str, Any]]],
    ) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """
        Fill plates dictionary with actual measurement values.

        Args:
            raw_df: Raw data DataFrame from microscope
            plates_dict: Empty plates dictionary structure

        Returns:
            Filled plates dictionary with measurement values

        Raises:
            ValueError: If data filling fails
        """

    def read_results(self, results_path: str) -> pd.DataFrame:
        """
        Read results file using format-specific logic.

        Files whose suffix is ".csv" (compared case-insensitively) are read
        with pandas.read_csv; every other supported suffix is treated as an
        Excel workbook, from which SHEET_NAME is read (or the first sheet
        when SHEET_NAME is unset).

        Args:
            results_path: Path to results file (a str; pathlib.Path objects
                are accepted as well)

        Returns:
            Raw data DataFrame

        Raises:
            FileNotFoundError: If results file doesn't exist
            ValueError: If file format is not supported
        """
        results_file = Path(results_path)

        if not results_file.exists():
            raise FileNotFoundError(f"Results file not found: {results_path}")

        suffix = results_file.suffix
        if suffix not in self.SUPPORTED_EXTENSIONS:
            raise ValueError(f"Unsupported file extension {suffix} for format {self.FORMAT_NAME}")

        # Dispatch on the parsed suffix (the same value the check above used)
        # rather than on the raw argument, so Path inputs work and a supported
        # ".CSV" is not mistakenly handed to the Excel reader.
        if suffix.lower() == '.csv':
            return pd.read_csv(results_file)

        # Excel file: the context manager closes the workbook handle even if
        # parsing raises.
        with pd.ExcelFile(results_file) as xls:
            sheet_name = self.SHEET_NAME if self.SHEET_NAME else xls.sheet_names[0]
            return pd.read_excel(xls, sheet_name=sheet_name)

    def process_data(self, results_path: str) -> Dict[str, Any]:
        """
        Complete data processing pipeline for this format.

        Reads the file, then calls extract_features, extract_plate_names,
        create_plates_dict and fill_plates_dict in that order.

        Args:
            results_path: Path to results file

        Returns:
            Dictionary with keys 'raw_df', 'features', 'plate_names',
            'plates_dict' (the filled dictionary) and 'format_name'

        Raises:
            FileNotFoundError: If results file doesn't exist
            ValueError: If data processing fails
        """
        # Read raw data
        raw_df = self.read_results(results_path)

        # Extract features and plates
        features = self.extract_features(raw_df)
        plate_names = self.extract_plate_names(raw_df)

        # Create and fill data structures
        plates_dict = self.create_plates_dict(raw_df)
        filled_plates_dict = self.fill_plates_dict(raw_df, plates_dict)

        return {
            'raw_df': raw_df,
            'features': features,
            'plate_names': plate_names,
            'plates_dict': filled_plates_dict,
            'format_name': self.FORMAT_NAME,
        }

    def validate_data_structure(self, data: Dict[str, Any]) -> bool:
        """
        Validate processed data structure.

        Checks that every required key is present and that the 'features'
        and 'plate_names' entries are non-empty.

        Args:
            data: Processed data dictionary

        Returns:
            True if data structure is valid

        Raises:
            ValueError: If validation fails
        """
        required_keys = ['raw_df', 'features', 'plate_names', 'plates_dict', 'format_name']

        for key in required_keys:
            if key not in data:
                raise ValueError(f"Missing required key in data structure: {key}")

        if not data['features']:
            raise ValueError("No features extracted from data")

        if not data['plate_names']:
            raise ValueError("No plates detected in data")

        return True

213 

214 

class FormatDetectionError(Exception):
    """Signals that the microscope format could not be detected."""

218 

219 

class DataProcessingError(Exception):
    """Signals that processing of microscope data failed."""

223 

224 

# ============================================================================
# Registry Export
# ============================================================================
# Module-level alias for the ``__registry__`` attribute found on
# MicroscopeFormatRegistryBase, so callers can import the registry by name.
# NOTE(review): the attribute is presumably populated by AutoRegisterMeta,
# keyed by FORMAT_NAME - confirm against that metaclass.
MICROSCOPE_FORMAT_REGISTRIES = MicroscopeFormatRegistryBase.__registry__