Coverage for openhcs/processing/backends/experimental_analysis/format_registry.py: 41.4%
56 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-04 02:09 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-04 02:09 +0000
1"""
2Base classes for microscope format registry system.
4This module provides the abstract base class and common functionality for
5microscope format registries, following OpenHCS registry architecture patterns.
6"""
8from abc import ABC, abstractmethod
9from dataclasses import dataclass
10from typing import Dict, List, Any, Optional, Tuple, Type
11import pandas as pd
12from pathlib import Path
14from openhcs.core.auto_register_meta import AutoRegisterMeta
@dataclass(frozen=True)
class MicroscopeFormatConfig:
    """Immutable bundle of settings describing one microscope format.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Name identifying the format.
    format_name: str
    # Worksheet to read from a workbook; may be None.
    sheet_name: Optional[str]
    # File extensions accepted for this format.
    supported_extensions: Tuple[str, ...]
    # Name of the feature extraction method for this format.
    feature_extraction_method: str
    # Name of the plate detection method for this format.
    plate_detection_method: str
class MicroscopeFormatRegistryBase(ABC, metaclass=AutoRegisterMeta):
    """
    Abstract base class for microscope format registries.

    Following OpenHCS registry patterns, this provides a unified interface
    for processing different microscope data formats while eliminating
    code duplication and hardcoded format-specific logic.

    Registry auto-created and stored as MicroscopeFormatRegistryBase.__registry__.
    Subclasses auto-register by setting FORMAT_NAME class attribute.
    """
    __registry_key__ = 'FORMAT_NAME'

    # Abstract class attributes - each implementation must define these
    FORMAT_NAME: str
    SHEET_NAME: Optional[str]  # None means use first sheet
    SUPPORTED_EXTENSIONS: Tuple[str, ...]

    def __init__(self):
        """Build the immutable format configuration from the class attributes."""
        method_suffix = self.FORMAT_NAME.lower()
        self.config = MicroscopeFormatConfig(
            format_name=self.FORMAT_NAME,
            sheet_name=self.SHEET_NAME,
            supported_extensions=self.SUPPORTED_EXTENSIONS,
            feature_extraction_method=f"extract_features_{method_suffix}",
            plate_detection_method=f"extract_plates_{method_suffix}",
        )

    @property
    def format_name(self) -> str:
        """Get format name for this registry."""
        return self.FORMAT_NAME

    @abstractmethod
    def extract_features(self, raw_df: pd.DataFrame) -> List[str]:
        """
        Extract feature column names from raw microscope data.

        Args:
            raw_df: Raw data DataFrame from microscope

        Returns:
            List of feature column names

        Raises:
            ValueError: If feature extraction fails
        """
        pass

    @abstractmethod
    def extract_plate_names(self, raw_df: pd.DataFrame) -> List[str]:
        """
        Extract plate identifiers from raw microscope data.

        Args:
            raw_df: Raw data DataFrame from microscope

        Returns:
            List of plate identifiers

        Raises:
            ValueError: If plate extraction fails
        """
        pass

    @abstractmethod
    def create_plates_dict(self, raw_df: pd.DataFrame) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """
        Create nested dictionary structure for plate data.

        Args:
            raw_df: Raw data DataFrame from microscope

        Returns:
            Dictionary structure: {plate_id: {well_id: {feature: value}}}

        Raises:
            ValueError: If data structure creation fails
        """
        pass

    @abstractmethod
    def fill_plates_dict(
        self,
        raw_df: pd.DataFrame,
        plates_dict: Dict[str, Dict[str, Dict[str, Any]]],
    ) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """
        Fill plates dictionary with actual measurement values.

        Args:
            raw_df: Raw data DataFrame from microscope
            plates_dict: Empty plates dictionary structure

        Returns:
            Filled plates dictionary with measurement values

        Raises:
            ValueError: If data filling fails
        """
        pass

    def read_results(self, results_path: str) -> pd.DataFrame:
        """
        Read results file using format-specific logic.

        The file extension is matched case-insensitively against
        SUPPORTED_EXTENSIONS. CSV files are read with ``pd.read_csv``; any
        other supported extension is treated as an Excel workbook, from which
        SHEET_NAME is read (or the first sheet when SHEET_NAME is falsy).

        Args:
            results_path: Path to results file (a string or path-like object)

        Returns:
            Raw data DataFrame

        Raises:
            FileNotFoundError: If results file doesn't exist
            ValueError: If file format is not supported
        """
        results_file = Path(results_path)

        if not results_file.exists():
            raise FileNotFoundError(f"Results file not found: {results_path}")

        # Derive the suffix once and use it for both the support check and the
        # CSV/Excel dispatch so the two decisions can never disagree.
        suffix = results_file.suffix.lower()
        supported = {ext.lower() for ext in self.SUPPORTED_EXTENSIONS}
        if suffix not in supported:
            raise ValueError(f"Unsupported file extension {results_file.suffix} for format {self.FORMAT_NAME}")

        if suffix == '.csv':
            return pd.read_csv(results_file)

        # Excel file: the context manager closes the workbook handle even if
        # reading the sheet raises.
        with pd.ExcelFile(results_file) as xls:
            sheet_name = self.SHEET_NAME if self.SHEET_NAME else xls.sheet_names[0]
            return pd.read_excel(xls, sheet_name=sheet_name)

    def process_data(self, results_path: str) -> Dict[str, Any]:
        """
        Complete data processing pipeline for this format.

        Reads the results file, then runs the feature, plate-name and
        plates-dictionary hooks implemented by the subclass.

        Args:
            results_path: Path to results file

        Returns:
            Dictionary with keys 'raw_df', 'features', 'plate_names',
            'plates_dict' and 'format_name'

        Raises:
            ValueError: If data processing fails
        """
        # Read raw data
        raw_df = self.read_results(results_path)

        # Extract features and plates
        features = self.extract_features(raw_df)
        plate_names = self.extract_plate_names(raw_df)

        # Create and fill data structures
        plates_dict = self.create_plates_dict(raw_df)
        filled_plates_dict = self.fill_plates_dict(raw_df, plates_dict)

        return {
            'raw_df': raw_df,
            'features': features,
            'plate_names': plate_names,
            'plates_dict': filled_plates_dict,
            'format_name': self.FORMAT_NAME,
        }

    def validate_data_structure(self, data: Dict[str, Any]) -> bool:
        """
        Validate processed data structure.

        Checks that every expected key is present and that the 'features'
        and 'plate_names' entries are non-empty.

        Args:
            data: Processed data dictionary

        Returns:
            True if data structure is valid

        Raises:
            ValueError: If validation fails
        """
        required_keys = ['raw_df', 'features', 'plate_names', 'plates_dict', 'format_name']

        for key in required_keys:
            if key not in data:
                raise ValueError(f"Missing required key in data structure: {key}")

        if not data['features']:
            raise ValueError("No features extracted from data")

        if not data['plate_names']:
            raise ValueError("No plates detected in data")

        return True
class FormatDetectionError(Exception):
    """Error raised when the microscope format cannot be detected."""
class DataProcessingError(Exception):
    """Error raised when data processing fails."""
# ----------------------------------------------------------------------------
# Registry export
# ----------------------------------------------------------------------------
# Module-level alias for the registry stored on MicroscopeFormatRegistryBase
# (its ``__registry__`` attribute), so it can be imported by name.
MICROSCOPE_FORMAT_REGISTRIES = MicroscopeFormatRegistryBase.__registry__