Coverage for openhcs/processing/backends/experimental_analysis/format_registry.py: 38.8%
53 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-01 18:33 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-01 18:33 +0000
1"""
2Base classes for microscope format registry system.
4This module provides the abstract base class and common functionality for
5microscope format registries, following OpenHCS registry architecture patterns.
6"""
8from abc import ABC, abstractmethod
9from dataclasses import dataclass
10from typing import Dict, List, Any, Optional, Tuple
11import pandas as pd
12from pathlib import Path
@dataclass(frozen=True)
class MicroscopeFormatConfig:
    """Immutable bundle of settings describing one microscope data format.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Name identifying the microscope format.
    format_name: str
    # Worksheet to read from a workbook; may be None when no sheet is named.
    sheet_name: Optional[str]
    # File suffixes associated with the format, e.g. (".xlsx", ".csv").
    supported_extensions: Tuple[str, ...]
    # Name of the feature-extraction routine, stored as a plain string.
    feature_extraction_method: str
    # Name of the plate-detection routine, stored as a plain string.
    plate_detection_method: str
class MicroscopeFormatRegistryBase(ABC):
    """
    Abstract base class for microscope format registries.

    Following OpenHCS registry patterns, this provides a unified interface
    for processing different microscope data formats while eliminating
    code duplication and hardcoded format-specific logic.

    Concrete subclasses set the three class attributes below and implement
    the four abstract methods; ``read_results``, ``process_data`` and
    ``validate_data_structure`` are shared by every format.
    """

    # Abstract class attributes - each implementation must define these
    FORMAT_NAME: str
    SHEET_NAME: Optional[str]  # None means use first sheet
    SUPPORTED_EXTENSIONS: Tuple[str, ...]

    def __init__(self):
        """Initialize registry with format configuration."""
        method_suffix = self.FORMAT_NAME.lower()
        self.config = MicroscopeFormatConfig(
            format_name=self.FORMAT_NAME,
            sheet_name=self.SHEET_NAME,
            supported_extensions=self.SUPPORTED_EXTENSIONS,
            feature_extraction_method=f"extract_features_{method_suffix}",
            plate_detection_method=f"extract_plates_{method_suffix}",
        )

    @property
    def format_name(self) -> str:
        """Get format name for this registry."""
        return self.FORMAT_NAME

    @abstractmethod
    def extract_features(self, raw_df: pd.DataFrame) -> List[str]:
        """
        Extract feature column names from raw microscope data.

        Args:
            raw_df: Raw data DataFrame from microscope

        Returns:
            List of feature column names

        Raises:
            ValueError: If feature extraction fails
        """

    @abstractmethod
    def extract_plate_names(self, raw_df: pd.DataFrame) -> List[str]:
        """
        Extract plate identifiers from raw microscope data.

        Args:
            raw_df: Raw data DataFrame from microscope

        Returns:
            List of plate identifiers

        Raises:
            ValueError: If plate extraction fails
        """

    @abstractmethod
    def create_plates_dict(self, raw_df: pd.DataFrame) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """
        Create nested dictionary structure for plate data.

        Args:
            raw_df: Raw data DataFrame from microscope

        Returns:
            Dictionary structure: {plate_id: {well_id: {feature: value}}}

        Raises:
            ValueError: If data structure creation fails
        """

    @abstractmethod
    def fill_plates_dict(
        self,
        raw_df: pd.DataFrame,
        plates_dict: Dict[str, Dict[str, Dict[str, Any]]],
    ) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """
        Fill plates dictionary with actual measurement values.

        Args:
            raw_df: Raw data DataFrame from microscope
            plates_dict: Empty plates dictionary structure

        Returns:
            Filled plates dictionary with measurement values

        Raises:
            ValueError: If data filling fails
        """

    def read_results(self, results_path: str) -> pd.DataFrame:
        """
        Read results file using format-specific logic.

        The file extension is compared case-insensitively against
        ``SUPPORTED_EXTENSIONS``. A ``.csv`` file is read with
        ``pandas.read_csv``; anything else is opened as a workbook and the
        sheet named by ``SHEET_NAME`` (or the first sheet when it is unset)
        is returned.

        Args:
            results_path: Path to results file (a string or a path-like
                object)

        Returns:
            Raw data DataFrame

        Raises:
            FileNotFoundError: If results file doesn't exist
            ValueError: If file format is not supported
        """
        results_file = Path(results_path)

        if not results_file.exists():
            raise FileNotFoundError(f"Results file not found: {results_path}")

        # Normalize once so validation and reader selection agree, and so
        # "DATA.CSV" is treated the same as "data.csv".
        suffix = results_file.suffix.lower()
        supported = {ext.lower() for ext in self.SUPPORTED_EXTENSIONS}
        if suffix not in supported:
            raise ValueError(f"Unsupported file extension {results_file.suffix} for format {self.FORMAT_NAME}")

        if suffix == '.csv':
            return pd.read_csv(results_file)

        # Excel file: the context manager closes the workbook handle even if
        # reading the sheet raises.
        with pd.ExcelFile(results_file) as workbook:
            sheet_name = self.SHEET_NAME or workbook.sheet_names[0]
            return pd.read_excel(workbook, sheet_name=sheet_name)

    def process_data(self, results_path: str) -> Dict[str, Any]:
        """
        Complete data processing pipeline for this format.

        Reads the file, then calls the subclass hooks in order:
        ``extract_features``, ``extract_plate_names``, ``create_plates_dict``
        and ``fill_plates_dict``.

        Args:
            results_path: Path to results file

        Returns:
            Dictionary with keys ``raw_df``, ``features``, ``plate_names``,
            ``plates_dict`` (the filled dictionary) and ``format_name``

        Raises:
            FileNotFoundError: If results file doesn't exist
            ValueError: If data processing fails
        """
        # Read raw data
        raw_df = self.read_results(results_path)

        # Extract features and plates
        features = self.extract_features(raw_df)
        plate_names = self.extract_plate_names(raw_df)

        # Create and fill data structures
        plates_dict = self.create_plates_dict(raw_df)
        filled_plates_dict = self.fill_plates_dict(raw_df, plates_dict)

        return {
            'raw_df': raw_df,
            'features': features,
            'plate_names': plate_names,
            'plates_dict': filled_plates_dict,
            'format_name': self.FORMAT_NAME,
        }

    def validate_data_structure(self, data: Dict[str, Any]) -> bool:
        """
        Validate processed data structure.

        Args:
            data: Processed data dictionary

        Returns:
            True if data structure is valid

        Raises:
            ValueError: If a required key is missing, or if ``features`` or
                ``plate_names`` is empty
        """
        required_keys = ('raw_df', 'features', 'plate_names', 'plates_dict', 'format_name')

        for key in required_keys:
            if key not in data:
                raise ValueError(f"Missing required key in data structure: {key}")

        if not data['features']:
            raise ValueError("No features extracted from data")

        if not data['plate_names']:
            raise ValueError("No plates detected in data")

        return True
class FormatDetectionError(Exception):
    """Signal that the microscope format could not be detected."""
class DataProcessingError(Exception):
    """Signal that processing of microscope data failed."""