Coverage for openhcs/processing/backends/experimental_analysis/cx5_registry.py: 13.6%
77 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-01 18:33 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-01 18:33 +0000
1"""
2ThermoFisher CX5 format registry implementation.
4This module provides format-specific processing for ThermoFisher CX5 microscope
5data following OpenHCS registry architecture patterns.
6"""
8import string
9from typing import Dict, List, Any
10import pandas as pd
12from .format_registry import MicroscopeFormatRegistryBase
class CX5FormatRegistry(MicroscopeFormatRegistryBase):
    """
    Registry for ThermoFisher CX5 microscope format.

    Handles CX5-specific data structure parsing, feature extraction,
    and plate organization following OpenHCS registry patterns.
    """

    # Identifier string for this format; how the base registry consumes it
    # is not visible in this module.
    FORMAT_NAME = "EDDU_CX5"
    # Presumably the workbook sheet holding the raw data -- TODO confirm
    # against the code that loads the file.
    SHEET_NAME = "Rawdata"
    # File extensions associated with this format.
    SUPPORTED_EXTENSIONS = (".xlsx", ".xls")
27 def extract_features(self, raw_df: pd.DataFrame) -> List[str]:
28 """
29 Extract feature column names from CX5 raw data.
31 CX5 format stores features after the 'Replicate' column.
33 Args:
34 raw_df: Raw CX5 data DataFrame
36 Returns:
37 List of feature column names
39 Raises:
40 ValueError: If feature extraction fails
41 """
42 try:
43 # Find the 'Replicate' column and extract features after it
44 replicate_col_idx = raw_df.columns.str.find("Replicate").argmax()
45 feature_columns = raw_df.iloc[:, replicate_col_idx + 1:-1].columns.tolist()
47 if not feature_columns:
48 raise ValueError("No features found in CX5 data")
50 return feature_columns
52 except Exception as e:
53 raise ValueError(f"Failed to extract features from CX5 data: {e}")
55 def extract_plate_names(self, raw_df: pd.DataFrame) -> List[str]:
56 """
57 Extract plate identifiers from CX5 raw data.
59 CX5 format stores plate names in the second column.
61 Args:
62 raw_df: Raw CX5 data DataFrame
64 Returns:
65 List of unique plate identifiers
67 Raises:
68 ValueError: If plate extraction fails
69 """
70 try:
71 if len(raw_df.columns) < 2:
72 raise ValueError("CX5 data must have at least 2 columns")
74 # Plate names are in the second column (index 1)
75 plate_names = raw_df.iloc[:, 1].unique().tolist()
77 # Remove any NaN values
78 plate_names = [name for name in plate_names if pd.notna(name)]
80 if not plate_names:
81 raise ValueError("No plate names found in CX5 data")
83 return plate_names
85 except Exception as e:
86 raise ValueError(f"Failed to extract plate names from CX5 data: {e}")
88 def create_plates_dict(self, raw_df: pd.DataFrame) -> Dict[str, Dict[str, Dict[str, Any]]]:
89 """
90 Create nested dictionary structure for CX5 plate data.
92 Args:
93 raw_df: Raw CX5 data DataFrame
95 Returns:
96 Dictionary structure: {plate_id: {well_id: {feature: None}}}
98 Raises:
99 ValueError: If data structure creation fails
100 """
101 try:
102 features = self.extract_features(raw_df)
103 plate_names = self.extract_plate_names(raw_df)
105 # Generate standard 96-well plate layout
106 wells = self._generate_well_ids()
108 # Create nested structure
109 plates_dict = {}
110 for plate_id in plate_names:
111 plates_dict[plate_id] = {}
112 for well_id in wells:
113 plates_dict[plate_id][well_id] = {feature: None for feature in features}
115 return plates_dict
117 except Exception as e:
118 raise ValueError(f"Failed to create CX5 plates dictionary: {e}")
120 def fill_plates_dict(self, raw_df: pd.DataFrame, plates_dict: Dict[str, Dict[str, Dict[str, Any]]]) -> Dict[str, Dict[str, Dict[str, Any]]]:
121 """
122 Fill plates dictionary with actual measurement values from CX5 data.
124 CX5 format stores row/column indices in columns 2 and 3.
126 Args:
127 raw_df: Raw CX5 data DataFrame
128 plates_dict: Empty plates dictionary structure
130 Returns:
131 Filled plates dictionary with measurement values
133 Raises:
134 ValueError: If data filling fails
135 """
136 try:
137 features = self.extract_features(raw_df)
139 for index, row in raw_df.iterrows():
140 # Extract plate, row, and column information
141 plate_id = row.iloc[1] # Plate name in second column
142 row_idx = row.iloc[2] # Row index in third column
143 col_idx = row.iloc[3] # Column index in fourth column
145 # Convert row/column indices to well ID
146 well_id = self._row_col_to_well(row_idx, col_idx)
148 # Fill feature values
149 if plate_id in plates_dict and well_id in plates_dict[plate_id]:
150 for feature in features:
151 if feature in row.index:
152 plates_dict[plate_id][well_id][feature] = row[feature]
154 return plates_dict
156 except Exception as e:
157 raise ValueError(f"Failed to fill CX5 plates dictionary: {e}")
159 def _generate_well_ids(self) -> List[str]:
160 """
161 Generate standard 96-well plate well IDs.
163 Returns:
164 List of well IDs (A01, A02, ..., H12)
165 """
166 rows = [string.ascii_uppercase[i] for i in range(8)] # A-H
167 cols = [i + 1 for i in range(12)] # 1-12
169 wells = []
170 for row in rows:
171 for col in cols:
172 wells.append(f"{row}{col:02d}")
174 return wells
176 def _row_col_to_well(self, row_idx: int, col_idx: int) -> str:
177 """
178 Convert row/column indices to well ID.
180 Args:
181 row_idx: Row index (1-based)
182 col_idx: Column index (1-based)
184 Returns:
185 Well ID (e.g., "A01")
187 Raises:
188 ValueError: If indices are out of range
189 """
190 try:
191 # Convert to 0-based indices
192 row_zero_based = int(row_idx) - 1
193 col_zero_based = int(col_idx) - 1
195 # Validate ranges
196 if row_zero_based < 0 or row_zero_based >= 8:
197 raise ValueError(f"Row index {row_idx} out of range (1-8)")
199 if col_zero_based < 0 or col_zero_based >= 12:
200 raise ValueError(f"Column index {col_idx} out of range (1-12)")
202 # Convert to well ID
203 row_letter = string.ascii_uppercase[row_zero_based]
204 well_id = f"{row_letter}{col_idx:02d}"
206 return well_id
208 except (ValueError, TypeError) as e:
209 raise ValueError(f"Invalid row/column indices: {row_idx}, {col_idx}: {e}")