Coverage for openhcs/processing/backends/experimental_analysis/metaxpress_registry.py: 11.5%
81 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-01 18:33 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-01 18:33 +0000
1"""
MetaXpress format registry implementation.

This module provides format-specific processing for MetaXpress microscope
data following OpenHCS registry architecture patterns.
6"""
8import string
9from typing import Dict, List, Any
10import pandas as pd
12from .format_registry import MicroscopeFormatRegistryBase
class MetaXpressFormatRegistry(MicroscopeFormatRegistryBase):
    """
    Registry for MetaXpress microscope format.

    Handles MetaXpress-specific data structure parsing, feature extraction,
    and plate organization following OpenHCS registry patterns.

    The raw sheet is read positionally: column 0 carries the row label
    ('Plate Name', 'Barcode', a well ID, or null on the feature header row),
    column 1 carries the plate name on 'Plate Name' rows, and feature
    columns start at position 2.
    """

    FORMAT_NAME = "EDDU_metaxpress"
    # NOTE(review): how None is turned into "first sheet" is decided by the
    # base class / loader, which is not visible from this file - confirm.
    SHEET_NAME = None  # Use first sheet
    SUPPORTED_EXTENSIONS = (".xlsx", ".xls", ".csv")

    @staticmethod
    def _header_feature_columns(raw_df: pd.DataFrame) -> Dict[int, Any]:
        """
        Map column positions to the feature names on the feature header row.

        The header row is the first row whose first column is null. Only
        positions from 2 onward are considered and null cells are skipped, so
        each name stays tied to the column it was actually read from.

        Args:
            raw_df: Raw MetaXpress data DataFrame

        Returns:
            Dictionary {column position: feature name}, in column order

        Raises:
            ValueError: If no row has a null first column
        """
        header_rows = raw_df[pd.isnull(raw_df.iloc[:, 0])]
        if header_rows.empty:
            raise ValueError("No feature rows found in MetaXpress data")

        header = header_rows.iloc[0].tolist()
        return {
            position: name
            for position, name in enumerate(header)
            if position >= 2 and pd.notna(name)
        }

    def extract_features(self, raw_df: pd.DataFrame) -> List[str]:
        """
        Extract feature column names from MetaXpress raw data.

        MetaXpress format stores features in rows where the first column is
        null. Names are taken from the first such row, starting at column 2,
        with null cells removed.

        Args:
            raw_df: Raw MetaXpress data DataFrame

        Returns:
            List of feature column names

        Raises:
            ValueError: If feature extraction fails
        """
        try:
            feature_names = list(self._header_feature_columns(raw_df).values())

            if not feature_names:
                raise ValueError("No features found in MetaXpress data")

            return feature_names

        except Exception as e:
            # Every failure is reported as ValueError; the cause is chained.
            raise ValueError(f"Failed to extract features from MetaXpress data: {e}") from e

    def extract_plate_names(self, raw_df: pd.DataFrame) -> List[str]:
        """
        Extract plate identifiers from MetaXpress raw data.

        MetaXpress format stores plate names in rows where first column is
        'Plate Name'; the name itself is read from the second column.

        Args:
            raw_df: Raw MetaXpress data DataFrame

        Returns:
            List of unique plate identifiers

        Raises:
            ValueError: If plate extraction fails
        """
        try:
            plate_name_rows = raw_df[raw_df.iloc[:, 0] == 'Plate Name']

            if plate_name_rows.empty:
                raise ValueError("No 'Plate Name' rows found in MetaXpress data")

            # unique() keeps first-seen order; null names are dropped afterwards
            plate_names = [
                name
                for name in plate_name_rows.iloc[:, 1].unique().tolist()
                if pd.notna(name)
            ]

            if not plate_names:
                raise ValueError("No plate names found in MetaXpress data")

            return plate_names

        except Exception as e:
            raise ValueError(f"Failed to extract plate names from MetaXpress data: {e}") from e

    def create_plates_dict(self, raw_df: pd.DataFrame) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """
        Create nested dictionary structure for MetaXpress plate data.

        Every plate found in the data gets one entry per well of the standard
        96-well layout, and every well gets one None placeholder per feature.

        Args:
            raw_df: Raw MetaXpress data DataFrame

        Returns:
            Dictionary structure: {plate_id: {well_id: {feature: None}}}

        Raises:
            ValueError: If data structure creation fails
        """
        try:
            features = self.extract_features(raw_df)
            plate_names = self.extract_plate_names(raw_df)
            wells = self._generate_well_ids()

            plates_dict = {}
            for plate_id in plate_names:
                # A fresh inner dict per well so wells never share state
                plates_dict[plate_id] = {
                    well_id: dict.fromkeys(features) for well_id in wells
                }

            return plates_dict

        except Exception as e:
            raise ValueError(f"Failed to create MetaXpress plates dictionary: {e}") from e

    def fill_plates_dict(self, raw_df: pd.DataFrame, plates_dict: Dict[str, Dict[str, Dict[str, Any]]]) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """
        Fill plates dictionary with actual measurement values from MetaXpress data.

        Rows are scanned top to bottom. A 'Plate Name' row selects the current
        plate, the next row with a null first column switches collection on,
        and a 'Barcode' row switches it off again. While collecting, rows whose
        first column is a well ID known to ``plates_dict`` have their feature
        values copied in; rows for unknown plates or wells are ignored.

        Values are read by column position, so null cells in the feature
        header row do not shift or break the name-to-column alignment. If a
        feature name occurs more than once, the first matching column is used.

        Args:
            raw_df: Raw MetaXpress data DataFrame
            plates_dict: Empty plates dictionary structure; modified in place

        Returns:
            The same ``plates_dict`` object, filled with measurement values

        Raises:
            ValueError: If data filling fails
        """
        try:
            features = self.extract_features(raw_df)

            # feature name -> first column position carrying that name
            column_of = {}
            for position, name in self._header_feature_columns(raw_df).items():
                column_of.setdefault(name, position)

            current_plate = None
            collecting_data = False

            for _, row in raw_df.iterrows():
                first_col = row.iloc[0]

                # Null first column: the feature header row (or a blank row).
                # Checked first so null markers are never compared to strings.
                if pd.isnull(first_col):
                    if current_plate is not None:
                        collecting_data = True
                    continue

                # Stop collecting when we hit 'Barcode'
                if first_col == "Barcode":
                    collecting_data = False
                    continue

                # Set current plate when we hit 'Plate Name'
                if first_col == "Plate Name":
                    current_plate = row.iloc[1]
                    collecting_data = False
                    continue

                if not (collecting_data and current_plate):
                    continue

                well_id = first_col
                if current_plate in plates_dict and well_id in plates_dict[current_plate]:
                    well_values = plates_dict[current_plate][well_id]
                    for feature in features:
                        position = column_of.get(feature)
                        if position is not None:
                            well_values[feature] = row.iloc[position]

            return plates_dict

        except Exception as e:
            raise ValueError(f"Failed to fill MetaXpress plates dictionary: {e}") from e

    def _generate_well_ids(self) -> List[str]:
        """
        Generate standard 96-well plate well IDs.

        Returns:
            List of well IDs (A01, A02, ..., H12), row by row
        """
        row_letters = string.ascii_uppercase[:8]  # A-H
        column_numbers = range(1, 13)  # 1-12

        return [
            f"{row}{col:02d}"
            for row in row_letters
            for col in column_numbers
        ]