Coverage for openhcs/processing/backends/experimental_analysis/metaxpress_registry.py: 11.5%

81 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-01 18:33 +0000

1""" 

2MetaXpress format registry implementation. 

3 

4This module provides format-specific processing for MetaXpress microscope 

5data following OpenHCS registry architecture patterns. 

6""" 

7 

8import string 

9from typing import Dict, List, Any 

10import pandas as pd 

11 

12from .format_registry import MicroscopeFormatRegistryBase 

13 

14 

class MetaXpressFormatRegistry(MicroscopeFormatRegistryBase):
    """
    Registry for MetaXpress microscope format.

    Handles MetaXpress-specific data structure parsing, feature extraction,
    and plate organization following OpenHCS registry patterns.
    """

    FORMAT_NAME = "EDDU_metaxpress"
    SHEET_NAME = None  # Use first sheet
    SUPPORTED_EXTENSIONS = (".xlsx", ".xls", ".csv")

    def extract_features(self, raw_df: pd.DataFrame) -> List[str]:
        """
        Extract feature column names from MetaXpress raw data.

        The first row whose first column is null is treated as the header
        row; its cells from the third column onward are the feature names.
        Null cells in that range are dropped.

        Args:
            raw_df: Raw MetaXpress data DataFrame

        Returns:
            List of feature column names

        Raises:
            ValueError: If no header row or no feature names are found, or
                if any other error occurs while reading the frame. The
                original exception is attached as ``__cause__``.
        """
        try:
            # Rows with a null first column carry the feature names.
            header_rows = raw_df[pd.isnull(raw_df.iloc[:, 0])]
            if header_rows.empty:
                raise ValueError("No feature rows found in MetaXpress data")

            # Only the first such row is used; the first two columns are
            # skipped (they are not feature columns).
            candidates = header_rows.iloc[0].tolist()[2:]
            feature_names = [name for name in candidates if pd.notna(name)]
            if not feature_names:
                raise ValueError("No features found in MetaXpress data")

            return feature_names

        except Exception as e:
            raise ValueError(f"Failed to extract features from MetaXpress data: {e}") from e

    def extract_plate_names(self, raw_df: pd.DataFrame) -> List[str]:
        """
        Extract plate identifiers from MetaXpress raw data.

        Plate identifiers are read from the second column of every row whose
        first column equals 'Plate Name'. Duplicates and null values are
        removed; first-seen order is kept.

        Args:
            raw_df: Raw MetaXpress data DataFrame

        Returns:
            List of unique plate identifiers

        Raises:
            ValueError: If no 'Plate Name' row or no non-null plate name is
                found, or if any other error occurs while reading the frame.
                The original exception is attached as ``__cause__``.
        """
        try:
            plate_rows = raw_df[raw_df.iloc[:, 0] == 'Plate Name']
            if plate_rows.empty:
                raise ValueError("No 'Plate Name' rows found in MetaXpress data")

            # The plate identifier sits in the second column of those rows.
            unique_names = plate_rows.iloc[:, 1].unique().tolist()
            plate_names = [name for name in unique_names if pd.notna(name)]
            if not plate_names:
                raise ValueError("No plate names found in MetaXpress data")

            return plate_names

        except Exception as e:
            raise ValueError(f"Failed to extract plate names from MetaXpress data: {e}") from e

    def create_plates_dict(self, raw_df: pd.DataFrame) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """
        Create nested dictionary structure for MetaXpress plate data.

        Every plate found by ``extract_plate_names`` gets one entry per well
        of the default layout produced by ``_generate_well_ids``, and every
        well gets one ``None`` placeholder per feature found by
        ``extract_features``.

        Args:
            raw_df: Raw MetaXpress data DataFrame

        Returns:
            Dictionary structure: {plate_id: {well_id: {feature: None}}}

        Raises:
            ValueError: If data structure creation fails. The original
                exception is attached as ``__cause__``.
        """
        try:
            features = self.extract_features(raw_df)
            plate_names = self.extract_plate_names(raw_df)
            wells = self._generate_well_ids()

            # A fresh inner dict is built per well so wells never share state.
            return {
                plate_id: {
                    well_id: {feature: None for feature in features}
                    for well_id in wells
                }
                for plate_id in plate_names
            }

        except Exception as e:
            raise ValueError(f"Failed to create MetaXpress plates dictionary: {e}") from e

    def fill_plates_dict(self, raw_df: pd.DataFrame, plates_dict: Dict[str, Dict[str, Dict[str, Any]]]) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """
        Fill plates dictionary with actual measurement values from MetaXpress data.

        Rows are scanned top to bottom with a small state machine:

        * a 'Plate Name' row selects the current plate (second column) and
          pauses collection;
        * a row with a null first column, once a plate has been selected,
          switches collection on (that row itself is skipped);
        * a 'Barcode' row switches collection off;
        * while collecting, the first column is taken as the well ID and the
          row's feature values are written to
          ``plates_dict[plate][well][feature]``.

        Rows whose plate or well is not present in ``plates_dict`` are
        ignored. ``plates_dict`` is modified in place and also returned.

        Args:
            raw_df: Raw MetaXpress data DataFrame
            plates_dict: Empty plates dictionary structure

        Returns:
            Filled plates dictionary with measurement values

        Raises:
            ValueError: If data filling fails (including when the number of
                columns in ``raw_df`` does not equal two plus the number of
                extracted features). The original exception is attached as
                ``__cause__``.
        """
        try:
            features = self.extract_features(raw_df)

            # Relabel the columns so feature values can be read by name.
            column_names = ["Well", "Laser_Focus"] + features
            df_with_names = raw_df.set_axis(column_names, axis=1, copy=False)

            current_plate = None
            collecting_data = False

            for _, row in df_with_names.iterrows():
                first_col = row.iloc[0]

                # Null check comes first so the string comparisons below are
                # never evaluated on a missing value.
                if pd.isnull(first_col):
                    if current_plate is not None:
                        collecting_data = True
                    continue

                if first_col == "Barcode":
                    collecting_data = False
                    continue

                if first_col == "Plate Name":
                    current_plate = row.iloc[1]
                    collecting_data = False
                    continue

                if not collecting_data:
                    continue

                # Membership in plates_dict is the only gate, so plate
                # identifiers that happen to be falsy (e.g. 0) are filled too.
                if current_plate not in plates_dict:
                    continue
                plate_wells = plates_dict[current_plate]

                well_id = first_col
                if well_id not in plate_wells:
                    continue

                well_values = plate_wells[well_id]
                for feature in features:
                    well_values[feature] = row[feature]

            return plates_dict

        except Exception as e:
            raise ValueError(f"Failed to fill MetaXpress plates dictionary: {e}") from e

    def _generate_well_ids(self, n_rows: int = 8, n_cols: int = 12) -> List[str]:
        """
        Generate plate well IDs in row-major order.

        With the defaults this yields the standard 96-well layout
        (A01, A02, ..., H12). Row labels are single uppercase letters and
        column numbers are zero-padded to at least two digits.

        Args:
            n_rows: Number of plate rows (0-26, one letter per row)
            n_cols: Number of plate columns (non-negative)

        Returns:
            List of well IDs (A01, A02, ..., H12 for the default layout)

        Raises:
            ValueError: If ``n_rows`` or ``n_cols`` is out of range
        """
        max_rows = len(string.ascii_uppercase)
        if not 0 <= n_rows <= max_rows:
            raise ValueError(f"n_rows must be between 0 and {max_rows}, got {n_rows}")
        if n_cols < 0:
            raise ValueError(f"n_cols must be non-negative, got {n_cols}")

        row_letters = string.ascii_uppercase[:n_rows]
        return [
            f"{letter}{col:02d}"
            for letter in row_letters
            for col in range(1, n_cols + 1)
        ]