Coverage for openhcs/processing/backends/experimental_analysis/cx5_registry.py: 13.6%

77 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-01 18:33 +0000

1""" 

2ThermoFisher CX5 format registry implementation. 

3 

4This module provides format-specific processing for ThermoFisher CX5 microscope 

5data following OpenHCS registry architecture patterns. 

6""" 

7 

8import string 

9from typing import Dict, List, Any 

10import pandas as pd 

11 

12from .format_registry import MicroscopeFormatRegistryBase 

13 

14 

class CX5FormatRegistry(MicroscopeFormatRegistryBase):
    """
    Registry for ThermoFisher CX5 microscope format.

    Handles CX5-specific data structure parsing, feature extraction,
    and plate organization following OpenHCS registry patterns.

    The raw sheet is read positionally: column index 1 holds the plate name,
    column indices 2 and 3 hold the 1-based row and column index of the well,
    and the feature columns sit between the 'Replicate' column and the final
    column (both excluded).
    """

    FORMAT_NAME = "EDDU_CX5"
    SHEET_NAME = "Rawdata"
    SUPPORTED_EXTENSIONS = (".xlsx", ".xls")

    # Dimensions of the standard 96-well layout (rows A-H, columns 1-12),
    # shared by well-ID generation and row/column index validation.
    _N_ROWS = 8
    _N_COLS = 12

    # Text identifying the column that precedes the feature columns.
    _REPLICATE_MARKER = "Replicate"

    def extract_features(self, raw_df: pd.DataFrame) -> List[str]:
        """
        Extract feature column names from CX5 raw data.

        CX5 format stores features after the 'Replicate' column. The final
        column of the sheet is excluded from the result.
        NOTE(review): the last column is presumably a non-feature trailer;
        confirm against a real CX5 export.

        A column named exactly 'Replicate' (ignoring surrounding whitespace)
        is preferred; otherwise the first column whose name contains
        'Replicate' is used. Non-string column names are ignored.

        Args:
            raw_df: Raw CX5 data DataFrame

        Returns:
            List of feature column names

        Raises:
            ValueError: If no 'Replicate' column exists, no feature columns
                follow it, or the input cannot be inspected
        """
        try:
            column_names = raw_df.columns.tolist()
            marker = self._REPLICATE_MARKER

            candidates = [
                position
                for position, name in enumerate(column_names)
                if isinstance(name, str) and marker in name
            ]
            if not candidates:
                # Without this check a missing marker would silently be
                # treated as "marker is the first column".
                raise ValueError(f"No '{marker}' column found in CX5 data")

            exact_matches = [
                position
                for position in candidates
                if column_names[position].strip() == marker
            ]
            replicate_position = exact_matches[0] if exact_matches else candidates[0]

            feature_columns = column_names[replicate_position + 1:-1]
            if not feature_columns:
                raise ValueError("No features found in CX5 data")

            return feature_columns

        except Exception as e:
            raise ValueError(f"Failed to extract features from CX5 data: {e}") from e

    def extract_plate_names(self, raw_df: pd.DataFrame) -> List[str]:
        """
        Extract plate identifiers from CX5 raw data.

        CX5 format stores plate names in the second column. Missing values
        are dropped and the remaining names are returned in order of first
        appearance.

        Args:
            raw_df: Raw CX5 data DataFrame

        Returns:
            List of unique plate identifiers

        Raises:
            ValueError: If the data has fewer than 2 columns or contains no
                plate names
        """
        try:
            if len(raw_df.columns) < 2:
                raise ValueError("CX5 data must have at least 2 columns")

            # Plate names are in the second column (index 1)
            plate_names = raw_df.iloc[:, 1].dropna().unique().tolist()

            if not plate_names:
                raise ValueError("No plate names found in CX5 data")

            return plate_names

        except Exception as e:
            raise ValueError(f"Failed to extract plate names from CX5 data: {e}") from e

    def create_plates_dict(self, raw_df: pd.DataFrame) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """
        Create nested dictionary structure for CX5 plate data.

        Every plate found in the data gets an entry for each well of the
        96-well layout, and every well gets its own dictionary mapping each
        feature name to None.

        Args:
            raw_df: Raw CX5 data DataFrame

        Returns:
            Dictionary structure: {plate_id: {well_id: {feature: None}}}

        Raises:
            ValueError: If data structure creation fails
        """
        try:
            features = self.extract_features(raw_df)
            plate_names = self.extract_plate_names(raw_df)
            wells = self._generate_well_ids()

            plates_dict: Dict[str, Dict[str, Dict[str, Any]]] = {}
            for plate_id in plate_names:
                # dict.fromkeys builds a fresh {feature: None} dict per well,
                # so wells never share state.
                plates_dict[plate_id] = {
                    well_id: dict.fromkeys(features) for well_id in wells
                }

            return plates_dict

        except Exception as e:
            raise ValueError(f"Failed to create CX5 plates dictionary: {e}") from e

    def fill_plates_dict(self, raw_df: pd.DataFrame, plates_dict: Dict[str, Dict[str, Dict[str, Any]]]) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """
        Fill plates dictionary with actual measurement values from CX5 data.

        CX5 format stores the plate name in column index 1 and the 1-based
        row/column indices of the well in column indices 2 and 3. The
        dictionary is updated in place and also returned. When several rows
        map to the same plate and well, the last row wins.

        Rows whose plate is not a key of ``plates_dict`` (including rows with
        a missing plate name) are skipped before their row/column indices are
        interpreted, so blank rows do not abort the fill.

        Args:
            raw_df: Raw CX5 data DataFrame
            plates_dict: Empty plates dictionary structure

        Returns:
            Filled plates dictionary with measurement values

        Raises:
            ValueError: If data filling fails, e.g. a row of a known plate
                carries row/column indices outside the plate layout
        """
        try:
            features = self.extract_features(raw_df)

            for _, row in raw_df.iterrows():
                plate_id = row.iloc[1]  # Plate name in second column
                if plate_id not in plates_dict:
                    continue

                # Row index in third column, column index in fourth column
                well_id = self._row_col_to_well(row.iloc[2], row.iloc[3])

                plate_wells = plates_dict[plate_id]
                if well_id not in plate_wells:
                    continue

                well_values = plate_wells[well_id]
                for feature in features:
                    well_values[feature] = row[feature]

            return plates_dict

        except Exception as e:
            raise ValueError(f"Failed to fill CX5 plates dictionary: {e}") from e

    def _generate_well_ids(self) -> List[str]:
        """
        Generate standard 96-well plate well IDs.

        Returns:
            List of well IDs (A01, A02, ..., H12), ordered row by row
        """
        row_letters = string.ascii_uppercase[: self._N_ROWS]  # A-H
        column_numbers = range(1, self._N_COLS + 1)  # 1-12
        return [
            f"{letter}{number:02d}"
            for letter in row_letters
            for number in column_numbers
        ]

    def _row_col_to_well(self, row_idx: int, col_idx: int) -> str:
        """
        Convert row/column indices to well ID.

        Integral floats and numeric strings are accepted as well as ints,
        because values read from a DataFrame row are frequently floats.

        Args:
            row_idx: Row index (1-based)
            col_idx: Column index (1-based)

        Returns:
            Well ID (e.g., "A01")

        Raises:
            ValueError: If indices are missing, not whole numbers, or out of
                range
        """
        try:
            positions = []
            for label, raw_value in (("Row", row_idx), ("Column", col_idx)):
                numeric = float(raw_value)
                # NaN, infinity and fractional values all fail this check.
                if not numeric.is_integer():
                    raise ValueError(f"{label} index {raw_value} is not a whole number")
                positions.append(int(numeric))
            row_number, col_number = positions

            if not 1 <= row_number <= self._N_ROWS:
                raise ValueError(f"Row index {row_idx} out of range (1-{self._N_ROWS})")

            if not 1 <= col_number <= self._N_COLS:
                raise ValueError(f"Column index {col_idx} out of range (1-{self._N_COLS})")

            row_letter = string.ascii_uppercase[row_number - 1]
            # Format the validated integer, not the raw argument: the 'd'
            # format code rejects floats and strings.
            return f"{row_letter}{col_number:02d}"

        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError(f"Invalid row/column indices: {row_idx}, {col_idx}: {e}") from e