Coverage for openhcs/processing/backends/experimental_analysis/cx5_registry.py

1"""

2ThermoFisher CX5 format registry implementation.

4This module provides format-specific processing for ThermoFisher CX5 microscope

5data following OpenHCS registry architecture patterns.

6"""

8import string

9from typing import Dict, List, Any

10import pandas as pd

12from .format_registry import MicroscopeFormatRegistryBase

class CX5FormatRegistry(MicroscopeFormatRegistryBase):
    """
    Registry for ThermoFisher CX5 microscope format.

    Handles CX5-specific data structure parsing, feature extraction,
    and plate organization following OpenHCS registry patterns.
    """

    FORMAT_NAME = "EDDU_CX5"
    SHEET_NAME = "Rawdata"
    SUPPORTED_EXTENSIONS = (".xlsx", ".xls")

    # Plate geometry shared by the well-ID helpers (96-well layout: A-H x 1-12).
    _PLATE_ROWS = 8
    _PLATE_COLS = 12

    def extract_features(self, raw_df: pd.DataFrame) -> List[str]:
        """
        Extract feature column names from CX5 raw data.

        Features are the columns located after the 'Replicate' column. The
        final column of the frame is excluded from the result.

        Args:
            raw_df: Raw CX5 data DataFrame

        Returns:
            List of feature column names

        Raises:
            ValueError: If no 'Replicate' column exists, no feature columns
                follow it, or the frame cannot be inspected
        """
        try:
            column_names = [str(name) for name in raw_df.columns]

            # Prefer a column named exactly 'Replicate'; otherwise accept the
            # first column whose name merely contains it. A missing column is
            # an error rather than a silent fallback to column 0.
            exact_matches = [
                idx for idx, name in enumerate(column_names)
                if name.strip() == "Replicate"
            ]
            partial_matches = [
                idx for idx, name in enumerate(column_names)
                if "Replicate" in name
            ]
            matches = exact_matches or partial_matches
            if not matches:
                raise ValueError("No 'Replicate' column found in CX5 data")
            replicate_col_idx = matches[0]

            # NOTE(review): the upper bound of -1 drops the last column, as the
            # original implementation did - presumably a trailing non-feature
            # column in the CX5 export; confirm against a real file.
            feature_columns = raw_df.iloc[:, replicate_col_idx + 1:-1].columns.tolist()

            if not feature_columns:
                raise ValueError("No features found in CX5 data")

            return feature_columns

        except Exception as e:
            raise ValueError(f"Failed to extract features from CX5 data: {e}") from e

    def extract_plate_names(self, raw_df: pd.DataFrame) -> List[str]:
        """
        Extract plate identifiers from CX5 raw data.

        Plate names are read from the second column (positional index 1).

        Args:
            raw_df: Raw CX5 data DataFrame

        Returns:
            List of unique, non-NaN plate identifiers in order of first
            appearance

        Raises:
            ValueError: If the frame has fewer than 2 columns or contains no
                plate names
        """
        try:
            if len(raw_df.columns) < 2:
                raise ValueError("CX5 data must have at least 2 columns")

            # Plate names are in the second column (index 1); NaN entries are
            # discarded.
            plate_names = [
                name for name in raw_df.iloc[:, 1].unique().tolist()
                if pd.notna(name)
            ]

            if not plate_names:
                raise ValueError("No plate names found in CX5 data")

            return plate_names

        except Exception as e:
            raise ValueError(f"Failed to extract plate names from CX5 data: {e}") from e

    def create_plates_dict(self, raw_df: pd.DataFrame) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """
        Create nested dictionary structure for CX5 plate data.

        Every plate found in the data gets an entry for every well of the
        plate layout, with each feature initialised to None.

        Args:
            raw_df: Raw CX5 data DataFrame

        Returns:
            Dictionary structure: {plate_id: {well_id: {feature: None}}}

        Raises:
            ValueError: If data structure creation fails
        """
        try:
            features = self.extract_features(raw_df)
            plate_names = self.extract_plate_names(raw_df)
            wells = self._generate_well_ids()

            # A fresh inner dict per well so wells never share state.
            return {
                plate_id: {
                    well_id: {feature: None for feature in features}
                    for well_id in wells
                }
                for plate_id in plate_names
            }

        except Exception as e:
            raise ValueError(f"Failed to create CX5 plates dictionary: {e}") from e

    def fill_plates_dict(self, raw_df: pd.DataFrame, plates_dict: Dict[str, Dict[str, Dict[str, Any]]]) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """
        Fill plates dictionary with actual measurement values from CX5 data.

        For each data row the plate name is read from the second column and
        the row/column indices from the third and fourth columns. Rows whose
        plate or well is not present in ``plates_dict`` are skipped. The
        dictionary is modified in place and also returned.

        Args:
            raw_df: Raw CX5 data DataFrame
            plates_dict: Empty plates dictionary structure

        Returns:
            Filled plates dictionary with measurement values

        Raises:
            ValueError: If data filling fails, including when any row carries
                row/column indices that cannot be converted to a well ID
        """
        try:
            features = self.extract_features(raw_df)

            for _, row in raw_df.iterrows():
                plate_id = row.iloc[1]  # Plate name in second column
                row_idx = row.iloc[2]   # Row index in third column
                col_idx = row.iloc[3]   # Column index in fourth column

                well_id = self._row_col_to_well(row_idx, col_idx)

                if plate_id in plates_dict and well_id in plates_dict[plate_id]:
                    well_values = plates_dict[plate_id][well_id]
                    for feature in features:
                        if feature in row.index:
                            well_values[feature] = row[feature]

            return plates_dict

        except Exception as e:
            raise ValueError(f"Failed to fill CX5 plates dictionary: {e}") from e

    def _generate_well_ids(self) -> List[str]:
        """
        Generate standard 96-well plate well IDs.

        Returns:
            List of well IDs (A01, A02, ..., H12), row-major
        """
        row_letters = string.ascii_uppercase[:self._PLATE_ROWS]
        return [
            f"{row_letter}{col_number:02d}"
            for row_letter in row_letters
            for col_number in range(1, self._PLATE_COLS + 1)
        ]

    def _row_col_to_well(self, row_idx: int, col_idx: int) -> str:
        """
        Convert row/column indices to well ID.

        Integral values supplied as floats (e.g. ``3.0``, as pandas yields
        when a numeric column contains missing values) or as numeric strings
        are accepted; non-integral, NaN and infinite floats are rejected.

        Args:
            row_idx: Row index (1-based)
            col_idx: Column index (1-based)

        Returns:
            Well ID (e.g., "A01")

        Raises:
            ValueError: If indices are not integral or are out of range
        """
        try:
            # Reject 2.5 / NaN / inf up front instead of letting int()
            # truncate silently or raise an unrelated error.
            for value in (row_idx, col_idx):
                if isinstance(value, float) and not value.is_integer():
                    raise ValueError(f"Index {value} is not an integer")

            row_number = int(row_idx)
            col_number = int(col_idx)

            if not 1 <= row_number <= self._PLATE_ROWS:
                raise ValueError(f"Row index {row_idx} out of range (1-{self._PLATE_ROWS})")

            if not 1 <= col_number <= self._PLATE_COLS:
                raise ValueError(f"Column index {col_idx} out of range (1-{self._PLATE_COLS})")

            row_letter = string.ascii_uppercase[row_number - 1]
            # Format the converted int, not the raw input: ':02d' is invalid
            # for float and str values.
            return f"{row_letter}{col_number:02d}"

        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError(f"Invalid row/column indices: {row_idx}, {col_idx}: {e}") from e

Coverage for openhcs/processing/backends/experimental_analysis/cx5_registry.py: 13.6%

77 statements