Coverage for openhcs/microscopes/opera_phenix.py: 57.3%

326 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-01 18:33 +0000

1""" 

2Opera Phenix microscope implementations for openhcs. 

3 

4This module provides concrete implementations of FilenameParser and MetadataHandler 

5for Opera Phenix microscopes. 

6""" 

7 

8import logging 

9import os 

10import re 

11from pathlib import Path 

12from typing import Any, Dict, List, Optional, Union, Type, Tuple 

13 

14from openhcs.constants.constants import Backend 

15from openhcs.microscopes.opera_phenix_xml_parser import OperaPhenixXmlParser 

16from openhcs.io.filemanager import FileManager 

17from openhcs.microscopes.microscope_base import MicroscopeHandler 

18from openhcs.microscopes.microscope_interfaces import (FilenameParser, 

19 MetadataHandler) 

20 

21logger = logging.getLogger(__name__) 

22 

23 

24 

class OperaPhenixHandler(MicroscopeHandler):
    """
    MicroscopeHandler implementation for Opera Phenix systems.

    This handler combines the OperaPhenix filename parser with its
    corresponding metadata handler. It guarantees aligned behavior
    for plate structure parsing, metadata extraction, and any optional
    post-processing steps required after workspace setup.
    """

    # Explicit microscope type for proper registration
    _microscope_type = 'opera_phenix'

    # Class attribute for automatic metadata handler registration (set after class definition)
    _metadata_handler_class = None

    def __init__(self, filemanager: FileManager, pattern_format: Optional[str] = None):
        # Parser and metadata handler are constructed as a matched pair so
        # filename parsing and Index.xml metadata stay consistent.
        self.parser = OperaPhenixFilenameParser(filemanager, pattern_format=pattern_format)
        self.metadata_handler = OperaPhenixMetadataHandler(filemanager)
        super().__init__(parser=self.parser, metadata_handler=self.metadata_handler)

    @property
    def common_dirs(self) -> List[str]:
        """Subdirectory names commonly used by Opera Phenix."""
        return ['Images']

    @property
    def microscope_type(self) -> str:
        """Microscope type identifier (for interface enforcement only)."""
        return 'opera_phenix'

    @property
    def metadata_handler_class(self) -> Type[MetadataHandler]:
        """Metadata handler class (for interface enforcement only)."""
        return OperaPhenixMetadataHandler

    @property
    def compatible_backends(self) -> List[Backend]:
        """
        Opera Phenix is compatible with DISK backend only.

        Legacy microscope format with standard file operations.
        """
        return [Backend.DISK]

    # Uses default workspace initialization from base class

    def _prepare_workspace(self, workspace_path: Path, filemanager: FileManager):
        """
        Renames Opera Phenix images to follow a consistent field order
        based on spatial layout extracted from Index.xml. Uses remapped
        filenames and replaces the directory in-place.

        This method performs preparation but does not determine the final image directory.

        The workspace is expected to contain only symlinks to the original
        plate data; this method creates remapped symlinks in a temporary
        directory, deletes the old symlinks, then moves the remapped links
        back into the image directory. Real files in the workspace are
        treated as a safety violation and abort processing.

        Args:
            workspace_path: Path to the symlinked workspace
            filemanager: FileManager instance for file operations

        Returns:
            Path to the normalized image directory.

        Raises:
            RuntimeError: If a real (non-symlink) file is found in the
                workspace, or any path would be touched outside it.
            FileExistsError: If a destination exists and is not a symlink.
        """

        # Check if workspace has already been processed by looking for temp directory
        # If temp directory exists, workspace was already processed - skip processing
        temp_dir_name = "__opera_phenix_temp"
        for entry in filemanager.list_dir(workspace_path, Backend.DISK.value):
            entry_path = Path(workspace_path) / entry
            if entry_path.is_dir() and entry_path.name == temp_dir_name:
                logger.info(f"📁 WORKSPACE ALREADY PROCESSED: Found {temp_dir_name} - skipping Opera Phenix preparation")
                return workspace_path

        logger.info(f"🔄 PROCESSING WORKSPACE: Applying Opera Phenix name remapping to {workspace_path}")
        # Find the image directory using the common_dirs property
        # Clause 245: Workspace operations are disk-only by design
        # This call is structurally hardcoded to use the "disk" backend

        # Get all entries in the directory
        entries = filemanager.list_dir(workspace_path, Backend.DISK.value)

        # Look for a directory matching any of the common_dirs patterns
        # (falls back to the workspace root if none match).
        image_dir = workspace_path
        for entry in entries:
            entry_lower = entry.lower()
            if any(common_dir.lower() in entry_lower for common_dir in self.common_dirs):
                # Found a matching directory
                image_dir = Path(workspace_path) / entry if isinstance(workspace_path, (str, Path)) else workspace_path / entry
                logger.info("Found directory matching common_dirs pattern: %s", image_dir)
                break

        # Default to empty field mapping (no remapping)
        field_mapping = {}

        # Try to load field mapping from Index.xml if available
        try:
            # Clause 245: Workspace operations are disk-only by design
            # This call is structurally hardcoded to use the "disk" backend
            index_xml = filemanager.find_file_recursive(workspace_path, "Index.xml", Backend.DISK.value)
            if index_xml:
                xml_parser = OperaPhenixXmlParser(index_xml)
                field_mapping = xml_parser.get_field_id_mapping()
                logger.debug("Loaded field mapping from Index.xml: %s", field_mapping)
            else:
                logger.debug("Index.xml not found. Using default field mapping.")
        except Exception as e:
            # Best-effort: a broken Index.xml degrades to identity mapping.
            logger.error("Error loading Index.xml: %s", e)
            logger.debug("Using default field mapping due to error.")

        # Get all image files in the directory BEFORE creating temp directory
        # This prevents recursive mirroring of the temp directory
        # Clause 245: Workspace operations are disk-only by design
        # This call is structurally hardcoded to use the "disk" backend
        image_files = filemanager.list_image_files(image_dir, Backend.DISK.value)

        # Create a uniquely named temporary directory for renamed files
        # Use "__opera_phenix_temp" to make it clearly identifiable
        if isinstance(image_dir, str):
            temp_dir = os.path.join(image_dir, "__opera_phenix_temp")
        else:  # Path object
            temp_dir = image_dir / "__opera_phenix_temp"

        # SAFETY CHECK: Ensure temp directory is within workspace
        if not str(temp_dir).startswith(str(workspace_path)):
            logger.error("SAFETY VIOLATION: Temp directory would be created outside workspace: %s", temp_dir)
            raise RuntimeError(f"Temp directory would be created outside workspace: {temp_dir}")

        # Clause 245: Workspace operations are disk-only by design
        # This call is structurally hardcoded to use the "disk" backend
        filemanager.ensure_directory(temp_dir, Backend.DISK.value)

        logger.debug("Created temporary directory for Opera Phenix workspace preparation: %s", temp_dir)

        # Process each file
        for file_path in image_files:
            # FileManager should return strings, but handle Path objects too
            if isinstance(file_path, str):
                file_name = os.path.basename(file_path)
                file_path_obj = Path(file_path)
            elif isinstance(file_path, Path):
                file_name = file_path.name
                file_path_obj = file_path
            else:
                # Skip any unexpected types
                logger.warning("Unexpected file path type: %s", type(file_path).__name__)
                continue

            # Check if this is a symlink
            if file_path_obj.is_symlink():
                try:
                    # Get the target of the symlink (what it points to)
                    real_file_path = file_path_obj.resolve()
                    if not real_file_path.exists():
                        logger.warning("Broken symlink detected: %s -> %s", file_path, real_file_path)
                        continue
                    # Store both the symlink path and the real file path
                    # NOTE(review): symlink_target duplicates source_path and is
                    # not read afterwards — kept for parity with original code.
                    source_path = str(real_file_path)
                    symlink_target = str(real_file_path)
                except Exception as e:
                    logger.warning("Failed to resolve symlink %s: %s", file_path, e)
                    continue
            else:
                # This should never happen in a properly mirrored workspace
                logger.error("SAFETY VIOLATION: Found real file in workspace (should be symlink): %s", file_path)
                raise RuntimeError(f"Workspace contains real file instead of symlink: {file_path}")

            # Store the original symlink path for reference
            original_symlink_path = str(file_path_obj)

            # Parse file metadata; files without a parseable site are left alone
            metadata = self.parser.parse_filename(file_name)
            if not metadata or 'site' not in metadata or metadata['site'] is None:
                continue

            # Remap the field ID using the spatial layout
            original_field_id = metadata['site']
            new_field_id = field_mapping.get(original_field_id, original_field_id)

            # Construct the new filename with proper padding
            metadata['site'] = new_field_id  # Update site with remapped value
            new_name = self.parser.construct_filename(**metadata)

            # Create the new path in the temporary directory
            if isinstance(temp_dir, str):
                new_path = os.path.join(temp_dir, new_name)
            else:  # Path object
                new_path = temp_dir / new_name

            # Check if destination already exists in temp directory
            try:
                # Clause 245: Workspace operations are disk-only by design
                # This call is structurally hardcoded to use the "disk" backend
                if filemanager.exists(new_path, Backend.DISK.value):
                    # For temp directory, we can be more aggressive and delete any existing file
                    logger.debug("File exists in temp directory, removing before copy: %s", new_path)
                    filemanager.delete(new_path, Backend.DISK.value)

                # Create a symlink in the temp directory pointing to the original file
                # Clause 245: Workspace operations are disk-only by design
                # This call is structurally hardcoded to use the "disk" backend
                filemanager.create_symlink(source_path, new_path, Backend.DISK.value)
                logger.debug("Created symlink in temp directory: %s -> %s", new_path, source_path)

            except Exception as e:
                logger.error("Failed to copy file to temp directory: %s -> %s: %s",
                             source_path, new_path, e)
                raise RuntimeError(f"Failed to copy file to temp directory: {e}") from e

        # Clean up and replace old files - ONLY delete symlinks in workspace, NEVER original files
        for file_path in image_files:
            # Convert to Path object for symlink checking
            file_path_obj = Path(file_path) if isinstance(file_path, str) else file_path

            # SAFETY CHECK: Only delete if it's within the workspace directory
            if not str(file_path_obj).startswith(str(workspace_path)):
                logger.error("SAFETY VIOLATION: Attempted to delete file outside workspace: %s", file_path)
                raise RuntimeError(f"Workspace preparation tried to delete file outside workspace: {file_path}")

            # SAFETY CHECK: In workspace, only delete symlinks, never real files
            if file_path_obj.is_symlink():
                # Safe to delete - it's a symlink in the workspace
                logger.debug("Deleting symlink in workspace: %s", file_path)
                filemanager.delete(file_path, Backend.DISK.value)
            elif file_path_obj.is_file():
                # This should never happen in a properly mirrored workspace
                logger.error("SAFETY VIOLATION: Found real file in workspace (should be symlink): %s", file_path)
                raise RuntimeError(f"Workspace contains real file instead of symlink: {file_path}")
            else:
                logger.warning("File not found or not accessible: %s", file_path)

        # Get all files in the temporary directory
        # Clause 245: Workspace operations are disk-only by design
        # This call is structurally hardcoded to use the "disk" backend
        temp_files = filemanager.list_files(temp_dir, Backend.DISK.value)

        # Move files from temporary directory to image directory
        for temp_file in temp_files:
            # FileManager should return strings, but handle Path objects too
            if isinstance(temp_file, str):
                temp_file_name = os.path.basename(temp_file)
            elif isinstance(temp_file, Path):
                temp_file_name = temp_file.name
            else:
                # Skip any unexpected types
                logger.warning("Unexpected file path type: %s", type(temp_file).__name__)
                continue
            if isinstance(image_dir, str):
                dest_path = os.path.join(image_dir, temp_file_name)
            else:  # Path object
                dest_path = image_dir / temp_file_name

            try:
                # Check if destination already exists in image directory
                # Clause 245: Workspace operations are disk-only by design
                # This call is structurally hardcoded to use the "disk" backend
                if filemanager.exists(dest_path, Backend.DISK.value):
                    # If destination is a symlink, ok to remove and replace
                    if filemanager.is_symlink(dest_path, Backend.DISK.value):
                        logger.debug("Destination is a symlink, removing before copy: %s", dest_path)
                        filemanager.delete(dest_path, Backend.DISK.value)
                    else:
                        # Not a symlink - could be a real file
                        logger.error("SAFETY VIOLATION: Destination exists and is not a symlink: %s", dest_path)
                        raise FileExistsError(f"Destination exists and is not a symlink: {dest_path}")

                # First, if the temp file is a symlink, get its target
                temp_file_obj = Path(temp_file) if isinstance(temp_file, str) else temp_file
                if temp_file_obj.is_symlink():
                    try:
                        # Get the target that the temp symlink points to
                        real_target = temp_file_obj.resolve()
                        real_target_path = str(real_target)

                        # Create a new symlink in the image directory pointing to the original file
                        # Clause 245: Workspace operations are disk-only by design
                        # This call is structurally hardcoded to use the "disk" backend
                        filemanager.create_symlink(real_target_path, dest_path, Backend.DISK.value)
                        logger.debug("Created symlink in image directory: %s -> %s", dest_path, real_target_path)
                    except Exception as e:
                        logger.error("Failed to resolve symlink in temp directory: %s: %s", temp_file, e)
                        raise RuntimeError(f"Failed to resolve symlink: {e}") from e
                else:
                    # This should never happen if we're using symlinks consistently
                    logger.warning("Temp file is not a symlink: %s", temp_file)
                    # Fall back to copying the file
                    filemanager.copy(temp_file, dest_path, Backend.DISK.value)
                    logger.debug("Copied file (not symlink) to image directory: %s -> %s", temp_file, dest_path)

                # Remove the file from the temporary directory
                # Clause 245: Workspace operations are disk-only by design
                # This call is structurally hardcoded to use the "disk" backend
                filemanager.delete(temp_file, Backend.DISK.value)

            except FileExistsError as e:
                # Re-raise with clear message
                logger.error("Cannot copy to destination: %s", e)
                raise
            except Exception as e:
                logger.error("Error copying from temp to destination: %s -> %s: %s",
                             temp_file, dest_path, e)
                raise RuntimeError(f"Failed to process file from temp directory: {e}") from e

        # SAFETY CHECK: Validate temp directory before deletion
        if not str(temp_dir).startswith(str(workspace_path)):
            logger.error("SAFETY VIOLATION: Attempted to delete temp directory outside workspace: %s", temp_dir)
            raise RuntimeError(f"Attempted to delete temp directory outside workspace: {temp_dir}")

        if not "__opera_phenix_temp" in str(temp_dir):
            logger.error("SAFETY VIOLATION: Attempted to delete non-temp directory: %s", temp_dir)
            raise RuntimeError(f"Attempted to delete non-temp directory: {temp_dir}")

        # Remove the temporary directory
        # Clause 245: Workspace operations are disk-only by design
        # This call is structurally hardcoded to use the "disk" backend
        try:
            filemanager.delete(temp_dir, Backend.DISK.value)
            logger.debug("Successfully removed temporary directory: %s", temp_dir)
        except Exception as e:
            # Non-fatal error, just log it
            logger.warning("Failed to remove temporary directory %s: %s", temp_dir, e)

        return image_dir

348 

349 

class OperaPhenixFilenameParser(FilenameParser):
    """Parser for Opera Phenix microscope filenames.

    Handles Opera Phenix format filenames like:
        - r01c01f001p01-ch1sk1fk1fl1.tiff
        - r01c01f001p01-ch1.tiff
    """

    # Regular expression pattern for Opera Phenix filenames
    _pattern = re.compile(r"r(\d{1,2})c(\d{1,2})f(\d+|\{[^\}]*\})p(\d+|\{[^\}]*\})-ch(\d+|\{[^\}]*\})(?:sk\d+)?(?:fk\d+)?(?:fl\d+)?(\.\w+)$", re.I)

    # Pattern for extracting row and column from Opera Phenix well format
    _well_pattern = re.compile(r"R(\d{2})C(\d{2})", re.I)

    def __init__(self, filemanager=None, pattern_format=None):
        """
        Initialize the parser.

        Args:
            filemanager: FileManager instance (not used, but required for interface compatibility)
            pattern_format: Optional pattern format (not used, but required for interface compatibility)
        """
        super().__init__()  # Initialize the generic parser interface
        # Kept only so this parser satisfies the shared parser constructor
        # signature; neither value influences parsing.
        self.filemanager = filemanager
        self.pattern_format = pattern_format

    @classmethod
    def can_parse(cls, filename: str) -> bool:
        """
        Check if this parser can parse the given filename.

        Args:
            filename (str): Filename to check

        Returns:
            bool: True if this parser can parse the filename, False otherwise
        """
        # 🔒 Clause 17 — VFS Boundary Method
        # Pure string matching against the basename; no file I/O happens here.
        return cls._pattern.match(os.path.basename(filename)) is not None

    def parse_filename(self, filename: str) -> Optional[Dict[str, Any]]:
        """
        Parse an Opera Phenix filename to extract all components.
        Supports placeholders like {iii} which will return None for that field.

        Args:
            filename (str): Filename to parse

        Returns:
            dict or None: Dictionary with extracted components or None if parsing fails.
        """
        # 🔒 Clause 17 — VFS Boundary Method
        # Pure string operation; no file I/O happens here.
        basename = os.path.basename(filename)
        logger.debug("OperaPhenixFilenameParser attempting to parse basename: '%s'", basename)

        match = self._pattern.match(basename)
        if not match:
            logger.warning("Regex match failed for basename: '%s'", basename)
            return None

        logger.debug("Regex match successful for '%s'", basename)
        row, col, site_str, z_str, channel_str, ext = match.groups()

        def as_int_or_none(token):
            """Convert a captured token to int; placeholders like {iii} yield None."""
            if not token or '{' in token:
                return None
            return int(token)

        channel = as_int_or_none(channel_str)
        return {
            # Well ID is normalized to the zero-padded R##C## form.
            'well': f"R{int(row):02d}C{int(col):02d}",
            'site': as_int_or_none(site_str),
            'channel': channel,
            'wavelength': channel,  # For backward compatibility
            'z_index': as_int_or_none(z_str),
            'extension': ext if ext else '.tif',
        }

    def construct_filename(self, extension: str = '.tiff', site_padding: int = 3, z_padding: int = 3, **component_values) -> str:
        """
        Construct an Opera Phenix filename from components.

        Accepts component values via **kwargs so it stays compatible with the
        generic parser interface.

        Args:
            extension (str, optional): File extension (default: '.tiff')
            site_padding (int, optional): Width to pad site numbers to (default: 3)
            z_padding (int, optional): Width to pad Z-index numbers to (default: 3)
            **component_values: Component values as keyword arguments.
                Expected keys: well, site, channel, z_index

        Returns:
            str: Constructed filename

        Raises:
            ValueError: If 'well' is missing or not in 'R01C03' form.
        """
        well = component_values.get('well')
        site = component_values.get('site')
        channel = component_values.get('channel')
        z_index = component_values.get('z_index')

        if not well:
            raise ValueError("Well component is required for filename construction")

        # Well must already be in Opera Phenix 'R01C03' form.
        well_match = self._well_pattern.match(well)
        if not well_match:
            raise ValueError(f"Invalid well format: {well}. Expected format: 'R01C03'")
        row = int(well_match.group(1))
        col = int(well_match.group(2))

        # Missing Z-index/channel default to 1.
        if z_index is None:
            z_index = 1
        if channel is None:
            channel = 1

        # String components (e.g. '{iii}' placeholders) pass through verbatim;
        # integers are zero-padded to the requested width.
        site_part = f"f{site}" if isinstance(site, str) else f"f{site:0{site_padding}d}"
        z_part = f"p{z_index}" if isinstance(z_index, str) else f"p{z_index:0{z_padding}d}"

        return f"r{row:02d}c{col:02d}{site_part}{z_part}-ch{channel}sk1fk1fl1{extension}"

    def remap_field_in_filename(self, filename: str, xml_parser: Optional[OperaPhenixXmlParser] = None) -> str:
        """
        Remap the field ID in a filename to follow a top-left to bottom-right pattern.

        Args:
            filename: Original filename
            xml_parser: Parser with XML data

        Returns:
            str: New filename with remapped field ID
        """
        if xml_parser is None:
            return filename

        parsed = self.parse_filename(filename)
        # Leave filenames without a concrete site value untouched.
        if not parsed or parsed.get('site') is None:
            return filename

        # Remap the site through the XML spatial mapping, then rebuild the
        # filename so every name ends up with the same, consistent padding
        # (even if the field ID was unchanged).
        mapping = xml_parser.get_field_id_mapping()
        parsed['site'] = xml_parser.remap_field_id(parsed['site'], mapping)
        return self.construct_filename(**parsed)

    def extract_component_coordinates(self, component_value: str) -> Tuple[str, str]:
        """
        Extract coordinates from component identifier (typically well).

        Args:
            component_value (str): Component identifier (e.g., 'R03C04' or 'A01')

        Returns:
            Tuple[str, str]: (row, column) where row is like 'A', 'B' and column is like '01', '04'

        Raises:
            ValueError: If component format is invalid
        """
        if not component_value:
            raise ValueError(f"Invalid component format: {component_value}")

        # Opera Phenix form 'R01C03' -> letter row + zero-padded column.
        phenix_match = self._well_pattern.match(component_value)
        if phenix_match:
            row_idx = int(phenix_match.group(1))
            col_idx = int(phenix_match.group(2))
            return chr(ord('A') + row_idx - 1), f"{col_idx:02d}"

        # Otherwise expect the simple letter+digits form like 'A01', 'C04'.
        if len(component_value) < 2:
            raise ValueError(f"Invalid component format: {component_value}")
        row_letter = component_value[0]
        col_digits = component_value[1:]
        if not row_letter.isalpha() or not col_digits.isdigit():
            raise ValueError(f"Invalid Opera Phenix component format: {component_value}. Expected 'R01C03' or 'A01' format")
        return row_letter, col_digits

567 

568class OperaPhenixMetadataHandler(MetadataHandler): 

569 """ 

570 Metadata handler for Opera Phenix microscopes. 

571 

572 Handles finding and parsing Index.xml files for Opera Phenix microscopes. 

573 """ 

574 

575 def __init__(self, filemanager: FileManager): 

576 """ 

577 Initialize the metadata handler. 

578 

579 Args: 

580 filemanager: FileManager instance for file operations. 

581 """ 

582 super().__init__() 

583 self.filemanager = filemanager 

584 

585 # Legacy mode has been completely purged 

586 

587 def find_metadata_file(self, plate_path: Union[str, Path]): 

588 """ 

589 Find the Index.xml file in the plate directory. 

590 

591 Args: 

592 plate_path: Path to the plate directory 

593 

594 Returns: 

595 Path to the Index.xml file 

596 

597 Raises: 

598 FileNotFoundError: If no Index.xml file is found 

599 """ 

600 # Ensure plate_path is a Path object 

601 if isinstance(plate_path, str): 601 ↛ 602line 601 didn't jump to line 602 because the condition on line 601 was never true

602 plate_path = Path(plate_path) 

603 

604 # Ensure the path exists 

605 if not plate_path.exists(): 605 ↛ 606line 605 didn't jump to line 606 because the condition on line 605 was never true

606 raise FileNotFoundError(f"Plate path does not exist: {plate_path}") 

607 

608 # Check for Index.xml in the plate directory 

609 index_xml = plate_path / "Index.xml" 

610 if index_xml.exists(): 610 ↛ 611line 610 didn't jump to line 611 because the condition on line 610 was never true

611 return index_xml 

612 

613 # Check for Index.xml in the Images directory 

614 images_dir = plate_path / "Images" 

615 if images_dir.exists(): 615 ↛ 621line 615 didn't jump to line 621 because the condition on line 615 was always true

616 index_xml = images_dir / "Index.xml" 

617 if index_xml.exists(): 617 ↛ 621line 617 didn't jump to line 621 because the condition on line 617 was always true

618 return index_xml 

619 

620 # No recursive search - only check root and Images directories 

621 raise FileNotFoundError( 

622 f"Index.xml not found in {plate_path} or {plate_path}/Images. " 

623 "Opera Phenix metadata requires Index.xml file." 

624 ) 

625 

626 # Ensure result is a Path object 

627 if isinstance(result, str): 

628 return Path(result) 

629 if isinstance(result, Path): 

630 return result 

631 # This should not happen if FileManager is properly implemented 

632 logger.warning("Unexpected result type from find_file_recursive: %s", type(result).__name__) 

633 return Path(str(result)) 

634 

635 def get_grid_dimensions(self, plate_path: Union[str, Path]): 

636 """ 

637 Get grid dimensions for stitching from Index.xml file. 

638 

639 Args: 

640 plate_path: Path to the plate folder 

641 

642 Returns: 

643 Tuple of (grid_rows, grid_cols) - UPDATED: Now returns (rows, cols) for MIST compatibility 

644 

645 Raises: 

646 FileNotFoundError: If no Index.xml file is found 

647 OperaPhenixXmlParseError: If the XML cannot be parsed 

648 OperaPhenixXmlContentError: If grid dimensions cannot be determined 

649 """ 

650 # Ensure plate_path is a Path object 

651 if isinstance(plate_path, str): 651 ↛ 652line 651 didn't jump to line 652 because the condition on line 651 was never true

652 plate_path = Path(plate_path) 

653 

654 # Ensure the path exists 

655 if not plate_path.exists(): 655 ↛ 656line 655 didn't jump to line 656 because the condition on line 655 was never true

656 raise FileNotFoundError(f"Plate path does not exist: {plate_path}") 

657 

658 # Find the Index.xml file - this will raise FileNotFoundError if not found 

659 index_xml = self.find_metadata_file(plate_path) 

660 

661 # Use the OperaPhenixXmlParser to get the grid size 

662 # This will raise appropriate exceptions if parsing fails 

663 xml_parser = self.create_xml_parser(index_xml) 

664 grid_size = xml_parser.get_grid_size() 

665 

666 # Validate the grid size 

667 if grid_size[0] <= 0 or grid_size[1] <= 0: 667 ↛ 668line 667 didn't jump to line 668 because the condition on line 667 was never true

668 raise ValueError( 

669 f"Invalid grid dimensions: {grid_size[0]}x{grid_size[1]}. " 

670 "Grid dimensions must be positive integers." 

671 ) 

672 

673 logger.info("Grid size from Index.xml: %dx%d (cols x rows)", grid_size[0], grid_size[1]) 

674 # FIXED: Return (rows, cols) for MIST compatibility instead of (cols, rows) 

675 return (grid_size[1], grid_size[0]) 

676 

677 def get_pixel_size(self, plate_path: Union[str, Path]): 

678 """ 

679 Get the pixel size from Index.xml file. 

680 

681 Args: 

682 plate_path: Path to the plate folder 

683 

684 Returns: 

685 Pixel size in micrometers 

686 

687 Raises: 

688 FileNotFoundError: If no Index.xml file is found 

689 OperaPhenixXmlParseError: If the XML cannot be parsed 

690 OperaPhenixXmlContentError: If pixel size cannot be determined 

691 """ 

692 # Ensure plate_path is a Path object 

693 if isinstance(plate_path, str): 

694 plate_path = Path(plate_path) 

695 

696 # Ensure the path exists 

697 if not plate_path.exists(): 

698 raise FileNotFoundError(f"Plate path does not exist: {plate_path}") 

699 

700 # Find the Index.xml file - this will raise FileNotFoundError if not found 

701 index_xml = self.find_metadata_file(plate_path) 

702 

703 # Use the OperaPhenixXmlParser to get the pixel size 

704 # This will raise appropriate exceptions if parsing fails 

705 xml_parser = self.create_xml_parser(index_xml) 

706 pixel_size = xml_parser.get_pixel_size() 

707 

708 # Validate the pixel size 

709 if pixel_size <= 0: 

710 raise ValueError( 

711 f"Invalid pixel size: {pixel_size}. " 

712 "Pixel size must be a positive number." 

713 ) 

714 

715 logger.info("Pixel size from Index.xml: %.4f μm", pixel_size) 

716 return pixel_size 

717 

718 def get_channel_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]: 

719 """ 

720 Get channel key→name mapping from Opera Phenix Index.xml. 

721 

722 Args: 

723 plate_path: Path to the plate folder (str or Path) 

724 

725 Returns: 

726 Dict mapping channel IDs to channel names from metadata 

727 Example: {"1": "HOECHST 33342", "2": "Calcein", "3": "Alexa 647"} 

728 """ 

729 try: 

730 # Ensure plate_path is a Path object 

731 if isinstance(plate_path, str): 731 ↛ 732line 731 didn't jump to line 732 because the condition on line 731 was never true

732 plate_path = Path(plate_path) 

733 

734 # Find and parse Index.xml 

735 index_xml = self.find_metadata_file(plate_path) 

736 xml_parser = self.create_xml_parser(index_xml) 

737 

738 # Extract channel information 

739 channel_mapping = {} 

740 

741 # Look for channel entries in the XML 

742 # Opera Phenix stores channel info in multiple places, try the most common 

743 root = xml_parser.root 

744 namespace = xml_parser.namespace 

745 

746 # Find channel entries with ChannelName elements 

747 channel_entries = root.findall(f".//{namespace}Entry[@ChannelID]") 

748 for entry in channel_entries: 

749 channel_id = entry.get('ChannelID') 

750 channel_name_elem = entry.find(f"{namespace}ChannelName") 

751 

752 if channel_id and channel_name_elem is not None: 752 ↛ 748line 752 didn't jump to line 748 because the condition on line 752 was always true

753 channel_name = channel_name_elem.text 

754 if channel_name: 754 ↛ 748line 754 didn't jump to line 748 because the condition on line 754 was always true

755 channel_mapping[channel_id] = channel_name 

756 

757 return channel_mapping if channel_mapping else None 

758 

759 except Exception as e: 

760 logger.debug(f"Could not extract channel names from Opera Phenix metadata: {e}") 

761 return None 

762 

763 def get_well_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]: 

764 """ 

765 Get well key→name mapping from Opera Phenix metadata. 

766 

767 Args: 

768 plate_path: Path to the plate folder (str or Path) 

769 

770 Returns: 

771 None - Opera Phenix doesn't provide rich well names in metadata 

772 """ 

773 return None 

774 

775 def get_site_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]: 

776 """ 

777 Get site key→name mapping from Opera Phenix metadata. 

778 

779 Args: 

780 plate_path: Path to the plate folder (str or Path) 

781 

782 Returns: 

783 None - Opera Phenix doesn't provide rich site names in metadata 

784 """ 

785 return None 

786 

787 def get_z_index_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]: 

788 """ 

789 Get z_index key→name mapping from Opera Phenix metadata. 

790 

791 Args: 

792 plate_path: Path to the plate folder (str or Path) 

793 

794 Returns: 

795 None - Opera Phenix doesn't provide rich z_index names in metadata 

796 """ 

797 return None 

798 

799 

800 

801 def create_xml_parser(self, xml_path: Union[str, Path]): 

802 """ 

803 Create an OperaPhenixXmlParser for the given XML file. 

804 

805 Args: 

806 xml_path: Path to the XML file 

807 

808 Returns: 

809 OperaPhenixXmlParser: Parser for the XML file 

810 

811 Raises: 

812 FileNotFoundError: If the XML file does not exist 

813 """ 

814 # Ensure xml_path is a Path object 

815 if isinstance(xml_path, str): 815 ↛ 816line 815 didn't jump to line 816 because the condition on line 815 was never true

816 xml_path = Path(xml_path) 

817 

818 # Ensure the path exists 

819 if not xml_path.exists(): 819 ↛ 820line 819 didn't jump to line 820 because the condition on line 819 was never true

820 raise FileNotFoundError(f"XML file does not exist: {xml_path}") 

821 

822 # Create the parser 

823 return OperaPhenixXmlParser(xml_path) 

824 

825 

# Set metadata handler class after class definition for automatic registration.
# NOTE(review): the import sits here rather than at the top of the file —
# presumably to avoid a circular import with microscope_base; confirm before
# moving it.
from openhcs.microscopes.microscope_base import register_metadata_handler

# Attach the handler class as a class attribute, then register the
# (handler, metadata handler) pair with microscope_base.
OperaPhenixHandler._metadata_handler_class = OperaPhenixMetadataHandler
register_metadata_handler(OperaPhenixHandler, OperaPhenixMetadataHandler)