Coverage for openhcs/microscopes/opera_phenix.py: 57.3%
326 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-01 18:33 +0000
1"""
2Opera Phenix microscope implementations for openhcs.
4This module provides concrete implementations of FilenameParser and MetadataHandler
5for Opera Phenix microscopes.
6"""
8import logging
9import os
10import re
11from pathlib import Path
12from typing import Any, Dict, List, Optional, Union, Type, Tuple
14from openhcs.constants.constants import Backend
15from openhcs.microscopes.opera_phenix_xml_parser import OperaPhenixXmlParser
16from openhcs.io.filemanager import FileManager
17from openhcs.microscopes.microscope_base import MicroscopeHandler
18from openhcs.microscopes.microscope_interfaces import (FilenameParser,
19 MetadataHandler)
21logger = logging.getLogger(__name__)
class OperaPhenixHandler(MicroscopeHandler):
    """
    MicroscopeHandler implementation for Opera Phenix systems.

    This handler combines the OperaPhenix filename parser with its
    corresponding metadata handler. It guarantees aligned behavior
    for plate structure parsing, metadata extraction, and any optional
    post-processing steps required after workspace setup.
    """

    # Explicit microscope type for proper registration
    _microscope_type = 'opera_phenix'

    # Class attribute for automatic metadata handler registration (set after class definition)
    _metadata_handler_class = None

    def __init__(self, filemanager: FileManager, pattern_format: Optional[str] = None):
        """Create the handler with its paired filename parser and metadata handler."""
        self.parser = OperaPhenixFilenameParser(filemanager, pattern_format=pattern_format)
        self.metadata_handler = OperaPhenixMetadataHandler(filemanager)
        super().__init__(parser=self.parser, metadata_handler=self.metadata_handler)

    @property
    def common_dirs(self) -> List[str]:
        """Subdirectory names commonly used by Opera Phenix."""
        return ['Images']

    @property
    def microscope_type(self) -> str:
        """Microscope type identifier (for interface enforcement only)."""
        return 'opera_phenix'

    @property
    def metadata_handler_class(self) -> Type[MetadataHandler]:
        """Metadata handler class (for interface enforcement only)."""
        return OperaPhenixMetadataHandler

    @property
    def compatible_backends(self) -> List[Backend]:
        """
        Opera Phenix is compatible with DISK backend only.

        Legacy microscope format with standard file operations.
        """
        return [Backend.DISK]

    # Uses default workspace initialization from base class

    def _prepare_workspace(self, workspace_path: Path, filemanager: FileManager):
        """
        Renames Opera Phenix images to follow a consistent field order
        based on spatial layout extracted from Index.xml. Uses remapped
        filenames and replaces the directory in-place.

        The workspace is expected to contain only symlinks to the original
        plate data; this method never touches the real files. It builds a
        temporary directory of remapped symlinks, deletes the old symlinks,
        then moves the remapped links back into the image directory.

        This method performs preparation but does not determine the final image directory.

        Args:
            workspace_path: Path to the symlinked workspace
            filemanager: FileManager instance for file operations

        Returns:
            Path to the normalized image directory.

        Raises:
            RuntimeError: If a real (non-symlink) file is found in the
                workspace, or if any path would escape the workspace.
        """

        # Idempotency guard: a leftover temp directory means a previous run
        # already processed this workspace, so skip re-processing.
        temp_dir_name = "__opera_phenix_temp"
        for entry in filemanager.list_dir(workspace_path, Backend.DISK.value):
            entry_path = Path(workspace_path) / entry
            if entry_path.is_dir() and entry_path.name == temp_dir_name:
                logger.info(f"📁 WORKSPACE ALREADY PROCESSED: Found {temp_dir_name} - skipping Opera Phenix preparation")
                return workspace_path

        logger.info(f"🔄 PROCESSING WORKSPACE: Applying Opera Phenix name remapping to {workspace_path}")
        # Find the image directory using the common_dirs property
        # Clause 245: Workspace operations are disk-only by design
        # This call is structurally hardcoded to use the "disk" backend

        # Get all entries in the directory
        entries = filemanager.list_dir(workspace_path, Backend.DISK.value)

        # Look for a directory matching any of the common_dirs patterns
        # (falls back to the workspace root itself if none matches)
        image_dir = workspace_path
        for entry in entries:
            entry_lower = entry.lower()
            if any(common_dir.lower() in entry_lower for common_dir in self.common_dirs):
                # Found a matching directory
                image_dir = Path(workspace_path) / entry if isinstance(workspace_path, (str, Path)) else workspace_path / entry
                logger.info("Found directory matching common_dirs pattern: %s", image_dir)
                break

        # Default to empty field mapping (no remapping)
        field_mapping = {}

        # Try to load field mapping from Index.xml if available
        try:
            # Clause 245: Workspace operations are disk-only by design
            # This call is structurally hardcoded to use the "disk" backend
            index_xml = filemanager.find_file_recursive(workspace_path, "Index.xml", Backend.DISK.value)
            if index_xml:
                xml_parser = OperaPhenixXmlParser(index_xml)
                field_mapping = xml_parser.get_field_id_mapping()
                logger.debug("Loaded field mapping from Index.xml: %s", field_mapping)
            else:
                logger.debug("Index.xml not found. Using default field mapping.")
        except Exception as e:
            # Best-effort: a broken Index.xml degrades to no remapping rather
            # than aborting the workspace preparation.
            logger.error("Error loading Index.xml: %s", e)
            logger.debug("Using default field mapping due to error.")

        # Get all image files in the directory BEFORE creating temp directory
        # This prevents recursive mirroring of the temp directory
        # Clause 245: Workspace operations are disk-only by design
        # This call is structurally hardcoded to use the "disk" backend
        image_files = filemanager.list_image_files(image_dir, Backend.DISK.value)

        # Create a uniquely named temporary directory for renamed files
        # Use "__opera_phenix_temp" to make it clearly identifiable
        if isinstance(image_dir, str):
            temp_dir = os.path.join(image_dir, "__opera_phenix_temp")
        else:  # Path object
            temp_dir = image_dir / "__opera_phenix_temp"

        # SAFETY CHECK: Ensure temp directory is within workspace
        if not str(temp_dir).startswith(str(workspace_path)):
            logger.error("SAFETY VIOLATION: Temp directory would be created outside workspace: %s", temp_dir)
            raise RuntimeError(f"Temp directory would be created outside workspace: {temp_dir}")

        # Clause 245: Workspace operations are disk-only by design
        # This call is structurally hardcoded to use the "disk" backend
        filemanager.ensure_directory(temp_dir, Backend.DISK.value)

        logger.debug("Created temporary directory for Opera Phenix workspace preparation: %s", temp_dir)

        # Process each file: resolve its symlink, remap its field ID, and
        # create a correspondingly renamed symlink in the temp directory.
        for file_path in image_files:
            # FileManager should return strings, but handle Path objects too
            if isinstance(file_path, str):
                file_name = os.path.basename(file_path)
                file_path_obj = Path(file_path)
            elif isinstance(file_path, Path):
                file_name = file_path.name
                file_path_obj = file_path
            else:
                # Skip any unexpected types
                logger.warning("Unexpected file path type: %s", type(file_path).__name__)
                continue

            # Check if this is a symlink
            if file_path_obj.is_symlink():
                try:
                    # Get the target of the symlink (what it points to)
                    real_file_path = file_path_obj.resolve()
                    if not real_file_path.exists():
                        logger.warning("Broken symlink detected: %s -> %s", file_path, real_file_path)
                        continue
                    # Store both the symlink path and the real file path
                    source_path = str(real_file_path)
                    symlink_target = str(real_file_path)
                except Exception as e:
                    logger.warning("Failed to resolve symlink %s: %s", file_path, e)
                    continue
            else:
                # This should never happen in a properly mirrored workspace
                logger.error("SAFETY VIOLATION: Found real file in workspace (should be symlink): %s", file_path)
                raise RuntimeError(f"Workspace contains real file instead of symlink: {file_path}")

            # Store the original symlink path for reference
            original_symlink_path = str(file_path_obj)

            # Parse file metadata; files without a parseable site are skipped
            metadata = self.parser.parse_filename(file_name)
            if not metadata or 'site' not in metadata or metadata['site'] is None:
                continue

            # Remap the field ID using the spatial layout
            original_field_id = metadata['site']
            new_field_id = field_mapping.get(original_field_id, original_field_id)

            # Construct the new filename with proper padding
            metadata['site'] = new_field_id  # Update site with remapped value
            new_name = self.parser.construct_filename(**metadata)

            # Create the new path in the temporary directory
            if isinstance(temp_dir, str):
                new_path = os.path.join(temp_dir, new_name)
            else:  # Path object
                new_path = temp_dir / new_name

            # Check if destination already exists in temp directory
            try:
                # Clause 245: Workspace operations are disk-only by design
                # This call is structurally hardcoded to use the "disk" backend
                if filemanager.exists(new_path, Backend.DISK.value):
                    # For temp directory, we can be more aggressive and delete any existing file
                    logger.debug("File exists in temp directory, removing before copy: %s", new_path)
                    filemanager.delete(new_path, Backend.DISK.value)

                # Create a symlink in the temp directory pointing to the original file
                # Clause 245: Workspace operations are disk-only by design
                # This call is structurally hardcoded to use the "disk" backend
                filemanager.create_symlink(source_path, new_path, Backend.DISK.value)
                logger.debug("Created symlink in temp directory: %s -> %s", new_path, source_path)

            except Exception as e:
                logger.error("Failed to copy file to temp directory: %s -> %s: %s",
                             source_path, new_path, e)
                raise RuntimeError(f"Failed to copy file to temp directory: {e}") from e

        # Clean up and replace old files - ONLY delete symlinks in workspace, NEVER original files
        for file_path in image_files:
            # Convert to Path object for symlink checking
            file_path_obj = Path(file_path) if isinstance(file_path, str) else file_path

            # SAFETY CHECK: Only delete if it's within the workspace directory
            if not str(file_path_obj).startswith(str(workspace_path)):
                logger.error("SAFETY VIOLATION: Attempted to delete file outside workspace: %s", file_path)
                raise RuntimeError(f"Workspace preparation tried to delete file outside workspace: {file_path}")

            # SAFETY CHECK: In workspace, only delete symlinks, never real files
            if file_path_obj.is_symlink():
                # Safe to delete - it's a symlink in the workspace
                logger.debug("Deleting symlink in workspace: %s", file_path)
                filemanager.delete(file_path, Backend.DISK.value)
            elif file_path_obj.is_file():
                # This should never happen in a properly mirrored workspace
                logger.error("SAFETY VIOLATION: Found real file in workspace (should be symlink): %s", file_path)
                raise RuntimeError(f"Workspace contains real file instead of symlink: {file_path}")
            else:
                logger.warning("File not found or not accessible: %s", file_path)

        # Get all files in the temporary directory
        # Clause 245: Workspace operations are disk-only by design
        # This call is structurally hardcoded to use the "disk" backend
        temp_files = filemanager.list_files(temp_dir, Backend.DISK.value)

        # Move files from temporary directory to image directory
        for temp_file in temp_files:
            # FileManager should return strings, but handle Path objects too
            if isinstance(temp_file, str):
                temp_file_name = os.path.basename(temp_file)
            elif isinstance(temp_file, Path):
                temp_file_name = temp_file.name
            else:
                # Skip any unexpected types
                logger.warning("Unexpected file path type: %s", type(temp_file).__name__)
                continue
            if isinstance(image_dir, str):
                dest_path = os.path.join(image_dir, temp_file_name)
            else:  # Path object
                dest_path = image_dir / temp_file_name

            try:
                # Check if destination already exists in image directory
                # Clause 245: Workspace operations are disk-only by design
                # This call is structurally hardcoded to use the "disk" backend
                if filemanager.exists(dest_path, Backend.DISK.value):
                    # If destination is a symlink, ok to remove and replace
                    if filemanager.is_symlink(dest_path, Backend.DISK.value):
                        logger.debug("Destination is a symlink, removing before copy: %s", dest_path)
                        filemanager.delete(dest_path, Backend.DISK.value)
                    else:
                        # Not a symlink - could be a real file
                        logger.error("SAFETY VIOLATION: Destination exists and is not a symlink: %s", dest_path)
                        raise FileExistsError(f"Destination exists and is not a symlink: {dest_path}")

                # First, if the temp file is a symlink, get its target
                temp_file_obj = Path(temp_file) if isinstance(temp_file, str) else temp_file
                if temp_file_obj.is_symlink():
                    try:
                        # Get the target that the temp symlink points to
                        real_target = temp_file_obj.resolve()
                        real_target_path = str(real_target)

                        # Create a new symlink in the image directory pointing to the original file
                        # Clause 245: Workspace operations are disk-only by design
                        # This call is structurally hardcoded to use the "disk" backend
                        filemanager.create_symlink(real_target_path, dest_path, Backend.DISK.value)
                        logger.debug("Created symlink in image directory: %s -> %s", dest_path, real_target_path)
                    except Exception as e:
                        logger.error("Failed to resolve symlink in temp directory: %s: %s", temp_file, e)
                        raise RuntimeError(f"Failed to resolve symlink: {e}") from e
                else:
                    # This should never happen if we're using symlinks consistently
                    logger.warning("Temp file is not a symlink: %s", temp_file)
                    # Fall back to copying the file
                    filemanager.copy(temp_file, dest_path, Backend.DISK.value)
                    logger.debug("Copied file (not symlink) to image directory: %s -> %s", temp_file, dest_path)

                # Remove the file from the temporary directory
                # Clause 245: Workspace operations are disk-only by design
                # This call is structurally hardcoded to use the "disk" backend
                filemanager.delete(temp_file, Backend.DISK.value)

            except FileExistsError as e:
                # Re-raise with clear message
                logger.error("Cannot copy to destination: %s", e)
                raise
            except Exception as e:
                logger.error("Error copying from temp to destination: %s -> %s: %s",
                             temp_file, dest_path, e)
                raise RuntimeError(f"Failed to process file from temp directory: {e}") from e

        # SAFETY CHECK: Validate temp directory before deletion
        if not str(temp_dir).startswith(str(workspace_path)):
            logger.error("SAFETY VIOLATION: Attempted to delete temp directory outside workspace: %s", temp_dir)
            raise RuntimeError(f"Attempted to delete temp directory outside workspace: {temp_dir}")

        if not "__opera_phenix_temp" in str(temp_dir):
            logger.error("SAFETY VIOLATION: Attempted to delete non-temp directory: %s", temp_dir)
            raise RuntimeError(f"Attempted to delete non-temp directory: {temp_dir}")

        # Remove the temporary directory
        # Clause 245: Workspace operations are disk-only by design
        # This call is structurally hardcoded to use the "disk" backend
        try:
            filemanager.delete(temp_dir, Backend.DISK.value)
            logger.debug("Successfully removed temporary directory: %s", temp_dir)
        except Exception as e:
            # Non-fatal error, just log it
            logger.warning("Failed to remove temporary directory %s: %s", temp_dir, e)

        return image_dir
class OperaPhenixFilenameParser(FilenameParser):
    """Parser for Opera Phenix microscope filenames.

    Handles Opera Phenix format filenames like:
    - r01c01f001p01-ch1sk1fk1fl1.tiff
    - r01c01f001p01-ch1.tiff

    Numeric groups (field, plane, channel) may also be template placeholders
    such as ``{iii}``; :meth:`parse_filename` returns ``None`` for those fields.
    """

    # Regular expression pattern for Opera Phenix filenames:
    # row, column, field (site), plane (z), channel, optional sk/fk/fl suffixes, extension
    _pattern = re.compile(r"r(\d{1,2})c(\d{1,2})f(\d+|\{[^\}]*\})p(\d+|\{[^\}]*\})-ch(\d+|\{[^\}]*\})(?:sk\d+)?(?:fk\d+)?(?:fl\d+)?(\.\w+)$", re.I)

    # Pattern for extracting row and column from Opera Phenix well format
    _well_pattern = re.compile(r"R(\d{2})C(\d{2})", re.I)

    def __init__(self, filemanager=None, pattern_format=None):
        """
        Initialize the parser.

        Args:
            filemanager: FileManager instance (not used, but required for interface compatibility)
            pattern_format: Optional pattern format (not used, but required for interface compatibility)
        """
        super().__init__()  # Initialize the generic parser interface

        # These parameters are not used by this parser, but are required for interface compatibility
        self.filemanager = filemanager
        self.pattern_format = pattern_format

    @classmethod
    def can_parse(cls, filename: str) -> bool:
        """
        Check if this parser can parse the given filename.

        Args:
            filename (str): Filename to check

        Returns:
            bool: True if this parser can parse the filename, False otherwise
        """
        # 🔒 Clause 17 — VFS Boundary Method
        # This is a string operation that doesn't perform actual file I/O
        # Extract just the basename
        basename = os.path.basename(filename)
        # Check if the filename matches the Opera Phenix pattern
        return bool(cls._pattern.match(basename))

    def parse_filename(self, filename: str) -> Optional[Dict[str, Any]]:
        """
        Parse an Opera Phenix filename to extract all components.
        Supports placeholders like {iii} which will return None for that field.

        Args:
            filename (str): Filename to parse

        Returns:
            dict or None: Dictionary with extracted components (well, site,
            channel, wavelength, z_index, extension) or None if parsing fails.
        """
        # 🔒 Clause 17 — VFS Boundary Method
        # This is a string operation that doesn't perform actual file I/O
        basename = os.path.basename(filename)
        logger.debug("OperaPhenixFilenameParser attempting to parse basename: '%s'", basename)

        # Try parsing using the Opera Phenix pattern
        match = self._pattern.match(basename)
        if match:
            logger.debug("Regex match successful for '%s'", basename)
            row, col, site_str, z_str, channel_str, ext = match.groups()

            # Helper function to parse component strings
            def parse_comp(s):
                """Parse component string to int or None if it's a placeholder."""
                if not s or '{' in s:
                    return None
                return int(s)

            # Create well ID from row and column (e.g. 'R01C03')
            well = f"R{int(row):02d}C{int(col):02d}"

            # Parse components
            site = parse_comp(site_str)
            channel = parse_comp(channel_str)
            z_index = parse_comp(z_str)

            result = {
                'well': well,
                'site': site,
                'channel': channel,
                'wavelength': channel,  # For backward compatibility
                'z_index': z_index,
                'extension': ext if ext else '.tif'
            }
            return result

        logger.warning("Regex match failed for basename: '%s'", basename)
        return None

    def construct_filename(self, extension: str = '.tiff', site_padding: int = 3, z_padding: int = 3, **component_values) -> str:
        """
        Construct an Opera Phenix filename from components.

        This method now uses **kwargs to accept any component values dynamically,
        making it compatible with the generic parser interface.

        Args:
            extension (str, optional): File extension (default: '.tiff')
            site_padding (int, optional): Width to pad site numbers to (default: 3)
            z_padding (int, optional): Width to pad Z-index numbers to (default: 3)
            **component_values: Component values as keyword arguments.
                Expected keys: well, site, channel, z_index

        Returns:
            str: Constructed filename

        Raises:
            ValueError: If 'well' or 'site' is missing, or if 'well' is not
                in Opera Phenix 'R01C03' format.
        """
        # Extract components from kwargs
        well = component_values.get('well')
        site = component_values.get('site')
        channel = component_values.get('channel')
        z_index = component_values.get('z_index')

        if not well:
            raise ValueError("Well component is required for filename construction")

        # FIX: previously a None site fell through to the integer-format
        # branch below and raised an opaque TypeError. Unlike channel/z_index
        # there is no sensible default for the field number, so fail clearly.
        if site is None:
            raise ValueError("Site component is required for filename construction")

        # Extract row and column from well name
        # Check if well is in Opera Phenix format (e.g., 'R01C03')
        match = self._well_pattern.match(well)
        if match:
            # Extract row and column from Opera Phenix format
            row = int(match.group(1))
            col = int(match.group(2))
        else:
            raise ValueError(f"Invalid well format: {well}. Expected format: 'R01C03'")

        # Default Z-index and channel to 1 if not provided
        z_index = 1 if z_index is None else z_index
        channel = 1 if channel is None else channel

        # Construct filename in Opera Phenix format
        if isinstance(site, str):
            # If site is a string (e.g., '{iii}'), use it directly
            site_part = f"f{site}"
        else:
            # Otherwise, format it as a padded integer
            site_part = f"f{site:0{site_padding}d}"

        if isinstance(z_index, str):
            # If z_index is a string (e.g., '{zzz}'), use it directly
            z_part = f"p{z_index}"
        else:
            # Otherwise, format it as a padded integer
            z_part = f"p{z_index:0{z_padding}d}"

        return f"r{row:02d}c{col:02d}{site_part}{z_part}-ch{channel}sk1fk1fl1{extension}"

    def remap_field_in_filename(self, filename: str, xml_parser: Optional[OperaPhenixXmlParser] = None) -> str:
        """
        Remap the field ID in a filename to follow a top-left to bottom-right pattern.

        Args:
            filename: Original filename
            xml_parser: Parser with XML data

        Returns:
            str: New filename with remapped field ID (unchanged if the
            filename cannot be parsed or no parser is supplied)
        """
        if xml_parser is None:
            return filename

        # Parse the filename
        metadata = self.parse_filename(filename)
        if not metadata or 'site' not in metadata or metadata['site'] is None:
            return filename

        # Get the mapping and remap the field ID
        mapping = xml_parser.get_field_id_mapping()
        new_field_id = xml_parser.remap_field_id(metadata['site'], mapping)

        # Always create a new filename with the remapped field ID and consistent padding
        # This ensures all filenames have the same format, even if the field ID didn't change
        metadata['site'] = new_field_id  # Update site with remapped value
        return self.construct_filename(**metadata)

    def extract_component_coordinates(self, component_value: str) -> Tuple[str, str]:
        """
        Extract coordinates from component identifier (typically well).

        Args:
            component_value (str): Component identifier (e.g., 'R03C04' or 'A01')

        Returns:
            Tuple[str, str]: (row, column) where row is like 'A', 'B' and column is like '01', '04'

        Raises:
            ValueError: If component format is invalid
        """
        if not component_value:
            raise ValueError(f"Invalid component format: {component_value}")

        # Check if component is in Opera Phenix format (e.g., 'R01C03')
        match = self._well_pattern.match(component_value)
        if match:
            # Extract row and column from Opera Phenix format
            row_num = int(match.group(1))
            col_num = int(match.group(2))
            # Convert to letter-number format: R01C03 -> A, 03
            row = chr(ord('A') + row_num - 1)  # R01 -> A, R02 -> B, etc.
            col = f"{col_num:02d}"  # Ensure 2-digit padding
            return row, col
        else:
            # Assume simple format like 'A01', 'C04'
            if len(component_value) < 2:
                raise ValueError(f"Invalid component format: {component_value}")
            row = component_value[0]
            col = component_value[1:]
            if not row.isalpha() or not col.isdigit():
                raise ValueError(f"Invalid Opera Phenix component format: {component_value}. Expected 'R01C03' or 'A01' format")
            return row, col
class OperaPhenixMetadataHandler(MetadataHandler):
    """
    Metadata handler for Opera Phenix microscopes.

    Handles finding and parsing Index.xml files for Opera Phenix microscopes.
    """

    def __init__(self, filemanager: FileManager):
        """
        Initialize the metadata handler.

        Args:
            filemanager: FileManager instance for file operations.
        """
        super().__init__()
        self.filemanager = filemanager

    # Legacy mode has been completely purged

    def find_metadata_file(self, plate_path: Union[str, Path]):
        """
        Find the Index.xml file in the plate directory.

        Only the plate root and its 'Images' subdirectory are checked; no
        recursive search is performed.

        Args:
            plate_path: Path to the plate directory

        Returns:
            Path to the Index.xml file

        Raises:
            FileNotFoundError: If the plate path does not exist or no
                Index.xml file is found
        """
        # Ensure plate_path is a Path object
        if isinstance(plate_path, str):
            plate_path = Path(plate_path)

        # Ensure the path exists
        if not plate_path.exists():
            raise FileNotFoundError(f"Plate path does not exist: {plate_path}")

        # Check for Index.xml in the plate directory
        index_xml = plate_path / "Index.xml"
        if index_xml.exists():
            return index_xml

        # Check for Index.xml in the Images directory
        images_dir = plate_path / "Images"
        if images_dir.exists():
            index_xml = images_dir / "Index.xml"
            if index_xml.exists():
                return index_xml

        # No recursive search - only check root and Images directories
        # FIX: removed unreachable dead code that followed this raise and
        # referenced an undefined name 'result'.
        raise FileNotFoundError(
            f"Index.xml not found in {plate_path} or {plate_path}/Images. "
            "Opera Phenix metadata requires Index.xml file."
        )

    def get_grid_dimensions(self, plate_path: Union[str, Path]):
        """
        Get grid dimensions for stitching from Index.xml file.

        Args:
            plate_path: Path to the plate folder

        Returns:
            Tuple of (grid_rows, grid_cols) - UPDATED: Now returns (rows, cols) for MIST compatibility

        Raises:
            FileNotFoundError: If no Index.xml file is found
            OperaPhenixXmlParseError: If the XML cannot be parsed
            OperaPhenixXmlContentError: If grid dimensions cannot be determined
            ValueError: If the parsed grid dimensions are not positive
        """
        # Ensure plate_path is a Path object
        if isinstance(plate_path, str):
            plate_path = Path(plate_path)

        # Ensure the path exists
        if not plate_path.exists():
            raise FileNotFoundError(f"Plate path does not exist: {plate_path}")

        # Find the Index.xml file - this will raise FileNotFoundError if not found
        index_xml = self.find_metadata_file(plate_path)

        # Use the OperaPhenixXmlParser to get the grid size
        # This will raise appropriate exceptions if parsing fails
        xml_parser = self.create_xml_parser(index_xml)
        grid_size = xml_parser.get_grid_size()

        # Validate the grid size
        if grid_size[0] <= 0 or grid_size[1] <= 0:
            raise ValueError(
                f"Invalid grid dimensions: {grid_size[0]}x{grid_size[1]}. "
                "Grid dimensions must be positive integers."
            )

        logger.info("Grid size from Index.xml: %dx%d (cols x rows)", grid_size[0], grid_size[1])
        # FIXED: Return (rows, cols) for MIST compatibility instead of (cols, rows)
        return (grid_size[1], grid_size[0])

    def get_pixel_size(self, plate_path: Union[str, Path]):
        """
        Get the pixel size from Index.xml file.

        Args:
            plate_path: Path to the plate folder

        Returns:
            Pixel size in micrometers

        Raises:
            FileNotFoundError: If no Index.xml file is found
            OperaPhenixXmlParseError: If the XML cannot be parsed
            OperaPhenixXmlContentError: If pixel size cannot be determined
            ValueError: If the parsed pixel size is not positive
        """
        # Ensure plate_path is a Path object
        if isinstance(plate_path, str):
            plate_path = Path(plate_path)

        # Ensure the path exists
        if not plate_path.exists():
            raise FileNotFoundError(f"Plate path does not exist: {plate_path}")

        # Find the Index.xml file - this will raise FileNotFoundError if not found
        index_xml = self.find_metadata_file(plate_path)

        # Use the OperaPhenixXmlParser to get the pixel size
        # This will raise appropriate exceptions if parsing fails
        xml_parser = self.create_xml_parser(index_xml)
        pixel_size = xml_parser.get_pixel_size()

        # Validate the pixel size
        if pixel_size <= 0:
            raise ValueError(
                f"Invalid pixel size: {pixel_size}. "
                "Pixel size must be a positive number."
            )

        logger.info("Pixel size from Index.xml: %.4f μm", pixel_size)
        return pixel_size

    def get_channel_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]:
        """
        Get channel key→name mapping from Opera Phenix Index.xml.

        Args:
            plate_path: Path to the plate folder (str or Path)

        Returns:
            Dict mapping channel IDs to channel names from metadata, or None
            if no channel names could be extracted.
            Example: {"1": "HOECHST 33342", "2": "Calcein", "3": "Alexa 647"}
        """
        try:
            # Ensure plate_path is a Path object
            if isinstance(plate_path, str):
                plate_path = Path(plate_path)

            # Find and parse Index.xml
            index_xml = self.find_metadata_file(plate_path)
            xml_parser = self.create_xml_parser(index_xml)

            # Extract channel information
            channel_mapping = {}

            # Look for channel entries in the XML
            # Opera Phenix stores channel info in multiple places, try the most common
            root = xml_parser.root
            namespace = xml_parser.namespace

            # Find channel entries with ChannelName elements
            channel_entries = root.findall(f".//{namespace}Entry[@ChannelID]")
            for entry in channel_entries:
                channel_id = entry.get('ChannelID')
                channel_name_elem = entry.find(f"{namespace}ChannelName")

                if channel_id and channel_name_elem is not None:
                    channel_name = channel_name_elem.text
                    if channel_name:
                        channel_mapping[channel_id] = channel_name

            return channel_mapping if channel_mapping else None

        except Exception as e:
            # Best-effort extraction: channel names are optional metadata
            logger.debug(f"Could not extract channel names from Opera Phenix metadata: {e}")
            return None

    def get_well_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]:
        """
        Get well key→name mapping from Opera Phenix metadata.

        Args:
            plate_path: Path to the plate folder (str or Path)

        Returns:
            None - Opera Phenix doesn't provide rich well names in metadata
        """
        return None

    def get_site_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]:
        """
        Get site key→name mapping from Opera Phenix metadata.

        Args:
            plate_path: Path to the plate folder (str or Path)

        Returns:
            None - Opera Phenix doesn't provide rich site names in metadata
        """
        return None

    def get_z_index_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]:
        """
        Get z_index key→name mapping from Opera Phenix metadata.

        Args:
            plate_path: Path to the plate folder (str or Path)

        Returns:
            None - Opera Phenix doesn't provide rich z_index names in metadata
        """
        return None

    def create_xml_parser(self, xml_path: Union[str, Path]):
        """
        Create an OperaPhenixXmlParser for the given XML file.

        Args:
            xml_path: Path to the XML file

        Returns:
            OperaPhenixXmlParser: Parser for the XML file

        Raises:
            FileNotFoundError: If the XML file does not exist
        """
        # Ensure xml_path is a Path object
        if isinstance(xml_path, str):
            xml_path = Path(xml_path)

        # Ensure the path exists
        if not xml_path.exists():
            raise FileNotFoundError(f"XML file does not exist: {xml_path}")

        # Create the parser
        return OperaPhenixXmlParser(xml_path)
# Set metadata handler class after class definition for automatic registration.
# NOTE(review): the import is placed here, after both classes are defined —
# presumably to avoid a circular import with microscope_base; confirm before moving.
from openhcs.microscopes.microscope_base import register_metadata_handler
OperaPhenixHandler._metadata_handler_class = OperaPhenixMetadataHandler
register_metadata_handler(OperaPhenixHandler, OperaPhenixMetadataHandler)