Coverage for openhcs/microscopes/opera_phenix.py: 71.9%

291 statements  

coverage.py v7.11.0, created at 2025-11-04 02:09 +0000

1""" 

2Opera Phenix microscope implementations for openhcs. 

3 

4This module provides concrete implementations of FilenameParser and MetadataHandler 

5for Opera Phenix microscopes. 

6""" 

7 

8import logging 

9import os 

10import re 

11from pathlib import Path 

12from typing import Any, Dict, List, Optional, Union, Type, Tuple 

13 

14from openhcs.constants.constants import Backend 

15from openhcs.microscopes.opera_phenix_xml_parser import OperaPhenixXmlParser 

16from openhcs.io.filemanager import FileManager 

17from openhcs.io.metadata_writer import AtomicMetadataWriter 

18from openhcs.microscopes.microscope_base import MicroscopeHandler 

19from openhcs.microscopes.microscope_interfaces import (FilenameParser, 

20 MetadataHandler) 

21 

22logger = logging.getLogger(__name__) 

23 

24 

25 

26class OperaPhenixHandler(MicroscopeHandler): 

27 """ 

28 MicroscopeHandler implementation for Opera Phenix systems. 

29 

30 This handler combines the OperaPhenix filename parser with its 

31 corresponding metadata handler. It guarantees aligned behavior 

32 for plate structure parsing, metadata extraction, and any optional 

33 post-processing steps required after workspace setup. 

34 """ 

35 

36 # Explicit microscope type for proper registration 

37 _microscope_type = 'opera_phenix' 

38 

39 # Class attribute for automatic metadata handler registration (set after class definition) 

40 _metadata_handler_class = None 

41 

42 def __init__(self, filemanager: FileManager, pattern_format: Optional[str] = None): 

43 self.parser = OperaPhenixFilenameParser(filemanager, pattern_format=pattern_format) 

44 self.metadata_handler = OperaPhenixMetadataHandler(filemanager) 

45 super().__init__(parser=self.parser, metadata_handler=self.metadata_handler) 

46 

47 @property 

48 def root_dir(self) -> str: 

49 """ 

50 Root directory for Opera Phenix virtual workspace preparation. 

51 

52 Returns "Images" because Opera Phenix field remapping is applied 

53 to images in the Images/ subdirectory, and virtual paths include the Images/ prefix.

54 """ 

55 return "Images" 

56 

57 @property 

58 def microscope_type(self) -> str: 

59 """Microscope type identifier (for interface enforcement only).""" 

60 return 'opera_phenix' 

61 

62 @property 

63 def metadata_handler_class(self) -> Type[MetadataHandler]: 

64 """Metadata handler class (for interface enforcement only).""" 

65 return OperaPhenixMetadataHandler 

66 

67 @property 

68 def compatible_backends(self) -> List[Backend]: 

69 """ 

70 Opera Phenix is compatible with the DISK backend only.

71 

72 It is a legacy microscope format that uses standard file operations.

73 """ 

74 return [Backend.DISK] 

75 

76 

77 

78 # Uses default workspace initialization from base class 

79 

80 def _build_virtual_mapping(self, plate_path: Path, filemanager: FileManager) -> Path: 

81 """ 

82 Build Opera Phenix virtual workspace mapping using plate-relative paths. 

83 

84 Args: 

85 plate_path: Path to plate directory 

86 filemanager: FileManager instance for file operations 

87 

88 Returns: 

89 Path to image directory 

90 """ 

91 plate_path = Path(plate_path) # Ensure Path object 

92 

93 logger.info(f"🔄 BUILDING VIRTUAL MAPPING: Opera Phenix field remapping for {plate_path}") 

94 

95 # Opera Phenix images are always in Images/ subdirectory 

96 image_dir = plate_path / self.root_dir 

97 

98 # Default to empty field mapping (no remapping) 

99 field_mapping = {} 

100 

101 # Try to load field mapping from Index.xml if available 

102 xml_parser = None 

103 try: 

104 index_xml = filemanager.find_file_recursive(plate_path, "Index.xml", Backend.DISK.value) 

105 if index_xml:  (105 ↛ 110: condition always true)

106 xml_parser = OperaPhenixXmlParser(index_xml) 

107 field_mapping = xml_parser.get_field_id_mapping() 

108 logger.debug("Loaded field mapping from Index.xml: %s", field_mapping) 

109 else: 

110 logger.debug("Index.xml not found. Using default field mapping.") 

111 except Exception as e: 

112 logger.error("Error loading Index.xml: %s", e) 

113 logger.debug("Using default field mapping due to error.") 

114 

115 # Fill missing images BEFORE building virtual mapping 

116 # This handles autofocus failures by creating black placeholder images 

117 if xml_parser:  (117 ↛ 123: condition always true)

118 num_filled = self._fill_missing_images(image_dir, xml_parser, filemanager) 

119 if num_filled > 0:  (119 ↛ 123: condition always true)

120 logger.info(f"Created {num_filled} placeholder images for autofocus failures") 

121 

122 # Get all image files in the directory (including newly created placeholders) 

123 image_files = filemanager.list_image_files(image_dir, Backend.DISK.value) 

124 

125 # Initialize mapping dict (PLATE-RELATIVE paths) 

126 workspace_mapping = {} 

127 

128 # Process each file 

129 for file_path in image_files: 

130 # FileManager should return strings, but handle Path objects too 

131 if isinstance(file_path, str):  (131 ↛ 133: condition always true)

132 file_name = os.path.basename(file_path) 

133 elif isinstance(file_path, Path): 

134 file_name = file_path.name 

135 else: 

136 # Skip any unexpected types 

137 logger.warning("Unexpected file path type: %s", type(file_path).__name__) 

138 continue 

139 

140 # Parse file metadata 

141 metadata = self.parser.parse_filename(file_name) 

142 if not metadata or 'site' not in metadata or metadata['site'] is None:  (142 ↛ 143: condition never true)

143 continue 

144 

145 # Remap the field ID using the spatial layout 

146 original_field_id = metadata['site'] 

147 new_field_id = field_mapping.get(original_field_id, original_field_id) 

148 

149 # Construct the new filename with proper padding 

150 metadata['site'] = new_field_id # Update site with remapped value 

151 new_name = self.parser.construct_filename(**metadata) 

152 

153 # Build PLATE-RELATIVE mapping (no workspace directory) 

154 # Use .as_posix() to ensure forward slashes on all platforms (Windows uses backslashes with str()) 

155 virtual_relative = (Path("Images") / new_name).as_posix() 

156 real_relative = (Path("Images") / file_name).as_posix() 

157 workspace_mapping[virtual_relative] = real_relative 

158 

159 logger.info(f"Built {len(workspace_mapping)} virtual path mappings for Opera Phenix") 

160 

161 # Save virtual workspace mapping to metadata using root_dir as subdirectory key 

162 metadata_path = plate_path / "openhcs_metadata.json" 

163 writer = AtomicMetadataWriter() 

164 writer.merge_subdirectory_metadata(metadata_path, { 

165 self.root_dir: { 

166 "workspace_mapping": workspace_mapping, # Plate-relative paths 

167 "available_backends": {"disk": True, "virtual_workspace": True} 

168 } 

169 }) 

170 

171 logger.info(f"✅ Saved virtual workspace mapping to {metadata_path}") 

172 

173 return image_dir 

174 
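For illustration, the metadata merged into openhcs_metadata.json by this method has roughly the shape below; the filenames are hypothetical, assuming a field whose ID is unchanged and only re-padded (virtual names use the standardized 3-digit site/z padding, real names keep the original Opera Phenix padding).

    # Sketch of the payload passed to merge_subdirectory_metadata (hypothetical filenames)
    {
        "Images": {
            "workspace_mapping": {
                "Images/r01c01f003p001-ch1sk1fk1fl1.tiff": "Images/r01c01f3p01-ch1sk1fk1fl1.tiff"
            },
            "available_backends": {"disk": True, "virtual_workspace": True}
        }
    }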

175 def _fill_missing_images( 

176 self, 

177 image_dir: Path, 

178 xml_parser: OperaPhenixXmlParser, 

179 filemanager: FileManager 

180 ) -> int: 

181 """ 

182 Fill in missing images with black pixels for wells where autofocus failed. 

183 

184 Opera Phenix autofocus failures result in missing images. This method: 

185 1. Extracts expected image structure from Index.xml 

186 2. Compares with actual files in workspace 

187 3. Creates black (zero-filled) images for missing files 

188 

189 Args: 

190 image_dir: Path to the image directory 

191 xml_parser: Parsed Index.xml 

192 filemanager: FileManager for file operations 

193 

194 Returns: 

195 Number of missing images created 

196 """ 

197 import numpy as np 

198 

199 logger.debug("Checking for missing images in Opera Phenix workspace") 

200 

201 # 1. Get expected images from XML 

202 try: 

203 image_info = xml_parser.get_image_info() 

204 field_mapping = xml_parser.get_field_id_mapping() 

205 except Exception as e: 

206 logger.warning(f"Could not extract image info from XML: {e}") 

207 return 0 

208 

209 # 2. Build set of expected filenames (with remapped field IDs) 

210 expected_files = set() 

211 for img_id, img_data in image_info.items(): 

212 # Remap field ID 

213 original_field = img_data['field_id'] 

214 remapped_field = xml_parser.remap_field_id(original_field, field_mapping) 

215 

216 # Construct filename 

217 well = f"R{img_data['row']:02d}C{img_data['col']:02d}" 

218 

219 # Note: plane_id in XML corresponds to z_index in filenames 

220 # For timepoint, we default to 1 as XML doesn't always have explicit timepoint info 

221 # Use ORIGINAL Opera Phenix padding (1-digit site, 2-digit z-index) 

222 # NOT the standardized 3-digit padding used in virtual workspace mapping 

223 filename = self.parser.construct_filename( 

224 well=well, 

225 site=remapped_field, 

226 channel=img_data['channel_id'], 

227 z_index=img_data['plane_id'], 

228 timepoint=1, # Default timepoint 

229 extension='.tiff', 

230 site_padding=1, # Original Opera Phenix format 

231 z_padding=2 # Original Opera Phenix format 

232 ) 

233 expected_files.add(filename) 

234 

235 # 3. Get actual files (excluding broken symlinks) 

236 # Clause 245: Workspace operations are disk-only by design 

237 actual_file_paths = filemanager.list_image_files(image_dir, Backend.DISK.value) 

238 actual_files = set() 

239 for file_path in actual_file_paths: 

240 # Check if file is a broken symlink 

241 file_path_obj = Path(file_path) 

242 if file_path_obj.is_symlink() and not file_path_obj.exists():  (242 ↛ 244: condition never true)

243 # Broken symlink - treat as missing 

244 logger.debug(f"Found broken symlink (will be replaced): {file_path}") 

245 continue 

246 actual_files.add(os.path.basename(file_path)) 

247 

248 # 4. Find missing files 

249 missing_files = expected_files - actual_files 

250 

251 if not missing_files:  (251 ↛ 252: condition never true)

252 logger.debug("No missing images detected") 

253 return 0 

254 

255 logger.info(f"Found {len(missing_files)} missing images (likely autofocus failures)") 

256 

257 # 5. Get image dimensions from first existing image 

258 if actual_file_paths:  (258 ↛ 274: condition always true)

259 try: 

260 first_image_path = actual_file_paths[0] 

261 # Clause 245: Workspace operations are disk-only by design 

262 first_image = filemanager.load(first_image_path, Backend.DISK.value) 

263 height, width = first_image.shape 

264 dtype = first_image.dtype 

265 logger.debug(f"Using dimensions from existing image: {height}x{width}, dtype={dtype}") 

266 except Exception as e: 

267 logger.warning(f"Could not load existing image for dimensions: {e}") 

268 # Default dimensions for Opera Phenix 

269 height, width = 2160, 2160 

270 dtype = np.uint16 

271 logger.debug(f"Using default dimensions: {height}x{width}, dtype={dtype}") 

272 else: 

273 # Default dimensions for Opera Phenix 

274 height, width = 2160, 2160 

275 dtype = np.uint16 

276 logger.debug(f"No existing images, using default dimensions: {height}x{width}, dtype={dtype}") 

277 

278 # 6. Create black images for missing files 

279 black_image = np.zeros((height, width), dtype=dtype) 

280 

281 for filename in missing_files: 

282 output_path = image_dir / filename 

283 # Clause 245: Workspace operations are disk-only by design 

284 filemanager.save(black_image, output_path, Backend.DISK.value) 

285 logger.debug(f"Created missing image: {filename}") 

286 

287 logger.info(f"Successfully created {len(missing_files)} missing images with black pixels") 

288 return len(missing_files) 

289 
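As a concrete sketch of the padding note above: with site_padding=1 and z_padding=2, the expected-file names match raw Opera Phenix output rather than the standardized virtual names (the call and filename below are illustrative only).

    # Illustrative expected filename built with original Opera Phenix padding
    OperaPhenixFilenameParser().construct_filename(
        well="R01C01", site=5, channel=2, z_index=1, timepoint=1,
        extension=".tiff", site_padding=1, z_padding=2
    )
    # -> "r01c01f5p01-ch2sk1fk1fl1.tiff"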

290 

291 class OperaPhenixFilenameParser(FilenameParser):

292 """Parser for Opera Phenix microscope filenames. 

293 

294 Handles Opera Phenix format filenames like: 

295 - r01c01f001p01-ch1sk1fk1fl1.tiff 

296 - r01c01f001p01-ch1.tiff 

297 """ 

298 

299 # Regular expression pattern for Opera Phenix filenames 

300 # Supports: row, column, site (field), z_index (plane), channel, timepoint (sk=stack) 

301 # sk = stack/timepoint, fk = field stack, fl = focal level 

302 # Also supports result files with suffixes like: r01c01f001p01-ch1_cell_counts_step7.json 

303 _pattern = re.compile(r"r(\d{1,2})c(\d{1,2})f(\d+|\{[^\}]*\})p(\d+|\{[^\}]*\})-ch(\d+|\{[^\}]*\})(?:sk(\d+|\{[^\}]*\}))?(?:fk\d+)?(?:fl\d+)?(?:_.*?)?(\.\w+)$", re.I) 

304 

305 # Pattern for extracting row and column from Opera Phenix well format 

306 _well_pattern = re.compile(r"R(\d{2})C(\d{2})", re.I) 

307 

308 def __init__(self, filemanager=None, pattern_format=None): 

309 """ 

310 Initialize the parser. 

311 

312 Args: 

313 filemanager: FileManager instance (not used, but required for interface compatibility) 

314 pattern_format: Optional pattern format (not used, but required for interface compatibility) 

315 """ 

316 super().__init__() # Initialize the generic parser interface 

317 

318 # These parameters are not used by this parser, but are required for interface compatibility 

319 self.filemanager = filemanager 

320 self.pattern_format = pattern_format 

321 

322 @classmethod 

323 def can_parse(cls, filename: str) -> bool: 

324 """ 

325 Check if this parser can parse the given filename. 

326 

327 Args: 

328 filename (str): Filename to check 

329 

330 Returns: 

331 bool: True if this parser can parse the filename, False otherwise 

332 """ 

333 # 🔒 Clause 17 — VFS Boundary Method 

334 # This is a string operation that doesn't perform actual file I/O 

335 # Extract just the basename 

336 basename = os.path.basename(filename) 

337 # Check if the filename matches the Opera Phenix pattern 

338 return bool(cls._pattern.match(basename)) 

339 

340 def parse_filename(self, filename: str) -> Optional[Dict[str, Any]]: 

341 """ 

342 Parse an Opera Phenix filename to extract all components. 

343 Supports placeholders like {iii} which will return None for that field. 

344 

345 Args: 

346 filename (str): Filename to parse 

347 

348 Returns: 

349 dict or None: Dictionary with extracted components or None if parsing fails. 

350 """ 

351 # 🔒 Clause 17 — VFS Boundary Method 

352 # This is a string operation that doesn't perform actual file I/O 

353 basename = os.path.basename(filename) 

354 logger.debug("OperaPhenixFilenameParser attempting to parse basename: '%s'", basename) 

355 

356 # Try parsing using the Opera Phenix pattern 

357 match = self._pattern.match(basename) 

358 if match:  (358 ↛ 389: condition always true)

359 logger.debug("Regex match successful for '%s'", basename) 

360 row, col, site_str, z_str, channel_str, sk_str, ext = match.groups() 

361 

362 # Helper function to parse component strings 

363 def parse_comp(s): 

364 """Parse component string to int or None if it's a placeholder.""" 

365 if not s or '{' in s:  (365 ↛ 366: condition never true)

366 return None 

367 return int(s) 

368 

369 # Create well ID from row and column 

370 well = f"R{int(row):02d}C{int(col):02d}" 

371 

372 # Parse components 

373 site = parse_comp(site_str) 

374 channel = parse_comp(channel_str) 

375 z_index = parse_comp(z_str) 

376 timepoint = parse_comp(sk_str) # sk = stack/timepoint 

377 

378 result = { 

379 'well': well, 

380 'site': site, 

381 'channel': channel, 

382 'wavelength': channel, # For backward compatibility 

383 'z_index': z_index, 

384 'timepoint': timepoint, # sk = stack/timepoint 

385 'extension': ext if ext else '.tif' 

386 } 

387 return result 

388 

389 logger.warning("Regex match failed for basename: '%s'", basename) 

390 return None 

391 
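A doctest-style sketch of what parse_filename returns for a typical Opera Phenix name, based on the regex and the dictionary built above:

    # Illustrative parse of a standard Opera Phenix filename
    OperaPhenixFilenameParser().parse_filename("r01c01f001p01-ch1sk1fk1fl1.tiff")
    # -> {'well': 'R01C01', 'site': 1, 'channel': 1, 'wavelength': 1,
    #     'z_index': 1, 'timepoint': 1, 'extension': '.tiff'}
    # Placeholder tokens such as f{iii} still match, but that field parses to None.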

392 def construct_filename(self, extension: str = '.tiff', site_padding: int = 3, z_padding: int = 3, **component_values) -> str: 

393 """ 

394 Construct an Opera Phenix filename from components. 

395 

396 This method uses **kwargs to accept any component values dynamically,

397 making it compatible with the generic parser interface. 

398 

399 Note: Opera Phenix uses 'sk' (stack) for timepoint in filenames. 

400 

401 Args: 

402 extension (str, optional): File extension (default: '.tiff') 

403 site_padding (int, optional): Width to pad site numbers to (default: 3) 

404 z_padding (int, optional): Width to pad Z-index numbers to (default: 3) 

405 **component_values: Component values as keyword arguments. 

406 Expected keys: well, site, channel, z_index, timepoint 

407 

408 Returns: 

409 str: Constructed filename 

410 """ 

411 # Extract components from kwargs 

412 well = component_values.get('well') 

413 site = component_values.get('site') 

414 channel = component_values.get('channel') 

415 z_index = component_values.get('z_index') 

416 timepoint = component_values.get('timepoint') 

417 

418 if not well:  (418 ↛ 419: condition never true)

419 raise ValueError("Well component is required for filename construction") 

420 

421 # Extract row and column from well name 

422 # Check if well is in Opera Phenix format (e.g., 'R01C03') 

423 match = self._well_pattern.match(well) 

424 if match:  (424 ↛ 429: condition always true)

425 # Extract row and column from Opera Phenix format 

426 row = int(match.group(1)) 

427 col = int(match.group(2)) 

428 else: 

429 raise ValueError(f"Invalid well format: {well}. Expected format: 'R01C03'") 

430 

431 # Default z_index, channel, and timepoint to 1 if not provided

432 z_index = 1 if z_index is None else z_index 

433 channel = 1 if channel is None else channel 

434 timepoint = 1 if timepoint is None else timepoint 

435 

436 # Construct filename in Opera Phenix format 

437 if isinstance(site, str): 

438 # If site is a string (e.g., '{iii}'), use it directly 

439 site_part = f"f{site}" 

440 else: 

441 # Otherwise, format it as a padded integer 

442 site_part = f"f{site:0{site_padding}d}" 

443 

444 if isinstance(z_index, str): 

445 # If z_index is a string (e.g., '{zzz}'), use it directly 

446 z_part = f"p{z_index}" 

447 else: 

448 # Otherwise, format it as a padded integer 

449 z_part = f"p{z_index:0{z_padding}d}" 

450 

451 # Always include sk (stack/timepoint) - like ImageXpress always includes _t 

452 if isinstance(timepoint, str):  (452 ↛ 453: condition never true)

453 sk_part = f"sk{timepoint}" 

454 else: 

455 sk_part = f"sk{timepoint}" 

456 

457 return f"r{row:02d}c{col:02d}{site_part}{z_part}-ch{channel}{sk_part}fk1fl1{extension}" 

458 
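With the default 3-digit padding, construct_filename produces the standardized names used in the virtual workspace mapping (illustrative call):

    # Illustrative: defaults site_padding=3, z_padding=3, extension='.tiff'
    OperaPhenixFilenameParser().construct_filename(
        well="R01C01", site=2, channel=1, z_index=1, timepoint=1
    )
    # -> "r01c01f002p001-ch1sk1fk1fl1.tiff"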

459 def remap_field_in_filename(self, filename: str, xml_parser: Optional[OperaPhenixXmlParser] = None) -> str: 

460 """ 

461 Remap the field ID in a filename to follow a top-left to bottom-right pattern. 

462 

463 Args: 

464 filename: Original filename 

465 xml_parser: Parser with XML data 

466 

467 Returns: 

468 str: New filename with remapped field ID 

469 """ 

470 if xml_parser is None: 

471 return filename 

472 

473 # Parse the filename 

474 metadata = self.parse_filename(filename) 

475 if not metadata or 'site' not in metadata or metadata['site'] is None: 

476 return filename 

477 

478 # Get the mapping and remap the field ID 

479 mapping = xml_parser.get_field_id_mapping() 

480 new_field_id = xml_parser.remap_field_id(metadata['site'], mapping) 

481 

482 # Always create a new filename with the remapped field ID and consistent padding 

483 # This ensures all filenames have the same format, even if the field ID didn't change 

484 metadata['site'] = new_field_id # Update site with remapped value 

485 return self.construct_filename(**metadata) 

486 
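A minimal sketch of the pass-through case described above: with no XML parser the filename is returned unchanged; with a parser, the site is remapped and re-padded.

    # Illustrative: xml_parser=None means no remapping is applied
    OperaPhenixFilenameParser().remap_field_in_filename("r01c01f2p01-ch1sk1fk1fl1.tiff", None)
    # -> "r01c01f2p01-ch1sk1fk1fl1.tiff"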

487 def extract_component_coordinates(self, component_value: str) -> Tuple[str, str]: 

488 """ 

489 Extract coordinates from component identifier (typically well). 

490 

491 Args: 

492 component_value (str): Component identifier (e.g., 'R03C04' or 'A01') 

493 

494 Returns: 

495 Tuple[str, str]: (row, column) where row is like 'A', 'B' and column is like '01', '04' 

496 

497 Raises: 

498 ValueError: If component format is invalid 

499 """ 

500 if not component_value:  (500 ↛ 501: condition never true)

501 raise ValueError(f"Invalid component format: {component_value}") 

502 

503 # Check if component is in Opera Phenix format (e.g., 'R01C03') 

504 match = self._well_pattern.match(component_value) 

505 if match:  (505 ↛ 515: condition always true)

506 # Extract row and column from Opera Phenix format 

507 row_num = int(match.group(1)) 

508 col_num = int(match.group(2)) 

509 # Convert to letter-number format: R01C03 -> A, 03 

510 row = chr(ord('A') + row_num - 1) # R01 -> A, R02 -> B, etc. 

511 col = f"{col_num:02d}" # Ensure 2-digit padding 

512 return row, col 

513 else: 

514 # Assume simple format like 'A01', 'C04' 

515 if len(component_value) < 2: 

516 raise ValueError(f"Invalid component format: {component_value}") 

517 row = component_value[0] 

518 col = component_value[1:] 

519 if not row.isalpha() or not col.isdigit(): 

520 raise ValueError(f"Invalid Opera Phenix component format: {component_value}. Expected 'R01C03' or 'A01' format") 

521 return row, col 

522 
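A short sketch of the two accepted well formats and their converted coordinates:

    # Illustrative: both formats map to (row_letter, zero-padded column)
    parser = OperaPhenixFilenameParser()
    parser.extract_component_coordinates("R03C04")  # -> ('C', '04')
    parser.extract_component_coordinates("A01")     # -> ('A', '01')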

523 

524 class OperaPhenixMetadataHandler(MetadataHandler):

525 """ 

526 Metadata handler for Opera Phenix microscopes. 

527 

528 Handles finding and parsing Index.xml files for Opera Phenix microscopes. 

529 """ 

530 

531 def __init__(self, filemanager: FileManager): 

532 """ 

533 Initialize the metadata handler. 

534 

535 Args: 

536 filemanager: FileManager instance for file operations. 

537 """ 

538 super().__init__() 

539 self.filemanager = filemanager 

540 

541 # Legacy mode has been completely purged 

542 

543 def find_metadata_file(self, plate_path: Union[str, Path]): 

544 """ 

545 Find the Index.xml file in the plate directory. 

546 

547 Args: 

548 plate_path: Path to the plate directory 

549 

550 Returns: 

551 Path to the Index.xml file 

552 

553 Raises: 

554 FileNotFoundError: If no Index.xml file is found 

555 """ 

556 # Ensure plate_path is a Path object 

557 if isinstance(plate_path, str):  (557 ↛ 558: condition never true)

558 plate_path = Path(plate_path) 

559 

560 # Ensure the path exists 

561 if not plate_path.exists():  (561 ↛ 562: condition never true)

562 raise FileNotFoundError(f"Plate path does not exist: {plate_path}") 

563 

564 # Check for Index.xml in the plate directory 

565 index_xml = plate_path / "Index.xml" 

566 if index_xml.exists(): 

567 return index_xml 

568 

569 # Check for Index.xml in the Images directory 

570 images_dir = plate_path / "Images" 

571 if images_dir.exists():  (571 ↛ 577: condition always true)

572 index_xml = images_dir / "Index.xml" 

573 if index_xml.exists():  (573 ↛ 577: condition always true)

574 return index_xml 

575 

576 # No recursive search - only check root and Images directories 

577 raise FileNotFoundError( 

578 f"Index.xml not found in {plate_path} or {plate_path}/Images. " 

579 "Opera Phenix metadata requires Index.xml file." 

580 ) 

581 


590 

591 def get_grid_dimensions(self, plate_path: Union[str, Path]): 

592 """ 

593 Get grid dimensions for stitching from Index.xml file. 

594 

595 Args: 

596 plate_path: Path to the plate folder 

597 

598 Returns: 

599 Tuple of (grid_rows, grid_cols), i.e. (rows, cols), for MIST compatibility

600 

601 Raises: 

602 FileNotFoundError: If no Index.xml file is found 

603 OperaPhenixXmlParseError: If the XML cannot be parsed 

604 OperaPhenixXmlContentError: If grid dimensions cannot be determined 

605 """ 

606 # Ensure plate_path is a Path object 

607 if isinstance(plate_path, str):  (607 ↛ 608: condition never true)

608 plate_path = Path(plate_path) 

609 

610 # Ensure the path exists 

611 if not plate_path.exists():  (611 ↛ 612: condition never true)

612 raise FileNotFoundError(f"Plate path does not exist: {plate_path}") 

613 

614 # Find the Index.xml file - this will raise FileNotFoundError if not found 

615 index_xml = self.find_metadata_file(plate_path) 

616 

617 # Use the OperaPhenixXmlParser to get the grid size 

618 # This will raise appropriate exceptions if parsing fails 

619 xml_parser = self.create_xml_parser(index_xml) 

620 grid_size = xml_parser.get_grid_size() 

621 

622 # Validate the grid size 

623 if grid_size[0] <= 0 or grid_size[1] <= 0:  (623 ↛ 624: condition never true)

624 raise ValueError( 

625 f"Invalid grid dimensions: {grid_size[0]}x{grid_size[1]}. " 

626 "Grid dimensions must be positive integers." 

627 ) 

628 

629 logger.info("Grid size from Index.xml: %dx%d (cols x rows)", grid_size[0], grid_size[1]) 

630 # Return (rows, cols) for MIST compatibility; get_grid_size() reports (cols, rows)

631 return (grid_size[1], grid_size[0]) 

632 
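For example (a sketch, assuming get_grid_size() reports (cols, rows) as the log message indicates; 'handler' and 'path' are hypothetical names):

    # Illustrative: a 5-column x 4-row acquisition grid in Index.xml
    # xml_parser.get_grid_size()        -> (5, 4)   # (cols, rows)
    # handler.get_grid_dimensions(path) -> (4, 5)   # (rows, cols) for MIST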

633 def get_pixel_size(self, plate_path: Union[str, Path]): 

634 """ 

635 Get the pixel size from Index.xml file. 

636 

637 Args: 

638 plate_path: Path to the plate folder 

639 

640 Returns: 

641 Pixel size in micrometers 

642 

643 Raises: 

644 FileNotFoundError: If no Index.xml file is found 

645 OperaPhenixXmlParseError: If the XML cannot be parsed 

646 OperaPhenixXmlContentError: If pixel size cannot be determined 

647 """ 

648 # Ensure plate_path is a Path object 

649 if isinstance(plate_path, str):  (649 ↛ 650: condition never true)

650 plate_path = Path(plate_path) 

651 

652 # Ensure the path exists 

653 if not plate_path.exists():  (653 ↛ 654: condition never true)

654 raise FileNotFoundError(f"Plate path does not exist: {plate_path}") 

655 

656 # Find the Index.xml file - this will raise FileNotFoundError if not found 

657 index_xml = self.find_metadata_file(plate_path) 

658 

659 # Use the OperaPhenixXmlParser to get the pixel size 

660 # This will raise appropriate exceptions if parsing fails 

661 xml_parser = self.create_xml_parser(index_xml) 

662 pixel_size = xml_parser.get_pixel_size() 

663 

664 # Validate the pixel size 

665 if pixel_size <= 0:  (665 ↛ 666: condition never true)

666 raise ValueError( 

667 f"Invalid pixel size: {pixel_size}. " 

668 "Pixel size must be a positive number." 

669 ) 

670 

671 logger.info("Pixel size from Index.xml: %.4f μm", pixel_size) 

672 return pixel_size 

673 

674 def get_channel_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]: 

675 """ 

676 Get channel key→name mapping from Opera Phenix Index.xml. 

677 

678 Args: 

679 plate_path: Path to the plate folder (str or Path) 

680 

681 Returns: 

682 Dict mapping channel IDs to channel names from metadata 

683 Example: {"1": "HOECHST 33342", "2": "Calcein", "3": "Alexa 647"} 

684 """ 

685 try: 

686 # Ensure plate_path is a Path object 

687 if isinstance(plate_path, str):  (687 ↛ 688: condition never true)

688 plate_path = Path(plate_path) 

689 

690 # Find and parse Index.xml 

691 index_xml = self.find_metadata_file(plate_path) 

692 xml_parser = self.create_xml_parser(index_xml) 

693 

694 # Extract channel information 

695 channel_mapping = {} 

696 

697 # Look for channel entries in the XML 

698 # Opera Phenix stores channel info in multiple places, try the most common 

699 root = xml_parser.root 

700 namespace = xml_parser.namespace 

701 

702 # Find channel entries with ChannelName elements 

703 channel_entries = root.findall(f".//{namespace}Entry[@ChannelID]") 

704 for entry in channel_entries: 

705 channel_id = entry.get('ChannelID') 

706 channel_name_elem = entry.find(f"{namespace}ChannelName") 

707 

708 if channel_id and channel_name_elem is not None:  (708 ↛ 704: condition always true)

709 channel_name = channel_name_elem.text 

710 if channel_name:  (710 ↛ 704: condition always true)

711 channel_mapping[channel_id] = channel_name 

712 

713 return channel_mapping if channel_mapping else None 

714 

715 except Exception as e: 

716 logger.debug(f"Could not extract channel names from Opera Phenix metadata: {e}") 

717 return None 

718 

719 def get_well_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]: 

720 """ 

721 Get well key→name mapping from Opera Phenix metadata. 

722 

723 Args: 

724 plate_path: Path to the plate folder (str or Path) 

725 

726 Returns: 

727 None - Opera Phenix doesn't provide rich well names in metadata 

728 """ 

729 return None 

730 

731 def get_site_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]: 

732 """ 

733 Get site key→name mapping from Opera Phenix metadata. 

734 

735 Args: 

736 plate_path: Path to the plate folder (str or Path) 

737 

738 Returns: 

739 None - Opera Phenix doesn't provide rich site names in metadata 

740 """ 

741 return None 

742 

743 def get_z_index_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]: 

744 """ 

745 Get z_index key→name mapping from Opera Phenix metadata. 

746 

747 Args: 

748 plate_path: Path to the plate folder (str or Path) 

749 

750 Returns: 

751 None - Opera Phenix doesn't provide rich z_index names in metadata 

752 """ 

753 return None 

754 

755 def get_timepoint_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]: 

756 """ 

757 Get timepoint key→name mapping from Opera Phenix metadata. 

758 

759 Args: 

760 plate_path: Path to the plate folder (str or Path) 

761 

762 Returns: 

763 None - Opera Phenix doesn't provide rich timepoint names in metadata 

764 """ 

765 return None 

766 

767 # Uses default get_image_files() implementation from MetadataHandler ABC 

768 

769 def create_xml_parser(self, xml_path: Union[str, Path]): 

770 """ 

771 Create an OperaPhenixXmlParser for the given XML file. 

772 

773 Args: 

774 xml_path: Path to the XML file 

775 

776 Returns: 

777 OperaPhenixXmlParser: Parser for the XML file 

778 

779 Raises: 

780 FileNotFoundError: If the XML file does not exist 

781 """ 

782 # Ensure xml_path is a Path object 

783 if isinstance(xml_path, str):  (783 ↛ 784: condition never true)

784 xml_path = Path(xml_path) 

785 

786 # Ensure the path exists 

787 if not xml_path.exists():  (787 ↛ 788: condition never true)

788 raise FileNotFoundError(f"XML file does not exist: {xml_path}") 

789 

790 # Create the parser 

791 return OperaPhenixXmlParser(xml_path) 

792 

793 

794 # Set metadata handler class after class definition for automatic registration

795 from openhcs.microscopes.microscope_base import register_metadata_handler

796 OperaPhenixHandler._metadata_handler_class = OperaPhenixMetadataHandler

797 register_metadata_handler(OperaPhenixHandler, OperaPhenixMetadataHandler)