Coverage for openhcs/microscopes/opera

1"""

2Opera Phenix microscope implementations for openhcs.

4This module provides concrete implementations of FilenameParser and MetadataHandler

5for Opera Phenix microscopes.

6"""

8import logging

9import os

10import re

11from pathlib import Path

12from typing import Any, Dict, List, Optional, Union, Type, Tuple

14from openhcs.constants.constants import Backend

15from openhcs.microscopes.opera_phenix_xml_parser import OperaPhenixXmlParser

16from openhcs.io.filemanager import FileManager

17from openhcs.microscopes.microscope_base import MicroscopeHandler

18from openhcs.microscopes.microscope_interfaces import (FilenameParser,

19 MetadataHandler)

21logger = logging.getLogger(__name__)

25class OperaPhenixHandler(MicroscopeHandler):

26 """

27 MicroscopeHandler implementation for Opera Phenix systems.

29 This handler combines the OperaPhenix filename parser with its

30 corresponding metadata handler. It guarantees aligned behavior

31 for plate structure parsing, metadata extraction, and any optional

32 post-processing steps required after workspace setup.

33 """

35 # Explicit microscope type for proper registration

36 _microscope_type = 'opera_phenix'

38 # Class attribute for automatic metadata handler registration (set after class definition)

39 _metadata_handler_class = None

41 def __init__(self, filemanager: FileManager, pattern_format: Optional[str] = None):

42 self.parser = OperaPhenixFilenameParser(filemanager, pattern_format=pattern_format)

43 self.metadata_handler = OperaPhenixMetadataHandler(filemanager)

44 super().__init__(parser=self.parser, metadata_handler=self.metadata_handler)

46 @property

47 def common_dirs(self) -> List[str]:

48 """Subdirectory names commonly used by Opera Phenix."""

49 return ['Images']

51 @property

52 def microscope_type(self) -> str:

53 """Microscope type identifier (for interface enforcement only)."""

54 return 'opera_phenix'

56 @property

57 def metadata_handler_class(self) -> Type[MetadataHandler]:

58 """Metadata handler class (for interface enforcement only)."""

59 return OperaPhenixMetadataHandler

61 @property

62 def compatible_backends(self) -> List[Backend]:

63 """

64 Opera Phenix is compatible with DISK backend only.

66 Legacy microscope format with standard file operations.

67 """

68 return [Backend.DISK]

72 # Uses default workspace initialization from base class

74 def _prepare_workspace(self, workspace_path: Path, filemanager: FileManager):

75 """

76 Renames Opera Phenix images to follow a consistent field order

77 based on spatial layout extracted from Index.xml. Uses remapped

78 filenames and replaces the directory in-place.

80 This method performs preparation but does not determine the final image directory.

82 Args:

83 workspace_path: Path to the symlinked workspace

84 filemanager: FileManager instance for file operations

86 Returns:

87 Path to the normalized image directory.

88 """

90 # Check if workspace has already been processed by looking for temp directory

91 # If temp directory exists, workspace was already processed - skip processing

92 temp_dir_name = "__opera_phenix_temp"

93 for entry in filemanager.list_dir(workspace_path, Backend.DISK.value):

94 entry_path = Path(workspace_path) / entry

95 if entry_path.is_dir() and entry_path.name == temp_dir_name: 95 ↛ 96line 95 didn't jump to line 96 because the condition on line 95 was never true

96 logger.info(f"📁 WORKSPACE ALREADY PROCESSED: Found {temp_dir_name} - skipping Opera Phenix preparation")

97 return workspace_path

99 logger.info(f"🔄 PROCESSING WORKSPACE: Applying Opera Phenix name remapping to {workspace_path}")

100 # Find the image directory using the common_dirs property

101 # Clause 245: Workspace operations are disk-only by design

102 # This call is structurally hardcoded to use the "disk" backend

103

104 # Get all entries in the directory

105 entries = filemanager.list_dir(workspace_path, Backend.DISK.value)

106

107 # Look for a directory matching any of the common_dirs patterns

108 image_dir = workspace_path

109 for entry in entries: 109 ↛ 118line 109 didn't jump to line 118 because the loop on line 109 didn't complete

110 entry_lower = entry.lower()

111 if any(common_dir.lower() in entry_lower for common_dir in self.common_dirs): 111 ↛ 109line 111 didn't jump to line 109 because the condition on line 111 was always true

112 # Found a matching directory

113 image_dir = Path(workspace_path) / entry if isinstance(workspace_path, (str, Path)) else workspace_path / entry

114 logger.info("Found directory matching common_dirs pattern: %s", image_dir)

115 break

116

117 # Default to empty field mapping (no remapping)

118 field_mapping = {}

119

120 # Try to load field mapping from Index.xml if available

121 try:

122 # Clause 245: Workspace operations are disk-only by design

123 # This call is structurally hardcoded to use the "disk" backend

124 index_xml = filemanager.find_file_recursive(workspace_path, "Index.xml", Backend.DISK.value)

125 if index_xml: 125 ↛ 130line 125 didn't jump to line 130 because the condition on line 125 was always true

126 xml_parser = OperaPhenixXmlParser(index_xml)

127 field_mapping = xml_parser.get_field_id_mapping()

128 logger.debug("Loaded field mapping from Index.xml: %s", field_mapping)

129 else:

130 logger.debug("Index.xml not found. Using default field mapping.")

131 except Exception as e:

132 logger.error("Error loading Index.xml: %s", e)

133 logger.debug("Using default field mapping due to error.")

134

135 # Get all image files in the directory BEFORE creating temp directory

136 # This prevents recursive mirroring of the temp directory

137 # Clause 245: Workspace operations are disk-only by design

138 # This call is structurally hardcoded to use the "disk" backend

139 image_files = filemanager.list_image_files(image_dir, Backend.DISK.value)

140

141 # Create a uniquely named temporary directory for renamed files

142 # Use "__opera_phenix_temp" to make it clearly identifiable

143 if isinstance(image_dir, str): 143 ↛ 144line 143 didn't jump to line 144 because the condition on line 143 was never true

144 temp_dir = os.path.join(image_dir, "__opera_phenix_temp")

145 else: # Path object

146 temp_dir = image_dir / "__opera_phenix_temp"

147

148 # SAFETY CHECK: Ensure temp directory is within workspace

149 if not str(temp_dir).startswith(str(workspace_path)): 149 ↛ 150line 149 didn't jump to line 150 because the condition on line 149 was never true

150 logger.error("SAFETY VIOLATION: Temp directory would be created outside workspace: %s", temp_dir)

151 raise RuntimeError(f"Temp directory would be created outside workspace: {temp_dir}")

152

153 # Clause 245: Workspace operations are disk-only by design

154 # This call is structurally hardcoded to use the "disk" backend

155 filemanager.ensure_directory(temp_dir, Backend.DISK.value)

156

157 logger.debug("Created temporary directory for Opera Phenix workspace preparation: %s", temp_dir)

158

159 # Process each file

160 for file_path in image_files:

161 # FileManager should return strings, but handle Path objects too

162 if isinstance(file_path, str): 162 ↛ 165line 162 didn't jump to line 165 because the condition on line 162 was always true

163 file_name = os.path.basename(file_path)

164 file_path_obj = Path(file_path)

165 elif isinstance(file_path, Path):

166 file_name = file_path.name

167 file_path_obj = file_path

168 else:

169 # Skip any unexpected types

170 logger.warning("Unexpected file path type: %s", type(file_path).__name__)

171 continue

172

173 # Check if this is a symlink

174 if file_path_obj.is_symlink(): 174 ↛ 189line 174 didn't jump to line 189 because the condition on line 174 was always true

175 try:

176 # Get the target of the symlink (what it points to)

177 real_file_path = file_path_obj.resolve()

178 if not real_file_path.exists(): 178 ↛ 179line 178 didn't jump to line 179 because the condition on line 178 was never true

179 logger.warning("Broken symlink detected: %s -> %s", file_path, real_file_path)

180 continue

181 # Store both the symlink path and the real file path

182 source_path = str(real_file_path)

183 symlink_target = str(real_file_path)

184 except Exception as e:

185 logger.warning("Failed to resolve symlink %s: %s", file_path, e)

186 continue

187 else:

188 # This should never happen in a properly mirrored workspace

189 logger.error("SAFETY VIOLATION: Found real file in workspace (should be symlink): %s", file_path)

190 raise RuntimeError(f"Workspace contains real file instead of symlink: {file_path}")

191

192 # Store the original symlink path for reference

193 original_symlink_path = str(file_path_obj)

194

195 # Parse file metadata

196 metadata = self.parser.parse_filename(file_name)

197 if not metadata or 'site' not in metadata or metadata['site'] is None: 197 ↛ 198line 197 didn't jump to line 198 because the condition on line 197 was never true

198 continue

199

200 # Remap the field ID using the spatial layout

201 original_field_id = metadata['site']

202 new_field_id = field_mapping.get(original_field_id, original_field_id)

203

204 # Construct the new filename with proper padding

205 metadata['site'] = new_field_id # Update site with remapped value

206 new_name = self.parser.construct_filename(**metadata)

207

208 # Create the new path in the temporary directory

209 if isinstance(temp_dir, str): 209 ↛ 210line 209 didn't jump to line 210 because the condition on line 209 was never true

210 new_path = os.path.join(temp_dir, new_name)

211 else: # Path object

212 new_path = temp_dir / new_name

213

214 # Check if destination already exists in temp directory

215 try:

216 # Clause 245: Workspace operations are disk-only by design

217 # This call is structurally hardcoded to use the "disk" backend

218 if filemanager.exists(new_path, Backend.DISK.value): 218 ↛ 220line 218 didn't jump to line 220 because the condition on line 218 was never true

219 # For temp directory, we can be more aggressive and delete any existing file

220 logger.debug("File exists in temp directory, removing before copy: %s", new_path)

221 filemanager.delete(new_path, Backend.DISK.value)

222

223 # Create a symlink in the temp directory pointing to the original file

224 # Clause 245: Workspace operations are disk-only by design

225 # This call is structurally hardcoded to use the "disk" backend

226 filemanager.create_symlink(source_path, new_path, Backend.DISK.value)

227 logger.debug("Created symlink in temp directory: %s -> %s", new_path, source_path)

228

229 except Exception as e:

230 logger.error("Failed to copy file to temp directory: %s -> %s: %s",

231 source_path, new_path, e)

232 raise RuntimeError(f"Failed to copy file to temp directory: {e}") from e

233

234 # Clean up and replace old files - ONLY delete symlinks in workspace, NEVER original files

235 for file_path in image_files:

236 # Convert to Path object for symlink checking

237 file_path_obj = Path(file_path) if isinstance(file_path, str) else file_path

238

239 # SAFETY CHECK: Only delete if it's within the workspace directory

240 if not str(file_path_obj).startswith(str(workspace_path)): 240 ↛ 241line 240 didn't jump to line 241 because the condition on line 240 was never true

241 logger.error("SAFETY VIOLATION: Attempted to delete file outside workspace: %s", file_path)

242 raise RuntimeError(f"Workspace preparation tried to delete file outside workspace: {file_path}")

243

244 # SAFETY CHECK: In workspace, only delete symlinks, never real files

245 if file_path_obj.is_symlink(): 245 ↛ 249line 245 didn't jump to line 249 because the condition on line 245 was always true

246 # Safe to delete - it's a symlink in the workspace

247 logger.debug("Deleting symlink in workspace: %s", file_path)

248 filemanager.delete(file_path, Backend.DISK.value)

249 elif file_path_obj.is_file():

250 # This should never happen in a properly mirrored workspace

251 logger.error("SAFETY VIOLATION: Found real file in workspace (should be symlink): %s", file_path)

252 raise RuntimeError(f"Workspace contains real file instead of symlink: {file_path}")

253 else:

254 logger.warning("File not found or not accessible: %s", file_path)

255

256 # Get all files in the temporary directory

257 # Clause 245: Workspace operations are disk-only by design

258 # This call is structurally hardcoded to use the "disk" backend

259 temp_files = filemanager.list_files(temp_dir, Backend.DISK.value)

260

261 # Move files from temporary directory to image directory

262 for temp_file in temp_files:

263 # FileManager should return strings, but handle Path objects too

264 if isinstance(temp_file, str): 264 ↛ 266line 264 didn't jump to line 266 because the condition on line 264 was always true

265 temp_file_name = os.path.basename(temp_file)

266 elif isinstance(temp_file, Path):

267 temp_file_name = temp_file.name

268 else:

269 # Skip any unexpected types

270 logger.warning("Unexpected file path type: %s", type(temp_file).__name__)

271 continue

272 if isinstance(image_dir, str): 272 ↛ 273line 272 didn't jump to line 273 because the condition on line 272 was never true

273 dest_path = os.path.join(image_dir, temp_file_name)

274 else: # Path object

275 dest_path = image_dir / temp_file_name

276

277 try:

278 # Check if destination already exists in image directory

279 # Clause 245: Workspace operations are disk-only by design

280 # This call is structurally hardcoded to use the "disk" backend

281 if filemanager.exists(dest_path, Backend.DISK.value): 281 ↛ 283line 281 didn't jump to line 283 because the condition on line 281 was never true

282 # If destination is a symlink, ok to remove and replace

283 if filemanager.is_symlink(dest_path, Backend.DISK.value):

284 logger.debug("Destination is a symlink, removing before copy: %s", dest_path)

285 filemanager.delete(dest_path, Backend.DISK.value)

286 else:

287 # Not a symlink - could be a real file

288 logger.error("SAFETY VIOLATION: Destination exists and is not a symlink: %s", dest_path)

289 raise FileExistsError(f"Destination exists and is not a symlink: {dest_path}")

290

291 # First, if the temp file is a symlink, get its target

292 temp_file_obj = Path(temp_file) if isinstance(temp_file, str) else temp_file

293 if temp_file_obj.is_symlink(): 293 ↛ 309line 293 didn't jump to line 309 because the condition on line 293 was always true

294 try:

295 # Get the target that the temp symlink points to

296 real_target = temp_file_obj.resolve()

297 real_target_path = str(real_target)

298

299 # Create a new symlink in the image directory pointing to the original file

300 # Clause 245: Workspace operations are disk-only by design

301 # This call is structurally hardcoded to use the "disk" backend

302 filemanager.create_symlink(real_target_path, dest_path, Backend.DISK.value)

303 logger.debug("Created symlink in image directory: %s -> %s", dest_path, real_target_path)

304 except Exception as e:

305 logger.error("Failed to resolve symlink in temp directory: %s: %s", temp_file, e)

306 raise RuntimeError(f"Failed to resolve symlink: {e}") from e

307 else:

308 # This should never happen if we're using symlinks consistently

309 logger.warning("Temp file is not a symlink: %s", temp_file)

310 # Fall back to copying the file

311 filemanager.copy(temp_file, dest_path, Backend.DISK.value)

312 logger.debug("Copied file (not symlink) to image directory: %s -> %s", temp_file, dest_path)

313

314 # Remove the file from the temporary directory

315 # Clause 245: Workspace operations are disk-only by design

316 # This call is structurally hardcoded to use the "disk" backend

317 filemanager.delete(temp_file, Backend.DISK.value)

318

319 except FileExistsError as e:

320 # Re-raise with clear message

321 logger.error("Cannot copy to destination: %s", e)

322 raise

323 except Exception as e:

324 logger.error("Error copying from temp to destination: %s -> %s: %s",

325 temp_file, dest_path, e)

326 raise RuntimeError(f"Failed to process file from temp directory: {e}") from e

327

328 # SAFETY CHECK: Validate temp directory before deletion

329 if not str(temp_dir).startswith(str(workspace_path)): 329 ↛ 330line 329 didn't jump to line 330 because the condition on line 329 was never true

330 logger.error("SAFETY VIOLATION: Attempted to delete temp directory outside workspace: %s", temp_dir)

331 raise RuntimeError(f"Attempted to delete temp directory outside workspace: {temp_dir}")

332

333 if not "__opera_phenix_temp" in str(temp_dir): 333 ↛ 334line 333 didn't jump to line 334 because the condition on line 333 was never true

334 logger.error("SAFETY VIOLATION: Attempted to delete non-temp directory: %s", temp_dir)

335 raise RuntimeError(f"Attempted to delete non-temp directory: {temp_dir}")

336

337 # Remove the temporary directory

338 # Clause 245: Workspace operations are disk-only by design

339 # This call is structurally hardcoded to use the "disk" backend

340 try:

341 filemanager.delete(temp_dir, Backend.DISK.value)

342 logger.debug("Successfully removed temporary directory: %s", temp_dir)

343 except Exception as e:

344 # Non-fatal error, just log it

345 logger.warning("Failed to remove temporary directory %s: %s", temp_dir, e)

346

347 return image_dir

348

349

350class OperaPhenixFilenameParser(FilenameParser):

351 """Parser for Opera Phenix microscope filenames.

352

353 Handles Opera Phenix format filenames like:

354 - r01c01f001p01-ch1sk1fk1fl1.tiff

355 - r01c01f001p01-ch1.tiff

356 """

357

358 # Regular expression pattern for Opera Phenix filenames

359 _pattern = re.compile(r"r(\d{1,2})c(\d{1,2})f(\d+|\{[^\}]*\})p(\d+|\{[^\}]*\})-ch(\d+|\{[^\}]*\})(?:sk\d+)?(?:fk\d+)?(?:fl\d+)?(\.\w+)$", re.I)

360

361 # Pattern for extracting row and column from Opera Phenix well format

362 _well_pattern = re.compile(r"R(\d{2})C(\d{2})", re.I)

363

364 def __init__(self, filemanager=None, pattern_format=None):

365 """

366 Initialize the parser.

367

368 Args:

369 filemanager: FileManager instance (not used, but required for interface compatibility)

370 pattern_format: Optional pattern format (not used, but required for interface compatibility)

371 """

372 super().__init__() # Initialize the generic parser interface

373

374 # These parameters are not used by this parser, but are required for interface compatibility

375 self.filemanager = filemanager

376 self.pattern_format = pattern_format

377

378 @classmethod

379 def can_parse(cls, filename: str) -> bool:

380 """

381 Check if this parser can parse the given filename.

382

383 Args:

384 filename (str): Filename to check

385

386 Returns:

387 bool: True if this parser can parse the filename, False otherwise

388 """

389 # 🔒 Clause 17 — VFS Boundary Method

390 # This is a string operation that doesn't perform actual file I/O

391 # Extract just the basename

392 basename = os.path.basename(filename)

393 # Check if the filename matches the Opera Phenix pattern

394 return bool(cls._pattern.match(basename))

395

396 def parse_filename(self, filename: str) -> Optional[Dict[str, Any]]:

397 """

398 Parse an Opera Phenix filename to extract all components.

399 Supports placeholders like {iii} which will return None for that field.

400

401 Args:

402 filename (str): Filename to parse

403

404 Returns:

405 dict or None: Dictionary with extracted components or None if parsing fails.

406 """

407 # 🔒 Clause 17 — VFS Boundary Method

408 # This is a string operation that doesn't perform actual file I/O

409 basename = os.path.basename(filename)

410 logger.debug("OperaPhenixFilenameParser attempting to parse basename: '%s'", basename)

411

412 # Try parsing using the Opera Phenix pattern

413 match = self._pattern.match(basename)

414 if match: 414 ↛ 443line 414 didn't jump to line 443 because the condition on line 414 was always true

415 logger.debug("Regex match successful for '%s'", basename)

416 row, col, site_str, z_str, channel_str, ext = match.groups()

417

418 # Helper function to parse component strings

419 def parse_comp(s):

420 """Parse component string to int or None if it's a placeholder."""

421 if not s or '{' in s: 421 ↛ 422line 421 didn't jump to line 422 because the condition on line 421 was never true

422 return None

423 return int(s)

424

425 # Create well ID from row and column

426 well = f"R{int(row):02d}C{int(col):02d}"

427

428 # Parse components

429 site = parse_comp(site_str)

430 channel = parse_comp(channel_str)

431 z_index = parse_comp(z_str)

432

433 result = {

434 'well': well,

435 'site': site,

436 'channel': channel,

437 'wavelength': channel, # For backward compatibility

438 'z_index': z_index,

439 'extension': ext if ext else '.tif'

440 }

441 return result

442

443 logger.warning("Regex match failed for basename: '%s'", basename)

444 return None

445

446 def construct_filename(self, extension: str = '.tiff', site_padding: int = 3, z_padding: int = 3, **component_values) -> str:

447 """

448 Construct an Opera Phenix filename from components.

449

450 This method now uses **kwargs to accept any component values dynamically,

451 making it compatible with the generic parser interface.

452

453 Args:

454 extension (str, optional): File extension (default: '.tiff')

455 site_padding (int, optional): Width to pad site numbers to (default: 3)

456 z_padding (int, optional): Width to pad Z-index numbers to (default: 3)

457 **component_values: Component values as keyword arguments.

458 Expected keys: well, site, channel, z_index

459

460 Returns:

461 str: Constructed filename

462 """

463 # Extract components from kwargs

464 well = component_values.get('well')

465 site = component_values.get('site')

466 channel = component_values.get('channel')

467 z_index = component_values.get('z_index')

468

469 if not well: 469 ↛ 470line 469 didn't jump to line 470 because the condition on line 469 was never true

470 raise ValueError("Well component is required for filename construction")

471

472 # Extract row and column from well name

473 # Check if well is in Opera Phenix format (e.g., 'R01C03')

474 match = self._well_pattern.match(well)

475 if match: 475 ↛ 480line 475 didn't jump to line 480 because the condition on line 475 was always true

476 # Extract row and column from Opera Phenix format

477 row = int(match.group(1))

478 col = int(match.group(2))

479 else:

480 raise ValueError(f"Invalid well format: {well}. Expected format: 'R01C03'")

481

482 # Default Z-index to 1 if not provided

483 z_index = 1 if z_index is None else z_index

484 channel = 1 if channel is None else channel

485

486 # Construct filename in Opera Phenix format

487 if isinstance(site, str): 487 ↛ 489line 487 didn't jump to line 489 because the condition on line 487 was never true

488 # If site is a string (e.g., '{iii}'), use it directly

489 site_part = f"f{site}"

490 else:

491 # Otherwise, format it as a padded integer

492 site_part = f"f{site:0{site_padding}d}"

493

494 if isinstance(z_index, str): 494 ↛ 496line 494 didn't jump to line 496 because the condition on line 494 was never true

495 # If z_index is a string (e.g., '{zzz}'), use it directly

496 z_part = f"p{z_index}"

497 else:

498 # Otherwise, format it as a padded integer

499 z_part = f"p{z_index:0{z_padding}d}"

500

501 return f"r{row:02d}c{col:02d}{site_part}{z_part}-ch{channel}sk1fk1fl1{extension}"

502

503 def remap_field_in_filename(self, filename: str, xml_parser: Optional[OperaPhenixXmlParser] = None) -> str:

504 """

505 Remap the field ID in a filename to follow a top-left to bottom-right pattern.

506

507 Args:

508 filename: Original filename

509 xml_parser: Parser with XML data

510

511 Returns:

512 str: New filename with remapped field ID

513 """

514 if xml_parser is None:

515 return filename

516

517 # Parse the filename

518 metadata = self.parse_filename(filename)

519 if not metadata or 'site' not in metadata or metadata['site'] is None:

520 return filename

521

522 # Get the mapping and remap the field ID

523 mapping = xml_parser.get_field_id_mapping()

524 new_field_id = xml_parser.remap_field_id(metadata['site'], mapping)

525

526 # Always create a new filename with the remapped field ID and consistent padding

527 # This ensures all filenames have the same format, even if the field ID didn't change

528 metadata['site'] = new_field_id # Update site with remapped value

529 return self.construct_filename(**metadata)

530

531 def extract_component_coordinates(self, component_value: str) -> Tuple[str, str]:

532 """

533 Extract coordinates from component identifier (typically well).

534

535 Args:

536 component_value (str): Component identifier (e.g., 'R03C04' or 'A01')

537

538 Returns:

539 Tuple[str, str]: (row, column) where row is like 'A', 'B' and column is like '01', '04'

540

541 Raises:

542 ValueError: If component format is invalid

543 """

544 if not component_value:

545 raise ValueError(f"Invalid component format: {component_value}")

546

547 # Check if component is in Opera Phenix format (e.g., 'R01C03')

548 match = self._well_pattern.match(component_value)

549 if match:

550 # Extract row and column from Opera Phenix format

551 row_num = int(match.group(1))

552 col_num = int(match.group(2))

553 # Convert to letter-number format: R01C03 -> A, 03

554 row = chr(ord('A') + row_num - 1) # R01 -> A, R02 -> B, etc.

555 col = f"{col_num:02d}" # Ensure 2-digit padding

556 return row, col

557 else:

558 # Assume simple format like 'A01', 'C04'

559 if len(component_value) < 2:

560 raise ValueError(f"Invalid component format: {component_value}")

561 row = component_value[0]

562 col = component_value[1:]

563 if not row.isalpha() or not col.isdigit():

564 raise ValueError(f"Invalid Opera Phenix component format: {component_value}. Expected 'R01C03' or 'A01' format")

565 return row, col

566

567

568class OperaPhenixMetadataHandler(MetadataHandler):

569 """

570 Metadata handler for Opera Phenix microscopes.

571

572 Handles finding and parsing Index.xml files for Opera Phenix microscopes.

573 """

574

575 def __init__(self, filemanager: FileManager):

576 """

577 Initialize the metadata handler.

578

579 Args:

580 filemanager: FileManager instance for file operations.

581 """

582 super().__init__()

583 self.filemanager = filemanager

584

585 # Legacy mode has been completely purged

586

587 def find_metadata_file(self, plate_path: Union[str, Path]):

588 """

589 Find the Index.xml file in the plate directory.

590

591 Args:

592 plate_path: Path to the plate directory

593

594 Returns:

595 Path to the Index.xml file

596

597 Raises:

598 FileNotFoundError: If no Index.xml file is found

599 """

600 # Ensure plate_path is a Path object

601 if isinstance(plate_path, str): 601 ↛ 602line 601 didn't jump to line 602 because the condition on line 601 was never true

602 plate_path = Path(plate_path)

603

604 # Ensure the path exists

605 if not plate_path.exists(): 605 ↛ 606line 605 didn't jump to line 606 because the condition on line 605 was never true

606 raise FileNotFoundError(f"Plate path does not exist: {plate_path}")

607

608 # Check for Index.xml in the plate directory

609 index_xml = plate_path / "Index.xml"

610 if index_xml.exists(): 610 ↛ 611line 610 didn't jump to line 611 because the condition on line 610 was never true

611 return index_xml

612

613 # Check for Index.xml in the Images directory

614 images_dir = plate_path / "Images"

615 if images_dir.exists(): 615 ↛ 621line 615 didn't jump to line 621 because the condition on line 615 was always true

616 index_xml = images_dir / "Index.xml"

617 if index_xml.exists(): 617 ↛ 621line 617 didn't jump to line 621 because the condition on line 617 was always true

618 return index_xml

619

620 # No recursive search - only check root and Images directories

621 raise FileNotFoundError(

622 f"Index.xml not found in {plate_path} or {plate_path}/Images. "

623 "Opera Phenix metadata requires Index.xml file."

624 )

625

626 # Ensure result is a Path object

627 if isinstance(result, str):

628 return Path(result)

629 if isinstance(result, Path):

630 return result

631 # This should not happen if FileManager is properly implemented

632 logger.warning("Unexpected result type from find_file_recursive: %s", type(result).__name__)

633 return Path(str(result))

634

635 def get_grid_dimensions(self, plate_path: Union[str, Path]):

636 """

637 Get grid dimensions for stitching from Index.xml file.

638

639 Args:

640 plate_path: Path to the plate folder

641

642 Returns:

643 Tuple of (grid_rows, grid_cols) - UPDATED: Now returns (rows, cols) for MIST compatibility

644

645 Raises:

646 FileNotFoundError: If no Index.xml file is found

647 OperaPhenixXmlParseError: If the XML cannot be parsed

648 OperaPhenixXmlContentError: If grid dimensions cannot be determined

649 """

650 # Ensure plate_path is a Path object

651 if isinstance(plate_path, str): 651 ↛ 652line 651 didn't jump to line 652 because the condition on line 651 was never true

652 plate_path = Path(plate_path)

653

654 # Ensure the path exists

655 if not plate_path.exists(): 655 ↛ 656line 655 didn't jump to line 656 because the condition on line 655 was never true

656 raise FileNotFoundError(f"Plate path does not exist: {plate_path}")

657

658 # Find the Index.xml file - this will raise FileNotFoundError if not found

659 index_xml = self.find_metadata_file(plate_path)

660

661 # Use the OperaPhenixXmlParser to get the grid size

662 # This will raise appropriate exceptions if parsing fails

663 xml_parser = self.create_xml_parser(index_xml)

664 grid_size = xml_parser.get_grid_size()

665

666 # Validate the grid size

667 if grid_size[0] <= 0 or grid_size[1] <= 0: 667 ↛ 668line 667 didn't jump to line 668 because the condition on line 667 was never true

668 raise ValueError(

669 f"Invalid grid dimensions: {grid_size[0]}x{grid_size[1]}. "

670 "Grid dimensions must be positive integers."

671 )

672

673 logger.info("Grid size from Index.xml: %dx%d (cols x rows)", grid_size[0], grid_size[1])

674 # FIXED: Return (rows, cols) for MIST compatibility instead of (cols, rows)

675 return (grid_size[1], grid_size[0])

676

677 def get_pixel_size(self, plate_path: Union[str, Path]):

678 """

679 Get the pixel size from Index.xml file.

680

681 Args:

682 plate_path: Path to the plate folder

683

684 Returns:

685 Pixel size in micrometers

686

687 Raises:

688 FileNotFoundError: If no Index.xml file is found

689 OperaPhenixXmlParseError: If the XML cannot be parsed

690 OperaPhenixXmlContentError: If pixel size cannot be determined

691 """

692 # Ensure plate_path is a Path object

693 if isinstance(plate_path, str):

694 plate_path = Path(plate_path)

695

696 # Ensure the path exists

697 if not plate_path.exists():

698 raise FileNotFoundError(f"Plate path does not exist: {plate_path}")

699

700 # Find the Index.xml file - this will raise FileNotFoundError if not found

701 index_xml = self.find_metadata_file(plate_path)

702

703 # Use the OperaPhenixXmlParser to get the pixel size

704 # This will raise appropriate exceptions if parsing fails

705 xml_parser = self.create_xml_parser(index_xml)

706 pixel_size = xml_parser.get_pixel_size()

707

708 # Validate the pixel size

709 if pixel_size <= 0:

710 raise ValueError(

711 f"Invalid pixel size: {pixel_size}. "

712 "Pixel size must be a positive number."

713 )

714

715 logger.info("Pixel size from Index.xml: %.4f μm", pixel_size)

716 return pixel_size

717

718 def get_channel_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]:

719 """

720 Get channel key→name mapping from Opera Phenix Index.xml.

721

722 Args:

723 plate_path: Path to the plate folder (str or Path)

724

725 Returns:

726 Dict mapping channel IDs to channel names from metadata

727 Example: {"1": "HOECHST 33342", "2": "Calcein", "3": "Alexa 647"}

728 """

729 try:

730 # Ensure plate_path is a Path object

731 if isinstance(plate_path, str): 731 ↛ 732line 731 didn't jump to line 732 because the condition on line 731 was never true

732 plate_path = Path(plate_path)

733

734 # Find and parse Index.xml

735 index_xml = self.find_metadata_file(plate_path)

736 xml_parser = self.create_xml_parser(index_xml)

737

738 # Extract channel information

739 channel_mapping = {}

740

741 # Look for channel entries in the XML

742 # Opera Phenix stores channel info in multiple places, try the most common

743 root = xml_parser.root

744 namespace = xml_parser.namespace

745

746 # Find channel entries with ChannelName elements

747 channel_entries = root.findall(f".//{namespace}Entry[@ChannelID]")

748 for entry in channel_entries:

749 channel_id = entry.get('ChannelID')

750 channel_name_elem = entry.find(f"{namespace}ChannelName")

751

752 if channel_id and channel_name_elem is not None: 752 ↛ 748line 752 didn't jump to line 748 because the condition on line 752 was always true

753 channel_name = channel_name_elem.text

754 if channel_name: 754 ↛ 748line 754 didn't jump to line 748 because the condition on line 754 was always true

755 channel_mapping[channel_id] = channel_name

756

757 return channel_mapping if channel_mapping else None

758

759 except Exception as e:

760 logger.debug(f"Could not extract channel names from Opera Phenix metadata: {e}")

761 return None

762

763 def get_well_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]:

764 """

765 Get well key→name mapping from Opera Phenix metadata.

766

767 Args:

768 plate_path: Path to the plate folder (str or Path)

769

770 Returns:

771 None - Opera Phenix doesn't provide rich well names in metadata

772 """

773 return None

774

775 def get_site_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]:

776 """

777 Get site key→name mapping from Opera Phenix metadata.

778

779 Args:

780 plate_path: Path to the plate folder (str or Path)

781

782 Returns:

783 None - Opera Phenix doesn't provide rich site names in metadata

784 """

785 return None

786

787 def get_z_index_values(self, plate_path: Union[str, Path]) -> Optional[Dict[str, Optional[str]]]:

788 """

789 Get z_index key→name mapping from Opera Phenix metadata.

790

791 Args:

792 plate_path: Path to the plate folder (str or Path)

793

794 Returns:

795 None - Opera Phenix doesn't provide rich z_index names in metadata

796 """

797 return None

798

799

800

801 def create_xml_parser(self, xml_path: Union[str, Path]):

802 """

803 Create an OperaPhenixXmlParser for the given XML file.

804

805 Args:

806 xml_path: Path to the XML file

807

808 Returns:

809 OperaPhenixXmlParser: Parser for the XML file

810

811 Raises:

812 FileNotFoundError: If the XML file does not exist

813 """

814 # Ensure xml_path is a Path object

815 if isinstance(xml_path, str): 815 ↛ 816line 815 didn't jump to line 816 because the condition on line 815 was never true

816 xml_path = Path(xml_path)

817

818 # Ensure the path exists

819 if not xml_path.exists(): 819 ↛ 820line 819 didn't jump to line 820 because the condition on line 819 was never true

820 raise FileNotFoundError(f"XML file does not exist: {xml_path}")

821

822 # Create the parser

823 return OperaPhenixXmlParser(xml_path)

824

825

826# Set metadata handler class after class definition for automatic registration

827from openhcs.microscopes.microscope_base import register_metadata_handler

828OperaPhenixHandler._metadata_handler_class = OperaPhenixMetadataHandler

829register_metadata_handler(OperaPhenixHandler, OperaPhenixMetadataHandler)

Coverage for openhcs/microscopes/opera_phenix.py: 57.3%

326 statements