Coverage for src/polystore/filemanager.py: 52%

222 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-03 06:58 +0000

1""" 

2FileManager directory operations. 

3 

4This module defines the FileManager class, a backend-agnostic router that dispatches 

5load/save, listing, existence checking, mkdir, symlink, mirror, copy, move, and delete operations. 

6""" 

7 

8import logging 

9from pathlib import Path 

10from typing import List, Set, Union, Tuple, Any 

11 

12from .formats import DEFAULT_IMAGE_EXTENSIONS 

13from .base import DataSink 

14from .exceptions import StorageResolutionError 

15 

16logger = logging.getLogger(__name__) 

17 

18class FileManager: 

19 

def __init__(self, registry):
    """
    Bind this FileManager to an explicit backend registry.

    The manager keeps no default backend: every operation names the backend
    to use, and that name is looked up in ``registry``.

    Args:
        registry: Mapping of backend name to backend instance
            (Dict[str, DataSink]). Required; ``None`` is rejected.

    Raises:
        ValueError: If ``registry`` is None.

    Thread Safety:
        An instance is meant to be scoped to a single execution context.
        Do not share one FileManager across pipelines or threads; give each
        FileManager its own registry when isolation is required.
    """
    # There is deliberately no fallback registry.
    if registry is None:
        raise ValueError("Registry must be provided to FileManager. Default fallback has been removed.")

    self.registry = registry
    logger.debug("FileManager initialized with registry")

51 

def _get_backend(self, backend_name: str) -> "DataSink":
    """
    Look up a backend instance in this manager's registry.

    The name is lower-cased before the lookup. The registry entry is
    returned as-is, so FileManagers built on the same registry receive the
    same backend object.

    Args:
        backend_name: Name of the backend (e.g., "disk", "memory", "zarr").

    Returns:
        The registered backend instance (typed as the DataSink base
        interface).

    Raises:
        StorageResolutionError: If ``backend_name`` is None or is not
            present in the registry.
    """
    # Reject a missing name *before* normalizing it: calling .lower() on
    # None raises AttributeError, which would bypass the intended error type.
    if backend_name is None:
        raise StorageResolutionError(f"Backend '{backend_name}' not found in registry")

    backend_name = backend_name.lower()

    try:
        if backend_name not in self.registry:
            raise KeyError(f"Backend '{backend_name}' not found in registry")

        return self.registry[backend_name]
    except Exception as e:
        # Any lookup failure is reported as a resolution error.
        raise StorageResolutionError(f"Failed to get backend '{backend_name}': {e}") from e

90 

def load(self, file_path: Union[str, Path], backend: str, **kwargs) -> Any:
    """
    Load one file through the named backend.

    The path is handed to the backend unchanged; no inference, conversion
    or fallback happens here.

    Args:
        file_path: Path of the file to load (str or Path).
        backend: Name of the backend to dispatch to.
        **kwargs: Forwarded verbatim to the backend's ``load`` method.

    Returns:
        Whatever the backend's ``load`` returns.

    Raises:
        StorageResolutionError: If the backend cannot be resolved, or if
            the backend raises anything else (the original exception is
            logged and chained as the cause).
    """
    try:
        return self._get_backend(backend).load(file_path, **kwargs)
    except StorageResolutionError:
        # Already the right error type; let it through untouched.
        raise
    except Exception as exc:
        logger.error(f"Unexpected error during load from {file_path} with backend {backend}: {exc}", exc_info=True)
        raise StorageResolutionError(
            f"Failed to load file at {file_path} using backend '{backend}'"
        ) from exc

120 

def save(self, data: Any, output_path: Union[str, Path], backend: str, **kwargs) -> None:
    """
    Save one data object through the named backend.

    Data and path are passed to the backend unchanged. If this instance has
    a truthy ``_materialization_context`` attribute, its entries are merged
    into the keyword arguments sent to the backend; explicitly passed
    kwargs win when keys collide.

    Args:
        data: Object to save.
        output_path: Destination path (str or Path).
        backend: Name of the backend to dispatch to.
        **kwargs: Forwarded to the backend's ``save`` method.

    Raises:
        StorageResolutionError: If the backend cannot be resolved, or if
            the backend raises anything else (the original exception is
            logged and chained as the cause).
    """
    try:
        sink = self._get_backend(backend)

        # Optional per-instance context (may be absent or empty).
        context = getattr(self, '_materialization_context', None)
        call_kwargs = {**context, **kwargs} if context else kwargs

        sink.save(data, output_path, **call_kwargs)
    except StorageResolutionError:
        # Already the right error type; let it through untouched.
        raise
    except Exception as exc:
        logger.error(f"Unexpected error during save to {output_path} with backend {backend}: {exc}", exc_info=True)
        raise StorageResolutionError(
            f"Failed to save data to {output_path} using backend '{backend}'"
        ) from exc

156 

def load_batch(self, file_paths: List[Union[str, Path]], backend: str, **kwargs) -> List[Any]:
    """
    Load several files in one call through the named backend.

    Args:
        file_paths: Paths to load, passed to the backend unchanged.
        backend: Name of the backend to dispatch to.
        **kwargs: Forwarded verbatim to the backend's ``load_batch`` method.

    Returns:
        Whatever the backend's ``load_batch`` returns.

    Raises:
        StorageResolutionError: If the backend cannot be resolved, or if
            the backend raises anything else (the original exception is
            logged and chained as the cause).
    """
    try:
        return self._get_backend(backend).load_batch(file_paths, **kwargs)
    except StorageResolutionError:
        raise
    except Exception as exc:
        logger.error(f"Unexpected error during batch load with backend {backend}: {exc}", exc_info=True)
        raise StorageResolutionError(
            f"Failed to load batch of {len(file_paths)} files using backend '{backend}'"
        ) from exc

182 

def save_batch(self, data_list: List[Any], output_paths: List[Union[str, Path]], backend: str, **kwargs) -> None:
    """
    Save several data objects in one call through the named backend.

    Both lists are passed to the backend unchanged. This method does not
    itself compare their lengths; any validation is left to the backend.

    Args:
        data_list: Objects to save.
        output_paths: Destination paths, expected to pair up with ``data_list``.
        backend: Name of the backend to dispatch to.
        **kwargs: Forwarded verbatim to the backend's ``save_batch`` method.

    Raises:
        StorageResolutionError: If the backend cannot be resolved, or if
            the backend raises anything else. That includes any error the
            backend raises for mismatched list lengths; the original
            exception is logged and chained as the cause.
    """
    try:
        sink = self._get_backend(backend)
        sink.save_batch(data_list, output_paths, **kwargs)
    except StorageResolutionError:
        raise
    except Exception as exc:
        logger.error(f"Unexpected error during batch save with backend {backend}: {exc}", exc_info=True)
        raise StorageResolutionError(
            f"Failed to save batch of {len(data_list)} files using backend '{backend}'"
        ) from exc

207 

def list_image_files(self, directory: Union[str, Path], backend: str,
                     pattern: str = None, extensions: Set[str] = DEFAULT_IMAGE_EXTENSIONS, recursive: bool = False) -> List[str]:
    """
    List image files under a directory, naturally sorted.

    Identical to a plain file listing except that ``extensions`` defaults
    to ``DEFAULT_IMAGE_EXTENSIONS``. The directory is converted with
    ``str()`` and otherwise passed through without validation.

    Args:
        directory: Directory to search (str or Path).
        backend: Name of the backend to dispatch to.
        pattern: Optional filter pattern (e.g., "*.tif"), forwarded to the backend.
        extensions: File extensions to keep, forwarded to the backend.
        recursive: Whether the backend should search subdirectories.

    Returns:
        The backend's results converted to strings and ordered by
        ``natural_sort``.

    Raises:
        StorageResolutionError: If the backend cannot be resolved.
        Exception: Anything raised by the backend's ``list_files``
            propagates unchanged.
    """
    backend_instance = self._get_backend(backend)

    from .utils import natural_sort

    found = backend_instance.list_files(str(directory), pattern, extensions, recursive)
    # Backends may hand back Path objects; natural_sort is given strings.
    return natural_sort([str(item) for item in found])

242 

243 

def list_files(self, directory: Union[str, Path], backend: str,
               pattern: str = None, extensions: Set[str] = None, recursive: bool = False,
               **kwargs) -> List[str]:
    """
    List files under a directory, naturally sorted.

    The directory is converted with ``str()`` and otherwise passed through
    to the backend without validation or normalization.

    Args:
        directory: Directory to search (str or Path).
        backend: Name of the backend to dispatch to.
        pattern: Optional filter pattern (e.g., "*.txt"), forwarded to the backend.
        extensions: Optional set of file extensions, forwarded to the backend.
        recursive: Whether the backend should search subdirectories.
        **kwargs: Backend-specific extras (e.g., plate_id for OMERO),
            forwarded verbatim to the backend's ``list_files``.

    Returns:
        The backend's results converted to strings and ordered by
        ``natural_sort``.

    Raises:
        StorageResolutionError: If the backend cannot be resolved.
        Exception: Anything raised by the backend's ``list_files``
            propagates unchanged.
    """
    backend_instance = self._get_backend(backend)

    from .utils import natural_sort

    found = backend_instance.list_files(str(directory), pattern, extensions, recursive, **kwargs)
    # Backends may hand back Path objects; natural_sort is given strings.
    return natural_sort([str(item) for item in found])

280 

281 

def find_file_recursive(self, directory: Union[str, Path], filename: str, backend: str) -> Union[str, None]:
    """
    Find the first file with a given name anywhere under a directory.

    Runs ``list_files`` with ``recursive=True`` and returns the first path,
    in the order that call yields them, whose final component equals
    ``filename``.

    Args:
        directory: Directory to search (str or Path).
        filename: Exact file name to look for (compared to the last path component).
        backend: Name of the backend to dispatch to.

    Returns:
        The matching path as a string, or None when nothing matches.

    Raises:
        Exception: Whatever ``list_files`` raises propagates unchanged.
    """
    candidates = self.list_files(directory, backend, recursive=True)
    return next(
        (candidate for candidate in candidates if Path(candidate).name == filename),
        None,
    )

311 

312 

def list_dir(self, path: Union[str, Path], backend: str) -> List[str]:
    """
    List the entries of a directory, naturally sorted.

    Args:
        path: Directory to list (str or Path); converted with ``str()``.
        backend: Name of the backend to dispatch to.

    Returns:
        The backend's ``list_dir`` result ordered by ``natural_sort``.

    Raises:
        TypeError: If ``path`` is neither str nor Path.
        StorageResolutionError: If the backend cannot be resolved.
        FileNotFoundError, NotADirectoryError: Propagated unchanged from
            the backend so callers can tell files and missing paths apart.
        RuntimeError: Any other failure, with the original exception
            chained as the cause.
    """
    if not isinstance(path, (str, Path)):
        raise TypeError(f"Expected str or Path, got {type(path)}")

    path = str(path)
    backend_instance = self._get_backend(backend)

    try:
        from .utils import natural_sort
        return natural_sort(backend_instance.list_dir(path))
    except (FileNotFoundError, NotADirectoryError):
        # Structural errors carry meaning for callers; do not wrap them.
        raise
    except Exception as exc:
        raise RuntimeError(f"Unexpected failure in list_dir({path}) for backend {backend}") from exc

331 

def ensure_directory(self, directory: Union[str, Path], backend: str) -> str:
    """
    Ask the named backend to make sure a directory exists.

    The path is converted with ``str()`` and otherwise passed through
    without validation or normalization.

    Args:
        directory: Directory that should exist (str or Path).
        backend: Name of the backend to dispatch to.

    Returns:
        Whatever the backend's ``ensure_directory`` returns.

    Raises:
        StorageResolutionError: If the backend cannot be resolved.
        Exception: Anything raised by the backend propagates unchanged.
    """
    return self._get_backend(backend).ensure_directory(str(directory))

358 

359 

360 

def exists(self, path: Union[str, Path], backend: str) -> bool:
    """
    Ask the named backend whether a path exists.

    The path is converted with ``str()`` and otherwise passed through
    without validation or normalization.

    Args:
        path: Path to check (str or Path).
        backend: Name of the backend to dispatch to.

    Returns:
        Whatever the backend's ``exists`` returns.

    Raises:
        StorageResolutionError: If the backend cannot be resolved.
        Exception: Anything raised by the backend propagates unchanged.
    """
    return self._get_backend(backend).exists(str(path))

387 

388 

def mirror_directory_with_symlinks(
    self,
    source_dir: Union[str, Path],
    target_dir: Union[str, Path],
    backend: str,
    recursive: bool = True,
    overwrite_symlinks_only: bool = False
) -> int:
    """
    Create, under target_dir, a symlink for every file found under source_dir.

    Files are discovered with ``collect_dirs_and_files`` and each one is
    linked at the same relative location below ``target_dir`` through
    ``create_symlink``. Only ``target_dir`` itself is created explicitly
    here; intermediate directories are left to the backend's symlink
    implementation.

    Args:
        source_dir: Directory whose files are linked (str or Path).
        target_dir: Directory that receives the symlinks (str or Path).
        backend: Name of the backend to dispatch to.
        recursive: If True (default), include files in subdirectories;
            if False, only files directly inside ``source_dir``.
        overwrite_symlinks_only: Forwarded to ``create_symlink``. If True,
            existing symlinks may be replaced (regular files may not); if
            False (default), any existing target is an error.

    Returns:
        int: Number of symlinks created.

    Raises:
        StorageResolutionError: If the backend cannot be resolved, or if
            anything fails while collecting or linking. A FileExistsError
            raised by ``create_symlink`` is wrapped too and is available as
            ``__cause__``.
        Exception: Errors from the initial ``ensure_directory(target_dir)``
            call propagate unchanged.
    """
    # Resolve the backend up front so an unknown backend fails before any
    # directory is created.
    self._get_backend(backend)
    self.ensure_directory(target_dir, backend)

    try:
        # Honor the caller's `recursive` choice (it used to be hard-coded
        # to True, so recursive=False had no effect).
        _, all_files = self.collect_dirs_and_files(source_dir, backend, recursive=recursive)

        source_root = Path(source_dir)
        target_root = Path(target_dir)

        symlink_count = 0
        for file_path in all_files:
            rel_path = Path(file_path).relative_to(source_root)
            self.create_symlink(
                file_path,
                str(target_root / rel_path),
                backend,
                overwrite_symlinks_only=overwrite_symlinks_only,
            )
            symlink_count += 1

        return symlink_count

    except Exception as e:
        raise StorageResolutionError(f"Failed to mirror directory {source_dir} to {target_dir} with backend {backend}") from e

453 

def create_symlink(
    self,
    source_path: Union[str, Path],
    symlink_path: Union[str, Path],
    backend: str,
    overwrite_symlinks_only: bool = False
) -> bool:
    """
    Create a symbolic link at symlink_path pointing to source_path.

    Paths are converted with ``str()`` and otherwise passed to the backend
    without validation. An existing target is handled as follows:

    - ``overwrite_symlinks_only=False``: any existing target is an error.
    - ``overwrite_symlinks_only=True``: an existing symlink is replaced
      (the backend is called with ``overwrite=True``); anything else is an
      error.

    Args:
        source_path: File or directory the link points to (str or Path).
        symlink_path: Location of the new link (str or Path).
        backend: Name of the backend to dispatch to.
        overwrite_symlinks_only: Whether an existing symlink may be replaced.

    Returns:
        bool: True when the link was created. Failures raise; this method
        never returns False.

    Raises:
        FileExistsError: If the target exists and may not be overwritten
            (also re-raised unchanged when the backend raises it).
        StorageResolutionError: If the backend cannot be resolved or any
            other error occurs; the original exception is chained.
    """
    backend_instance = self._get_backend(backend)
    link_target = str(symlink_path)

    try:
        target_exists = backend_instance.exists(link_target)

        if target_exists and not overwrite_symlinks_only:
            raise FileExistsError(f"Target already exists: {symlink_path}")

        if target_exists and not self.is_symlink(symlink_path, backend):
            raise FileExistsError(
                f"Target exists and is not a symlink (overwrite_symlinks_only=True): {symlink_path}"
            )

        # Overwrite is requested exactly when an existing symlink is replaced.
        backend_instance.create_symlink(str(source_path), link_target, overwrite=bool(target_exists))
        return True
    except FileExistsError:
        # Raised by the checks above or by the backend; keep its type.
        raise
    except Exception as exc:
        raise StorageResolutionError(
            f"Failed to create symlink from {source_path} to {symlink_path} with backend {backend}"
        ) from exc

513 

def delete(self, path: Union[str, Path], backend: str, recursive: bool = False) -> bool:
    """
    Delete a file or directory through the named backend.

    The path is converted with ``str()`` and otherwise passed through
    without validation or normalization.

    Args:
        path: File or directory to delete (str or Path).
        backend: Name of the backend to dispatch to.
        recursive: If False (default), call the backend's ``delete``.
            If True, call the backend's ``delete_all`` so that a directory
            is removed together with its contents. This flag used to be
            accepted but ignored.

    Returns:
        The backend's ``delete`` result when ``recursive`` is False;
        True after a successful recursive delete.

    Raises:
        StorageResolutionError: If the backend cannot be resolved, or if
            the backend raises anything (including FileNotFoundError); the
            original exception is chained as the cause.
    """
    backend_instance = self._get_backend(backend)
    target = str(path)

    try:
        if recursive:
            # Same backend entry point that FileManager.delete_all uses.
            backend_instance.delete_all(target)
            return True
        return backend_instance.delete(target)
    except Exception as e:
        raise StorageResolutionError(
            f"Failed to delete {path} with backend {backend}"
        ) from e

546 

def delete_all(self, path: Union[str, Path], backend: str) -> bool:
    """
    Delete whatever is at the given path via the backend's ``delete_all``.

    No resolution, coercion or fallback happens here beyond ``str(path)``;
    how files, symlinks and directories are removed is defined entirely by
    the backend.

    Args:
        path: Path to delete (str or Path).
        backend: Name of the backend to dispatch to (e.g., 'disk', 'memory', 'zarr').

    Returns:
        True once the backend call has returned without raising.

    Raises:
        StorageResolutionError: If the backend cannot be resolved, or if
            the backend raises anything (including FileNotFoundError); the
            original exception is chained as the cause.
    """
    backend_instance = self._get_backend(backend)
    path_str = str(path)

    try:
        backend_instance.delete_all(path_str)
    except Exception as exc:
        raise StorageResolutionError(
            f"Failed to delete_all({path_str}) using backend '{backend}'"
        ) from exc
    return True

576 

577 

def copy(self, source_path: Union[str, Path], dest_path: Union[str, Path], backend: str) -> bool:
    """
    Copy source_path to dest_path through the named backend.

    An existing destination is never overwritten. The destination's parent
    directory is ensured first, then the copy is delegated to the backend's
    ``copy``; how symlinks and directories are copied is up to the backend.

    Args:
        source_path: Path to copy from (str or Path).
        dest_path: Path to copy to (str or Path); must not exist yet.
        backend: Name of the backend to dispatch to.

    Returns:
        Whatever the backend's ``copy`` returns.

    Raises:
        StorageResolutionError: If the backend cannot be resolved, if the
            destination already exists, or if any step fails. The
            underlying error (e.g. FileExistsError, FileNotFoundError) is
            wrapped rather than propagated and is available as ``__cause__``.
    """
    backend_instance = self._get_backend(backend)

    try:
        # Backends get string paths everywhere else in this class; passing a
        # Path object here could make the existence check miss the target.
        source = str(source_path)
        dest = str(dest_path)

        if backend_instance.exists(dest):
            raise FileExistsError(f"Destination already exists: {dest_path}")

        self.ensure_directory(Path(dest_path).parent, backend)

        return backend_instance.copy(source, dest)
    except Exception as e:
        raise StorageResolutionError(
            f"Failed to copy from {source_path} to {dest_path} on backend {backend}"
        ) from e

608 

609 

def move(self, source_path: Union[str, Path], dest_path: Union[str, Path], backend: str,
         replace_symlinks: bool = False) -> bool:
    """
    Move source_path to dest_path through the named backend.

    An existing destination is not overwritten unless it is a symlink and
    ``replace_symlinks`` is True, in which case the symlink is deleted
    first. The destination's parent directory is ensured, then the move is
    delegated to the backend's ``move``.

    Args:
        source_path: Path to move from (str or Path).
        dest_path: Path to move to (str or Path).
        backend: Name of the backend to dispatch to.
        replace_symlinks: If True, an existing symlink at the destination
            is removed and replaced. If False (default), any existing
            destination is an error.

    Returns:
        Whatever the backend's ``move`` returns.

    Raises:
        StorageResolutionError: If the backend cannot be resolved, if the
            destination exists and may not be replaced, or if any step
            fails. The underlying error (e.g. FileExistsError,
            FileNotFoundError) is wrapped rather than propagated and is
            available as ``__cause__``.
    """
    backend_instance = self._get_backend(backend)

    try:
        # Backends get string paths everywhere else in this class; passing a
        # Path object here could make the existence check miss the target.
        source = str(source_path)
        dest = str(dest_path)

        if backend_instance.exists(dest):
            if not replace_symlinks:
                raise FileExistsError(f"Destination already exists: {dest_path}")
            if not backend_instance.is_symlink(dest):
                raise FileExistsError(f"Destination already exists and is not a symlink: {dest_path}")

            logger.debug("Destination is a symlink, removing before move: %s", dest_path)
            backend_instance.delete(dest)

        self.ensure_directory(Path(dest_path).parent, backend)
        return backend_instance.move(source, dest)

    except Exception as e:
        raise StorageResolutionError(
            f"Failed to move from {source_path} to {dest_path} on backend {backend}"
        ) from e

658 

def collect_dirs_and_files(
    self,
    base_dir: Union[str, Path],
    backend: str,
    recursive: bool = True
) -> Tuple[List[str], List[str]]:
    """
    Walk base_dir breadth-first and split what is found into dirs and files.

    A path counts as a directory when ``list_dir`` succeeds on it, and as a
    file when ``list_dir`` raises NotADirectoryError or FileNotFoundError.
    Paths that fail in any other way are logged and skipped. Each path is
    listed once and reported once.

    Args:
        base_dir: Starting directory (str or Path). It is itself included
            in the result (as a directory, or as a file if it cannot be
            listed).
        backend: Name of the backend to dispatch to.
        recursive: If True (default), descend into subdirectories. If
            False, only the direct children of ``base_dir`` are classified.

    Returns:
        (dirs, files): Two lists of string paths, each ordered by
        ``natural_sort``.
    """
    from collections import deque

    dirs: List[str] = []
    files: List[str] = []

    # Each queue item is (path, expand). `expand` says whether the path's
    # children should be visited; it is always True for the base directory
    # and follows `recursive` for everything below it.
    queue = deque([(str(base_dir), True)])

    while queue:
        current_path, expand = queue.popleft()  # FIFO gives breadth-first order

        try:
            entries = self.list_dir(current_path, backend)
        except (NotADirectoryError, FileNotFoundError):
            files.append(current_path)
            continue
        except Exception as e:
            # Best effort: one unreadable path must not abort the whole walk.
            logger.warning(
                "[collect_dirs_and_files] Skipping %s: %s: %s",
                current_path, type(e).__name__, e,
            )
            continue

        dirs.append(current_path)

        if not expand:
            continue

        if entries is None:
            # Defensive: list_dir() is expected to return an iterable.
            logger.warning("[collect_dirs_and_files] list_dir() returned None at %s", current_path)
            continue

        for entry in entries:
            # Children are classified when dequeued, so every path is listed
            # and recorded exactly once (subdirectories used to be appended
            # to `dirs` twice when recursing).
            queue.append((str(Path(current_path) / entry), recursive))

    from .utils import natural_sort
    return natural_sort(dirs), natural_sort(files)

713 

def is_file(self, path: Union[str, Path], backend: str) -> bool:
    """
    Report whether a path is a file according to the named backend.

    This check never raises: any failure, including an unknown backend,
    yields False.

    Args:
        path: Path to check (str or Path); converted with ``str()`` like
            the other path checks in this class.
        backend: Name of the backend to dispatch to.

    Returns:
        The backend's ``is_file`` result, or False if anything raised.
    """
    try:
        backend_instance = self._get_backend(backend)
        # Hand the backend a string, consistent with exists()/is_symlink().
        return backend_instance.is_file(str(path))
    except Exception:
        # Deliberate best effort: not found, is a directory, backend issues.
        return False

731 

def is_dir(self, path: Union[str, Path], backend: str) -> bool:
    """
    Report whether a path is a directory according to the named backend.

    Args:
        path: Path to check (str or Path); converted with ``str()`` like
            the other path checks in this class.
        backend: Name of the backend to dispatch to.

    Returns:
        The backend's ``is_dir`` result, or False when the backend raises
        FileNotFoundError or NotADirectoryError.

    Raises:
        StorageResolutionError: If the backend cannot be resolved or
            raises any other exception; the original is chained as the cause.
    """
    try:
        backend_instance = self._get_backend(backend)
        # Hand the backend a string, consistent with exists()/is_symlink().
        return backend_instance.is_dir(str(path))
    except (FileNotFoundError, NotADirectoryError):
        # A file or a missing path is simply "not a directory".
        return False
    except Exception as e:
        raise StorageResolutionError(
            f"Failed to check if {path} is a directory with backend '{backend}'"
        ) from e

756 

def is_symlink(self, path: Union[str, Path], backend: str) -> bool:
    """
    Report whether a path is a symbolic link according to the named backend.

    This check never raises: any failure, including an unknown backend,
    yields False.

    Args:
        path: Path to check (str or Path); converted with ``str()``.
        backend: Name of the backend to dispatch to.

    Returns:
        The backend's ``is_symlink`` result, or False if anything raised.
    """
    try:
        verdict = self._get_backend(backend).is_symlink(str(path))
    except Exception:
        # Deliberate best effort: not found, not a symlink, backend issues.
        return False
    return verdict

774