Coverage for openhcs/io/base.py: 94.1%

34 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-14 05:57 +0000

1# openhcs/io/base.py

2""" 

3Abstract base classes for storage backends. 

4 

5This module defines the fundamental interfaces for storage backends, 

6independent of specific implementations. It establishes the contract 

7that all storage backends must fulfill. 

8""" 

9 

10import logging 

11from abc import ABC, abstractmethod 

12from pathlib import Path 

13from typing import Any, Dict, List, Optional, Set, Type, Union, Callable 

14from functools import wraps 

15from openhcs.constants.constants import Backend 

16from openhcs.io.exceptions import StorageResolutionError 

17 

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

19 

20 

class StorageBackend(ABC):
    """
    Abstract base class for basic storage operations.

    Defines the fundamental operations required for interacting with a storage system,
    independent of specific data types like microscopy images.

    Every method except ``exists`` is abstract and must be provided by a
    concrete backend; ``exists`` is implemented here on top of ``is_file``
    and ``is_dir``.
    """

    @abstractmethod
    def load(self, file_path: Union[str, Path], **kwargs) -> Any:
        """
        Load data from a file.

        Args:
            file_path: Path to the file to load
            **kwargs: Additional arguments for the load operation

        Returns:
            The loaded data

        Raises:
            FileNotFoundError: If the file does not exist
            TypeError: If the file_path is not a valid path type
            ValueError: If the file cannot be loaded
        """
        pass

    @abstractmethod
    def save(self, data: Any, output_path: Union[str, Path], **kwargs) -> None:
        """
        Save data to a file.

        Args:
            data: The data to save
            output_path: Path where the data should be saved
            **kwargs: Additional arguments for the save operation

        Raises:
            TypeError: If the output_path is not a valid path type
            ValueError: If the data cannot be saved
        """
        pass

    @abstractmethod
    def load_batch(self, file_paths: List[Union[str, Path]], **kwargs) -> List[Any]:
        """
        Load multiple files in a single batch operation.

        Args:
            file_paths: List of file paths to load
            **kwargs: Additional arguments for the load operation

        Returns:
            List of loaded data objects in the same order as file_paths

        Raises:
            FileNotFoundError: If any file does not exist
            TypeError: If any file_path is not a valid path type
            ValueError: If any file cannot be loaded
        """
        pass

    @abstractmethod
    def save_batch(self, data_list: List[Any], output_paths: List[Union[str, Path]], **kwargs) -> None:
        """
        Save multiple data objects in a single batch operation.

        Args:
            data_list: List of data objects to save
            output_paths: List of destination paths (must match length of data_list)
            **kwargs: Additional arguments for the save operation

        Raises:
            ValueError: If data_list and output_paths have different lengths
            TypeError: If any output_path is not a valid path type
            ValueError: If any data cannot be saved
        """
        pass

    @abstractmethod
    def list_files(self, directory: Union[str, Path], pattern: Optional[str] = None,
                   extensions: Optional[Set[str]] = None, recursive: bool = False) -> List[Path]:
        """
        List files in a directory, optionally filtering by pattern and extensions.

        Args:
            directory: Directory to search.
            pattern: Optional glob pattern to match filenames.
            extensions: Optional set of file extensions to filter by (e.g., {'.tif', '.png'}).
                Extensions should include the dot and are case-insensitive.
            recursive: Whether to search recursively.

        Returns:
            List of paths to matching files.

        Raises:
            TypeError: If the directory is not a valid path type
            FileNotFoundError: If the directory does not exist
        """
        pass

    @abstractmethod
    def list_dir(self, path: Union[str, Path]) -> List[str]:
        """
        List the names of immediate entries in a directory.

        Args:
            path: Directory path to list.

        Returns:
            List of entry names (not full paths) in the directory.

        Raises:
            FileNotFoundError: If the path does not exist.
            NotADirectoryError: If the path is not a directory.
            TypeError: If the path is not a valid path type.
        """
        pass

    @abstractmethod
    def delete(self, file_path: Union[str, Path]) -> None:
        """
        Delete a file.

        Args:
            file_path: Path to the file to delete

        Raises:
            TypeError: If the file_path is not a valid path type
            FileNotFoundError: If the file does not exist
            ValueError: If the file cannot be deleted
        """
        pass

    @abstractmethod
    def delete_all(self, file_path: Union[str, Path]) -> None:
        """
        Delete a file or a folder in full.

        Args:
            file_path: Path to the file or folder to delete

        Raises:
            TypeError: If the file_path is not a valid path type
            ValueError: If the file or folder cannot be deleted
        """
        pass

    @abstractmethod
    def ensure_directory(self, directory: Union[str, Path]) -> Path:
        """
        Ensure a directory exists, creating it if necessary.

        Args:
            directory: Path to the directory to ensure exists

        Returns:
            The path to the directory

        Raises:
            TypeError: If the directory is not a valid path type
            ValueError: If the directory cannot be created
        """
        pass

    @abstractmethod
    def create_symlink(self, source: Union[str, Path], link_name: Union[str, Path]):
        """
        Create a symlink from source to link_name.

        Args:
            source: Path to the source file
            link_name: Path where the symlink should be created

        Raises:
            TypeError: If the path is not a valid path type
        """
        pass

    @abstractmethod
    def is_symlink(self, source: Union[str, Path]) -> bool:
        """
        Check if a path is a symlink.

        Args:
            source: Path to check

        Returns:
            True if the path is a symlink

        Raises:
            TypeError: If the path is not a valid path type
        """
        pass

    @abstractmethod
    def is_file(self, source: Union[str, Path]) -> bool:
        """
        Check if a path is a file.

        Args:
            source: Path to check

        Returns:
            True if the path is a file

        Raises:
            TypeError: If the path is not a valid path type
        """
        pass

    @abstractmethod
    def is_dir(self, source: Union[str, Path]) -> bool:
        """
        Check if a path is a directory.

        Args:
            source: Path to check

        Returns:
            True if the path is a directory

        Raises:
            TypeError: If the path is not a valid path type
        """
        pass

    @abstractmethod
    def move(self, src: Union[str, Path], dst: Union[str, Path]) -> None:
        """
        Move a file or directory from src to dst.

        Args:
            src: Path to the source file
            dst: Path to the destination file

        Raises:
            TypeError: If the path is not a valid path type
            FileNotFoundError: If the source file does not exist
            FileExistsError: If the destination file already exists
            ValueError: If the file cannot be moved
        """
        pass

    @abstractmethod
    def copy(self, src: Union[str, Path], dst: Union[str, Path]) -> None:
        """
        Copy a file or directory from src to dst.

        Args:
            src: Path to the source file
            dst: Path to the destination file

        Raises:
            TypeError: If the path is not a valid path type
            FileNotFoundError: If the source file does not exist
            FileExistsError: If the destination file already exists
            ValueError: If the file cannot be copied
        """
        pass

    @abstractmethod
    def stat(self, path: Union[str, Path]) -> Dict[str, Any]:
        """
        Get metadata for a file or directory.

        Args:
            path: Path to the file or directory

        Returns:
            Dictionary of metadata; the keys are defined by the backend

        Raises:
            TypeError: If the path is not a valid path type
            FileNotFoundError: If the path does not exist
        """
        pass

    def exists(self, path: Union[str, Path]) -> bool:
        """
        Declarative truth test: does the path resolve to a valid object?

        A path only 'exists' if:
        - it is a valid file or directory
        - or it is a symlink that resolves to a valid file or directory

        The check asks ``is_file`` first and falls back to ``is_dir`` whenever
        ``is_file`` either answers falsy or raises one of the resolution
        errors handled below. Any other exception propagates to the caller.

        Args:
            path: Path to test

        Returns:
            bool: True if path structurally resolves to a real object
        """
        try:
            if self.is_file(path):
                return True
        except (FileNotFoundError, NotADirectoryError, IsADirectoryError,
                StorageResolutionError):
            # Not resolvable as a file (possibly because it is a directory);
            # the directory check below decides the final answer.
            pass

        # Reached both when is_file raised and when it merely returned a
        # falsy value, so backends that report "not a file" without raising
        # IsADirectoryError still get their directories recognised.
        try:
            return self.is_dir(path)
        except (FileNotFoundError, NotADirectoryError, StorageResolutionError):
            return False

312 

313 

def _create_storage_registry() -> Dict[str, StorageBackend]:
    """
    Build a fresh mapping of backend names to backend instances.

    One instance each of the disk, memory and zarr backends is constructed
    and keyed by the ``.value`` of the corresponding ``Backend`` member.

    Returns:
        A dictionary mapping backend names to backend instances

    Note:
        Internal factory. Prefer the module-level ``storage_registry``
        instance over calling this function directly.
    """
    # Imported lazily inside the function to avoid circular imports.
    from openhcs.io.disk import DiskStorageBackend
    from openhcs.io.memory import MemoryStorageBackend
    from openhcs.io.zarr import ZarrStorageBackend

    # Each pair is evaluated key first, then instance, in declaration order.
    backend_entries = (
        (Backend.DISK.value, DiskStorageBackend()),
        (Backend.MEMORY.value, MemoryStorageBackend()),
        (Backend.ZARR.value, ZarrStorageBackend()),
    )
    return dict(backend_entries)

339 

340 

# Global singleton storage registry - created once at module import time.
# This is the shared registry instance that all components should use; it
# maps backend names to the backend instances built by
# _create_storage_registry().
storage_registry: Dict[str, StorageBackend] = _create_storage_registry()

344 

345 

def reset_memory_backend() -> None:
    """
    Clear files from the shared memory backend, leaving its directories intact.

    Looks up the memory backend in the global ``storage_registry`` and calls
    its ``clear_files_only()`` method, then logs that the reset happened.
    The intent is to drop all file entries while keeping directory entries
    (None values), so that consecutive plate executions do not collide on
    keys yet still find the directory structure they need.

    Benefits over a full reset:
    - Keeps the directory structure created by the path planner
    - Avoids "Parent path does not exist" errors on subsequent runs
    - Avoids key collisions for special inputs/outputs
    - Saves the cost of recreating the directory hierarchy

    Note:
        Only the memory backend is touched. The disk and zarr backends are
        not modified.
    """
    from openhcs.constants.constants import Backend

    # The existing backend instance is reused, not replaced, so its directory
    # entries survive the reset.
    storage_registry[Backend.MEMORY.value].clear_files_only()
    logger.info("Memory backend reset - files cleared, directories preserved")