Coverage for src/polystore/base.py: 57%

37 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-03 06:58 +0000

1""" 

2Abstract base classes for storage backends. 

3 

4This module defines the fundamental interfaces for storage backends, 

5independent of specific implementations. It establishes the contract 

6that all storage backends must fulfill. 

7""" 

8 

9import logging 

10from abc import ABC, abstractmethod 

11from pathlib import Path 

12from typing import Any, Dict, List, Optional, Set, Union 

13 

14from .exceptions import StorageResolutionError 

15from .registry import AutoRegisterMeta 

16 

17logger = logging.getLogger(__name__) 

18 

19 

class DataSink(ABC):
    """
    Write-side contract shared by every data destination.

    A sink accepts data and delivers it somewhere: storage, a stream, or
    another data handling system. Only the two operations below are required.
    """

    @abstractmethod
    def save(self, data: Any, identifier: Union[str, Path], **kwargs) -> None:
        """
        Deliver a single data object to the destination.

        Args:
            data: Object to deliver.
            identifier: Unique key for the data (path-like for compatibility).
            **kwargs: Extra options understood by the concrete backend.

        Raises:
            TypeError: If ``identifier`` is not a valid type.
            ValueError: If the data cannot be sent to the destination.
        """
        ...

    @abstractmethod
    def save_batch(
        self, data_list: List[Any], identifiers: List[Union[str, Path]], **kwargs
    ) -> None:
        """
        Deliver several data objects to the destination in one operation.

        Args:
            data_list: Objects to deliver.
            identifiers: Unique keys, one per entry of ``data_list``
                (both lists must have the same length).
            **kwargs: Extra options understood by the concrete backend.

        Raises:
            ValueError: If ``data_list`` and ``identifiers`` differ in length,
                or if any object cannot be sent to the destination.
            TypeError: If any identifier is not a valid type.
        """
        ...

62 

63 

class DataSource(ABC):
    """
    Read-side contract shared by every data source.

    A source can be a filesystem, a virtual workspace, remote storage, or a
    database; implementations only need to provide the read-only operations
    declared here.
    """

    @abstractmethod
    def load(self, file_path: Union[str, Path], **kwargs) -> Any:
        """
        Read the data stored at ``file_path``.

        Args:
            file_path: Location of the file to read.
            **kwargs: Extra options understood by the concrete backend.

        Raises:
            FileNotFoundError: If the file does not exist.
            TypeError: If ``file_path`` is not a valid type.
            ValueError: If the data cannot be loaded.
        """
        ...

    @abstractmethod
    def load_batch(self, file_paths: List[Union[str, Path]], **kwargs) -> List[Any]:
        """
        Read several files in one batch operation.

        Args:
            file_paths: Locations of the files to read.
            **kwargs: Extra options understood by the concrete backend.

        Raises:
            FileNotFoundError: If any file does not exist.
            TypeError: If any entry of ``file_paths`` is not a valid type.
            ValueError: If any data cannot be loaded.
        """
        ...

    @abstractmethod
    def list_files(
        self,
        directory: Union[str, Path],
        pattern: Optional[str] = None,
        extensions: Optional[Set[str]] = None,
        recursive: bool = False,
        **kwargs,
    ) -> List[str]:
        """
        Enumerate the files found in ``directory``.

        Args:
            directory: Directory whose files are listed.
            pattern: Glob pattern used to filter files, if given.
            extensions: File extensions to keep, if given
                (e.g., {'.tif', '.png'}).
            recursive: Whether to search recursively.
            **kwargs: Extra options understood by the concrete backend.

        Returns:
            File paths; absolute or relative depending on the backend.
        """
        ...

    @abstractmethod
    def exists(self, path: Union[str, Path]) -> bool:
        """Report whether ``path`` exists."""
        ...

    @abstractmethod
    def is_file(self, path: Union[str, Path]) -> bool:
        """Report whether ``path`` is a file."""
        ...

    @abstractmethod
    def is_dir(self, path: Union[str, Path]) -> bool:
        """Report whether ``path`` is a directory."""
        ...

    @abstractmethod
    def list_dir(self, path: Union[str, Path]) -> List[str]:
        """Return the names of the immediate entries of a directory."""
        ...

147 

148 

class VirtualBackend(DataSink):
    """
    Abstract base for backends with virtual filesystem semantics.

    A virtual backend produces its file listings on demand instead of
    performing real filesystem operations. Examples: OMERO (filenames are
    generated from the plate structure), S3 (object listings), HTTP APIs.
    """

    @abstractmethod
    def load(self, file_path: Union[str, Path], **kwargs) -> Any:
        """
        Read the data behind a virtual path.

        Args:
            file_path: Virtual path to read.
            **kwargs: Backend-specific context.

        Returns:
            The loaded data.

        Raises:
            FileNotFoundError: If the virtual path does not exist.
            TypeError: If required kwargs are missing.
            ValueError: If the data cannot be loaded.
        """
        ...

    @abstractmethod
    def load_batch(self, file_paths: List[Union[str, Path]], **kwargs) -> List[Any]:
        """
        Read several virtual paths in one batch operation.

        Args:
            file_paths: Virtual paths to read.
            **kwargs: Backend-specific context.

        Returns:
            Loaded data objects, ordered like ``file_paths``.

        Raises:
            FileNotFoundError: If any virtual path does not exist.
            TypeError: If required kwargs are missing.
            ValueError: If any data cannot be loaded.
        """
        ...

    @abstractmethod
    def list_files(
        self,
        directory: Union[str, Path],
        pattern: Optional[str] = None,
        extensions: Optional[Set[str]] = None,
        recursive: bool = False,
        **kwargs,
    ) -> List[str]:
        """
        Produce the virtual file listing for ``directory``.

        Args:
            directory: Virtual directory path.
            pattern: File pattern used as a filter, if given.
            extensions: File extensions to keep, if given.
            recursive: Whether to list recursively.
            **kwargs: Backend-specific context.

        Returns:
            Virtual filenames.

        Raises:
            TypeError: If required kwargs are missing.
            ValueError: If ``directory`` is invalid.
        """
        ...

    @property
    def requires_filesystem_validation(self) -> bool:
        """
        Tell callers whether filesystem validation is needed.

        Virtual backends have no real filesystem paths, so the answer here is
        always negative; real backends answer True because their paths need
        validation.

        Returns:
            False for virtual backends.
        """
        return False

235 

236 

class BackendBase(metaclass=AutoRegisterMeta):
    """
    Common root of every storage backend, read-only or read-write.

    Provides the registry hook and the shared interface used for backend
    discovery. Concrete backends are expected to derive from StorageBackend
    or ReadOnlyBackend rather than from this class directly.
    """

    # Name of the registry key attribute; presumably read by AutoRegisterMeta
    # when registering subclasses (the metaclass is not visible here).
    __registry_key__ = "_backend_type"

    @property
    @abstractmethod
    def requires_filesystem_validation(self) -> bool:
        """Tell callers whether this backend needs filesystem validation."""
        ...

252 

253 

class ReadOnlyBackend(BackendBase, DataSource):
    """
    Abstract, auto-registered base for backends that only read data.

    Intended for virtual workspaces, read-only mounts, archive viewers and
    similar backends that never need to write.
    """

    @property
    def requires_filesystem_validation(self) -> bool:
        """
        Tell callers whether this backend needs filesystem validation.

        Returns:
            False for virtual/remote backends, True for local filesystem.
            This base implementation returns False.
        """
        return False

271 

272 

class StorageBackend(BackendBase, DataSource, DataSink):
    """
    Abstract base class for read-write storage backends.

    Combines DataSource (read) and DataSink (write) with file system
    operations for backends that provide persistent storage with file-like
    semantics.

    Concrete implementations are automatically registered via AutoRegisterMeta.
    """

    @property
    def requires_filesystem_validation(self) -> bool:
        """
        Whether this backend requires filesystem validation.

        Returns:
            True for real filesystem backends (default for StorageBackend)
        """
        return True

    def exists(self, path: Union[str, Path]) -> bool:
        """
        Check if a path exists (is a valid file or directory).

        The path is probed with ``is_file`` first and, unless that confirms a
        file, with ``is_dir``. Both styles of ``is_file`` implementation are
        supported: one that raises ``IsADirectoryError`` for a directory and
        one that simply returns False for it.

        Args:
            path: Path to check

        Returns:
            bool: True if path resolves to a real object. False if both
            probes fail or raise one of the "not found" errors below.

        Raises:
            Exception: Any error raised by ``is_file`` or ``is_dir`` other
                than FileNotFoundError, NotADirectoryError,
                StorageResolutionError (and IsADirectoryError from
                ``is_file``) is propagated unchanged.
        """
        # Errors that mean "nothing usable at this path" for either probe.
        unresolved = (FileNotFoundError, NotADirectoryError, StorageResolutionError)

        try:
            if self.is_file(path):
                return True
        except (IsADirectoryError, *unresolved):
            # Not a file (or not resolvable as one); it may still be a directory.
            pass

        # A falsy is_file() result must not end the check: a backend that
        # returns False for directories would otherwise make an existing
        # directory look missing.
        try:
            return self.is_dir(path)
        except unresolved:
            return False