Coverage for src/polystore/base.py: 57%
37 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-03 06:58 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-03 06:58 +0000
1"""
2Abstract base classes for storage backends.
4This module defines the fundamental interfaces for storage backends,
5independent of specific implementations. It establishes the contract
6that all storage backends must fulfill.
7"""
9import logging
10from abc import ABC, abstractmethod
11from pathlib import Path
12from typing import Any, Dict, List, Optional, Set, Union
14from .exceptions import StorageResolutionError
15from .registry import AutoRegisterMeta
17logger = logging.getLogger(__name__)
class DataSink(ABC):
    """
    Write-side interface for anything that can receive data.

    Declares the smallest set of operations needed to push data to a
    destination, be it storage, a stream, or another data handling system.
    """

    @abstractmethod
    def save(self, data: Any, identifier: Union[str, Path], **kwargs) -> None:
        """
        Deliver a single data object to the destination.

        Args:
            data: Object to deliver.
            identifier: Unique identifier for the data (path-like for
                compatibility).
            **kwargs: Arguments specific to the backend.

        Raises:
            TypeError: If identifier is not a valid type
            ValueError: If data cannot be sent to destination
        """

    @abstractmethod
    def save_batch(
        self,
        data_list: List[Any],
        identifiers: List[Union[str, Path]],
        **kwargs
    ) -> None:
        """
        Deliver several data objects to the destination in one operation.

        Args:
            data_list: Objects to deliver.
            identifiers: Unique identifiers, one per entry of data_list
                (the two lists must have the same length).
            **kwargs: Arguments specific to the backend.

        Raises:
            ValueError: If data_list and identifiers have different lengths
            TypeError: If any identifier is not a valid type
            ValueError: If any data cannot be sent to destination
        """
class DataSource(ABC):
    """
    Read-side interface for anything data can be loaded from.

    Declares the smallest set of operations needed to read from a source,
    be it a filesystem, virtual workspace, remote storage, or database.
    """

    @abstractmethod
    def load(self, file_path: Union[str, Path], **kwargs) -> Any:
        """
        Read the data stored at one file path.

        Args:
            file_path: Location of the file to read.
            **kwargs: Arguments specific to the backend.

        Raises:
            FileNotFoundError: If the file does not exist
            TypeError: If file_path is not a valid type
            ValueError: If the data cannot be loaded
        """

    @abstractmethod
    def load_batch(self, file_paths: List[Union[str, Path]], **kwargs) -> List[Any]:
        """
        Read several files as one batch operation.

        Args:
            file_paths: Locations of the files to read.
            **kwargs: Arguments specific to the backend.

        Raises:
            FileNotFoundError: If any file does not exist
            TypeError: If any file_path is not a valid type
            ValueError: If any data cannot be loaded
        """

    @abstractmethod
    def list_files(
        self,
        directory: Union[str, Path],
        pattern: Optional[str] = None,
        extensions: Optional[Set[str]] = None,
        recursive: bool = False,
        **kwargs
    ) -> List[str]:
        """
        Enumerate the files found in a directory.

        Args:
            directory: Directory whose files are enumerated.
            pattern: Glob pattern used to filter files, if given.
            extensions: File extensions used to filter files, if given
                (e.g., {'.tif', '.png'}).
            recursive: Whether subdirectories are searched as well.
            **kwargs: Arguments specific to the backend.

        Returns:
            List of file paths (absolute or relative depending on backend)
        """

    @abstractmethod
    def exists(self, path: Union[str, Path]) -> bool:
        """Report whether the given path exists."""

    @abstractmethod
    def is_file(self, path: Union[str, Path]) -> bool:
        """Report whether the given path is a file."""

    @abstractmethod
    def is_dir(self, path: Union[str, Path]) -> bool:
        """Report whether the given path is a directory."""

    @abstractmethod
    def list_dir(self, path: Union[str, Path]) -> List[str]:
        """Return the names of the immediate entries of a directory."""
class VirtualBackend(DataSink):
    """
    Abstract base for backends exposing virtual filesystem semantics.

    A virtual backend produces its file listings on demand, without real
    filesystem operations.
    Examples: OMERO (generates filenames from plate structure), S3 (lists objects), HTTP APIs.
    """

    @abstractmethod
    def load(self, file_path: Union[str, Path], **kwargs) -> Any:
        """
        Read the data behind one virtual path.

        Args:
            file_path: Virtual path to read.
            **kwargs: Context specific to the backend.

        Returns:
            The loaded data

        Raises:
            FileNotFoundError: If the virtual path does not exist
            TypeError: If required kwargs are missing
            ValueError: If the data cannot be loaded
        """

    @abstractmethod
    def load_batch(self, file_paths: List[Union[str, Path]], **kwargs) -> List[Any]:
        """
        Read several virtual paths as one batch operation.

        Args:
            file_paths: Virtual paths to read.
            **kwargs: Context specific to the backend.

        Returns:
            List of loaded data objects in the same order as file_paths

        Raises:
            FileNotFoundError: If any virtual path does not exist
            TypeError: If required kwargs are missing
            ValueError: If any data cannot be loaded
        """

    @abstractmethod
    def list_files(
        self,
        directory: Union[str, Path],
        pattern: Optional[str] = None,
        extensions: Optional[Set[str]] = None,
        recursive: bool = False,
        **kwargs
    ) -> List[str]:
        """
        Produce the virtual file listing of a directory.

        Args:
            directory: Virtual directory path.
            pattern: File pattern filter, if given.
            extensions: File extensions to filter by, if given.
            recursive: Whether the listing descends recursively.
            **kwargs: Context specific to the backend.

        Returns:
            List of virtual filenames

        Raises:
            TypeError: If required kwargs are missing
            ValueError: If directory is invalid
        """

    @property
    def requires_filesystem_validation(self) -> bool:
        """
        Tell whether paths of this backend need filesystem validation.

        Virtual backends have no real filesystem paths, so they answer
        False; real backends need path validation and answer True.

        Returns:
            False for virtual backends
        """
        return False
class BackendBase(metaclass=AutoRegisterMeta):
    """
    Common root of every storage backend, read-only or read-write.

    Carries the registry key and the shared interface used for backend
    discovery. Concrete backends are expected to derive from
    StorageBackend or ReadOnlyBackend rather than from this class directly.
    """

    # Attribute name handed to AutoRegisterMeta as the registry key.
    __registry_key__ = "_backend_type"

    @property
    @abstractmethod
    def requires_filesystem_validation(self) -> bool:
        """Tell whether paths of this backend need filesystem validation."""
class ReadOnlyBackend(BackendBase, DataSource):
    """
    Abstract base class for auto-registered, read-only storage backends.

    Intended for backends that never write data (virtual workspaces,
    read-only mounts, archive viewers, etc.).
    """

    @property
    def requires_filesystem_validation(self) -> bool:
        """
        Tell whether paths of this backend need filesystem validation.

        This base implementation always answers False.

        Returns:
            False for virtual/remote backends, True for local filesystem
        """
        return False
class StorageBackend(BackendBase, DataSource, DataSink):
    """
    Abstract base class for read-write storage backends.

    Extends DataSource (read) and DataSink (write) with file system operations
    for backends that provide persistent storage with file-like semantics.

    Concrete implementations are automatically registered via AutoRegisterMeta.
    """

    @property
    def requires_filesystem_validation(self) -> bool:
        """
        Whether this backend requires filesystem validation.

        Returns:
            True for real filesystem backends (default for StorageBackend)
        """
        return True

    def exists(self, path: Union[str, Path]) -> bool:
        """
        Check if a path exists (is a valid file or directory).

        The path is probed with ``is_file`` first and with ``is_dir`` second.
        Both styles of backend are supported: those whose ``is_file`` raises
        (e.g. ``IsADirectoryError``) for a non-file, and those whose
        ``is_file`` simply returns False for a directory.

        Args:
            path: Path to check

        Returns:
            bool: True if path resolves to a real object, False if neither
            probe succeeds or the directory probe fails to resolve the path
        """
        resolution_errors = (
            FileNotFoundError,
            NotADirectoryError,
            StorageResolutionError,
        )

        try:
            # Only a positive answer is final. A falsy result may just mean
            # "this is a directory", so it must not short-circuit to False.
            if self.is_file(path):
                return True
        except (IsADirectoryError, *resolution_errors):
            # Deliberate best-effort: not being a resolvable file is not
            # conclusive, so fall through to the directory probe below.
            pass

        try:
            return self.is_dir(path)
        except resolution_errors:
            return False