Coverage for openhcs/core/registry_discovery.py: 43.3%

72 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-04 02:09 +0000

1""" 

2Generic registry class discovery utility. 

3 

4Consolidates duplicated registry discovery patterns across: 

5- Library registries (processing backends) 

6- Format registries (experimental analysis) 

7- Microscope handler registries 

8- Storage backend registries 

9 

10This module eliminates ~70 lines of duplicated pkgutil + importlib boilerplate 

11by providing a single, well-tested discovery function. 

12""" 

13 

14import importlib 

15import inspect 

16import logging 

17import pkgutil 

18from collections.abc import Iterable 

19from typing import Callable, List, Optional, Set, Type 

20 

21logger = logging.getLogger(__name__) 

22 

23 

def discover_registry_classes(
    package_path: Iterable[str],
    package_prefix: str,
    base_class: Type,
    exclude_modules: Optional[Set[str]] = None,
    validation_func: Optional[Callable[[Type], bool]] = None,
    skip_packages: bool = True
) -> List[Type]:
    """
    Generic registry class discovery using pkgutil + importlib pattern.

    Scans the direct children of a package (non-recursive), imports each
    module, and collects the classes that subclass ``base_class`` and are
    defined in that module. Modules that fail to import, or that raise while
    being inspected, are logged and skipped so that one bad module does not
    abort discovery.

    Args:
        package_path: Package __path__ attribute to scan (e.g., openhcs.io.__path__)
                      Accepts any iterable of strings (List, Tuple, _NamespacePath, etc.)
        package_prefix: Module prefix for importlib (e.g., "openhcs.io.")
        base_class: Base class to filter for (e.g., StorageBackend)
        exclude_modules: Set of module name substrings to skip (e.g., {'base', 'registry'}).
                         A substring is matched against the module name relative to
                         ``package_prefix``; a term that merely occurs inside the prefix
                         itself does not exclude every module. Terms that match the fully
                         qualified name without being part of the prefix still apply.
        validation_func: Optional function to validate discovered classes
                         Should return True to include, False to exclude
        skip_packages: If True, skip package directories (default: True)

    Returns:
        List of discovered registry classes, in module order and then by
        attribute name, with each class listed at most once.

    Example:
        >>> from openhcs.io.base import StorageBackend
        >>> import openhcs.io
        >>> backends = discover_registry_classes(
        ...     package_path=openhcs.io.__path__,
        ...     package_prefix="openhcs.io.",
        ...     base_class=StorageBackend,
        ...     exclude_modules={'base', 'backend_registry'}
        ... )
        >>> print([b.__name__ for b in backends])
        ['DiskStorageBackend', 'MemoryStorageBackend', 'ZarrStorageBackend']
    """
    registry_classes: List[Type] = []
    # Identity-based dedupe: a class aliased under two names in one module is
    # reported twice by inspect.getmembers.
    seen_ids: Set[int] = set()
    exclude_modules = exclude_modules or set()
    prefix_len = len(package_prefix)

    logger.debug(
        "Discovering registry classes: base=%s, prefix=%s, exclude=%s",
        base_class.__name__, package_prefix, exclude_modules,
    )

    for _finder, module_name, ispkg in pkgutil.iter_modules(package_path, package_prefix):
        # Skip packages if requested
        if ispkg and skip_packages:
            continue

        # Match exclusions against the name relative to the prefix so that a
        # term contained in the prefix does not exclude the whole package.
        if module_name.startswith(package_prefix):
            local_name = module_name[prefix_len:]
        else:
            local_name = module_name
        if any(
            term in local_name
            or (term not in package_prefix and term in module_name)
            for term in exclude_modules
        ):
            logger.debug("Skipping excluded module: %s", module_name)
            continue

        try:
            module = importlib.import_module(module_name)

            for _attr_name, obj in inspect.getmembers(module, inspect.isclass):
                # Keep strict subclasses only (never the base class itself)
                if obj is base_class or not issubclass(obj, base_class):
                    continue

                # Only include classes defined in this module (not imported)
                if obj.__module__ != module_name:
                    continue

                # Skip aliases of a class that was already handled
                if id(obj) in seen_ids:
                    continue
                seen_ids.add(id(obj))

                # Apply optional validation function
                if validation_func and not validation_func(obj):
                    logger.debug("Validation failed for %s", obj.__name__)
                    continue

                logger.debug(
                    "Discovered registry class: %s from %s", obj.__name__, module_name
                )
                registry_classes.append(obj)

        except ImportError as e:
            # Skip modules that can't be imported (e.g., missing optional dependencies)
            logger.debug("Could not import module %s: %s", module_name, e)
            continue
        except Exception as e:
            # Deliberate best-effort: log unexpected errors but continue discovery
            logger.warning("Failed to load registry module %s: %s", module_name, e)
            continue

    logger.info(
        "Discovered %s registry classes for %s: %s",
        len(registry_classes),
        base_class.__name__,
        [cls.__name__ for cls in registry_classes],
    )

    return registry_classes

123 

124 

def discover_registry_classes_recursive(
    package_path: Iterable[str],
    package_prefix: str,
    base_class: Type,
    exclude_modules: Optional[Set[str]] = None,
    validation_func: Optional[Callable[[Type], bool]] = None
) -> List[Type]:
    """
    Recursive version of discover_registry_classes that walks entire package tree.

    Uses pkgutil.walk_packages instead of iter_modules to recursively scan
    all subpackages. Useful for deeply nested registry structures. Package
    entries themselves are not inspected; only plain modules are. Unlike the
    non-recursive variant, classes are not required to be defined in the
    module where they are found, so a class re-exported by several modules is
    reported once (the first time it is encountered).

    Subpackages or modules that fail to import are logged and skipped so that
    one bad module does not abort discovery.

    Args:
        package_path: Package __path__ attribute to scan
                      Accepts any iterable of strings (List, Tuple, _NamespacePath, etc.)
        package_prefix: Module prefix for importlib
        base_class: Base class to filter for
        exclude_modules: Set of module name substrings to skip. A substring is
                         matched against the dotted module name relative to
                         ``package_prefix``; a term that merely occurs inside the
                         prefix itself does not exclude every module. Terms that
                         match the fully qualified name without being part of the
                         prefix still apply.
        validation_func: Optional function to validate discovered classes

    Returns:
        List of discovered registry classes, each listed at most once.

    Example (placeholder names)::

        import mypackage
        registries = discover_registry_classes_recursive(
            package_path=mypackage.__path__,
            package_prefix="mypackage.",
            base_class=MyRegistryBase,
            exclude_modules={'base'},
        )
    """
    registry_classes: List[Type] = []
    # Identity-based dedupe: without a __module__ filter the same class is
    # visible from every module that imports it.
    seen_ids: Set[int] = set()
    exclude_modules = exclude_modules or set()
    prefix_len = len(package_prefix)

    logger.debug(
        "Discovering registry classes (recursive): base=%s, prefix=%s, exclude=%s",
        base_class.__name__, package_prefix, exclude_modules,
    )

    def _log_package_failure(package_name: str) -> None:
        # walk_packages calls this from inside its except block when importing
        # a subpackage fails. Without an onerror callback, any non-ImportError
        # would propagate and terminate the whole walk.
        import sys

        error = sys.exc_info()[1]
        if isinstance(error, ImportError):
            logger.debug("Could not import package %s: %s", package_name, error)
        else:
            logger.warning("Failed to load registry package %s: %s", package_name, error)

    # Walk through all modules in the package tree
    for _finder, modname, ispkg in pkgutil.walk_packages(
        package_path, prefix=package_prefix, onerror=_log_package_failure
    ):
        # Skip packages (only process modules)
        if ispkg:
            continue

        # Match exclusions against the name relative to the prefix so that a
        # term contained in the prefix does not exclude the whole tree.
        if modname.startswith(package_prefix):
            local_name = modname[prefix_len:]
        else:
            local_name = modname
        if any(
            term in local_name
            or (term not in package_prefix and term in modname)
            for term in exclude_modules
        ):
            logger.debug("Skipping excluded module: %s", modname)
            continue

        try:
            module = importlib.import_module(modname)

            for _attr_name, attr in inspect.getmembers(module, inspect.isclass):
                # Keep strict subclasses only (never the base class itself)
                if attr is base_class or not issubclass(attr, base_class):
                    continue

                # Skip classes already seen via another module or alias
                if id(attr) in seen_ids:
                    continue
                seen_ids.add(id(attr))

                # Apply optional validation function
                if validation_func and not validation_func(attr):
                    logger.debug("Validation failed for %s", attr.__name__)
                    continue

                logger.debug(
                    "Discovered registry class: %s from %s", attr.__name__, modname
                )
                registry_classes.append(attr)

        except ImportError as e:
            # Skip modules that can't be imported
            logger.debug("Could not import module %s: %s", modname, e)
            continue
        except Exception as e:
            # Deliberate best-effort: log unexpected errors but continue discovery
            logger.warning("Failed to load registry module %s: %s", modname, e)
            continue

    logger.info(
        "Discovered %s registry classes (recursive) for %s: %s",
        len(registry_classes),
        base_class.__name__,
        [cls.__name__ for cls in registry_classes],
    )

    return registry_classes

221