Coverage for openhcs/processing/backends/experimental_analysis/format_registry_service.py: 21.4%

84 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-01 18:33 +0000

1""" 

2Format registry service for automatic discovery and management. 

3 

4This module provides automatic discovery of microscope format registries 

5following OpenHCS generic solution principles. 

6""" 

7 

8import pkgutil 

9import importlib 

10from typing import Dict, List, Optional, Type 

11from pathlib import Path 

12 

13from .format_registry import MicroscopeFormatRegistryBase, FormatDetectionError 

14 

15 

class FormatRegistryService:
    """
    Service for automatic discovery and access to microscope format registries.

    Registry classes are found by importing every module of the
    ``openhcs.processing.backends.experimental_analysis`` package and
    collecting the subclasses of ``MicroscopeFormatRegistryBase`` that declare
    a non-empty ``FORMAT_NAME``; no registry is imported by name.

    All state lives on the class: discovered classes and the instances built
    from them are cached until ``clear_cache()`` is called.
    """

    # FORMAT_NAME -> registry class; None until discovery has run once.
    _registry_cache: Optional[Dict[str, Type[MicroscopeFormatRegistryBase]]] = None
    # FORMAT_NAME -> registry instance, created on first request; None until used.
    _instance_cache: Optional[Dict[str, MicroscopeFormatRegistryBase]] = None

    @classmethod
    def _discover_registries(cls) -> Dict[str, Type[MicroscopeFormatRegistryBase]]:
        """
        Automatically discover all format registry classes.

        The result is cached on the class, so the package is only walked on
        the first call (or the first call after ``clear_cache()``).

        Returns:
            Dictionary mapping format names to registry classes
        """
        if cls._registry_cache is not None:
            return cls._registry_cache

        # Local imports: the package is only needed for discovery, and logging
        # is only used to report skipped modules.
        import logging
        import openhcs.processing.backends.experimental_analysis as analysis_package

        logger = logging.getLogger(__name__)
        registries: Dict[str, Type[MicroscopeFormatRegistryBase]] = {}

        # Walk through all modules in the package
        for _finder, modname, ispkg in pkgutil.walk_packages(
            analysis_package.__path__,
            prefix=analysis_package.__name__ + ".",
        ):
            if ispkg:
                continue

            try:
                module = importlib.import_module(modname)

                # Find all classes that inherit from MicroscopeFormatRegistryBase
                for attr_name in dir(module):
                    attr = getattr(module, attr_name)

                    if not isinstance(attr, type):
                        continue
                    if attr is MicroscopeFormatRegistryBase:
                        continue
                    if not issubclass(attr, MicroscopeFormatRegistryBase):
                        continue

                    # Only classes declaring a non-empty FORMAT_NAME are
                    # registered; a later class with the same name replaces
                    # an earlier one.
                    format_name = getattr(attr, "FORMAT_NAME", None)
                    if format_name:
                        registries[format_name] = attr

            except ImportError as exc:
                # Best effort: a module that cannot be imported is skipped,
                # but the reason is recorded instead of being lost.
                logger.debug(
                    "Skipping module %s during registry discovery: %s", modname, exc
                )
                continue

        cls._registry_cache = registries
        return registries

    @classmethod
    def get_all_format_registries(cls) -> Dict[str, Type[MicroscopeFormatRegistryBase]]:
        """
        Get all discovered format registry classes.

        Returns:
            Dictionary mapping format names to registry classes. This is the
            cached dictionary itself, not a copy.
        """
        return cls._discover_registries()

    @classmethod
    def get_registry_class_for_format(cls, format_name: str) -> Type[MicroscopeFormatRegistryBase]:
        """
        Get registry class for specific format.

        Args:
            format_name: Name of the microscope format

        Returns:
            Registry class for the format

        Raises:
            FormatDetectionError: If format is not supported
        """
        registries = cls.get_all_format_registries()

        if format_name not in registries:
            available_formats = list(registries.keys())
            raise FormatDetectionError(
                f"Unsupported format '{format_name}'. Available formats: {available_formats}"
            )

        return registries[format_name]

    @classmethod
    def get_registry_instance_for_format(cls, format_name: str) -> MicroscopeFormatRegistryBase:
        """
        Get registry instance for specific format.

        The registry class is instantiated without arguments on first request
        and the same instance is returned on later calls.

        Args:
            format_name: Name of the microscope format

        Returns:
            Registry instance for the format

        Raises:
            FormatDetectionError: If format is not supported
        """
        if cls._instance_cache is None:
            cls._instance_cache = {}

        if format_name not in cls._instance_cache:
            registry_class = cls.get_registry_class_for_format(format_name)
            cls._instance_cache[format_name] = registry_class()

        return cls._instance_cache[format_name]

    @classmethod
    def detect_format_from_file(cls, file_path: str) -> str:
        """
        Automatically detect microscope format from file.

        Registries are tried in discovery order. A registry matches when the
        file suffix is one of its ``SUPPORTED_EXTENSIONS`` (an exact match or
        a case-insensitive one) and its ``read_results`` /
        ``extract_features`` calls succeed and yield a truthy result.

        Args:
            file_path: Path to the results file

        Returns:
            Detected format name

        Raises:
            FormatDetectionError: If the path does not exist or no registry
                can handle the file
        """
        import logging

        logger = logging.getLogger(__name__)
        file_path_obj = Path(file_path)

        if not file_path_obj.exists():
            raise FormatDetectionError(f"File not found: {file_path}")

        registries = cls.get_all_format_registries()
        suffix = file_path_obj.suffix
        suffix_lower = suffix.lower()

        # Try each registry to see which one can handle the file
        for format_name in registries:
            try:
                registry_instance = cls.get_registry_instance_for_format(format_name)
                extensions = registry_instance.SUPPORTED_EXTENSIONS

                # Exact match first (original behavior), then a
                # case-insensitive match so "RESULTS.CSV" is recognised by a
                # registry that declares ".csv".
                if suffix not in extensions and suffix_lower not in {
                    str(ext).lower() for ext in extensions
                }:
                    continue

                # Run the registry's own reader and feature extraction on the file.
                raw_df = registry_instance.read_results(file_path)
                features = registry_instance.extract_features(raw_df)

                # If we can extract features, this format works.
                # NOTE(review): if extract_features returns an object whose
                # truth value is ambiguous, the resulting error is handled
                # below and the format is rejected - confirm the return type.
                if features:
                    return format_name

            except Exception as exc:
                # Any failure means this registry cannot handle the file; log
                # it and move on to the next candidate.
                logger.debug(
                    "Format %r rejected for %s: %s", format_name, file_path, exc
                )
                continue

        # If no format worked, raise error
        available_formats = list(registries.keys())
        raise FormatDetectionError(
            f"Could not detect format for file {file_path}. "
            f"Available formats: {available_formats}"
        )

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """
        Get list of all supported format names.

        Returns:
            List of supported format names
        """
        return list(cls.get_all_format_registries())

    @classmethod
    def get_supported_extensions(cls) -> Dict[str, List[str]]:
        """
        Get mapping of formats to their supported file extensions.

        Each registry is instantiated (or fetched from the instance cache) to
        read its ``SUPPORTED_EXTENSIONS``.

        Returns:
            Dictionary mapping format names to supported extensions
        """
        return {
            format_name: list(
                cls.get_registry_instance_for_format(format_name).SUPPORTED_EXTENSIONS
            )
            for format_name in cls.get_all_format_registries()
        }

    @classmethod
    def clear_cache(cls) -> None:
        """Clear registry and instance caches (useful for testing)."""
        cls._registry_cache = None
        cls._instance_cache = None