Coverage for openhcs/core/components/parser_metaprogramming.py: 27.1%

159 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-01 18:33 +0000

1""" 

2Metaprogramming system for dynamic parser interface generation. 

3 

4This module applies metaprogramming to the parser system, generating parser interfaces 

5dynamically based on VariableComponents enum contents. This eliminates hardcoded 

6assumptions about component names and makes the parser system truly generic. 

7""" 

8 

9import logging 

10from abc import ABC, ABCMeta, abstractmethod 

11from typing import Any, Dict, Type, TypeVar, Optional, Union, Tuple 

12from enum import Enum 

13import inspect 

14 

15logger = logging.getLogger(__name__) 

16 

17T = TypeVar('T', bound=Enum) 

18 

19 

class ParserMethodRegistry:
    """Bookkeeping for parser interfaces and the method names derived from their enums.

    For every registered interface the registry remembers the component enum
    it was registered with and a mapping of derived method names
    (``parse_<component>`` and ``construct_with_<component>``) to a short
    human-readable description.
    """

    def __init__(self):
        # interface name -> {method name -> description}
        self._methods: Dict[str, Dict[str, str]] = {}
        # interface name -> enum class the interface was registered with
        self._component_enums: Dict[str, Type[Enum]] = {}

    def register_parser_interface(self, interface_name: str, component_enum: Type[Enum]):
        """Record ``component_enum`` under ``interface_name`` and derive its method names.

        Registering a name that is already known replaces its previous entry.
        """
        self._component_enums[interface_name] = component_enum
        described = self._methods[interface_name] = {}

        for member in component_enum:
            label = member.value
            # Two entries per component, e.g. parse_well / construct_with_well.
            described[f"parse_{label}"] = f"Parse {label} from filename"
            described[f"construct_with_{label}"] = f"Construct filename with {label}"

        logger.debug(f"Registered parser interface {interface_name} with {len(described)} methods")

    def get_methods(self, interface_name: str) -> Dict[str, str]:
        """Return the method-name -> description mapping, or ``{}`` for an unknown interface."""
        try:
            return self._methods[interface_name]
        except KeyError:
            return {}

    def get_component_enum(self, interface_name: str) -> Optional[Type[Enum]]:
        """Return the enum registered for ``interface_name``, or ``None`` if there is none."""
        return self._component_enums.get(interface_name, None)

53 

54 

# Global parser method registry: a single module-level instance that
# DynamicParserMeta registers every generated interface with.
_parser_registry = ParserMethodRegistry()

57 

58 

class DynamicParserMeta(ABCMeta):
    """
    Metaclass that injects abstract parser methods derived from a component enum.

    When a class is created with ``component_enum=<Enum>``, its namespace receives
    abstract ``parse_filename`` and ``construct_filename`` methods plus an abstract
    ``validate_<component>`` / ``extract_<component>`` pair for every enum member
    (named after the member's ``value``). Classes created without
    ``component_enum`` are built unchanged.
    """

    def __new__(mcs, name, bases, namespace, component_enum=None, **kwargs):
        """
        Create a new parser interface class with dynamically generated methods.

        Args:
            name: Class name
            bases: Base classes
            namespace: Class namespace; generated methods are written into it
            component_enum: Enum class to generate methods from, or None to
                create the class without generating anything
            **kwargs: Additional class keyword arguments; accepted but not
                forwarded to the base metaclass

        Returns:
            The newly created class. When ``component_enum`` is given, the class
            also carries ``_component_enum`` and ``FILENAME_COMPONENTS``.
        """
        if component_enum is None:
            return super().__new__(mcs, name, bases, namespace)

        logger.info(f"Generating dynamic parser interface {name} for enum {component_enum.__name__}")
        mcs._generate_parser_methods(namespace, component_enum, name)

        # Register the interface
        _parser_registry.register_parser_interface(name, component_enum)

        # Create the class
        cls = super().__new__(mcs, name, bases, namespace)

        # Store metadata on the class
        cls._component_enum = component_enum
        cls.FILENAME_COMPONENTS = [component.value for component in component_enum] + ['extension']
        logger.info(f"Created dynamic parser interface {name} with {len(_parser_registry.get_methods(name))} methods")

        return cls

    @staticmethod
    def _generate_parser_methods(namespace: Dict[str, Any], component_enum: Type[Enum], interface_name: str):
        """
        Populate ``namespace`` with abstract parser methods for ``component_enum``.

        Existing entries with the same names are overwritten.

        Args:
            namespace: Class namespace dict to write the methods into
            component_enum: Enum whose member values name the per-component methods
            interface_name: Used as the qualified-name prefix of the generated methods
        """

        @abstractmethod
        def parse_filename(self, filename: str) -> Optional[Dict[str, Any]]:
            """
            Parse a filename to extract all components.

            Returns a dictionary with keys matching component enum values plus 'extension'.
            """
            raise NotImplementedError("parse_filename must be implemented")

        @abstractmethod
        def construct_filename(self, extension: str = '.tif', **component_values) -> str:
            """
            Construct a filename from component values.

            Args:
                extension: File extension
                **component_values: Component values as keyword arguments

            Returns:
                Constructed filename string
            """
            raise NotImplementedError("construct_filename must be implemented")

        # Qualify the generic stubs the same way as the per-component ones below.
        parse_filename.__qualname__ = f"{interface_name}.parse_filename"
        construct_filename.__qualname__ = f"{interface_name}.construct_filename"
        namespace['parse_filename'] = parse_filename
        namespace['construct_filename'] = construct_filename

        def make_validate_stub(comp_name):
            """Build the abstract ``validate_<comp_name>`` stub."""
            @abstractmethod
            def validate_component(self, value: Any) -> bool:
                raise NotImplementedError(f"validate_{comp_name} must be implemented")

            # An f-string in docstring position is just an expression, so the
            # docstring has to be attached explicitly.
            validate_component.__doc__ = f"Validate {comp_name} component value."
            validate_component.__name__ = f"validate_{comp_name}"
            validate_component.__qualname__ = f"{interface_name}.validate_{comp_name}"
            return validate_component

        def make_extract_stub(comp_name):
            """Build the abstract ``extract_<comp_name>`` stub."""
            @abstractmethod
            def extract_component(self, filename: str) -> Optional[Any]:
                raise NotImplementedError(f"extract_{comp_name} must be implemented")

            extract_component.__doc__ = f"Extract {comp_name} component from filename."
            extract_component.__name__ = f"extract_{comp_name}"
            extract_component.__qualname__ = f"{interface_name}.extract_{comp_name}"
            return extract_component

        # One validate/extract pair per enum member, named after its value.
        for component in component_enum:
            component_name = component.value
            namespace[f"validate_{component_name}"] = make_validate_stub(component_name)
            namespace[f"extract_{component_name}"] = make_extract_stub(component_name)

163 

164 

class GenericFilenameParser(ABC):
    """
    Abstract base for filename parsers driven by a component enum.

    On construction, a ``validate_<component>`` and ``extract_<component>``
    callable is attached to the instance for every member of the enum (named
    after the member's ``value``). Subclasses supply the actual parsing through
    ``can_parse``, ``extract_component_coordinates``, ``parse_filename`` and
    ``construct_filename``.
    """

    def __init__(self, component_enum: Type[Enum]):
        """
        Initialize the generic parser.

        Args:
            component_enum: The component enum this parser handles
        """
        self.component_enum = component_enum
        self.FILENAME_COMPONENTS = [component.value for component in component_enum] + ['extension']
        self.PLACEHOLDER_PATTERN = '{iii}'
        self._generate_dynamic_methods()

    def _generate_dynamic_methods(self):
        """
        Attach a generic validator and extractor to the instance for each component.

        Called from ``__init__`` and again from ``__setstate__``, because the
        generated callables are dropped from the pickled state.
        """
        # NOTE(review): these are instance attributes, so they take precedence
        # over same-named methods defined on a subclass - confirm that is intended.
        for component in self.component_enum:
            component_name = component.value
            setattr(self, f"validate_{component_name}", self._create_generic_validator(component))
            setattr(self, f"extract_{component_name}", self._create_generic_extractor(component))

    def _create_generic_validator(self, component: Enum):
        """
        Create the generic validator callable for a component.

        The rules are the same for every component; ``component`` is accepted
        only so validators and extractors are created through parallel calls.

        Returns:
            A function ``validate(value) -> bool``.
        """
        def validate_component(value: Any) -> bool:
            """Generic validation for any component value."""
            if value is None:
                return True  # Allow None values (placeholders)
            if isinstance(value, str):
                # Any non-empty string is accepted, which already covers
                # placeholder patterns such as '{iii}'.
                return len(value) > 0
            if isinstance(value, int):
                # Non-negative integers only (bool is an int subclass and passes).
                return value >= 0
            # Other types: allow any value (extensible for future component types)
            return True

        return validate_component

    def _create_generic_extractor(self, component: Enum):
        """
        Create the generic extractor callable for a component.

        Returns:
            A function ``extract(filename)`` that runs ``self.parse_filename`` and
            returns the entry stored under the component's value, or None when
            parsing yields nothing or the key is absent.
        """
        component_name = component.value

        def extract_component(filename: str) -> Optional[Any]:
            """Generic extraction for any component using parse_filename."""
            parsed = self.parse_filename(filename)
            if parsed and component_name in parsed:
                return parsed[component_name]
            return None

        return extract_component

    @classmethod
    @abstractmethod
    def can_parse(cls, filename: str) -> bool:
        """Check if this parser can parse the given filename."""
        pass

    @abstractmethod
    def extract_component_coordinates(self, component_value: str) -> Tuple[str, str]:
        """Extract coordinates from component identifier (typically well)."""
        pass

    @abstractmethod
    def parse_filename(self, filename: str) -> Optional[Dict[str, Any]]:
        """Parse a filename to extract all components."""
        pass

    @abstractmethod
    def construct_filename(self, extension: str = '.tif', **component_values) -> str:
        """Construct a filename from component values."""
        pass

    def __getstate__(self):
        """
        Return the instance state without the generated validator/extractor callables.

        The callables are dropped before pickling; ``component_enum`` stays in
        the state so ``__setstate__`` can regenerate them. The names that were
        removed are recorded under ``'_removed_dynamic_methods'``.
        """
        state = self.__dict__.copy()

        removed = []
        for component in self.component_enum:
            for prefix in ("validate_", "extract_"):
                attr_name = f"{prefix}{component.value}"
                if attr_name in state:
                    removed.append(attr_name)
                    del state[attr_name]

        state['_removed_dynamic_methods'] = removed
        return state

    def __setstate__(self, state):
        """
        Restore the instance state and regenerate the dynamic callables.

        The bookkeeping entry written by ``__getstate__`` is discarded; the
        callables are rebuilt from ``component_enum``.
        """
        self.__dict__.update(state)
        self.__dict__.pop('_removed_dynamic_methods', None)
        self._generate_dynamic_methods()

    def get_component_names(self) -> list:
        """Get all component names (enum member values) for this parser."""
        return [component.value for component in self.component_enum]

    def validate_component_by_name(self, component_name: str, value: Any) -> bool:
        """
        Validate a component value using the dynamic validation methods.

        Args:
            component_name: Name of the component to validate
            value: Value to validate

        Returns:
            True if the value is valid for the component

        Raises:
            AttributeError: If no validation method exists for the component
        """
        validate_method = getattr(self, f"validate_{component_name}")
        return validate_method(value)

    def extract_component_by_name(self, filename: str, component_name: str) -> Optional[Any]:
        """
        Extract a specific component from filename using dynamic extraction methods.

        Args:
            filename: Filename to parse
            component_name: Name of component to extract

        Returns:
            Component value or None if extraction fails

        Raises:
            AttributeError: If no extraction method exists for the component
        """
        extract_method = getattr(self, f"extract_{component_name}")
        return extract_method(filename)

    def validate_component_dict(self, components: Dict[str, Any]) -> bool:
        """
        Validate that a component dictionary contains all required components.

        The 'extension' key is ignored. A key that has no matching
        ``validate_<key>`` method makes the dictionary invalid rather than
        raising.

        Args:
            components: Dictionary of component values

        Returns:
            True if all required components are present and every entry is valid
        """
        required_components = set(self.get_component_names())
        provided_components = set(components.keys()) - {'extension'}

        missing = required_components - provided_components
        if missing:
            logger.warning(f"Missing required components: {missing}")
            return False

        for component_name, value in components.items():
            if component_name == 'extension':
                continue

            # Look the validator up defensively: an unexpected key must yield
            # False, not an AttributeError, from a bool-returning check.
            validate_method = getattr(self, f"validate_{component_name}", None)
            if validate_method is None:
                logger.warning(f"Unknown component: {component_name}")
                return False

            if not validate_method(value):
                logger.warning(f"Invalid value for {component_name}: {value}")
                return False

        return True

380 

381 

382 

383 

class ParserInterfaceGenerator:
    """
    Factory for creating component-specific parser interfaces dynamically.

    Interfaces are built with ``DynamicParserMeta`` and cached, so repeated
    requests for the same name, enum and base classes return the same class.
    """

    def __init__(self):
        # (interface name, component enum, base classes) -> generated class
        self._interface_cache: Dict[Any, Type] = {}

    def create_parser_interface(self,
                                component_enum: Type[Enum],
                                interface_name: Optional[str] = None,
                                base_classes: Optional[tuple] = None) -> "Type[GenericFilenameParser]":
        """
        Create a component-specific parser interface class.

        Args:
            component_enum: The component enum to generate interface for
            interface_name: Optional custom interface name; defaults to
                ``<EnumName>FilenameParser``
            base_classes: Optional base classes; defaults to
                ``(GenericFilenameParser,)``

        Returns:
            Dynamically generated parser interface class (cached per name,
            enum and base classes)
        """
        if interface_name is None:
            interface_name = f"{component_enum.__name__}FilenameParser"

        if base_classes is None:
            base_classes = (GenericFilenameParser,)

        # The bases are part of the key so a request with different bases is
        # never answered with a class that was built from other bases.
        cache_key = (interface_name, component_enum, tuple(base_classes))
        cached = self._interface_cache.get(cache_key)
        if cached is not None:
            logger.debug(f"Returning cached parser interface {interface_name}")
            return cached

        # Create the interface class dynamically
        interface_class = DynamicParserMeta(
            interface_name,
            base_classes,
            {},
            component_enum=component_enum
        )

        self._interface_cache[cache_key] = interface_class

        logger.info(f"Created parser interface {interface_name} for {component_enum.__name__}")
        return interface_class

    def get_cached_interface(self, interface_name: str) -> Optional[Type]:
        """
        Get a cached parser interface by name.

        The name must match exactly; a prefix of a longer interface name does
        not match. If several cached interfaces share the name, the one cached
        first is returned.

        Returns:
            The cached interface class, or None if no interface has that name
        """
        for interface in self._interface_cache.values():
            if interface.__name__ == interface_name:
                return interface
        return None

    def clear_cache(self):
        """Clear the parser interface cache."""
        self._interface_cache.clear()
        logger.debug("Cleared parser interface cache")

449 

450 

# Global parser interface generator instance, created once when this module
# is imported.
parser_interface_generator = ParserInterfaceGenerator()