Coverage for openhcs/core/components/parser_metaprogramming.py: 29.7%

158 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-04 02:09 +0000

1""" 

Metaprogramming system for dynamic parser interface generation.

This module applies metaprogramming to the parser system, generating parser interfaces
dynamically based on VariableComponents enum contents. This eliminates hardcoded
assumptions about component names and makes the parser system truly generic.

7""" 

8 

9import logging 

10from abc import ABC, ABCMeta, abstractmethod 

11from typing import Any, Dict, Type, TypeVar, Optional, Tuple 

12from enum import Enum 

13 

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Type variable restricted to Enum subclasses.
T = TypeVar('T', bound=Enum)

17 

18 

class ParserMethodRegistry:
    """Registry of the method names advertised for each parser interface.

    For every registered interface the registry stores the component enum and
    a mapping of method name -> human-readable description. Two names are
    derived from each enum member's ``value``: ``parse_<value>`` and
    ``construct_with_<value>``.
    """

    def __init__(self):
        # interface name -> {method name -> description}
        self._methods: Dict[str, Dict[str, str]] = {}
        # interface name -> enum class the interface was registered with
        self._component_enums: Dict[str, Type[Enum]] = {}

    def register_parser_interface(self, interface_name: str, component_enum: Type[Enum]):
        """Register a parser interface with its component enum.

        Registering a name that already exists replaces its previous entry.

        Args:
            interface_name: Name under which the interface is stored.
            component_enum: Enum whose member values drive the method names.
        """
        # Build the full mapping first so the registry is only updated once
        # iteration over the enum has completed successfully.
        methods: Dict[str, str] = {}
        for component in component_enum:
            component_name = component.value
            # parse_well, parse_site, etc.
            methods[f"parse_{component_name}"] = f"Parse {component_name} from filename"
            # construct_with_well, construct_with_site, etc.
            methods[f"construct_with_{component_name}"] = f"Construct filename with {component_name}"

        self._component_enums[interface_name] = component_enum
        self._methods[interface_name] = methods

        logger.debug(
            "Registered parser interface %s with %d methods",
            interface_name,
            len(methods),
        )

    def get_methods(self, interface_name: str) -> Dict[str, str]:
        """Return the method-name -> description mapping for an interface.

        A copy is returned so callers cannot mutate the registry's internal
        state. Unknown interface names yield an empty dict.
        """
        return dict(self._methods.get(interface_name, {}))

    def get_component_enum(self, interface_name: str) -> Optional[Type[Enum]]:
        """Return the enum registered for an interface, or None if unknown."""
        return self._component_enums.get(interface_name)

52 

53 

# Global parser method registry
_parser_registry = ParserMethodRegistry()

56 

57 

class DynamicParserMeta(ABCMeta):
    """
    Metaclass that generates abstract parser interface methods from a component enum.

    When a class is created with ``component_enum=<Enum class>``, abstract
    ``parse_filename`` / ``construct_filename`` methods plus one abstract
    ``validate_<value>`` and ``extract_<value>`` method per enum member are
    written into the class namespace, and the interface is recorded in the
    module-level ``_parser_registry``.
    """

    def __new__(mcs, name, bases, namespace, component_enum=None, **kwargs):
        """
        Create a new parser interface class with dynamically generated methods.

        Args:
            name: Class name
            bases: Base classes
            namespace: Class namespace (mutated in place when component_enum is given)
            component_enum: Enum class to generate methods from, or None to
                create the class without any generated methods
            **kwargs: Additional arguments

        Returns:
            The newly created class. When component_enum is given the class
            also carries ``_component_enum`` and ``FILENAME_COMPONENTS``.
        """
        if component_enum is not None:
            logger.info(
                "Generating dynamic parser interface %s for enum %s",
                name,
                component_enum.__name__,
            )
            mcs._generate_parser_methods(namespace, component_enum, name)

        # NOTE(review): **kwargs are accepted but not forwarded to
        # super().__new__, so they never reach __init_subclass__ -- confirm
        # whether that is intentional.
        cls = super().__new__(mcs, name, bases, namespace)

        if component_enum is not None:
            # Register only after the class was created successfully so a
            # failed creation does not leave a stale registry entry behind.
            _parser_registry.register_parser_interface(name, component_enum)

            # Store metadata on the class
            cls._component_enum = component_enum
            cls.FILENAME_COMPONENTS = [component.value for component in component_enum] + ['extension']
            logger.info(
                "Created dynamic parser interface %s with %d methods",
                name,
                len(_parser_registry.get_methods(name)),
            )

        return cls

    @staticmethod
    def _generate_parser_methods(namespace: Dict[str, Any], component_enum: Type[Enum], interface_name: str):
        """
        Write abstract parser methods for each component into ``namespace``.

        Existing entries with the same names are overwritten.

        Args:
            namespace: Class namespace dict to populate
            component_enum: Enum whose member values name the components
            interface_name: Used as the ``__qualname__`` prefix of generated methods
        """

        @abstractmethod
        def parse_filename(self, filename: str) -> Optional[Dict[str, Any]]:
            """
            Parse a filename to extract all components.

            Returns a dictionary with keys matching component enum values plus 'extension'.
            """
            raise NotImplementedError("parse_filename must be implemented")

        @abstractmethod
        def construct_filename(self, extension: str = '.tif', **component_values) -> str:
            """
            Construct a filename from component values.

            Args:
                extension: File extension
                **component_values: Component values as keyword arguments

            Returns:
                Constructed filename string
            """
            raise NotImplementedError("construct_filename must be implemented")

        parse_filename.__qualname__ = f"{interface_name}.parse_filename"
        construct_filename.__qualname__ = f"{interface_name}.construct_filename"
        namespace['parse_filename'] = parse_filename
        namespace['construct_filename'] = construct_filename

        def make_validate_stub(comp_name):
            """Build the abstract validate_<comp_name> method."""
            @abstractmethod
            def validate_component(self, value: Any) -> bool:
                raise NotImplementedError(f"validate_{comp_name} must be implemented")

            validate_component.__name__ = f"validate_{comp_name}"
            validate_component.__qualname__ = f"{interface_name}.validate_{comp_name}"
            # An f-string in docstring position is not a docstring, so the
            # documentation has to be attached explicitly.
            validate_component.__doc__ = f"Validate {comp_name} component value."
            return validate_component

        def make_extract_stub(comp_name):
            """Build the abstract extract_<comp_name> method."""
            @abstractmethod
            def extract_component(self, filename: str) -> Optional[Any]:
                raise NotImplementedError(f"extract_{comp_name} must be implemented")

            extract_component.__name__ = f"extract_{comp_name}"
            extract_component.__qualname__ = f"{interface_name}.extract_{comp_name}"
            extract_component.__doc__ = f"Extract {comp_name} component from filename."
            return extract_component

        # Generate component-specific validation and extraction methods
        for component in component_enum:
            component_name = component.value
            namespace[f"validate_{component_name}"] = make_validate_stub(component_name)
            namespace[f"extract_{component_name}"] = make_extract_stub(component_name)

162 

163 

class GenericFilenameParser(ABC):
    """
    Generic base class for filename parsers with dynamically generated methods.

    On construction, one ``validate_<value>`` and one ``extract_<value>``
    callable is installed on the instance for every member of the component
    enum, unless the class already defines an attribute with that name.
    Subclasses must implement ``can_parse``, ``extract_component_coordinates``,
    ``parse_filename`` and ``construct_filename``.
    """

    def __init__(self, component_enum: Type[Enum]):
        """
        Initialize the generic parser.

        Args:
            component_enum: The component enum this parser handles
        """
        self.component_enum = component_enum
        self.FILENAME_COMPONENTS = [component.value for component in component_enum] + ['extension']
        self.PLACEHOLDER_PATTERN = '{iii}'
        self._generate_dynamic_methods()

    def _generate_dynamic_methods(self):
        """
        Install generic validation and extraction callables for each component.

        The callables are stored as instance attributes. A name that is
        already defined on the class (for example an explicit
        ``validate_well`` written by a subclass) is left alone so the generic
        fallback never shadows a real implementation.
        """
        cls = type(self)
        for component in self.component_enum:
            component_name = component.value
            generated = (
                (f"validate_{component_name}", self._create_generic_validator),
                (f"extract_{component_name}", self._create_generic_extractor),
            )
            for method_name, factory in generated:
                if hasattr(cls, method_name):
                    continue  # class-level implementation takes precedence
                setattr(self, method_name, factory(component))

    def _create_generic_validator(self, component: Enum):
        """
        Create a generic validator for a component.

        The returned function does not depend on which component it was
        created for; the same rules apply to every component.

        Returns:
            A function ``value -> bool``.
        """
        def validate_component(value: Any) -> bool:
            """Generic validation for any component value."""
            if value is None:
                return True  # Allow None values (placeholders)

            if isinstance(value, str):
                # Any non-empty string is accepted; this includes placeholder
                # patterns such as '{iii}', which are non-empty by definition.
                return len(value) > 0
            if isinstance(value, int):
                # Integer values: allow non-negative integers
                return value >= 0
            # Other types: allow any value (extensible for future component types)
            return True

        return validate_component

    def _create_generic_extractor(self, component: Enum):
        """
        Create a generic extractor for a component.

        Returns:
            A function ``filename -> value`` that calls ``parse_filename`` and
            returns the entry stored under the component's value, or None when
            parsing yields a falsy result or the key is absent.
        """
        component_name = component.value

        def extract_component(filename: str) -> Optional[Any]:
            """Generic extraction for any component using parse_filename."""
            parsed = self.parse_filename(filename)
            if parsed and component_name in parsed:
                return parsed[component_name]
            return None

        return extract_component

    @classmethod
    @abstractmethod
    def can_parse(cls, filename: str) -> bool:
        """Check if this parser can parse the given filename."""
        pass

    @abstractmethod
    def extract_component_coordinates(self, component_value: str) -> Tuple[str, str]:
        """Extract coordinates from component identifier (typically well)."""
        pass

    @abstractmethod
    def parse_filename(self, filename: str) -> Optional[Dict[str, Any]]:
        """Parse a filename to extract all components."""
        pass

    @abstractmethod
    def construct_filename(self, extension: str = '.tif', **component_values) -> str:
        """Construct a filename from component values."""
        pass

    def __getstate__(self):
        """
        Custom pickling method to handle dynamic functions.

        Returns a copy of the instance dict with the per-component dynamic
        callables removed; ``component_enum`` is kept so they can be
        regenerated. The removed names are recorded under
        ``_removed_dynamic_methods``.
        """
        state = self.__dict__.copy()

        removed = []
        for component in self.component_enum:
            component_name = component.value
            for method_name in (f"validate_{component_name}", f"extract_{component_name}"):
                if method_name in state:
                    removed.append(method_name)
                    del state[method_name]

        # Store the list of removed methods for restoration
        state['_removed_dynamic_methods'] = removed
        return state

    def __setstate__(self, state):
        """
        Custom unpickling method to regenerate dynamic functions.

        Restores the instance dict, drops the bookkeeping entry written by
        ``__getstate__`` and regenerates the dynamic callables.
        """
        self.__dict__.update(state)
        self.__dict__.pop('_removed_dynamic_methods', None)
        self._generate_dynamic_methods()

    def get_component_names(self) -> list:
        """Get all component names (enum member values) for this parser."""
        return [component.value for component in self.component_enum]

    def validate_component_by_name(self, component_name: str, value: Any) -> bool:
        """
        Validate a component value using the dynamic validation methods.

        Args:
            component_name: Name of the component to validate
            value: Value to validate

        Returns:
            True if the value is valid for the component

        Raises:
            AttributeError: If no ``validate_<component_name>`` attribute exists
        """
        validate_method = getattr(self, f"validate_{component_name}")
        return validate_method(value)

    def extract_component_by_name(self, filename: str, component_name: str) -> Optional[Any]:
        """
        Extract a specific component from filename using dynamic extraction methods.

        Args:
            filename: Filename to parse
            component_name: Name of component to extract

        Returns:
            Component value or None if extraction fails

        Raises:
            AttributeError: If no extraction method exists for the component
        """
        extract_method = getattr(self, f"extract_{component_name}")
        return extract_method(filename)

    def validate_component_dict(self, components: Dict[str, Any]) -> bool:
        """
        Validate that a component dictionary contains all required components.

        The 'extension' key is ignored. Keys that are not component names of
        this parser make the dictionary invalid.

        Args:
            components: Dictionary of component values

        Returns:
            True if all required components are present and valid
        """
        required_components = set(self.get_component_names())
        provided_components = set(components) - {'extension'}

        # Check if all required components are provided
        missing = required_components - provided_components
        if missing:
            logger.warning("Missing required components: %s", missing)
            return False

        for component_name, value in components.items():
            if component_name == 'extension':
                continue

            # Never dispatch on an unchecked key: an arbitrary name could
            # resolve to an unrelated validate_* attribute or raise.
            if component_name not in required_components:
                logger.warning("Unknown component: %s", component_name)
                return False

            if not self.validate_component_by_name(component_name, value):
                logger.warning("Invalid value for %s: %s", component_name, value)
                return False

        return True

379 

380 

381 

382 

class ParserInterfaceGenerator:
    """
    Factory for creating component-specific parser interfaces dynamically.

    Interface classes are built with ``DynamicParserMeta`` and cached per
    (interface name, component enum, base classes) combination.
    """

    def __init__(self):
        # (interface name, component enum, base classes) -> generated class
        self._interface_cache: Dict[Tuple[str, type, tuple], Type] = {}

    @staticmethod
    def _cache_key(interface_name: str, component_enum: type, base_classes: tuple) -> Tuple[str, type, tuple]:
        """Build the cache key identifying one generated interface."""
        # The enum object itself is part of the key (rather than id()), so the
        # key stays valid for as long as the cache entry exists.
        return (interface_name, component_enum, tuple(base_classes))

    def create_parser_interface(self,
                                component_enum: Type[Enum],
                                interface_name: Optional[str] = None,
                                base_classes: Optional[tuple] = None) -> "Type[GenericFilenameParser]":
        """
        Create a component-specific parser interface class.

        Args:
            component_enum: The component enum to generate interface for
            interface_name: Optional custom interface name; defaults to
                ``<EnumName>FilenameParser``
            base_classes: Optional base classes; defaults to
                ``(GenericFilenameParser,)``

        Returns:
            Dynamically generated parser interface class. Repeated calls with
            the same name, enum and base classes return the cached class.
        """
        if interface_name is None:
            interface_name = f"{component_enum.__name__}FilenameParser"

        # Resolve the default before building the key so that an explicit
        # (GenericFilenameParser,) and the default share one cache entry.
        if base_classes is None:
            base_classes = (GenericFilenameParser,)

        cache_key = self._cache_key(interface_name, component_enum, base_classes)
        cached = self._interface_cache.get(cache_key)
        if cached is not None:
            logger.debug("Returning cached parser interface %s", interface_name)
            return cached

        # Create the interface class dynamically
        interface_class = DynamicParserMeta(
            interface_name,
            base_classes,
            {},
            component_enum=component_enum
        )

        self._interface_cache[cache_key] = interface_class

        logger.info("Created parser interface %s for %s", interface_name, component_enum.__name__)
        return interface_class

    def get_cached_interface(self, interface_name: str) -> Optional[Type]:
        """
        Get a cached parser interface by its exact name.

        Returns the first cached interface (in creation order) whose name
        equals ``interface_name``, or None when there is none.
        """
        for (cached_name, _enum, _bases), interface in self._interface_cache.items():
            if cached_name == interface_name:
                return interface
        return None

    def clear_cache(self):
        """Clear the parser interface cache."""
        self._interface_cache.clear()
        logger.debug("Cleared parser interface cache")

448 

449 

# Global parser interface generator instance
parser_interface_generator = ParserInterfaceGenerator()