Coverage for openhcs/core/lazy_config.py: 65.6%

165 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-14 05:57 +0000

1""" 

2Generic lazy dataclass factory using flexible resolution. 

3 

4This module provides a truly generic lazy loading abstraction that works with any dataclass 

5using dataclass field introspection for delayed object creation, eliminating hardcoded 

6configuration types and maintaining zero knowledge of specific configuration types. 

7Supports both static resolution (from class) and dynamic resolution (from instance). 

8Creates complete lazy dataclasses with bound methods - no mixin inheritance needed. 

9""" 

10 

11# Standard library imports 

12import logging 

13import re 

14# No ABC needed - using simple functions instead of strategy pattern 

15from dataclasses import dataclass, fields, is_dataclass, make_dataclass 

16from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union 

17 

18logger = logging.getLogger(__name__) 

19 

20 

@dataclass(frozen=True)
class LazyConfigConstants:
    """Immutable bundle of the string constants used by the lazy configuration system."""

    # --- Thread-local resolution -------------------------------------------
    # Field path of the materialization defaults.
    MATERIALIZATION_DEFAULTS_PATH: str = "materialization_defaults"
    # Attribute name probed to recognise a thread-local storage object.
    THREAD_LOCAL_VALUE_ATTR: str = "value"

    # --- Class names (kept for backward compatibility) ---------------------
    PIPELINE_CONFIG_NAME: str = "PipelineConfig"
    LAZY_STEP_MATERIALIZATION_CONFIG_NAME: str = "LazyStepMaterializationConfig"

    # --- Names under which generated methods are bound ---------------------
    RESOLVE_FIELD_VALUE_METHOD: str = "_resolve_field_value"
    GET_ATTRIBUTE_METHOD: str = "__getattribute__"
    TO_BASE_CONFIG_METHOD: str = "to_base_config"
    WITH_DEFAULTS_METHOD: str = "with_defaults"
    WITH_OVERRIDES_METHOD: str = "with_overrides"

    # --- Log message templates (filled in with str.format) ------------------
    LAZY_FIELD_DEBUG_TEMPLATE: str = (
        "LAZY FIELD CREATION: {field_name} - original={original_type}, "
        "has_default={has_default}, final={final_type}"
    )
    THREAD_LOCAL_FIELD_DEBUG_TEMPLATE: str = (
        "THREAD-LOCAL LAZY FIELD: {field_name} - original={original_type}, "
        "has_default={has_default}, final={final_type}"
    )

    # --- Generated class naming --------------------------------------------
    LAZY_CLASS_NAME_PREFIX: str = "Lazy"


# Single shared instance used throughout the module.
CONSTANTS = LazyConfigConstants()

50 

51# Generic imports for lazy configuration system 

def _get_generic_config_imports():
    """Return the ``(get_current_global_config, set_current_global_config)`` pair.

    The import from ``openhcs.core.config`` happens at call time rather than
    at module import time, to avoid circular dependencies.
    """
    from openhcs.core.config import (
        get_current_global_config as config_getter,
        set_current_global_config as config_setter,
    )

    return config_getter, config_setter

56 

57 

58# No strategy pattern needed - just use instance provider functions directly 

59 

60 

class FieldPathNavigator:
    """Utility for navigating dot-separated field paths in object hierarchies."""

    @staticmethod
    def navigate_to_instance(current_global_config: Any, field_path: Optional[str] = None) -> Optional[Any]:
        """
        Navigate to an instance using an explicit field path.

        Args:
            current_global_config: Thread-local storage object or global config instance.
                An object exposing the attribute named by
                ``CONSTANTS.THREAD_LOCAL_VALUE_ATTR`` is treated as a holder and
                unwrapped; anything else is used directly.
            field_path: Dot-separated path to navigate (None = root).

        Returns:
            Instance at the specified field path, or None if the holder is empty
            (falsy) or any segment of the path is missing or None.
        """
        holder_attr = CONSTANTS.THREAD_LOCAL_VALUE_ATTR

        if hasattr(current_global_config, holder_attr):
            # Read the held value once, through the same configurable attribute
            # name that was used to detect the holder.
            instance = getattr(current_global_config, holder_attr)
            if not instance:
                return None
        else:
            # Direct config instance
            instance = current_global_config

        if field_path is None:
            # Root instance - return the global config directly
            return instance

        # Walk the path one segment at a time; a missing attribute yields None,
        # which stops the walk on the next iteration.
        for segment in field_path.split('.'):
            if instance is None:
                return None
            instance = getattr(instance, segment, None)

        return instance

96 

97 

@dataclass(frozen=True)
class ResolutionConfig:
    """Declarative configuration for recursive lazy resolution.

    A field is resolved from the primary instance first; only when that yields
    ``None`` are the fallbacks consulted, in order.
    """
    # Zero-argument callable returning the primary instance (may return None).
    instance_provider: Callable[[], Any]
    # Callables taking a field name; tried in order until one returns non-None.
    fallback_chain: List[Callable[[str], Any]]

    def resolve_field(self, field_name: str) -> Any:
        """Resolve field through primary instance and fallback chain.

        Args:
            field_name: Name of the field to resolve.

        Returns:
            The primary value when it is not None (falsy values such as
            ``False``, ``0`` or ``""`` are kept), otherwise the first non-None
            fallback value, otherwise None.
        """
        primary_value = self._try_primary(field_name)
        # Compare against None explicitly: `primary or fallbacks` would throw
        # away legitimate falsy values like False or 0.
        if primary_value is not None:
            return primary_value
        return self._try_fallbacks(field_name)

    def _try_primary(self, field_name: str) -> Any:
        """Attempt resolution from primary instance.

        Returns:
            The raw attribute value read with ``object.__getattribute__``, or
            None when the provider yields nothing, the attribute is absent, or
            any step raises.
        """
        try:
            instance = self.instance_provider()
            if instance and hasattr(instance, field_name):
                # Bypass any custom __getattribute__ on the instance to read
                # the stored value.
                return object.__getattribute__(instance, field_name)
        except Exception:
            # Best-effort lookup: failure here means "fall through to the
            # fallbacks", but leave a trace for debugging.
            logging.getLogger(__name__).debug(
                "Primary resolution failed for field %r", field_name, exc_info=True
            )
        return None

    def _try_fallbacks(self, field_name: str) -> Any:
        """Attempt resolution through fallback chain.

        Returns:
            The first non-None value produced by a fallback, or None. A
            fallback that raises is skipped.
        """
        for fallback in self.fallback_chain:
            try:
                value = fallback(field_name)
            except Exception:
                logging.getLogger(__name__).debug(
                    "Fallback resolution failed for field %r", field_name, exc_info=True
                )
                continue
            if value is not None:
                return value
        return None

129 

130 

131# Functional fallback strategies 

def create_static_defaults_fallback(base_class: Type) -> Callable[[str], Any]:
    """Build a fallback that reads values from a default-constructed ``base_class``.

    ``base_class()`` is instantiated once, when this factory is called. The
    returned callable looks the field name up on that instance and returns
    None when the attribute does not exist.
    """
    defaults = base_class()

    def resolve_from_defaults(field_name: str) -> Any:
        return getattr(defaults, field_name, None)

    return resolve_from_defaults

136 

137 

def create_instance_fallback(instance_provider: Callable[[], Any]) -> Callable[[str], Any]:
    """Create fallback that resolves from specific instance.

    Args:
        instance_provider: Zero-argument callable returning the instance to read
            from; it is invoked exactly once per lookup.

    Returns:
        A callable mapping a field name to the attribute value on the provided
        instance, or None when the provider returns a falsy value or the
        attribute is absent.
    """
    def resolve_from_instance(field_name: str) -> Any:
        # Call the provider once and reuse the result, so the instance that is
        # checked is the same one that is read.
        instance = instance_provider()
        if not instance:
            return None
        return getattr(instance, field_name, None)

    return resolve_from_instance

144 

145 

@dataclass(frozen=True)
class LazyMethodBindings:
    """Factories for the methods that get attached to generated lazy dataclasses."""

    @staticmethod
    def create_resolver(resolution_config: ResolutionConfig) -> Callable[[Any, str], Any]:
        """Build the field resolver; it delegates to ``resolution_config.resolve_field``."""
        def resolve_field_value(self: Any, field_name: str) -> Any:
            return resolution_config.resolve_field(field_name)

        return resolve_field_value

    @staticmethod
    def create_getattribute() -> Callable[[Any, str], Any]:
        """Build a ``__getattribute__`` that resolves dataclass fields stored as None."""
        def __getattribute__(self: Any, name: str) -> Any:
            stored = object.__getattribute__(self, name)
            if stored is not None:
                return stored

            # Only declared dataclass fields are resolved lazily; any other
            # attribute holding None is returned unchanged.
            declared_names = {f.name for f in fields(self.__class__)}
            if name in declared_names:
                return self._resolve_field_value(name)
            return stored

        return __getattribute__

    @staticmethod
    def create_to_base_config(base_class: Type) -> Callable[[Any], Any]:
        """Build the converter that instantiates ``base_class`` from this object's field values."""
        def to_base_config(self: Any) -> Any:
            resolved = {}
            for field_obj in fields(self):
                # Plain getattr, so the instance's own __getattribute__ is used.
                resolved[field_obj.name] = getattr(self, field_obj.name)
            return base_class(**resolved)

        return to_base_config

    @staticmethod
    def create_class_methods() -> Dict[str, Any]:
        """Build the class-level constructors, keyed by the name they are bound under."""
        def with_defaults(cls):
            return cls()

        def with_overrides(cls, **kwargs):
            return cls(**kwargs)

        bindings = {}
        bindings[CONSTANTS.WITH_DEFAULTS_METHOD] = classmethod(with_defaults)
        bindings[CONSTANTS.WITH_OVERRIDES_METHOD] = classmethod(with_overrides)
        return bindings

179 

180 

class LazyDataclassFactory:
    """Generic factory for creating lazy dataclasses with flexible resolution."""

    @staticmethod
    def _introspect_dataclass_fields(base_class: Type, debug_template: str) -> List[Tuple[str, Any, None]]:
        """
        Unified field introspection logic for lazy dataclass creation.

        Every field of ``base_class`` is re-declared with a default of None.
        A field that has no default and is not already ``Optional`` gets its
        type wrapped in ``Optional``; all other fields keep their original type.

        Args:
            base_class: The dataclass to introspect
            debug_template: ``str.format`` template for the per-field log line;
                receives ``field_name``, ``original_type``, ``has_default``
                and ``final_type``

        Returns:
            List of (field_name, field_type, None) tuples for make_dataclass
        """
        from dataclasses import MISSING

        none_type = type(None)
        # Checked once so the template is not formatted for every field when
        # INFO logging is disabled.
        log_enabled = logger.isEnabledFor(logging.INFO)
        lazy_field_definitions = []

        for field in fields(base_class):
            # Check if field already has Optional type
            origin = getattr(field.type, '__origin__', None)
            is_already_optional = (origin is Union and
                                   none_type in getattr(field.type, '__args__', ()))

            # Check if field has default value or factory
            has_default = (field.default is not MISSING or
                           field.default_factory is not MISSING)

            if is_already_optional or has_default:
                # Already Optional, or has a default - preserve original type
                field_type = field.type
            else:
                # No default and not Optional - make it Optional for lazy loading
                field_type = Union[field.type, none_type]

            lazy_field_definitions.append((field.name, field_type, None))

            if log_enabled:
                logger.info(debug_template.format(
                    field_name=field.name,
                    original_type=field.type,
                    has_default=has_default,
                    final_type=field_type
                ))

        return lazy_field_definitions

    @staticmethod
    def _create_lazy_dataclass_unified(
        base_class: Type,
        instance_provider: Callable[[], Any],
        lazy_class_name: str,
        debug_template: str,
        use_recursive_resolution: bool = False,
        fallback_chain: Optional[List[Callable[[str], Any]]] = None
    ) -> Type:
        """Create lazy dataclass with declarative configuration.

        Args:
            base_class: Dataclass whose fields the lazy class mirrors
            instance_provider: Zero-argument callable returning the primary
                instance used for resolution
            lazy_class_name: Name of the generated class
            debug_template: Template for the per-field log line
            use_recursive_resolution: When True, resolve through
                ``fallback_chain`` (or the static defaults of ``base_class``
                if none is given); when False, ``fallback_chain`` is ignored
                and the only fallback is a direct read from the provided instance
            fallback_chain: Optional fallbacks for recursive resolution

        Returns:
            The generated frozen lazy dataclass

        Raises:
            ValueError: If ``base_class`` is not a dataclass
        """
        if not is_dataclass(base_class):
            raise ValueError(f"{base_class} must be a dataclass")

        # Choose the fallback chain for the resolution configuration
        if use_recursive_resolution:
            chain = fallback_chain or [create_static_defaults_fallback(base_class)]
        else:
            chain = [lambda field_name: getattr(instance_provider(), field_name)]

        resolution_config = ResolutionConfig(
            instance_provider=instance_provider,
            fallback_chain=chain
        )

        # Create lazy dataclass with introspected fields
        lazy_class = make_dataclass(
            lazy_class_name,
            LazyDataclassFactory._introspect_dataclass_fields(base_class, debug_template),
            frozen=True
        )

        # Bind methods declaratively
        LazyDataclassFactory._bind_methods_to_class(lazy_class, base_class, resolution_config)
        return lazy_class

    @staticmethod
    def _bind_methods_to_class(lazy_class: Type, base_class: Type, resolution_config: ResolutionConfig) -> None:
        """Bind methods to lazy dataclass using declarative configuration.

        Attaches the field resolver, the lazy ``__getattribute__``, the
        base-config converter and the class-level constructors to
        ``lazy_class`` under the names defined in ``CONSTANTS``.
        """
        method_bindings = {
            CONSTANTS.RESOLVE_FIELD_VALUE_METHOD: LazyMethodBindings.create_resolver(resolution_config),
            CONSTANTS.GET_ATTRIBUTE_METHOD: LazyMethodBindings.create_getattribute(),
            CONSTANTS.TO_BASE_CONFIG_METHOD: LazyMethodBindings.create_to_base_config(base_class),
            **LazyMethodBindings.create_class_methods()
        }

        for method_name, method_impl in method_bindings.items():
            setattr(lazy_class, method_name, method_impl)

    @staticmethod
    def create_lazy_dataclass(
        defaults_source: Union[Type, Any],
        lazy_class_name: str,
        use_recursive_resolution: bool = False,
        fallback_chain: Optional[List[Callable[[str], Any]]] = None
    ) -> Type:
        """Create lazy dataclass with functional configuration.

        Args:
            defaults_source: Either a dataclass type (a fresh instance is
                created on every resolution) or a dataclass instance (that
                same instance is used for every resolution)
            lazy_class_name: Name of the generated class
            use_recursive_resolution: Whether to resolve through the fallback chain
            fallback_chain: Optional fallbacks for recursive resolution

        Returns:
            The generated lazy dataclass
        """
        if isinstance(defaults_source, type):
            base_class = defaults_source
            instance_provider = lambda: defaults_source()
        else:
            base_class = type(defaults_source)
            instance_provider = lambda: defaults_source

        return LazyDataclassFactory._create_lazy_dataclass_unified(
            base_class, instance_provider, lazy_class_name,
            CONSTANTS.LAZY_FIELD_DEBUG_TEMPLATE, use_recursive_resolution, fallback_chain
        )

    @staticmethod
    def make_lazy_thread_local(
        base_class: Type,
        global_config_type: Type,
        field_path: Optional[str] = None,
        lazy_class_name: Optional[str] = None,
        use_recursive_resolution: bool = False
    ) -> Type:
        """
        Create lazy dataclass that resolves from thread-local instance using explicit field paths.

        This unified approach eliminates algorithmic field name conversion bugs by using
        explicit dot-separated paths to navigate the thread-local configuration structure.

        Args:
            base_class: The dataclass type to make lazy (the target type for lazy resolution)
            global_config_type: The global config type used for thread-local storage context
                (e.g., GlobalPipelineConfig, GlobalAppConfig)
            field_path: Dot-separated path to instance (None = root)
                Examples: None, "materialization_defaults", "foo.bar.baz"
            lazy_class_name: Optional name for the generated lazy class; defaults to
                ``CONSTANTS.LAZY_CLASS_NAME_PREFIX`` followed by ``base_class.__name__``
            use_recursive_resolution: Whether to use recursive resolution for None values

        Returns:
            Generated lazy dataclass with explicit thread-local resolution

        Note:
            base_class and global_config_type serve different purposes:
            - base_class: The type being made lazy (what the lazy class represents)
            - global_config_type: The type used for thread-local context (where values come from)

            They are often the same (e.g., both GlobalPipelineConfig) but can differ when
            creating lazy versions of nested config types that resolve from a different
            global context (e.g., base_class=StepMaterializationConfig,
            global_config_type=GlobalPipelineConfig).

        Examples:
            # Root thread-local instance with recursive resolution
            PipelineConfig = LazyDataclassFactory.make_lazy_thread_local(
                base_class=GlobalPipelineConfig,
                global_config_type=GlobalPipelineConfig,
                field_path=None,
                use_recursive_resolution=True
            )

            # Nested field from thread-local instance
            LazyStepMaterializationConfig = LazyDataclassFactory.make_lazy_thread_local(
                base_class=StepMaterializationConfig,
                global_config_type=GlobalPipelineConfig,
                field_path="materialization_defaults"
            )
        """
        # Generate class name if not provided
        if lazy_class_name is None:
            lazy_class_name = f"{CONSTANTS.LAZY_CLASS_NAME_PREFIX}{base_class.__name__}"

        # Create instance provider for thread-local resolution
        def thread_local_instance_provider() -> Any:
            """Get instance from thread-local storage using field path."""
            get_current_global_config, _ = _get_generic_config_imports()

            current_config = get_current_global_config(global_config_type)
            if current_config is None:
                return None
            return FieldPathNavigator.navigate_to_instance(current_config, field_path)

        # Configure fallback chain for recursive resolution
        fallback_chain = [create_static_defaults_fallback(base_class)] if use_recursive_resolution else None

        return LazyDataclassFactory._create_lazy_dataclass_unified(
            base_class, thread_local_instance_provider, lazy_class_name,
            CONSTANTS.THREAD_LOCAL_FIELD_DEBUG_TEMPLATE, use_recursive_resolution, fallback_chain
        )

370 

371 # Deprecated methods removed - use make_lazy_thread_local() with explicit field_path 

372 

373 

374# Generic utility functions for clean thread-local storage management 

def ensure_global_config_context(global_config_type: Type, global_config_instance: Any) -> None:
    """Register ``global_config_instance`` as the current global config for ``global_config_type``.

    Delegates to ``set_current_global_config``, which is obtained lazily
    through ``_get_generic_config_imports``.
    """
    config_setter = _get_generic_config_imports()[1]
    config_setter(global_config_type, global_config_instance)

379 

380 

381# Generic dataclass editing with configurable value preservation 

T = TypeVar('T')


def create_dataclass_for_editing(
    dataclass_type: Type[T],
    source_config: Any,
    preserve_values: bool = False,
    context_provider: Optional[Callable[[Any], None]] = None
) -> T:
    """
    Create any dataclass for editing with configurable value preservation.

    This generic function works with any dataclass type, not just PipelineConfig.

    Args:
        dataclass_type: The dataclass type to create (e.g., PipelineConfig, ZarrConfig)
        source_config: Instance to use for context and optionally field values
        preserve_values:
            - True: Preserve actual field values (direct editing)
            - False: Use None values for placeholders (hierarchical editing)
        context_provider: Optional function to set up context (e.g., thread-local storage);
            called with ``source_config`` before the instance is built

    Returns:
        Instance of dataclass_type with appropriate field initialization.
        Fields declared with ``init=False`` are not passed to the constructor
        and keep whatever the dataclass itself assigns.

    Raises:
        ValueError: If ``dataclass_type`` is not a dataclass

    Examples:
        # Edit any dataclass with preserved values
        editable_zarr = create_dataclass_for_editing(ZarrConfig, zarr_config, preserve_values=True)

        # Create dataclass with placeholders
        placeholder_vfs = create_dataclass_for_editing(VFSConfig, vfs_config, preserve_values=False)
    """
    if not is_dataclass(dataclass_type):
        raise ValueError(f"{dataclass_type} must be a dataclass")

    # Set up context if provider is given (e.g., thread-local storage)
    if context_provider is not None:
        context_provider(source_config)

    # Only fields accepted by __init__ may be passed as keyword arguments;
    # passing an init=False field raises TypeError.
    init_field_names = [field_obj.name for field_obj in fields(dataclass_type) if field_obj.init]

    if preserve_values:
        # Direct editing: preserve actual field values
        field_values = {name: getattr(source_config, name) for name in init_field_names}
    else:
        # Hierarchical editing: use None for placeholder behavior
        field_values = dict.fromkeys(init_field_names)

    return dataclass_type(**field_values)

432 

433 

def create_config_for_editing(
    global_config_type: Type,
    global_config_instance: Any,
    preserve_values: bool = False,
    placeholder_prefix: str = "Default"
) -> Any:
    """
    Build an editable config for any global dataclass type.

    Thin wrapper around ``create_dataclass_for_editing`` that first registers
    ``global_config_instance`` as the current global config for
    ``global_config_type`` via ``ensure_global_config_context``.

    Args:
        global_config_type: The global config type (e.g., GlobalPipelineConfig, GlobalAppConfig)
        global_config_instance: Instance to use for context and optionally field values
        preserve_values: Whether to preserve actual values or use placeholders
        placeholder_prefix: Prefix for placeholder text (e.g., "Pipeline default",
            "App default"); accepted but not used in this function's body

    Returns:
        Instance of ``global_config_type`` suitable for editing
    """
    def install_context(config: Any) -> None:
        ensure_global_config_context(global_config_type, config)

    editable = create_dataclass_for_editing(
        global_config_type,
        global_config_instance,
        preserve_values=preserve_values,
        context_provider=install_context,
    )
    return editable

460 

461 

462 

463 

464 

def rebuild_lazy_config_with_new_global_reference(
    existing_lazy_config: Any,
    new_global_config: Any,
    global_config_type: Optional[Type] = None
) -> Any:
    """
    Rebuild a lazy config against a new global config, carrying over raw field state.

    The new global config is first registered through
    ``ensure_global_config_context``. Each field of the existing config is then
    read with ``object.__getattribute__`` (so lazy resolution is not triggered)
    and carried over as follows:
    - None stays None
    - A non-dataclass value is kept as-is
    - A value that is itself a dataclass is replaced by the attribute of the
      same name on ``new_global_config``; if that attribute does not exist,
      the original value is kept

    Args:
        existing_lazy_config: Current lazy config instance
        new_global_config: New global config to reference for lazy resolution
        global_config_type: Type of the global config (defaults to type of new_global_config)

    Returns:
        New instance of ``type(existing_lazy_config)`` built from the carried-over
        values, or None when ``existing_lazy_config`` is None (in which case
        the global config context is left untouched)
    """
    if existing_lazy_config is None:
        return None

    # Determine global config type and publish the new global config
    context_type = type(new_global_config) if global_config_type is None else global_config_type
    ensure_global_config_context(context_type, new_global_config)

    def carried_value(field_name: str) -> Any:
        # Raw stored value (None or concrete), bypassing lazy resolution
        raw_value = object.__getattribute__(existing_lazy_config, field_name)
        if raw_value is not None and hasattr(raw_value, '__dataclass_fields__'):
            # Concrete nested dataclass: take the matching attribute from the
            # new global config, keeping the original when it has none
            return getattr(new_global_config, field_name, raw_value)
        # Regular field (None or non-dataclass value) - preserve as-is
        return raw_value

    rebuilt_values = {
        field_obj.name: carried_value(field_obj.name)
        for field_obj in fields(existing_lazy_config)
    }

    lazy_class_type = type(existing_lazy_config)
    return lazy_class_type(**rebuilt_values)

521 

522 

523 

524 

525 

526 

527 

528 

529 

530 

531 

532 

533# This module is now completely generic and contains no pipeline-specific logic. 

534# Pipeline-specific lazy classes are created in openhcs.core.pipeline_config module. 

535 

536