Coverage for openhcs/core/lazy_config.py: 65.6%
165 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 05:57 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 05:57 +0000
1"""
2Generic lazy dataclass factory using flexible resolution.
4This module provides a truly generic lazy loading abstraction that works with any dataclass
5using dataclass field introspection for delayed object creation, eliminating hardcoded
6configuration types and maintaining zero knowledge of specific configuration types.
7Supports both static resolution (from class) and dynamic resolution (from instance).
8Creates complete lazy dataclasses with bound methods - no mixin inheritance needed.
9"""
11# Standard library imports
12import logging
13import re
14# No ABC needed - using simple functions instead of strategy pattern
15from dataclasses import dataclass, fields, is_dataclass, make_dataclass
16from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union
18logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class LazyConfigConstants:
    """Immutable bundle of the string constants used by the lazy config system.

    Every field is a plain ``str`` with a default, so ``LazyConfigConstants()``
    can be built without arguments; ``frozen=True`` prevents reassignment.
    """

    # --- Field paths / attribute names for thread-local resolution ---
    MATERIALIZATION_DEFAULTS_PATH: str = "materialization_defaults"
    # Attribute probed on a config holder to detect a wrapped instance.
    THREAD_LOCAL_VALUE_ATTR: str = "value"

    # --- Class names kept for backward compatibility ---
    PIPELINE_CONFIG_NAME: str = "PipelineConfig"
    LAZY_STEP_MATERIALIZATION_CONFIG_NAME: str = "LazyStepMaterializationConfig"

    # --- Attribute names under which generated methods are bound ---
    RESOLVE_FIELD_VALUE_METHOD: str = "_resolve_field_value"
    GET_ATTRIBUTE_METHOD: str = "__getattribute__"
    TO_BASE_CONFIG_METHOD: str = "to_base_config"
    WITH_DEFAULTS_METHOD: str = "with_defaults"
    WITH_OVERRIDES_METHOD: str = "with_overrides"

    # --- str.format templates for per-field log messages ---
    LAZY_FIELD_DEBUG_TEMPLATE: str = (
        "LAZY FIELD CREATION: {field_name} - original={original_type}, "
        "has_default={has_default}, final={final_type}"
    )
    THREAD_LOCAL_FIELD_DEBUG_TEMPLATE: str = (
        "THREAD-LOCAL LAZY FIELD: {field_name} - original={original_type}, "
        "has_default={has_default}, final={final_type}"
    )

    # --- Prefix used when a lazy class name is generated automatically ---
    LAZY_CLASS_NAME_PREFIX: str = "Lazy"


# Single shared instance referenced throughout the module.
CONSTANTS = LazyConfigConstants()
51# Generic imports for lazy configuration system
def _get_generic_config_imports():
    """Return the ``(getter, setter)`` pair for the current global config.

    The import is performed inside the function body, on every call, rather
    than at module import time so that loading this module does not pull in
    ``openhcs.core.config`` (avoids a circular dependency).
    """
    from openhcs.core.config import (
        get_current_global_config,
        set_current_global_config,
    )

    accessors = (get_current_global_config, set_current_global_config)
    return accessors
58# No strategy pattern needed - just use instance provider functions directly
class FieldPathNavigator:
    """Helper for walking dot-separated attribute paths through an object graph."""

    @staticmethod
    def navigate_to_instance(current_global_config: Any, field_path: Optional[str] = None) -> Optional[Any]:
        """
        Follow ``field_path`` starting from the given config (or config holder).

        Args:
            current_global_config: Either an object exposing a ``value``
                attribute (treated as a holder whose ``value`` is the real
                root) or the root config instance itself.
            field_path: Dot-separated attribute path; ``None`` selects the root.

        Returns:
            The object found at the end of the path. ``None`` when the holder's
            ``value`` is falsy or when any attribute along the path is missing
            or ``None``.
        """
        # A holder is recognised purely by the presence of the "value" attribute.
        if hasattr(current_global_config, CONSTANTS.THREAD_LOCAL_VALUE_ATTR):
            if not current_global_config.value:
                return None
            node = current_global_config.value
        else:
            node = current_global_config

        if field_path is None:
            return node

        for segment in field_path.split('.'):
            if node is None:
                # Nothing left to descend into; the result is None.
                break
            node = getattr(node, segment, None)

        return node
@dataclass(frozen=True)
class ResolutionConfig:
    """Declarative configuration for lazy field resolution.

    A field is looked up on the primary instance first; only when that yields
    ``None`` are the fallbacks consulted, in order.
    """
    # Zero-argument callable returning the primary instance (or None).
    instance_provider: Callable[[], Any]
    # Callables taking a field name; the first non-None result wins.
    fallback_chain: List[Callable[[str], Any]]

    def resolve_field(self, field_name: str) -> Any:
        """Resolve ``field_name`` from the primary instance, then the fallbacks.

        ``None`` is the only value treated as "unresolved". Falsy-but-valid
        values such as ``0``, ``False`` or ``""`` found on the primary instance
        are returned as-is instead of being replaced by a fallback value.

        Returns:
            The first non-None value found, or ``None`` if nothing resolves.
        """
        primary_value = self._try_primary(field_name)
        if primary_value is not None:
            return primary_value
        return self._try_fallbacks(field_name)

    def _try_primary(self, field_name: str) -> Any:
        """Read ``field_name`` from the instance returned by ``instance_provider``.

        Returns ``None`` when the provider yields a falsy instance, the
        attribute is absent, or anything raises along the way.
        """
        try:
            instance = self.instance_provider()
            if instance and hasattr(instance, field_name):
                # Bypass any custom __getattribute__ so the raw stored value is
                # read rather than triggering another round of lazy resolution.
                return object.__getattribute__(instance, field_name)
        except Exception:
            # Best effort by design: a failing provider means "not resolved".
            logging.getLogger(__name__).debug(
                "Primary resolution failed for field %r", field_name, exc_info=True
            )
        return None

    def _try_fallbacks(self, field_name: str) -> Any:
        """Return the first non-None fallback result; failing fallbacks are skipped."""
        for fallback in self.fallback_chain:
            try:
                value = fallback(field_name)
            except Exception:
                # Best effort by design: move on to the next fallback.
                logging.getLogger(__name__).debug(
                    "Fallback resolution failed for field %r", field_name, exc_info=True
                )
                continue
            if value is not None:
                return value
        return None
131# Functional fallback strategies
def create_static_defaults_fallback(base_class: Type) -> Callable[[str], Any]:
    """Build a fallback that reads fields from a default-constructed ``base_class``.

    ``base_class()`` is called once, immediately, so any error from the
    constructor surfaces here rather than at lookup time. The returned callable
    yields ``None`` for attribute names the default instance does not have.
    """
    defaults = base_class()

    def lookup(field_name):
        return getattr(defaults, field_name, None)

    return lookup
def create_instance_fallback(instance_provider: Callable[[], Any]) -> Callable[[str], Any]:
    """Build a fallback that reads fields from the instance a provider returns.

    Args:
        instance_provider: Zero-argument callable returning the instance to
            read from (or a falsy value when none is available).

    Returns:
        A callable mapping a field name to that attribute on the provided
        instance, or ``None`` when the instance is falsy or lacks the attribute.
    """
    def lookup(field_name: str) -> Any:
        # Call the provider exactly once per lookup: the attribute is read from
        # the same object that was checked, and the provider is not run twice.
        instance = instance_provider()
        if not instance:
            return None
        return getattr(instance, field_name, None)

    return lookup
@dataclass(frozen=True)
class LazyMethodBindings:
    """Factories for the callables that are attached to generated lazy dataclasses."""

    @staticmethod
    def create_resolver(resolution_config: ResolutionConfig) -> Callable[[Any, str], Any]:
        """Build the per-instance resolver; it delegates to ``resolution_config``."""
        def resolve(self, field_name):
            # The instance itself is not consulted; resolution is fully
            # described by the captured configuration.
            return resolution_config.resolve_field(field_name)

        return resolve

    @staticmethod
    def create_getattribute() -> Callable[[Any, str], Any]:
        """Build a ``__getattribute__`` that resolves ``None`` dataclass fields lazily."""
        def __getattribute__(self: Any, name: str) -> Any:
            stored = object.__getattribute__(self, name)
            if stored is not None:
                return stored
            # Only declared dataclass fields are resolved lazily; any other
            # attribute holding None is returned unchanged.
            declared_names = {f.name for f in fields(self.__class__)}
            if name in declared_names:
                return self._resolve_field_value(name)
            return stored

        return __getattribute__

    @staticmethod
    def create_to_base_config(base_class: Type) -> Callable[[Any], Any]:
        """Build a converter that instantiates ``base_class`` from the instance's fields."""
        def to_base_config(self):
            # getattr goes through the instance's attribute access, so lazily
            # resolved values (not raw None) are what reach base_class.
            resolved = {f.name: getattr(self, f.name) for f in fields(self)}
            return base_class(**resolved)

        return to_base_config

    @staticmethod
    def create_class_methods() -> Dict[str, Any]:
        """Build the class-level constructors, keyed by the names they are bound under."""
        def with_defaults(cls):
            return cls()

        def with_overrides(cls, **kwargs):
            return cls(**kwargs)

        return {
            CONSTANTS.WITH_DEFAULTS_METHOD: classmethod(with_defaults),
            CONSTANTS.WITH_OVERRIDES_METHOD: classmethod(with_overrides),
        }
class LazyDataclassFactory:
    """Generic factory that derives frozen "lazy" dataclasses from ordinary ones.

    A generated class has the same field names as its base dataclass, every
    field defaults to ``None``, and reading a ``None`` field delegates to a
    ``ResolutionConfig`` instead of returning ``None`` directly.
    """

    @staticmethod
    def _introspect_dataclass_fields(base_class: Type, debug_template: str) -> List[Tuple[str, Type, None]]:
        """
        Derive the ``make_dataclass`` field specs for the lazy version of ``base_class``.

        A field keeps its declared type when it is already ``Optional`` or has a
        default / default factory; otherwise its type is widened to
        ``Union[declared, None]``. Every field gets ``None`` as its default.

        Args:
            base_class: The dataclass to introspect
            debug_template: ``str.format`` template logged once per field with
                ``field_name``, ``original_type``, ``has_default`` and ``final_type``

        Returns:
            List of (field_name, field_type, None) tuples for make_dataclass
        """
        from dataclasses import MISSING

        none_type = type(None)
        definitions = []

        for base_field in fields(base_class):
            declared_type = base_field.type

            # Optional[X] is spelled Union[X, None]; detect it via __origin__/__args__.
            is_already_optional = (
                getattr(declared_type, '__origin__', None) is Union
                and none_type in getattr(declared_type, '__args__', ())
            )
            has_default = not (
                base_field.default is MISSING and base_field.default_factory is MISSING
            )

            if is_already_optional or has_default:
                field_type = declared_type
            else:
                # No default and not Optional: widen so that None is a legal value.
                field_type = Union[declared_type, none_type]

            definitions.append((base_field.name, field_type, None))

            logger.info(debug_template.format(
                field_name=base_field.name,
                original_type=declared_type,
                has_default=has_default,
                final_type=field_type
            ))

        return definitions

    @staticmethod
    def _create_lazy_dataclass_unified(
        base_class: Type,
        instance_provider: Callable[[], Any],
        lazy_class_name: str,
        debug_template: str,
        use_recursive_resolution: bool = False,
        fallback_chain: Optional[List[Callable[[str], Any]]] = None
    ) -> Type:
        """Build the lazy dataclass shared by all public factory entry points.

        With ``use_recursive_resolution`` the given ``fallback_chain`` is used
        (or, when empty/None, a static-defaults fallback for ``base_class``).
        Without it, ``fallback_chain`` is ignored and the only fallback is a
        plain ``getattr`` on whatever ``instance_provider`` returns.

        Raises:
            ValueError: If ``base_class`` is not a dataclass.
        """
        if not is_dataclass(base_class):
            raise ValueError(f"{base_class} must be a dataclass")

        if use_recursive_resolution:
            chain = fallback_chain or [create_static_defaults_fallback(base_class)]
        else:
            chain = [lambda field_name: getattr(instance_provider(), field_name)]

        resolution_config = ResolutionConfig(
            instance_provider=instance_provider,
            fallback_chain=chain
        )

        field_definitions = LazyDataclassFactory._introspect_dataclass_fields(base_class, debug_template)
        lazy_class = make_dataclass(lazy_class_name, field_definitions, frozen=True)

        LazyDataclassFactory._bind_methods_to_class(lazy_class, base_class, resolution_config)
        return lazy_class

    @staticmethod
    def _bind_methods_to_class(lazy_class: Type, base_class: Type, resolution_config: ResolutionConfig) -> None:
        """Attach the resolver, ``__getattribute__``, converter and class methods to ``lazy_class``."""
        instance_level = (
            (CONSTANTS.RESOLVE_FIELD_VALUE_METHOD, LazyMethodBindings.create_resolver(resolution_config)),
            (CONSTANTS.GET_ATTRIBUTE_METHOD, LazyMethodBindings.create_getattribute()),
            (CONSTANTS.TO_BASE_CONFIG_METHOD, LazyMethodBindings.create_to_base_config(base_class)),
        )
        class_level = LazyMethodBindings.create_class_methods()

        for attribute_name, implementation in (*instance_level, *class_level.items()):
            setattr(lazy_class, attribute_name, implementation)

    @staticmethod
    def create_lazy_dataclass(
        defaults_source: Union[Type, Any],
        lazy_class_name: str,
        use_recursive_resolution: bool = False,
        fallback_chain: Optional[List[Callable[[str], Any]]] = None
    ) -> Type:
        """Create a lazy dataclass that resolves from a class or from a fixed instance.

        When ``defaults_source`` is a class, a fresh instance is constructed on
        every resolution; when it is an instance, that same instance is reused
        and its type becomes the base class.
        """
        if isinstance(defaults_source, type):
            base_class = defaults_source

            def instance_provider():
                return defaults_source()
        else:
            base_class = type(defaults_source)

            def instance_provider():
                return defaults_source

        return LazyDataclassFactory._create_lazy_dataclass_unified(
            base_class, instance_provider, lazy_class_name,
            CONSTANTS.LAZY_FIELD_DEBUG_TEMPLATE, use_recursive_resolution, fallback_chain
        )

    @staticmethod
    def make_lazy_thread_local(
        base_class: Type,
        global_config_type: Type,
        field_path: str = None,
        lazy_class_name: str = None,
        use_recursive_resolution: bool = False
    ) -> Type:
        """
        Create a lazy dataclass that resolves from the current global config via a field path.

        The current config is fetched with ``get_current_global_config(global_config_type)``
        on every resolution and then navigated with ``FieldPathNavigator`` using
        the explicit dot-separated ``field_path``.

        Args:
            base_class: The dataclass type to make lazy (what the lazy class represents)
            global_config_type: Key passed to ``get_current_global_config`` to
                find the config that values are read from
            field_path: Dot-separated path to the instance (None = root)
                Examples: None, "materialization_defaults", "foo.bar.baz"
            lazy_class_name: Name for the generated class; defaults to
                "Lazy" + ``base_class.__name__``
            use_recursive_resolution: When True, fields that resolve to None
                fall back to the static defaults of ``base_class``

        Returns:
            Generated lazy dataclass

        Note:
            ``base_class`` and ``global_config_type`` may be the same type, but
            differ when a nested config type resolves from a larger global
            config (e.g. base_class=StepMaterializationConfig,
            global_config_type=GlobalPipelineConfig).

        Examples:
            # Root instance with recursive resolution
            PipelineConfig = LazyDataclassFactory.make_lazy_thread_local(
                GlobalPipelineConfig,
                GlobalPipelineConfig,
                field_path=None,
                use_recursive_resolution=True
            )

            # Nested field of the global config
            LazyStepMaterializationConfig = LazyDataclassFactory.make_lazy_thread_local(
                StepMaterializationConfig,
                GlobalPipelineConfig,
                field_path="materialization_defaults"
            )
        """
        if lazy_class_name is None:
            lazy_class_name = f"{CONSTANTS.LAZY_CLASS_NAME_PREFIX}{base_class.__name__}"

        def thread_local_instance_provider() -> Any:
            """Fetch the current global config and walk ``field_path`` from it."""
            get_current_global_config, _ = _get_generic_config_imports()

            current_config = get_current_global_config(global_config_type)
            if current_config is None:
                return None
            return FieldPathNavigator.navigate_to_instance(current_config, field_path)

        # Static defaults are only consulted in recursive mode.
        if use_recursive_resolution:
            fallback_chain = [create_static_defaults_fallback(base_class)]
        else:
            fallback_chain = None

        return LazyDataclassFactory._create_lazy_dataclass_unified(
            base_class, thread_local_instance_provider, lazy_class_name,
            CONSTANTS.THREAD_LOCAL_FIELD_DEBUG_TEMPLATE, use_recursive_resolution, fallback_chain
        )
371 # Deprecated methods removed - use make_lazy_thread_local() with explicit field_path
374# Generic utility functions for clean thread-local storage management
def ensure_global_config_context(global_config_type: Type, global_config_instance: Any) -> None:
    """Register ``global_config_instance`` as the current config for ``global_config_type``.

    Delegates to ``set_current_global_config``, which is imported lazily via
    ``_get_generic_config_imports``.
    """
    setter = _get_generic_config_imports()[1]
    setter(global_config_type, global_config_instance)
381# Generic dataclass editing with configurable value preservation
T = TypeVar('T')


def create_dataclass_for_editing(
    dataclass_type: Type[T],
    source_config: Any,
    preserve_values: bool = False,
    context_provider: Optional[Callable[[Any], None]] = None
) -> T:
    """
    Instantiate ``dataclass_type`` for editing, either pre-filled or all-``None``.

    Args:
        dataclass_type: The dataclass type to create (e.g., PipelineConfig, ZarrConfig)
        source_config: Passed to ``context_provider`` and, when preserving
            values, read for each field of ``dataclass_type``
        preserve_values:
            - True: copy every field value from ``source_config`` (direct editing)
            - False: set every field to None (placeholder / hierarchical editing)
        context_provider: Optional callable invoked with ``source_config``
            before the instance is built (e.g., to set up thread-local storage)

    Returns:
        Instance of dataclass_type with the chosen field initialization

    Raises:
        ValueError: If ``dataclass_type`` is not a dataclass.

    Examples:
        # Edit any dataclass with preserved values
        editable_zarr = create_dataclass_for_editing(ZarrConfig, zarr_config, preserve_values=True)

        # Create dataclass with placeholders
        placeholder_vfs = create_dataclass_for_editing(VFSConfig, vfs_config, preserve_values=False)
    """
    if not is_dataclass(dataclass_type):
        raise ValueError(f"{dataclass_type} must be a dataclass")

    # Context must be in place before the instance is constructed.
    if context_provider:
        context_provider(source_config)

    field_names = [field_obj.name for field_obj in fields(dataclass_type)]
    if preserve_values:
        init_kwargs = {name: getattr(source_config, name) for name in field_names}
    else:
        init_kwargs = dict.fromkeys(field_names)

    return dataclass_type(**init_kwargs)
def create_config_for_editing(
    global_config_type: Type,
    global_config_instance: Any,
    preserve_values: bool = False,
    placeholder_prefix: str = "Default"
) -> Any:
    """
    Create an editable instance of a global config type and register its context.

    Thin wrapper around ``create_dataclass_for_editing`` whose context provider
    registers the source instance as the current config for ``global_config_type``.

    Args:
        global_config_type: The global config type (e.g., GlobalPipelineConfig, GlobalAppConfig)
        global_config_instance: Instance registered as context and, when
            preserving values, copied field by field
        preserve_values: Whether to copy actual values or use None placeholders
        placeholder_prefix: Accepted for interface compatibility; not used by
            this function

    Returns:
        New ``global_config_type`` instance suitable for editing
    """
    def install_context(config):
        ensure_global_config_context(global_config_type, config)

    return create_dataclass_for_editing(
        global_config_type,
        global_config_instance,
        preserve_values=preserve_values,
        context_provider=install_context
    )
def rebuild_lazy_config_with_new_global_reference(
    existing_lazy_config: Any,
    new_global_config: Any,
    global_config_type: Optional[Type] = None
) -> Any:
    """
    Re-create a lazy config so that it resolves against a new global config.

    The new global config is registered first; then a new instance of the same
    lazy class is built from the raw stored field values:
    - Fields stored as None stay None (they keep resolving lazily)
    - Non-dataclass values are carried over unchanged
    - A stored dataclass value is replaced by the same-named attribute of
      ``new_global_config``; if that attribute does not exist, the stored
      value is kept

    Args:
        existing_lazy_config: Current lazy config instance (None is passed through)
        new_global_config: New global config to register and read nested values from
        global_config_type: Type to register the config under (defaults to
            ``type(new_global_config)``)

    Returns:
        New instance of ``type(existing_lazy_config)``, or None if the input was None
    """
    if existing_lazy_config is None:
        return None

    context_type = type(new_global_config) if global_config_type is None else global_config_type
    ensure_global_config_context(context_type, new_global_config)

    def carried_value(field_name):
        # Raw read: bypasses the lazy __getattribute__ so None is not resolved.
        stored = object.__getattribute__(existing_lazy_config, field_name)
        if stored is None or not hasattr(stored, '__dataclass_fields__'):
            return stored
        try:
            return getattr(new_global_config, field_name)
        except AttributeError:
            # The new global config has no such field; keep what we had.
            return stored

    preserved = {
        field_obj.name: carried_value(field_obj.name)
        for field_obj in fields(existing_lazy_config)
    }
    return type(existing_lazy_config)(**preserved)
533# This module is now completely generic and contains no pipeline-specific logic.
534# Pipeline-specific lazy classes are created in openhcs.core.pipeline_config module.