Coverage for openhcs/core/steps/abstract.py: 93.3%
28 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 05:57 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 05:57 +0000
1"""
2Abstract Step Interface
4This module defines the AbstractStep interface, which is the base class for all steps
5in the OpenHCS pipeline. It provides the core functionality for step execution,
6validation, and state management.
8Doctrinal Clauses:
9- Clause 3 — Declarative Primacy
10- Clause 12 — Absolute Clean Execution
11- Clause 21 — Context Immunity
12- Clause 65 — No Fallback Logic
13- Clause 66 — Immutability After Construction
14- Clause 88 — No Inferred Capabilities
15- Clause 92 — Structural Validation First
16- Clause 106-A — Declared Memory Types
17- Clause 244 — Rot Intolerance
18- Clause 245 — Declarative Enforcement
19- Clause 246 — Statelessness Mandate
20- Clause 251 — Declarative Memory Conversion
21- Clause 503 — Cognitive Load Transfer
22"""
24import abc
25import logging
26from abc import abstractmethod
27from pathlib import Path
28from typing import TYPE_CHECKING, List, Optional, Union
30from openhcs.constants.constants import VariableComponents, GroupBy, DEFAULT_VARIABLE_COMPONENTS
31from openhcs.constants.input_source import InputSource
32from openhcs.core.config import PathPlanningConfig, MaterializationPathConfig
34# ProcessingContext is used in type hints
35if TYPE_CHECKING: 35 ↛ 36line 35 didn't jump to line 36 because the condition on line 35 was never true
36 from openhcs.core.context.processing_context import ProcessingContext
37# StepResult is no longer returned by process()
def get_step_id(step: 'AbstractStep') -> str:
    """
    Derive a step's ID from the step object itself.

    The ID is the decimal string form of the object's ``id()``. Because it is
    computed from the reference, callers can obtain it without the step
    carrying an ID attribute of its own.

    Args:
        step: The step object whose ID is wanted.

    Returns:
        ``str(id(step))`` for the given object.

    Note:
        AbstractStep.__init__() computes ``step_id`` the same way
        (``str(id(self))``), so both yield the same string for one object.
    """
    object_identity = id(step)
    return str(object_identity)
class AbstractStep(abc.ABC):
    """
    Abstract base class for all steps in the OpenHCS pipeline.

    This class defines the interface that all steps must implement.
    Steps are stateful during pipeline definition and compilation (holding attributes
    like name, input/output memory types, etc.). After compilation, these attributes
    are stripped by the StepAttributeStripper, and the step instances become
    stateless shells. During execution, steps operate solely based on the
    ProcessingContext (which is frozen) and their specific plan within
    context.step_plans.

    Input Source Control:

    The input_source parameter controls where a step reads its input data:

    - InputSource.PREVIOUS_STEP (default): Standard pipeline chaining where the step
      reads from the output directory of the previous step. This maintains normal
      sequential data flow.

    - InputSource.PIPELINE_START: The step reads from the original pipeline input
      directory, bypassing all previous step outputs. This replaces the @chain_breaker
      decorator functionality and is used for position generation and quality control.

    Usage Examples:

    Standard processing step (default):
    ```python
    step = FunctionStep(
        func=my_processing_function,
        name="process_images"
        # input_source defaults to InputSource.PREVIOUS_STEP
    )
    ```

    Position generation accessing original images:
    ```python
    step = FunctionStep(
        func=ashlar_compute_tile_positions_gpu,
        name="compute_positions",
        input_source=InputSource.PIPELINE_START
    )
    ```

    # Clause 3 — Declarative Primacy
    # Clause 66 — Immutability After Construction
    # Clause 88 — No Inferred Capabilities
    # Clause 106-A — Declared Memory Types
    # Clause 246 — Statelessness Mandate
    # Clause 251 — Declarative Memory Conversion
    # Clause 503 — Cognitive Load Transfer
    """

    # Step metadata - these are primarily used during pipeline definition and compilation
    step_id: str
    enabled: bool = True
    description: Optional[str] = None
    name: str  # Made non-optional, defaults to class name

    # Attributes like input_memory_type, output_memory_type, etc.,
    # are defined in concrete subclasses (e.g., FunctionStep) as needed.

    def __init__(
        self,
        *,  # Force keyword-only arguments
        name: Optional[str] = None,
        variable_components: List[VariableComponents] = DEFAULT_VARIABLE_COMPONENTS,
        group_by: Optional[GroupBy] = None,
        __input_dir__: Optional[Union[str, Path]] = None,  # Internal: Used during path planning
        __output_dir__: Optional[Union[str, Path]] = None,  # Internal: Used during path planning
        input_source: InputSource = InputSource.PREVIOUS_STEP,
        materialization_config: Optional['MaterializationPathConfig'] = None
    ) -> None:
        """
        Initialize a step. These attributes are primarily used during the
        pipeline definition and compilation phase. After compilation, step
        instances are stripped of these attributes by StepAttributeStripper
        to enforce statelessness during execution.

        Args:
            name: Human-readable name for the step. Defaults to class name
                  (also used when an empty string is passed).
            variable_components: List of variable components for this step.
                  Stored by reference, not copied.
            group_by: Optional grouping hint for step execution.
            __input_dir__: Internal hint for input directory, used by path planner.
                           Dunder naming indicates this is a compiler-internal field.
            __output_dir__: Internal hint for output directory, used by path planner.
                            Dunder naming indicates this is a compiler-internal field.
            input_source: Input source strategy for this step. Defaults to PREVIOUS_STEP
                         for normal pipeline chaining. Use PIPELINE_START to access
                         original input data (replaces @chain_breaker decorator).
            materialization_config: Optional PathPlanningConfig or MaterializationPathConfig for per-step materialized output.
                                   When provided, enables saving materialized copy of step output
                                   to custom location in addition to normal memory backend processing.
                                   Use MaterializationPathConfig() for safe defaults that prevent path collisions.
        """
        # `or` (rather than an `is None` check) means any falsy name falls back
        # to the class name.
        self.name = name or self.__class__.__name__
        # NOTE(review): when the default is used, every step holds a reference to
        # the same DEFAULT_VARIABLE_COMPONENTS object; mutating it in place would
        # affect all such steps. Replace the attribute instead of mutating it.
        self.variable_components = variable_components
        self.group_by = group_by
        # Names that both start and end with double underscores are not
        # name-mangled, so these are stored under exactly these attribute names.
        self.__input_dir__ = __input_dir__
        self.__output_dir__ = __output_dir__
        self.input_source = input_source
        self.materialization_config = materialization_config

        # Generate a step_id based on object id at instantiation.
        # This ID is used to link the step object to its plan in the context.
        # NOTE(review): id() is only guaranteed unique among objects that are
        # alive at the same time — confirm steps are never recreated or copied
        # between compilation and execution.
        self.step_id = str(id(self))

        logger_instance = logging.getLogger(__name__)
        # Lazy %-style arguments: the message is only formatted when DEBUG
        # logging is actually enabled. The rendered text is unchanged.
        logger_instance.debug(
            "Created step '%s' (type: %s) with ID %s",
            self.name,
            self.__class__.__name__,
            self.step_id,
        )

    @abc.abstractmethod
    def process(self, context: 'ProcessingContext') -> None:
        """
        Process the step with the given context.

        This method must be implemented by all step subclasses.
        During execution, the step instance is stateless. All necessary
        configuration and paths are retrieved from `context.step_plans[self.step_id]`.
        The `context` itself is frozen and must not be modified.
        Outputs are written to VFS via `context.filemanager` based on the step's plan.
        This method returns None.

        Args:
            context: The frozen ProcessingContext containing all required fields,
                     including step_plans and filemanager.

        Raises:
            NotImplementedError: If a subclass delegates to this base
                implementation instead of providing its own.

        # Clause 246 — Statelessness Mandate
        # Clause 21 — Context Immunity (Context is read-only for steps)
        """
        raise NotImplementedError("AbstractStep.process() must be implemented by subclasses")